Remove trailing period from various diagnostic messages (PR translation/79923)
[official-gcc.git] / gcc / config / arm / arm.c
blob 511e16377640894c143e905faace17c72044b5aa
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "reload.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60 #include "dumpfile.h"
61 #include "target-globals.h"
62 #include "builtins.h"
63 #include "tm-constrs.h"
64 #include "rtl-iter.h"
65 #include "optabs-libfuncs.h"
66 #include "gimplify.h"
68 /* This file should be included last. */
69 #include "target-def.h"
71 /* Forward definitions of types. */
72 typedef struct minipool_node Mnode;
73 typedef struct minipool_fixup Mfix;
75 void (*arm_lang_output_object_attributes_hook)(void);
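/* Holds the (at most four) immediates chosen by optimal_immediate_sequence
   below when a constant is split into a sequence of ARM immediate operands.  */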
77 struct four_ints
78 {
79 int i[4];
80 };
82 /* Forward function declarations. */
83 static bool arm_const_not_ok_for_debug_p (rtx);
84 static bool arm_needs_doubleword_align (machine_mode, const_tree);
85 static int arm_compute_static_chain_stack_bytes (void);
86 static arm_stack_offsets *arm_get_frame_offsets (void);
87 static void arm_add_gc_roots (void);
88 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
89 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
90 static unsigned bit_count (unsigned long);
91 static unsigned bitmap_popcount (const sbitmap);
92 static int arm_address_register_rtx_p (rtx, int);
93 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
94 static bool is_called_in_ARM_mode (tree);
95 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
96 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
97 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
98 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
99 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
100 inline static int thumb1_index_register_rtx_p (rtx, int);
101 static int thumb_far_jump_used_p (void);
102 static bool thumb_force_lr_save (void);
103 static unsigned arm_size_return_regs (void);
104 static bool arm_assemble_integer (rtx, unsigned int, int);
105 static void arm_print_operand (FILE *, rtx, int);
106 static void arm_print_operand_address (FILE *, machine_mode, rtx);
107 static bool arm_print_operand_punct_valid_p (unsigned char code);
108 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
109 static arm_cc get_arm_condition_code (rtx);
110 static const char *output_multi_immediate (rtx *, const char *, const char *,
111 int, HOST_WIDE_INT);
112 static const char *shift_op (rtx, HOST_WIDE_INT *);
113 static struct machine_function *arm_init_machine_status (void);
114 static void thumb_exit (FILE *, int);
115 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
116 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
117 static Mnode *add_minipool_forward_ref (Mfix *);
118 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
119 static Mnode *add_minipool_backward_ref (Mfix *);
120 static void assign_minipool_offsets (Mfix *);
121 static void arm_print_value (FILE *, rtx);
122 static void dump_minipool (rtx_insn *);
123 static int arm_barrier_cost (rtx_insn *);
124 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
125 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
126 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
127 machine_mode, rtx);
128 static void arm_reorg (void);
129 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
130 static unsigned long arm_compute_save_reg0_reg12_mask (void);
131 static unsigned long arm_compute_save_reg_mask (void);
132 static unsigned long arm_isr_value (tree);
133 static unsigned long arm_compute_func_type (void);
134 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
135 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
136 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
137 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
138 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
139 #endif
140 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
141 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
142 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
143 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
144 static int arm_comp_type_attributes (const_tree, const_tree);
145 static void arm_set_default_type_attributes (tree);
146 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
147 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
148 static int optimal_immediate_sequence (enum rtx_code code,
149 unsigned HOST_WIDE_INT val,
150 struct four_ints *return_sequence);
151 static int optimal_immediate_sequence_1 (enum rtx_code code,
152 unsigned HOST_WIDE_INT val,
153 struct four_ints *return_sequence,
154 int i);
155 static int arm_get_strip_length (int);
156 static bool arm_function_ok_for_sibcall (tree, tree);
157 static machine_mode arm_promote_function_mode (const_tree,
158 machine_mode, int *,
159 const_tree, int);
160 static bool arm_return_in_memory (const_tree, const_tree);
161 static rtx arm_function_value (const_tree, const_tree, bool);
162 static rtx arm_libcall_value_1 (machine_mode);
163 static rtx arm_libcall_value (machine_mode, const_rtx);
164 static bool arm_function_value_regno_p (const unsigned int);
165 static void arm_internal_label (FILE *, const char *, unsigned long);
166 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
167 tree);
168 static bool arm_have_conditional_execution (void);
169 static bool arm_cannot_force_const_mem (machine_mode, rtx);
170 static bool arm_legitimate_constant_p (machine_mode, rtx);
171 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
172 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
173 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
174 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
175 static void emit_constant_insn (rtx cond, rtx pattern);
176 static rtx_insn *emit_set_insn (rtx, rtx);
177 static rtx emit_multi_reg_push (unsigned long, unsigned long);
178 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
179 tree, bool);
180 static rtx arm_function_arg (cumulative_args_t, machine_mode,
181 const_tree, bool);
182 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
183 const_tree, bool);
184 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
185 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
186 const_tree);
187 static rtx aapcs_libcall_value (machine_mode);
188 static int aapcs_select_return_coproc (const_tree, const_tree);
190 #ifdef OBJECT_FORMAT_ELF
191 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
192 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
193 #endif
194 #ifndef ARM_PE
195 static void arm_encode_section_info (tree, rtx, int);
196 #endif
198 static void arm_file_end (void);
199 static void arm_file_start (void);
200 static void arm_insert_attributes (tree, tree *);
202 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
203 tree, int *, int);
204 static bool arm_pass_by_reference (cumulative_args_t,
205 machine_mode, const_tree, bool);
206 static bool arm_promote_prototypes (const_tree);
207 static bool arm_default_short_enums (void);
208 static bool arm_align_anon_bitfield (void);
209 static bool arm_return_in_msb (const_tree);
210 static bool arm_must_pass_in_stack (machine_mode, const_tree);
211 static bool arm_return_in_memory (const_tree, const_tree);
212 #if ARM_UNWIND_INFO
213 static void arm_unwind_emit (FILE *, rtx_insn *);
214 static bool arm_output_ttype (rtx);
215 static void arm_asm_emit_except_personality (rtx);
216 #endif
217 static void arm_asm_init_sections (void);
218 static rtx arm_dwarf_register_span (rtx);
220 static tree arm_cxx_guard_type (void);
221 static bool arm_cxx_guard_mask_bit (void);
222 static tree arm_get_cookie_size (tree);
223 static bool arm_cookie_has_size (void);
224 static bool arm_cxx_cdtor_returns_this (void);
225 static bool arm_cxx_key_method_may_be_inline (void);
226 static void arm_cxx_determine_class_data_visibility (tree);
227 static bool arm_cxx_class_data_always_comdat (void);
228 static bool arm_cxx_use_aeabi_atexit (void);
229 static void arm_init_libfuncs (void);
230 static tree arm_build_builtin_va_list (void);
231 static void arm_expand_builtin_va_start (tree, rtx);
232 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
233 static void arm_option_override (void);
234 static void arm_option_restore (struct gcc_options *,
235 struct cl_target_option *);
236 static void arm_override_options_after_change (void);
237 static void arm_option_print (FILE *, int, struct cl_target_option *);
238 static void arm_set_current_function (tree);
239 static bool arm_can_inline_p (tree, tree);
240 static void arm_relayout_function (tree);
241 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
242 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
243 static bool arm_sched_can_speculate_insn (rtx_insn *);
244 static bool arm_macro_fusion_p (void);
245 static bool arm_cannot_copy_insn_p (rtx_insn *);
246 static int arm_issue_rate (void);
247 static int arm_first_cycle_multipass_dfa_lookahead (void);
248 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
249 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
250 static bool arm_output_addr_const_extra (FILE *, rtx);
251 static bool arm_allocate_stack_slots_for_args (void);
252 static bool arm_warn_func_return (tree);
253 static tree arm_promoted_type (const_tree t);
254 static bool arm_scalar_mode_supported_p (machine_mode);
255 static bool arm_frame_pointer_required (void);
256 static bool arm_can_eliminate (const int, const int);
257 static void arm_asm_trampoline_template (FILE *);
258 static void arm_trampoline_init (rtx, tree, rtx);
259 static rtx arm_trampoline_adjust_address (rtx);
260 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
261 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
262 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
263 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
264 static bool arm_array_mode_supported_p (machine_mode,
265 unsigned HOST_WIDE_INT);
266 static machine_mode arm_preferred_simd_mode (machine_mode);
267 static bool arm_class_likely_spilled_p (reg_class_t);
268 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
269 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
270 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
271 const_tree type,
272 int misalignment,
273 bool is_packed);
274 static void arm_conditional_register_usage (void);
275 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
276 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
277 static unsigned int arm_autovectorize_vector_sizes (void);
278 static int arm_default_branch_cost (bool, bool);
279 static int arm_cortex_a5_branch_cost (bool, bool);
280 static int arm_cortex_m_branch_cost (bool, bool);
281 static int arm_cortex_m7_branch_cost (bool, bool);
283 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
284 const unsigned char *sel);
286 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
288 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
289 tree vectype,
290 int misalign ATTRIBUTE_UNUSED);
291 static unsigned arm_add_stmt_cost (void *data, int count,
292 enum vect_cost_for_stmt kind,
293 struct _stmt_vec_info *stmt_info,
294 int misalign,
295 enum vect_cost_model_location where);
297 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
298 bool op0_preserve_value);
299 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
301 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
302 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
303 const_tree);
304 static section *arm_function_section (tree, enum node_frequency, bool, bool);
305 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
306 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
307 int reloc);
308 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
309 static machine_mode arm_floatn_mode (int, bool);
311 /* Table of machine attributes. */
312 static const struct attribute_spec arm_attribute_table[] =
313 {
314 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
315 affects_type_identity } */
316 /* Function calls made to this symbol must be done indirectly, because
317 it may lie outside of the 26 bit addressing range of a normal function
318 call. */
319 { "long_call", 0, 0, false, true, true, NULL, false },
320 /* Whereas these functions are always known to reside within the 26 bit
321 addressing range. */
322 { "short_call", 0, 0, false, true, true, NULL, false },
323 /* Specify the procedure call conventions for a function. */
324 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
325 false },
326 /* Interrupt Service Routines have special prologue and epilogue requirements. */
327 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
328 false },
329 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
330 false },
331 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
332 false },
333 #ifdef ARM_PE
334 /* ARM/PE has three new attributes:
335 interfacearm - ?
336 dllexport - for exporting a function/variable that will live in a dll
337 dllimport - for importing a function/variable from a dll
339 Microsoft allows multiple declspecs in one __declspec, separating
340 them with spaces. We do NOT support this. Instead, use __declspec
341 multiple times.
342 */
343 { "dllimport", 0, 0, true, false, false, NULL, false },
344 { "dllexport", 0, 0, true, false, false, NULL, false },
345 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
346 false },
347 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
348 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
349 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
350 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
351 false },
352 #endif
353 /* ARMv8-M Security Extensions support. */
354 { "cmse_nonsecure_entry", 0, 0, true, false, false,
355 arm_handle_cmse_nonsecure_entry, false },
356 { "cmse_nonsecure_call", 0, 0, true, false, false,
357 arm_handle_cmse_nonsecure_call, true },
358 { NULL, 0, 0, false, false, false, NULL, false }
359 };
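/* For example (hypothetical declarations), user code selects these attributes
   with the GNU attribute syntax:
     void isr_handler (void) __attribute__ ((interrupt ("IRQ")));
     int far_func (int) __attribute__ ((long_call));  */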
361 /* Initialize the GCC target structure. */
362 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
363 #undef TARGET_MERGE_DECL_ATTRIBUTES
364 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
365 #endif
367 #undef TARGET_LEGITIMIZE_ADDRESS
368 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
370 #undef TARGET_ATTRIBUTE_TABLE
371 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
373 #undef TARGET_INSERT_ATTRIBUTES
374 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
376 #undef TARGET_ASM_FILE_START
377 #define TARGET_ASM_FILE_START arm_file_start
378 #undef TARGET_ASM_FILE_END
379 #define TARGET_ASM_FILE_END arm_file_end
381 #undef TARGET_ASM_ALIGNED_SI_OP
382 #define TARGET_ASM_ALIGNED_SI_OP NULL
383 #undef TARGET_ASM_INTEGER
384 #define TARGET_ASM_INTEGER arm_assemble_integer
386 #undef TARGET_PRINT_OPERAND
387 #define TARGET_PRINT_OPERAND arm_print_operand
388 #undef TARGET_PRINT_OPERAND_ADDRESS
389 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
390 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
391 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
393 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
394 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
396 #undef TARGET_ASM_FUNCTION_PROLOGUE
397 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
399 #undef TARGET_ASM_FUNCTION_EPILOGUE
400 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
402 #undef TARGET_CAN_INLINE_P
403 #define TARGET_CAN_INLINE_P arm_can_inline_p
405 #undef TARGET_RELAYOUT_FUNCTION
406 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
408 #undef TARGET_OPTION_OVERRIDE
409 #define TARGET_OPTION_OVERRIDE arm_option_override
411 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
412 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
414 #undef TARGET_OPTION_RESTORE
415 #define TARGET_OPTION_RESTORE arm_option_restore
417 #undef TARGET_OPTION_PRINT
418 #define TARGET_OPTION_PRINT arm_option_print
420 #undef TARGET_COMP_TYPE_ATTRIBUTES
421 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
423 #undef TARGET_SCHED_CAN_SPECULATE_INSN
424 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
426 #undef TARGET_SCHED_MACRO_FUSION_P
427 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
429 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
430 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
432 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
433 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
435 #undef TARGET_SCHED_ADJUST_COST
436 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
438 #undef TARGET_SET_CURRENT_FUNCTION
439 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
441 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
442 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
444 #undef TARGET_SCHED_REORDER
445 #define TARGET_SCHED_REORDER arm_sched_reorder
447 #undef TARGET_REGISTER_MOVE_COST
448 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
450 #undef TARGET_MEMORY_MOVE_COST
451 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
453 #undef TARGET_ENCODE_SECTION_INFO
454 #ifdef ARM_PE
455 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
456 #else
457 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
458 #endif
460 #undef TARGET_STRIP_NAME_ENCODING
461 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
463 #undef TARGET_ASM_INTERNAL_LABEL
464 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
466 #undef TARGET_FLOATN_MODE
467 #define TARGET_FLOATN_MODE arm_floatn_mode
469 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
470 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
472 #undef TARGET_FUNCTION_VALUE
473 #define TARGET_FUNCTION_VALUE arm_function_value
475 #undef TARGET_LIBCALL_VALUE
476 #define TARGET_LIBCALL_VALUE arm_libcall_value
478 #undef TARGET_FUNCTION_VALUE_REGNO_P
479 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
481 #undef TARGET_ASM_OUTPUT_MI_THUNK
482 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
483 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
484 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
486 #undef TARGET_RTX_COSTS
487 #define TARGET_RTX_COSTS arm_rtx_costs
488 #undef TARGET_ADDRESS_COST
489 #define TARGET_ADDRESS_COST arm_address_cost
491 #undef TARGET_SHIFT_TRUNCATION_MASK
492 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
493 #undef TARGET_VECTOR_MODE_SUPPORTED_P
494 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
495 #undef TARGET_ARRAY_MODE_SUPPORTED_P
496 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
497 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
498 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
499 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
500 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
501 arm_autovectorize_vector_sizes
503 #undef TARGET_MACHINE_DEPENDENT_REORG
504 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
506 #undef TARGET_INIT_BUILTINS
507 #define TARGET_INIT_BUILTINS arm_init_builtins
508 #undef TARGET_EXPAND_BUILTIN
509 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
510 #undef TARGET_BUILTIN_DECL
511 #define TARGET_BUILTIN_DECL arm_builtin_decl
513 #undef TARGET_INIT_LIBFUNCS
514 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
516 #undef TARGET_PROMOTE_FUNCTION_MODE
517 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
518 #undef TARGET_PROMOTE_PROTOTYPES
519 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
520 #undef TARGET_PASS_BY_REFERENCE
521 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
522 #undef TARGET_ARG_PARTIAL_BYTES
523 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
524 #undef TARGET_FUNCTION_ARG
525 #define TARGET_FUNCTION_ARG arm_function_arg
526 #undef TARGET_FUNCTION_ARG_ADVANCE
527 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
528 #undef TARGET_FUNCTION_ARG_BOUNDARY
529 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
531 #undef TARGET_SETUP_INCOMING_VARARGS
532 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
534 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
535 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
537 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
538 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
539 #undef TARGET_TRAMPOLINE_INIT
540 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
541 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
542 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
544 #undef TARGET_WARN_FUNC_RETURN
545 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
547 #undef TARGET_DEFAULT_SHORT_ENUMS
548 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
550 #undef TARGET_ALIGN_ANON_BITFIELD
551 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
553 #undef TARGET_NARROW_VOLATILE_BITFIELD
554 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
556 #undef TARGET_CXX_GUARD_TYPE
557 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
559 #undef TARGET_CXX_GUARD_MASK_BIT
560 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
562 #undef TARGET_CXX_GET_COOKIE_SIZE
563 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
565 #undef TARGET_CXX_COOKIE_HAS_SIZE
566 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
568 #undef TARGET_CXX_CDTOR_RETURNS_THIS
569 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
571 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
572 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
574 #undef TARGET_CXX_USE_AEABI_ATEXIT
575 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
577 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
578 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
579 arm_cxx_determine_class_data_visibility
581 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
582 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
584 #undef TARGET_RETURN_IN_MSB
585 #define TARGET_RETURN_IN_MSB arm_return_in_msb
587 #undef TARGET_RETURN_IN_MEMORY
588 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
590 #undef TARGET_MUST_PASS_IN_STACK
591 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
593 #if ARM_UNWIND_INFO
594 #undef TARGET_ASM_UNWIND_EMIT
595 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
597 /* EABI unwinding tables use a different format for the typeinfo tables. */
598 #undef TARGET_ASM_TTYPE
599 #define TARGET_ASM_TTYPE arm_output_ttype
601 #undef TARGET_ARM_EABI_UNWINDER
602 #define TARGET_ARM_EABI_UNWINDER true
604 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
605 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
607 #endif /* ARM_UNWIND_INFO */
609 #undef TARGET_ASM_INIT_SECTIONS
610 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
612 #undef TARGET_DWARF_REGISTER_SPAN
613 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
615 #undef TARGET_CANNOT_COPY_INSN_P
616 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
618 #ifdef HAVE_AS_TLS
619 #undef TARGET_HAVE_TLS
620 #define TARGET_HAVE_TLS true
621 #endif
623 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
624 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
626 #undef TARGET_LEGITIMATE_CONSTANT_P
627 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
629 #undef TARGET_CANNOT_FORCE_CONST_MEM
630 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
632 #undef TARGET_MAX_ANCHOR_OFFSET
633 #define TARGET_MAX_ANCHOR_OFFSET 4095
635 /* The minimum is set such that the total size of the block
636 for a particular anchor is -4088 + 1 + 4095 bytes, which is
637 divisible by eight, ensuring natural spacing of anchors. */
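/* That is, anchored offsets span -4088 .. +4095, so a block covers
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023.  */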
638 #undef TARGET_MIN_ANCHOR_OFFSET
639 #define TARGET_MIN_ANCHOR_OFFSET -4088
641 #undef TARGET_SCHED_ISSUE_RATE
642 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
644 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
645 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
646 arm_first_cycle_multipass_dfa_lookahead
648 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
649 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
650 arm_first_cycle_multipass_dfa_lookahead_guard
652 #undef TARGET_MANGLE_TYPE
653 #define TARGET_MANGLE_TYPE arm_mangle_type
655 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
656 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
658 #undef TARGET_BUILD_BUILTIN_VA_LIST
659 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
660 #undef TARGET_EXPAND_BUILTIN_VA_START
661 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
662 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
663 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
665 #ifdef HAVE_AS_TLS
666 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
667 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
668 #endif
670 #undef TARGET_LEGITIMATE_ADDRESS_P
671 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
673 #undef TARGET_PREFERRED_RELOAD_CLASS
674 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
676 #undef TARGET_PROMOTED_TYPE
677 #define TARGET_PROMOTED_TYPE arm_promoted_type
679 #undef TARGET_SCALAR_MODE_SUPPORTED_P
680 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
682 #undef TARGET_FRAME_POINTER_REQUIRED
683 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
685 #undef TARGET_CAN_ELIMINATE
686 #define TARGET_CAN_ELIMINATE arm_can_eliminate
688 #undef TARGET_CONDITIONAL_REGISTER_USAGE
689 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
691 #undef TARGET_CLASS_LIKELY_SPILLED_P
692 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
694 #undef TARGET_VECTORIZE_BUILTINS
695 #define TARGET_VECTORIZE_BUILTINS
697 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
698 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
699 arm_builtin_vectorized_function
701 #undef TARGET_VECTOR_ALIGNMENT
702 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
704 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
705 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
706 arm_vector_alignment_reachable
708 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
709 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
710 arm_builtin_support_vector_misalignment
712 #undef TARGET_PREFERRED_RENAME_CLASS
713 #define TARGET_PREFERRED_RENAME_CLASS \
714 arm_preferred_rename_class
716 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
717 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
718 arm_vectorize_vec_perm_const_ok
720 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
721 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
722 arm_builtin_vectorization_cost
723 #undef TARGET_VECTORIZE_ADD_STMT_COST
724 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
726 #undef TARGET_CANONICALIZE_COMPARISON
727 #define TARGET_CANONICALIZE_COMPARISON \
728 arm_canonicalize_comparison
730 #undef TARGET_ASAN_SHADOW_OFFSET
731 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
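/* Maximum number of instructions a single IT block may predicate: one when
   -mrestrict-it is in effect, otherwise four.  */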
733 #undef MAX_INSN_PER_IT_BLOCK
734 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
736 #undef TARGET_CAN_USE_DOLOOP_P
737 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
739 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
740 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
742 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
743 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
745 #undef TARGET_SCHED_FUSION_PRIORITY
746 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
748 #undef TARGET_ASM_FUNCTION_SECTION
749 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
751 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
752 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
754 #undef TARGET_SECTION_TYPE_FLAGS
755 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
757 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
758 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
760 #undef TARGET_C_EXCESS_PRECISION
761 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
763 /* Although the architecture reserves bits 0 and 1, only the former is
764 used for ARM/Thumb ISA selection in v7 and earlier versions. */
765 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
766 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
768 struct gcc_target targetm = TARGET_INITIALIZER;
770 /* Obstack for minipool constant handling. */
771 static struct obstack minipool_obstack;
772 static char * minipool_startobj;
774 /* The maximum number of insns skipped which
775 will be conditionalised if possible. */
776 static int max_insns_skipped = 5;
778 extern FILE * asm_out_file;
780 /* True if we are currently building a constant table. */
781 int making_const_table;
783 /* The processor for which instructions should be scheduled. */
784 enum processor_type arm_tune = TARGET_CPU_arm_none;
786 /* The current tuning set. */
787 const struct tune_params *current_tune;
789 /* Which floating point hardware to schedule for. */
790 int arm_fpu_attr;
792 /* Used for Thumb call_via trampolines. */
793 rtx thumb_call_via_label[14];
794 static int thumb_call_reg_needed;
796 /* The bits in this mask specify which instruction scheduling options should
797 be used. */
798 unsigned int tune_flags = 0;
800 /* The highest ARM architecture version supported by the
801 target. */
802 enum base_architecture arm_base_arch = BASE_ARCH_0;
804 /* Active target architecture and tuning. */
806 struct arm_build_target arm_active_target;
808 /* The following are used in the arm.md file as equivalents to bits
809 in the above two flag variables. */
811 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
812 int arm_arch3m = 0;
814 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
815 int arm_arch4 = 0;
817 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
818 int arm_arch4t = 0;
820 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
821 int arm_arch5 = 0;
823 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
824 int arm_arch5e = 0;
826 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
827 int arm_arch5te = 0;
829 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
830 int arm_arch6 = 0;
832 /* Nonzero if this chip supports the ARM 6K extensions. */
833 int arm_arch6k = 0;
835 /* Nonzero if this chip supports the ARM 6KZ extensions. */
836 int arm_arch6kz = 0;
838 /* Nonzero if instructions present in ARMv6-M can be used. */
839 int arm_arch6m = 0;
841 /* Nonzero if this chip supports the ARM 7 extensions. */
842 int arm_arch7 = 0;
844 /* Nonzero if this chip supports the ARM 7ve extensions. */
845 int arm_arch7ve = 0;
847 /* Nonzero if instructions not present in the 'M' profile can be used. */
848 int arm_arch_notm = 0;
850 /* Nonzero if instructions present in ARMv7E-M can be used. */
851 int arm_arch7em = 0;
853 /* Nonzero if instructions present in ARMv8 can be used. */
854 int arm_arch8 = 0;
856 /* Nonzero if this chip supports the ARMv8.1 extensions. */
857 int arm_arch8_1 = 0;
859 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
860 int arm_arch8_2 = 0;
862 /* Nonzero if this chip supports the FP16 instructions extension of ARM
863 Architecture 8.2. */
864 int arm_fp16_inst = 0;
866 /* Nonzero if this chip can benefit from load scheduling. */
867 int arm_ld_sched = 0;
869 /* Nonzero if this chip is a StrongARM. */
870 int arm_tune_strongarm = 0;
872 /* Nonzero if this chip supports Intel Wireless MMX technology. */
873 int arm_arch_iwmmxt = 0;
875 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
876 int arm_arch_iwmmxt2 = 0;
878 /* Nonzero if this chip is an XScale. */
879 int arm_arch_xscale = 0;
881 /* Nonzero if tuning for XScale.  */
882 int arm_tune_xscale = 0;
884 /* Nonzero if we want to tune for stores that access the write-buffer.
885 This typically means an ARM6 or ARM7 with MMU or MPU. */
886 int arm_tune_wbuf = 0;
888 /* Nonzero if tuning for Cortex-A9. */
889 int arm_tune_cortex_a9 = 0;
891 /* Nonzero if we should define __THUMB_INTERWORK__ in the
892 preprocessor.
893 XXX This is a bit of a hack, it's intended to help work around
894 problems in GLD which doesn't understand that armv5t code is
895 interworking clean. */
896 int arm_cpp_interwork = 0;
898 /* Nonzero if chip supports Thumb 1. */
899 int arm_arch_thumb1;
901 /* Nonzero if chip supports Thumb 2. */
902 int arm_arch_thumb2;
904 /* Nonzero if chip supports integer division instruction. */
905 int arm_arch_arm_hwdiv;
906 int arm_arch_thumb_hwdiv;
908 /* Nonzero if chip disallows volatile memory access in IT block. */
909 int arm_arch_no_volatile_ce;
911 /* Nonzero if we should use Neon to handle 64-bit operations rather
912 than core registers. */
913 int prefer_neon_for_64bits = 0;
915 /* Nonzero if we shouldn't use literal pools. */
916 bool arm_disable_literal_pool = false;
918 /* The register number to be used for the PIC offset register. */
919 unsigned arm_pic_register = INVALID_REGNUM;
921 enum arm_pcs arm_pcs_default;
923 /* For an explanation of these variables, see final_prescan_insn below. */
924 int arm_ccfsm_state;
925 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
926 enum arm_cond_code arm_current_cc;
928 rtx arm_target_insn;
929 int arm_target_label;
930 /* The number of conditionally executed insns, including the current insn. */
931 int arm_condexec_count = 0;
932 /* A bitmask specifying the patterns for the IT block.
933 Zero means do not output an IT block before this insn. */
934 int arm_condexec_mask = 0;
935 /* The number of bits used in arm_condexec_mask. */
936 int arm_condexec_masklen = 0;
938 /* Nonzero if chip supports the ARMv8 CRC instructions. */
939 int arm_arch_crc = 0;
941 /* Nonzero if chip supports the ARMv8-M security extensions. */
942 int arm_arch_cmse = 0;
944 /* Nonzero if the core has a very small, high-latency, multiply unit. */
945 int arm_m_profile_small_mul = 0;
947 /* The condition codes of the ARM, and the inverse function. */
948 static const char * const arm_condition_codes[] =
949 {
950 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
951 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
952 };
954 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
955 int arm_regs_in_sequence[] =
956 {
957 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
958 };
960 #define ARM_LSL_NAME "lsl"
961 #define streq(string1, string2) (strcmp (string1, string2) == 0)
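/* The low registers (r0-r7) still usable as work registers once the Thumb
   hard frame pointer, stack pointer, program counter and PIC register are
   excluded.  */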
963 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
964 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
965 | (1 << PIC_OFFSET_TABLE_REGNUM)))
967 /* Initialization code. */
969 struct processors
970 {
971 const char *const name;
972 enum processor_type core;
973 unsigned int tune_flags;
974 const char *arch;
975 enum base_architecture base_arch;
976 enum isa_feature isa_bits[isa_num_bits];
977 const struct tune_params *const tune;
978 };
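/* Prefetch tuning triples: number of prefetch slots, L1 cache size and L1
   cache line size.  ARM_PREFETCH_NOT_BENEFICIAL describes a core on which
   issuing software prefetches is not worthwhile.  */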
981 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
982 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
983 { \
984 num_slots, \
985 l1_size, \
986 l1_line_size \
987 }
989 /* arm generic vectorizer costs. */
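/* These per-operation costs are referenced from the tune_params tables below
   (as &arm_default_vec_cost) when weighing vectorized code against its scalar
   equivalent.  */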
990 static const
991 struct cpu_vec_costs arm_default_vec_cost = {
992 1, /* scalar_stmt_cost. */
993 1, /* scalar_load_cost. */
994 1, /* scalar_store_cost. */
995 1, /* vec_stmt_cost. */
996 1, /* vec_to_scalar_cost. */
997 1, /* scalar_to_vec_cost. */
998 1, /* vec_align_load_cost. */
999 1, /* vec_unalign_load_cost. */
1000 1, /* vec_unalign_store_cost. */
1001 1, /* vec_store_cost. */
1002 3, /* cond_taken_branch_cost. */
1003 1, /* cond_not_taken_branch_cost. */
1004 };
1006 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1007 #include "aarch-cost-tables.h"
1011 const struct cpu_cost_table cortexa9_extra_costs =
1013 /* ALU */
1015 0, /* arith. */
1016 0, /* logical. */
1017 0, /* shift. */
1018 COSTS_N_INSNS (1), /* shift_reg. */
1019 COSTS_N_INSNS (1), /* arith_shift. */
1020 COSTS_N_INSNS (2), /* arith_shift_reg. */
1021 0, /* log_shift. */
1022 COSTS_N_INSNS (1), /* log_shift_reg. */
1023 COSTS_N_INSNS (1), /* extend. */
1024 COSTS_N_INSNS (2), /* extend_arith. */
1025 COSTS_N_INSNS (1), /* bfi. */
1026 COSTS_N_INSNS (1), /* bfx. */
1027 0, /* clz. */
1028 0, /* rev. */
1029 0, /* non_exec. */
1030 true /* non_exec_costs_exec. */
1033 /* MULT SImode */
1035 COSTS_N_INSNS (3), /* simple. */
1036 COSTS_N_INSNS (3), /* flag_setting. */
1037 COSTS_N_INSNS (2), /* extend. */
1038 COSTS_N_INSNS (3), /* add. */
1039 COSTS_N_INSNS (2), /* extend_add. */
1040 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1042 /* MULT DImode */
1044 0, /* simple (N/A). */
1045 0, /* flag_setting (N/A). */
1046 COSTS_N_INSNS (4), /* extend. */
1047 0, /* add (N/A). */
1048 COSTS_N_INSNS (4), /* extend_add. */
1049 0 /* idiv (N/A). */
1052 /* LD/ST */
1054 COSTS_N_INSNS (2), /* load. */
1055 COSTS_N_INSNS (2), /* load_sign_extend. */
1056 COSTS_N_INSNS (2), /* ldrd. */
1057 COSTS_N_INSNS (2), /* ldm_1st. */
1058 1, /* ldm_regs_per_insn_1st. */
1059 2, /* ldm_regs_per_insn_subsequent. */
1060 COSTS_N_INSNS (5), /* loadf. */
1061 COSTS_N_INSNS (5), /* loadd. */
1062 COSTS_N_INSNS (1), /* load_unaligned. */
1063 COSTS_N_INSNS (2), /* store. */
1064 COSTS_N_INSNS (2), /* strd. */
1065 COSTS_N_INSNS (2), /* stm_1st. */
1066 1, /* stm_regs_per_insn_1st. */
1067 2, /* stm_regs_per_insn_subsequent. */
1068 COSTS_N_INSNS (1), /* storef. */
1069 COSTS_N_INSNS (1), /* stored. */
1070 COSTS_N_INSNS (1), /* store_unaligned. */
1071 COSTS_N_INSNS (1), /* loadv. */
1072 COSTS_N_INSNS (1) /* storev. */
1075 /* FP SFmode */
1077 COSTS_N_INSNS (14), /* div. */
1078 COSTS_N_INSNS (4), /* mult. */
1079 COSTS_N_INSNS (7), /* mult_addsub. */
1080 COSTS_N_INSNS (30), /* fma. */
1081 COSTS_N_INSNS (3), /* addsub. */
1082 COSTS_N_INSNS (1), /* fpconst. */
1083 COSTS_N_INSNS (1), /* neg. */
1084 COSTS_N_INSNS (3), /* compare. */
1085 COSTS_N_INSNS (3), /* widen. */
1086 COSTS_N_INSNS (3), /* narrow. */
1087 COSTS_N_INSNS (3), /* toint. */
1088 COSTS_N_INSNS (3), /* fromint. */
1089 COSTS_N_INSNS (3) /* roundint. */
1091 /* FP DFmode */
1093 COSTS_N_INSNS (24), /* div. */
1094 COSTS_N_INSNS (5), /* mult. */
1095 COSTS_N_INSNS (8), /* mult_addsub. */
1096 COSTS_N_INSNS (30), /* fma. */
1097 COSTS_N_INSNS (3), /* addsub. */
1098 COSTS_N_INSNS (1), /* fpconst. */
1099 COSTS_N_INSNS (1), /* neg. */
1100 COSTS_N_INSNS (3), /* compare. */
1101 COSTS_N_INSNS (3), /* widen. */
1102 COSTS_N_INSNS (3), /* narrow. */
1103 COSTS_N_INSNS (3), /* toint. */
1104 COSTS_N_INSNS (3), /* fromint. */
1105 COSTS_N_INSNS (3) /* roundint. */
1108 /* Vector */
1110 COSTS_N_INSNS (1) /* alu. */
1114 const struct cpu_cost_table cortexa8_extra_costs =
1116 /* ALU */
1118 0, /* arith. */
1119 0, /* logical. */
1120 COSTS_N_INSNS (1), /* shift. */
1121 0, /* shift_reg. */
1122 COSTS_N_INSNS (1), /* arith_shift. */
1123 0, /* arith_shift_reg. */
1124 COSTS_N_INSNS (1), /* log_shift. */
1125 0, /* log_shift_reg. */
1126 0, /* extend. */
1127 0, /* extend_arith. */
1128 0, /* bfi. */
1129 0, /* bfx. */
1130 0, /* clz. */
1131 0, /* rev. */
1132 0, /* non_exec. */
1133 true /* non_exec_costs_exec. */
1136 /* MULT SImode */
1138 COSTS_N_INSNS (1), /* simple. */
1139 COSTS_N_INSNS (1), /* flag_setting. */
1140 COSTS_N_INSNS (1), /* extend. */
1141 COSTS_N_INSNS (1), /* add. */
1142 COSTS_N_INSNS (1), /* extend_add. */
1143 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1145 /* MULT DImode */
1147 0, /* simple (N/A). */
1148 0, /* flag_setting (N/A). */
1149 COSTS_N_INSNS (2), /* extend. */
1150 0, /* add (N/A). */
1151 COSTS_N_INSNS (2), /* extend_add. */
1152 0 /* idiv (N/A). */
1155 /* LD/ST */
1157 COSTS_N_INSNS (1), /* load. */
1158 COSTS_N_INSNS (1), /* load_sign_extend. */
1159 COSTS_N_INSNS (1), /* ldrd. */
1160 COSTS_N_INSNS (1), /* ldm_1st. */
1161 1, /* ldm_regs_per_insn_1st. */
1162 2, /* ldm_regs_per_insn_subsequent. */
1163 COSTS_N_INSNS (1), /* loadf. */
1164 COSTS_N_INSNS (1), /* loadd. */
1165 COSTS_N_INSNS (1), /* load_unaligned. */
1166 COSTS_N_INSNS (1), /* store. */
1167 COSTS_N_INSNS (1), /* strd. */
1168 COSTS_N_INSNS (1), /* stm_1st. */
1169 1, /* stm_regs_per_insn_1st. */
1170 2, /* stm_regs_per_insn_subsequent. */
1171 COSTS_N_INSNS (1), /* storef. */
1172 COSTS_N_INSNS (1), /* stored. */
1173 COSTS_N_INSNS (1), /* store_unaligned. */
1174 COSTS_N_INSNS (1), /* loadv. */
1175 COSTS_N_INSNS (1) /* storev. */
1178 /* FP SFmode */
1180 COSTS_N_INSNS (36), /* div. */
1181 COSTS_N_INSNS (11), /* mult. */
1182 COSTS_N_INSNS (20), /* mult_addsub. */
1183 COSTS_N_INSNS (30), /* fma. */
1184 COSTS_N_INSNS (9), /* addsub. */
1185 COSTS_N_INSNS (3), /* fpconst. */
1186 COSTS_N_INSNS (3), /* neg. */
1187 COSTS_N_INSNS (6), /* compare. */
1188 COSTS_N_INSNS (4), /* widen. */
1189 COSTS_N_INSNS (4), /* narrow. */
1190 COSTS_N_INSNS (8), /* toint. */
1191 COSTS_N_INSNS (8), /* fromint. */
1192 COSTS_N_INSNS (8) /* roundint. */
1194 /* FP DFmode */
1196 COSTS_N_INSNS (64), /* div. */
1197 COSTS_N_INSNS (16), /* mult. */
1198 COSTS_N_INSNS (25), /* mult_addsub. */
1199 COSTS_N_INSNS (30), /* fma. */
1200 COSTS_N_INSNS (9), /* addsub. */
1201 COSTS_N_INSNS (3), /* fpconst. */
1202 COSTS_N_INSNS (3), /* neg. */
1203 COSTS_N_INSNS (6), /* compare. */
1204 COSTS_N_INSNS (6), /* widen. */
1205 COSTS_N_INSNS (6), /* narrow. */
1206 COSTS_N_INSNS (8), /* toint. */
1207 COSTS_N_INSNS (8), /* fromint. */
1208 COSTS_N_INSNS (8) /* roundint. */
1211 /* Vector */
1213 COSTS_N_INSNS (1) /* alu. */
1217 const struct cpu_cost_table cortexa5_extra_costs =
1219 /* ALU */
1221 0, /* arith. */
1222 0, /* logical. */
1223 COSTS_N_INSNS (1), /* shift. */
1224 COSTS_N_INSNS (1), /* shift_reg. */
1225 COSTS_N_INSNS (1), /* arith_shift. */
1226 COSTS_N_INSNS (1), /* arith_shift_reg. */
1227 COSTS_N_INSNS (1), /* log_shift. */
1228 COSTS_N_INSNS (1), /* log_shift_reg. */
1229 COSTS_N_INSNS (1), /* extend. */
1230 COSTS_N_INSNS (1), /* extend_arith. */
1231 COSTS_N_INSNS (1), /* bfi. */
1232 COSTS_N_INSNS (1), /* bfx. */
1233 COSTS_N_INSNS (1), /* clz. */
1234 COSTS_N_INSNS (1), /* rev. */
1235 0, /* non_exec. */
1236 true /* non_exec_costs_exec. */
1240 /* MULT SImode */
1242 0, /* simple. */
1243 COSTS_N_INSNS (1), /* flag_setting. */
1244 COSTS_N_INSNS (1), /* extend. */
1245 COSTS_N_INSNS (1), /* add. */
1246 COSTS_N_INSNS (1), /* extend_add. */
1247 COSTS_N_INSNS (7) /* idiv. */
1249 /* MULT DImode */
1251 0, /* simple (N/A). */
1252 0, /* flag_setting (N/A). */
1253 COSTS_N_INSNS (1), /* extend. */
1254 0, /* add. */
1255 COSTS_N_INSNS (2), /* extend_add. */
1256 0 /* idiv (N/A). */
1259 /* LD/ST */
1261 COSTS_N_INSNS (1), /* load. */
1262 COSTS_N_INSNS (1), /* load_sign_extend. */
1263 COSTS_N_INSNS (6), /* ldrd. */
1264 COSTS_N_INSNS (1), /* ldm_1st. */
1265 1, /* ldm_regs_per_insn_1st. */
1266 2, /* ldm_regs_per_insn_subsequent. */
1267 COSTS_N_INSNS (2), /* loadf. */
1268 COSTS_N_INSNS (4), /* loadd. */
1269 COSTS_N_INSNS (1), /* load_unaligned. */
1270 COSTS_N_INSNS (1), /* store. */
1271 COSTS_N_INSNS (3), /* strd. */
1272 COSTS_N_INSNS (1), /* stm_1st. */
1273 1, /* stm_regs_per_insn_1st. */
1274 2, /* stm_regs_per_insn_subsequent. */
1275 COSTS_N_INSNS (2), /* storef. */
1276 COSTS_N_INSNS (2), /* stored. */
1277 COSTS_N_INSNS (1), /* store_unaligned. */
1278 COSTS_N_INSNS (1), /* loadv. */
1279 COSTS_N_INSNS (1) /* storev. */
1282 /* FP SFmode */
1284 COSTS_N_INSNS (15), /* div. */
1285 COSTS_N_INSNS (3), /* mult. */
1286 COSTS_N_INSNS (7), /* mult_addsub. */
1287 COSTS_N_INSNS (7), /* fma. */
1288 COSTS_N_INSNS (3), /* addsub. */
1289 COSTS_N_INSNS (3), /* fpconst. */
1290 COSTS_N_INSNS (3), /* neg. */
1291 COSTS_N_INSNS (3), /* compare. */
1292 COSTS_N_INSNS (3), /* widen. */
1293 COSTS_N_INSNS (3), /* narrow. */
1294 COSTS_N_INSNS (3), /* toint. */
1295 COSTS_N_INSNS (3), /* fromint. */
1296 COSTS_N_INSNS (3) /* roundint. */
1298 /* FP DFmode */
1300 COSTS_N_INSNS (30), /* div. */
1301 COSTS_N_INSNS (6), /* mult. */
1302 COSTS_N_INSNS (10), /* mult_addsub. */
1303 COSTS_N_INSNS (7), /* fma. */
1304 COSTS_N_INSNS (3), /* addsub. */
1305 COSTS_N_INSNS (3), /* fpconst. */
1306 COSTS_N_INSNS (3), /* neg. */
1307 COSTS_N_INSNS (3), /* compare. */
1308 COSTS_N_INSNS (3), /* widen. */
1309 COSTS_N_INSNS (3), /* narrow. */
1310 COSTS_N_INSNS (3), /* toint. */
1311 COSTS_N_INSNS (3), /* fromint. */
1312 COSTS_N_INSNS (3) /* roundint. */
1315 /* Vector */
1317 COSTS_N_INSNS (1) /* alu. */
1322 const struct cpu_cost_table cortexa7_extra_costs =
1324 /* ALU */
1326 0, /* arith. */
1327 0, /* logical. */
1328 COSTS_N_INSNS (1), /* shift. */
1329 COSTS_N_INSNS (1), /* shift_reg. */
1330 COSTS_N_INSNS (1), /* arith_shift. */
1331 COSTS_N_INSNS (1), /* arith_shift_reg. */
1332 COSTS_N_INSNS (1), /* log_shift. */
1333 COSTS_N_INSNS (1), /* log_shift_reg. */
1334 COSTS_N_INSNS (1), /* extend. */
1335 COSTS_N_INSNS (1), /* extend_arith. */
1336 COSTS_N_INSNS (1), /* bfi. */
1337 COSTS_N_INSNS (1), /* bfx. */
1338 COSTS_N_INSNS (1), /* clz. */
1339 COSTS_N_INSNS (1), /* rev. */
1340 0, /* non_exec. */
1341 true /* non_exec_costs_exec. */
1345 /* MULT SImode */
1347 0, /* simple. */
1348 COSTS_N_INSNS (1), /* flag_setting. */
1349 COSTS_N_INSNS (1), /* extend. */
1350 COSTS_N_INSNS (1), /* add. */
1351 COSTS_N_INSNS (1), /* extend_add. */
1352 COSTS_N_INSNS (7) /* idiv. */
1354 /* MULT DImode */
1356 0, /* simple (N/A). */
1357 0, /* flag_setting (N/A). */
1358 COSTS_N_INSNS (1), /* extend. */
1359 0, /* add. */
1360 COSTS_N_INSNS (2), /* extend_add. */
1361 0 /* idiv (N/A). */
1364 /* LD/ST */
1366 COSTS_N_INSNS (1), /* load. */
1367 COSTS_N_INSNS (1), /* load_sign_extend. */
1368 COSTS_N_INSNS (3), /* ldrd. */
1369 COSTS_N_INSNS (1), /* ldm_1st. */
1370 1, /* ldm_regs_per_insn_1st. */
1371 2, /* ldm_regs_per_insn_subsequent. */
1372 COSTS_N_INSNS (2), /* loadf. */
1373 COSTS_N_INSNS (2), /* loadd. */
1374 COSTS_N_INSNS (1), /* load_unaligned. */
1375 COSTS_N_INSNS (1), /* store. */
1376 COSTS_N_INSNS (3), /* strd. */
1377 COSTS_N_INSNS (1), /* stm_1st. */
1378 1, /* stm_regs_per_insn_1st. */
1379 2, /* stm_regs_per_insn_subsequent. */
1380 COSTS_N_INSNS (2), /* storef. */
1381 COSTS_N_INSNS (2), /* stored. */
1382 COSTS_N_INSNS (1), /* store_unaligned. */
1383 COSTS_N_INSNS (1), /* loadv. */
1384 COSTS_N_INSNS (1) /* storev. */
1387 /* FP SFmode */
1389 COSTS_N_INSNS (15), /* div. */
1390 COSTS_N_INSNS (3), /* mult. */
1391 COSTS_N_INSNS (7), /* mult_addsub. */
1392 COSTS_N_INSNS (7), /* fma. */
1393 COSTS_N_INSNS (3), /* addsub. */
1394 COSTS_N_INSNS (3), /* fpconst. */
1395 COSTS_N_INSNS (3), /* neg. */
1396 COSTS_N_INSNS (3), /* compare. */
1397 COSTS_N_INSNS (3), /* widen. */
1398 COSTS_N_INSNS (3), /* narrow. */
1399 COSTS_N_INSNS (3), /* toint. */
1400 COSTS_N_INSNS (3), /* fromint. */
1401 COSTS_N_INSNS (3) /* roundint. */
1403 /* FP DFmode */
1405 COSTS_N_INSNS (30), /* div. */
1406 COSTS_N_INSNS (6), /* mult. */
1407 COSTS_N_INSNS (10), /* mult_addsub. */
1408 COSTS_N_INSNS (7), /* fma. */
1409 COSTS_N_INSNS (3), /* addsub. */
1410 COSTS_N_INSNS (3), /* fpconst. */
1411 COSTS_N_INSNS (3), /* neg. */
1412 COSTS_N_INSNS (3), /* compare. */
1413 COSTS_N_INSNS (3), /* widen. */
1414 COSTS_N_INSNS (3), /* narrow. */
1415 COSTS_N_INSNS (3), /* toint. */
1416 COSTS_N_INSNS (3), /* fromint. */
1417 COSTS_N_INSNS (3) /* roundint. */
1420 /* Vector */
1422 COSTS_N_INSNS (1) /* alu. */
1426 const struct cpu_cost_table cortexa12_extra_costs =
1428 /* ALU */
1430 0, /* arith. */
1431 0, /* logical. */
1432 0, /* shift. */
1433 COSTS_N_INSNS (1), /* shift_reg. */
1434 COSTS_N_INSNS (1), /* arith_shift. */
1435 COSTS_N_INSNS (1), /* arith_shift_reg. */
1436 COSTS_N_INSNS (1), /* log_shift. */
1437 COSTS_N_INSNS (1), /* log_shift_reg. */
1438 0, /* extend. */
1439 COSTS_N_INSNS (1), /* extend_arith. */
1440 0, /* bfi. */
1441 COSTS_N_INSNS (1), /* bfx. */
1442 COSTS_N_INSNS (1), /* clz. */
1443 COSTS_N_INSNS (1), /* rev. */
1444 0, /* non_exec. */
1445 true /* non_exec_costs_exec. */
1447 /* MULT SImode */
1450 COSTS_N_INSNS (2), /* simple. */
1451 COSTS_N_INSNS (3), /* flag_setting. */
1452 COSTS_N_INSNS (2), /* extend. */
1453 COSTS_N_INSNS (3), /* add. */
1454 COSTS_N_INSNS (2), /* extend_add. */
1455 COSTS_N_INSNS (18) /* idiv. */
1457 /* MULT DImode */
1459 0, /* simple (N/A). */
1460 0, /* flag_setting (N/A). */
1461 COSTS_N_INSNS (3), /* extend. */
1462 0, /* add (N/A). */
1463 COSTS_N_INSNS (3), /* extend_add. */
1464 0 /* idiv (N/A). */
1467 /* LD/ST */
1469 COSTS_N_INSNS (3), /* load. */
1470 COSTS_N_INSNS (3), /* load_sign_extend. */
1471 COSTS_N_INSNS (3), /* ldrd. */
1472 COSTS_N_INSNS (3), /* ldm_1st. */
1473 1, /* ldm_regs_per_insn_1st. */
1474 2, /* ldm_regs_per_insn_subsequent. */
1475 COSTS_N_INSNS (3), /* loadf. */
1476 COSTS_N_INSNS (3), /* loadd. */
1477 0, /* load_unaligned. */
1478 0, /* store. */
1479 0, /* strd. */
1480 0, /* stm_1st. */
1481 1, /* stm_regs_per_insn_1st. */
1482 2, /* stm_regs_per_insn_subsequent. */
1483 COSTS_N_INSNS (2), /* storef. */
1484 COSTS_N_INSNS (2), /* stored. */
1485 0, /* store_unaligned. */
1486 COSTS_N_INSNS (1), /* loadv. */
1487 COSTS_N_INSNS (1) /* storev. */
1490 /* FP SFmode */
1492 COSTS_N_INSNS (17), /* div. */
1493 COSTS_N_INSNS (4), /* mult. */
1494 COSTS_N_INSNS (8), /* mult_addsub. */
1495 COSTS_N_INSNS (8), /* fma. */
1496 COSTS_N_INSNS (4), /* addsub. */
1497 COSTS_N_INSNS (2), /* fpconst. */
1498 COSTS_N_INSNS (2), /* neg. */
1499 COSTS_N_INSNS (2), /* compare. */
1500 COSTS_N_INSNS (4), /* widen. */
1501 COSTS_N_INSNS (4), /* narrow. */
1502 COSTS_N_INSNS (4), /* toint. */
1503 COSTS_N_INSNS (4), /* fromint. */
1504 COSTS_N_INSNS (4) /* roundint. */
1506 /* FP DFmode */
1508 COSTS_N_INSNS (31), /* div. */
1509 COSTS_N_INSNS (4), /* mult. */
1510 COSTS_N_INSNS (8), /* mult_addsub. */
1511 COSTS_N_INSNS (8), /* fma. */
1512 COSTS_N_INSNS (4), /* addsub. */
1513 COSTS_N_INSNS (2), /* fpconst. */
1514 COSTS_N_INSNS (2), /* neg. */
1515 COSTS_N_INSNS (2), /* compare. */
1516 COSTS_N_INSNS (4), /* widen. */
1517 COSTS_N_INSNS (4), /* narrow. */
1518 COSTS_N_INSNS (4), /* toint. */
1519 COSTS_N_INSNS (4), /* fromint. */
1520 COSTS_N_INSNS (4) /* roundint. */
1523 /* Vector */
1525 COSTS_N_INSNS (1) /* alu. */
1529 const struct cpu_cost_table cortexa15_extra_costs =
1531 /* ALU */
1533 0, /* arith. */
1534 0, /* logical. */
1535 0, /* shift. */
1536 0, /* shift_reg. */
1537 COSTS_N_INSNS (1), /* arith_shift. */
1538 COSTS_N_INSNS (1), /* arith_shift_reg. */
1539 COSTS_N_INSNS (1), /* log_shift. */
1540 COSTS_N_INSNS (1), /* log_shift_reg. */
1541 0, /* extend. */
1542 COSTS_N_INSNS (1), /* extend_arith. */
1543 COSTS_N_INSNS (1), /* bfi. */
1544 0, /* bfx. */
1545 0, /* clz. */
1546 0, /* rev. */
1547 0, /* non_exec. */
1548 true /* non_exec_costs_exec. */
1550 /* MULT SImode */
1553 COSTS_N_INSNS (2), /* simple. */
1554 COSTS_N_INSNS (3), /* flag_setting. */
1555 COSTS_N_INSNS (2), /* extend. */
1556 COSTS_N_INSNS (2), /* add. */
1557 COSTS_N_INSNS (2), /* extend_add. */
1558 COSTS_N_INSNS (18) /* idiv. */
1560 /* MULT DImode */
1562 0, /* simple (N/A). */
1563 0, /* flag_setting (N/A). */
1564 COSTS_N_INSNS (3), /* extend. */
1565 0, /* add (N/A). */
1566 COSTS_N_INSNS (3), /* extend_add. */
1567 0 /* idiv (N/A). */
1570 /* LD/ST */
1572 COSTS_N_INSNS (3), /* load. */
1573 COSTS_N_INSNS (3), /* load_sign_extend. */
1574 COSTS_N_INSNS (3), /* ldrd. */
1575 COSTS_N_INSNS (4), /* ldm_1st. */
1576 1, /* ldm_regs_per_insn_1st. */
1577 2, /* ldm_regs_per_insn_subsequent. */
1578 COSTS_N_INSNS (4), /* loadf. */
1579 COSTS_N_INSNS (4), /* loadd. */
1580 0, /* load_unaligned. */
1581 0, /* store. */
1582 0, /* strd. */
1583 COSTS_N_INSNS (1), /* stm_1st. */
1584 1, /* stm_regs_per_insn_1st. */
1585 2, /* stm_regs_per_insn_subsequent. */
1586 0, /* storef. */
1587 0, /* stored. */
1588 0, /* store_unaligned. */
1589 COSTS_N_INSNS (1), /* loadv. */
1590 COSTS_N_INSNS (1) /* storev. */
1593 /* FP SFmode */
1595 COSTS_N_INSNS (17), /* div. */
1596 COSTS_N_INSNS (4), /* mult. */
1597 COSTS_N_INSNS (8), /* mult_addsub. */
1598 COSTS_N_INSNS (8), /* fma. */
1599 COSTS_N_INSNS (4), /* addsub. */
1600 COSTS_N_INSNS (2), /* fpconst. */
1601 COSTS_N_INSNS (2), /* neg. */
1602 COSTS_N_INSNS (5), /* compare. */
1603 COSTS_N_INSNS (4), /* widen. */
1604 COSTS_N_INSNS (4), /* narrow. */
1605 COSTS_N_INSNS (4), /* toint. */
1606 COSTS_N_INSNS (4), /* fromint. */
1607 COSTS_N_INSNS (4) /* roundint. */
1609 /* FP DFmode */
1611 COSTS_N_INSNS (31), /* div. */
1612 COSTS_N_INSNS (4), /* mult. */
1613 COSTS_N_INSNS (8), /* mult_addsub. */
1614 COSTS_N_INSNS (8), /* fma. */
1615 COSTS_N_INSNS (4), /* addsub. */
1616 COSTS_N_INSNS (2), /* fpconst. */
1617 COSTS_N_INSNS (2), /* neg. */
1618 COSTS_N_INSNS (2), /* compare. */
1619 COSTS_N_INSNS (4), /* widen. */
1620 COSTS_N_INSNS (4), /* narrow. */
1621 COSTS_N_INSNS (4), /* toint. */
1622 COSTS_N_INSNS (4), /* fromint. */
1623 COSTS_N_INSNS (4) /* roundint. */
1626 /* Vector */
1628 COSTS_N_INSNS (1) /* alu. */
1632 const struct cpu_cost_table v7m_extra_costs =
1634 /* ALU */
1636 0, /* arith. */
1637 0, /* logical. */
1638 0, /* shift. */
1639 0, /* shift_reg. */
1640 0, /* arith_shift. */
1641 COSTS_N_INSNS (1), /* arith_shift_reg. */
1642 0, /* log_shift. */
1643 COSTS_N_INSNS (1), /* log_shift_reg. */
1644 0, /* extend. */
1645 COSTS_N_INSNS (1), /* extend_arith. */
1646 0, /* bfi. */
1647 0, /* bfx. */
1648 0, /* clz. */
1649 0, /* rev. */
1650 COSTS_N_INSNS (1), /* non_exec. */
1651 false /* non_exec_costs_exec. */
1654 /* MULT SImode */
1656 COSTS_N_INSNS (1), /* simple. */
1657 COSTS_N_INSNS (1), /* flag_setting. */
1658 COSTS_N_INSNS (2), /* extend. */
1659 COSTS_N_INSNS (1), /* add. */
1660 COSTS_N_INSNS (3), /* extend_add. */
1661 COSTS_N_INSNS (8) /* idiv. */
1663 /* MULT DImode */
1665 0, /* simple (N/A). */
1666 0, /* flag_setting (N/A). */
1667 COSTS_N_INSNS (2), /* extend. */
1668 0, /* add (N/A). */
1669 COSTS_N_INSNS (3), /* extend_add. */
1670 0 /* idiv (N/A). */
1673 /* LD/ST */
1675 COSTS_N_INSNS (2), /* load. */
1676 0, /* load_sign_extend. */
1677 COSTS_N_INSNS (3), /* ldrd. */
1678 COSTS_N_INSNS (2), /* ldm_1st. */
1679 1, /* ldm_regs_per_insn_1st. */
1680 1, /* ldm_regs_per_insn_subsequent. */
1681 COSTS_N_INSNS (2), /* loadf. */
1682 COSTS_N_INSNS (3), /* loadd. */
1683 COSTS_N_INSNS (1), /* load_unaligned. */
1684 COSTS_N_INSNS (2), /* store. */
1685 COSTS_N_INSNS (3), /* strd. */
1686 COSTS_N_INSNS (2), /* stm_1st. */
1687 1, /* stm_regs_per_insn_1st. */
1688 1, /* stm_regs_per_insn_subsequent. */
1689 COSTS_N_INSNS (2), /* storef. */
1690 COSTS_N_INSNS (3), /* stored. */
1691 COSTS_N_INSNS (1), /* store_unaligned. */
1692 COSTS_N_INSNS (1), /* loadv. */
1693 COSTS_N_INSNS (1) /* storev. */
1696 /* FP SFmode */
1698 COSTS_N_INSNS (7), /* div. */
1699 COSTS_N_INSNS (2), /* mult. */
1700 COSTS_N_INSNS (5), /* mult_addsub. */
1701 COSTS_N_INSNS (3), /* fma. */
1702 COSTS_N_INSNS (1), /* addsub. */
1703 0, /* fpconst. */
1704 0, /* neg. */
1705 0, /* compare. */
1706 0, /* widen. */
1707 0, /* narrow. */
1708 0, /* toint. */
1709 0, /* fromint. */
1710 0 /* roundint. */
1712 /* FP DFmode */
1714 COSTS_N_INSNS (15), /* div. */
1715 COSTS_N_INSNS (5), /* mult. */
1716 COSTS_N_INSNS (7), /* mult_addsub. */
1717 COSTS_N_INSNS (7), /* fma. */
1718 COSTS_N_INSNS (3), /* addsub. */
1719 0, /* fpconst. */
1720 0, /* neg. */
1721 0, /* compare. */
1722 0, /* widen. */
1723 0, /* narrow. */
1724 0, /* toint. */
1725 0, /* fromint. */
1726 0 /* roundint. */
1729 /* Vector */
1731 COSTS_N_INSNS (1) /* alu. */
1735 const struct tune_params arm_slowmul_tune =
1737 &generic_extra_costs, /* Insn extra costs. */
1738 NULL, /* Sched adj cost. */
1739 arm_default_branch_cost,
1740 &arm_default_vec_cost,
1741 3, /* Constant limit. */
1742 5, /* Max cond insns. */
1743 8, /* Memset max inline. */
1744 1, /* Issue rate. */
1745 ARM_PREFETCH_NOT_BENEFICIAL,
1746 tune_params::PREF_CONST_POOL_TRUE,
1747 tune_params::PREF_LDRD_FALSE,
1748 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1749 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1750 tune_params::DISPARAGE_FLAGS_NEITHER,
1751 tune_params::PREF_NEON_64_FALSE,
1752 tune_params::PREF_NEON_STRINGOPS_FALSE,
1753 tune_params::FUSE_NOTHING,
1754 tune_params::SCHED_AUTOPREF_OFF
1757 const struct tune_params arm_fastmul_tune =
1759 &generic_extra_costs, /* Insn extra costs. */
1760 NULL, /* Sched adj cost. */
1761 arm_default_branch_cost,
1762 &arm_default_vec_cost,
1763 1, /* Constant limit. */
1764 5, /* Max cond insns. */
1765 8, /* Memset max inline. */
1766 1, /* Issue rate. */
1767 ARM_PREFETCH_NOT_BENEFICIAL,
1768 tune_params::PREF_CONST_POOL_TRUE,
1769 tune_params::PREF_LDRD_FALSE,
1770 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1771 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1772 tune_params::DISPARAGE_FLAGS_NEITHER,
1773 tune_params::PREF_NEON_64_FALSE,
1774 tune_params::PREF_NEON_STRINGOPS_FALSE,
1775 tune_params::FUSE_NOTHING,
1776 tune_params::SCHED_AUTOPREF_OFF
1779 /* StrongARM has early execution of branches, so a sequence that is worth
1780 skipping is shorter. Set max_insns_skipped to a lower value. */
1782 const struct tune_params arm_strongarm_tune =
1784 &generic_extra_costs, /* Insn extra costs. */
1785 NULL, /* Sched adj cost. */
1786 arm_default_branch_cost,
1787 &arm_default_vec_cost,
1788 1, /* Constant limit. */
1789 3, /* Max cond insns. */
1790 8, /* Memset max inline. */
1791 1, /* Issue rate. */
1792 ARM_PREFETCH_NOT_BENEFICIAL,
1793 tune_params::PREF_CONST_POOL_TRUE,
1794 tune_params::PREF_LDRD_FALSE,
1795 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1796 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1797 tune_params::DISPARAGE_FLAGS_NEITHER,
1798 tune_params::PREF_NEON_64_FALSE,
1799 tune_params::PREF_NEON_STRINGOPS_FALSE,
1800 tune_params::FUSE_NOTHING,
1801 tune_params::SCHED_AUTOPREF_OFF
1804 const struct tune_params arm_xscale_tune =
1806 &generic_extra_costs, /* Insn extra costs. */
1807 xscale_sched_adjust_cost,
1808 arm_default_branch_cost,
1809 &arm_default_vec_cost,
1810 2, /* Constant limit. */
1811 3, /* Max cond insns. */
1812 8, /* Memset max inline. */
1813 1, /* Issue rate. */
1814 ARM_PREFETCH_NOT_BENEFICIAL,
1815 tune_params::PREF_CONST_POOL_TRUE,
1816 tune_params::PREF_LDRD_FALSE,
1817 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1818 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1819 tune_params::DISPARAGE_FLAGS_NEITHER,
1820 tune_params::PREF_NEON_64_FALSE,
1821 tune_params::PREF_NEON_STRINGOPS_FALSE,
1822 tune_params::FUSE_NOTHING,
1823 tune_params::SCHED_AUTOPREF_OFF
1826 const struct tune_params arm_9e_tune =
1828 &generic_extra_costs, /* Insn extra costs. */
1829 NULL, /* Sched adj cost. */
1830 arm_default_branch_cost,
1831 &arm_default_vec_cost,
1832 1, /* Constant limit. */
1833 5, /* Max cond insns. */
1834 8, /* Memset max inline. */
1835 1, /* Issue rate. */
1836 ARM_PREFETCH_NOT_BENEFICIAL,
1837 tune_params::PREF_CONST_POOL_TRUE,
1838 tune_params::PREF_LDRD_FALSE,
1839 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1840 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1841 tune_params::DISPARAGE_FLAGS_NEITHER,
1842 tune_params::PREF_NEON_64_FALSE,
1843 tune_params::PREF_NEON_STRINGOPS_FALSE,
1844 tune_params::FUSE_NOTHING,
1845 tune_params::SCHED_AUTOPREF_OFF
1848 const struct tune_params arm_marvell_pj4_tune =
1850 &generic_extra_costs, /* Insn extra costs. */
1851 NULL, /* Sched adj cost. */
1852 arm_default_branch_cost,
1853 &arm_default_vec_cost,
1854 1, /* Constant limit. */
1855 5, /* Max cond insns. */
1856 8, /* Memset max inline. */
1857 2, /* Issue rate. */
1858 ARM_PREFETCH_NOT_BENEFICIAL,
1859 tune_params::PREF_CONST_POOL_TRUE,
1860 tune_params::PREF_LDRD_FALSE,
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1862 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1863 tune_params::DISPARAGE_FLAGS_NEITHER,
1864 tune_params::PREF_NEON_64_FALSE,
1865 tune_params::PREF_NEON_STRINGOPS_FALSE,
1866 tune_params::FUSE_NOTHING,
1867 tune_params::SCHED_AUTOPREF_OFF
1870 const struct tune_params arm_v6t2_tune =
1872 &generic_extra_costs, /* Insn extra costs. */
1873 NULL, /* Sched adj cost. */
1874 arm_default_branch_cost,
1875 &arm_default_vec_cost,
1876 1, /* Constant limit. */
1877 5, /* Max cond insns. */
1878 8, /* Memset max inline. */
1879 1, /* Issue rate. */
1880 ARM_PREFETCH_NOT_BENEFICIAL,
1881 tune_params::PREF_CONST_POOL_FALSE,
1882 tune_params::PREF_LDRD_FALSE,
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1885 tune_params::DISPARAGE_FLAGS_NEITHER,
1886 tune_params::PREF_NEON_64_FALSE,
1887 tune_params::PREF_NEON_STRINGOPS_FALSE,
1888 tune_params::FUSE_NOTHING,
1889 tune_params::SCHED_AUTOPREF_OFF
1893 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1894 const struct tune_params arm_cortex_tune =
1896 &generic_extra_costs,
1897 NULL, /* Sched adj cost. */
1898 arm_default_branch_cost,
1899 &arm_default_vec_cost,
1900 1, /* Constant limit. */
1901 5, /* Max cond insns. */
1902 8, /* Memset max inline. */
1903 2, /* Issue rate. */
1904 ARM_PREFETCH_NOT_BENEFICIAL,
1905 tune_params::PREF_CONST_POOL_FALSE,
1906 tune_params::PREF_LDRD_FALSE,
1907 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1908 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1909 tune_params::DISPARAGE_FLAGS_NEITHER,
1910 tune_params::PREF_NEON_64_FALSE,
1911 tune_params::PREF_NEON_STRINGOPS_FALSE,
1912 tune_params::FUSE_NOTHING,
1913 tune_params::SCHED_AUTOPREF_OFF
1916 const struct tune_params arm_cortex_a8_tune =
1918 &cortexa8_extra_costs,
1919 NULL, /* Sched adj cost. */
1920 arm_default_branch_cost,
1921 &arm_default_vec_cost,
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 2, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 tune_params::PREF_CONST_POOL_FALSE,
1928 tune_params::PREF_LDRD_FALSE,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER,
1932 tune_params::PREF_NEON_64_FALSE,
1933 tune_params::PREF_NEON_STRINGOPS_TRUE,
1934 tune_params::FUSE_NOTHING,
1935 tune_params::SCHED_AUTOPREF_OFF
1938 const struct tune_params arm_cortex_a7_tune =
1940 &cortexa7_extra_costs,
1941 NULL, /* Sched adj cost. */
1942 arm_default_branch_cost,
1943 &arm_default_vec_cost,
1944 1, /* Constant limit. */
1945 5, /* Max cond insns. */
1946 8, /* Memset max inline. */
1947 2, /* Issue rate. */
1948 ARM_PREFETCH_NOT_BENEFICIAL,
1949 tune_params::PREF_CONST_POOL_FALSE,
1950 tune_params::PREF_LDRD_FALSE,
1951 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1953 tune_params::DISPARAGE_FLAGS_NEITHER,
1954 tune_params::PREF_NEON_64_FALSE,
1955 tune_params::PREF_NEON_STRINGOPS_TRUE,
1956 tune_params::FUSE_NOTHING,
1957 tune_params::SCHED_AUTOPREF_OFF
1960 const struct tune_params arm_cortex_a15_tune =
1962 &cortexa15_extra_costs,
1963 NULL, /* Sched adj cost. */
1964 arm_default_branch_cost,
1965 &arm_default_vec_cost,
1966 1, /* Constant limit. */
1967 2, /* Max cond insns. */
1968 8, /* Memset max inline. */
1969 3, /* Issue rate. */
1970 ARM_PREFETCH_NOT_BENEFICIAL,
1971 tune_params::PREF_CONST_POOL_FALSE,
1972 tune_params::PREF_LDRD_TRUE,
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1975 tune_params::DISPARAGE_FLAGS_ALL,
1976 tune_params::PREF_NEON_64_FALSE,
1977 tune_params::PREF_NEON_STRINGOPS_TRUE,
1978 tune_params::FUSE_NOTHING,
1979 tune_params::SCHED_AUTOPREF_FULL
1982 const struct tune_params arm_cortex_a35_tune =
1984 &cortexa53_extra_costs,
1985 NULL, /* Sched adj cost. */
1986 arm_default_branch_cost,
1987 &arm_default_vec_cost,
1988 1, /* Constant limit. */
1989 5, /* Max cond insns. */
1990 8, /* Memset max inline. */
1991 1, /* Issue rate. */
1992 ARM_PREFETCH_NOT_BENEFICIAL,
1993 tune_params::PREF_CONST_POOL_FALSE,
1994 tune_params::PREF_LDRD_FALSE,
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1997 tune_params::DISPARAGE_FLAGS_NEITHER,
1998 tune_params::PREF_NEON_64_FALSE,
1999 tune_params::PREF_NEON_STRINGOPS_TRUE,
2000 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2001 tune_params::SCHED_AUTOPREF_OFF
2004 const struct tune_params arm_cortex_a53_tune =
2006 &cortexa53_extra_costs,
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 2, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_FALSE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER,
2020 tune_params::PREF_NEON_64_FALSE,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE,
2022 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2023 tune_params::SCHED_AUTOPREF_OFF
2026 const struct tune_params arm_cortex_a57_tune =
2028 &cortexa57_extra_costs,
2029 NULL, /* Sched adj cost. */
2030 arm_default_branch_cost,
2031 &arm_default_vec_cost,
2032 1, /* Constant limit. */
2033 2, /* Max cond insns. */
2034 8, /* Memset max inline. */
2035 3, /* Issue rate. */
2036 ARM_PREFETCH_NOT_BENEFICIAL,
2037 tune_params::PREF_CONST_POOL_FALSE,
2038 tune_params::PREF_LDRD_TRUE,
2039 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2041 tune_params::DISPARAGE_FLAGS_ALL,
2042 tune_params::PREF_NEON_64_FALSE,
2043 tune_params::PREF_NEON_STRINGOPS_TRUE,
2044 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2045 tune_params::SCHED_AUTOPREF_FULL
2048 const struct tune_params arm_exynosm1_tune =
2050 &exynosm1_extra_costs,
2051 NULL, /* Sched adj cost. */
2052 arm_default_branch_cost,
2053 &arm_default_vec_cost,
2054 1, /* Constant limit. */
2055 2, /* Max cond insns. */
2056 8, /* Memset max inline. */
2057 3, /* Issue rate. */
2058 ARM_PREFETCH_NOT_BENEFICIAL,
2059 tune_params::PREF_CONST_POOL_FALSE,
2060 tune_params::PREF_LDRD_TRUE,
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2063 tune_params::DISPARAGE_FLAGS_ALL,
2064 tune_params::PREF_NEON_64_FALSE,
2065 tune_params::PREF_NEON_STRINGOPS_TRUE,
2066 tune_params::FUSE_NOTHING,
2067 tune_params::SCHED_AUTOPREF_OFF
2070 const struct tune_params arm_xgene1_tune =
2072 &xgene1_extra_costs,
2073 NULL, /* Sched adj cost. */
2074 arm_default_branch_cost,
2075 &arm_default_vec_cost,
2076 1, /* Constant limit. */
2077 2, /* Max cond insns. */
2078 32, /* Memset max inline. */
2079 4, /* Issue rate. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 tune_params::PREF_CONST_POOL_FALSE,
2082 tune_params::PREF_LDRD_TRUE,
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2085 tune_params::DISPARAGE_FLAGS_ALL,
2086 tune_params::PREF_NEON_64_FALSE,
2087 tune_params::PREF_NEON_STRINGOPS_FALSE,
2088 tune_params::FUSE_NOTHING,
2089 tune_params::SCHED_AUTOPREF_OFF
2092 const struct tune_params arm_qdf24xx_tune =
2094 &qdf24xx_extra_costs,
2095 NULL, /* Scheduler cost adjustment. */
2096 arm_default_branch_cost,
2097 &arm_default_vec_cost, /* Vectorizer costs. */
2098 1, /* Constant limit. */
2099 2, /* Max cond insns. */
2100 8, /* Memset max inline. */
2101 4, /* Issue rate. */
2102 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2103 tune_params::PREF_CONST_POOL_FALSE,
2104 tune_params::PREF_LDRD_TRUE,
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2107 tune_params::DISPARAGE_FLAGS_ALL,
2108 tune_params::PREF_NEON_64_FALSE,
2109 tune_params::PREF_NEON_STRINGOPS_TRUE,
2110 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2111 tune_params::SCHED_AUTOPREF_FULL
2114 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2115 less appealing. Set max_insns_skipped to a low value. */
2117 const struct tune_params arm_cortex_a5_tune =
2119 &cortexa5_extra_costs,
2120 NULL, /* Sched adj cost. */
2121 arm_cortex_a5_branch_cost,
2122 &arm_default_vec_cost,
2123 1, /* Constant limit. */
2124 1, /* Max cond insns. */
2125 8, /* Memset max inline. */
2126 2, /* Issue rate. */
2127 ARM_PREFETCH_NOT_BENEFICIAL,
2128 tune_params::PREF_CONST_POOL_FALSE,
2129 tune_params::PREF_LDRD_FALSE,
2130 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2131 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2132 tune_params::DISPARAGE_FLAGS_NEITHER,
2133 tune_params::PREF_NEON_64_FALSE,
2134 tune_params::PREF_NEON_STRINGOPS_TRUE,
2135 tune_params::FUSE_NOTHING,
2136 tune_params::SCHED_AUTOPREF_OFF
2139 const struct tune_params arm_cortex_a9_tune =
2141 &cortexa9_extra_costs,
2142 cortex_a9_sched_adjust_cost,
2143 arm_default_branch_cost,
2144 &arm_default_vec_cost,
2145 1, /* Constant limit. */
2146 5, /* Max cond insns. */
2147 8, /* Memset max inline. */
2148 2, /* Issue rate. */
2149 ARM_PREFETCH_BENEFICIAL(4,32,32),
2150 tune_params::PREF_CONST_POOL_FALSE,
2151 tune_params::PREF_LDRD_FALSE,
2152 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2153 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2154 tune_params::DISPARAGE_FLAGS_NEITHER,
2155 tune_params::PREF_NEON_64_FALSE,
2156 tune_params::PREF_NEON_STRINGOPS_FALSE,
2157 tune_params::FUSE_NOTHING,
2158 tune_params::SCHED_AUTOPREF_OFF
2161 const struct tune_params arm_cortex_a12_tune =
2163 &cortexa12_extra_costs,
2164 NULL, /* Sched adj cost. */
2165 arm_default_branch_cost,
2166 &arm_default_vec_cost, /* Vectorizer costs. */
2167 1, /* Constant limit. */
2168 2, /* Max cond insns. */
2169 8, /* Memset max inline. */
2170 2, /* Issue rate. */
2171 ARM_PREFETCH_NOT_BENEFICIAL,
2172 tune_params::PREF_CONST_POOL_FALSE,
2173 tune_params::PREF_LDRD_TRUE,
2174 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2175 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2176 tune_params::DISPARAGE_FLAGS_ALL,
2177 tune_params::PREF_NEON_64_FALSE,
2178 tune_params::PREF_NEON_STRINGOPS_TRUE,
2179 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2180 tune_params::SCHED_AUTOPREF_OFF
2183 const struct tune_params arm_cortex_a73_tune =
2185 &cortexa57_extra_costs,
2186 NULL, /* Sched adj cost. */
2187 arm_default_branch_cost,
2188 &arm_default_vec_cost, /* Vectorizer costs. */
2189 1, /* Constant limit. */
2190 2, /* Max cond insns. */
2191 8, /* Memset max inline. */
2192 2, /* Issue rate. */
2193 ARM_PREFETCH_NOT_BENEFICIAL,
2194 tune_params::PREF_CONST_POOL_FALSE,
2195 tune_params::PREF_LDRD_TRUE,
2196 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2197 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2198 tune_params::DISPARAGE_FLAGS_ALL,
2199 tune_params::PREF_NEON_64_FALSE,
2200 tune_params::PREF_NEON_STRINGOPS_TRUE,
2201 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2202 tune_params::SCHED_AUTOPREF_FULL
2205 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2206 cycle to execute each, so two cycles for a full constant. An LDR from the constant pool also takes two cycles
2207 to execute, but mildly increases pipelining opportunity (consecutive
2208 loads/stores can be pipelined together, saving one cycle), and may also
2209 improve icache utilisation. Hence we prefer the constant pool for such
2210 processors. */
2212 const struct tune_params arm_v7m_tune =
2214 &v7m_extra_costs,
2215 NULL, /* Sched adj cost. */
2216 arm_cortex_m_branch_cost,
2217 &arm_default_vec_cost,
2218 1, /* Constant limit. */
2219 2, /* Max cond insns. */
2220 8, /* Memset max inline. */
2221 1, /* Issue rate. */
2222 ARM_PREFETCH_NOT_BENEFICIAL,
2223 tune_params::PREF_CONST_POOL_TRUE,
2224 tune_params::PREF_LDRD_FALSE,
2225 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2226 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2227 tune_params::DISPARAGE_FLAGS_NEITHER,
2228 tune_params::PREF_NEON_64_FALSE,
2229 tune_params::PREF_NEON_STRINGOPS_FALSE,
2230 tune_params::FUSE_NOTHING,
2231 tune_params::SCHED_AUTOPREF_OFF
2234 /* Cortex-M7 tuning. */
2236 const struct tune_params arm_cortex_m7_tune =
2238 &v7m_extra_costs,
2239 NULL, /* Sched adj cost. */
2240 arm_cortex_m7_branch_cost,
2241 &arm_default_vec_cost,
2242 0, /* Constant limit. */
2243 1, /* Max cond insns. */
2244 8, /* Memset max inline. */
2245 2, /* Issue rate. */
2246 ARM_PREFETCH_NOT_BENEFICIAL,
2247 tune_params::PREF_CONST_POOL_TRUE,
2248 tune_params::PREF_LDRD_FALSE,
2249 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2251 tune_params::DISPARAGE_FLAGS_NEITHER,
2252 tune_params::PREF_NEON_64_FALSE,
2253 tune_params::PREF_NEON_STRINGOPS_FALSE,
2254 tune_params::FUSE_NOTHING,
2255 tune_params::SCHED_AUTOPREF_OFF
2258 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2259 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2260 cortex-m23. */
2261 const struct tune_params arm_v6m_tune =
2263 &generic_extra_costs, /* Insn extra costs. */
2264 NULL, /* Sched adj cost. */
2265 arm_default_branch_cost,
2266 &arm_default_vec_cost, /* Vectorizer costs. */
2267 1, /* Constant limit. */
2268 5, /* Max cond insns. */
2269 8, /* Memset max inline. */
2270 1, /* Issue rate. */
2271 ARM_PREFETCH_NOT_BENEFICIAL,
2272 tune_params::PREF_CONST_POOL_FALSE,
2273 tune_params::PREF_LDRD_FALSE,
2274 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2275 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2276 tune_params::DISPARAGE_FLAGS_NEITHER,
2277 tune_params::PREF_NEON_64_FALSE,
2278 tune_params::PREF_NEON_STRINGOPS_FALSE,
2279 tune_params::FUSE_NOTHING,
2280 tune_params::SCHED_AUTOPREF_OFF
2283 const struct tune_params arm_fa726te_tune =
2285 &generic_extra_costs, /* Insn extra costs. */
2286 fa726te_sched_adjust_cost,
2287 arm_default_branch_cost,
2288 &arm_default_vec_cost,
2289 1, /* Constant limit. */
2290 5, /* Max cond insns. */
2291 8, /* Memset max inline. */
2292 2, /* Issue rate. */
2293 ARM_PREFETCH_NOT_BENEFICIAL,
2294 tune_params::PREF_CONST_POOL_TRUE,
2295 tune_params::PREF_LDRD_FALSE,
2296 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2297 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2298 tune_params::DISPARAGE_FLAGS_NEITHER,
2299 tune_params::PREF_NEON_64_FALSE,
2300 tune_params::PREF_NEON_STRINGOPS_FALSE,
2301 tune_params::FUSE_NOTHING,
2302 tune_params::SCHED_AUTOPREF_OFF
2305 /* Auto-generated CPU, FPU and architecture tables. */
2306 #include "arm-cpu-data.h"
2308 /* The name of the preprocessor macro to define for this architecture. PROFILE
2309 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2310 is thus chosen to be big enough to hold the longest architecture name. */
2312 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2314 /* Supported TLS relocations. */
2316 enum tls_reloc {
2317 TLS_GD32,
2318 TLS_LDM32,
2319 TLS_LDO32,
2320 TLS_IE32,
2321 TLS_LE32,
2322 TLS_DESCSEQ /* GNU scheme */
2325 /* The maximum number of insns to be used when loading a constant. */
2326 inline static int
2327 arm_constant_limit (bool size_p)
2329 return size_p ? 1 : current_tune->constant_limit;
2332 /* Emit an insn that's a simple single-set. Both the operands must be known
2333 to be valid. */
2334 inline static rtx_insn *
2335 emit_set_insn (rtx x, rtx y)
2337 return emit_insn (gen_rtx_SET (x, y));
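 /* For instance (illustrative), emit_set_insn (reg, GEN_INT (42)) emits and
 returns the single insn (set (reg) (const_int 42)). */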
2340 /* Return the number of bits set in VALUE. */
2341 static unsigned
2342 bit_count (unsigned long value)
2344 unsigned long count = 0;
2346 while (value)
2348 count++;
2349 value &= value - 1; /* Clear the least-significant set bit. */
2352 return count;
2355 /* Return the number of bits set in BMAP. */
2356 static unsigned
2357 bitmap_popcount (const sbitmap bmap)
2359 unsigned int count = 0;
2360 unsigned int n = 0;
2361 sbitmap_iterator sbi;
2363 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2364 count++;
2365 return count;
2368 typedef struct
2370 machine_mode mode;
2371 const char *name;
2372 } arm_fixed_mode_set;
2374 /* A small helper for setting fixed-point library libfuncs. */
2376 static void
2377 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2378 const char *funcname, const char *modename,
2379 int num_suffix)
2381 char buffer[50];
2383 if (num_suffix == 0)
2384 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2385 else
2386 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2388 set_optab_libfunc (optable, mode, buffer);
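 /* Illustrative example: for SAmode with FUNCNAME "ssadd", MODENAME "sa" and
 NUM_SUFFIX 3, the name constructed above is "__gnu_ssaddsa3". */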
2391 static void
2392 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2393 machine_mode from, const char *funcname,
2394 const char *toname, const char *fromname)
2396 char buffer[50];
2397 const char *maybe_suffix_2 = "";
2399 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2400 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2401 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2402 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2403 maybe_suffix_2 = "2";
2405 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2406 maybe_suffix_2);
2408 set_conv_libfunc (optable, to, from, buffer);
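 /* Illustrative examples: a conversion from QQmode to HQmode (both signed
 fract modes) yields "__gnu_fractqqhq2", while a conversion from SQmode to
 DFmode yields "__gnu_fractsqdf" with no "2" suffix, since DFmode is not a
 fixed-point mode. */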
2411 /* Set up library functions unique to ARM. */
2413 static void
2414 arm_init_libfuncs (void)
2416 /* For Linux, we have access to kernel support for atomic operations. */
2417 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2418 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2420 /* There are no special library functions unless we are using the
2421 ARM BPABI. */
2422 if (!TARGET_BPABI)
2423 return;
2425 /* The functions below are described in Section 4 of the "Run-Time
2426 ABI for the ARM architecture", Version 1.0. */
2428 /* Double-precision floating-point arithmetic. Table 2. */
2429 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2430 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2431 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2432 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2433 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2435 /* Double-precision comparisons. Table 3. */
2436 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2437 set_optab_libfunc (ne_optab, DFmode, NULL);
2438 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2439 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2440 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2441 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2442 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2444 /* Single-precision floating-point arithmetic. Table 4. */
2445 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2446 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2447 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2448 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2449 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2451 /* Single-precision comparisons. Table 5. */
2452 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2453 set_optab_libfunc (ne_optab, SFmode, NULL);
2454 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2455 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2456 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2457 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2458 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2460 /* Floating-point to integer conversions. Table 6. */
2461 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2462 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2463 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2464 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2465 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2466 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2467 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2468 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2470 /* Conversions between floating types. Table 7. */
2471 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2472 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2474 /* Integer to floating-point conversions. Table 8. */
2475 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2476 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2477 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2478 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2479 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2480 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2481 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2482 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2484 /* Long long. Table 9. */
2485 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2486 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2487 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2488 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2489 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2490 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2491 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2492 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2494 /* Integer (32/32->32) division. \S 4.3.1. */
2495 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2496 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2498 /* The divmod functions are designed so that they can be used for
2499 plain division, even though they return both the quotient and the
2500 remainder. The quotient is returned in the usual location (i.e.,
2501 r0 for SImode, {r0, r1} for DImode), just as would be expected
2502 for an ordinary division routine. Because the AAPCS calling
2503 conventions specify that all of { r0, r1, r2, r3 } are
2504 call-clobbered registers, there is no need to tell the compiler
2505 explicitly that those registers are clobbered by these
2506 routines. */
2507 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2508 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
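 /* Per the Run-Time ABI, __aeabi_ldivmod returns the quotient in {r0, r1}
 and the remainder in {r2, r3}; when it is reached through sdiv_optab only
 the quotient is consumed and the remainder is simply ignored. */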
2510 /* For SImode division the ABI provides div-without-mod routines,
2511 which are faster. */
2512 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2513 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2515 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2516 divmod libcalls instead. */
2517 set_optab_libfunc (smod_optab, DImode, NULL);
2518 set_optab_libfunc (umod_optab, DImode, NULL);
2519 set_optab_libfunc (smod_optab, SImode, NULL);
2520 set_optab_libfunc (umod_optab, SImode, NULL);
2522 /* Half-precision float operations. The compiler handles all operations
2523 with NULL libfuncs by converting to SFmode. */
2524 switch (arm_fp16_format)
2526 case ARM_FP16_FORMAT_IEEE:
2527 case ARM_FP16_FORMAT_ALTERNATIVE:
2529 /* Conversions. */
2530 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2531 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2532 ? "__gnu_f2h_ieee"
2533 : "__gnu_f2h_alternative"));
2534 set_conv_libfunc (sext_optab, SFmode, HFmode,
2535 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2536 ? "__gnu_h2f_ieee"
2537 : "__gnu_h2f_alternative"));
2539 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2540 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2541 ? "__gnu_d2h_ieee"
2542 : "__gnu_d2h_alternative"));
2544 /* Arithmetic. */
2545 set_optab_libfunc (add_optab, HFmode, NULL);
2546 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2547 set_optab_libfunc (smul_optab, HFmode, NULL);
2548 set_optab_libfunc (neg_optab, HFmode, NULL);
2549 set_optab_libfunc (sub_optab, HFmode, NULL);
2551 /* Comparisons. */
2552 set_optab_libfunc (eq_optab, HFmode, NULL);
2553 set_optab_libfunc (ne_optab, HFmode, NULL);
2554 set_optab_libfunc (lt_optab, HFmode, NULL);
2555 set_optab_libfunc (le_optab, HFmode, NULL);
2556 set_optab_libfunc (ge_optab, HFmode, NULL);
2557 set_optab_libfunc (gt_optab, HFmode, NULL);
2558 set_optab_libfunc (unord_optab, HFmode, NULL);
2559 break;
2561 default:
2562 break;
2565 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2567 const arm_fixed_mode_set fixed_arith_modes[] =
2569 { QQmode, "qq" },
2570 { UQQmode, "uqq" },
2571 { HQmode, "hq" },
2572 { UHQmode, "uhq" },
2573 { SQmode, "sq" },
2574 { USQmode, "usq" },
2575 { DQmode, "dq" },
2576 { UDQmode, "udq" },
2577 { TQmode, "tq" },
2578 { UTQmode, "utq" },
2579 { HAmode, "ha" },
2580 { UHAmode, "uha" },
2581 { SAmode, "sa" },
2582 { USAmode, "usa" },
2583 { DAmode, "da" },
2584 { UDAmode, "uda" },
2585 { TAmode, "ta" },
2586 { UTAmode, "uta" }
2588 const arm_fixed_mode_set fixed_conv_modes[] =
2590 { QQmode, "qq" },
2591 { UQQmode, "uqq" },
2592 { HQmode, "hq" },
2593 { UHQmode, "uhq" },
2594 { SQmode, "sq" },
2595 { USQmode, "usq" },
2596 { DQmode, "dq" },
2597 { UDQmode, "udq" },
2598 { TQmode, "tq" },
2599 { UTQmode, "utq" },
2600 { HAmode, "ha" },
2601 { UHAmode, "uha" },
2602 { SAmode, "sa" },
2603 { USAmode, "usa" },
2604 { DAmode, "da" },
2605 { UDAmode, "uda" },
2606 { TAmode, "ta" },
2607 { UTAmode, "uta" },
2608 { QImode, "qi" },
2609 { HImode, "hi" },
2610 { SImode, "si" },
2611 { DImode, "di" },
2612 { TImode, "ti" },
2613 { SFmode, "sf" },
2614 { DFmode, "df" }
2616 unsigned int i, j;
2618 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2620 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2621 "add", fixed_arith_modes[i].name, 3);
2622 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2623 "ssadd", fixed_arith_modes[i].name, 3);
2624 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2625 "usadd", fixed_arith_modes[i].name, 3);
2626 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2627 "sub", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2629 "sssub", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2631 "ussub", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2633 "mul", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2635 "ssmul", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2637 "usmul", fixed_arith_modes[i].name, 3);
2638 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2639 "div", fixed_arith_modes[i].name, 3);
2640 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2641 "udiv", fixed_arith_modes[i].name, 3);
2642 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2643 "ssdiv", fixed_arith_modes[i].name, 3);
2644 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2645 "usdiv", fixed_arith_modes[i].name, 3);
2646 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2647 "neg", fixed_arith_modes[i].name, 2);
2648 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2649 "ssneg", fixed_arith_modes[i].name, 2);
2650 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2651 "usneg", fixed_arith_modes[i].name, 2);
2652 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2653 "ashl", fixed_arith_modes[i].name, 3);
2654 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2655 "ashr", fixed_arith_modes[i].name, 3);
2656 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2657 "lshr", fixed_arith_modes[i].name, 3);
2658 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2659 "ssashl", fixed_arith_modes[i].name, 3);
2660 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2661 "usashl", fixed_arith_modes[i].name, 3);
2662 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2663 "cmp", fixed_arith_modes[i].name, 2);
2666 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2667 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2669 if (i == j
2670 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2671 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2672 continue;
2674 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2675 fixed_conv_modes[j].mode, "fract",
2676 fixed_conv_modes[i].name,
2677 fixed_conv_modes[j].name);
2678 arm_set_fixed_conv_libfunc (satfract_optab,
2679 fixed_conv_modes[i].mode,
2680 fixed_conv_modes[j].mode, "satfract",
2681 fixed_conv_modes[i].name,
2682 fixed_conv_modes[j].name);
2683 arm_set_fixed_conv_libfunc (fractuns_optab,
2684 fixed_conv_modes[i].mode,
2685 fixed_conv_modes[j].mode, "fractuns",
2686 fixed_conv_modes[i].name,
2687 fixed_conv_modes[j].name);
2688 arm_set_fixed_conv_libfunc (satfractuns_optab,
2689 fixed_conv_modes[i].mode,
2690 fixed_conv_modes[j].mode, "satfractuns",
2691 fixed_conv_modes[i].name,
2692 fixed_conv_modes[j].name);
2696 if (TARGET_AAPCS_BASED)
2697 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2700 /* On AAPCS systems, this is the "struct __va_list". */
2701 static GTY(()) tree va_list_type;
2703 /* Return the type to use as __builtin_va_list. */
2704 static tree
2705 arm_build_builtin_va_list (void)
2707 tree va_list_name;
2708 tree ap_field;
2710 if (!TARGET_AAPCS_BASED)
2711 return std_build_builtin_va_list ();
2713 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2714 defined as:
2716 struct __va_list
2718 void *__ap;
2721 The C Library ABI further reinforces this definition in \S
2722 4.1.
2724 We must follow this definition exactly. The structure tag
2725 name is visible in C++ mangled names, and thus forms a part
2726 of the ABI. The field name may be used by people who
2727 #include <stdarg.h>. */
2728 /* Create the type. */
2729 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2730 /* Give it the required name. */
2731 va_list_name = build_decl (BUILTINS_LOCATION,
2732 TYPE_DECL,
2733 get_identifier ("__va_list"),
2734 va_list_type);
2735 DECL_ARTIFICIAL (va_list_name) = 1;
2736 TYPE_NAME (va_list_type) = va_list_name;
2737 TYPE_STUB_DECL (va_list_type) = va_list_name;
2738 /* Create the __ap field. */
2739 ap_field = build_decl (BUILTINS_LOCATION,
2740 FIELD_DECL,
2741 get_identifier ("__ap"),
2742 ptr_type_node);
2743 DECL_ARTIFICIAL (ap_field) = 1;
2744 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2745 TYPE_FIELDS (va_list_type) = ap_field;
2746 /* Compute its layout. */
2747 layout_type (va_list_type);
2749 return va_list_type;
2752 /* Return an expression of type "void *" pointing to the next
2753 available argument in a variable-argument list. VALIST is the
2754 user-level va_list object, of type __builtin_va_list. */
2755 static tree
2756 arm_extract_valist_ptr (tree valist)
2758 if (TREE_TYPE (valist) == error_mark_node)
2759 return error_mark_node;
2761 /* On an AAPCS target, the pointer is stored within "struct
2762 va_list". */
2763 if (TARGET_AAPCS_BASED)
2765 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2766 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2767 valist, ap_field, NULL_TREE);
2770 return valist;
2773 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2774 static void
2775 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2777 valist = arm_extract_valist_ptr (valist);
2778 std_expand_builtin_va_start (valist, nextarg);
2781 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2782 static tree
2783 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2784 gimple_seq *post_p)
2786 valist = arm_extract_valist_ptr (valist);
2787 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2790 /* Check any incompatible options that the user has specified. */
2791 static void
2792 arm_option_check_internal (struct gcc_options *opts)
2794 int flags = opts->x_target_flags;
2796 /* iWMMXt and NEON are incompatible. */
2797 if (TARGET_IWMMXT
2798 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2799 error ("iWMMXt and NEON are incompatible");
2801 /* Make sure that the processor choice does not conflict with any of the
2802 other command line choices. */
2803 if (TARGET_ARM_P (flags)
2804 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2805 error ("target CPU does not support ARM mode");
2807 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2808 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2809 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2811 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2812 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2814 /* If this target is normally configured to use APCS frames, warn if they
2815 are turned off and debugging is turned on. */
2816 if (TARGET_ARM_P (flags)
2817 && write_symbols != NO_DEBUG
2818 && !TARGET_APCS_FRAME
2819 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2820 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2822 /* iWMMXt unsupported under Thumb mode. */
2823 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2824 error ("iWMMXt unsupported under Thumb mode");
2826 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2827 error ("can not use -mtp=cp15 with 16-bit Thumb");
2829 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2831 error ("RTP PIC is incompatible with Thumb");
2832 flag_pic = 0;
2835 /* We only support -mslow-flash-data on armv7-m targets. */
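 /* In the test below, (arm_arch7 && !arm_arch_notm) matches ARMv7 M-profile
 cores and arm_arch7em matches ARMv7E-M cores; any other core, or the use of
 Thumb-1, PIC or NEON, is rejected. */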
2836 if (target_slow_flash_data
2837 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2838 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2839 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2841 /* We only support pure-code on Thumb-2 M-profile targets. */
2842 if (target_pure_code
2843 && (!arm_arch_thumb2 || arm_arch_notm || flag_pic || TARGET_NEON))
2844 error ("-mpure-code only supports non-pic code on armv7-m targets");
2848 /* Recompute the global settings depending on target attribute options. */
2850 static void
2851 arm_option_params_internal (void)
2853 /* If we are not using the default (ARM mode) section anchor offset
2854 ranges, then set the correct ranges now. */
2855 if (TARGET_THUMB1)
2857 /* Thumb-1 LDR instructions cannot have negative offsets.
2858 Permissible positive offset ranges are 5-bit (for byte loads),
2859 6-bit (for halfword loads), or 7-bit (for word loads).
2860 Empirical results suggest a 7-bit anchor range gives the best
2861 overall code size. */
2862 targetm.min_anchor_offset = 0;
2863 targetm.max_anchor_offset = 127;
2865 else if (TARGET_THUMB2)
2867 /* The minimum is set such that the total size of the block
2868 for a particular anchor is 248 + 1 + 4095 bytes, which is
2869 divisible by eight, ensuring natural spacing of anchors. */
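 /* (248 + 1 + 4095 == 4344 == 8 * 543.) */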
2870 targetm.min_anchor_offset = -248;
2871 targetm.max_anchor_offset = 4095;
2873 else
2875 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2876 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2879 if (optimize_size)
2881 /* If optimizing for size, bump the number of instructions that we
2882 are prepared to conditionally execute (even on a StrongARM). */
2883 max_insns_skipped = 6;
2885 /* For THUMB2, we limit the conditional sequence to one IT block. */
2886 if (TARGET_THUMB2)
2887 max_insns_skipped = arm_restrict_it ? 1 : 4;
2889 else
2890 /* When -mrestrict-it is in use tone down the if-conversion. */
2891 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2892 ? 1 : current_tune->max_insns_skipped;
2895 /* True if -mflip-thumb should next add an attribute for the default
2896 mode, false if it should next add an attribute for the opposite mode. */
2897 static GTY(()) bool thumb_flipper;
2899 /* Options after initial target override. */
2900 static GTY(()) tree init_optimize;
2902 static void
2903 arm_override_options_after_change_1 (struct gcc_options *opts)
2905 if (opts->x_align_functions <= 0)
2906 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2907 && opts->x_optimize_size ? 2 : 4;
2910 /* Implement targetm.override_options_after_change. */
2912 static void
2913 arm_override_options_after_change (void)
2915 arm_configure_build_target (&arm_active_target,
2916 TREE_TARGET_OPTION (target_option_default_node),
2917 &global_options_set, false);
2919 arm_override_options_after_change_1 (&global_options);
2922 static void
2923 arm_option_restore (struct gcc_options *, struct cl_target_option *ptr)
2925 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2926 false);
2929 /* Reset options between modes that the user has specified. */
2930 static void
2931 arm_option_override_internal (struct gcc_options *opts,
2932 struct gcc_options *opts_set)
2934 arm_override_options_after_change_1 (opts);
2936 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2938 /* The default is to enable interworking, so this warning message would
2939 be confusing to users who have just compiled with, e.g., -march=armv3. */
2940 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2941 opts->x_target_flags &= ~MASK_INTERWORK;
2944 if (TARGET_THUMB_P (opts->x_target_flags)
2945 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2947 warning (0, "target CPU does not support THUMB instructions");
2948 opts->x_target_flags &= ~MASK_THUMB;
2951 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2953 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2954 opts->x_target_flags &= ~MASK_APCS_FRAME;
2957 /* Callee super interworking implies thumb interworking. Adding
2958 this to the flags here simplifies the logic elsewhere. */
2959 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2960 opts->x_target_flags |= MASK_INTERWORK;
2962 /* Need to remember initial values so combinations of options like
2963 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2964 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2966 if (! opts_set->x_arm_restrict_it)
2967 opts->x_arm_restrict_it = arm_arch8;
2969 /* ARM execution state and M profile don't have [restrict] IT. */
2970 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2971 opts->x_arm_restrict_it = 0;
2973 /* Enable -munaligned-access by default for
2974 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2975 i.e. Thumb2 and ARM state only.
2976 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2977 - ARMv8 architecture-based processors.
2979 Disable -munaligned-access by default for
2980 - all pre-ARMv6 architecture-based processors
2981 - ARMv6-M architecture-based processors
2982 - ARMv8-M Baseline processors. */
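 /* The expression below implements the rules above: a 32-bit ISA, ARMv6 or
 later, and either a non-M-profile core or one with ARMv7 support. */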
2984 if (! opts_set->x_unaligned_access)
2986 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2987 && arm_arch6 && (arm_arch_notm || arm_arch7));
2989 else if (opts->x_unaligned_access == 1
2990 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2992 warning (0, "target CPU does not support unaligned accesses");
2993 opts->x_unaligned_access = 0;
2996 /* Don't warn since it's on by default in -O2. */
2997 if (TARGET_THUMB1_P (opts->x_target_flags))
2998 opts->x_flag_schedule_insns = 0;
2999 else
3000 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3002 /* Disable shrink-wrap when optimizing function for size, since it tends to
3003 generate additional returns. */
3004 if (optimize_function_for_size_p (cfun)
3005 && TARGET_THUMB2_P (opts->x_target_flags))
3006 opts->x_flag_shrink_wrap = false;
3007 else
3008 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3010 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3011 - epilogue_insns - does not accurately model the corresponding insns
3012 emitted in the asm file. In particular, see the comment in thumb_exit
3013 'Find out how many of the (return) argument registers we can corrupt'.
3014 As a consequence, the epilogue may clobber registers without fipa-ra
3015 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3016 TODO: Accurately model clobbers for epilogue_insns and reenable
3017 fipa-ra. */
3018 if (TARGET_THUMB1_P (opts->x_target_flags))
3019 opts->x_flag_ipa_ra = 0;
3020 else
3021 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3023 /* Thumb2 inline assembly code should always use unified syntax.
3024 This will apply to ARM and Thumb1 eventually. */
3025 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3027 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3028 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3029 #endif
3032 /* Convert a static initializer array of feature bits to sbitmap
3033 representation. */
3034 static void
3035 arm_initialize_isa (sbitmap isa, const enum isa_feature *isa_bits)
3037 bitmap_clear (isa);
3038 while (*isa_bits != isa_nobit)
3039 bitmap_set_bit (isa, *(isa_bits++));
3042 static sbitmap isa_all_fpubits;
3043 static sbitmap isa_quirkbits;
3045 /* Configure a build target TARGET from the user-specified options OPTS and
3046 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3047 architecture have been specified, but the two are not identical. */
3048 void
3049 arm_configure_build_target (struct arm_build_target *target,
3050 struct cl_target_option *opts,
3051 struct gcc_options *opts_set,
3052 bool warn_compatible)
3054 const struct processors *arm_selected_tune = NULL;
3055 const struct processors *arm_selected_arch = NULL;
3056 const struct processors *arm_selected_cpu = NULL;
3057 const struct arm_fpu_desc *arm_selected_fpu = NULL;
3059 bitmap_clear (target->isa);
3060 target->core_name = NULL;
3061 target->arch_name = NULL;
3063 if (opts_set->x_arm_arch_option)
3064 arm_selected_arch = &all_architectures[opts->x_arm_arch_option];
3066 if (opts_set->x_arm_cpu_option)
3068 arm_selected_cpu = &all_cores[(int) opts->x_arm_cpu_option];
3069 arm_selected_tune = &all_cores[(int) opts->x_arm_cpu_option];
3072 if (opts_set->x_arm_tune_option)
3073 arm_selected_tune = &all_cores[(int) opts->x_arm_tune_option];
3075 if (arm_selected_arch)
3077 arm_initialize_isa (target->isa, arm_selected_arch->isa_bits);
3079 if (arm_selected_cpu)
3081 auto_sbitmap cpu_isa (isa_num_bits);
3083 arm_initialize_isa (cpu_isa, arm_selected_cpu->isa_bits);
3084 bitmap_xor (cpu_isa, cpu_isa, target->isa);
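 /* After the XOR, cpu_isa holds the feature bits on which the selected CPU
 and the selected architecture differ. */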
3085 /* Ignore any bits that are quirk bits. */
3086 bitmap_and_compl (cpu_isa, cpu_isa, isa_quirkbits);
3087 /* Ignore (for now) any bits that might be set by -mfpu. */
3088 bitmap_and_compl (cpu_isa, cpu_isa, isa_all_fpubits);
3090 if (!bitmap_empty_p (cpu_isa))
3092 if (warn_compatible)
3093 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3094 arm_selected_cpu->name, arm_selected_arch->name);
3095 /* -march wins for code generation.
3096 -mcpu wins for default tuning. */
3097 if (!arm_selected_tune)
3098 arm_selected_tune = arm_selected_cpu;
3100 arm_selected_cpu = arm_selected_arch;
3102 else
3104 /* Architecture and CPU are essentially the same.
3105 Prefer the CPU setting. */
3106 arm_selected_arch = NULL;
3109 target->core_name = arm_selected_cpu->name;
3111 else
3113 /* Pick a CPU based on the architecture. */
3114 arm_selected_cpu = arm_selected_arch;
3115 target->arch_name = arm_selected_arch->name;
3116 /* Note: target->core_name is left unset in this path. */
3119 else if (arm_selected_cpu)
3121 target->core_name = arm_selected_cpu->name;
3122 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3124 /* If the user did not specify a processor, choose one for them. */
3125 else
3127 const struct processors * sel;
3128 auto_sbitmap sought_isa (isa_num_bits);
3129 bitmap_clear (sought_isa);
3130 auto_sbitmap default_isa (isa_num_bits);
3132 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3133 gcc_assert (arm_selected_cpu->name);
3135 /* RWE: All of the selection logic below (to the end of this
3136 'if' clause) looks somewhat suspect. It appears to be mostly
3137 there to support forcing thumb support when the default CPU
3138 does not have thumb (somewhat dubious in terms of what the
3139 user might be expecting). I think it should be removed once
3140 support for the pre-thumb era cores is removed. */
3141 sel = arm_selected_cpu;
3142 arm_initialize_isa (default_isa, sel->isa_bits);
3144 /* Now check to see if the user has specified any command line
3145 switches that require certain abilities from the cpu. */
3147 if (TARGET_INTERWORK || TARGET_THUMB)
3149 bitmap_set_bit (sought_isa, isa_bit_thumb);
3150 bitmap_set_bit (sought_isa, isa_bit_mode32);
3152 /* There are no ARM processors that support both APCS-26 and
3153 interworking. Therefore we forcibly remove MODE26 from
3154 the isa features here (if it was set), so that the
3155 search below will always be able to find a compatible
3156 processor. */
3157 bitmap_clear_bit (default_isa, isa_bit_mode26);
3160 /* If there are such requirements and the default CPU does not
3161 satisfy them, we need to run over the complete list of
3162 cores looking for one that is satisfactory. */
3163 if (!bitmap_empty_p (sought_isa)
3164 && !bitmap_subset_p (sought_isa, default_isa))
3166 auto_sbitmap candidate_isa (isa_num_bits);
3167 /* We're only interested in a CPU with at least the
3168 capabilities of the default CPU and the required
3169 additional features. */
3170 bitmap_ior (default_isa, default_isa, sought_isa);
3172 /* Try to locate a CPU type that supports all of the abilities
3173 of the default CPU, plus the extra abilities requested by
3174 the user. */
3175 for (sel = all_cores; sel->name != NULL; sel++)
3177 arm_initialize_isa (candidate_isa, sel->isa_bits);
3178 /* An exact match? */
3179 if (bitmap_equal_p (default_isa, candidate_isa))
3180 break;
3183 if (sel->name == NULL)
3185 unsigned current_bit_count = isa_num_bits;
3186 const struct processors * best_fit = NULL;
3188 /* Ideally we would like to issue an error message here
3189 saying that it was not possible to find a CPU compatible
3190 with the default CPU, but which also supports the command
3191 line options specified by the programmer, and so they
3192 ought to use the -mcpu=<name> command line option to
3193 override the default CPU type.
3195 If we cannot find a CPU that has exactly the
3196 characteristics of the default CPU and the given
3197 command line options we scan the array again looking
3198 for a best match. The best match must have at least
3199 the capabilities of the perfect match. */
3200 for (sel = all_cores; sel->name != NULL; sel++)
3202 arm_initialize_isa (candidate_isa, sel->isa_bits);
3204 if (bitmap_subset_p (default_isa, candidate_isa))
3206 unsigned count;
3208 bitmap_and_compl (candidate_isa, candidate_isa,
3209 default_isa);
3210 count = bitmap_popcount (candidate_isa);
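 /* COUNT is the number of capabilities this candidate provides beyond those
 required; the best fit is the candidate with the fewest extras. */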
3212 if (count < current_bit_count)
3214 best_fit = sel;
3215 current_bit_count = count;
3219 gcc_assert (best_fit);
3220 sel = best_fit;
3223 arm_selected_cpu = sel;
3226 /* Now we know the CPU, we can finally initialize the target
3227 structure. */
3228 target->core_name = arm_selected_cpu->name;
3229 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3232 gcc_assert (arm_selected_cpu);
3234 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3236 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3237 auto_sbitmap fpu_bits (isa_num_bits);
3239 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3240 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3241 bitmap_ior (target->isa, target->isa, fpu_bits);
3243 else if (target->core_name == NULL)
3244 /* To support this we need to be able to parse FPU feature options
3245 from the architecture string. */
3246 sorry ("-mfpu=auto not currently supported without an explicit CPU.");
3248 /* The selected cpu may be an architecture, so look up tuning by core ID. */
3249 if (!arm_selected_tune)
3250 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3252 /* Finish initializing the target structure. */
3253 target->arch_pp_name = arm_selected_cpu->arch;
3254 target->base_arch = arm_selected_cpu->base_arch;
3255 target->arch_core = arm_selected_cpu->core;
3257 target->tune_flags = arm_selected_tune->tune_flags;
3258 target->tune = arm_selected_tune->tune;
3259 target->tune_core = arm_selected_tune->core;
3262 /* Fix up any incompatible options that the user has specified. */
3263 static void
3264 arm_option_override (void)
3266 static const enum isa_feature fpu_bitlist[] = { ISA_ALL_FPU, isa_nobit };
3267 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3268 cl_target_option opts;
3270 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3271 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3273 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3274 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3276 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3278 if (!global_options_set.x_arm_fpu_index)
3280 const char *target_fpu_name;
3281 bool ok;
3282 int fpu_index;
3284 #ifdef FPUTYPE_DEFAULT
3285 target_fpu_name = FPUTYPE_DEFAULT;
3286 #else
3287 target_fpu_name = "vfp";
3288 #endif
3290 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &fpu_index,
3291 CL_TARGET);
3292 gcc_assert (ok);
3293 arm_fpu_index = (enum fpu_type) fpu_index;
3296 cl_target_option_save (&opts, &global_options);
3297 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3298 true);
3300 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3301 SUBTARGET_OVERRIDE_OPTIONS;
3302 #endif
3304 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
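 /* For example, an ARMv7-A target ends up with "__ARM_ARCH_7A__" here. */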
3305 arm_base_arch = arm_active_target.base_arch;
3307 arm_tune = arm_active_target.tune_core;
3308 tune_flags = arm_active_target.tune_flags;
3309 current_tune = arm_active_target.tune;
3311 /* TBD: Dwarf info for apcs frame is not handled yet. */
3312 if (TARGET_APCS_FRAME)
3313 flag_shrink_wrap = false;
3315 /* BPABI targets use linker tricks to allow interworking on cores
3316 without thumb support. */
3317 if (TARGET_INTERWORK
3318 && !TARGET_BPABI
3319 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3321 warning (0, "target CPU does not support interworking" );
3322 target_flags &= ~MASK_INTERWORK;
3325 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3327 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3328 target_flags |= MASK_APCS_FRAME;
3331 if (TARGET_POKE_FUNCTION_NAME)
3332 target_flags |= MASK_APCS_FRAME;
3334 if (TARGET_APCS_REENT && flag_pic)
3335 error ("-fpic and -mapcs-reent are incompatible");
3337 if (TARGET_APCS_REENT)
3338 warning (0, "APCS reentrant code not supported. Ignored");
3340 /* Initialize boolean versions of the architectural flags, for use
3341 in the arm.md file. */
3342 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3343 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3344 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3345 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3346 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3347 arm_arch5te = arm_arch5e
3348 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3349 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3350 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3351 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3352 arm_arch6m = arm_arch6 && !arm_arch_notm;
3353 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3354 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3355 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3356 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3357 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3358 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3359 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3360 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3361 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3362 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3363 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3364 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3365 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3366 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3367 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3368 arm_arch7ve
3369 = (arm_arch6k && arm_arch7 && arm_arch_thumb_hwdiv && arm_arch_arm_hwdiv);
3370 if (arm_fp16_inst)
3372 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3373 error ("selected fp16 options are incompatible");
3374 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3378 /* Set up some tuning parameters. */
3379 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3380 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3381 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3382 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3383 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3384 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3386 /* And finally, set up some quirks. */
3387 arm_arch_no_volatile_ce
3388 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3389 arm_arch6kz
3390 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3392 /* V5 code we generate is completely interworking capable, so we turn off
3393 TARGET_INTERWORK here to avoid many tests later on. */
3395 /* XXX However, we must pass the right pre-processor defines to CPP
3396 or GLD can get confused. This is a hack. */
3397 if (TARGET_INTERWORK)
3398 arm_cpp_interwork = 1;
3400 if (arm_arch5)
3401 target_flags &= ~MASK_INTERWORK;
3403 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3404 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3406 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3407 error ("iwmmxt abi requires an iwmmxt capable cpu");
3409 /* If soft-float is specified then don't use FPU. */
3410 if (TARGET_SOFT_FLOAT)
3411 arm_fpu_attr = FPU_NONE;
3412 else
3413 arm_fpu_attr = FPU_VFP;
3415 if (TARGET_AAPCS_BASED)
3417 if (TARGET_CALLER_INTERWORKING)
3418 error ("AAPCS does not support -mcaller-super-interworking");
3419 else
3420 if (TARGET_CALLEE_INTERWORKING)
3421 error ("AAPCS does not support -mcallee-super-interworking");
3424 /* __fp16 support currently assumes the core has ldrh. */
3425 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3426 sorry ("__fp16 and no ldrh");
3428 if (TARGET_AAPCS_BASED)
3430 if (arm_abi == ARM_ABI_IWMMXT)
3431 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3432 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3433 && TARGET_HARD_FLOAT)
3435 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3436 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3437 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3439 else
3440 arm_pcs_default = ARM_PCS_AAPCS;
3442 else
3444 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3445 sorry ("-mfloat-abi=hard and VFP");
3447 if (arm_abi == ARM_ABI_APCS)
3448 arm_pcs_default = ARM_PCS_APCS;
3449 else
3450 arm_pcs_default = ARM_PCS_ATPCS;
3453 /* For arm2/3 there is no need to do any scheduling if we are doing
3454 software floating-point. */
3455 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3456 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3458 /* Use the cp15 method if it is available. */
3459 if (target_thread_pointer == TP_AUTO)
3461 if (arm_arch6k && !TARGET_THUMB1)
3462 target_thread_pointer = TP_CP15;
3463 else
3464 target_thread_pointer = TP_SOFT;
3467 /* Override the default structure alignment for AAPCS ABI. */
3468 if (!global_options_set.x_arm_structure_size_boundary)
3470 if (TARGET_AAPCS_BASED)
3471 arm_structure_size_boundary = 8;
3473 else
3475 if (arm_structure_size_boundary != 8
3476 && arm_structure_size_boundary != 32
3477 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3479 if (ARM_DOUBLEWORD_ALIGN)
3480 warning (0,
3481 "structure size boundary can only be set to 8, 32 or 64");
3482 else
3483 warning (0, "structure size boundary can only be set to 8 or 32");
3484 arm_structure_size_boundary
3485 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3489 if (TARGET_VXWORKS_RTP)
3491 if (!global_options_set.x_arm_pic_data_is_text_relative)
3492 arm_pic_data_is_text_relative = 0;
3494 else if (flag_pic
3495 && !arm_pic_data_is_text_relative
3496 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3497 /* When text & data segments don't have a fixed displacement, the
3498 intended use is with a single, read-only, PIC base register.
3499 Unless the user explicitly requested not to do that, set
3500 it. */
3501 target_flags |= MASK_SINGLE_PIC_BASE;
3503 /* If stack checking is disabled, we can use r10 as the PIC register,
3504 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3505 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3507 if (TARGET_VXWORKS_RTP)
3508 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3509 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3512 if (flag_pic && TARGET_VXWORKS_RTP)
3513 arm_pic_register = 9;
3515 if (arm_pic_register_string != NULL)
3517 int pic_register = decode_reg_name (arm_pic_register_string);
3519 if (!flag_pic)
3520 warning (0, "-mpic-register= is useless without -fpic");
3522 /* Prevent the user from choosing an obviously stupid PIC register. */
3523 else if (pic_register < 0 || call_used_regs[pic_register]
3524 || pic_register == HARD_FRAME_POINTER_REGNUM
3525 || pic_register == STACK_POINTER_REGNUM
3526 || pic_register >= PC_REGNUM
3527 || (TARGET_VXWORKS_RTP
3528 && (unsigned int) pic_register != arm_pic_register))
3529 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3530 else
3531 arm_pic_register = pic_register;
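/* For illustration (the exact register set depends on the ABI in use):
   "-fpic -mpic-register=r5" passes the checks above on a typical
   configuration, since r5 is call-saved and is neither the frame
   pointer, the stack pointer nor the PC, whereas "-mpic-register=r0"
   is rejected because r0 is call-clobbered.  */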
3534 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3535 if (fix_cm3_ldrd == 2)
3537 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3538 fix_cm3_ldrd = 1;
3539 else
3540 fix_cm3_ldrd = 0;
3543 /* Hot/Cold partitioning is not currently supported, since we can't
3544 handle literal pool placement in that case. */
3545 if (flag_reorder_blocks_and_partition)
3547 inform (input_location,
3548 "-freorder-blocks-and-partition not supported on this architecture");
3549 flag_reorder_blocks_and_partition = 0;
3550 flag_reorder_blocks = 1;
3553 if (flag_pic)
3554 /* Hoisting PIC address calculations more aggressively provides a small,
3555 but measurable, size reduction for PIC code. Therefore, we decrease
3556 the bar for unrestricted expression hoisting to the cost of PIC address
3557 calculation, which is 2 instructions. */
3558 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3559 global_options.x_param_values,
3560 global_options_set.x_param_values);
3562 /* ARM EABI defaults to strict volatile bitfields. */
3563 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3564 && abi_version_at_least(2))
3565 flag_strict_volatile_bitfields = 1;
3567 /* Enable software prefetching at -O3 for CPUs that have prefetch and
3568 for which we have deemed it beneficial (signified by setting
3569 prefetch.num_slots to 1 or more). */
3570 if (flag_prefetch_loop_arrays < 0
3571 && HAVE_prefetch
3572 && optimize >= 3
3573 && current_tune->prefetch.num_slots > 0)
3574 flag_prefetch_loop_arrays = 1;
3576 /* Set up parameters to be used in the prefetching algorithm. Do not
3577 override the defaults unless we are tuning for a core we have
3578 researched values for. */
3579 if (current_tune->prefetch.num_slots > 0)
3580 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3581 current_tune->prefetch.num_slots,
3582 global_options.x_param_values,
3583 global_options_set.x_param_values);
3584 if (current_tune->prefetch.l1_cache_line_size >= 0)
3585 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3586 current_tune->prefetch.l1_cache_line_size,
3587 global_options.x_param_values,
3588 global_options_set.x_param_values);
3589 if (current_tune->prefetch.l1_cache_size >= 0)
3590 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3591 current_tune->prefetch.l1_cache_size,
3592 global_options.x_param_values,
3593 global_options_set.x_param_values);
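/* The three calls above correspond to the --param simultaneous-prefetches,
   --param l1-cache-line-size and --param l1-cache-size knobs;
   maybe_set_param_value only installs the tuning value when the user has
   not already set the parameter explicitly.  */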
3595 /* Use Neon rather than core registers to perform 64-bit
3596 operations. */
3597 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3598 if (use_neon_for_64bits == 1)
3599 prefer_neon_for_64bits = true;
3601 /* Use the alternative scheduling-pressure algorithm by default. */
3602 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3603 global_options.x_param_values,
3604 global_options_set.x_param_values);
3606 /* Look through ready list and all of queue for instructions
3607 relevant for L2 auto-prefetcher. */
3608 int param_sched_autopref_queue_depth;
3610 switch (current_tune->sched_autopref)
3612 case tune_params::SCHED_AUTOPREF_OFF:
3613 param_sched_autopref_queue_depth = -1;
3614 break;
3616 case tune_params::SCHED_AUTOPREF_RANK:
3617 param_sched_autopref_queue_depth = 0;
3618 break;
3620 case tune_params::SCHED_AUTOPREF_FULL:
3621 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3622 break;
3624 default:
3625 gcc_unreachable ();
3628 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3629 param_sched_autopref_queue_depth,
3630 global_options.x_param_values,
3631 global_options_set.x_param_values);
3633 /* Currently, for slow flash data, we just disable literal pools. We
3634 also disable them for pure-code. */
3635 if (target_slow_flash_data || target_pure_code)
3636 arm_disable_literal_pool = true;
3638 if (use_cmse && !arm_arch_cmse)
3639 error ("target CPU does not support ARMv8-M Security Extensions");
3641 /* Disable scheduling fusion by default if the processor is not ARMv7
3642 or does not prefer ldrd/strd. */
3643 if (flag_schedule_fusion == 2
3644 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3645 flag_schedule_fusion = 0;
3647 /* Need to remember initial options before they are overridden. */
3648 init_optimize = build_optimization_node (&global_options);
3650 arm_option_override_internal (&global_options, &global_options_set);
3651 arm_option_check_internal (&global_options);
3652 arm_option_params_internal ();
3654 /* Create the default target_options structure. */
3655 target_option_default_node = target_option_current_node
3656 = build_target_option_node (&global_options);
3658 /* Register global variables with the garbage collector. */
3659 arm_add_gc_roots ();
3661 /* Init initial mode for testing. */
3662 thumb_flipper = TARGET_THUMB;
3665 static void
3666 arm_add_gc_roots (void)
3668 gcc_obstack_init(&minipool_obstack);
3669 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3672 /* A table of known ARM exception types.
3673 For use with the interrupt function attribute. */
3675 typedef struct
3677 const char *const arg;
3678 const unsigned long return_value;
3680 isr_attribute_arg;
3682 static const isr_attribute_arg isr_attribute_args [] =
3684 { "IRQ", ARM_FT_ISR },
3685 { "irq", ARM_FT_ISR },
3686 { "FIQ", ARM_FT_FIQ },
3687 { "fiq", ARM_FT_FIQ },
3688 { "ABORT", ARM_FT_ISR },
3689 { "abort", ARM_FT_ISR },
3690 { "ABORT", ARM_FT_ISR },
3691 { "abort", ARM_FT_ISR },
3692 { "UNDEF", ARM_FT_EXCEPTION },
3693 { "undef", ARM_FT_EXCEPTION },
3694 { "SWI", ARM_FT_EXCEPTION },
3695 { "swi", ARM_FT_EXCEPTION },
3696 { NULL, ARM_FT_NORMAL }
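/* For illustration, on cores with the traditional ARM exception model a
   handler declared as

     void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);

   is mapped by arm_isr_value below to ARM_FT_FIQ via this table, while a
   bare __attribute__ ((interrupt)) defaults to ARM_FT_ISR.  */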
3699 /* Returns the (interrupt) function type of the current
3700 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3702 static unsigned long
3703 arm_isr_value (tree argument)
3705 const isr_attribute_arg * ptr;
3706 const char * arg;
3708 if (!arm_arch_notm)
3709 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3711 /* No argument - default to IRQ. */
3712 if (argument == NULL_TREE)
3713 return ARM_FT_ISR;
3715 /* Get the value of the argument. */
3716 if (TREE_VALUE (argument) == NULL_TREE
3717 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3718 return ARM_FT_UNKNOWN;
3720 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3722 /* Check it against the list of known arguments. */
3723 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3724 if (streq (arg, ptr->arg))
3725 return ptr->return_value;
3727 /* An unrecognized interrupt type. */
3728 return ARM_FT_UNKNOWN;
3731 /* Computes the type of the current function. */
3733 static unsigned long
3734 arm_compute_func_type (void)
3736 unsigned long type = ARM_FT_UNKNOWN;
3737 tree a;
3738 tree attr;
3740 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3742 /* Decide if the current function is volatile. Such functions
3743 never return, and many memory cycles can be saved by not storing
3744 register values that will never be needed again. This optimization
3745 was added to speed up context switching in a kernel application. */
3746 if (optimize > 0
3747 && (TREE_NOTHROW (current_function_decl)
3748 || !(flag_unwind_tables
3749 || (flag_exceptions
3750 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3751 && TREE_THIS_VOLATILE (current_function_decl))
3752 type |= ARM_FT_VOLATILE;
3754 if (cfun->static_chain_decl != NULL)
3755 type |= ARM_FT_NESTED;
3757 attr = DECL_ATTRIBUTES (current_function_decl);
3759 a = lookup_attribute ("naked", attr);
3760 if (a != NULL_TREE)
3761 type |= ARM_FT_NAKED;
3763 a = lookup_attribute ("isr", attr);
3764 if (a == NULL_TREE)
3765 a = lookup_attribute ("interrupt", attr);
3767 if (a == NULL_TREE)
3768 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3769 else
3770 type |= arm_isr_value (TREE_VALUE (a));
3772 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3773 type |= ARM_FT_CMSE_ENTRY;
3775 return type;
3778 /* Returns the type of the current function. */
3780 unsigned long
3781 arm_current_func_type (void)
3783 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3784 cfun->machine->func_type = arm_compute_func_type ();
3786 return cfun->machine->func_type;
3789 bool
3790 arm_allocate_stack_slots_for_args (void)
3792 /* Naked functions should not allocate stack slots for arguments. */
3793 return !IS_NAKED (arm_current_func_type ());
3796 static bool
3797 arm_warn_func_return (tree decl)
3799 /* Naked functions are implemented entirely in assembly, including the
3800 return sequence, so suppress warnings about this. */
3801 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3805 /* Output assembler code for a block containing the constant parts
3806 of a trampoline, leaving space for the variable parts.
3808 On the ARM, (if r8 is the static chain regnum, and remembering that
3809 referencing pc adds an offset of 8) the trampoline looks like:
3810 ldr r8, [pc, #0]
3811 ldr pc, [pc]
3812 .word static chain value
3813 .word function's address
3814 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3816 static void
3817 arm_asm_trampoline_template (FILE *f)
3819 fprintf (f, "\t.syntax unified\n");
3821 if (TARGET_ARM)
3823 fprintf (f, "\t.arm\n");
3824 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3825 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3827 else if (TARGET_THUMB2)
3829 fprintf (f, "\t.thumb\n");
3830 /* The Thumb-2 trampoline is similar to the ARM implementation.
3831 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3832 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3833 STATIC_CHAIN_REGNUM, PC_REGNUM);
3834 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3836 else
3838 ASM_OUTPUT_ALIGN (f, 2);
3839 fprintf (f, "\t.code\t16\n");
3840 fprintf (f, ".Ltrampoline_start:\n");
3841 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3842 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3843 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3844 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3845 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3846 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3848 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3849 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3852 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3854 static void
3855 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3857 rtx fnaddr, mem, a_tramp;
3859 emit_block_move (m_tramp, assemble_trampoline_template (),
3860 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3862 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3863 emit_move_insn (mem, chain_value);
3865 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3866 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3867 emit_move_insn (mem, fnaddr);
3869 a_tramp = XEXP (m_tramp, 0);
3870 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3871 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3872 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
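/* On 32-bit (ARM/Thumb-2) targets the block written above thus holds the
   two template instructions, the static chain value at offset 8 and the
   target function's address at offset 12; __clear_cache is then run over
   the whole block so the newly written instructions are visible to
   instruction fetch.  */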
3875 /* Thumb trampolines should be entered in thumb mode, so set
3876 the bottom bit of the address. */
3878 static rtx
3879 arm_trampoline_adjust_address (rtx addr)
3881 if (TARGET_THUMB)
3882 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3883 NULL, 0, OPTAB_LIB_WIDEN);
3884 return addr;
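/* For example, a trampoline placed at 0x20001000 is handed out as
   0x20001001 on Thumb targets; the set low bit makes an indirect BX/BLX
   through that address enter the stub in Thumb state.  */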
3887 /* Return 1 if it is possible to return using a single instruction.
3888 If SIBLING is non-null, this is a test for a return before a sibling
3889 call. SIBLING is the call insn, so we can examine its register usage. */
3892 use_return_insn (int iscond, rtx sibling)
3894 int regno;
3895 unsigned int func_type;
3896 unsigned long saved_int_regs;
3897 unsigned HOST_WIDE_INT stack_adjust;
3898 arm_stack_offsets *offsets;
3900 /* Never use a return instruction before reload has run. */
3901 if (!reload_completed)
3902 return 0;
3904 func_type = arm_current_func_type ();
3906 /* Naked, volatile and stack alignment functions need special
3907 consideration. */
3908 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3909 return 0;
3911 /* So do interrupt functions that use the frame pointer and Thumb
3912 interrupt functions. */
3913 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3914 return 0;
3916 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3917 && !optimize_function_for_size_p (cfun))
3918 return 0;
3920 offsets = arm_get_frame_offsets ();
3921 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3923 /* As do variadic functions. */
3924 if (crtl->args.pretend_args_size
3925 || cfun->machine->uses_anonymous_args
3926 /* Or if the function calls __builtin_eh_return () */
3927 || crtl->calls_eh_return
3928 /* Or if the function calls alloca */
3929 || cfun->calls_alloca
3930 /* Or if there is a stack adjustment. However, if the stack pointer
3931 is saved on the stack, we can use a pre-incrementing stack load. */
3932 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3933 && stack_adjust == 4))
3934 /* Or if the static chain register was saved above the frame, under the
3935 assumption that the stack pointer isn't saved on the stack. */
3936 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3937 && arm_compute_static_chain_stack_bytes() != 0))
3938 return 0;
3940 saved_int_regs = offsets->saved_regs_mask;
3942 /* Unfortunately, the insn
3944 ldmib sp, {..., sp, ...}
3946 triggers a bug on most SA-110 based devices, such that the stack
3947 pointer won't be correctly restored if the instruction takes a
3948 page fault. We work around this problem by popping r3 along with
3949 the other registers, since that is never slower than executing
3950 another instruction.
3952 We test for !arm_arch5 here, because code for any architecture
3953 less than this could potentially be run on one of the buggy
3954 chips. */
3955 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3957 /* Validate that r3 is a call-clobbered register (always true in
3958 the default abi) ... */
3959 if (!call_used_regs[3])
3960 return 0;
3962 /* ... that it isn't being used for a return value ... */
3963 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3964 return 0;
3966 /* ... or for a tail-call argument ... */
3967 if (sibling)
3969 gcc_assert (CALL_P (sibling));
3971 if (find_regno_fusage (sibling, USE, 3))
3972 return 0;
3975 /* ... and that there are no call-saved registers in r0-r2
3976 (always true in the default ABI). */
3977 if (saved_int_regs & 0x7)
3978 return 0;
3981 /* Can't be done if interworking with Thumb, and any registers have been
3982 stacked. */
3983 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3984 return 0;
3986 /* On StrongARM, conditional returns are expensive if they aren't
3987 taken and multiple registers have been stacked. */
3988 if (iscond && arm_tune_strongarm)
3990 /* Conditional return when just the LR is stored is a simple
3991 conditional-load instruction, that's not expensive. */
3992 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3993 return 0;
3995 if (flag_pic
3996 && arm_pic_register != INVALID_REGNUM
3997 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3998 return 0;
4001 /* ARMv8-M non-secure entry functions need to use bxns to return and
4002 thus need several instructions if anything needs to be popped. */
4003 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4004 return 0;
4006 /* If there are saved registers but the LR isn't saved, then we need
4007 two instructions for the return. */
4008 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4009 return 0;
4011 /* Can't be done if any of the VFP regs are pushed,
4012 since this also requires an insn. */
4013 if (TARGET_HARD_FLOAT)
4014 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4015 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4016 return 0;
4018 if (TARGET_REALLY_IWMMXT)
4019 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4020 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4021 return 0;
4023 return 1;
4026 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4027 shrink-wrapping if possible. This is the case if we need to emit a
4028 prologue, which we can test by looking at the offsets. */
4029 bool
4030 use_simple_return_p (void)
4032 arm_stack_offsets *offsets;
4034 offsets = arm_get_frame_offsets ();
4035 return offsets->outgoing_args != 0;
4038 /* Return TRUE if int I is a valid immediate ARM constant. */
4041 const_ok_for_arm (HOST_WIDE_INT i)
4043 int lowbit;
4045 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4046 be all zero, or all one. */
4047 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4048 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4049 != ((~(unsigned HOST_WIDE_INT) 0)
4050 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4051 return FALSE;
4053 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4055 /* Fast return for 0 and small values. We must do this for zero, since
4056 the code below can't handle that one case. */
4057 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4058 return TRUE;
4060 /* Get the number of trailing zeros. */
4061 lowbit = ffs((int) i) - 1;
4063 /* Only even shifts are allowed in ARM mode so round down to the
4064 nearest even number. */
4065 if (TARGET_ARM)
4066 lowbit &= ~1;
4068 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4069 return TRUE;
4071 if (TARGET_ARM)
4073 /* Allow rotated constants in ARM mode. */
4074 if (lowbit <= 4
4075 && ((i & ~0xc000003f) == 0
4076 || (i & ~0xf000000f) == 0
4077 || (i & ~0xfc000003) == 0))
4078 return TRUE;
4080 else
4082 HOST_WIDE_INT v;
4084 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4085 v = i & 0xff;
4086 v |= v << 16;
4087 if (i == v || i == (v | (v << 8)))
4088 return TRUE;
4090 /* Allow repeated pattern 0xXY00XY00. */
4091 v = i & 0xff00;
4092 v |= v << 16;
4093 if (i == v)
4094 return TRUE;
4097 return FALSE;
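/* A few illustrative cases of the above, for ARM mode: 0x000000ff and
   0x0003fc00 (0xff at an even bit position) are valid, and so is
   0xf000000f, since the 8-bit field may wrap around the top of the word.
   0x000001fe needs an odd rotation and is rejected in ARM mode (Thumb-2
   accepts it as a shifted 8-bit value), while 0x00ff00ff is only
   accepted for Thumb-2, via the replicated-byte patterns.  */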
4100 /* Return true if I is a valid constant for the operation CODE. */
4102 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4104 if (const_ok_for_arm (i))
4105 return 1;
4107 switch (code)
4109 case SET:
4110 /* See if we can use movw. */
4111 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4112 return 1;
4113 else
4114 /* Otherwise, try mvn. */
4115 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4117 case PLUS:
4118 /* See if we can use addw or subw. */
4119 if (TARGET_THUMB2
4120 && ((i & 0xfffff000) == 0
4121 || ((-i) & 0xfffff000) == 0))
4122 return 1;
4123 /* Fall through. */
4124 case COMPARE:
4125 case EQ:
4126 case NE:
4127 case GT:
4128 case LE:
4129 case LT:
4130 case GE:
4131 case GEU:
4132 case LTU:
4133 case GTU:
4134 case LEU:
4135 case UNORDERED:
4136 case ORDERED:
4137 case UNEQ:
4138 case UNGE:
4139 case UNLT:
4140 case UNGT:
4141 case UNLE:
4142 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4144 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4145 case XOR:
4146 return 0;
4148 case IOR:
4149 if (TARGET_THUMB2)
4150 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4151 return 0;
4153 case AND:
4154 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4156 default:
4157 gcc_unreachable ();
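/* As an example of the PLUS handling above: adding the constant -255 is
   accepted even though 0xffffff01 is not itself a valid immediate,
   because const_ok_for_arm (255) holds and the addition can be emitted
   as a subtraction of 255.  Similarly, IOR and AND constants whose
   inverse fits can fall back on ORN/BIC forms where those exist.  */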
4161 /* Return true if I is a valid DImode constant for the operation CODE. */
4163 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4165 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4166 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4167 rtx hi = GEN_INT (hi_val);
4168 rtx lo = GEN_INT (lo_val);
4170 if (TARGET_THUMB1)
4171 return 0;
4173 switch (code)
4175 case AND:
4176 case IOR:
4177 case XOR:
4178 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4179 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4180 case PLUS:
4181 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4183 default:
4184 return 0;
4188 /* Emit a sequence of insns to handle a large constant.
4189 CODE is the code of the operation required, it can be any of SET, PLUS,
4190 IOR, AND, XOR, MINUS;
4191 MODE is the mode in which the operation is being performed;
4192 VAL is the integer to operate on;
4193 SOURCE is the other operand (a register, or a null-pointer for SET);
4194 SUBTARGETS means it is safe to create scratch registers if that will
4195 either produce a simpler sequence, or we will want to cse the values.
4196 Return value is the number of insns emitted. */
4198 /* ??? Tweak this for thumb2. */
4200 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4201 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4203 rtx cond;
4205 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4206 cond = COND_EXEC_TEST (PATTERN (insn));
4207 else
4208 cond = NULL_RTX;
4210 if (subtargets || code == SET
4211 || (REG_P (target) && REG_P (source)
4212 && REGNO (target) != REGNO (source)))
4214 /* After arm_reorg has been called, we can't fix up expensive
4215 constants by pushing them into memory so we must synthesize
4216 them in-line, regardless of the cost. This is only likely to
4217 be more costly on chips that have load delay slots and we are
4218 compiling without running the scheduler (so no splitting
4219 occurred before the final instruction emission).
4221 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4223 if (!cfun->machine->after_arm_reorg
4224 && !cond
4225 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4226 1, 0)
4227 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4228 + (code != SET))))
4230 if (code == SET)
4232 /* Currently SET is the only monadic value for CODE; all
4233 the rest are dyadic. */
4234 if (TARGET_USE_MOVT)
4235 arm_emit_movpair (target, GEN_INT (val));
4236 else
4237 emit_set_insn (target, GEN_INT (val));
4239 return 1;
4241 else
4243 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4245 if (TARGET_USE_MOVT)
4246 arm_emit_movpair (temp, GEN_INT (val));
4247 else
4248 emit_set_insn (temp, GEN_INT (val));
4250 /* For MINUS, the value is subtracted from, since we never
4251 have subtraction of a constant. */
4252 if (code == MINUS)
4253 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4254 else
4255 emit_set_insn (target,
4256 gen_rtx_fmt_ee (code, mode, source, temp));
4257 return 2;
4262 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4266 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4267 ARM/THUMB2 immediates and add up to VAL.
4268 The function return value gives the number of insns required. */
4269 static int
4270 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4271 struct four_ints *return_sequence)
4273 int best_consecutive_zeros = 0;
4274 int i;
4275 int best_start = 0;
4276 int insns1, insns2;
4277 struct four_ints tmp_sequence;
4279 /* If we aren't targeting ARM, the best place to start is always at
4280 the bottom, otherwise look more closely. */
4281 if (TARGET_ARM)
4283 for (i = 0; i < 32; i += 2)
4285 int consecutive_zeros = 0;
4287 if (!(val & (3 << i)))
4289 while ((i < 32) && !(val & (3 << i)))
4291 consecutive_zeros += 2;
4292 i += 2;
4294 if (consecutive_zeros > best_consecutive_zeros)
4296 best_consecutive_zeros = consecutive_zeros;
4297 best_start = i - consecutive_zeros;
4299 i -= 2;
4304 /* So long as it won't require any more insns to do so, it's
4305 desirable to emit a small constant (in bits 0...9) in the last
4306 insn. This way there is more chance that it can be combined with
4307 a later addressing insn to form a pre-indexed load or store
4308 operation. Consider:
4310 *((volatile int *)0xe0000100) = 1;
4311 *((volatile int *)0xe0000110) = 2;
4313 We want this to wind up as:
4315 mov rA, #0xe0000000
4316 mov rB, #1
4317 str rB, [rA, #0x100]
4318 mov rB, #2
4319 str rB, [rA, #0x110]
4321 rather than having to synthesize both large constants from scratch.
4323 Therefore, we calculate how many insns would be required to emit
4324 the constant starting from `best_start', and also starting from
4325 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4326 yield a shorter sequence, we may as well use zero. */
4327 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4328 if (best_start != 0
4329 && ((HOST_WIDE_INT_1U << best_start) < val))
4331 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4332 if (insns2 <= insns1)
4334 *return_sequence = tmp_sequence;
4335 insns1 = insns2;
4339 return insns1;
4342 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4343 static int
4344 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4345 struct four_ints *return_sequence, int i)
4347 int remainder = val & 0xffffffff;
4348 int insns = 0;
4350 /* Try and find a way of doing the job in either two or three
4351 instructions.
4353 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4354 location. We start at position I. This may be the MSB, or
4355 optimal_immediate_sequence may have positioned it at the largest block
4356 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4357 wrapping around to the top of the word when we drop off the bottom.
4358 In the worst case this code should produce no more than four insns.
4360 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4361 constants, shifted to any arbitrary location. We should always start
4362 at the MSB. */
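/* A small worked case: in ARM mode the constant 0x00f000ff cannot be
   encoded as a single rotated 8-bit immediate, but it splits into the
   two chunks 0x00f00000 and 0x000000ff, each of which can be, so the
   loop below records those two values and returns 2.  */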
4365 int end;
4366 unsigned int b1, b2, b3, b4;
4367 unsigned HOST_WIDE_INT result;
4368 int loc;
4370 gcc_assert (insns < 4);
4372 if (i <= 0)
4373 i += 32;
4375 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4376 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4378 loc = i;
4379 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4380 /* We can use addw/subw for the last 12 bits. */
4381 result = remainder;
4382 else
4384 /* Use an 8-bit shifted/rotated immediate. */
4385 end = i - 8;
4386 if (end < 0)
4387 end += 32;
4388 result = remainder & ((0x0ff << end)
4389 | ((i < end) ? (0xff >> (32 - end))
4390 : 0));
4391 i -= 8;
4394 else
4396 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4397 arbitrary shifts. */
4398 i -= TARGET_ARM ? 2 : 1;
4399 continue;
4402 /* Next, see if we can do a better job with a thumb2 replicated
4403 constant.
4405 We do it this way around to catch the cases like 0x01F001E0 where
4406 two 8-bit immediates would work, but a replicated constant would
4407 make it worse.
4409 TODO: 16-bit constants that don't clear all the bits, but still win.
4410 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4411 if (TARGET_THUMB2)
4413 b1 = (remainder & 0xff000000) >> 24;
4414 b2 = (remainder & 0x00ff0000) >> 16;
4415 b3 = (remainder & 0x0000ff00) >> 8;
4416 b4 = remainder & 0xff;
4418 if (loc > 24)
4420 /* The 8-bit immediate already found clears b1 (and maybe b2),
4421 but must leave b3 and b4 alone. */
4423 /* First try to find a 32-bit replicated constant that clears
4424 almost everything. We can assume that we can't do it in one,
4425 or else we wouldn't be here. */
4426 unsigned int tmp = b1 & b2 & b3 & b4;
4427 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4428 + (tmp << 24);
4429 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4430 + (tmp == b3) + (tmp == b4);
4431 if (tmp
4432 && (matching_bytes >= 3
4433 || (matching_bytes == 2
4434 && const_ok_for_op (remainder & ~tmp2, code))))
4436 /* At least 3 of the bytes match, and the fourth has at
4437 least as many bits set, or two of the bytes match
4438 and it will only require one more insn to finish. */
4439 result = tmp2;
4440 i = tmp != b1 ? 32
4441 : tmp != b2 ? 24
4442 : tmp != b3 ? 16
4443 : 8;
4446 /* Second, try to find a 16-bit replicated constant that can
4447 leave three of the bytes clear. If b2 or b4 is already
4448 zero, then we can. If the 8-bit from above would not
4449 clear b2 anyway, then we still win. */
4450 else if (b1 == b3 && (!b2 || !b4
4451 || (remainder & 0x00ff0000 & ~result)))
4453 result = remainder & 0xff00ff00;
4454 i = 24;
4457 else if (loc > 16)
4459 /* The 8-bit immediate already found clears b2 (and maybe b3)
4460 and we don't get here unless b1 is already clear, but it will
4461 leave b4 unchanged. */
4463 /* If we can clear b2 and b4 at once, then we win, since the
4464 8-bits couldn't possibly reach that far. */
4465 if (b2 == b4)
4467 result = remainder & 0x00ff00ff;
4468 i = 16;
4473 return_sequence->i[insns++] = result;
4474 remainder &= ~result;
4476 if (code == SET || code == MINUS)
4477 code = PLUS;
4479 while (remainder);
4481 return insns;
4484 /* Emit an instruction with the indicated PATTERN. If COND is
4485 non-NULL, conditionalize the execution of the instruction on COND
4486 being true. */
4488 static void
4489 emit_constant_insn (rtx cond, rtx pattern)
4491 if (cond)
4492 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4493 emit_insn (pattern);
4496 /* As above, but extra parameter GENERATE which, if clear, suppresses
4497 RTL generation. */
4499 static int
4500 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4501 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4502 int subtargets, int generate)
4504 int can_invert = 0;
4505 int can_negate = 0;
4506 int final_invert = 0;
4507 int i;
4508 int set_sign_bit_copies = 0;
4509 int clear_sign_bit_copies = 0;
4510 int clear_zero_bit_copies = 0;
4511 int set_zero_bit_copies = 0;
4512 int insns = 0, neg_insns, inv_insns;
4513 unsigned HOST_WIDE_INT temp1, temp2;
4514 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4515 struct four_ints *immediates;
4516 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4518 /* Find out which operations are safe for a given CODE. Also do a quick
4519 check for degenerate cases; these can occur when DImode operations
4520 are split. */
4521 switch (code)
4523 case SET:
4524 can_invert = 1;
4525 break;
4527 case PLUS:
4528 can_negate = 1;
4529 break;
4531 case IOR:
4532 if (remainder == 0xffffffff)
4534 if (generate)
4535 emit_constant_insn (cond,
4536 gen_rtx_SET (target,
4537 GEN_INT (ARM_SIGN_EXTEND (val))));
4538 return 1;
4541 if (remainder == 0)
4543 if (reload_completed && rtx_equal_p (target, source))
4544 return 0;
4546 if (generate)
4547 emit_constant_insn (cond, gen_rtx_SET (target, source));
4548 return 1;
4550 break;
4552 case AND:
4553 if (remainder == 0)
4555 if (generate)
4556 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4557 return 1;
4559 if (remainder == 0xffffffff)
4561 if (reload_completed && rtx_equal_p (target, source))
4562 return 0;
4563 if (generate)
4564 emit_constant_insn (cond, gen_rtx_SET (target, source));
4565 return 1;
4567 can_invert = 1;
4568 break;
4570 case XOR:
4571 if (remainder == 0)
4573 if (reload_completed && rtx_equal_p (target, source))
4574 return 0;
4575 if (generate)
4576 emit_constant_insn (cond, gen_rtx_SET (target, source));
4577 return 1;
4580 if (remainder == 0xffffffff)
4582 if (generate)
4583 emit_constant_insn (cond,
4584 gen_rtx_SET (target,
4585 gen_rtx_NOT (mode, source)));
4586 return 1;
4588 final_invert = 1;
4589 break;
4591 case MINUS:
4592 /* We treat MINUS as (val - source), since (source - val) is always
4593 passed as (source + (-val)). */
4594 if (remainder == 0)
4596 if (generate)
4597 emit_constant_insn (cond,
4598 gen_rtx_SET (target,
4599 gen_rtx_NEG (mode, source)));
4600 return 1;
4602 if (const_ok_for_arm (val))
4604 if (generate)
4605 emit_constant_insn (cond,
4606 gen_rtx_SET (target,
4607 gen_rtx_MINUS (mode, GEN_INT (val),
4608 source)));
4609 return 1;
4612 break;
4614 default:
4615 gcc_unreachable ();
4618 /* If we can do it in one insn get out quickly. */
4619 if (const_ok_for_op (val, code))
4621 if (generate)
4622 emit_constant_insn (cond,
4623 gen_rtx_SET (target,
4624 (source
4625 ? gen_rtx_fmt_ee (code, mode, source,
4626 GEN_INT (val))
4627 : GEN_INT (val))));
4628 return 1;
4631 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4632 insn. */
4633 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4634 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4636 if (generate)
4638 if (mode == SImode && i == 16)
4639 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4640 smaller insn. */
4641 emit_constant_insn (cond,
4642 gen_zero_extendhisi2
4643 (target, gen_lowpart (HImode, source)));
4644 else
4645 /* Extz only supports SImode, but we can coerce the operands
4646 into that mode. */
4647 emit_constant_insn (cond,
4648 gen_extzv_t2 (gen_lowpart (SImode, target),
4649 gen_lowpart (SImode, source),
4650 GEN_INT (i), const0_rtx));
4653 return 1;
4656 /* Calculate a few attributes that may be useful for specific
4657 optimizations. */
4658 /* Count number of leading zeros. */
4659 for (i = 31; i >= 0; i--)
4661 if ((remainder & (1 << i)) == 0)
4662 clear_sign_bit_copies++;
4663 else
4664 break;
4667 /* Count number of leading 1's. */
4668 for (i = 31; i >= 0; i--)
4670 if ((remainder & (1 << i)) != 0)
4671 set_sign_bit_copies++;
4672 else
4673 break;
4676 /* Count number of trailing zeros. */
4677 for (i = 0; i <= 31; i++)
4679 if ((remainder & (1 << i)) == 0)
4680 clear_zero_bit_copies++;
4681 else
4682 break;
4685 /* Count number of trailing 1's. */
4686 for (i = 0; i <= 31; i++)
4688 if ((remainder & (1 << i)) != 0)
4689 set_zero_bit_copies++;
4690 else
4691 break;
4694 switch (code)
4696 case SET:
4697 /* See if we can do this by sign_extending a constant that is known
4698 to be negative. This is a good way of doing it, since the shift
4699 may well merge into a subsequent insn. */
4700 if (set_sign_bit_copies > 1)
4702 if (const_ok_for_arm
4703 (temp1 = ARM_SIGN_EXTEND (remainder
4704 << (set_sign_bit_copies - 1))))
4706 if (generate)
4708 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4709 emit_constant_insn (cond,
4710 gen_rtx_SET (new_src, GEN_INT (temp1)));
4711 emit_constant_insn (cond,
4712 gen_ashrsi3 (target, new_src,
4713 GEN_INT (set_sign_bit_copies - 1)));
4715 return 2;
4717 /* For an inverted constant, we will need to set the low bits,
4718 these will be shifted out of harm's way. */
4719 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4720 if (const_ok_for_arm (~temp1))
4722 if (generate)
4724 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4725 emit_constant_insn (cond,
4726 gen_rtx_SET (new_src, GEN_INT (temp1)));
4727 emit_constant_insn (cond,
4728 gen_ashrsi3 (target, new_src,
4729 GEN_INT (set_sign_bit_copies - 1)));
4731 return 2;
4735 /* See if we can calculate the value as the difference between two
4736 valid immediates. */
4737 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4739 int topshift = clear_sign_bit_copies & ~1;
4741 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4742 & (0xff000000 >> topshift));
4744 /* If temp1 is zero, then that means the 9 most significant
4745 bits of remainder were 1 and we've caused it to overflow.
4746 When topshift is 0 we don't need to do anything since we
4747 can borrow from 'bit 32'. */
4748 if (temp1 == 0 && topshift != 0)
4749 temp1 = 0x80000000 >> (topshift - 1);
4751 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4753 if (const_ok_for_arm (temp2))
4755 if (generate)
4757 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4758 emit_constant_insn (cond,
4759 gen_rtx_SET (new_src, GEN_INT (temp1)));
4760 emit_constant_insn (cond,
4761 gen_addsi3 (target, new_src,
4762 GEN_INT (-temp2)));
4765 return 2;
4769 /* See if we can generate this by setting the bottom (or the top)
4770 16 bits, and then shifting these into the other half of the
4771 word. We only look for the simplest cases, to do more would cost
4772 too much. Be careful, however, not to generate this when the
4773 alternative would take fewer insns. */
4774 if (val & 0xffff0000)
4776 temp1 = remainder & 0xffff0000;
4777 temp2 = remainder & 0x0000ffff;
4779 /* Overlaps outside this range are best done using other methods. */
4780 for (i = 9; i < 24; i++)
4782 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4783 && !const_ok_for_arm (temp2))
4785 rtx new_src = (subtargets
4786 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4787 : target);
4788 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4789 source, subtargets, generate);
4790 source = new_src;
4791 if (generate)
4792 emit_constant_insn
4793 (cond,
4794 gen_rtx_SET
4795 (target,
4796 gen_rtx_IOR (mode,
4797 gen_rtx_ASHIFT (mode, source,
4798 GEN_INT (i)),
4799 source)));
4800 return insns + 1;
4804 /* Don't duplicate cases already considered. */
4805 for (i = 17; i < 24; i++)
4807 if (((temp1 | (temp1 >> i)) == remainder)
4808 && !const_ok_for_arm (temp1))
4810 rtx new_src = (subtargets
4811 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4812 : target);
4813 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4814 source, subtargets, generate);
4815 source = new_src;
4816 if (generate)
4817 emit_constant_insn
4818 (cond,
4819 gen_rtx_SET (target,
4820 gen_rtx_IOR
4821 (mode,
4822 gen_rtx_LSHIFTRT (mode, source,
4823 GEN_INT (i)),
4824 source)));
4825 return insns + 1;
4829 break;
4831 case IOR:
4832 case XOR:
4833 /* If we have IOR or XOR, and the constant can be loaded in a
4834 single instruction, and we can find a temporary to put it in,
4835 then this can be done in two instructions instead of 3-4. */
4836 if (subtargets
4837 /* TARGET can't be NULL if SUBTARGETS is 0 */
4838 || (reload_completed && !reg_mentioned_p (target, source)))
4840 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4842 if (generate)
4844 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4846 emit_constant_insn (cond,
4847 gen_rtx_SET (sub, GEN_INT (val)));
4848 emit_constant_insn (cond,
4849 gen_rtx_SET (target,
4850 gen_rtx_fmt_ee (code, mode,
4851 source, sub)));
4853 return 2;
4857 if (code == XOR)
4858 break;
4860 /* Convert.
4861 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4862 and the remainder 0s for e.g. 0xfff00000)
4863 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4865 This can be done in 2 instructions by using shifts with mov or mvn.
4866 e.g. for
4867 x = x | 0xfff00000;
4868 we generate.
4869 mvn r0, r0, asl #12
4870 mvn r0, r0, lsr #12 */
4871 if (set_sign_bit_copies > 8
4872 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4874 if (generate)
4876 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4877 rtx shift = GEN_INT (set_sign_bit_copies);
4879 emit_constant_insn
4880 (cond,
4881 gen_rtx_SET (sub,
4882 gen_rtx_NOT (mode,
4883 gen_rtx_ASHIFT (mode,
4884 source,
4885 shift))));
4886 emit_constant_insn
4887 (cond,
4888 gen_rtx_SET (target,
4889 gen_rtx_NOT (mode,
4890 gen_rtx_LSHIFTRT (mode, sub,
4891 shift))));
4893 return 2;
4896 /* Convert
4897 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4899 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4901 E.g. for r0 = r0 | 0xfff
4902 mvn r0, r0, lsr #12
4903 mvn r0, r0, asl #12
4906 if (set_zero_bit_copies > 8
4907 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4909 if (generate)
4911 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4912 rtx shift = GEN_INT (set_zero_bit_copies);
4914 emit_constant_insn
4915 (cond,
4916 gen_rtx_SET (sub,
4917 gen_rtx_NOT (mode,
4918 gen_rtx_LSHIFTRT (mode,
4919 source,
4920 shift))));
4921 emit_constant_insn
4922 (cond,
4923 gen_rtx_SET (target,
4924 gen_rtx_NOT (mode,
4925 gen_rtx_ASHIFT (mode, sub,
4926 shift))));
4928 return 2;
4931 /* This will never be reached for Thumb2 because orn is a valid
4932 instruction. This is for Thumb1 and the ARM 32 bit cases.
4934 x = y | constant (such that ~constant is a valid constant)
4935 Transform this to
4936 x = ~(~y & ~constant).
4938 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4940 if (generate)
4942 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4943 emit_constant_insn (cond,
4944 gen_rtx_SET (sub,
4945 gen_rtx_NOT (mode, source)));
4946 source = sub;
4947 if (subtargets)
4948 sub = gen_reg_rtx (mode);
4949 emit_constant_insn (cond,
4950 gen_rtx_SET (sub,
4951 gen_rtx_AND (mode, source,
4952 GEN_INT (temp1))));
4953 emit_constant_insn (cond,
4954 gen_rtx_SET (target,
4955 gen_rtx_NOT (mode, sub)));
4957 return 3;
4959 break;
4961 case AND:
4962 /* See if two shifts will do 2 or more insn's worth of work. */
4963 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4965 HOST_WIDE_INT shift_mask = ((0xffffffff
4966 << (32 - clear_sign_bit_copies))
4967 & 0xffffffff);
4969 if ((remainder | shift_mask) != 0xffffffff)
4971 HOST_WIDE_INT new_val
4972 = ARM_SIGN_EXTEND (remainder | shift_mask);
4974 if (generate)
4976 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4977 insns = arm_gen_constant (AND, SImode, cond, new_val,
4978 new_src, source, subtargets, 1);
4979 source = new_src;
4981 else
4983 rtx targ = subtargets ? NULL_RTX : target;
4984 insns = arm_gen_constant (AND, mode, cond, new_val,
4985 targ, source, subtargets, 0);
4989 if (generate)
4991 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4992 rtx shift = GEN_INT (clear_sign_bit_copies);
4994 emit_insn (gen_ashlsi3 (new_src, source, shift));
4995 emit_insn (gen_lshrsi3 (target, new_src, shift));
4998 return insns + 2;
5001 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5003 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5005 if ((remainder | shift_mask) != 0xffffffff)
5007 HOST_WIDE_INT new_val
5008 = ARM_SIGN_EXTEND (remainder | shift_mask);
5009 if (generate)
5011 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5013 insns = arm_gen_constant (AND, mode, cond, new_val,
5014 new_src, source, subtargets, 1);
5015 source = new_src;
5017 else
5019 rtx targ = subtargets ? NULL_RTX : target;
5021 insns = arm_gen_constant (AND, mode, cond, new_val,
5022 targ, source, subtargets, 0);
5026 if (generate)
5028 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5029 rtx shift = GEN_INT (clear_zero_bit_copies);
5031 emit_insn (gen_lshrsi3 (new_src, source, shift));
5032 emit_insn (gen_ashlsi3 (target, new_src, shift));
5035 return insns + 2;
5038 break;
5040 default:
5041 break;
5044 /* Calculate what the instruction sequences would be if we generated it
5045 normally, negated, or inverted. */
5046 if (code == AND)
5047 /* AND cannot be split into multiple insns, so invert and use BIC. */
5048 insns = 99;
5049 else
5050 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5052 if (can_negate)
5053 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5054 &neg_immediates);
5055 else
5056 neg_insns = 99;
5058 if (can_invert || final_invert)
5059 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5060 &inv_immediates);
5061 else
5062 inv_insns = 99;
5064 immediates = &pos_immediates;
5066 /* Is the negated immediate sequence more efficient? */
5067 if (neg_insns < insns && neg_insns <= inv_insns)
5069 insns = neg_insns;
5070 immediates = &neg_immediates;
5072 else
5073 can_negate = 0;
5075 /* Is the inverted immediate sequence more efficient?
5076 We must allow for an extra NOT instruction for XOR operations, although
5077 there is some chance that the final 'mvn' will get optimized later. */
5078 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5080 insns = inv_insns;
5081 immediates = &inv_immediates;
5083 else
5085 can_invert = 0;
5086 final_invert = 0;
5089 /* Now output the chosen sequence as instructions. */
5090 if (generate)
5092 for (i = 0; i < insns; i++)
5094 rtx new_src, temp1_rtx;
5096 temp1 = immediates->i[i];
5098 if (code == SET || code == MINUS)
5099 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5100 else if ((final_invert || i < (insns - 1)) && subtargets)
5101 new_src = gen_reg_rtx (mode);
5102 else
5103 new_src = target;
5105 if (can_invert)
5106 temp1 = ~temp1;
5107 else if (can_negate)
5108 temp1 = -temp1;
5110 temp1 = trunc_int_for_mode (temp1, mode);
5111 temp1_rtx = GEN_INT (temp1);
5113 if (code == SET)
5115 else if (code == MINUS)
5116 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5117 else
5118 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5120 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5121 source = new_src;
5123 if (code == SET)
5125 can_negate = can_invert;
5126 can_invert = 0;
5127 code = PLUS;
5129 else if (code == MINUS)
5130 code = PLUS;
5134 if (final_invert)
5136 if (generate)
5137 emit_constant_insn (cond, gen_rtx_SET (target,
5138 gen_rtx_NOT (mode, source)));
5139 insns++;
5142 return insns;
5145 /* Canonicalize a comparison so that we are more likely to recognize it.
5146 This can be done for a few constant compares, where we can make the
5147 immediate value easier to load. */
5149 static void
5150 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5151 bool op0_preserve_value)
5153 machine_mode mode;
5154 unsigned HOST_WIDE_INT i, maxval;
5156 mode = GET_MODE (*op0);
5157 if (mode == VOIDmode)
5158 mode = GET_MODE (*op1);
5160 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5162 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5163 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5164 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5165 for GTU/LEU in Thumb mode. */
5166 if (mode == DImode)
5169 if (*code == GT || *code == LE
5170 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5172 /* Missing comparison. First try to use an available
5173 comparison. */
5174 if (CONST_INT_P (*op1))
5176 i = INTVAL (*op1);
5177 switch (*code)
5179 case GT:
5180 case LE:
5181 if (i != maxval
5182 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5184 *op1 = GEN_INT (i + 1);
5185 *code = *code == GT ? GE : LT;
5186 return;
5188 break;
5189 case GTU:
5190 case LEU:
5191 if (i != ~((unsigned HOST_WIDE_INT) 0)
5192 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5194 *op1 = GEN_INT (i + 1);
5195 *code = *code == GTU ? GEU : LTU;
5196 return;
5198 break;
5199 default:
5200 gcc_unreachable ();
5204 /* If that did not work, reverse the condition. */
5205 if (!op0_preserve_value)
5207 std::swap (*op0, *op1);
5208 *code = (int)swap_condition ((enum rtx_code)*code);
5211 return;
5214 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5215 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5216 to facilitate possible combining with a cmp into 'ands'. */
5217 if (mode == SImode
5218 && GET_CODE (*op0) == ZERO_EXTEND
5219 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5220 && GET_MODE (XEXP (*op0, 0)) == QImode
5221 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5222 && subreg_lowpart_p (XEXP (*op0, 0))
5223 && *op1 == const0_rtx)
5224 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5225 GEN_INT (255));
5227 /* Comparisons smaller than DImode. Only adjust comparisons against
5228 an out-of-range constant. */
5229 if (!CONST_INT_P (*op1)
5230 || const_ok_for_arm (INTVAL (*op1))
5231 || const_ok_for_arm (- INTVAL (*op1)))
5232 return;
5234 i = INTVAL (*op1);
5236 switch (*code)
5238 case EQ:
5239 case NE:
5240 return;
5242 case GT:
5243 case LE:
5244 if (i != maxval
5245 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5247 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5248 *code = *code == GT ? GE : LT;
5249 return;
5251 break;
5253 case GE:
5254 case LT:
5255 if (i != ~maxval
5256 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5258 *op1 = GEN_INT (i - 1);
5259 *code = *code == GE ? GT : LE;
5260 return;
5262 break;
5264 case GTU:
5265 case LEU:
5266 if (i != ~((unsigned HOST_WIDE_INT) 0)
5267 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5269 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5270 *code = *code == GTU ? GEU : LTU;
5271 return;
5273 break;
5275 case GEU:
5276 case LTU:
5277 if (i != 0
5278 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5280 *op1 = GEN_INT (i - 1);
5281 *code = *code == GEU ? GTU : LEU;
5282 return;
5284 break;
5286 default:
5287 gcc_unreachable ();
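/* One concrete effect of the canonicalization above: for SImode, a
   comparison such as (GT reg 4095) is rewritten as (GE reg 4096), since
   4095 (0xfff) is not a valid ARM immediate but 4096 (0x1000) is, which
   avoids having to synthesize the original constant in a register.  */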
5292 /* Define how to find the value returned by a function. */
5294 static rtx
5295 arm_function_value(const_tree type, const_tree func,
5296 bool outgoing ATTRIBUTE_UNUSED)
5298 machine_mode mode;
5299 int unsignedp ATTRIBUTE_UNUSED;
5300 rtx r ATTRIBUTE_UNUSED;
5302 mode = TYPE_MODE (type);
5304 if (TARGET_AAPCS_BASED)
5305 return aapcs_allocate_return_reg (mode, type, func);
5307 /* Promote integer types. */
5308 if (INTEGRAL_TYPE_P (type))
5309 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5311 /* Promotes small structs returned in a register to full-word size
5312 for big-endian AAPCS. */
5313 if (arm_return_in_msb (type))
5315 HOST_WIDE_INT size = int_size_in_bytes (type);
5316 if (size % UNITS_PER_WORD != 0)
5318 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5319 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5323 return arm_libcall_value_1 (mode);
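/* As an illustration of the MSB handling above: on a big-endian AAPCS
   target a 3-byte struct returned in a register has its size rounded up
   to 4 bytes and is returned in SImode, so its bytes end up in the most
   significant end of r0 as the ABI expects.  */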
5326 /* libcall hashtable helpers. */
5328 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5330 static inline hashval_t hash (const rtx_def *);
5331 static inline bool equal (const rtx_def *, const rtx_def *);
5332 static inline void remove (rtx_def *);
5335 inline bool
5336 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5338 return rtx_equal_p (p1, p2);
5341 inline hashval_t
5342 libcall_hasher::hash (const rtx_def *p1)
5344 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5347 typedef hash_table<libcall_hasher> libcall_table_type;
5349 static void
5350 add_libcall (libcall_table_type *htab, rtx libcall)
5352 *htab->find_slot (libcall, INSERT) = libcall;
5355 static bool
5356 arm_libcall_uses_aapcs_base (const_rtx libcall)
5358 static bool init_done = false;
5359 static libcall_table_type *libcall_htab = NULL;
5361 if (!init_done)
5363 init_done = true;
5365 libcall_htab = new libcall_table_type (31);
5366 add_libcall (libcall_htab,
5367 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5368 add_libcall (libcall_htab,
5369 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5370 add_libcall (libcall_htab,
5371 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5372 add_libcall (libcall_htab,
5373 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5375 add_libcall (libcall_htab,
5376 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5377 add_libcall (libcall_htab,
5378 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5379 add_libcall (libcall_htab,
5380 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5381 add_libcall (libcall_htab,
5382 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5384 add_libcall (libcall_htab,
5385 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5386 add_libcall (libcall_htab,
5387 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5388 add_libcall (libcall_htab,
5389 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5390 add_libcall (libcall_htab,
5391 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5392 add_libcall (libcall_htab,
5393 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5394 add_libcall (libcall_htab,
5395 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5396 add_libcall (libcall_htab,
5397 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5398 add_libcall (libcall_htab,
5399 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5401 /* Values from double-precision helper functions are returned in core
5402 registers if the selected core only supports single-precision
5403 arithmetic, even if we are using the hard-float ABI. The same is
5404 true for single-precision helpers, but we will never be using the
5405 hard-float ABI on a CPU which doesn't support single-precision
5406 operations in hardware. */
5407 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5408 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5409 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5410 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5411 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5412 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5413 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5414 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5415 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5416 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5417 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5418 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5419 SFmode));
5420 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5421 DFmode));
5422 add_libcall (libcall_htab,
5423 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5426 return libcall && libcall_htab->find (libcall) != NULL;
5429 static rtx
5430 arm_libcall_value_1 (machine_mode mode)
5432 if (TARGET_AAPCS_BASED)
5433 return aapcs_libcall_value (mode);
5434 else if (TARGET_IWMMXT_ABI
5435 && arm_vector_mode_supported_p (mode))
5436 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5437 else
5438 return gen_rtx_REG (mode, ARG_REGISTER (1));
5441 /* Define how to find the value returned by a library function
5442 assuming the value has mode MODE. */
5444 static rtx
5445 arm_libcall_value (machine_mode mode, const_rtx libcall)
5447 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5448 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5450 /* The following libcalls return their result in integer registers,
5451 even though they return a floating point value. */
5452 if (arm_libcall_uses_aapcs_base (libcall))
5453 return gen_rtx_REG (mode, ARG_REGISTER(1));
5457 return arm_libcall_value_1 (mode);
5460 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5462 static bool
5463 arm_function_value_regno_p (const unsigned int regno)
5465 if (regno == ARG_REGISTER (1)
5466 || (TARGET_32BIT
5467 && TARGET_AAPCS_BASED
5468 && TARGET_HARD_FLOAT
5469 && regno == FIRST_VFP_REGNUM)
5470 || (TARGET_IWMMXT_ABI
5471 && regno == FIRST_IWMMXT_REGNUM))
5472 return true;
5474 return false;
5477 /* Determine the amount of memory needed to store the possible return
5478 registers of an untyped call. */
5480 arm_apply_result_size (void)
5482 int size = 16;
5484 if (TARGET_32BIT)
5486 if (TARGET_HARD_FLOAT_ABI)
5487 size += 32;
5488 if (TARGET_IWMMXT_ABI)
5489 size += 8;
5492 return size;
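/* For example, a 32-bit target using both the VFP hard-float ABI and the
   iWMMXt ABI would report 16 + 32 + 8 = 56 bytes here, while a soft-float
   configuration keeps just the 16 bytes needed for r0-r3.  */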
5495 /* Decide whether TYPE should be returned in memory (true)
5496 or in a register (false). FNTYPE is the type of the function making
5497 the call. */
5498 static bool
5499 arm_return_in_memory (const_tree type, const_tree fntype)
5501 HOST_WIDE_INT size;
5503 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5505 if (TARGET_AAPCS_BASED)
5507 /* Simple, non-aggregate types (i.e. not including vectors and
5508 complex) are always returned in a register (or registers).
5509 We don't care about which register here, so we can short-cut
5510 some of the detail. */
5511 if (!AGGREGATE_TYPE_P (type)
5512 && TREE_CODE (type) != VECTOR_TYPE
5513 && TREE_CODE (type) != COMPLEX_TYPE)
5514 return false;
5516 /* Any return value that is no larger than one word can be
5517 returned in r0. */
5518 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5519 return false;
5521 /* Check any available co-processors to see if they accept the
5522 type as a register candidate (VFP, for example, can return
5523 some aggregates in consecutive registers). These aren't
5524 available if the call is variadic. */
5525 if (aapcs_select_return_coproc (type, fntype) >= 0)
5526 return false;
5528 /* Vector values should be returned using ARM registers, not
5529 memory (unless they're over 16 bytes, which will break since
5530 we only have four call-clobbered registers to play with). */
5531 if (TREE_CODE (type) == VECTOR_TYPE)
5532 return (size < 0 || size > (4 * UNITS_PER_WORD));
5534 /* The rest go in memory. */
5535 return true;
5538 if (TREE_CODE (type) == VECTOR_TYPE)
5539 return (size < 0 || size > (4 * UNITS_PER_WORD));
5541 if (!AGGREGATE_TYPE_P (type) &&
5542 (TREE_CODE (type) != VECTOR_TYPE))
5543 /* All simple types are returned in registers. */
5544 return false;
5546 if (arm_abi != ARM_ABI_APCS)
5548 /* ATPCS and later return aggregate types in memory only if they are
5549 larger than a word (or are variable size). */
5550 return (size < 0 || size > UNITS_PER_WORD);
5553 /* For the arm-wince targets we choose to be compatible with Microsoft's
5554 ARM and Thumb compilers, which always return aggregates in memory. */
5555 #ifndef ARM_WINCE
5556 /* All structures/unions bigger than one word are returned in memory.
5557 Also catch the case where int_size_in_bytes returns -1. In this case
5558 the aggregate is either huge or of variable size, and in either case
5559 we will want to return it via memory and not in a register. */
5560 if (size < 0 || size > UNITS_PER_WORD)
5561 return true;
5563 if (TREE_CODE (type) == RECORD_TYPE)
5565 tree field;
5567 /* For a struct the APCS says that we only return in a register
5568 if the type is 'integer like' and every addressable element
5569 has an offset of zero. For practical purposes this means
5570 that the structure can have at most one non bit-field element
5571 and that this element must be the first one in the structure. */
5573 /* Find the first field, ignoring non FIELD_DECL things which will
5574 have been created by C++. */
5575 for (field = TYPE_FIELDS (type);
5576 field && TREE_CODE (field) != FIELD_DECL;
5577 field = DECL_CHAIN (field))
5578 continue;
5580 if (field == NULL)
5581 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5583 /* Check that the first field is valid for returning in a register. */
5585 /* ... Floats are not allowed */
5586 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5587 return true;
5589 /* ... Aggregates that are not themselves valid for returning in
5590 a register are not allowed. */
5591 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5592 return true;
5594 /* Now check the remaining fields, if any. Only bitfields are allowed,
5595 since they are not addressable. */
5596 for (field = DECL_CHAIN (field);
5597 field;
5598 field = DECL_CHAIN (field))
5600 if (TREE_CODE (field) != FIELD_DECL)
5601 continue;
5603 if (!DECL_BIT_FIELD_TYPE (field))
5604 return true;
5607 return false;
5610 if (TREE_CODE (type) == UNION_TYPE)
5612 tree field;
5614 /* Unions can be returned in registers if every element is
5615 integral, or can be returned in an integer register. */
5616 for (field = TYPE_FIELDS (type);
5617 field;
5618 field = DECL_CHAIN (field))
5620 if (TREE_CODE (field) != FIELD_DECL)
5621 continue;
5623 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5624 return true;
5626 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5627 return true;
5630 return false;
5632 #endif /* not ARM_WINCE */
5634 /* Return all other types in memory. */
5635 return true;
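/* Some illustrative cases for the AAPCS rules above (assuming 4-byte words):
   struct s1 { int a; } (4 bytes) is returned in r0; struct s2 { int a, b, c; }
   (12 bytes) is returned in memory; struct hfa { float x, y; } may still be
   returned in VFP registers when the co-processor check succeeds under the
   VFP variant of the AAPCS.  */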
5638 const struct pcs_attribute_arg
5640 const char *arg;
5641 enum arm_pcs value;
5642 } pcs_attribute_args[] =
5644 {"aapcs", ARM_PCS_AAPCS},
5645 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5646 #if 0
5647 /* We could recognize these, but changes would be needed elsewhere
5648 * to implement them. */
5649 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5650 {"atpcs", ARM_PCS_ATPCS},
5651 {"apcs", ARM_PCS_APCS},
5652 #endif
5653 {NULL, ARM_PCS_UNKNOWN}
5656 static enum arm_pcs
5657 arm_pcs_from_attribute (tree attr)
5659 const struct pcs_attribute_arg *ptr;
5660 const char *arg;
5662 /* Get the value of the argument. */
5663 if (TREE_VALUE (attr) == NULL_TREE
5664 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5665 return ARM_PCS_UNKNOWN;
5667 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5669 /* Check it against the list of known arguments. */
5670 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5671 if (streq (arg, ptr->arg))
5672 return ptr->value;
5674 /* An unrecognized PCS variant. */
5675 return ARM_PCS_UNKNOWN;
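/* A hypothetical declaration using this attribute:
     double f (double) __attribute__ ((pcs ("aapcs-vfp")));
   Only "aapcs" and "aapcs-vfp" are currently recognized; any other string
   yields ARM_PCS_UNKNOWN and is diagnosed by arm_handle_pcs_attribute.  */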
5678 /* Get the PCS variant to use for this call. TYPE is the function's type
5679 specification, DECL is the specific declaration. DECL may be null if
5680 the call could be indirect or if this is a library call. */
5681 static enum arm_pcs
5682 arm_get_pcs_model (const_tree type, const_tree decl)
5684 bool user_convention = false;
5685 enum arm_pcs user_pcs = arm_pcs_default;
5686 tree attr;
5688 gcc_assert (type);
5690 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5691 if (attr)
5693 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5694 user_convention = true;
5697 if (TARGET_AAPCS_BASED)
5699 /* Detect varargs functions. These always use the base rules
5700 (no argument is ever a candidate for a co-processor
5701 register). */
5702 bool base_rules = stdarg_p (type);
5704 if (user_convention)
5706 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5707 sorry ("non-AAPCS derived PCS variant");
5708 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5709 error ("variadic functions must use the base AAPCS variant");
5712 if (base_rules)
5713 return ARM_PCS_AAPCS;
5714 else if (user_convention)
5715 return user_pcs;
5716 else if (decl && flag_unit_at_a_time)
5718 /* Local functions never leak outside this compilation unit,
5719 so we are free to use whatever conventions are
5720 appropriate. */
5721 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5722 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5723 if (i && i->local)
5724 return ARM_PCS_AAPCS_LOCAL;
5727 else if (user_convention && user_pcs != arm_pcs_default)
5728 sorry ("PCS variant");
5730 /* For everything else we use the target's default. */
5731 return arm_pcs_default;
5735 static void
5736 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5737 const_tree fntype ATTRIBUTE_UNUSED,
5738 rtx libcall ATTRIBUTE_UNUSED,
5739 const_tree fndecl ATTRIBUTE_UNUSED)
5741 /* Record the unallocated VFP registers. */
5742 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5743 pcum->aapcs_vfp_reg_alloc = 0;
5746 /* Walk down the type tree of TYPE counting consecutive base elements.
5747 If *MODEP is VOIDmode, then set it to the first valid floating point
5748 type. If a non-floating point type is found, or if a floating point
5749 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5750 otherwise return the count in the sub-tree. */
5751 static int
5752 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5754 machine_mode mode;
5755 HOST_WIDE_INT size;
5757 switch (TREE_CODE (type))
5759 case REAL_TYPE:
5760 mode = TYPE_MODE (type);
5761 if (mode != DFmode && mode != SFmode && mode != HFmode)
5762 return -1;
5764 if (*modep == VOIDmode)
5765 *modep = mode;
5767 if (*modep == mode)
5768 return 1;
5770 break;
5772 case COMPLEX_TYPE:
5773 mode = TYPE_MODE (TREE_TYPE (type));
5774 if (mode != DFmode && mode != SFmode)
5775 return -1;
5777 if (*modep == VOIDmode)
5778 *modep = mode;
5780 if (*modep == mode)
5781 return 2;
5783 break;
5785 case VECTOR_TYPE:
5786 /* Use V2SImode and V4SImode as representatives of all 64-bit
5787 and 128-bit vector types, whether or not those modes are
5788 supported with the present options. */
5789 size = int_size_in_bytes (type);
5790 switch (size)
5792 case 8:
5793 mode = V2SImode;
5794 break;
5795 case 16:
5796 mode = V4SImode;
5797 break;
5798 default:
5799 return -1;
5802 if (*modep == VOIDmode)
5803 *modep = mode;
5805 /* Vector modes are considered to be opaque: two vectors are
5806 equivalent for the purposes of being homogeneous aggregates
5807 if they are the same size. */
5808 if (*modep == mode)
5809 return 1;
5811 break;
5813 case ARRAY_TYPE:
5815 int count;
5816 tree index = TYPE_DOMAIN (type);
5818 /* Can't handle incomplete types nor sizes that are not
5819 fixed. */
5820 if (!COMPLETE_TYPE_P (type)
5821 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5822 return -1;
5824 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5825 if (count == -1
5826 || !index
5827 || !TYPE_MAX_VALUE (index)
5828 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5829 || !TYPE_MIN_VALUE (index)
5830 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5831 || count < 0)
5832 return -1;
5834 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5835 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5837 /* There must be no padding. */
5838 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5839 return -1;
5841 return count;
5844 case RECORD_TYPE:
5846 int count = 0;
5847 int sub_count;
5848 tree field;
5850 /* Can't handle incomplete types nor sizes that are not
5851 fixed. */
5852 if (!COMPLETE_TYPE_P (type)
5853 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5854 return -1;
5856 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5858 if (TREE_CODE (field) != FIELD_DECL)
5859 continue;
5861 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5862 if (sub_count < 0)
5863 return -1;
5864 count += sub_count;
5867 /* There must be no padding. */
5868 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5869 return -1;
5871 return count;
5874 case UNION_TYPE:
5875 case QUAL_UNION_TYPE:
5877 /* These aren't very interesting except in a degenerate case. */
5878 int count = 0;
5879 int sub_count;
5880 tree field;
5882 /* Can't handle incomplete types nor sizes that are not
5883 fixed. */
5884 if (!COMPLETE_TYPE_P (type)
5885 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5886 return -1;
5888 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5890 if (TREE_CODE (field) != FIELD_DECL)
5891 continue;
5893 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5894 if (sub_count < 0)
5895 return -1;
5896 count = count > sub_count ? count : sub_count;
5899 /* There must be no padding. */
5900 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5901 return -1;
5903 return count;
5906 default:
5907 break;
5910 return -1;
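/* Illustrative outcomes of the walk above:
   struct { float x, y, z; } yields three SFmode elements (a homogeneous
   aggregate candidate), struct { double d[2]; } yields two DFmode elements,
   while struct { float f; double d; } mixes base modes and yields -1.
   Candidates with more than four elements are rejected by the caller.  */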
5913 /* Return true if PCS_VARIANT should use VFP registers. */
5914 static bool
5915 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5917 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5919 static bool seen_thumb1_vfp = false;
5921 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5923 sorry ("Thumb-1 hard-float VFP ABI");
5924 /* sorry() is not immediately fatal, so only display this once. */
5925 seen_thumb1_vfp = true;
5928 return true;
5931 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5932 return false;
5934 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5935 (TARGET_VFP_DOUBLE || !is_double));
5938 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5939 suitable for passing or returning in VFP registers for the PCS
5940 variant selected. If it is, then *BASE_MODE is updated to contain
5941 a machine mode describing each element of the argument's type and
5942 *COUNT to hold the number of such elements. */
5943 static bool
5944 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5945 machine_mode mode, const_tree type,
5946 machine_mode *base_mode, int *count)
5948 machine_mode new_mode = VOIDmode;
5950 /* If we have the type information, prefer that to working things
5951 out from the mode. */
5952 if (type)
5954 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5956 if (ag_count > 0 && ag_count <= 4)
5957 *count = ag_count;
5958 else
5959 return false;
5961 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5962 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5963 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5965 *count = 1;
5966 new_mode = mode;
5968 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5970 *count = 2;
5971 new_mode = (mode == DCmode ? DFmode : SFmode);
5973 else
5974 return false;
5977 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5978 return false;
5980 *base_mode = new_mode;
5981 return true;
5984 static bool
5985 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5986 machine_mode mode, const_tree type)
5988 int count ATTRIBUTE_UNUSED;
5989 machine_mode ag_mode ATTRIBUTE_UNUSED;
5991 if (!use_vfp_abi (pcs_variant, false))
5992 return false;
5993 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5994 &ag_mode, &count);
5997 static bool
5998 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5999 const_tree type)
6001 if (!use_vfp_abi (pcum->pcs_variant, false))
6002 return false;
6004 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6005 &pcum->aapcs_vfp_rmode,
6006 &pcum->aapcs_vfp_rcount);
6009 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6010 for the behaviour of this function. */
6012 static bool
6013 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6014 const_tree type ATTRIBUTE_UNUSED)
6016 int rmode_size
6017 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6018 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6019 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6020 int regno;
6022 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6023 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6025 pcum->aapcs_vfp_reg_alloc = mask << regno;
6026 if (mode == BLKmode
6027 || (mode == TImode && ! TARGET_NEON)
6028 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6030 int i;
6031 int rcount = pcum->aapcs_vfp_rcount;
6032 int rshift = shift;
6033 machine_mode rmode = pcum->aapcs_vfp_rmode;
6034 rtx par;
6035 if (!TARGET_NEON)
6037 /* Avoid using unsupported vector modes. */
6038 if (rmode == V2SImode)
6039 rmode = DImode;
6040 else if (rmode == V4SImode)
6042 rmode = DImode;
6043 rcount *= 2;
6044 rshift /= 2;
6047 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6048 for (i = 0; i < rcount; i++)
6050 rtx tmp = gen_rtx_REG (rmode,
6051 FIRST_VFP_REGNUM + regno + i * rshift);
6052 tmp = gen_rtx_EXPR_LIST
6053 (VOIDmode, tmp,
6054 GEN_INT (i * GET_MODE_SIZE (rmode)));
6055 XVECEXP (par, 0, i) = tmp;
6058 pcum->aapcs_reg = par;
6060 else
6061 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6062 return true;
6064 return false;
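/* A sketch of the allocation above, assuming all VFP argument registers are
   still free: for a homogeneous aggregate of two doubles, rmode_size is 8,
   shift is 2 and the mask for rcount == 2 is 0xf, so the loop claims four
   consecutive single-precision registers at an even index, i.e. s0-s3
   (the register pairs forming d0 and d1).  */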
6067 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6068 comment there for the behaviour of this function. */
6070 static rtx
6071 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6072 machine_mode mode,
6073 const_tree type ATTRIBUTE_UNUSED)
6075 if (!use_vfp_abi (pcs_variant, false))
6076 return NULL;
6078 if (mode == BLKmode
6079 || (GET_MODE_CLASS (mode) == MODE_INT
6080 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6081 && !TARGET_NEON))
6083 int count;
6084 machine_mode ag_mode;
6085 int i;
6086 rtx par;
6087 int shift;
6089 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6090 &ag_mode, &count);
6092 if (!TARGET_NEON)
6094 if (ag_mode == V2SImode)
6095 ag_mode = DImode;
6096 else if (ag_mode == V4SImode)
6098 ag_mode = DImode;
6099 count *= 2;
6102 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6103 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6104 for (i = 0; i < count; i++)
6106 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6107 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6108 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6109 XVECEXP (par, 0, i) = tmp;
6112 return par;
6115 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6118 static void
6119 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6120 machine_mode mode ATTRIBUTE_UNUSED,
6121 const_tree type ATTRIBUTE_UNUSED)
6123 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6124 pcum->aapcs_vfp_reg_alloc = 0;
6125 return;
6128 #define AAPCS_CP(X) \
6130 aapcs_ ## X ## _cum_init, \
6131 aapcs_ ## X ## _is_call_candidate, \
6132 aapcs_ ## X ## _allocate, \
6133 aapcs_ ## X ## _is_return_candidate, \
6134 aapcs_ ## X ## _allocate_return_reg, \
6135 aapcs_ ## X ## _advance \
6138 /* Table of co-processors that can be used to pass arguments in
6139 registers. Ideally no argument should be a candidate for more than
6140 one co-processor table entry, but the table is processed in order
6141 and stops after the first match. If that entry then fails to put
6142 the argument into a co-processor register, the argument will go on
6143 the stack. */
6144 static struct
6146 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6147 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6149 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6150 BLKmode) is a candidate for this co-processor's registers; this
6151 function should ignore any position-dependent state in
6152 CUMULATIVE_ARGS and only use call-type dependent information. */
6153 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6155 /* Return true if the argument does get a co-processor register; it
6156 should set aapcs_reg to an RTX of the register allocated as is
6157 required for a return from FUNCTION_ARG. */
6158 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6160 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6161 be returned in this co-processor's registers. */
6162 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6164 /* Allocate and return an RTX element to hold the return type of a call. This
6165 routine must not fail and will only be called if is_return_candidate
6166 returned true with the same parameters. */
6167 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6169 /* Finish processing this argument and prepare to start processing
6170 the next one. */
6171 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6172 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6174 AAPCS_CP(vfp)
6177 #undef AAPCS_CP
6179 static int
6180 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6181 const_tree type)
6183 int i;
6185 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6186 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6187 return i;
6189 return -1;
6192 static int
6193 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6195 /* We aren't passed a decl, so we can't check that a call is local.
6196 However, it isn't clear that that would be a win anyway, since it
6197 might limit some tail-calling opportunities. */
6198 enum arm_pcs pcs_variant;
6200 if (fntype)
6202 const_tree fndecl = NULL_TREE;
6204 if (TREE_CODE (fntype) == FUNCTION_DECL)
6206 fndecl = fntype;
6207 fntype = TREE_TYPE (fntype);
6210 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6212 else
6213 pcs_variant = arm_pcs_default;
6215 if (pcs_variant != ARM_PCS_AAPCS)
6217 int i;
6219 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6220 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6221 TYPE_MODE (type),
6222 type))
6223 return i;
6225 return -1;
6228 static rtx
6229 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6230 const_tree fntype)
6232 /* We aren't passed a decl, so we can't check that a call is local.
6233 However, it isn't clear that that would be a win anyway, since it
6234 might limit some tail-calling opportunities. */
6235 enum arm_pcs pcs_variant;
6236 int unsignedp ATTRIBUTE_UNUSED;
6238 if (fntype)
6240 const_tree fndecl = NULL_TREE;
6242 if (TREE_CODE (fntype) == FUNCTION_DECL)
6244 fndecl = fntype;
6245 fntype = TREE_TYPE (fntype);
6248 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6250 else
6251 pcs_variant = arm_pcs_default;
6253 /* Promote integer types. */
6254 if (type && INTEGRAL_TYPE_P (type))
6255 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6257 if (pcs_variant != ARM_PCS_AAPCS)
6259 int i;
6261 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6262 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6263 type))
6264 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6265 mode, type);
6268 /* Promotes small structs returned in a register to full-word size
6269 for big-endian AAPCS. */
6270 if (type && arm_return_in_msb (type))
6272 HOST_WIDE_INT size = int_size_in_bytes (type);
6273 if (size % UNITS_PER_WORD != 0)
6275 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6276 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6280 return gen_rtx_REG (mode, R0_REGNUM);
6283 static rtx
6284 aapcs_libcall_value (machine_mode mode)
6286 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6287 && GET_MODE_SIZE (mode) <= 4)
6288 mode = SImode;
6290 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6293 /* Lay out a function argument using the AAPCS rules. The rule
6294 numbers referred to here are those in the AAPCS. */
6295 static void
6296 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6297 const_tree type, bool named)
6299 int nregs, nregs2;
6300 int ncrn;
6302 /* We only need to do this once per argument. */
6303 if (pcum->aapcs_arg_processed)
6304 return;
6306 pcum->aapcs_arg_processed = true;
6308 /* Special case: if named is false then we are handling an incoming
6309 anonymous argument which is on the stack. */
6310 if (!named)
6311 return;
6313 /* Is this a potential co-processor register candidate? */
6314 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6316 int slot = aapcs_select_call_coproc (pcum, mode, type);
6317 pcum->aapcs_cprc_slot = slot;
6319 /* We don't have to apply any of the rules from part B of the
6320 preparation phase, these are handled elsewhere in the
6321 compiler. */
6323 if (slot >= 0)
6325 /* A Co-processor register candidate goes either in its own
6326 class of registers or on the stack. */
6327 if (!pcum->aapcs_cprc_failed[slot])
6329 /* C1.cp - Try to allocate the argument to co-processor
6330 registers. */
6331 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6332 return;
6334 /* C2.cp - Put the argument on the stack and note that we
6335 can't assign any more candidates in this slot. We also
6336 need to note that we have allocated stack space, so that
6337 we won't later try to split a non-cprc candidate between
6338 core registers and the stack. */
6339 pcum->aapcs_cprc_failed[slot] = true;
6340 pcum->can_split = false;
6343 /* We didn't get a register, so this argument goes on the
6344 stack. */
6345 gcc_assert (pcum->can_split == false);
6346 return;
6350 /* C3 - For double-word aligned arguments, round the NCRN up to the
6351 next even number. */
6352 ncrn = pcum->aapcs_ncrn;
6353 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6354 ncrn++;
6356 nregs = ARM_NUM_REGS2(mode, type);
6358 /* Sigh, this test should really assert that nregs > 0, but a GCC
6359 extension allows empty structs and then gives them empty size; it
6360 then allows such a structure to be passed by value. For some of
6361 the code below we have to pretend that such an argument has
6362 non-zero size so that we 'locate' it correctly either in
6363 registers or on the stack. */
6364 gcc_assert (nregs >= 0);
6366 nregs2 = nregs ? nregs : 1;
6368 /* C4 - Argument fits entirely in core registers. */
6369 if (ncrn + nregs2 <= NUM_ARG_REGS)
6371 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6372 pcum->aapcs_next_ncrn = ncrn + nregs;
6373 return;
6376 /* C5 - Some core registers left and there are no arguments already
6377 on the stack: split this argument between the remaining core
6378 registers and the stack. */
6379 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6381 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6382 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6383 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6384 return;
6387 /* C6 - NCRN is set to 4. */
6388 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6390 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6391 return;
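/* A worked example of rules C3-C5 above: after an int has taken r0, a
   following long long is doubleword aligned, so C3 rounds the NCRN up from
   1 to 2 and C4 places it in r2/r3. If instead r0-r2 were already in use
   and nothing had yet been pushed to the stack, C5 would split it between
   r3 and the stack.  */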
6394 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6395 for a call to a function whose data type is FNTYPE.
6396 For a library call, FNTYPE is NULL. */
6397 void
6398 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6399 rtx libname,
6400 tree fndecl ATTRIBUTE_UNUSED)
6402 /* Long call handling. */
6403 if (fntype)
6404 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6405 else
6406 pcum->pcs_variant = arm_pcs_default;
6408 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6410 if (arm_libcall_uses_aapcs_base (libname))
6411 pcum->pcs_variant = ARM_PCS_AAPCS;
6413 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6414 pcum->aapcs_reg = NULL_RTX;
6415 pcum->aapcs_partial = 0;
6416 pcum->aapcs_arg_processed = false;
6417 pcum->aapcs_cprc_slot = -1;
6418 pcum->can_split = true;
6420 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6422 int i;
6424 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6426 pcum->aapcs_cprc_failed[i] = false;
6427 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6430 return;
6433 /* Legacy ABIs */
6435 /* On the ARM, the offset starts at 0. */
6436 pcum->nregs = 0;
6437 pcum->iwmmxt_nregs = 0;
6438 pcum->can_split = true;
6440 /* Varargs vectors are treated the same as long long.
6441 named_count avoids having to change the way arm handles 'named'. */
6442 pcum->named_count = 0;
6443 pcum->nargs = 0;
6445 if (TARGET_REALLY_IWMMXT && fntype)
6447 tree fn_arg;
6449 for (fn_arg = TYPE_ARG_TYPES (fntype);
6450 fn_arg;
6451 fn_arg = TREE_CHAIN (fn_arg))
6452 pcum->named_count += 1;
6454 if (! pcum->named_count)
6455 pcum->named_count = INT_MAX;
6459 /* Return true if mode/type need doubleword alignment. */
6460 static bool
6461 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6463 if (!type)
6464 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6466 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6467 if (!AGGREGATE_TYPE_P (type))
6468 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6470 /* Array types: Use member alignment of element type. */
6471 if (TREE_CODE (type) == ARRAY_TYPE)
6472 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6474 /* Record/aggregate types: Use greatest member alignment of any member. */
6475 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6476 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6477 return true;
6479 return false;
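/* For example, with the usual 32-bit PARM_BOUNDARY an int does not need
   doubleword alignment, whereas a long long, a double, or a struct whose
   most-aligned field is 8-byte aligned does, and so starts in an even
   numbered core register or at an 8-byte aligned stack offset.  */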
6483 /* Determine where to put an argument to a function.
6484 Value is zero to push the argument on the stack,
6485 or a hard register in which to store the argument.
6487 MODE is the argument's machine mode.
6488 TYPE is the data type of the argument (as a tree).
6489 This is null for libcalls where that information may
6490 not be available.
6491 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6492 the preceding args and about the function being called.
6493 NAMED is nonzero if this argument is a named parameter
6494 (otherwise it is an extra parameter matching an ellipsis).
6496 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6497 other arguments are passed on the stack. If (NAMED == 0) (which happens
6498 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6499 defined), say it is passed on the stack (function_prologue will
6500 indeed make it be passed on the stack if necessary). */
6502 static rtx
6503 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6504 const_tree type, bool named)
6506 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6507 int nregs;
6509 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6510 a call insn (op3 of a call_value insn). */
6511 if (mode == VOIDmode)
6512 return const0_rtx;
6514 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6516 aapcs_layout_arg (pcum, mode, type, named);
6517 return pcum->aapcs_reg;
6520 /* Varargs vectors are treated the same as long long.
6521 named_count avoids having to change the way arm handles 'named'. */
6522 if (TARGET_IWMMXT_ABI
6523 && arm_vector_mode_supported_p (mode)
6524 && pcum->named_count > pcum->nargs + 1)
6526 if (pcum->iwmmxt_nregs <= 9)
6527 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6528 else
6530 pcum->can_split = false;
6531 return NULL_RTX;
6535 /* Put doubleword aligned quantities in even register pairs. */
6536 if (pcum->nregs & 1
6537 && ARM_DOUBLEWORD_ALIGN
6538 && arm_needs_doubleword_align (mode, type))
6539 pcum->nregs++;
6541 /* Only allow splitting an arg between regs and memory if all preceding
6542 args were allocated to regs. For args passed by reference we only count
6543 the reference pointer. */
6544 if (pcum->can_split)
6545 nregs = 1;
6546 else
6547 nregs = ARM_NUM_REGS2 (mode, type);
6549 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6550 return NULL_RTX;
6552 return gen_rtx_REG (mode, pcum->nregs);
6555 static unsigned int
6556 arm_function_arg_boundary (machine_mode mode, const_tree type)
6558 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6559 ? DOUBLEWORD_ALIGNMENT
6560 : PARM_BOUNDARY);
6563 static int
6564 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6565 tree type, bool named)
6567 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6568 int nregs = pcum->nregs;
6570 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6572 aapcs_layout_arg (pcum, mode, type, named);
6573 return pcum->aapcs_partial;
6576 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6577 return 0;
6579 if (NUM_ARG_REGS > nregs
6580 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6581 && pcum->can_split)
6582 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6584 return 0;
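/* For instance, with three argument registers already in use (nregs == 3)
   an 8-byte argument that may still be split receives (4 - 3) * 4 = 4
   bytes in r3, with the remaining 4 bytes going on the stack.  */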
6587 /* Update the data in PCUM to advance over an argument
6588 of mode MODE and data type TYPE.
6589 (TYPE is null for libcalls where that information may not be available.) */
6591 static void
6592 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6593 const_tree type, bool named)
6595 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6597 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6599 aapcs_layout_arg (pcum, mode, type, named);
6601 if (pcum->aapcs_cprc_slot >= 0)
6603 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6604 type);
6605 pcum->aapcs_cprc_slot = -1;
6608 /* Generic stuff. */
6609 pcum->aapcs_arg_processed = false;
6610 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6611 pcum->aapcs_reg = NULL_RTX;
6612 pcum->aapcs_partial = 0;
6614 else
6616 pcum->nargs += 1;
6617 if (arm_vector_mode_supported_p (mode)
6618 && pcum->named_count > pcum->nargs
6619 && TARGET_IWMMXT_ABI)
6620 pcum->iwmmxt_nregs += 1;
6621 else
6622 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6626 /* Variable sized types are passed by reference. This is a GCC
6627 extension to the ARM ABI. */
6629 static bool
6630 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6631 machine_mode mode ATTRIBUTE_UNUSED,
6632 const_tree type, bool named ATTRIBUTE_UNUSED)
6634 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6637 /* Encode the current state of the #pragma [no_]long_calls. */
6638 typedef enum
6640 OFF, /* No #pragma [no_]long_calls is in effect. */
6641 LONG, /* #pragma long_calls is in effect. */
6642 SHORT /* #pragma no_long_calls is in effect. */
6643 } arm_pragma_enum;
6645 static arm_pragma_enum arm_pragma_long_calls = OFF;
6647 void
6648 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6650 arm_pragma_long_calls = LONG;
6653 void
6654 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6656 arm_pragma_long_calls = SHORT;
6659 void
6660 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6662 arm_pragma_long_calls = OFF;
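/* Hypothetical source-level use of the pragmas handled above:

     #pragma long_calls
     void far_away (void);      <- treated as if declared long_call
     #pragma no_long_calls
     void nearby (void);        <- treated as if declared short_call
     #pragma long_calls_off     <- revert to the command-line default

   The pragmas only affect declarations that follow them.  */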
6665 /* Handle an attribute requiring a FUNCTION_DECL;
6666 arguments as in struct attribute_spec.handler. */
6667 static tree
6668 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6669 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6671 if (TREE_CODE (*node) != FUNCTION_DECL)
6673 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6674 name);
6675 *no_add_attrs = true;
6678 return NULL_TREE;
6681 /* Handle an "interrupt" or "isr" attribute;
6682 arguments as in struct attribute_spec.handler. */
6683 static tree
6684 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6685 bool *no_add_attrs)
6687 if (DECL_P (*node))
6689 if (TREE_CODE (*node) != FUNCTION_DECL)
6691 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6692 name);
6693 *no_add_attrs = true;
6695 /* FIXME: the argument if any is checked for type attributes;
6696 should it be checked for decl ones? */
6698 else
6700 if (TREE_CODE (*node) == FUNCTION_TYPE
6701 || TREE_CODE (*node) == METHOD_TYPE)
6703 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6705 warning (OPT_Wattributes, "%qE attribute ignored",
6706 name);
6707 *no_add_attrs = true;
6710 else if (TREE_CODE (*node) == POINTER_TYPE
6711 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6712 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6713 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6715 *node = build_variant_type_copy (*node);
6716 TREE_TYPE (*node) = build_type_attribute_variant
6717 (TREE_TYPE (*node),
6718 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6719 *no_add_attrs = true;
6721 else
6723 /* Possibly pass this attribute on from the type to a decl. */
6724 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6725 | (int) ATTR_FLAG_FUNCTION_NEXT
6726 | (int) ATTR_FLAG_ARRAY_NEXT))
6728 *no_add_attrs = true;
6729 return tree_cons (name, args, NULL_TREE);
6731 else
6733 warning (OPT_Wattributes, "%qE attribute ignored",
6734 name);
6739 return NULL_TREE;
6742 /* Handle a "pcs" attribute; arguments as in struct
6743 attribute_spec.handler. */
6744 static tree
6745 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6746 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6748 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6750 warning (OPT_Wattributes, "%qE attribute ignored", name);
6751 *no_add_attrs = true;
6753 return NULL_TREE;
6756 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6757 /* Handle the "notshared" attribute. This attribute is another way of
6758 requesting hidden visibility. ARM's compiler supports
6759 "__declspec(notshared)"; we support the same thing via an
6760 attribute. */
6762 static tree
6763 arm_handle_notshared_attribute (tree *node,
6764 tree name ATTRIBUTE_UNUSED,
6765 tree args ATTRIBUTE_UNUSED,
6766 int flags ATTRIBUTE_UNUSED,
6767 bool *no_add_attrs)
6769 tree decl = TYPE_NAME (*node);
6771 if (decl)
6773 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6774 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6775 *no_add_attrs = false;
6777 return NULL_TREE;
6779 #endif
6781 /* This function returns true if a function with declaration FNDECL and type
6782 FNTYPE uses the stack to pass arguments or to return its return value, and
6783 false otherwise. This is used for functions with the attributes
6784 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6785 diagnostic messages if the stack is used. NAME is the name of the attribute
6786 used. */
6788 static bool
6789 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6791 function_args_iterator args_iter;
6792 CUMULATIVE_ARGS args_so_far_v;
6793 cumulative_args_t args_so_far;
6794 bool first_param = true;
6795 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6797 /* Error out if any argument is passed on the stack. */
6798 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6799 args_so_far = pack_cumulative_args (&args_so_far_v);
6800 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6802 rtx arg_rtx;
6803 machine_mode arg_mode = TYPE_MODE (arg_type);
6805 prev_arg_type = arg_type;
6806 if (VOID_TYPE_P (arg_type))
6807 continue;
6809 if (!first_param)
6810 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6811 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6812 if (!arg_rtx
6813 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6815 error ("%qE attribute not available to functions with arguments "
6816 "passed on the stack", name);
6817 return true;
6819 first_param = false;
6822 /* Error out for variadic functions since we cannot control how many
6823 arguments will be passed and thus the stack could be used. stdarg_p () is
6824 not used for this check to avoid walking the argument list twice.
6825 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6827 error ("%qE attribute not available to functions with variable number "
6828 "of arguments", name);
6829 return true;
6832 /* Error out if return value is passed on the stack. */
6833 ret_type = TREE_TYPE (fntype);
6834 if (arm_return_in_memory (ret_type, fntype))
6836 error ("%qE attribute not available to functions that return value on "
6837 "the stack", name);
6838 return true;
6840 return false;
6843 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6844 function will check whether the attribute is allowed here and will add the
6845 attribute to the function declaration tree or otherwise issue a warning. */
6847 static tree
6848 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6849 tree /* args */,
6850 int /* flags */,
6851 bool *no_add_attrs)
6853 tree fndecl;
6855 if (!use_cmse)
6857 *no_add_attrs = true;
6858 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6859 name);
6860 return NULL_TREE;
6863 /* Ignore attribute for function types. */
6864 if (TREE_CODE (*node) != FUNCTION_DECL)
6866 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6867 name);
6868 *no_add_attrs = true;
6869 return NULL_TREE;
6872 fndecl = *node;
6874 /* Warn for static linkage functions. */
6875 if (!TREE_PUBLIC (fndecl))
6877 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6878 "with static linkage", name);
6879 *no_add_attrs = true;
6880 return NULL_TREE;
6883 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6884 TREE_TYPE (fndecl));
6885 return NULL_TREE;
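/* A hypothetical declaration accepted by this handler (requires -mcmse):
     int __attribute__ ((cmse_nonsecure_entry)) secure_entry (int a, int b);
   The attribute is ignored, with a warning, for functions with static
   linkage and for functions whose arguments or return value would have to
   be passed on the stack.  */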
6889 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6890 function will check whether the attribute is allowed here and will add the
6891 attribute to the function type tree or otherwise issue a diagnostic. The
6892 reason we check this at declaration time is to only allow the use of the
6893 attribute with declarations of function pointers and not function
6894 declarations. This function checks NODE is of the expected type and issues
6895 diagnostics otherwise using NAME. If it is not of the expected type
6896 *NO_ADD_ATTRS will be set to true. */
6898 static tree
6899 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6900 tree /* args */,
6901 int /* flags */,
6902 bool *no_add_attrs)
6904 tree decl = NULL_TREE, fntype = NULL_TREE;
6905 tree type;
6907 if (!use_cmse)
6909 *no_add_attrs = true;
6910 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6911 name);
6912 return NULL_TREE;
6915 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6917 decl = *node;
6918 fntype = TREE_TYPE (decl);
6921 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6922 fntype = TREE_TYPE (fntype);
6924 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6926 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6927 "function pointer", name);
6928 *no_add_attrs = true;
6929 return NULL_TREE;
6932 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
6934 if (*no_add_attrs)
6935 return NULL_TREE;
6937 /* Prevent trees being shared among function types with and without
6938 cmse_nonsecure_call attribute. */
6939 type = TREE_TYPE (decl);
6941 type = build_distinct_type_copy (type);
6942 TREE_TYPE (decl) = type;
6943 fntype = type;
6945 while (TREE_CODE (fntype) != FUNCTION_TYPE)
6947 type = fntype;
6948 fntype = TREE_TYPE (fntype);
6949 fntype = build_distinct_type_copy (fntype);
6950 TREE_TYPE (type) = fntype;
6953 /* Construct a type attribute and add it to the function type. */
6954 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
6955 TYPE_ATTRIBUTES (fntype));
6956 TYPE_ATTRIBUTES (fntype) = attrs;
6957 return NULL_TREE;
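/* One accepted spelling, applying the attribute through a function-pointer
   declaration (hypothetical, requires -mcmse):
     void (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));
   Applying it directly to a function declaration is diagnosed instead.  */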
6960 /* Return 0 if the attributes for two types are incompatible, 1 if they
6961 are compatible, and 2 if they are nearly compatible (which causes a
6962 warning to be generated). */
6963 static int
6964 arm_comp_type_attributes (const_tree type1, const_tree type2)
6966 int l1, l2, s1, s2;
6968 /* Check for mismatch of non-default calling convention. */
6969 if (TREE_CODE (type1) != FUNCTION_TYPE)
6970 return 1;
6972 /* Check for mismatched call attributes. */
6973 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6974 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6975 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6976 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6978 /* Only bother to check if an attribute is defined. */
6979 if (l1 | l2 | s1 | s2)
6981 /* If one type has an attribute, the other must have the same attribute. */
6982 if ((l1 != l2) || (s1 != s2))
6983 return 0;
6985 /* Disallow mixed attributes. */
6986 if ((l1 & s2) || (l2 & s1))
6987 return 0;
6990 /* Check for mismatched ISR attribute. */
6991 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6992 if (! l1)
6993 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6994 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6995 if (! l2)
6996 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6997 if (l1 != l2)
6998 return 0;
7000 l1 = lookup_attribute ("cmse_nonsecure_call",
7001 TYPE_ATTRIBUTES (type1)) != NULL;
7002 l2 = lookup_attribute ("cmse_nonsecure_call",
7003 TYPE_ATTRIBUTES (type2)) != NULL;
7005 if (l1 != l2)
7006 return 0;
7008 return 1;
7011 /* Assigns default attributes to newly defined type. This is used to
7012 set short_call/long_call attributes for function types of
7013 functions defined inside corresponding #pragma scopes. */
7014 static void
7015 arm_set_default_type_attributes (tree type)
7017 /* Add __attribute__ ((long_call)) to all functions when inside
7018 #pragma long_calls, or __attribute__ ((short_call)) when inside
7019 #pragma no_long_calls. */
7020 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7022 tree type_attr_list, attr_name;
7023 type_attr_list = TYPE_ATTRIBUTES (type);
7025 if (arm_pragma_long_calls == LONG)
7026 attr_name = get_identifier ("long_call");
7027 else if (arm_pragma_long_calls == SHORT)
7028 attr_name = get_identifier ("short_call");
7029 else
7030 return;
7032 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7033 TYPE_ATTRIBUTES (type) = type_attr_list;
7037 /* Return true if DECL is known to be linked into section SECTION. */
7039 static bool
7040 arm_function_in_section_p (tree decl, section *section)
7042 /* We can only be certain about the prevailing symbol definition. */
7043 if (!decl_binds_to_current_def_p (decl))
7044 return false;
7046 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7047 if (!DECL_SECTION_NAME (decl))
7049 /* Make sure that we will not create a unique section for DECL. */
7050 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7051 return false;
7054 return function_section (decl) == section;
7057 /* Return nonzero if a 32-bit "long_call" should be generated for
7058 a call from the current function to DECL. We generate a long_call
7059 if the function:
7061 a. has an __attribute__ ((long_call))
7062 or b. is within the scope of a #pragma long_calls
7063 or c. the -mlong-calls command line switch has been specified
7065 However we do not generate a long call if the function:
7067 d. has an __attribute__ ((short_call))
7068 or e. is inside the scope of a #pragma no_long_calls
7069 or f. is defined in the same section as the current function. */
7071 bool
7072 arm_is_long_call_p (tree decl)
7074 tree attrs;
7076 if (!decl)
7077 return TARGET_LONG_CALLS;
7079 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7080 if (lookup_attribute ("short_call", attrs))
7081 return false;
7083 /* For "f", be conservative, and only cater for cases in which the
7084 whole of the current function is placed in the same section. */
7085 if (!flag_reorder_blocks_and_partition
7086 && TREE_CODE (decl) == FUNCTION_DECL
7087 && arm_function_in_section_p (decl, current_function_section ()))
7088 return false;
7090 if (lookup_attribute ("long_call", attrs))
7091 return true;
7093 return TARGET_LONG_CALLS;
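/* For instance (hypothetical declarations):
     void far_func (void) __attribute__ ((long_call));    always a long call
     void near_func (void) __attribute__ ((short_call));  never a long call
   With neither attribute the -mlong-calls setting decides, unless the
   callee is known to be placed in the same section as the caller.  */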
7096 /* Return nonzero if it is ok to make a tail-call to DECL. */
7097 static bool
7098 arm_function_ok_for_sibcall (tree decl, tree exp)
7100 unsigned long func_type;
7102 if (cfun->machine->sibcall_blocked)
7103 return false;
7105 /* Never tailcall something if we are generating code for Thumb-1. */
7106 if (TARGET_THUMB1)
7107 return false;
7109 /* The PIC register is live on entry to VxWorks PLT entries, so we
7110 must make the call before restoring the PIC register. */
7111 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7112 return false;
7114 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7115 may be used both as target of the call and base register for restoring
7116 the VFP registers. */
7117 if (TARGET_APCS_FRAME && TARGET_ARM
7118 && TARGET_HARD_FLOAT
7119 && decl && arm_is_long_call_p (decl))
7120 return false;
7122 /* If we are interworking and the function is not declared static
7123 then we can't tail-call it unless we know that it exists in this
7124 compilation unit (since it might be a Thumb routine). */
7125 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7126 && !TREE_ASM_WRITTEN (decl))
7127 return false;
7129 func_type = arm_current_func_type ();
7130 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7131 if (IS_INTERRUPT (func_type))
7132 return false;
7134 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7135 generated for entry functions themselves. */
7136 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7137 return false;
7139 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7140 this would complicate matters for later code generation. */
7141 if (TREE_CODE (exp) == CALL_EXPR)
7143 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7144 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7145 return false;
7148 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7150 /* Check that the return value locations are the same. For
7151 example that we aren't returning a value from the sibling in
7152 a VFP register but then need to transfer it to a core
7153 register. */
7154 rtx a, b;
7155 tree decl_or_type = decl;
7157 /* If it is an indirect function pointer, get the function type. */
7158 if (!decl)
7159 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7161 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7162 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7163 cfun->decl, false);
7164 if (!rtx_equal_p (a, b))
7165 return false;
7168 /* Never tailcall if function may be called with a misaligned SP. */
7169 if (IS_STACKALIGN (func_type))
7170 return false;
7172 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7173 references should become a NOP. Don't convert such calls into
7174 sibling calls. */
7175 if (TARGET_AAPCS_BASED
7176 && arm_abi == ARM_ABI_AAPCS
7177 && decl
7178 && DECL_WEAK (decl))
7179 return false;
7181 /* We cannot do a tailcall for an indirect call by descriptor if all the
7182 argument registers are used because the only register left to load the
7183 address is IP and it will already contain the static chain. */
7184 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7186 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7187 CUMULATIVE_ARGS cum;
7188 cumulative_args_t cum_v;
7190 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7191 cum_v = pack_cumulative_args (&cum);
7193 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7195 tree type = TREE_VALUE (t);
7196 if (!VOID_TYPE_P (type))
7197 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7200 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7201 return false;
7204 /* Everything else is ok. */
7205 return true;
7209 /* Addressing mode support functions. */
7211 /* Return nonzero if X is a legitimate immediate operand when compiling
7212 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7214 legitimate_pic_operand_p (rtx x)
7216 if (GET_CODE (x) == SYMBOL_REF
7217 || (GET_CODE (x) == CONST
7218 && GET_CODE (XEXP (x, 0)) == PLUS
7219 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7220 return 0;
7222 return 1;
7225 /* Record that the current function needs a PIC register. Initialize
7226 cfun->machine->pic_reg if we have not already done so. */
7228 static void
7229 require_pic_register (void)
7231 /* A lot of the logic here is made obscure by the fact that this
7232 routine gets called as part of the rtx cost estimation process.
7233 We don't want those calls to affect any assumptions about the real
7234 function; and further, we can't call entry_of_function() until we
7235 start the real expansion process. */
7236 if (!crtl->uses_pic_offset_table)
7238 gcc_assert (can_create_pseudo_p ());
7239 if (arm_pic_register != INVALID_REGNUM
7240 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7242 if (!cfun->machine->pic_reg)
7243 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7245 /* Play games to avoid marking the function as needing pic
7246 if we are being called as part of the cost-estimation
7247 process. */
7248 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7249 crtl->uses_pic_offset_table = 1;
7251 else
7253 rtx_insn *seq, *insn;
7255 if (!cfun->machine->pic_reg)
7256 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7258 /* Play games to avoid marking the function as needing pic
7259 if we are being called as part of the cost-estimation
7260 process. */
7261 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7263 crtl->uses_pic_offset_table = 1;
7264 start_sequence ();
7266 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7267 && arm_pic_register > LAST_LO_REGNUM)
7268 emit_move_insn (cfun->machine->pic_reg,
7269 gen_rtx_REG (Pmode, arm_pic_register));
7270 else
7271 arm_load_pic_register (0UL);
7273 seq = get_insns ();
7274 end_sequence ();
7276 for (insn = seq; insn; insn = NEXT_INSN (insn))
7277 if (INSN_P (insn))
7278 INSN_LOCATION (insn) = prologue_location;
7280 /* We can be called during expansion of PHI nodes, where
7281 we can't yet emit instructions directly in the final
7282 insn stream. Queue the insns on the entry edge; they will
7283 be committed after everything else is expanded. */
7284 insert_insn_on_edge (seq,
7285 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7292 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7294 if (GET_CODE (orig) == SYMBOL_REF
7295 || GET_CODE (orig) == LABEL_REF)
7297 if (reg == 0)
7299 gcc_assert (can_create_pseudo_p ());
7300 reg = gen_reg_rtx (Pmode);
7303 /* VxWorks does not impose a fixed gap between segments; the run-time
7304 gap can be different from the object-file gap. We therefore can't
7305 use GOTOFF unless we are absolutely sure that the symbol is in the
7306 same segment as the GOT. Unfortunately, the flexibility of linker
7307 scripts means that we can't be sure of that in general, so assume
7308 that GOTOFF is never valid on VxWorks. */
7309 /* References to weak symbols cannot be resolved locally: they
7310 may be overridden by a non-weak definition at link time. */
7311 rtx_insn *insn;
7312 if ((GET_CODE (orig) == LABEL_REF
7313 || (GET_CODE (orig) == SYMBOL_REF
7314 && SYMBOL_REF_LOCAL_P (orig)
7315 && (SYMBOL_REF_DECL (orig)
7316 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7317 && NEED_GOT_RELOC
7318 && arm_pic_data_is_text_relative)
7319 insn = arm_pic_static_addr (orig, reg);
7320 else
7322 rtx pat;
7323 rtx mem;
7325 /* If this function doesn't have a pic register, create one now. */
7326 require_pic_register ();
7328 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7330 /* Make the MEM as close to a constant as possible. */
7331 mem = SET_SRC (pat);
7332 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7333 MEM_READONLY_P (mem) = 1;
7334 MEM_NOTRAP_P (mem) = 1;
7336 insn = emit_insn (pat);
7339 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7340 by the loop optimizer. */
7341 set_unique_reg_note (insn, REG_EQUAL, orig);
7343 return reg;
7345 else if (GET_CODE (orig) == CONST)
7347 rtx base, offset;
7349 if (GET_CODE (XEXP (orig, 0)) == PLUS
7350 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7351 return orig;
7353 /* Handle the case where we have: const (UNSPEC_TLS). */
7354 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7355 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7356 return orig;
7358 /* Handle the case where we have:
7359 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7360 CONST_INT. */
7361 if (GET_CODE (XEXP (orig, 0)) == PLUS
7362 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7363 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7365 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7366 return orig;
7369 if (reg == 0)
7371 gcc_assert (can_create_pseudo_p ());
7372 reg = gen_reg_rtx (Pmode);
7375 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7377 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7378 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7379 base == reg ? 0 : reg);
7381 if (CONST_INT_P (offset))
7383 /* The base register doesn't really matter; we only want to
7384 test the index for the appropriate mode. */
7385 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7387 gcc_assert (can_create_pseudo_p ());
7388 offset = force_reg (Pmode, offset);
7391 if (CONST_INT_P (offset))
7392 return plus_constant (Pmode, base, INTVAL (offset));
7395 if (GET_MODE_SIZE (mode) > 4
7396 && (GET_MODE_CLASS (mode) == MODE_INT
7397 || TARGET_SOFT_FLOAT))
7399 emit_insn (gen_addsi3 (reg, base, offset));
7400 return reg;
7403 return gen_rtx_PLUS (Pmode, base, offset);
7406 return orig;
7410 /* Find a spare register to use during the prologue of a function. */
7412 static int
7413 thumb_find_work_register (unsigned long pushed_regs_mask)
7415 int reg;
7417 /* Check the argument registers first as these are call-used. The
7418 register allocation order means that sometimes r3 might be used
7419 but earlier argument registers might not, so check them all. */
7420 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7421 if (!df_regs_ever_live_p (reg))
7422 return reg;
7424 /* Before going on to check the call-saved registers we can try a couple
7425 more ways of deducing that r3 is available. The first is when we are
7426 pushing anonymous arguments onto the stack and we have less than 4
7427 registers worth of fixed arguments(*). In this case r3 will be part of
7428 the variable argument list and so we can be sure that it will be
7429 pushed right at the start of the function. Hence it will be available
7430 for the rest of the prologue.
7431 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7432 if (cfun->machine->uses_anonymous_args
7433 && crtl->args.pretend_args_size > 0)
7434 return LAST_ARG_REGNUM;
7436 /* The other case is when we have fixed arguments but less than 4 registers
7437 worth. In this case r3 might be used in the body of the function, but
7438 it is not being used to convey an argument into the function. In theory
7439 we could just check crtl->args.size to see how many bytes are
7440 being passed in argument registers, but it seems that it is unreliable.
7441 Sometimes it will have the value 0 when in fact arguments are being
7442 passed. (See testcase execute/20021111-1.c for an example). So we also
7443 check the args_info.nregs field as well. The problem with this field is
7444 that it makes no allowances for arguments that are passed to the
7445 function but which are not used. Hence we could miss an opportunity
7446 when a function has an unused argument in r3. But it is better to be
7447 safe than to be sorry. */
7448 if (! cfun->machine->uses_anonymous_args
7449 && crtl->args.size >= 0
7450 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7451 && (TARGET_AAPCS_BASED
7452 ? crtl->args.info.aapcs_ncrn < 4
7453 : crtl->args.info.nregs < 4))
7454 return LAST_ARG_REGNUM;
7456 /* Otherwise look for a call-saved register that is going to be pushed. */
7457 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7458 if (pushed_regs_mask & (1 << reg))
7459 return reg;
7461 if (TARGET_THUMB2)
7463 /* Thumb-2 can use high regs. */
7464 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7465 if (pushed_regs_mask & (1 << reg))
7466 return reg;
7468 /* Something went wrong - thumb_compute_save_reg_mask()
7469 should have arranged for a suitable register to be pushed. */
7470 gcc_unreachable ();
7473 static GTY(()) int pic_labelno;
7475 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7476 low register. */
7478 void
7479 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7481 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7483 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7484 return;
7486 gcc_assert (flag_pic);
7488 pic_reg = cfun->machine->pic_reg;
7489 if (TARGET_VXWORKS_RTP)
7491 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7492 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7493 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7495 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7497 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7498 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7500 else
7502 /* We use an UNSPEC rather than a LABEL_REF because this label
7503 never appears in the code stream. */
7505 labelno = GEN_INT (pic_labelno++);
7506 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7507 l1 = gen_rtx_CONST (VOIDmode, l1);
7509 /* On the ARM the PC register contains 'dot + 8' at the time of the
7510 addition, on the Thumb it is 'dot + 4'. */
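/* An illustrative sketch of the arithmetic (assuming the usual GOT-relative
   scheme): the constant built below is GOT - (.LPICn + pc_offset), where
   pc_offset is 8 in ARM state and 4 in Thumb state.  The paired pc-relative
   add performed at .LPICn reads the pc as .LPICn + pc_offset, so the PIC
   register ends up holding exactly the GOT address.  */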
7511 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7512 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7513 UNSPEC_GOTSYM_OFF);
7514 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7516 if (TARGET_32BIT)
7518 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7520 else /* TARGET_THUMB1 */
7522 if (arm_pic_register != INVALID_REGNUM
7523 && REGNO (pic_reg) > LAST_LO_REGNUM)
7525 /* We will have pushed the pic register, so we should always be
7526 able to find a work register. */
7527 pic_tmp = gen_rtx_REG (SImode,
7528 thumb_find_work_register (saved_regs));
7529 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7530 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7531 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7533 else if (arm_pic_register != INVALID_REGNUM
7534 && arm_pic_register > LAST_LO_REGNUM
7535 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7537 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7538 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7539 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7541 else
7542 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7546 /* Need to emit this whether or not we obey regdecls,
7547 since setjmp/longjmp can cause life info to screw up. */
7548 emit_use (pic_reg);
7551 /* Generate code to load the address of a static var when flag_pic is set. */
7552 static rtx_insn *
7553 arm_pic_static_addr (rtx orig, rtx reg)
7555 rtx l1, labelno, offset_rtx;
7557 gcc_assert (flag_pic);
7559 /* We use an UNSPEC rather than a LABEL_REF because this label
7560 never appears in the code stream. */
7561 labelno = GEN_INT (pic_labelno++);
7562 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7563 l1 = gen_rtx_CONST (VOIDmode, l1);
7565 /* On the ARM the PC register contains 'dot + 8' at the time of the
7566 addition, on the Thumb it is 'dot + 4'. */
7567 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7568 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7569 UNSPEC_SYMBOL_OFFSET);
7570 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7572 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7575 /* Return nonzero if X is valid as an ARM state addressing register. */
7576 static int
7577 arm_address_register_rtx_p (rtx x, int strict_p)
7579 int regno;
7581 if (!REG_P (x))
7582 return 0;
7584 regno = REGNO (x);
7586 if (strict_p)
7587 return ARM_REGNO_OK_FOR_BASE_P (regno);
7589 return (regno <= LAST_ARM_REGNUM
7590 || regno >= FIRST_PSEUDO_REGISTER
7591 || regno == FRAME_POINTER_REGNUM
7592 || regno == ARG_POINTER_REGNUM);
7595 /* Return TRUE if this rtx is the difference of a symbol and a label,
7596 and will reduce to a PC-relative relocation in the object file.
7597 Expressions like this can be left alone when generating PIC, rather
7598 than forced through the GOT. */
7599 static int
7600 pcrel_constant_p (rtx x)
7602 if (GET_CODE (x) == MINUS)
7603 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7605 return FALSE;
7608 /* Return true if X will surely end up in an index register after next
7609 splitting pass. */
7610 static bool
7611 will_be_in_index_register (const_rtx x)
7613 /* arm.md: calculate_pic_address will split this into a register. */
7614 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7617 /* Return nonzero if X is a valid ARM state address operand. */
7619 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7620 int strict_p)
7622 bool use_ldrd;
7623 enum rtx_code code = GET_CODE (x);
7625 if (arm_address_register_rtx_p (x, strict_p))
7626 return 1;
7628 use_ldrd = (TARGET_LDRD
7629 && (mode == DImode || mode == DFmode));
7631 if (code == POST_INC || code == PRE_DEC
7632 || ((code == PRE_INC || code == POST_DEC)
7633 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7634 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7636 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7637 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7638 && GET_CODE (XEXP (x, 1)) == PLUS
7639 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7641 rtx addend = XEXP (XEXP (x, 1), 1);
7643 /* Don't allow ldrd post increment by register because it's hard
7644 to fix up invalid register choices. */
7645 if (use_ldrd
7646 && GET_CODE (x) == POST_MODIFY
7647 && REG_P (addend))
7648 return 0;
7650 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7651 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7654 /* After reload, constants split into minipools will have addresses
7655 from a LABEL_REF. */
7656 else if (reload_completed
7657 && (code == LABEL_REF
7658 || (code == CONST
7659 && GET_CODE (XEXP (x, 0)) == PLUS
7660 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7661 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7662 return 1;
7664 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7665 return 0;
7667 else if (code == PLUS)
7669 rtx xop0 = XEXP (x, 0);
7670 rtx xop1 = XEXP (x, 1);
7672 return ((arm_address_register_rtx_p (xop0, strict_p)
7673 && ((CONST_INT_P (xop1)
7674 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7675 || (!strict_p && will_be_in_index_register (xop1))))
7676 || (arm_address_register_rtx_p (xop1, strict_p)
7677 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7680 #if 0
7681 /* Reload currently can't handle MINUS, so disable this for now */
7682 else if (GET_CODE (x) == MINUS)
7684 rtx xop0 = XEXP (x, 0);
7685 rtx xop1 = XEXP (x, 1);
7687 return (arm_address_register_rtx_p (xop0, strict_p)
7688 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7690 #endif
7692 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7693 && code == SYMBOL_REF
7694 && CONSTANT_POOL_ADDRESS_P (x)
7695 && ! (flag_pic
7696 && symbol_mentioned_p (get_pool_constant (x))
7697 && ! pcrel_constant_p (get_pool_constant (x))))
7698 return 1;
7700 return 0;
7703 /* Return nonzero if X is a valid Thumb-2 address operand. */
7704 static int
7705 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7707 bool use_ldrd;
7708 enum rtx_code code = GET_CODE (x);
7710 if (arm_address_register_rtx_p (x, strict_p))
7711 return 1;
7713 use_ldrd = (TARGET_LDRD
7714 && (mode == DImode || mode == DFmode));
7716 if (code == POST_INC || code == PRE_DEC
7717 || ((code == PRE_INC || code == POST_DEC)
7718 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7719 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7721 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7722 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7723 && GET_CODE (XEXP (x, 1)) == PLUS
7724 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7726 /* Thumb-2 only has autoincrement by constant. */
7727 rtx addend = XEXP (XEXP (x, 1), 1);
7728 HOST_WIDE_INT offset;
7730 if (!CONST_INT_P (addend))
7731 return 0;
7733 offset = INTVAL(addend);
7734 if (GET_MODE_SIZE (mode) <= 4)
7735 return (offset > -256 && offset < 256);
7737 return (use_ldrd && offset > -1024 && offset < 1024
7738 && (offset & 3) == 0);
7741 /* After reload, constants split into minipools will have addresses
7742 from a LABEL_REF. */
7743 else if (reload_completed
7744 && (code == LABEL_REF
7745 || (code == CONST
7746 && GET_CODE (XEXP (x, 0)) == PLUS
7747 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7748 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7749 return 1;
7751 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7752 return 0;
7754 else if (code == PLUS)
7756 rtx xop0 = XEXP (x, 0);
7757 rtx xop1 = XEXP (x, 1);
7759 return ((arm_address_register_rtx_p (xop0, strict_p)
7760 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7761 || (!strict_p && will_be_in_index_register (xop1))))
7762 || (arm_address_register_rtx_p (xop1, strict_p)
7763 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7766 /* Normally we can assign constant values to target registers without
7767 the help of the constant pool. But there are cases where we have to use
7768 the constant pool, for example:
7769 1) assigning a label to a register.
7770 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7772 A constant pool access in the format:
7773 (set (reg r0) (mem (symbol_ref (".LC0"))))
7774 will cause the use of the literal pool (later, in function arm_reorg).
7775 So here we mark such a format as invalid; the compiler will then
7776 adjust it into:
7777 (set (reg r0) (symbol_ref (".LC0")))
7778 (set (reg r0) (mem (reg r0))).
7779 No extra register is required, and (mem (reg r0)) won't cause the use
7780 of literal pools. */
7781 else if (arm_disable_literal_pool && code == SYMBOL_REF
7782 && CONSTANT_POOL_ADDRESS_P (x))
7783 return 0;
7785 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7786 && code == SYMBOL_REF
7787 && CONSTANT_POOL_ADDRESS_P (x)
7788 && ! (flag_pic
7789 && symbol_mentioned_p (get_pool_constant (x))
7790 && ! pcrel_constant_p (get_pool_constant (x))))
7791 return 1;
7793 return 0;
7796 /* Return nonzero if INDEX is valid for an address index operand in
7797 ARM state. */
7798 static int
7799 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7800 int strict_p)
7802 HOST_WIDE_INT range;
7803 enum rtx_code code = GET_CODE (index);
7805 /* Standard coprocessor addressing modes. */
7806 if (TARGET_HARD_FLOAT
7807 && (mode == SFmode || mode == DFmode))
7808 return (code == CONST_INT && INTVAL (index) < 1024
7809 && INTVAL (index) > -1024
7810 && (INTVAL (index) & 3) == 0);
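/* A sketch of where these bounds come from: VFP VLDR/VSTR take an 8-bit
   immediate scaled by 4, so the largest legal offsets are +/-255 * 4 =
   +/-1020; the test above (< 1024, > -1024, multiple of 4) accepts exactly
   -1020 .. 1020 in steps of 4.  */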
7812 /* For quad modes, we restrict the constant offset to be slightly less
7813 than what the instruction format permits. We do this because for
7814 quad mode moves, we will actually decompose them into two separate
7815 double-mode reads or writes. INDEX must therefore be a valid
7816 (double-mode) offset and so should INDEX+8. */
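/* Worked example of the bound: the largest double-mode offset accepted is
   1020 (< 1024 and a multiple of 4), so a quad access at offset INDEX needs
   both INDEX and INDEX + 8 to be at most 1020; the largest such INDEX that
   is a multiple of 4 is 1012, hence the "< 1016" test below.  */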
7817 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7818 return (code == CONST_INT
7819 && INTVAL (index) < 1016
7820 && INTVAL (index) > -1024
7821 && (INTVAL (index) & 3) == 0);
7823 /* We have no such constraint on double mode offsets, so we permit the
7824 full range of the instruction format. */
7825 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7826 return (code == CONST_INT
7827 && INTVAL (index) < 1024
7828 && INTVAL (index) > -1024
7829 && (INTVAL (index) & 3) == 0);
7831 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7832 return (code == CONST_INT
7833 && INTVAL (index) < 1024
7834 && INTVAL (index) > -1024
7835 && (INTVAL (index) & 3) == 0);
7837 if (arm_address_register_rtx_p (index, strict_p)
7838 && (GET_MODE_SIZE (mode) <= 4))
7839 return 1;
7841 if (mode == DImode || mode == DFmode)
7843 if (code == CONST_INT)
7845 HOST_WIDE_INT val = INTVAL (index);
7847 if (TARGET_LDRD)
7848 return val > -256 && val < 256;
7849 else
7850 return val > -4096 && val < 4092;
7853 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7856 if (GET_MODE_SIZE (mode) <= 4
7857 && ! (arm_arch4
7858 && (mode == HImode
7859 || mode == HFmode
7860 || (mode == QImode && outer == SIGN_EXTEND))))
7862 if (code == MULT)
7864 rtx xiop0 = XEXP (index, 0);
7865 rtx xiop1 = XEXP (index, 1);
7867 return ((arm_address_register_rtx_p (xiop0, strict_p)
7868 && power_of_two_operand (xiop1, SImode))
7869 || (arm_address_register_rtx_p (xiop1, strict_p)
7870 && power_of_two_operand (xiop0, SImode)));
7872 else if (code == LSHIFTRT || code == ASHIFTRT
7873 || code == ASHIFT || code == ROTATERT)
7875 rtx op = XEXP (index, 1);
7877 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7878 && CONST_INT_P (op)
7879 && INTVAL (op) > 0
7880 && INTVAL (op) <= 31);
7884 /* For ARM v4 we may be doing a sign-extend operation during the
7885 load. */
7886 if (arm_arch4)
7888 if (mode == HImode
7889 || mode == HFmode
7890 || (outer == SIGN_EXTEND && mode == QImode))
7891 range = 256;
7892 else
7893 range = 4096;
7895 else
7896 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7898 return (code == CONST_INT
7899 && INTVAL (index) < range
7900 && INTVAL (index) > -range);
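/* For reference (assuming the standard ARM encodings): ldr/str/ldrb/strb
   take a 12-bit immediate offset (+/-4095), while the ARMv4 halfword and
   signed-byte forms (ldrh/ldrsh/ldrsb) only take an 8-bit immediate offset
   (+/-255), which is why range is 256 for those modes above.  */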
7903 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
7904 index operand, i.e. 1, 2, 4 or 8. */
7905 static bool
7906 thumb2_index_mul_operand (rtx op)
7908 HOST_WIDE_INT val;
7910 if (!CONST_INT_P (op))
7911 return false;
7913 val = INTVAL(op);
7914 return (val == 1 || val == 2 || val == 4 || val == 8);
7917 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7918 static int
7919 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7921 enum rtx_code code = GET_CODE (index);
7923 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7924 /* Standard coprocessor addressing modes. */
7925 if (TARGET_HARD_FLOAT
7926 && (mode == SFmode || mode == DFmode))
7927 return (code == CONST_INT && INTVAL (index) < 1024
7928 /* Thumb-2 allows only > -256 index range for its core register
7929 load/stores. Since we allow SF/DF in core registers, we have
7930 to use the intersection between -256~4096 (core) and -1024~1024
7931 (coprocessor). */
7932 && INTVAL (index) > -256
7933 && (INTVAL (index) & 3) == 0);
7935 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7937 /* For DImode assume values will usually live in core regs
7938 and only allow LDRD addressing modes. */
7939 if (!TARGET_LDRD || mode != DImode)
7940 return (code == CONST_INT
7941 && INTVAL (index) < 1024
7942 && INTVAL (index) > -1024
7943 && (INTVAL (index) & 3) == 0);
7946 /* For quad modes, we restrict the constant offset to be slightly less
7947 than what the instruction format permits. We do this because for
7948 quad mode moves, we will actually decompose them into two separate
7949 double-mode reads or writes. INDEX must therefore be a valid
7950 (double-mode) offset and so should INDEX+8. */
7951 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7952 return (code == CONST_INT
7953 && INTVAL (index) < 1016
7954 && INTVAL (index) > -1024
7955 && (INTVAL (index) & 3) == 0);
7957 /* We have no such constraint on double mode offsets, so we permit the
7958 full range of the instruction format. */
7959 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7960 return (code == CONST_INT
7961 && INTVAL (index) < 1024
7962 && INTVAL (index) > -1024
7963 && (INTVAL (index) & 3) == 0);
7965 if (arm_address_register_rtx_p (index, strict_p)
7966 && (GET_MODE_SIZE (mode) <= 4))
7967 return 1;
7969 if (mode == DImode || mode == DFmode)
7971 if (code == CONST_INT)
7973 HOST_WIDE_INT val = INTVAL (index);
7974 /* ??? Can we assume ldrd for thumb2? */
7975 /* Thumb-2 ldrd only has reg+const addressing modes. */
7976 /* ldrd supports offsets of +-1020.
7977 However the ldr fallback does not. */
7978 return val > -256 && val < 256 && (val & 3) == 0;
7980 else
7981 return 0;
7984 if (code == MULT)
7986 rtx xiop0 = XEXP (index, 0);
7987 rtx xiop1 = XEXP (index, 1);
7989 return ((arm_address_register_rtx_p (xiop0, strict_p)
7990 && thumb2_index_mul_operand (xiop1))
7991 || (arm_address_register_rtx_p (xiop1, strict_p)
7992 && thumb2_index_mul_operand (xiop0)));
7994 else if (code == ASHIFT)
7996 rtx op = XEXP (index, 1);
7998 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7999 && CONST_INT_P (op)
8000 && INTVAL (op) > 0
8001 && INTVAL (op) <= 3);
8004 return (code == CONST_INT
8005 && INTVAL (index) < 4096
8006 && INTVAL (index) > -256);
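/* A note on the asymmetric bound above (assuming the usual Thumb-2
   encodings): LDR/STR have a 12-bit unsigned immediate form for positive
   offsets but only an 8-bit immediate form for negative offsets, so
   constant indices from -255 to 4095 are accepted.  */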
8009 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8010 static int
8011 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8013 int regno;
8015 if (!REG_P (x))
8016 return 0;
8018 regno = REGNO (x);
8020 if (strict_p)
8021 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8023 return (regno <= LAST_LO_REGNUM
8024 || regno > LAST_VIRTUAL_REGISTER
8025 || regno == FRAME_POINTER_REGNUM
8026 || (GET_MODE_SIZE (mode) >= 4
8027 && (regno == STACK_POINTER_REGNUM
8028 || regno >= FIRST_PSEUDO_REGISTER
8029 || x == hard_frame_pointer_rtx
8030 || x == arg_pointer_rtx)));
8033 /* Return nonzero if x is a legitimate index register. This is the case
8034 for any base register that can access a QImode object. */
8035 inline static int
8036 thumb1_index_register_rtx_p (rtx x, int strict_p)
8038 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8041 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8043 The AP may be eliminated to either the SP or the FP, so we use the
8044 least common denominator, e.g. SImode, and offsets from 0 to 64.
8046 ??? Verify whether the above is the right approach.
8048 ??? Also, the FP may be eliminated to the SP, so perhaps that
8049 needs special handling also.
8051 ??? Look at how the mips16 port solves this problem. It probably uses
8052 better ways to solve some of these problems.
8054 Although it is not incorrect, we don't accept QImode and HImode
8055 addresses based on the frame pointer or arg pointer until the
8056 reload pass starts. This is so that eliminating such addresses
8057 into stack-based ones won't produce impossible code. */
8059 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8061 /* ??? Not clear if this is right. Experiment. */
8062 if (GET_MODE_SIZE (mode) < 4
8063 && !(reload_in_progress || reload_completed)
8064 && (reg_mentioned_p (frame_pointer_rtx, x)
8065 || reg_mentioned_p (arg_pointer_rtx, x)
8066 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8067 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8068 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8069 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8070 return 0;
8072 /* Accept any base register. SP only in SImode or larger. */
8073 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8074 return 1;
8076 /* This is PC relative data before arm_reorg runs. */
8077 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8078 && GET_CODE (x) == SYMBOL_REF
8079 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8080 return 1;
8082 /* This is PC relative data after arm_reorg runs. */
8083 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8084 && reload_completed
8085 && (GET_CODE (x) == LABEL_REF
8086 || (GET_CODE (x) == CONST
8087 && GET_CODE (XEXP (x, 0)) == PLUS
8088 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8089 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8090 return 1;
8092 /* Post-inc indexing only supported for SImode and larger. */
8093 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8094 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8095 return 1;
8097 else if (GET_CODE (x) == PLUS)
8099 /* REG+REG address can be any two index registers. */
8100 /* We disallow FRAME+REG addressing since we know that FRAME
8101 will be replaced with STACK, and SP relative addressing only
8102 permits SP+OFFSET. */
8103 if (GET_MODE_SIZE (mode) <= 4
8104 && XEXP (x, 0) != frame_pointer_rtx
8105 && XEXP (x, 1) != frame_pointer_rtx
8106 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8107 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8108 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8109 return 1;
8111 /* REG+const has 5-7 bit offset for non-SP registers. */
8112 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8113 || XEXP (x, 0) == arg_pointer_rtx)
8114 && CONST_INT_P (XEXP (x, 1))
8115 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8116 return 1;
8118 /* REG+const has 10-bit offset for SP, but only SImode and
8119 larger are supported. */
8120 /* ??? Should probably check for DI/DFmode overflow here
8121 just like GO_IF_LEGITIMATE_OFFSET does. */
8122 else if (REG_P (XEXP (x, 0))
8123 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8124 && GET_MODE_SIZE (mode) >= 4
8125 && CONST_INT_P (XEXP (x, 1))
8126 && INTVAL (XEXP (x, 1)) >= 0
8127 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8128 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8129 return 1;
8131 else if (REG_P (XEXP (x, 0))
8132 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8133 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8134 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8135 && REGNO (XEXP (x, 0))
8136 <= LAST_VIRTUAL_POINTER_REGISTER))
8137 && GET_MODE_SIZE (mode) >= 4
8138 && CONST_INT_P (XEXP (x, 1))
8139 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8140 return 1;
8143 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8144 && GET_MODE_SIZE (mode) == 4
8145 && GET_CODE (x) == SYMBOL_REF
8146 && CONSTANT_POOL_ADDRESS_P (x)
8147 && ! (flag_pic
8148 && symbol_mentioned_p (get_pool_constant (x))
8149 && ! pcrel_constant_p (get_pool_constant (x))))
8150 return 1;
8152 return 0;
8155 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8156 instruction of mode MODE. */
8158 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8160 switch (GET_MODE_SIZE (mode))
8162 case 1:
8163 return val >= 0 && val < 32;
8165 case 2:
8166 return val >= 0 && val < 64 && (val & 1) == 0;
8168 default:
8169 return (val >= 0
8170 && (val + GET_MODE_SIZE (mode)) <= 128
8171 && (val & 3) == 0);
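/* A sketch of where these ranges come from (assuming the usual Thumb-1
   encodings): the immediate forms of ldrb/strb, ldrh/strh and ldr/str all
   take a 5-bit immediate scaled by the access size, giving byte offsets
   0..31, halfword offsets 0..62 (even) and word offsets 0..124 (multiples
   of 4).  The "val + size <= 128" form of the default case keeps the last
   word of a multi-word access addressable as well.  */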
8175 bool
8176 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8178 if (TARGET_ARM)
8179 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8180 else if (TARGET_THUMB2)
8181 return thumb2_legitimate_address_p (mode, x, strict_p);
8182 else /* if (TARGET_THUMB1) */
8183 return thumb1_legitimate_address_p (mode, x, strict_p);
8186 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8188 Given an rtx X being reloaded into a reg required to be
8189 in class CLASS, return the class of reg to actually use.
8190 In general this is just CLASS, but for the Thumb core registers and
8191 immediate constants we prefer a LO_REGS class or a subset. */
8193 static reg_class_t
8194 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8196 if (TARGET_32BIT)
8197 return rclass;
8198 else
8200 if (rclass == GENERAL_REGS)
8201 return LO_REGS;
8202 else
8203 return rclass;
8207 /* Build the SYMBOL_REF for __tls_get_addr. */
8209 static GTY(()) rtx tls_get_addr_libfunc;
8211 static rtx
8212 get_tls_get_addr (void)
8214 if (!tls_get_addr_libfunc)
8215 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8216 return tls_get_addr_libfunc;
8220 arm_load_tp (rtx target)
8222 if (!target)
8223 target = gen_reg_rtx (SImode);
8225 if (TARGET_HARD_TP)
8227 /* Can return in any reg. */
8228 emit_insn (gen_load_tp_hard (target));
8230 else
8232 /* Always returned in r0. Immediately copy the result into a pseudo;
8233 otherwise other uses of r0 (e.g. setting up function arguments) may
8234 clobber the value. */
8236 rtx tmp;
8238 emit_insn (gen_load_tp_soft ());
8240 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8241 emit_move_insn (target, tmp);
8243 return target;
8246 static rtx
8247 load_tls_operand (rtx x, rtx reg)
8249 rtx tmp;
8251 if (reg == NULL_RTX)
8252 reg = gen_reg_rtx (SImode);
8254 tmp = gen_rtx_CONST (SImode, x);
8256 emit_move_insn (reg, tmp);
8258 return reg;
8261 static rtx_insn *
8262 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8264 rtx label, labelno, sum;
8266 gcc_assert (reloc != TLS_DESCSEQ);
8267 start_sequence ();
8269 labelno = GEN_INT (pic_labelno++);
8270 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8271 label = gen_rtx_CONST (VOIDmode, label);
8273 sum = gen_rtx_UNSPEC (Pmode,
8274 gen_rtvec (4, x, GEN_INT (reloc), label,
8275 GEN_INT (TARGET_ARM ? 8 : 4)),
8276 UNSPEC_TLS);
8277 reg = load_tls_operand (sum, reg);
8279 if (TARGET_ARM)
8280 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8281 else
8282 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8284 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8285 LCT_PURE, /* LCT_CONST? */
8286 Pmode, 1, reg, Pmode);
8288 rtx_insn *insns = get_insns ();
8289 end_sequence ();
8291 return insns;
8294 static rtx
8295 arm_tls_descseq_addr (rtx x, rtx reg)
8297 rtx labelno = GEN_INT (pic_labelno++);
8298 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8299 rtx sum = gen_rtx_UNSPEC (Pmode,
8300 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8301 gen_rtx_CONST (VOIDmode, label),
8302 GEN_INT (!TARGET_ARM)),
8303 UNSPEC_TLS);
8304 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8306 emit_insn (gen_tlscall (x, labelno));
8307 if (!reg)
8308 reg = gen_reg_rtx (SImode);
8309 else
8310 gcc_assert (REGNO (reg) != R0_REGNUM);
8312 emit_move_insn (reg, reg0);
8314 return reg;
8318 legitimize_tls_address (rtx x, rtx reg)
8320 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8321 rtx_insn *insns;
8322 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8324 switch (model)
8326 case TLS_MODEL_GLOBAL_DYNAMIC:
8327 if (TARGET_GNU2_TLS)
8329 reg = arm_tls_descseq_addr (x, reg);
8331 tp = arm_load_tp (NULL_RTX);
8333 dest = gen_rtx_PLUS (Pmode, tp, reg);
8335 else
8337 /* Original scheme */
8338 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8339 dest = gen_reg_rtx (Pmode);
8340 emit_libcall_block (insns, dest, ret, x);
8342 return dest;
8344 case TLS_MODEL_LOCAL_DYNAMIC:
8345 if (TARGET_GNU2_TLS)
8347 reg = arm_tls_descseq_addr (x, reg);
8349 tp = arm_load_tp (NULL_RTX);
8351 dest = gen_rtx_PLUS (Pmode, tp, reg);
8353 else
8355 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8357 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8358 share the LDM result with other LD model accesses. */
8359 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8360 UNSPEC_TLS);
8361 dest = gen_reg_rtx (Pmode);
8362 emit_libcall_block (insns, dest, ret, eqv);
8364 /* Load the addend. */
8365 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8366 GEN_INT (TLS_LDO32)),
8367 UNSPEC_TLS);
8368 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8369 dest = gen_rtx_PLUS (Pmode, dest, addend);
8371 return dest;
8373 case TLS_MODEL_INITIAL_EXEC:
8374 labelno = GEN_INT (pic_labelno++);
8375 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8376 label = gen_rtx_CONST (VOIDmode, label);
8377 sum = gen_rtx_UNSPEC (Pmode,
8378 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8379 GEN_INT (TARGET_ARM ? 8 : 4)),
8380 UNSPEC_TLS);
8381 reg = load_tls_operand (sum, reg);
8383 if (TARGET_ARM)
8384 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8385 else if (TARGET_THUMB2)
8386 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8387 else
8389 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8390 emit_move_insn (reg, gen_const_mem (SImode, reg));
8393 tp = arm_load_tp (NULL_RTX);
8395 return gen_rtx_PLUS (Pmode, tp, reg);
8397 case TLS_MODEL_LOCAL_EXEC:
8398 tp = arm_load_tp (NULL_RTX);
8400 reg = gen_rtx_UNSPEC (Pmode,
8401 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8402 UNSPEC_TLS);
8403 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8405 return gen_rtx_PLUS (Pmode, tp, reg);
8407 default:
8408 abort ();
8412 /* Try machine-dependent ways of modifying an illegitimate address
8413 to be legitimate. If we find one, return the new, valid address. */
8415 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8417 if (arm_tls_referenced_p (x))
8419 rtx addend = NULL;
8421 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8423 addend = XEXP (XEXP (x, 0), 1);
8424 x = XEXP (XEXP (x, 0), 0);
8427 if (GET_CODE (x) != SYMBOL_REF)
8428 return x;
8430 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8432 x = legitimize_tls_address (x, NULL_RTX);
8434 if (addend)
8436 x = gen_rtx_PLUS (SImode, x, addend);
8437 orig_x = x;
8439 else
8440 return x;
8443 if (!TARGET_ARM)
8445 /* TODO: legitimize_address for Thumb2. */
8446 if (TARGET_THUMB2)
8447 return x;
8448 return thumb_legitimize_address (x, orig_x, mode);
8451 if (GET_CODE (x) == PLUS)
8453 rtx xop0 = XEXP (x, 0);
8454 rtx xop1 = XEXP (x, 1);
8456 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8457 xop0 = force_reg (SImode, xop0);
8459 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8460 && !symbol_mentioned_p (xop1))
8461 xop1 = force_reg (SImode, xop1);
8463 if (ARM_BASE_REGISTER_RTX_P (xop0)
8464 && CONST_INT_P (xop1))
8466 HOST_WIDE_INT n, low_n;
8467 rtx base_reg, val;
8468 n = INTVAL (xop1);
8470 /* VFP addressing modes actually allow greater offsets, but for
8471 now we just stick with the lowest common denominator. */
8472 if (mode == DImode || mode == DFmode)
8474 low_n = n & 0x0f;
8475 n &= ~0x0f;
8476 if (low_n > 4)
8478 n += 16;
8479 low_n -= 16;
8482 else
8484 low_n = ((mode) == TImode ? 0
8485 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8486 n -= low_n;
8489 base_reg = gen_reg_rtx (SImode);
8490 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8491 emit_move_insn (base_reg, val);
8492 x = plus_constant (Pmode, base_reg, low_n);
8494 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8495 x = gen_rtx_PLUS (SImode, xop0, xop1);
8498 /* XXX We don't allow MINUS any more -- see comment in
8499 arm_legitimate_address_outer_p (). */
8500 else if (GET_CODE (x) == MINUS)
8502 rtx xop0 = XEXP (x, 0);
8503 rtx xop1 = XEXP (x, 1);
8505 if (CONSTANT_P (xop0))
8506 xop0 = force_reg (SImode, xop0);
8508 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8509 xop1 = force_reg (SImode, xop1);
8511 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8512 x = gen_rtx_MINUS (SImode, xop0, xop1);
8515 /* Make sure to take full advantage of the pre-indexed addressing mode
8516 with absolute addresses, which often allows the base register to be
8517 factored out across multiple adjacent memory references, and might
8518 even allow the minipool to be avoided entirely. */
8519 else if (CONST_INT_P (x) && optimize > 0)
8521 unsigned int bits;
8522 HOST_WIDE_INT mask, base, index;
8523 rtx base_reg;
8525 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8526 use an 8-bit index. So let's use a 12-bit index for SImode only and
8527 hope that arm_gen_constant will enable ldrb to use more bits. */
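/* Worked example (with illustrative values): loading an SImode value from
   absolute address 0x12345 gives bits = 12, mask = 0xfff, base = 0x12000
   and index = 0x345; the base is materialized once in a register and the
   access becomes ldr rX, [rBASE, #0x345].  When the base alone would need
   many set bits, the code below instead sets the low bits too (base |= mask)
   and compensates with a negative index, since such a value is usually
   cheaper for arm_gen_constant to build.  */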
8528 bits = (mode == SImode) ? 12 : 8;
8529 mask = (1 << bits) - 1;
8530 base = INTVAL (x) & ~mask;
8531 index = INTVAL (x) & mask;
8532 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8534 /* It'll most probably be more efficient to generate the base
8535 with more bits set and use a negative index instead. */
8536 base |= mask;
8537 index -= mask;
8539 base_reg = force_reg (SImode, GEN_INT (base));
8540 x = plus_constant (Pmode, base_reg, index);
8543 if (flag_pic)
8545 /* We need to find and carefully transform any SYMBOL and LABEL
8546 references; so go back to the original address expression. */
8547 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8549 if (new_x != orig_x)
8550 x = new_x;
8553 return x;
8557 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8558 to be legitimate. If we find one, return the new, valid address. */
8560 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8562 if (GET_CODE (x) == PLUS
8563 && CONST_INT_P (XEXP (x, 1))
8564 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8565 || INTVAL (XEXP (x, 1)) < 0))
8567 rtx xop0 = XEXP (x, 0);
8568 rtx xop1 = XEXP (x, 1);
8569 HOST_WIDE_INT offset = INTVAL (xop1);
8571 /* Try and fold the offset into a biasing of the base register and
8572 then offsetting that. Don't do this when optimizing for space
8573 since it can cause too many CSEs. */
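/* Worked example (with illustrative values): for an SImode access at offset
   300 this path computes delta = 300 - (256 - 4) = 48, biases the base
   register by 300 - 48 = 252 and leaves a residual offset of 48, which fits
   the 0..124 word-offset range accepted by thumb_legitimate_offset_p.  */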
8574 if (optimize_size && offset >= 0
8575 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8577 HOST_WIDE_INT delta;
8579 if (offset >= 256)
8580 delta = offset - (256 - GET_MODE_SIZE (mode));
8581 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8582 delta = 31 * GET_MODE_SIZE (mode);
8583 else
8584 delta = offset & (~31 * GET_MODE_SIZE (mode));
8586 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8587 NULL_RTX);
8588 x = plus_constant (Pmode, xop0, delta);
8590 else if (offset < 0 && offset > -256)
8591 /* Small negative offsets are best done with a subtract before the
8592 dereference; forcing these into a register normally takes two
8593 instructions. */
8594 x = force_operand (x, NULL_RTX);
8595 else
8597 /* For the remaining cases, force the constant into a register. */
8598 xop1 = force_reg (SImode, xop1);
8599 x = gen_rtx_PLUS (SImode, xop0, xop1);
8602 else if (GET_CODE (x) == PLUS
8603 && s_register_operand (XEXP (x, 1), SImode)
8604 && !s_register_operand (XEXP (x, 0), SImode))
8606 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8608 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8611 if (flag_pic)
8613 /* We need to find and carefully transform any SYMBOL and LABEL
8614 references; so go back to the original address expression. */
8615 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8617 if (new_x != orig_x)
8618 x = new_x;
8621 return x;
8624 /* Return TRUE if X contains any TLS symbol references. */
8626 bool
8627 arm_tls_referenced_p (rtx x)
8629 if (! TARGET_HAVE_TLS)
8630 return false;
8632 subrtx_iterator::array_type array;
8633 FOR_EACH_SUBRTX (iter, array, x, ALL)
8635 const_rtx x = *iter;
8636 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8637 return true;
8639 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8640 TLS offsets, not real symbol references. */
8641 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8642 iter.skip_subrtxes ();
8644 return false;
8647 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8649 On the ARM, allow any integer (invalid ones are removed later by insn
8650 patterns), nice doubles and symbol_refs which refer to the function's
8651 constant pool XXX.
8653 When generating PIC, allow anything. */
8655 static bool
8656 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8658 return flag_pic || !label_mentioned_p (x);
8661 static bool
8662 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8664 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8665 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8666 for ARMv8-M Baseline or later the result is valid. */
8667 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8668 x = XEXP (x, 0);
8670 return (CONST_INT_P (x)
8671 || CONST_DOUBLE_P (x)
8672 || CONSTANT_ADDRESS_P (x)
8673 || flag_pic);
8676 static bool
8677 arm_legitimate_constant_p (machine_mode mode, rtx x)
8679 return (!arm_cannot_force_const_mem (mode, x)
8680 && (TARGET_32BIT
8681 ? arm_legitimate_constant_p_1 (mode, x)
8682 : thumb_legitimate_constant_p (mode, x)));
8685 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8687 static bool
8688 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8690 rtx base, offset;
8692 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8694 split_const (x, &base, &offset);
8695 if (GET_CODE (base) == SYMBOL_REF
8696 && !offset_within_block_p (base, INTVAL (offset)))
8697 return true;
8699 return arm_tls_referenced_p (x);
8702 #define REG_OR_SUBREG_REG(X) \
8703 (REG_P (X) \
8704 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8706 #define REG_OR_SUBREG_RTX(X) \
8707 (REG_P (X) ? (X) : SUBREG_REG (X))
8709 static inline int
8710 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8712 machine_mode mode = GET_MODE (x);
8713 int total, words;
8715 switch (code)
8717 case ASHIFT:
8718 case ASHIFTRT:
8719 case LSHIFTRT:
8720 case ROTATERT:
8721 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8723 case PLUS:
8724 case MINUS:
8725 case COMPARE:
8726 case NEG:
8727 case NOT:
8728 return COSTS_N_INSNS (1);
8730 case MULT:
8731 if (CONST_INT_P (XEXP (x, 1)))
8733 int cycles = 0;
8734 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
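/* The loop below charges one extra unit per two bits of the constant
   multiplier; e.g. for i = 100 (0x64, seven significant bits) it iterates
   four times, so the whole MULT is costed as COSTS_N_INSNS (2) + 4.  */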
8736 while (i)
8738 i >>= 2;
8739 cycles++;
8741 return COSTS_N_INSNS (2) + cycles;
8743 return COSTS_N_INSNS (1) + 16;
8745 case SET:
8746 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8747 the mode. */
8748 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8749 return (COSTS_N_INSNS (words)
8750 + 4 * ((MEM_P (SET_SRC (x)))
8751 + MEM_P (SET_DEST (x))));
8753 case CONST_INT:
8754 if (outer == SET)
8756 if (UINTVAL (x) < 256
8757 /* 16-bit constant. */
8758 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8759 return 0;
8760 if (thumb_shiftable_const (INTVAL (x)))
8761 return COSTS_N_INSNS (2);
8762 return COSTS_N_INSNS (3);
8764 else if ((outer == PLUS || outer == COMPARE)
8765 && INTVAL (x) < 256 && INTVAL (x) > -256)
8766 return 0;
8767 else if ((outer == IOR || outer == XOR || outer == AND)
8768 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8769 return COSTS_N_INSNS (1);
8770 else if (outer == AND)
8772 int i;
8773 /* This duplicates the tests in the andsi3 expander. */
8774 for (i = 9; i <= 31; i++)
8775 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8776 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8777 return COSTS_N_INSNS (2);
8779 else if (outer == ASHIFT || outer == ASHIFTRT
8780 || outer == LSHIFTRT)
8781 return 0;
8782 return COSTS_N_INSNS (2);
8784 case CONST:
8785 case CONST_DOUBLE:
8786 case LABEL_REF:
8787 case SYMBOL_REF:
8788 return COSTS_N_INSNS (3);
8790 case UDIV:
8791 case UMOD:
8792 case DIV:
8793 case MOD:
8794 return 100;
8796 case TRUNCATE:
8797 return 99;
8799 case AND:
8800 case XOR:
8801 case IOR:
8802 /* XXX guess. */
8803 return 8;
8805 case MEM:
8806 /* XXX another guess. */
8807 /* Memory costs quite a lot for the first word, but subsequent words
8808 load at the equivalent of a single insn each. */
8809 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8810 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8811 ? 4 : 0));
8813 case IF_THEN_ELSE:
8814 /* XXX a guess. */
8815 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8816 return 14;
8817 return 2;
8819 case SIGN_EXTEND:
8820 case ZERO_EXTEND:
8821 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8822 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8824 if (mode == SImode)
8825 return total;
8827 if (arm_arch6)
8828 return total + COSTS_N_INSNS (1);
8830 /* Assume a two-shift sequence. Increase the cost slightly so
8831 we prefer actual shifts over an extend operation. */
8832 return total + 1 + COSTS_N_INSNS (2);
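/* For reference, the two-shift sequence costed above is the classic
   pre-ARMv6 extension, e.g. sign-extending a byte in r0 with
   lsls r0, r0, #24; asrs r0, r0, #24 (lsrs for the zero-extend case);
   with arm_arch6 the single sxtb/uxtb/sxth/uxth instructions are costed
   instead.  */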
8834 default:
8835 return 99;
8839 /* Estimates the size cost of thumb1 instructions.
8840 For now most of the code is copied from thumb1_rtx_costs. We need more
8841 fine-grained tuning when we have more related test cases. */
8842 static inline int
8843 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8845 machine_mode mode = GET_MODE (x);
8846 int words, cost;
8848 switch (code)
8850 case ASHIFT:
8851 case ASHIFTRT:
8852 case LSHIFTRT:
8853 case ROTATERT:
8854 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8856 case PLUS:
8857 case MINUS:
8858 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8859 defined by RTL expansion, especially for the expansion of
8860 multiplication. */
8861 if ((GET_CODE (XEXP (x, 0)) == MULT
8862 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8863 || (GET_CODE (XEXP (x, 1)) == MULT
8864 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8865 return COSTS_N_INSNS (2);
8866 /* Fall through. */
8867 case COMPARE:
8868 case NEG:
8869 case NOT:
8870 return COSTS_N_INSNS (1);
8872 case MULT:
8873 if (CONST_INT_P (XEXP (x, 1)))
8875 /* The Thumb-1 mul instruction can't operate on a constant. We must
8876 load it into a register first. */
8877 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8878 /* For targets that have a very small and high-latency multiply
8879 unit, we prefer to synthesize the mult with up to 5 instructions,
8880 giving a good balance between size and performance. */
8881 if (arm_arch6m && arm_m_profile_small_mul)
8882 return COSTS_N_INSNS (5);
8883 else
8884 return COSTS_N_INSNS (1) + const_size;
8886 return COSTS_N_INSNS (1);
8888 case SET:
8889 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8890 the mode. */
8891 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8892 cost = COSTS_N_INSNS (words);
8893 if (satisfies_constraint_J (SET_SRC (x))
8894 || satisfies_constraint_K (SET_SRC (x))
8895 /* Too big an immediate for a 2-byte mov, so MOVT is used. */
8896 || (CONST_INT_P (SET_SRC (x))
8897 && UINTVAL (SET_SRC (x)) >= 256
8898 && TARGET_HAVE_MOVT
8899 && satisfies_constraint_j (SET_SRC (x)))
8900 /* thumb1_movdi_insn. */
8901 || ((words > 1) && MEM_P (SET_SRC (x))))
8902 cost += COSTS_N_INSNS (1);
8903 return cost;
8905 case CONST_INT:
8906 if (outer == SET)
8908 if (UINTVAL (x) < 256)
8909 return COSTS_N_INSNS (1);
8910 /* movw is 4 bytes long. */
8911 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
8912 return COSTS_N_INSNS (2);
8913 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8914 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8915 return COSTS_N_INSNS (2);
8916 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8917 if (thumb_shiftable_const (INTVAL (x)))
8918 return COSTS_N_INSNS (2);
8919 return COSTS_N_INSNS (3);
8921 else if ((outer == PLUS || outer == COMPARE)
8922 && INTVAL (x) < 256 && INTVAL (x) > -256)
8923 return 0;
8924 else if ((outer == IOR || outer == XOR || outer == AND)
8925 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8926 return COSTS_N_INSNS (1);
8927 else if (outer == AND)
8929 int i;
8930 /* This duplicates the tests in the andsi3 expander. */
8931 for (i = 9; i <= 31; i++)
8932 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8933 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8934 return COSTS_N_INSNS (2);
8936 else if (outer == ASHIFT || outer == ASHIFTRT
8937 || outer == LSHIFTRT)
8938 return 0;
8939 return COSTS_N_INSNS (2);
8941 case CONST:
8942 case CONST_DOUBLE:
8943 case LABEL_REF:
8944 case SYMBOL_REF:
8945 return COSTS_N_INSNS (3);
8947 case UDIV:
8948 case UMOD:
8949 case DIV:
8950 case MOD:
8951 return 100;
8953 case TRUNCATE:
8954 return 99;
8956 case AND:
8957 case XOR:
8958 case IOR:
8959 return COSTS_N_INSNS (1);
8961 case MEM:
8962 return (COSTS_N_INSNS (1)
8963 + COSTS_N_INSNS (1)
8964 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8965 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8966 ? COSTS_N_INSNS (1) : 0));
8968 case IF_THEN_ELSE:
8969 /* XXX a guess. */
8970 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8971 return 14;
8972 return 2;
8974 case ZERO_EXTEND:
8975 /* XXX still guessing. */
8976 switch (GET_MODE (XEXP (x, 0)))
8978 case QImode:
8979 return (1 + (mode == DImode ? 4 : 0)
8980 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8982 case HImode:
8983 return (4 + (mode == DImode ? 4 : 0)
8984 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8986 case SImode:
8987 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8989 default:
8990 return 99;
8993 default:
8994 return 99;
8998 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8999 operand, then return the operand that is being shifted. If the shift
9000 is not by a constant, then set SHIFT_REG to point to the operand.
9001 Return NULL if OP is not a shifter operand. */
9002 static rtx
9003 shifter_op_p (rtx op, rtx *shift_reg)
9005 enum rtx_code code = GET_CODE (op);
9007 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9008 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9009 return XEXP (op, 0);
9010 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9011 return XEXP (op, 0);
9012 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9013 || code == ASHIFTRT)
9015 if (!CONST_INT_P (XEXP (op, 1)))
9016 *shift_reg = XEXP (op, 1);
9017 return XEXP (op, 0);
9020 return NULL;
9023 static bool
9024 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9026 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9027 rtx_code code = GET_CODE (x);
9028 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9030 switch (XINT (x, 1))
9032 case UNSPEC_UNALIGNED_LOAD:
9033 /* We can only do unaligned loads into the integer unit, and we can't
9034 use LDM or LDRD. */
9035 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9036 if (speed_p)
9037 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9038 + extra_cost->ldst.load_unaligned);
9040 #ifdef NOT_YET
9041 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9042 ADDR_SPACE_GENERIC, speed_p);
9043 #endif
9044 return true;
9046 case UNSPEC_UNALIGNED_STORE:
9047 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9048 if (speed_p)
9049 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9050 + extra_cost->ldst.store_unaligned);
9052 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9053 #ifdef NOT_YET
9054 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9055 ADDR_SPACE_GENERIC, speed_p);
9056 #endif
9057 return true;
9059 case UNSPEC_VRINTZ:
9060 case UNSPEC_VRINTP:
9061 case UNSPEC_VRINTM:
9062 case UNSPEC_VRINTR:
9063 case UNSPEC_VRINTX:
9064 case UNSPEC_VRINTA:
9065 if (speed_p)
9066 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9068 return true;
9069 default:
9070 *cost = COSTS_N_INSNS (2);
9071 break;
9073 return true;
9076 /* Cost of a libcall. We assume one insn per argument, an amount for the
9077 call (one insn for -Os) and then one for processing the result. */
9078 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
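/* For example, a two-operand libcall such as a DImode division is costed as
   LIBCALL_COST (2) = COSTS_N_INSNS (20) when optimizing for speed and
   COSTS_N_INSNS (4) when optimizing for size (two argument insns plus one
   for the call and one for the result).  */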
9080 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9081 do \
9083 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9084 if (shift_op != NULL \
9085 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9087 if (shift_reg) \
9089 if (speed_p) \
9090 *cost += extra_cost->alu.arith_shift_reg; \
9091 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9092 ASHIFT, 1, speed_p); \
9094 else if (speed_p) \
9095 *cost += extra_cost->alu.arith_shift; \
9097 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9098 ASHIFT, 0, speed_p) \
9099 + rtx_cost (XEXP (x, 1 - IDX), \
9100 GET_MODE (shift_op), \
9101 OP, 1, speed_p)); \
9102 return true; \
9105 while (0);
9107 /* RTX costs. Make an estimate of the cost of executing the operation
9108 X, which is contained within an operation with code OUTER_CODE.
9109 SPEED_P indicates whether the cost desired is the performance cost,
9110 or the size cost. The estimate is stored in COST and the return
9111 value is TRUE if the cost calculation is final, or FALSE if the
9112 caller should recurse through the operands of X to add additional
9113 costs.
9115 We currently make no attempt to model the size savings of Thumb-2
9116 16-bit instructions. At the normal points in compilation where
9117 this code is called we have no measure of whether the condition
9118 flags are live or not, and thus no realistic way to determine what
9119 the size will eventually be. */
9120 static bool
9121 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9122 const struct cpu_cost_table *extra_cost,
9123 int *cost, bool speed_p)
9125 machine_mode mode = GET_MODE (x);
9127 *cost = COSTS_N_INSNS (1);
9129 if (TARGET_THUMB1)
9131 if (speed_p)
9132 *cost = thumb1_rtx_costs (x, code, outer_code);
9133 else
9134 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9135 return true;
9138 switch (code)
9140 case SET:
9141 *cost = 0;
9142 /* SET RTXs don't have a mode so we get it from the destination. */
9143 mode = GET_MODE (SET_DEST (x));
9145 if (REG_P (SET_SRC (x))
9146 && REG_P (SET_DEST (x)))
9148 /* Assume that most copies can be done with a single insn,
9149 unless we don't have HW FP, in which case everything
9150 larger than word mode will require two insns. */
9151 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9152 && GET_MODE_SIZE (mode) > 4)
9153 || mode == DImode)
9154 ? 2 : 1);
9155 /* Conditional register moves can be encoded
9156 in 16 bits in Thumb mode. */
9157 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9158 *cost >>= 1;
9160 return true;
9163 if (CONST_INT_P (SET_SRC (x)))
9165 /* Handle CONST_INT here, since the value doesn't have a mode
9166 and we would otherwise be unable to work out the true cost. */
9167 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9168 0, speed_p);
9169 outer_code = SET;
9170 /* Slightly lower the cost of setting a core reg to a constant.
9171 This helps break up chains and allows for better scheduling. */
9172 if (REG_P (SET_DEST (x))
9173 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9174 *cost -= 1;
9175 x = SET_SRC (x);
9176 /* Immediate moves with an immediate in the range [0, 255] can be
9177 encoded in 16 bits in Thumb mode. */
9178 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9179 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9180 *cost >>= 1;
9181 goto const_int_cost;
9184 return false;
9186 case MEM:
9187 /* A memory access costs 1 insn if the mode is small or the address is
9188 a single register; otherwise it costs one insn per word. */
9189 if (REG_P (XEXP (x, 0)))
9190 *cost = COSTS_N_INSNS (1);
9191 else if (flag_pic
9192 && GET_CODE (XEXP (x, 0)) == PLUS
9193 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9194 /* This will be split into two instructions.
9195 See arm.md:calculate_pic_address. */
9196 *cost = COSTS_N_INSNS (2);
9197 else
9198 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9200 /* For speed optimizations, add the costs of the address and
9201 accessing memory. */
9202 if (speed_p)
9203 #ifdef NOT_YET
9204 *cost += (extra_cost->ldst.load
9205 + arm_address_cost (XEXP (x, 0), mode,
9206 ADDR_SPACE_GENERIC, speed_p));
9207 #else
9208 *cost += extra_cost->ldst.load;
9209 #endif
9210 return true;
9212 case PARALLEL:
9214 /* Calculations of LDM costs are complex. We assume an initial cost
9215 (ldm_1st) which covers loading the first
9216 ldm_regs_per_insn_1st registers; then each additional
9217 ldm_regs_per_insn_subsequent registers cost one more insn. The
9218 formula for N regs is thus:
9220 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9221 + ldm_regs_per_insn_subsequent - 1)
9222 / ldm_regs_per_insn_subsequent).
9224 Additional costs may also be added for addressing. A similar
9225 formula is used for STM. */
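/* Worked example (illustrative, not part of the original source; the per-CPU
   values are hypothetical): with ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2, an LDM of N == 7 registers adds
   COSTS_N_INSNS ((MAX (7 - 2, 0) + 2 - 1) / 2) == COSTS_N_INSNS (3)
   on top of the first-instruction cost.  */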
9227 bool is_ldm = load_multiple_operation (x, SImode);
9228 bool is_stm = store_multiple_operation (x, SImode);
9230 if (is_ldm || is_stm)
9232 if (speed_p)
9234 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9235 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9236 ? extra_cost->ldst.ldm_regs_per_insn_1st
9237 : extra_cost->ldst.stm_regs_per_insn_1st;
9238 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9239 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9240 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9242 *cost += regs_per_insn_1st
9243 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9244 + regs_per_insn_sub - 1)
9245 / regs_per_insn_sub);
9246 return true;
9250 return false;
9252 case DIV:
9253 case UDIV:
9254 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9255 && (mode == SFmode || !TARGET_VFP_SINGLE))
9256 *cost += COSTS_N_INSNS (speed_p
9257 ? extra_cost->fp[mode != SFmode].div : 0);
9258 else if (mode == SImode && TARGET_IDIV)
9259 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9260 else
9261 *cost = LIBCALL_COST (2);
9262 return false; /* All arguments must be in registers. */
9264 case MOD:
9265 /* MOD by a power of 2 can be expanded as:
9266 rsbs r1, r0, #0
9267 and r0, r0, #(n - 1)
9268 and r1, r1, #(n - 1)
9269 rsbpl r0, r1, #0. */
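/* Worked example (illustrative, not part of the original source): for
   r0 == -5 and n == 4, RSBS gives r1 = 5 with the N flag clear, the two
   ANDs give r0 = 3 and r1 = 1, and RSBPL therefore executes, leaving
   r0 = -1, which matches -5 % 4 in C; for a positive r0 the RSBS result
   is negative, RSBPL is skipped, and the first AND already holds the
   remainder.  The sequence is four insns, hence the extra
   COSTS_N_INSNS (3) added below.  */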
9270 if (CONST_INT_P (XEXP (x, 1))
9271 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9272 && mode == SImode)
9274 *cost += COSTS_N_INSNS (3);
9276 if (speed_p)
9277 *cost += 2 * extra_cost->alu.logical
9278 + extra_cost->alu.arith;
9279 return true;
9282 /* Fall-through. */
9283 case UMOD:
9284 *cost = LIBCALL_COST (2);
9285 return false; /* All arguments must be in registers. */
9287 case ROTATE:
9288 if (mode == SImode && REG_P (XEXP (x, 1)))
9290 *cost += (COSTS_N_INSNS (1)
9291 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9292 if (speed_p)
9293 *cost += extra_cost->alu.shift_reg;
9294 return true;
9296 /* Fall through */
9297 case ROTATERT:
9298 case ASHIFT:
9299 case LSHIFTRT:
9300 case ASHIFTRT:
9301 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9303 *cost += (COSTS_N_INSNS (2)
9304 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9305 if (speed_p)
9306 *cost += 2 * extra_cost->alu.shift;
9307 return true;
9309 else if (mode == SImode)
9311 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9312 /* Slightly disparage register shifts at -Os, but not by much. */
9313 if (!CONST_INT_P (XEXP (x, 1)))
9314 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9315 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9316 return true;
9318 else if (GET_MODE_CLASS (mode) == MODE_INT
9319 && GET_MODE_SIZE (mode) < 4)
9321 if (code == ASHIFT)
9323 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9324 /* Slightly disparage register shifts at -Os, but not by
9325 much. */
9326 if (!CONST_INT_P (XEXP (x, 1)))
9327 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9328 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9330 else if (code == LSHIFTRT || code == ASHIFTRT)
9332 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9334 /* Can use SBFX/UBFX. */
9335 if (speed_p)
9336 *cost += extra_cost->alu.bfx;
9337 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9339 else
9341 *cost += COSTS_N_INSNS (1);
9342 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9343 if (speed_p)
9345 if (CONST_INT_P (XEXP (x, 1)))
9346 *cost += 2 * extra_cost->alu.shift;
9347 else
9348 *cost += (extra_cost->alu.shift
9349 + extra_cost->alu.shift_reg);
9351 else
9352 /* Slightly disparage register shifts. */
9353 *cost += !CONST_INT_P (XEXP (x, 1));
9356 else /* Rotates. */
9358 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9359 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9360 if (speed_p)
9362 if (CONST_INT_P (XEXP (x, 1)))
9363 *cost += (2 * extra_cost->alu.shift
9364 + extra_cost->alu.log_shift);
9365 else
9366 *cost += (extra_cost->alu.shift
9367 + extra_cost->alu.shift_reg
9368 + extra_cost->alu.log_shift_reg);
9371 return true;
9374 *cost = LIBCALL_COST (2);
9375 return false;
9377 case BSWAP:
9378 if (arm_arch6)
9380 if (mode == SImode)
9382 if (speed_p)
9383 *cost += extra_cost->alu.rev;
9385 return false;
9388 else
9390 /* No rev instruction available. Look at arm_legacy_rev
9391 and thumb_legacy_rev for the form of RTL used then. */
9392 if (TARGET_THUMB)
9394 *cost += COSTS_N_INSNS (9);
9396 if (speed_p)
9398 *cost += 6 * extra_cost->alu.shift;
9399 *cost += 3 * extra_cost->alu.logical;
9402 else
9404 *cost += COSTS_N_INSNS (4);
9406 if (speed_p)
9408 *cost += 2 * extra_cost->alu.shift;
9409 *cost += extra_cost->alu.arith_shift;
9410 *cost += 2 * extra_cost->alu.logical;
9413 return true;
9415 return false;
9417 case MINUS:
9418 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9419 && (mode == SFmode || !TARGET_VFP_SINGLE))
9421 if (GET_CODE (XEXP (x, 0)) == MULT
9422 || GET_CODE (XEXP (x, 1)) == MULT)
9424 rtx mul_op0, mul_op1, sub_op;
9426 if (speed_p)
9427 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9429 if (GET_CODE (XEXP (x, 0)) == MULT)
9431 mul_op0 = XEXP (XEXP (x, 0), 0);
9432 mul_op1 = XEXP (XEXP (x, 0), 1);
9433 sub_op = XEXP (x, 1);
9435 else
9437 mul_op0 = XEXP (XEXP (x, 1), 0);
9438 mul_op1 = XEXP (XEXP (x, 1), 1);
9439 sub_op = XEXP (x, 0);
9442 /* The first operand of the multiply may be optionally
9443 negated. */
9444 if (GET_CODE (mul_op0) == NEG)
9445 mul_op0 = XEXP (mul_op0, 0);
9447 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9448 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9449 + rtx_cost (sub_op, mode, code, 0, speed_p));
9451 return true;
9454 if (speed_p)
9455 *cost += extra_cost->fp[mode != SFmode].addsub;
9456 return false;
9459 if (mode == SImode)
9461 rtx shift_by_reg = NULL;
9462 rtx shift_op;
9463 rtx non_shift_op;
9465 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9466 if (shift_op == NULL)
9468 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9469 non_shift_op = XEXP (x, 0);
9471 else
9472 non_shift_op = XEXP (x, 1);
9474 if (shift_op != NULL)
9476 if (shift_by_reg != NULL)
9478 if (speed_p)
9479 *cost += extra_cost->alu.arith_shift_reg;
9480 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9482 else if (speed_p)
9483 *cost += extra_cost->alu.arith_shift;
9485 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9486 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9487 return true;
9490 if (arm_arch_thumb2
9491 && GET_CODE (XEXP (x, 1)) == MULT)
9493 /* MLS. */
9494 if (speed_p)
9495 *cost += extra_cost->mult[0].add;
9496 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9497 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9498 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9499 return true;
9502 if (CONST_INT_P (XEXP (x, 0)))
9504 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9505 INTVAL (XEXP (x, 0)), NULL_RTX,
9506 NULL_RTX, 1, 0);
9507 *cost = COSTS_N_INSNS (insns);
9508 if (speed_p)
9509 *cost += insns * extra_cost->alu.arith;
9510 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9511 return true;
9513 else if (speed_p)
9514 *cost += extra_cost->alu.arith;
9516 return false;
9519 if (GET_MODE_CLASS (mode) == MODE_INT
9520 && GET_MODE_SIZE (mode) < 4)
9522 rtx shift_op, shift_reg;
9523 shift_reg = NULL;
9525 /* We check both sides of the MINUS for shifter operands since,
9526 unlike PLUS, it's not commutative. */
9528 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9529 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9531 /* Slightly disparage, as we might need to widen the result. */
9532 *cost += 1;
9533 if (speed_p)
9534 *cost += extra_cost->alu.arith;
9536 if (CONST_INT_P (XEXP (x, 0)))
9538 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9539 return true;
9542 return false;
9545 if (mode == DImode)
9547 *cost += COSTS_N_INSNS (1);
9549 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9551 rtx op1 = XEXP (x, 1);
9553 if (speed_p)
9554 *cost += 2 * extra_cost->alu.arith;
9556 if (GET_CODE (op1) == ZERO_EXTEND)
9557 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9558 0, speed_p);
9559 else
9560 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9561 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9562 0, speed_p);
9563 return true;
9565 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9567 if (speed_p)
9568 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9569 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9570 0, speed_p)
9571 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9572 return true;
9574 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9575 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9577 if (speed_p)
9578 *cost += (extra_cost->alu.arith
9579 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9580 ? extra_cost->alu.arith
9581 : extra_cost->alu.arith_shift));
9582 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9583 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9584 GET_CODE (XEXP (x, 1)), 0, speed_p));
9585 return true;
9588 if (speed_p)
9589 *cost += 2 * extra_cost->alu.arith;
9590 return false;
9593 /* Vector mode? */
9595 *cost = LIBCALL_COST (2);
9596 return false;
9598 case PLUS:
9599 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9600 && (mode == SFmode || !TARGET_VFP_SINGLE))
9602 if (GET_CODE (XEXP (x, 0)) == MULT)
9604 rtx mul_op0, mul_op1, add_op;
9606 if (speed_p)
9607 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9609 mul_op0 = XEXP (XEXP (x, 0), 0);
9610 mul_op1 = XEXP (XEXP (x, 0), 1);
9611 add_op = XEXP (x, 1);
9613 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9614 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9615 + rtx_cost (add_op, mode, code, 0, speed_p));
9617 return true;
9620 if (speed_p)
9621 *cost += extra_cost->fp[mode != SFmode].addsub;
9622 return false;
9624 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9626 *cost = LIBCALL_COST (2);
9627 return false;
9630 /* Narrow modes can be synthesized in SImode, but the range
9631 of useful sub-operations is limited. Check for shift operations
9632 on one of the operands. Only left shifts can be used in the
9633 narrow modes. */
9634 if (GET_MODE_CLASS (mode) == MODE_INT
9635 && GET_MODE_SIZE (mode) < 4)
9637 rtx shift_op, shift_reg;
9638 shift_reg = NULL;
9640 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9642 if (CONST_INT_P (XEXP (x, 1)))
9644 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9645 INTVAL (XEXP (x, 1)), NULL_RTX,
9646 NULL_RTX, 1, 0);
9647 *cost = COSTS_N_INSNS (insns);
9648 if (speed_p)
9649 *cost += insns * extra_cost->alu.arith;
9650 /* Slightly penalize a narrow operation as the result may
9651 need widening. */
9652 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9653 return true;
9656 /* Slightly penalize a narrow operation as the result may
9657 need widening. */
9658 *cost += 1;
9659 if (speed_p)
9660 *cost += extra_cost->alu.arith;
9662 return false;
9665 if (mode == SImode)
9667 rtx shift_op, shift_reg;
9669 if (TARGET_INT_SIMD
9670 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9671 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9673 /* UXTA[BH] or SXTA[BH]. */
9674 if (speed_p)
9675 *cost += extra_cost->alu.extend_arith;
9676 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9677 0, speed_p)
9678 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9679 return true;
9682 shift_reg = NULL;
9683 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9684 if (shift_op != NULL)
9686 if (shift_reg)
9688 if (speed_p)
9689 *cost += extra_cost->alu.arith_shift_reg;
9690 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9692 else if (speed_p)
9693 *cost += extra_cost->alu.arith_shift;
9695 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9696 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9697 return true;
9699 if (GET_CODE (XEXP (x, 0)) == MULT)
9701 rtx mul_op = XEXP (x, 0);
9703 if (TARGET_DSP_MULTIPLY
9704 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9705 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9706 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9707 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9708 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9709 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9710 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9711 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9712 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9713 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9714 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9715 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9716 == 16))))))
9718 /* SMLA[BT][BT]. */
9719 if (speed_p)
9720 *cost += extra_cost->mult[0].extend_add;
9721 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9722 SIGN_EXTEND, 0, speed_p)
9723 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9724 SIGN_EXTEND, 0, speed_p)
9725 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9726 return true;
9729 if (speed_p)
9730 *cost += extra_cost->mult[0].add;
9731 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9732 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9733 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9734 return true;
9736 if (CONST_INT_P (XEXP (x, 1)))
9738 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9739 INTVAL (XEXP (x, 1)), NULL_RTX,
9740 NULL_RTX, 1, 0);
9741 *cost = COSTS_N_INSNS (insns);
9742 if (speed_p)
9743 *cost += insns * extra_cost->alu.arith;
9744 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9745 return true;
9747 else if (speed_p)
9748 *cost += extra_cost->alu.arith;
9750 return false;
9753 if (mode == DImode)
9755 if (arm_arch3m
9756 && GET_CODE (XEXP (x, 0)) == MULT
9757 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9758 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9759 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9760 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9762 if (speed_p)
9763 *cost += extra_cost->mult[1].extend_add;
9764 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9765 ZERO_EXTEND, 0, speed_p)
9766 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9767 ZERO_EXTEND, 0, speed_p)
9768 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9769 return true;
9772 *cost += COSTS_N_INSNS (1);
9774 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9775 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9777 if (speed_p)
9778 *cost += (extra_cost->alu.arith
9779 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9780 ? extra_cost->alu.arith
9781 : extra_cost->alu.arith_shift));
9783 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9784 0, speed_p)
9785 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9786 return true;
9789 if (speed_p)
9790 *cost += 2 * extra_cost->alu.arith;
9791 return false;
9794 /* Vector mode? */
9795 *cost = LIBCALL_COST (2);
9796 return false;
9797 case IOR:
9798 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9800 if (speed_p)
9801 *cost += extra_cost->alu.rev;
9803 return true;
9805 /* Fall through. */
9806 case AND: case XOR:
9807 if (mode == SImode)
9809 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9810 rtx op0 = XEXP (x, 0);
9811 rtx shift_op, shift_reg;
9813 if (subcode == NOT
9814 && (code == AND
9815 || (code == IOR && TARGET_THUMB2)))
9816 op0 = XEXP (op0, 0);
9818 shift_reg = NULL;
9819 shift_op = shifter_op_p (op0, &shift_reg);
9820 if (shift_op != NULL)
9822 if (shift_reg)
9824 if (speed_p)
9825 *cost += extra_cost->alu.log_shift_reg;
9826 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9828 else if (speed_p)
9829 *cost += extra_cost->alu.log_shift;
9831 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9832 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9833 return true;
9836 if (CONST_INT_P (XEXP (x, 1)))
9838 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9839 INTVAL (XEXP (x, 1)), NULL_RTX,
9840 NULL_RTX, 1, 0);
9842 *cost = COSTS_N_INSNS (insns);
9843 if (speed_p)
9844 *cost += insns * extra_cost->alu.logical;
9845 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9846 return true;
9849 if (speed_p)
9850 *cost += extra_cost->alu.logical;
9851 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9852 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9853 return true;
9856 if (mode == DImode)
9858 rtx op0 = XEXP (x, 0);
9859 enum rtx_code subcode = GET_CODE (op0);
9861 *cost += COSTS_N_INSNS (1);
9863 if (subcode == NOT
9864 && (code == AND
9865 || (code == IOR && TARGET_THUMB2)))
9866 op0 = XEXP (op0, 0);
9868 if (GET_CODE (op0) == ZERO_EXTEND)
9870 if (speed_p)
9871 *cost += 2 * extra_cost->alu.logical;
9873 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9874 0, speed_p)
9875 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9876 return true;
9878 else if (GET_CODE (op0) == SIGN_EXTEND)
9880 if (speed_p)
9881 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9883 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9884 0, speed_p)
9885 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9886 return true;
9889 if (speed_p)
9890 *cost += 2 * extra_cost->alu.logical;
9892 return true;
9894 /* Vector mode? */
9896 *cost = LIBCALL_COST (2);
9897 return false;
9899 case MULT:
9900 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9901 && (mode == SFmode || !TARGET_VFP_SINGLE))
9903 rtx op0 = XEXP (x, 0);
9905 if (GET_CODE (op0) == NEG && !flag_rounding_math)
9906 op0 = XEXP (op0, 0);
9908 if (speed_p)
9909 *cost += extra_cost->fp[mode != SFmode].mult;
9911 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
9912 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
9913 return true;
9915 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9917 *cost = LIBCALL_COST (2);
9918 return false;
9921 if (mode == SImode)
9923 if (TARGET_DSP_MULTIPLY
9924 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9925 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9926 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9927 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9928 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9929 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9930 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9931 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9932 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9933 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9934 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9935 && (INTVAL (XEXP (XEXP (x, 1), 1))
9936 == 16))))))
9938 /* SMUL[TB][TB]. */
9939 if (speed_p)
9940 *cost += extra_cost->mult[0].extend;
9941 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
9942 SIGN_EXTEND, 0, speed_p);
9943 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
9944 SIGN_EXTEND, 1, speed_p);
9945 return true;
9947 if (speed_p)
9948 *cost += extra_cost->mult[0].simple;
9949 return false;
9952 if (mode == DImode)
9954 if (arm_arch3m
9955 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9956 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9957 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9958 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9960 if (speed_p)
9961 *cost += extra_cost->mult[1].extend;
9962 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
9963 ZERO_EXTEND, 0, speed_p)
9964 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9965 ZERO_EXTEND, 0, speed_p));
9966 return true;
9969 *cost = LIBCALL_COST (2);
9970 return false;
9973 /* Vector mode? */
9974 *cost = LIBCALL_COST (2);
9975 return false;
9977 case NEG:
9978 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9979 && (mode == SFmode || !TARGET_VFP_SINGLE))
9981 if (GET_CODE (XEXP (x, 0)) == MULT)
9983 /* VNMUL. */
9984 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
9985 return true;
9988 if (speed_p)
9989 *cost += extra_cost->fp[mode != SFmode].neg;
9991 return false;
9993 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9995 *cost = LIBCALL_COST (1);
9996 return false;
9999 if (mode == SImode)
10001 if (GET_CODE (XEXP (x, 0)) == ABS)
10003 *cost += COSTS_N_INSNS (1);
10004 /* Assume the non-flag-changing variant. */
10005 if (speed_p)
10006 *cost += (extra_cost->alu.log_shift
10007 + extra_cost->alu.arith_shift);
10008 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10009 return true;
10012 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10013 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10015 *cost += COSTS_N_INSNS (1);
10016 /* No extra cost for MOV imm and MVN imm. */
10017 /* If the comparison op is using the flags, there's no further
10018 cost, otherwise we need to add the cost of the comparison. */
10019 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10020 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10021 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10023 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10024 *cost += (COSTS_N_INSNS (1)
10025 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10026 0, speed_p)
10027 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10028 1, speed_p));
10029 if (speed_p)
10030 *cost += extra_cost->alu.arith;
10032 return true;
10035 if (speed_p)
10036 *cost += extra_cost->alu.arith;
10037 return false;
10040 if (GET_MODE_CLASS (mode) == MODE_INT
10041 && GET_MODE_SIZE (mode) < 4)
10043 /* Slightly disparage, as we might need an extend operation. */
10044 *cost += 1;
10045 if (speed_p)
10046 *cost += extra_cost->alu.arith;
10047 return false;
10050 if (mode == DImode)
10052 *cost += COSTS_N_INSNS (1);
10053 if (speed_p)
10054 *cost += 2 * extra_cost->alu.arith;
10055 return false;
10058 /* Vector mode? */
10059 *cost = LIBCALL_COST (1);
10060 return false;
10062 case NOT:
10063 if (mode == SImode)
10065 rtx shift_op;
10066 rtx shift_reg = NULL;
10068 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10070 if (shift_op)
10072 if (shift_reg != NULL)
10074 if (speed_p)
10075 *cost += extra_cost->alu.log_shift_reg;
10076 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10078 else if (speed_p)
10079 *cost += extra_cost->alu.log_shift;
10080 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10081 return true;
10084 if (speed_p)
10085 *cost += extra_cost->alu.logical;
10086 return false;
10088 if (mode == DImode)
10090 *cost += COSTS_N_INSNS (1);
10091 return false;
10094 /* Vector mode? */
10096 *cost += LIBCALL_COST (1);
10097 return false;
10099 case IF_THEN_ELSE:
10101 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10103 *cost += COSTS_N_INSNS (3);
10104 return true;
10106 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10107 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10109 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10110 /* Assume that if one arm of the if_then_else is a register,
10111 it will be tied to the result, eliminating the
10112 conditional insn. */
10113 if (REG_P (XEXP (x, 1)))
10114 *cost += op2cost;
10115 else if (REG_P (XEXP (x, 2)))
10116 *cost += op1cost;
10117 else
10119 if (speed_p)
10121 if (extra_cost->alu.non_exec_costs_exec)
10122 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10123 else
10124 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10126 else
10127 *cost += op1cost + op2cost;
10130 return true;
10132 case COMPARE:
10133 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10134 *cost = 0;
10135 else
10137 machine_mode op0mode;
10138 /* We'll mostly assume that the cost of a compare is the cost of the
10139 LHS. However, there are some notable exceptions. */
10141 /* Floating point compares are never done as side-effects. */
10142 op0mode = GET_MODE (XEXP (x, 0));
10143 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10144 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10146 if (speed_p)
10147 *cost += extra_cost->fp[op0mode != SFmode].compare;
10149 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10151 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10152 return true;
10155 return false;
10157 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10159 *cost = LIBCALL_COST (2);
10160 return false;
10163 /* DImode compares normally take two insns. */
10164 if (op0mode == DImode)
10166 *cost += COSTS_N_INSNS (1);
10167 if (speed_p)
10168 *cost += 2 * extra_cost->alu.arith;
10169 return false;
10172 if (op0mode == SImode)
10174 rtx shift_op;
10175 rtx shift_reg;
10177 if (XEXP (x, 1) == const0_rtx
10178 && !(REG_P (XEXP (x, 0))
10179 || (GET_CODE (XEXP (x, 0)) == SUBREG
10180 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10182 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10184 /* Multiply operations that set the flags are often
10185 significantly more expensive. */
10186 if (speed_p
10187 && GET_CODE (XEXP (x, 0)) == MULT
10188 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10189 *cost += extra_cost->mult[0].flag_setting;
10191 if (speed_p
10192 && GET_CODE (XEXP (x, 0)) == PLUS
10193 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10194 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10195 0), 1), mode))
10196 *cost += extra_cost->mult[0].flag_setting;
10197 return true;
10200 shift_reg = NULL;
10201 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10202 if (shift_op != NULL)
10204 if (shift_reg != NULL)
10206 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10207 1, speed_p);
10208 if (speed_p)
10209 *cost += extra_cost->alu.arith_shift_reg;
10211 else if (speed_p)
10212 *cost += extra_cost->alu.arith_shift;
10213 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10214 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10215 return true;
10218 if (speed_p)
10219 *cost += extra_cost->alu.arith;
10220 if (CONST_INT_P (XEXP (x, 1))
10221 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10223 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10224 return true;
10226 return false;
10229 /* Vector mode? */
10231 *cost = LIBCALL_COST (2);
10232 return false;
10234 return true;
10236 case EQ:
10237 case NE:
10238 case LT:
10239 case LE:
10240 case GT:
10241 case GE:
10242 case LTU:
10243 case LEU:
10244 case GEU:
10245 case GTU:
10246 case ORDERED:
10247 case UNORDERED:
10248 case UNEQ:
10249 case UNLE:
10250 case UNLT:
10251 case UNGE:
10252 case UNGT:
10253 case LTGT:
10254 if (outer_code == SET)
10256 /* Is it a store-flag operation? */
10257 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10258 && XEXP (x, 1) == const0_rtx)
10260 /* Thumb also needs an IT insn. */
10261 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10262 return true;
10264 if (XEXP (x, 1) == const0_rtx)
10266 switch (code)
10268 case LT:
10269 /* LSR Rd, Rn, #31. */
10270 if (speed_p)
10271 *cost += extra_cost->alu.shift;
10272 break;
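/* Illustrative example (not part of the original source): for
   r0 = (r1 < 0), the single LSR above copies the sign bit of r1 into
   bit 0, so r0 becomes 1 exactly when r1 is negative.  */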
10274 case EQ:
10275 /* RSBS T1, Rn, #0
10276 ADC Rd, Rn, T1. */
10278 case NE:
10279 /* SUBS T1, Rn, #1
10280 SBC Rd, Rn, T1. */
10281 *cost += COSTS_N_INSNS (1);
10282 break;
10284 case LE:
10285 /* RSBS T1, Rn, Rn, LSR #31
10286 ADC Rd, Rn, T1. */
10287 *cost += COSTS_N_INSNS (1);
10288 if (speed_p)
10289 *cost += extra_cost->alu.arith_shift;
10290 break;
10292 case GT:
10293 /* RSB Rd, Rn, Rn, ASR #1
10294 LSR Rd, Rd, #31. */
10295 *cost += COSTS_N_INSNS (1);
10296 if (speed_p)
10297 *cost += (extra_cost->alu.arith_shift
10298 + extra_cost->alu.shift);
10299 break;
10301 case GE:
10302 /* ASR Rd, Rn, #31
10303 ADD Rd, Rn, #1. */
10304 *cost += COSTS_N_INSNS (1);
10305 if (speed_p)
10306 *cost += extra_cost->alu.shift;
10307 break;
10309 default:
10310 /* Remaining cases are either meaningless or would take
10311 three insns anyway. */
10312 *cost = COSTS_N_INSNS (3);
10313 break;
10315 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10316 return true;
10318 else
10320 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10321 if (CONST_INT_P (XEXP (x, 1))
10322 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10324 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10325 return true;
10328 return false;
10331 /* Not directly inside a set. If it involves the condition code
10332 register it must be the condition for a branch, cond_exec or
10333 I_T_E operation. Since the comparison is performed elsewhere
10334 this is just the control part which has no additional
10335 cost. */
10336 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10337 && XEXP (x, 1) == const0_rtx)
10339 *cost = 0;
10340 return true;
10342 return false;
10344 case ABS:
10345 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10346 && (mode == SFmode || !TARGET_VFP_SINGLE))
10348 if (speed_p)
10349 *cost += extra_cost->fp[mode != SFmode].neg;
10351 return false;
10353 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10355 *cost = LIBCALL_COST (1);
10356 return false;
10359 if (mode == SImode)
10361 if (speed_p)
10362 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10363 return false;
10365 /* Vector mode? */
10366 *cost = LIBCALL_COST (1);
10367 return false;
10369 case SIGN_EXTEND:
10370 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10371 && MEM_P (XEXP (x, 0)))
10373 if (mode == DImode)
10374 *cost += COSTS_N_INSNS (1);
10376 if (!speed_p)
10377 return true;
10379 if (GET_MODE (XEXP (x, 0)) == SImode)
10380 *cost += extra_cost->ldst.load;
10381 else
10382 *cost += extra_cost->ldst.load_sign_extend;
10384 if (mode == DImode)
10385 *cost += extra_cost->alu.shift;
10387 return true;
10390 /* Widening from less than 32 bits requires an extend operation. */
10391 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10393 /* We have SXTB/SXTH. */
10394 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10395 if (speed_p)
10396 *cost += extra_cost->alu.extend;
10398 else if (GET_MODE (XEXP (x, 0)) != SImode)
10400 /* Needs two shifts. */
10401 *cost += COSTS_N_INSNS (1);
10402 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10403 if (speed_p)
10404 *cost += 2 * extra_cost->alu.shift;
10407 /* Widening beyond 32 bits requires one more insn. */
10408 if (mode == DImode)
10410 *cost += COSTS_N_INSNS (1);
10411 if (speed_p)
10412 *cost += extra_cost->alu.shift;
10415 return true;
10417 case ZERO_EXTEND:
10418 if ((arm_arch4
10419 || GET_MODE (XEXP (x, 0)) == SImode
10420 || GET_MODE (XEXP (x, 0)) == QImode)
10421 && MEM_P (XEXP (x, 0)))
10423 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10425 if (mode == DImode)
10426 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10428 return true;
10431 /* Widening from less than 32 bits requires an extend operation. */
10432 if (GET_MODE (XEXP (x, 0)) == QImode)
10434 /* UXTB can be a shorter instruction in Thumb2, but it might
10435 be slower than the AND Rd, Rn, #255 alternative. When
10436 optimizing for speed it should never be slower to use
10437 AND, and we don't really model 16-bit vs 32-bit insns
10438 here. */
10439 if (speed_p)
10440 *cost += extra_cost->alu.logical;
10442 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10444 /* We have UXTB/UXTH. */
10445 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10446 if (speed_p)
10447 *cost += extra_cost->alu.extend;
10449 else if (GET_MODE (XEXP (x, 0)) != SImode)
10451 /* Needs two shifts. It's marginally preferable to use
10452 shifts rather than two BIC instructions as the second
10453 shift may merge with a subsequent insn as a shifter
10454 op. */
10455 *cost = COSTS_N_INSNS (2);
10456 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10457 if (speed_p)
10458 *cost += 2 * extra_cost->alu.shift;
10461 /* Widening beyond 32 bits requires one more insn. */
10462 if (mode == DImode)
10464 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10467 return true;
10469 case CONST_INT:
10470 *cost = 0;
10471 /* CONST_INT has no mode, so we cannot tell for sure how many
10472 insns are really going to be needed. The best we can do is
10473 look at the value passed. If it fits in SImode, then assume
10474 that's the mode it will be used for. Otherwise assume it
10475 will be used in DImode. */
10476 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10477 mode = SImode;
10478 else
10479 mode = DImode;
10481 /* Avoid blowing up in arm_gen_constant (). */
10482 if (!(outer_code == PLUS
10483 || outer_code == AND
10484 || outer_code == IOR
10485 || outer_code == XOR
10486 || outer_code == MINUS))
10487 outer_code = SET;
10489 const_int_cost:
10490 if (mode == SImode)
10492 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10493 INTVAL (x), NULL, NULL,
10494 0, 0));
10495 /* Extra costs? */
10497 else
10499 *cost += COSTS_N_INSNS (arm_gen_constant
10500 (outer_code, SImode, NULL,
10501 trunc_int_for_mode (INTVAL (x), SImode),
10502 NULL, NULL, 0, 0)
10503 + arm_gen_constant (outer_code, SImode, NULL,
10504 INTVAL (x) >> 32, NULL,
10505 NULL, 0, 0));
10506 /* Extra costs? */
10509 return true;
10511 case CONST:
10512 case LABEL_REF:
10513 case SYMBOL_REF:
10514 if (speed_p)
10516 if (arm_arch_thumb2 && !flag_pic)
10517 *cost += COSTS_N_INSNS (1);
10518 else
10519 *cost += extra_cost->ldst.load;
10521 else
10522 *cost += COSTS_N_INSNS (1);
10524 if (flag_pic)
10526 *cost += COSTS_N_INSNS (1);
10527 if (speed_p)
10528 *cost += extra_cost->alu.arith;
10531 return true;
10533 case CONST_FIXED:
10534 *cost = COSTS_N_INSNS (4);
10535 /* Fixme. */
10536 return true;
10538 case CONST_DOUBLE:
10539 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10540 && (mode == SFmode || !TARGET_VFP_SINGLE))
10542 if (vfp3_const_double_rtx (x))
10544 if (speed_p)
10545 *cost += extra_cost->fp[mode == DFmode].fpconst;
10546 return true;
10549 if (speed_p)
10551 if (mode == DFmode)
10552 *cost += extra_cost->ldst.loadd;
10553 else
10554 *cost += extra_cost->ldst.loadf;
10556 else
10557 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10559 return true;
10561 *cost = COSTS_N_INSNS (4);
10562 return true;
10564 case CONST_VECTOR:
10565 /* Fixme. */
10566 if (TARGET_NEON
10567 && TARGET_HARD_FLOAT
10568 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10569 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10570 *cost = COSTS_N_INSNS (1);
10571 else
10572 *cost = COSTS_N_INSNS (4);
10573 return true;
10575 case HIGH:
10576 case LO_SUM:
10577 /* When optimizing for size, we prefer constant pool entries to
10578 MOVW/MOVT pairs, so bump the cost of these slightly. */
10579 if (!speed_p)
10580 *cost += 1;
10581 return true;
10583 case CLZ:
10584 if (speed_p)
10585 *cost += extra_cost->alu.clz;
10586 return false;
10588 case SMIN:
10589 if (XEXP (x, 1) == const0_rtx)
10591 if (speed_p)
10592 *cost += extra_cost->alu.log_shift;
10593 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10594 return true;
10596 /* Fall through. */
10597 case SMAX:
10598 case UMIN:
10599 case UMAX:
10600 *cost += COSTS_N_INSNS (1);
10601 return false;
10603 case TRUNCATE:
10604 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10605 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10606 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10607 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10608 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10609 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10610 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10611 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10612 == ZERO_EXTEND))))
10614 if (speed_p)
10615 *cost += extra_cost->mult[1].extend;
10616 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10617 ZERO_EXTEND, 0, speed_p)
10618 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10619 ZERO_EXTEND, 0, speed_p));
10620 return true;
10622 *cost = LIBCALL_COST (1);
10623 return false;
10625 case UNSPEC_VOLATILE:
10626 case UNSPEC:
10627 return arm_unspec_cost (x, outer_code, speed_p, cost);
10629 case PC:
10630 /* Reading the PC is like reading any other register. Writing it
10631 is more expensive, but we take that into account elsewhere. */
10632 *cost = 0;
10633 return true;
10635 case ZERO_EXTRACT:
10636 /* TODO: Simple zero_extract of bottom bits using AND. */
10637 /* Fall through. */
10638 case SIGN_EXTRACT:
10639 if (arm_arch6
10640 && mode == SImode
10641 && CONST_INT_P (XEXP (x, 1))
10642 && CONST_INT_P (XEXP (x, 2)))
10644 if (speed_p)
10645 *cost += extra_cost->alu.bfx;
10646 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10647 return true;
10649 /* Without UBFX/SBFX, need to resort to shift operations. */
10650 *cost += COSTS_N_INSNS (1);
10651 if (speed_p)
10652 *cost += 2 * extra_cost->alu.shift;
10653 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10654 return true;
10656 case FLOAT_EXTEND:
10657 if (TARGET_HARD_FLOAT)
10659 if (speed_p)
10660 *cost += extra_cost->fp[mode == DFmode].widen;
10661 if (!TARGET_FPU_ARMV8
10662 && GET_MODE (XEXP (x, 0)) == HFmode)
10664 /* Pre v8, widening HF->DF is a two-step process, first
10665 widening to SFmode. */
10666 *cost += COSTS_N_INSNS (1);
10667 if (speed_p)
10668 *cost += extra_cost->fp[0].widen;
10670 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10671 return true;
10674 *cost = LIBCALL_COST (1);
10675 return false;
10677 case FLOAT_TRUNCATE:
10678 if (TARGET_HARD_FLOAT)
10680 if (speed_p)
10681 *cost += extra_cost->fp[mode == DFmode].narrow;
10682 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10683 return true;
10684 /* Vector modes? */
10686 *cost = LIBCALL_COST (1);
10687 return false;
10689 case FMA:
10690 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10692 rtx op0 = XEXP (x, 0);
10693 rtx op1 = XEXP (x, 1);
10694 rtx op2 = XEXP (x, 2);
10697 /* vfms or vfnma. */
10698 if (GET_CODE (op0) == NEG)
10699 op0 = XEXP (op0, 0);
10701 /* vfnms or vfnma. */
10702 if (GET_CODE (op2) == NEG)
10703 op2 = XEXP (op2, 0);
10705 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10706 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10707 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10709 if (speed_p)
10710 *cost += extra_cost->fp[mode == DFmode].fma;
10712 return true;
10715 *cost = LIBCALL_COST (3);
10716 return false;
10718 case FIX:
10719 case UNSIGNED_FIX:
10720 if (TARGET_HARD_FLOAT)
10722 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10723 a vcvt fixed-point conversion. */
10724 if (code == FIX && mode == SImode
10725 && GET_CODE (XEXP (x, 0)) == FIX
10726 && GET_MODE (XEXP (x, 0)) == SFmode
10727 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10728 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10729 > 0)
10731 if (speed_p)
10732 *cost += extra_cost->fp[0].toint;
10734 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10735 code, 0, speed_p);
10736 return true;
10739 if (GET_MODE_CLASS (mode) == MODE_INT)
10741 mode = GET_MODE (XEXP (x, 0));
10742 if (speed_p)
10743 *cost += extra_cost->fp[mode == DFmode].toint;
10744 /* Strip off the 'cost' of rounding towards zero. */
10745 if (GET_CODE (XEXP (x, 0)) == FIX)
10746 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10747 0, speed_p);
10748 else
10749 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10750 /* ??? Increase the cost to deal with transferring from
10751 FP -> CORE registers? */
10752 return true;
10754 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10755 && TARGET_FPU_ARMV8)
10757 if (speed_p)
10758 *cost += extra_cost->fp[mode == DFmode].roundint;
10759 return false;
10761 /* Vector costs? */
10763 *cost = LIBCALL_COST (1);
10764 return false;
10766 case FLOAT:
10767 case UNSIGNED_FLOAT:
10768 if (TARGET_HARD_FLOAT)
10770 /* ??? Increase the cost to deal with transferring from CORE
10771 -> FP registers? */
10772 if (speed_p)
10773 *cost += extra_cost->fp[mode == DFmode].fromint;
10774 return false;
10776 *cost = LIBCALL_COST (1);
10777 return false;
10779 case CALL:
10780 return true;
10782 case ASM_OPERANDS:
10784 /* Just a guess: the number of instructions in the asm
10785 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10786 though (see PR60663). */
10787 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10788 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10790 *cost = COSTS_N_INSNS (asm_length + num_operands);
10791 return true;
10793 default:
10794 if (mode != VOIDmode)
10795 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10796 else
10797 *cost = COSTS_N_INSNS (4); /* Who knows? */
10798 return false;
10802 #undef HANDLE_NARROW_SHIFT_ARITH
10804 /* RTX costs entry point. */
10806 static bool
10807 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10808 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10810 bool result;
10811 int code = GET_CODE (x);
10812 gcc_assert (current_tune->insn_extra_cost);
10814 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10815 (enum rtx_code) outer_code,
10816 current_tune->insn_extra_cost,
10817 total, speed);
10819 if (dump_file && (dump_flags & TDF_DETAILS))
10821 print_rtl_single (dump_file, x);
10822 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10823 *total, result ? "final" : "partial");
10825 return result;
10828 /* All address computations that can be done are free, but rtx cost returns
10829 the same for practically all of them. So we weight the different types
10830 of address here in the order (most preferred first):
10831 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
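/* Illustrative summary of the weights below (not part of the original
   source): a pre/post-increment such as (post_inc (reg)) costs 0, a
   reg-plus-constant address costs 2, a reg plus a shifted index costs 3,
   a plain reg-plus-reg sum costs 4, a bare register costs 6, and a
   symbolic or memory-indirect address costs 10.  */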
10832 static inline int
10833 arm_arm_address_cost (rtx x)
10835 enum rtx_code c = GET_CODE (x);
10837 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10838 return 0;
10839 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10840 return 10;
10842 if (c == PLUS)
10844 if (CONST_INT_P (XEXP (x, 1)))
10845 return 2;
10847 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10848 return 3;
10850 return 4;
10853 return 6;
10856 static inline int
10857 arm_thumb_address_cost (rtx x)
10859 enum rtx_code c = GET_CODE (x);
10861 if (c == REG)
10862 return 1;
10863 if (c == PLUS
10864 && REG_P (XEXP (x, 0))
10865 && CONST_INT_P (XEXP (x, 1)))
10866 return 1;
10868 return 2;
10871 static int
10872 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10873 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10875 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10878 /* Adjust cost hook for XScale. */
10879 static bool
10880 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10881 int * cost)
10883 /* Some true dependencies can have a higher cost depending
10884 on precisely how certain input operands are used. */
10885 if (dep_type == 0
10886 && recog_memoized (insn) >= 0
10887 && recog_memoized (dep) >= 0)
10889 int shift_opnum = get_attr_shift (insn);
10890 enum attr_type attr_type = get_attr_type (dep);
10892 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10893 operand for INSN. If we have a shifted input operand and the
10894 instruction we depend on is another ALU instruction, then we may
10895 have to account for an additional stall. */
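/* Illustrative example (not part of the original source): if INSN is
   ADD r0, r1, r2, LSL #2 (so the shifted operand is r2) and DEP is an
   ALU instruction that writes r2, the loop below detects the overlap
   and charges the dependency 2 cycles.  */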
10896 if (shift_opnum != 0
10897 && (attr_type == TYPE_ALU_SHIFT_IMM
10898 || attr_type == TYPE_ALUS_SHIFT_IMM
10899 || attr_type == TYPE_LOGIC_SHIFT_IMM
10900 || attr_type == TYPE_LOGICS_SHIFT_IMM
10901 || attr_type == TYPE_ALU_SHIFT_REG
10902 || attr_type == TYPE_ALUS_SHIFT_REG
10903 || attr_type == TYPE_LOGIC_SHIFT_REG
10904 || attr_type == TYPE_LOGICS_SHIFT_REG
10905 || attr_type == TYPE_MOV_SHIFT
10906 || attr_type == TYPE_MVN_SHIFT
10907 || attr_type == TYPE_MOV_SHIFT_REG
10908 || attr_type == TYPE_MVN_SHIFT_REG))
10910 rtx shifted_operand;
10911 int opno;
10913 /* Get the shifted operand. */
10914 extract_insn (insn);
10915 shifted_operand = recog_data.operand[shift_opnum];
10917 /* Iterate over all the operands in DEP. If we write an operand
10918 that overlaps with SHIFTED_OPERAND, then we have to increase the
10919 cost of this dependency. */
10920 extract_insn (dep);
10921 preprocess_constraints (dep);
10922 for (opno = 0; opno < recog_data.n_operands; opno++)
10924 /* We can ignore strict inputs. */
10925 if (recog_data.operand_type[opno] == OP_IN)
10926 continue;
10928 if (reg_overlap_mentioned_p (recog_data.operand[opno],
10929 shifted_operand))
10931 *cost = 2;
10932 return false;
10937 return true;
10940 /* Adjust cost hook for Cortex A9. */
10941 static bool
10942 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10943 int * cost)
10945 switch (dep_type)
10947 case REG_DEP_ANTI:
10948 *cost = 0;
10949 return false;
10951 case REG_DEP_TRUE:
10952 case REG_DEP_OUTPUT:
10953 if (recog_memoized (insn) >= 0
10954 && recog_memoized (dep) >= 0)
10956 if (GET_CODE (PATTERN (insn)) == SET)
10958 if (GET_MODE_CLASS
10959 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
10960 || GET_MODE_CLASS
10961 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
10963 enum attr_type attr_type_insn = get_attr_type (insn);
10964 enum attr_type attr_type_dep = get_attr_type (dep);
10966 /* By default all dependencies of the form
10967 s0 = s0 <op> s1
10968 s0 = s0 <op> s2
10969 have an extra latency of 1 cycle because
10970 of the input and output dependency in this
10971 case. However, this gets modeled as a true
10972 dependency and hence all these checks. */
10973 if (REG_P (SET_DEST (PATTERN (insn)))
10974 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
10976 /* FMACS is a special case where the dependent
10977 instruction can be issued 3 cycles before
10978 the normal latency in case of an output
10979 dependency. */
10980 if ((attr_type_insn == TYPE_FMACS
10981 || attr_type_insn == TYPE_FMACD)
10982 && (attr_type_dep == TYPE_FMACS
10983 || attr_type_dep == TYPE_FMACD))
10985 if (dep_type == REG_DEP_OUTPUT)
10986 *cost = insn_default_latency (dep) - 3;
10987 else
10988 *cost = insn_default_latency (dep);
10989 return false;
10991 else
10993 if (dep_type == REG_DEP_OUTPUT)
10994 *cost = insn_default_latency (dep) + 1;
10995 else
10996 *cost = insn_default_latency (dep);
10998 return false;
11003 break;
11005 default:
11006 gcc_unreachable ();
11009 return true;
11012 /* Adjust cost hook for FA726TE. */
11013 static bool
11014 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11015 int * cost)
11017 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11018 has a penalty of 3. */
11019 if (dep_type == REG_DEP_TRUE
11020 && recog_memoized (insn) >= 0
11021 && recog_memoized (dep) >= 0
11022 && get_attr_conds (dep) == CONDS_SET)
11024 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11025 if (get_attr_conds (insn) == CONDS_USE
11026 && get_attr_type (insn) != TYPE_BRANCH)
11028 *cost = 3;
11029 return false;
11032 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11033 || get_attr_conds (insn) == CONDS_USE)
11035 *cost = 0;
11036 return false;
11040 return true;
11043 /* Implement TARGET_REGISTER_MOVE_COST.
11045 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11046 such a move is typically more expensive than a single memory access. We set
11047 the cost to less than two memory accesses so that floating
11048 point to integer conversion does not go through memory. */
11050 int
11051 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11052 reg_class_t from, reg_class_t to)
11054 if (TARGET_32BIT)
11056 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11057 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11058 return 15;
11059 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11060 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11061 return 4;
11062 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11063 return 20;
11064 else
11065 return 2;
11067 else
11069 if (from == HI_REGS || to == HI_REGS)
11070 return 4;
11071 else
11072 return 2;
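/* Illustrative note (not part of the original source): with TARGET_32BIT a
   VFP<->core move above is costed at 15, while arm_memory_move_cost below
   returns 10 per access, so a store/reload round trip would cost 20; keeping
   the move cost below that steers the register allocator away from going
   through memory.  */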
11076 /* Implement TARGET_MEMORY_MOVE_COST. */
11078 int
11079 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11080 bool in ATTRIBUTE_UNUSED)
11082 if (TARGET_32BIT)
11083 return 10;
11084 else
11086 if (GET_MODE_SIZE (mode) < 4)
11087 return 8;
11088 else
11089 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11093 /* Vectorizer cost model implementation. */
11095 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11096 static int
11097 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11098 tree vectype,
11099 int misalign ATTRIBUTE_UNUSED)
11101 unsigned elements;
11103 switch (type_of_cost)
11105 case scalar_stmt:
11106 return current_tune->vec_costs->scalar_stmt_cost;
11108 case scalar_load:
11109 return current_tune->vec_costs->scalar_load_cost;
11111 case scalar_store:
11112 return current_tune->vec_costs->scalar_store_cost;
11114 case vector_stmt:
11115 return current_tune->vec_costs->vec_stmt_cost;
11117 case vector_load:
11118 return current_tune->vec_costs->vec_align_load_cost;
11120 case vector_store:
11121 return current_tune->vec_costs->vec_store_cost;
11123 case vec_to_scalar:
11124 return current_tune->vec_costs->vec_to_scalar_cost;
11126 case scalar_to_vec:
11127 return current_tune->vec_costs->scalar_to_vec_cost;
11129 case unaligned_load:
11130 return current_tune->vec_costs->vec_unalign_load_cost;
11132 case unaligned_store:
11133 return current_tune->vec_costs->vec_unalign_store_cost;
11135 case cond_branch_taken:
11136 return current_tune->vec_costs->cond_taken_branch_cost;
11138 case cond_branch_not_taken:
11139 return current_tune->vec_costs->cond_not_taken_branch_cost;
11141 case vec_perm:
11142 case vec_promote_demote:
11143 return current_tune->vec_costs->vec_stmt_cost;
11145 case vec_construct:
11146 elements = TYPE_VECTOR_SUBPARTS (vectype);
11147 return elements / 2 + 1;
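/* Illustrative arithmetic (not part of the original source): constructing a
   four-element vector is costed as 4 / 2 + 1 == 3.  */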
11149 default:
11150 gcc_unreachable ();
11154 /* Implement targetm.vectorize.add_stmt_cost. */
11156 static unsigned
11157 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11158 struct _stmt_vec_info *stmt_info, int misalign,
11159 enum vect_cost_model_location where)
11161 unsigned *cost = (unsigned *) data;
11162 unsigned retval = 0;
11164 if (flag_vect_cost_model)
11166 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11167 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11169 /* Statements in an inner loop relative to the loop being
11170 vectorized are weighted more heavily. The value here is
11171 arbitrary and could potentially be improved with analysis. */
11172 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11173 count *= 50; /* FIXME. */
11175 retval = (unsigned) (count * stmt_cost);
11176 cost[where] += retval;
11179 return retval;
11182 /* Return true if and only if this insn can dual-issue only as older. */
11183 static bool
11184 cortexa7_older_only (rtx_insn *insn)
11186 if (recog_memoized (insn) < 0)
11187 return false;
11189 switch (get_attr_type (insn))
11191 case TYPE_ALU_DSP_REG:
11192 case TYPE_ALU_SREG:
11193 case TYPE_ALUS_SREG:
11194 case TYPE_LOGIC_REG:
11195 case TYPE_LOGICS_REG:
11196 case TYPE_ADC_REG:
11197 case TYPE_ADCS_REG:
11198 case TYPE_ADR:
11199 case TYPE_BFM:
11200 case TYPE_REV:
11201 case TYPE_MVN_REG:
11202 case TYPE_SHIFT_IMM:
11203 case TYPE_SHIFT_REG:
11204 case TYPE_LOAD_BYTE:
11205 case TYPE_LOAD1:
11206 case TYPE_STORE1:
11207 case TYPE_FFARITHS:
11208 case TYPE_FADDS:
11209 case TYPE_FFARITHD:
11210 case TYPE_FADDD:
11211 case TYPE_FMOV:
11212 case TYPE_F_CVT:
11213 case TYPE_FCMPS:
11214 case TYPE_FCMPD:
11215 case TYPE_FCONSTS:
11216 case TYPE_FCONSTD:
11217 case TYPE_FMULS:
11218 case TYPE_FMACS:
11219 case TYPE_FMULD:
11220 case TYPE_FMACD:
11221 case TYPE_FDIVS:
11222 case TYPE_FDIVD:
11223 case TYPE_F_MRC:
11224 case TYPE_F_MRRC:
11225 case TYPE_F_FLAG:
11226 case TYPE_F_LOADS:
11227 case TYPE_F_STORES:
11228 return true;
11229 default:
11230 return false;
11234 /* Return true if and only if this insn can dual-issue as younger. */
11235 static bool
11236 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11238 if (recog_memoized (insn) < 0)
11240 if (verbose > 5)
11241 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11242 return false;
11245 switch (get_attr_type (insn))
11247 case TYPE_ALU_IMM:
11248 case TYPE_ALUS_IMM:
11249 case TYPE_LOGIC_IMM:
11250 case TYPE_LOGICS_IMM:
11251 case TYPE_EXTEND:
11252 case TYPE_MVN_IMM:
11253 case TYPE_MOV_IMM:
11254 case TYPE_MOV_REG:
11255 case TYPE_MOV_SHIFT:
11256 case TYPE_MOV_SHIFT_REG:
11257 case TYPE_BRANCH:
11258 case TYPE_CALL:
11259 return true;
11260 default:
11261 return false;
11266 /* Look for an instruction that can dual issue only as an older
11267 instruction, and move it in front of any instructions that can
11268 dual-issue as younger, while preserving the relative order of all
11269 other instructions in the ready list. This is a heuristic to help
11270 dual-issue in later cycles, by postponing issue of more flexible
11271 instructions. This heuristic may affect dual issue opportunities
11272 in the current cycle. */
11273 static void
11274 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11275 int *n_readyp, int clock)
11277 int i;
11278 int first_older_only = -1, first_younger = -1;
11280 if (verbose > 5)
11281 fprintf (file,
11282 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11283 clock,
11284 *n_readyp);
11286 /* Traverse the ready list from the head (the instruction to issue
11287 first), looking for the first instruction that can issue as
11288 younger and the first instruction that can dual-issue only as
11289 older. */
11290 for (i = *n_readyp - 1; i >= 0; i--)
11292 rtx_insn *insn = ready[i];
11293 if (cortexa7_older_only (insn))
11295 first_older_only = i;
11296 if (verbose > 5)
11297 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11298 break;
11300 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11301 first_younger = i;
11304 /* Nothing to reorder: either no younger insn was found, or an insn
11305 that can dual-issue only as older appears before any insn that
11306 can dual-issue as younger. */
11307 if (first_younger == -1)
11309 if (verbose > 5)
11310 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11311 return;
11314 /* Nothing to reorder because no older-only insn in the ready list. */
11315 if (first_older_only == -1)
11317 if (verbose > 5)
11318 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11319 return;
11322 /* Move first_older_only insn before first_younger. */
11323 if (verbose > 5)
11324 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11325 INSN_UID(ready [first_older_only]),
11326 INSN_UID(ready [first_younger]));
11327 rtx_insn *first_older_only_insn = ready [first_older_only];
11328 for (i = first_older_only; i < first_younger; i++)
11330 ready[i] = ready[i+1];
11333 ready[i] = first_older_only_insn;
11334 return;
11337 /* Implement TARGET_SCHED_REORDER. */
11338 static int
11339 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11340 int clock)
11342 switch (arm_tune)
11344 case TARGET_CPU_cortexa7:
11345 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11346 break;
11347 default:
11348 /* Do nothing for other cores. */
11349 break;
11352 return arm_issue_rate ();
11355 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11356 It corrects the value of COST based on the relationship between
11357 INSN and DEP and the dependence type DEP_TYPE. It returns the new
11358 value. There is a per-core adjust_cost hook to adjust scheduler costs
11359 and the per-core hook can choose to completely override the generic
11360 adjust_cost function. Only put bits of code into arm_adjust_cost that
11361 are common across all cores. */
11362 static int
11363 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11364 unsigned int)
11366 rtx i_pat, d_pat;
11368 /* When generating Thumb-1 code, we want to place flag-setting operations
11369 close to a conditional branch which depends on them, so that we can
11370 omit the comparison. */
11371 if (TARGET_THUMB1
11372 && dep_type == 0
11373 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11374 && recog_memoized (dep) >= 0
11375 && get_attr_conds (dep) == CONDS_SET)
11376 return 0;
11378 if (current_tune->sched_adjust_cost != NULL)
11380 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11381 return cost;
11384 /* XXX Is this strictly true? */
11385 if (dep_type == REG_DEP_ANTI
11386 || dep_type == REG_DEP_OUTPUT)
11387 return 0;
11389 /* Call insns don't incur a stall, even if they follow a load. */
11390 if (dep_type == 0
11391 && CALL_P (insn))
11392 return 1;
11394 if ((i_pat = single_set (insn)) != NULL
11395 && MEM_P (SET_SRC (i_pat))
11396 && (d_pat = single_set (dep)) != NULL
11397 && MEM_P (SET_DEST (d_pat)))
11399 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11400 /* This is a load after a store, there is no conflict if the load reads
11401 from a cached area. Assume that loads from the stack, and from the
11402 constant pool are cached, and that others will miss. This is a
11403 hack. */
11405 if ((GET_CODE (src_mem) == SYMBOL_REF
11406 && CONSTANT_POOL_ADDRESS_P (src_mem))
11407 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11408 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11409 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11410 return 1;
11413 return cost;
11417 arm_max_conditional_execute (void)
11419 return max_insns_skipped;
11422 static int
11423 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11425 if (TARGET_32BIT)
11426 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11427 else
11428 return (optimize > 0) ? 2 : 0;
11431 static int
11432 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11434 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11437 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11438 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11439 sequences of non-executed instructions in IT blocks probably take the same
11440 amount of time as executed instructions (and the IT instruction itself takes
11441 space in icache). This function was experimentally determined to give good
11442 results on a popular embedded benchmark. */
11444 static int
11445 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11447 return (TARGET_32BIT && speed_p) ? 1
11448 : arm_default_branch_cost (speed_p, predictable_p);
11451 static int
11452 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11454 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11457 static bool fp_consts_inited = false;
11459 static REAL_VALUE_TYPE value_fp0;
11461 static void
11462 init_fp_table (void)
11464 REAL_VALUE_TYPE r;
11466 r = REAL_VALUE_ATOF ("0", DFmode);
11467 value_fp0 = r;
11468 fp_consts_inited = true;
11471 /* Return TRUE if rtx X is a valid immediate FP constant. */
11473 arm_const_double_rtx (rtx x)
11475 const REAL_VALUE_TYPE *r;
11477 if (!fp_consts_inited)
11478 init_fp_table ();
11480 r = CONST_DOUBLE_REAL_VALUE (x);
11481 if (REAL_VALUE_MINUS_ZERO (*r))
11482 return 0;
11484 if (real_equal (r, &value_fp0))
11485 return 1;
11487 return 0;
11490 /* VFPv3 has a fairly wide range of representable immediates, formed from
11491 "quarter-precision" floating-point values. These can be evaluated using this
11492 formula (with ^ for exponentiation):
11494 -1^s * n * 2^-r
11496 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11497 16 <= n <= 31 and 0 <= r <= 7.
11499 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11501 - A (most-significant) is the sign bit.
11502 - BCD are the exponent (encoded as r XOR 3).
11503 - EFGH are the mantissa (encoded as n - 16).
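/* A worked example: 1.0 can be written as -1^0 * 16 * 2^-4, i.e. s = 0,
   n = 16 and r = 4.  The encoding is then

     A    = 0              (sign)
     BCD  = 4 XOR 3 = 111  (exponent)
     EFGH = 16 - 16 = 0000 (mantissa)

   giving the 8-bit index 0b01110000 == 0x70, which is exactly what
   vfp3_const_double_index below computes as
   (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16).  */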
11506 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11507 fconst[sd] instruction, or -1 if X isn't suitable. */
11508 static int
11509 vfp3_const_double_index (rtx x)
11511 REAL_VALUE_TYPE r, m;
11512 int sign, exponent;
11513 unsigned HOST_WIDE_INT mantissa, mant_hi;
11514 unsigned HOST_WIDE_INT mask;
11515 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11516 bool fail;
11518 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11519 return -1;
11521 r = *CONST_DOUBLE_REAL_VALUE (x);
11523 /* We can't represent these things, so detect them first. */
11524 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11525 return -1;
11527 /* Extract sign, exponent and mantissa. */
11528 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11529 r = real_value_abs (&r);
11530 exponent = REAL_EXP (&r);
11531 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11532 highest (sign) bit, with a fixed binary point at bit point_pos.
11533 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11534 bits for the mantissa, this may fail (low bits would be lost). */
11535 real_ldexp (&m, &r, point_pos - exponent);
11536 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11537 mantissa = w.elt (0);
11538 mant_hi = w.elt (1);
11540 /* If there are bits set in the low part of the mantissa, we can't
11541 represent this value. */
11542 if (mantissa != 0)
11543 return -1;
11545 /* Now make it so that mantissa contains the most-significant bits, and move
11546 the point_pos to indicate that the least-significant bits have been
11547 discarded. */
11548 point_pos -= HOST_BITS_PER_WIDE_INT;
11549 mantissa = mant_hi;
11551 /* We can permit four significant bits of mantissa only, plus a high bit
11552 which is always 1. */
11553 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11554 if ((mantissa & mask) != 0)
11555 return -1;
11557 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11558 mantissa >>= point_pos - 5;
11560 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11561 floating-point immediate zero with Neon using an integer-zero load, but
11562 that case is handled elsewhere.) */
11563 if (mantissa == 0)
11564 return -1;
11566 gcc_assert (mantissa >= 16 && mantissa <= 31);
11568 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11569 normalized significands are in the range [1, 2)). (Our mantissa is shifted
11570 left 4 places at this point relative to normalized IEEE754 values). GCC
11571 internally uses [0.5, 1) (see real.c), so the exponent returned from
11572 REAL_EXP must be altered. */
11573 exponent = 5 - exponent;
11575 if (exponent < 0 || exponent > 7)
11576 return -1;
11578 /* Sign, mantissa and exponent are now in the correct form to plug into the
11579 formula described in the comment above. */
11580 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11583 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11585 vfp3_const_double_rtx (rtx x)
11587 if (!TARGET_VFP3)
11588 return 0;
11590 return vfp3_const_double_index (x) != -1;
11593 /* Recognize immediates which can be used in various Neon instructions. Legal
11594 immediates are described by the following table (for VMVN variants, the
11595 bitwise inverse of the constant shown is recognized. In either case, VMOV
11596 is output and the correct instruction to use for a given constant is chosen
11597 by the assembler). The constant shown is replicated across all elements of
11598 the destination vector.
11600 insn elems variant constant (binary)
11601 ---- ----- ------- -----------------
11602 vmov i32 0 00000000 00000000 00000000 abcdefgh
11603 vmov i32 1 00000000 00000000 abcdefgh 00000000
11604 vmov i32 2 00000000 abcdefgh 00000000 00000000
11605 vmov i32 3 abcdefgh 00000000 00000000 00000000
11606 vmov i16 4 00000000 abcdefgh
11607 vmov i16 5 abcdefgh 00000000
11608 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11609 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11610 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11611 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11612 vmvn i16 10 00000000 abcdefgh
11613 vmvn i16 11 abcdefgh 00000000
11614 vmov i32 12 00000000 00000000 abcdefgh 11111111
11615 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11616 vmov i32 14 00000000 abcdefgh 11111111 11111111
11617 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11618 vmov i8 16 abcdefgh
11619 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11620 eeeeeeee ffffffff gggggggg hhhhhhhh
11621 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11622 vmov f32 19 00000000 00000000 00000000 00000000
11624 For case 18, B = !b. Representable values are exactly those accepted by
11625 vfp3_const_double_index, but are output as floating-point numbers rather
11626 than indices.
11628 For case 19, we will change it to vmov.i32 when assembling.
11630 Variants 0-5 (inclusive) may also be used as immediates for the second
11631 operand of VORR/VBIC instructions.
11633 The INVERSE argument causes the bitwise inverse of the given operand to be
11634 recognized instead (used for recognizing legal immediates for the VAND/VORN
11635 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11636 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11637 output, rather than the real insns vbic/vorr).
11639 INVERSE makes no difference to the recognition of float vectors.
11641 The return value is the variant of immediate as shown in the above table, or
11642 -1 if the given value doesn't match any of the listed patterns.
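/* An illustrative case: a V4SImode constant with every element equal to
   0x4b matches variant 0 above (each element is
   00000000 00000000 00000000 abcdefgh with abcdefgh = 0x4b), so the
   function returns 0 with *ELEMENTWIDTH set to 32, and the whole vector
   can be emitted as a single immediate move such as "vmov.i32 q0, #0x4b".  */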
11644 static int
11645 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11646 rtx *modconst, int *elementwidth)
11648 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11649 matches = 1; \
11650 for (i = 0; i < idx; i += (STRIDE)) \
11651 if (!(TEST)) \
11652 matches = 0; \
11653 if (matches) \
11655 immtype = (CLASS); \
11656 elsize = (ELSIZE); \
11657 break; \
11660 unsigned int i, elsize = 0, idx = 0, n_elts;
11661 unsigned int innersize;
11662 unsigned char bytes[16];
11663 int immtype = -1, matches;
11664 unsigned int invmask = inverse ? 0xff : 0;
11665 bool vector = GET_CODE (op) == CONST_VECTOR;
11667 if (vector)
11668 n_elts = CONST_VECTOR_NUNITS (op);
11669 else
11671 n_elts = 1;
11672 if (mode == VOIDmode)
11673 mode = DImode;
11676 innersize = GET_MODE_UNIT_SIZE (mode);
11678 /* Vectors of float constants. */
11679 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11681 rtx el0 = CONST_VECTOR_ELT (op, 0);
11683 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11684 return -1;
11686 /* FP16 vectors cannot be represented. */
11687 if (GET_MODE_INNER (mode) == HFmode)
11688 return -1;
11690 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11691 are distinct in this context. */
11692 if (!const_vec_duplicate_p (op))
11693 return -1;
11695 if (modconst)
11696 *modconst = CONST_VECTOR_ELT (op, 0);
11698 if (elementwidth)
11699 *elementwidth = 0;
11701 if (el0 == CONST0_RTX (GET_MODE (el0)))
11702 return 19;
11703 else
11704 return 18;
11707 /* The tricks done in the code below apply for little-endian vector layout.
11708 For big-endian vectors we only allow vectors of the form { a, a, a..., a }.
11709 FIXME: Implement logic for big-endian vectors. */
11710 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11711 return -1;
11713 /* Splat vector constant out into a byte vector. */
11714 for (i = 0; i < n_elts; i++)
11716 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11717 unsigned HOST_WIDE_INT elpart;
11719 gcc_assert (CONST_INT_P (el));
11720 elpart = INTVAL (el);
11722 for (unsigned int byte = 0; byte < innersize; byte++)
11724 bytes[idx++] = (elpart & 0xff) ^ invmask;
11725 elpart >>= BITS_PER_UNIT;
11729 /* Sanity check. */
11730 gcc_assert (idx == GET_MODE_SIZE (mode));
11734 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11735 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11737 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11738 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11740 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11741 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11743 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11744 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11746 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11748 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11750 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11751 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11753 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11754 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11756 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11757 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11759 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11760 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11762 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11764 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11766 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11767 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11769 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11770 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11772 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11773 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11775 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11776 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11778 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11780 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11781 && bytes[i] == bytes[(i + 8) % idx]);
11783 while (0);
11785 if (immtype == -1)
11786 return -1;
11788 if (elementwidth)
11789 *elementwidth = elsize;
11791 if (modconst)
11793 unsigned HOST_WIDE_INT imm = 0;
11795 /* Un-invert bytes of recognized vector, if necessary. */
11796 if (invmask != 0)
11797 for (i = 0; i < idx; i++)
11798 bytes[i] ^= invmask;
11800 if (immtype == 17)
11802 /* FIXME: Broken on 32-bit H_W_I hosts. */
11803 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11805 for (i = 0; i < 8; i++)
11806 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11807 << (i * BITS_PER_UNIT);
11809 *modconst = GEN_INT (imm);
11811 else
11813 unsigned HOST_WIDE_INT imm = 0;
11815 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11816 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11818 *modconst = GEN_INT (imm);
11822 return immtype;
11823 #undef CHECK
11826 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11827 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11828 float elements), and a modified constant (whatever should be output for a
11829 VMOV) in *MODCONST. */
11832 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11833 rtx *modconst, int *elementwidth)
11835 rtx tmpconst;
11836 int tmpwidth;
11837 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11839 if (retval == -1)
11840 return 0;
11842 if (modconst)
11843 *modconst = tmpconst;
11845 if (elementwidth)
11846 *elementwidth = tmpwidth;
11848 return 1;
11851 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11852 the immediate is valid, write a constant suitable for using as an operand
11853 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11854 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11857 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11858 rtx *modconst, int *elementwidth)
11860 rtx tmpconst;
11861 int tmpwidth;
11862 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11864 if (retval < 0 || retval > 5)
11865 return 0;
11867 if (modconst)
11868 *modconst = tmpconst;
11870 if (elementwidth)
11871 *elementwidth = tmpwidth;
11873 return 1;
11876 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11877 the immediate is valid, write a constant suitable for using as an operand
11878 to VSHR/VSHL to *MODCONST and the corresponding element width to
11879 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left and a right shift,
11880 because they have different limitations. */
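/* For example, with V4HImode (16-bit elements) a left-shift count of
   0..15 is accepted while a right-shift count of 1..16 is accepted; a
   vector whose elements hold different shift counts is always rejected.  */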
11883 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11884 rtx *modconst, int *elementwidth,
11885 bool isleftshift)
11887 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11888 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11889 unsigned HOST_WIDE_INT last_elt = 0;
11890 unsigned HOST_WIDE_INT maxshift;
11892 /* All elements of the vector must hold the same shift count.  */
11893 for (i = 0; i < n_elts; i++)
11895 rtx el = CONST_VECTOR_ELT (op, i);
11896 unsigned HOST_WIDE_INT elpart;
11898 if (CONST_INT_P (el))
11899 elpart = INTVAL (el);
11900 else if (CONST_DOUBLE_P (el))
11901 return 0;
11902 else
11903 gcc_unreachable ();
11905 if (i != 0 && elpart != last_elt)
11906 return 0;
11908 last_elt = elpart;
11911 /* Shift less than element size. */
11912 maxshift = innersize * 8;
11914 if (isleftshift)
11916 /* Left shift immediate value can be from 0 to <size>-1. */
11917 if (last_elt >= maxshift)
11918 return 0;
11920 else
11922 /* Right shift immediate value can be from 1 to <size>. */
11923 if (last_elt == 0 || last_elt > maxshift)
11924 return 0;
11927 if (elementwidth)
11928 *elementwidth = innersize * 8;
11930 if (modconst)
11931 *modconst = CONST_VECTOR_ELT (op, 0);
11933 return 1;
11936 /* Return a string suitable for output of Neon immediate logic operation
11937 MNEM. */
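/* A usage sketch (operand names are only illustrative): for a VORR of a
   V4SImode value against an immediate with 0xff in each element,

     neon_output_logic_immediate ("vorr", &operands[2], V4SImode, 0, 1)

   would return the template "vorr.i32\t%q0, %2", with operands[2]
   rewritten to the modified constant.  */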
11939 char *
11940 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
11941 int inverse, int quad)
11943 int width, is_valid;
11944 static char templ[40];
11946 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
11948 gcc_assert (is_valid != 0);
11950 if (quad)
11951 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
11952 else
11953 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
11955 return templ;
11958 /* Return a string suitable for output of Neon immediate shift operation
11959 (VSHR or VSHL) MNEM. */
11961 char *
11962 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
11963 machine_mode mode, int quad,
11964 bool isleftshift)
11966 int width, is_valid;
11967 static char templ[40];
11969 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
11970 gcc_assert (is_valid != 0);
11972 if (quad)
11973 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
11974 else
11975 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
11977 return templ;
11980 /* Output a sequence of pairwise operations to implement a reduction.
11981 NOTE: We do "too much work" here, because pairwise operations work on two
11982 registers-worth of operands in one go. Unfortunately we can't exploit those
11983 extra calculations to do the full operation in fewer steps, I don't think.
11984 Although all vector elements of the result but the first are ignored, we
11985 actually calculate the same result in each of the elements. An alternative
11986 such as initially loading a vector with zero to use as each of the second
11987 operands would use up an additional register and take an extra instruction,
11988 for no particular gain. */
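/* For instance, reducing the 4-element vector {a, b, c, d} with a
   pairwise-add REDUC takes two steps, each using the running value as
   both operands:

     {a, b, c, d}         pairwise-add itself -> {a+b, c+d, a+b, c+d}
     {a+b, c+d, a+b, c+d} pairwise-add itself -> {a+b+c+d, ...}

   so every element of the final vector holds the full reduction, though
   only element 0 is subsequently used.  */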
11990 void
11991 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
11992 rtx (*reduc) (rtx, rtx, rtx))
11994 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
11995 rtx tmpsum = op1;
11997 for (i = parts / 2; i >= 1; i /= 2)
11999 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12000 emit_insn (reduc (dest, tmpsum, tmpsum));
12001 tmpsum = dest;
12005 /* If VALS is a vector constant that can be loaded into a register
12006 using VDUP, generate instructions to do so and return an RTX to
12007 assign to the register. Otherwise return NULL_RTX. */
12009 static rtx
12010 neon_vdup_constant (rtx vals)
12012 machine_mode mode = GET_MODE (vals);
12013 machine_mode inner_mode = GET_MODE_INNER (mode);
12014 rtx x;
12016 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12017 return NULL_RTX;
12019 if (!const_vec_duplicate_p (vals, &x))
12020 /* The elements are not all the same. We could handle repeating
12021 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12022 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12023 vdup.i16). */
12024 return NULL_RTX;
12026 /* We can load this constant by using VDUP and a constant in a
12027 single ARM register. This will be cheaper than a vector
12028 load. */
12030 x = copy_to_mode_reg (inner_mode, x);
12031 return gen_rtx_VEC_DUPLICATE (mode, x);
12034 /* Generate code to load VALS, which is a PARALLEL containing only
12035 constants (for vec_init) or CONST_VECTOR, efficiently into a
12036 register. Returns an RTX to copy into the register, or NULL_RTX
12037 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12040 neon_make_constant (rtx vals)
12042 machine_mode mode = GET_MODE (vals);
12043 rtx target;
12044 rtx const_vec = NULL_RTX;
12045 int n_elts = GET_MODE_NUNITS (mode);
12046 int n_const = 0;
12047 int i;
12049 if (GET_CODE (vals) == CONST_VECTOR)
12050 const_vec = vals;
12051 else if (GET_CODE (vals) == PARALLEL)
12053 /* A CONST_VECTOR must contain only CONST_INTs and
12054 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12055 Only store valid constants in a CONST_VECTOR. */
12056 for (i = 0; i < n_elts; ++i)
12058 rtx x = XVECEXP (vals, 0, i);
12059 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12060 n_const++;
12062 if (n_const == n_elts)
12063 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12065 else
12066 gcc_unreachable ();
12068 if (const_vec != NULL
12069 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12070 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12071 return const_vec;
12072 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12073 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12074 pipeline cycle; creating the constant takes one or two ARM
12075 pipeline cycles. */
12076 return target;
12077 else if (const_vec != NULL_RTX)
12078 /* Load from constant pool. On Cortex-A8 this takes two cycles
12079 (for either double or quad vectors). We cannot take advantage
12080 of single-cycle VLD1 because we need a PC-relative addressing
12081 mode. */
12082 return const_vec;
12083 else
12084 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12085 We cannot construct an initializer. */
12086 return NULL_RTX;
12089 /* Initialize vector TARGET to VALS. */
12091 void
12092 neon_expand_vector_init (rtx target, rtx vals)
12094 machine_mode mode = GET_MODE (target);
12095 machine_mode inner_mode = GET_MODE_INNER (mode);
12096 int n_elts = GET_MODE_NUNITS (mode);
12097 int n_var = 0, one_var = -1;
12098 bool all_same = true;
12099 rtx x, mem;
12100 int i;
12102 for (i = 0; i < n_elts; ++i)
12104 x = XVECEXP (vals, 0, i);
12105 if (!CONSTANT_P (x))
12106 ++n_var, one_var = i;
12108 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12109 all_same = false;
12112 if (n_var == 0)
12114 rtx constant = neon_make_constant (vals);
12115 if (constant != NULL_RTX)
12117 emit_move_insn (target, constant);
12118 return;
12122 /* Splat a single non-constant element if we can. */
12123 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12125 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12126 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12127 return;
12130 /* One field is non-constant. Load constant then overwrite varying
12131 field. This is more efficient than using the stack. */
12132 if (n_var == 1)
12134 rtx copy = copy_rtx (vals);
12135 rtx index = GEN_INT (one_var);
12137 /* Load constant part of vector, substitute neighboring value for
12138 varying element. */
12139 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12140 neon_expand_vector_init (target, copy);
12142 /* Insert variable. */
12143 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12144 switch (mode)
12146 case V8QImode:
12147 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12148 break;
12149 case V16QImode:
12150 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12151 break;
12152 case V4HImode:
12153 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12154 break;
12155 case V8HImode:
12156 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12157 break;
12158 case V2SImode:
12159 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12160 break;
12161 case V4SImode:
12162 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12163 break;
12164 case V2SFmode:
12165 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12166 break;
12167 case V4SFmode:
12168 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12169 break;
12170 case V2DImode:
12171 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12172 break;
12173 default:
12174 gcc_unreachable ();
12176 return;
12179 /* Construct the vector in memory one field at a time
12180 and load the whole vector. */
12181 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12182 for (i = 0; i < n_elts; i++)
12183 emit_move_insn (adjust_address_nv (mem, inner_mode,
12184 i * GET_MODE_SIZE (inner_mode)),
12185 XVECEXP (vals, 0, i));
12186 emit_move_insn (target, mem);
12189 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12190 an error if it doesn't. EXP indicates the source location, which includes
12191 the inlining history for intrinsics; DESC names the kind of operand checked. */
12193 static void
12194 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12195 const_tree exp, const char *desc)
12197 HOST_WIDE_INT lane;
12199 gcc_assert (CONST_INT_P (operand));
12201 lane = INTVAL (operand);
12203 if (lane < low || lane >= high)
12205 if (exp)
12206 error ("%K%s %wd out of range %wd - %wd",
12207 exp, desc, lane, low, high - 1);
12208 else
12209 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12213 /* Bounds-check lanes. */
12215 void
12216 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12217 const_tree exp)
12219 bounds_check (operand, low, high, exp, "lane");
12222 /* Bounds-check constants. */
12224 void
12225 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12227 bounds_check (operand, low, high, NULL_TREE, "constant");
12230 HOST_WIDE_INT
12231 neon_element_bits (machine_mode mode)
12233 return GET_MODE_UNIT_BITSIZE (mode);
12237 /* Predicates for `match_operand' and `match_operator'. */
12239 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12240 WB is true if full writeback address modes are allowed and is false
12241 if limited writeback address modes (POST_INC and PRE_DEC) are
12242 allowed. */
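/* Note (illustrative): the (plus (reg) (const_int)) case accepted at the
   end of this function corresponds to the coprocessor immediate offset
   range, i.e. offsets of -1020 to +1020 in multiples of 4, as in
   "vldr d0, [r1, #-1020]".  */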
12245 arm_coproc_mem_operand (rtx op, bool wb)
12247 rtx ind;
12249 /* Reject eliminable registers. */
12250 if (! (reload_in_progress || reload_completed || lra_in_progress)
12251 && ( reg_mentioned_p (frame_pointer_rtx, op)
12252 || reg_mentioned_p (arg_pointer_rtx, op)
12253 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12254 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12255 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12256 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12257 return FALSE;
12259 /* Constants are converted into offsets from labels. */
12260 if (!MEM_P (op))
12261 return FALSE;
12263 ind = XEXP (op, 0);
12265 if (reload_completed
12266 && (GET_CODE (ind) == LABEL_REF
12267 || (GET_CODE (ind) == CONST
12268 && GET_CODE (XEXP (ind, 0)) == PLUS
12269 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12270 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12271 return TRUE;
12273 /* Match: (mem (reg)). */
12274 if (REG_P (ind))
12275 return arm_address_register_rtx_p (ind, 0);
12277 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12278 acceptable in any case (subject to verification by
12279 arm_address_register_rtx_p). We need WB to be true to accept
12280 PRE_INC and POST_DEC. */
12281 if (GET_CODE (ind) == POST_INC
12282 || GET_CODE (ind) == PRE_DEC
12283 || (wb
12284 && (GET_CODE (ind) == PRE_INC
12285 || GET_CODE (ind) == POST_DEC)))
12286 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12288 if (wb
12289 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12290 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12291 && GET_CODE (XEXP (ind, 1)) == PLUS
12292 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12293 ind = XEXP (ind, 1);
12295 /* Match:
12296 (plus (reg)
12297 (const)). */
12298 if (GET_CODE (ind) == PLUS
12299 && REG_P (XEXP (ind, 0))
12300 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12301 && CONST_INT_P (XEXP (ind, 1))
12302 && INTVAL (XEXP (ind, 1)) > -1024
12303 && INTVAL (XEXP (ind, 1)) < 1024
12304 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12305 return TRUE;
12307 return FALSE;
12310 /* Return TRUE if OP is a memory operand which we can load or store a vector
12311 to/from. TYPE is one of the following values:
12312 0 - Vector load/store (vldr)
12313 1 - Core registers (ldm)
12314 2 - Element/structure loads (vld1)
12317 neon_vector_mem_operand (rtx op, int type, bool strict)
12319 rtx ind;
12321 /* Reject eliminable registers. */
12322 if (strict && ! (reload_in_progress || reload_completed)
12323 && (reg_mentioned_p (frame_pointer_rtx, op)
12324 || reg_mentioned_p (arg_pointer_rtx, op)
12325 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12326 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12327 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12328 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12329 return FALSE;
12331 /* Constants are converted into offsets from labels. */
12332 if (!MEM_P (op))
12333 return FALSE;
12335 ind = XEXP (op, 0);
12337 if (reload_completed
12338 && (GET_CODE (ind) == LABEL_REF
12339 || (GET_CODE (ind) == CONST
12340 && GET_CODE (XEXP (ind, 0)) == PLUS
12341 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12342 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12343 return TRUE;
12345 /* Match: (mem (reg)). */
12346 if (REG_P (ind))
12347 return arm_address_register_rtx_p (ind, 0);
12349 /* Allow post-increment with Neon registers. */
12350 if ((type != 1 && GET_CODE (ind) == POST_INC)
12351 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12352 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12354 /* Allow post-increment by register for VLDn */
12355 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12356 && GET_CODE (XEXP (ind, 1)) == PLUS
12357 && REG_P (XEXP (XEXP (ind, 1), 1)))
12358 return true;
12360 /* Match:
12361 (plus (reg)
12362 (const)). */
12363 if (type == 0
12364 && GET_CODE (ind) == PLUS
12365 && REG_P (XEXP (ind, 0))
12366 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12367 && CONST_INT_P (XEXP (ind, 1))
12368 && INTVAL (XEXP (ind, 1)) > -1024
12369 /* For quad modes, we restrict the constant offset to be slightly less
12370 than what the instruction format permits. We have no such constraint
12371 on double mode offsets. (This must match arm_legitimate_index_p.) */
12372 && (INTVAL (XEXP (ind, 1))
12373 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12374 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12375 return TRUE;
12377 return FALSE;
12380 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12381 type. */
12383 neon_struct_mem_operand (rtx op)
12385 rtx ind;
12387 /* Reject eliminable registers. */
12388 if (! (reload_in_progress || reload_completed)
12389 && ( reg_mentioned_p (frame_pointer_rtx, op)
12390 || reg_mentioned_p (arg_pointer_rtx, op)
12391 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12392 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12393 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12394 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12395 return FALSE;
12397 /* Constants are converted into offsets from labels. */
12398 if (!MEM_P (op))
12399 return FALSE;
12401 ind = XEXP (op, 0);
12403 if (reload_completed
12404 && (GET_CODE (ind) == LABEL_REF
12405 || (GET_CODE (ind) == CONST
12406 && GET_CODE (XEXP (ind, 0)) == PLUS
12407 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12408 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12409 return TRUE;
12411 /* Match: (mem (reg)). */
12412 if (REG_P (ind))
12413 return arm_address_register_rtx_p (ind, 0);
12415 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12416 if (GET_CODE (ind) == POST_INC
12417 || GET_CODE (ind) == PRE_DEC)
12418 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12420 return FALSE;
12423 /* Return true if X is a register that will be eliminated later on. */
12425 arm_eliminable_register (rtx x)
12427 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12428 || REGNO (x) == ARG_POINTER_REGNUM
12429 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12430 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12433 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12434 coprocessor registers. Otherwise return NO_REGS. */
12436 enum reg_class
12437 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12439 if (mode == HFmode)
12441 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12442 return GENERAL_REGS;
12443 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12444 return NO_REGS;
12445 return GENERAL_REGS;
12448 /* The neon move patterns handle all legitimate vector and struct
12449 addresses. */
12450 if (TARGET_NEON
12451 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12452 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12453 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12454 || VALID_NEON_STRUCT_MODE (mode)))
12455 return NO_REGS;
12457 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12458 return NO_REGS;
12460 return GENERAL_REGS;
12463 /* Values which must be returned in the most-significant end of the return
12464 register. */
12466 static bool
12467 arm_return_in_msb (const_tree valtype)
12469 return (TARGET_AAPCS_BASED
12470 && BYTES_BIG_ENDIAN
12471 && (AGGREGATE_TYPE_P (valtype)
12472 || TREE_CODE (valtype) == COMPLEX_TYPE
12473 || FIXED_POINT_TYPE_P (valtype)));
12476 /* Return TRUE if X references a SYMBOL_REF. */
12478 symbol_mentioned_p (rtx x)
12480 const char * fmt;
12481 int i;
12483 if (GET_CODE (x) == SYMBOL_REF)
12484 return 1;
12486 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12487 are constant offsets, not symbols. */
12488 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12489 return 0;
12491 fmt = GET_RTX_FORMAT (GET_CODE (x));
12493 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12495 if (fmt[i] == 'E')
12497 int j;
12499 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12500 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12501 return 1;
12503 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12504 return 1;
12507 return 0;
12510 /* Return TRUE if X references a LABEL_REF. */
12512 label_mentioned_p (rtx x)
12514 const char * fmt;
12515 int i;
12517 if (GET_CODE (x) == LABEL_REF)
12518 return 1;
12520 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12521 instruction, but they are constant offsets, not symbols. */
12522 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12523 return 0;
12525 fmt = GET_RTX_FORMAT (GET_CODE (x));
12526 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12528 if (fmt[i] == 'E')
12530 int j;
12532 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12533 if (label_mentioned_p (XVECEXP (x, i, j)))
12534 return 1;
12536 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12537 return 1;
12540 return 0;
12544 tls_mentioned_p (rtx x)
12546 switch (GET_CODE (x))
12548 case CONST:
12549 return tls_mentioned_p (XEXP (x, 0));
12551 case UNSPEC:
12552 if (XINT (x, 1) == UNSPEC_TLS)
12553 return 1;
12555 /* Fall through. */
12556 default:
12557 return 0;
12561 /* Must not copy any rtx that uses a pc-relative address.
12562 Also, disallow copying of load-exclusive instructions that
12563 may appear after splitting of compare-and-swap-style operations
12564 so as to prevent those loops from being transformed away from their
12565 canonical forms (see PR 69904). */
12567 static bool
12568 arm_cannot_copy_insn_p (rtx_insn *insn)
12570 /* The tls call insn cannot be copied, as it is paired with a data
12571 word. */
12572 if (recog_memoized (insn) == CODE_FOR_tlscall)
12573 return true;
12575 subrtx_iterator::array_type array;
12576 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12578 const_rtx x = *iter;
12579 if (GET_CODE (x) == UNSPEC
12580 && (XINT (x, 1) == UNSPEC_PIC_BASE
12581 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12582 return true;
12585 rtx set = single_set (insn);
12586 if (set)
12588 rtx src = SET_SRC (set);
12589 if (GET_CODE (src) == ZERO_EXTEND)
12590 src = XEXP (src, 0);
12592 /* Catch the load-exclusive and load-acquire operations. */
12593 if (GET_CODE (src) == UNSPEC_VOLATILE
12594 && (XINT (src, 1) == VUNSPEC_LL
12595 || XINT (src, 1) == VUNSPEC_LAX))
12596 return true;
12598 return false;
12601 enum rtx_code
12602 minmax_code (rtx x)
12604 enum rtx_code code = GET_CODE (x);
12606 switch (code)
12608 case SMAX:
12609 return GE;
12610 case SMIN:
12611 return LE;
12612 case UMIN:
12613 return LEU;
12614 case UMAX:
12615 return GEU;
12616 default:
12617 gcc_unreachable ();
12621 /* Match pair of min/max operators that can be implemented via usat/ssat. */
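/* For example, a clamp to [0, 255] matches "usat #8" (HI_BOUND = 2^8 - 1,
   LO_BOUND = 0, *MASK = 8), while a clamp to [-128, 127] matches "ssat #8"
   (HI_BOUND = 2^7 - 1, LO_BOUND = -128, *MASK = 8).  */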
12623 bool
12624 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12625 int *mask, bool *signed_sat)
12627 /* The high bound must be a power of two minus one. */
12628 int log = exact_log2 (INTVAL (hi_bound) + 1);
12629 if (log == -1)
12630 return false;
12632 /* The low bound is either zero (for usat) or one less than the
12633 negation of the high bound (for ssat). */
12634 if (INTVAL (lo_bound) == 0)
12636 if (mask)
12637 *mask = log;
12638 if (signed_sat)
12639 *signed_sat = false;
12641 return true;
12644 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12646 if (mask)
12647 *mask = log + 1;
12648 if (signed_sat)
12649 *signed_sat = true;
12651 return true;
12654 return false;
12657 /* Return 1 if memory locations are adjacent. */
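/* For instance, [r3] and [r3, #4] (or [r3, #8] and [r3, #4]) may be
   considered adjacent, subject to the tuning checks below; [r3, #4] and
   [r3, #12], or references off different base registers, never are.  */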
12659 adjacent_mem_locations (rtx a, rtx b)
12661 /* We don't guarantee to preserve the order of these memory refs. */
12662 if (volatile_refs_p (a) || volatile_refs_p (b))
12663 return 0;
12665 if ((REG_P (XEXP (a, 0))
12666 || (GET_CODE (XEXP (a, 0)) == PLUS
12667 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12668 && (REG_P (XEXP (b, 0))
12669 || (GET_CODE (XEXP (b, 0)) == PLUS
12670 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12672 HOST_WIDE_INT val0 = 0, val1 = 0;
12673 rtx reg0, reg1;
12674 int val_diff;
12676 if (GET_CODE (XEXP (a, 0)) == PLUS)
12678 reg0 = XEXP (XEXP (a, 0), 0);
12679 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12681 else
12682 reg0 = XEXP (a, 0);
12684 if (GET_CODE (XEXP (b, 0)) == PLUS)
12686 reg1 = XEXP (XEXP (b, 0), 0);
12687 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12689 else
12690 reg1 = XEXP (b, 0);
12692 /* Don't accept any offset that will require multiple
12693 instructions to handle, since this would cause the
12694 arith_adjacentmem pattern to output an overlong sequence. */
12695 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12696 return 0;
12698 /* Don't allow an eliminable register: register elimination can make
12699 the offset too large. */
12700 if (arm_eliminable_register (reg0))
12701 return 0;
12703 val_diff = val1 - val0;
12705 if (arm_ld_sched)
12707 /* If the target has load delay slots, then there's no benefit
12708 to using an ldm instruction unless the offset is zero and
12709 we are optimizing for size. */
12710 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12711 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12712 && (val_diff == 4 || val_diff == -4));
12715 return ((REGNO (reg0) == REGNO (reg1))
12716 && (val_diff == 4 || val_diff == -4));
12719 return 0;
12722 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12723 for load operations, false for store operations. CONSECUTIVE is true
12724 if the register numbers in the operation must be consecutive in the register
12725 bank. RETURN_PC is true if the value is to be loaded into the PC.
12726 The pattern we are trying to match for load is:
12727 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12728 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12731 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12733 where
12734 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12735 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12736 3. If consecutive is TRUE, then for the kth register being loaded,
12737 REGNO (R_dk) = REGNO (R_d0) + k.
12738 The pattern for store is similar. */
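/* Schematically, a simple two-register load multiple such as
   "ldm r0, {r1, r2}" is described by a PARALLEL of the form

     (parallel [(set (reg:SI r1) (mem:SI (reg:SI r0)))
                (set (reg:SI r2) (mem:SI (plus:SI (reg:SI r0)
                                                  (const_int 4))))])

   i.e. offset 0 for the first transfer and one <reg_increment> (4 bytes
   in SImode) more for each subsequent one.  */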
12739 bool
12740 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12741 bool consecutive, bool return_pc)
12743 HOST_WIDE_INT count = XVECLEN (op, 0);
12744 rtx reg, mem, addr;
12745 unsigned regno;
12746 unsigned first_regno;
12747 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12748 rtx elt;
12749 bool addr_reg_in_reglist = false;
12750 bool update = false;
12751 int reg_increment;
12752 int offset_adj;
12753 int regs_per_val;
12755 /* If not in SImode, then registers must be consecutive
12756 (e.g., VLDM instructions for DFmode). */
12757 gcc_assert ((mode == SImode) || consecutive);
12758 /* Setting return_pc for stores is illegal. */
12759 gcc_assert (!return_pc || load);
12761 /* Set up the increments and the regs per val based on the mode. */
12762 reg_increment = GET_MODE_SIZE (mode);
12763 regs_per_val = reg_increment / 4;
12764 offset_adj = return_pc ? 1 : 0;
12766 if (count <= 1
12767 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12768 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12769 return false;
12771 /* Check if this is a write-back. */
12772 elt = XVECEXP (op, 0, offset_adj);
12773 if (GET_CODE (SET_SRC (elt)) == PLUS)
12775 i++;
12776 base = 1;
12777 update = true;
12779 /* The offset adjustment must be the number of registers being
12780 popped times the size of a single register. */
12781 if (!REG_P (SET_DEST (elt))
12782 || !REG_P (XEXP (SET_SRC (elt), 0))
12783 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12784 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12785 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12786 ((count - 1 - offset_adj) * reg_increment))
12787 return false;
12790 i = i + offset_adj;
12791 base = base + offset_adj;
12792 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12793 success depends on the type: VLDM can do just one reg,
12794 LDM must do at least two. */
12795 if ((count <= i) && (mode == SImode))
12796 return false;
12798 elt = XVECEXP (op, 0, i - 1);
12799 if (GET_CODE (elt) != SET)
12800 return false;
12802 if (load)
12804 reg = SET_DEST (elt);
12805 mem = SET_SRC (elt);
12807 else
12809 reg = SET_SRC (elt);
12810 mem = SET_DEST (elt);
12813 if (!REG_P (reg) || !MEM_P (mem))
12814 return false;
12816 regno = REGNO (reg);
12817 first_regno = regno;
12818 addr = XEXP (mem, 0);
12819 if (GET_CODE (addr) == PLUS)
12821 if (!CONST_INT_P (XEXP (addr, 1)))
12822 return false;
12824 offset = INTVAL (XEXP (addr, 1));
12825 addr = XEXP (addr, 0);
12828 if (!REG_P (addr))
12829 return false;
12831 /* Don't allow SP to be loaded unless it is also the base register. It
12832 guarantees that SP is reset correctly when an LDM instruction
12833 is interrupted. Otherwise, we might end up with a corrupt stack. */
12834 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12835 return false;
12837 for (; i < count; i++)
12839 elt = XVECEXP (op, 0, i);
12840 if (GET_CODE (elt) != SET)
12841 return false;
12843 if (load)
12845 reg = SET_DEST (elt);
12846 mem = SET_SRC (elt);
12848 else
12850 reg = SET_SRC (elt);
12851 mem = SET_DEST (elt);
12854 if (!REG_P (reg)
12855 || GET_MODE (reg) != mode
12856 || REGNO (reg) <= regno
12857 || (consecutive
12858 && (REGNO (reg) !=
12859 (unsigned int) (first_regno + regs_per_val * (i - base))))
12860 /* Don't allow SP to be loaded unless it is also the base register. It
12861 guarantees that SP is reset correctly when an LDM instruction
12862 is interrupted. Otherwise, we might end up with a corrupt stack. */
12863 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12864 || !MEM_P (mem)
12865 || GET_MODE (mem) != mode
12866 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12867 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12868 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12869 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12870 offset + (i - base) * reg_increment))
12871 && (!REG_P (XEXP (mem, 0))
12872 || offset + (i - base) * reg_increment != 0)))
12873 return false;
12875 regno = REGNO (reg);
12876 if (regno == REGNO (addr))
12877 addr_reg_in_reglist = true;
12880 if (load)
12882 if (update && addr_reg_in_reglist)
12883 return false;
12885 /* For Thumb-1, the address register is always modified - either by write-back
12886 or by explicit load. If the pattern does not describe an update,
12887 then the address register must be in the list of loaded registers. */
12888 if (TARGET_THUMB1)
12889 return update || addr_reg_in_reglist;
12892 return true;
12895 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12896 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12897 instruction. ADD_OFFSET is nonzero if the base address register needs
12898 to be modified with an add instruction before we can use it. */
12900 static bool
12901 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
12902 int nops, HOST_WIDE_INT add_offset)
12904 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12905 if the offset isn't small enough. The reason 2 ldrs are faster
12906 is because these ARMs are able to do more than one cache access
12907 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12908 whilst the ARM8 has a double bandwidth cache. This means that
12909 these cores can do both an instruction fetch and a data fetch in
12910 a single cycle, so the trick of calculating the address into a
12911 scratch register (one of the result regs) and then doing a load
12912 multiple actually becomes slower (and no smaller in code size).
12913 That is the transformation
12915 ldr rd1, [rbase + offset]
12916 ldr rd2, [rbase + offset + 4]
	 to
12920 add rd1, rbase, offset
12921 ldmia rd1, {rd1, rd2}
12923 produces worse code -- '3 cycles + any stalls on rd2' instead of
12924 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12925 access per cycle, the first sequence could never complete in less
12926 than 6 cycles, whereas the ldm sequence would only take 5 and
12927 would make better use of sequential accesses if not hitting the
12928 cache.
12930 We cheat here and test 'arm_ld_sched' which we currently know to
12931 only be true for the ARM8, ARM9 and StrongARM. If this ever
12932 changes, then the test below needs to be reworked. */
12933 if (nops == 2 && arm_ld_sched && add_offset != 0)
12934 return false;
12936 /* XScale has load-store double instructions, but they have stricter
12937 alignment requirements than load-store multiple, so we cannot
12938 use them.
12940 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12941 the pipeline until completion.
12943 NREGS CYCLES
	  1	3
	  2	4
	  3	5
	  4	6
12949 An ldr instruction takes 1-3 cycles, but does not block the
12950 pipeline.
12952 NREGS CYCLES
12953 1 1-3
12954 2 2-6
12955 3 3-9
12956 4 4-12
12958 Best case ldr will always win. However, the more ldr instructions
12959 we issue, the less likely we are to be able to schedule them well.
12960 Using ldr instructions also increases code size.
12962 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
12963 for counts of 3 or 4 regs. */
12964 if (nops <= 2 && arm_tune_xscale && !optimize_size)
12965 return false;
12966 return true;
12969 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
12970 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
12971 an array ORDER which describes the sequence to use when accessing the
12972 offsets that produces an ascending order. In this sequence, each
12973 offset must be larger by exactly 4 than the previous one. ORDER[0]
12974 must have been filled in with the lowest offset by the caller.
12975 If UNSORTED_REGS is nonnull, it is an array of register numbers that
12976 we use to verify that ORDER produces an ascending order of registers.
12977 Return true if it was possible to construct such an order, false if
12978 not. */
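/* A worked example: with UNSORTED_OFFSETS = {8, 0, 4, 12} the caller must
   have set ORDER[0] = 1 (the index of the lowest offset, 0).  The loop
   then locates offsets 4, 8 and 12 in turn, producing ORDER = {1, 2, 0, 3}.
   Each offset must be exactly 4 larger than its predecessor, so replacing
   12 with 16 would make the function return false.  */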
12980 static bool
12981 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
12982 int *unsorted_regs)
12984 int i;
12985 for (i = 1; i < nops; i++)
12987 int j;
12989 order[i] = order[i - 1];
12990 for (j = 0; j < nops; j++)
12991 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
12993 /* We must find exactly one offset that is higher than the
12994 previous one by 4. */
12995 if (order[i] != order[i - 1])
12996 return false;
12997 order[i] = j;
12999 if (order[i] == order[i - 1])
13000 return false;
13001 /* The register numbers must be ascending. */
13002 if (unsorted_regs != NULL
13003 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13004 return false;
13006 return true;
13009 /* Used to determine in a peephole whether a sequence of load
13010 instructions can be changed into a load-multiple instruction.
13011 NOPS is the number of separate load instructions we are examining. The
13012 first NOPS entries in OPERANDS are the destination registers, the
13013 next NOPS entries are memory operands. If this function is
13014 successful, *BASE is set to the common base register of the memory
13015 accesses; *LOAD_OFFSET is set to the first memory location's offset
13016 from that base register.
13017 REGS is an array filled in with the destination register numbers.
13018 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13019 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13020 the sequence of registers in REGS matches the loads from ascending memory
13021 locations, and the function verifies that the register numbers are
13022 themselves ascending. If CHECK_REGS is false, the register numbers
13023 are stored in the order they are found in the operands. */
13024 static int
13025 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13026 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13028 int unsorted_regs[MAX_LDM_STM_OPS];
13029 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13030 int order[MAX_LDM_STM_OPS];
13031 rtx base_reg_rtx = NULL;
13032 int base_reg = -1;
13033 int i, ldm_case;
13035 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13036 easily extended if required. */
13037 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13039 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13041 /* Loop over the operands and check that the memory references are
13042 suitable (i.e. immediate offsets from the same base register). At
13043 the same time, extract the target register, and the memory
13044 offsets. */
13045 for (i = 0; i < nops; i++)
13047 rtx reg;
13048 rtx offset;
13050 /* Convert a subreg of a mem into the mem itself. */
13051 if (GET_CODE (operands[nops + i]) == SUBREG)
13052 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13054 gcc_assert (MEM_P (operands[nops + i]));
13056 /* Don't reorder volatile memory references; it doesn't seem worth
13057 looking for the case where the order is ok anyway. */
13058 if (MEM_VOLATILE_P (operands[nops + i]))
13059 return 0;
13061 offset = const0_rtx;
13063 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13064 || (GET_CODE (reg) == SUBREG
13065 && REG_P (reg = SUBREG_REG (reg))))
13066 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13067 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13068 || (GET_CODE (reg) == SUBREG
13069 && REG_P (reg = SUBREG_REG (reg))))
13070 && (CONST_INT_P (offset
13071 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13073 if (i == 0)
13075 base_reg = REGNO (reg);
13076 base_reg_rtx = reg;
13077 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13078 return 0;
13080 else if (base_reg != (int) REGNO (reg))
13081 /* Not addressed from the same base register. */
13082 return 0;
13084 unsorted_regs[i] = (REG_P (operands[i])
13085 ? REGNO (operands[i])
13086 : REGNO (SUBREG_REG (operands[i])));
13088 /* If it isn't an integer register, or if it overwrites the
13089 base register but isn't the last insn in the list, then
13090 we can't do this. */
13091 if (unsorted_regs[i] < 0
13092 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13093 || unsorted_regs[i] > 14
13094 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13095 return 0;
13097 /* Don't allow SP to be loaded unless it is also the base
13098 register. It guarantees that SP is reset correctly when
13099 an LDM instruction is interrupted. Otherwise, we might
13100 end up with a corrupt stack. */
13101 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13102 return 0;
13104 unsorted_offsets[i] = INTVAL (offset);
13105 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13106 order[0] = i;
13108 else
13109 /* Not a suitable memory address. */
13110 return 0;
13113 /* All the useful information has now been extracted from the
13114 operands into unsorted_regs and unsorted_offsets; additionally,
13115 order[0] has been set to the lowest offset in the list. Sort
13116 the offsets into order, verifying that they are adjacent, and
13117 check that the register numbers are ascending. */
13118 if (!compute_offset_order (nops, unsorted_offsets, order,
13119 check_regs ? unsorted_regs : NULL))
13120 return 0;
13122 if (saved_order)
13123 memcpy (saved_order, order, sizeof order);
13125 if (base)
13127 *base = base_reg;
13129 for (i = 0; i < nops; i++)
13130 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13132 *load_offset = unsorted_offsets[order[0]];
13135 if (TARGET_THUMB1
13136 && !peep2_reg_dead_p (nops, base_reg_rtx))
13137 return 0;
13139 if (unsorted_offsets[order[0]] == 0)
13140 ldm_case = 1; /* ldmia */
13141 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13142 ldm_case = 2; /* ldmib */
13143 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13144 ldm_case = 3; /* ldmda */
13145 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13146 ldm_case = 4; /* ldmdb */
13147 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13148 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13149 ldm_case = 5;
13150 else
13151 return 0;
13153 if (!multiple_operation_profitable_p (false, nops,
13154 ldm_case == 5
13155 ? unsorted_offsets[order[0]] : 0))
13156 return 0;
13158 return ldm_case;
13161 /* Used to determine in a peephole whether a sequence of store instructions can
13162 be changed into a store-multiple instruction.
13163 NOPS is the number of separate store instructions we are examining.
13164 NOPS_TOTAL is the total number of instructions recognized by the peephole
13165 pattern.
13166 The first NOPS entries in OPERANDS are the source registers, the next
13167 NOPS entries are memory operands. If this function is successful, *BASE is
13168 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13169 to the first memory location's offset from that base register. REGS is an
13170 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13171 likewise filled with the corresponding rtx's.
13172 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13173 numbers to an ascending order of stores.
13174 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13175 from ascending memory locations, and the function verifies that the register
13176 numbers are themselves ascending. If CHECK_REGS is false, the register
13177 numbers are stored in the order they are found in the operands. */
13178 static int
13179 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13180 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13181 HOST_WIDE_INT *load_offset, bool check_regs)
13183 int unsorted_regs[MAX_LDM_STM_OPS];
13184 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13185 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13186 int order[MAX_LDM_STM_OPS];
13187 int base_reg = -1;
13188 rtx base_reg_rtx = NULL;
13189 int i, stm_case;
13191 /* Write-back of the base register is currently only supported for Thumb-1. */
13192 int base_writeback = TARGET_THUMB1;
13194 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13195 easily extended if required. */
13196 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13198 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13200 /* Loop over the operands and check that the memory references are
13201 suitable (i.e. immediate offsets from the same base register). At
13202 the same time, extract the target register, and the memory
13203 offsets. */
13204 for (i = 0; i < nops; i++)
13206 rtx reg;
13207 rtx offset;
13209 /* Convert a subreg of a mem into the mem itself. */
13210 if (GET_CODE (operands[nops + i]) == SUBREG)
13211 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13213 gcc_assert (MEM_P (operands[nops + i]));
13215 /* Don't reorder volatile memory references; it doesn't seem worth
13216 looking for the case where the order is ok anyway. */
13217 if (MEM_VOLATILE_P (operands[nops + i]))
13218 return 0;
13220 offset = const0_rtx;
13222 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13223 || (GET_CODE (reg) == SUBREG
13224 && REG_P (reg = SUBREG_REG (reg))))
13225 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13226 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13227 || (GET_CODE (reg) == SUBREG
13228 && REG_P (reg = SUBREG_REG (reg))))
13229 && (CONST_INT_P (offset
13230 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13232 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13233 ? operands[i] : SUBREG_REG (operands[i]));
13234 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13236 if (i == 0)
13238 base_reg = REGNO (reg);
13239 base_reg_rtx = reg;
13240 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13241 return 0;
13243 else if (base_reg != (int) REGNO (reg))
13244 /* Not addressed from the same base register. */
13245 return 0;
13247 /* If it isn't an integer register, then we can't do this. */
13248 if (unsorted_regs[i] < 0
13249 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13250 /* The effects are unpredictable if the base register is
13251 both updated and stored. */
13252 || (base_writeback && unsorted_regs[i] == base_reg)
13253 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13254 || unsorted_regs[i] > 14)
13255 return 0;
13257 unsorted_offsets[i] = INTVAL (offset);
13258 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13259 order[0] = i;
13261 else
13262 /* Not a suitable memory address. */
13263 return 0;
13266 /* All the useful information has now been extracted from the
13267 operands into unsorted_regs and unsorted_offsets; additionally,
13268 order[0] has been set to the lowest offset in the list. Sort
13269 the offsets into order, verifying that they are adjacent, and
13270 check that the register numbers are ascending. */
13271 if (!compute_offset_order (nops, unsorted_offsets, order,
13272 check_regs ? unsorted_regs : NULL))
13273 return 0;
13275 if (saved_order)
13276 memcpy (saved_order, order, sizeof order);
13278 if (base)
13280 *base = base_reg;
13282 for (i = 0; i < nops; i++)
13284 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13285 if (reg_rtxs)
13286 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13289 *load_offset = unsorted_offsets[order[0]];
13292 if (TARGET_THUMB1
13293 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13294 return 0;
13296 if (unsorted_offsets[order[0]] == 0)
13297 stm_case = 1; /* stmia */
13298 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13299 stm_case = 2; /* stmib */
13300 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13301 stm_case = 3; /* stmda */
13302 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13303 stm_case = 4; /* stmdb */
13304 else
13305 return 0;
13307 if (!multiple_operation_profitable_p (false, nops, 0))
13308 return 0;
13310 return stm_case;
13313 /* Routines for use in generating RTL. */
13315 /* Generate a load-multiple instruction. COUNT is the number of loads in
13316 the instruction; REGS and MEMS are arrays containing the operands.
13317 BASEREG is the base register to be used in addressing the memory operands.
13318 WBACK_OFFSET is nonzero if the instruction should update the base
13319 register. */
13321 static rtx
13322 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13323 HOST_WIDE_INT wback_offset)
13325 int i = 0, j;
13326 rtx result;
13328 if (!multiple_operation_profitable_p (false, count, 0))
13330 rtx seq;
13332 start_sequence ();
13334 for (i = 0; i < count; i++)
13335 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13337 if (wback_offset != 0)
13338 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13340 seq = get_insns ();
13341 end_sequence ();
13343 return seq;
13346 result = gen_rtx_PARALLEL (VOIDmode,
13347 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13348 if (wback_offset != 0)
13350 XVECEXP (result, 0, 0)
13351 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13352 i = 1;
13353 count++;
13356 for (j = 0; i < count; i++, j++)
13357 XVECEXP (result, 0, i)
13358 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13360 return result;
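/* Illustrative sketch (an assumed example, not taken from the original
   comments): with COUNT == 2, REGS == {4, 5}, BASEREG == r0, MEMS
   addressing [r0] and [r0, #4], and WBACK_OFFSET == 8, the profitable
   path above builds

     (parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (const_int 8)))
                (set (reg:SI 4) (mem:SI (reg:SI 0)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))])

   which the load-multiple patterns later emit as "ldmia r0!, {r4, r5}".
   When the operation is not considered profitable, a plain sequence of
   single loads (plus the base-register update) is emitted instead.  */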
13363 /* Generate a store-multiple instruction. COUNT is the number of stores in
13364 the instruction; REGS and MEMS are arrays containing the operands.
13365 BASEREG is the base register to be used in addressing the memory operands.
13366 WBACK_OFFSET is nonzero if the instruction should update the base
13367 register. */
13369 static rtx
13370 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13371 HOST_WIDE_INT wback_offset)
13373 int i = 0, j;
13374 rtx result;
13376 if (GET_CODE (basereg) == PLUS)
13377 basereg = XEXP (basereg, 0);
13379 if (!multiple_operation_profitable_p (false, count, 0))
13381 rtx seq;
13383 start_sequence ();
13385 for (i = 0; i < count; i++)
13386 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13388 if (wback_offset != 0)
13389 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13391 seq = get_insns ();
13392 end_sequence ();
13394 return seq;
13397 result = gen_rtx_PARALLEL (VOIDmode,
13398 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13399 if (wback_offset != 0)
13401 XVECEXP (result, 0, 0)
13402 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13403 i = 1;
13404 count++;
13407 for (j = 0; i < count; i++, j++)
13408 XVECEXP (result, 0, i)
13409 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13411 return result;
13414 /* Generate either a load-multiple or a store-multiple instruction. This
13415 function can be used in situations where we can start with a single MEM
13416 rtx and adjust its address upwards.
13417 COUNT is the number of operations in the instruction, not counting a
13418 possible update of the base register. REGS is an array containing the
13419 register operands.
13420 BASEREG is the base register to be used in addressing the memory operands,
13421 which are constructed from BASEMEM.
13422 WRITE_BACK specifies whether the generated instruction should include an
13423 update of the base register.
13424 OFFSETP is used to pass an offset to and from this function; this offset
13425 is not used when constructing the address (instead BASEMEM should have an
13426 appropriate offset in its address); it is used only for setting
13427 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13429 static rtx
13430 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13431 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13433 rtx mems[MAX_LDM_STM_OPS];
13434 HOST_WIDE_INT offset = *offsetp;
13435 int i;
13437 gcc_assert (count <= MAX_LDM_STM_OPS);
13439 if (GET_CODE (basereg) == PLUS)
13440 basereg = XEXP (basereg, 0);
13442 for (i = 0; i < count; i++)
13444 rtx addr = plus_constant (Pmode, basereg, i * 4);
13445 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13446 offset += 4;
13449 if (write_back)
13450 *offsetp = offset;
13452 if (is_load)
13453 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13454 write_back ? 4 * count : 0);
13455 else
13456 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13457 write_back ? 4 * count : 0);
13460 rtx
13461 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13462 rtx basemem, HOST_WIDE_INT *offsetp)
13464 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13465 offsetp);
13468 rtx
13469 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13470 rtx basemem, HOST_WIDE_INT *offsetp)
13472 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13473 offsetp);
13476 /* Called from a peephole2 expander to turn a sequence of loads into an
13477 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13478 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13479 is true if we can reorder the registers because they are used commutatively
13480 subsequently.
13481 Returns true iff we could generate a new instruction. */
13483 bool
13484 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13486 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13487 rtx mems[MAX_LDM_STM_OPS];
13488 int i, j, base_reg;
13489 rtx base_reg_rtx;
13490 HOST_WIDE_INT offset;
13491 int write_back = FALSE;
13492 int ldm_case;
13493 rtx addr;
13495 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13496 &base_reg, &offset, !sort_regs);
13498 if (ldm_case == 0)
13499 return false;
13501 if (sort_regs)
13502 for (i = 0; i < nops - 1; i++)
13503 for (j = i + 1; j < nops; j++)
13504 if (regs[i] > regs[j])
13506 int t = regs[i];
13507 regs[i] = regs[j];
13508 regs[j] = t;
13510 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13512 if (TARGET_THUMB1)
13514 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13515 gcc_assert (ldm_case == 1 || ldm_case == 5);
13516 write_back = TRUE;
13519 if (ldm_case == 5)
13521 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13522 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13523 offset = 0;
13524 if (!TARGET_THUMB1)
13526 base_reg = regs[0];
13527 base_reg_rtx = newbase;
13531 for (i = 0; i < nops; i++)
13533 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13534 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13535 SImode, addr, 0);
13537 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13538 write_back ? offset + i * 4 : 0));
13539 return true;
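/* Illustrative example (a sketch, not part of the original comments): a
   matched peephole sequence such as

     ldr r2, [r0, #4]
     ldr r1, [r0]
     ldr r3, [r0, #8]

   has adjacent offsets starting at zero (ldm_case 1) and registers that
   ascend with the memory order, so it is re-emitted as
   "ldmia r0, {r1, r2, r3}".  When SORT_REGS is set the register list may
   additionally be permuted, since the loaded values are only used
   commutatively afterwards.  */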
13542 /* Called from a peephole2 expander to turn a sequence of stores into an
13543 STM instruction. OPERANDS are the operands found by the peephole matcher;
13544 NOPS indicates how many separate stores we are trying to combine.
13545 Returns true iff we could generate a new instruction. */
13547 bool
13548 gen_stm_seq (rtx *operands, int nops)
13550 int i;
13551 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13552 rtx mems[MAX_LDM_STM_OPS];
13553 int base_reg;
13554 rtx base_reg_rtx;
13555 HOST_WIDE_INT offset;
13556 int write_back = FALSE;
13557 int stm_case;
13558 rtx addr;
13559 bool base_reg_dies;
13561 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13562 mem_order, &base_reg, &offset, true);
13564 if (stm_case == 0)
13565 return false;
13567 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13569 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13570 if (TARGET_THUMB1)
13572 gcc_assert (base_reg_dies);
13573 write_back = TRUE;
13576 if (stm_case == 5)
13578 gcc_assert (base_reg_dies);
13579 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13580 offset = 0;
13583 addr = plus_constant (Pmode, base_reg_rtx, offset);
13585 for (i = 0; i < nops; i++)
13587 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13588 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13589 SImode, addr, 0);
13591 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13592 write_back ? offset + i * 4 : 0));
13593 return true;
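/* Illustrative example (not part of the original comments): a matched pair

     str r1, [r0]
     str r2, [r0, #4]

   has ascending registers and adjacent ascending offsets, so it is
   re-emitted as "stmia r0, {r1, r2}" (with base write-back on Thumb-1,
   where the base register must also be dead).  */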
13596 /* Called from a peephole2 expander to turn a sequence of stores that are
13597 preceded by constant loads into an STM instruction. OPERANDS are the
13598 operands found by the peephole matcher; NOPS indicates how many
13599 separate stores we are trying to combine; there are 2 * NOPS
13600 instructions in the peephole.
13601 Returns true iff we could generate a new instruction. */
13603 bool
13604 gen_const_stm_seq (rtx *operands, int nops)
13606 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13607 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13608 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13609 rtx mems[MAX_LDM_STM_OPS];
13610 int base_reg;
13611 rtx base_reg_rtx;
13612 HOST_WIDE_INT offset;
13613 int write_back = FALSE;
13614 int stm_case;
13615 rtx addr;
13616 bool base_reg_dies;
13617 int i, j;
13618 HARD_REG_SET allocated;
13620 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13621 mem_order, &base_reg, &offset, false);
13623 if (stm_case == 0)
13624 return false;
13626 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13628 /* If the same register is used more than once, try to find a free
13629 register. */
13630 CLEAR_HARD_REG_SET (allocated);
13631 for (i = 0; i < nops; i++)
13633 for (j = i + 1; j < nops; j++)
13634 if (regs[i] == regs[j])
13636 rtx t = peep2_find_free_register (0, nops * 2,
13637 TARGET_THUMB1 ? "l" : "r",
13638 SImode, &allocated);
13639 if (t == NULL_RTX)
13640 return false;
13641 reg_rtxs[i] = t;
13642 regs[i] = REGNO (t);
13646 /* Compute an ordering that maps the register numbers to an ascending
13647 sequence. */
13648 reg_order[0] = 0;
13649 for (i = 0; i < nops; i++)
13650 if (regs[i] < regs[reg_order[0]])
13651 reg_order[0] = i;
13653 for (i = 1; i < nops; i++)
13655 int this_order = reg_order[i - 1];
13656 for (j = 0; j < nops; j++)
13657 if (regs[j] > regs[reg_order[i - 1]]
13658 && (this_order == reg_order[i - 1]
13659 || regs[j] < regs[this_order]))
13660 this_order = j;
13661 reg_order[i] = this_order;
13664 /* Ensure that registers that must be live after the instruction end
13665 up with the correct value. */
13666 for (i = 0; i < nops; i++)
13668 int this_order = reg_order[i];
13669 if ((this_order != mem_order[i]
13670 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13671 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13672 return false;
13675 /* Load the constants. */
13676 for (i = 0; i < nops; i++)
13678 rtx op = operands[2 * nops + mem_order[i]];
13679 sorted_regs[i] = regs[reg_order[i]];
13680 emit_move_insn (reg_rtxs[reg_order[i]], op);
13683 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13685 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13686 if (TARGET_THUMB1)
13688 gcc_assert (base_reg_dies);
13689 write_back = TRUE;
13692 if (stm_case == 5)
13694 gcc_assert (base_reg_dies);
13695 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13696 offset = 0;
13699 addr = plus_constant (Pmode, base_reg_rtx, offset);
13701 for (i = 0; i < nops; i++)
13703 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13704 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13705 SImode, addr, 0);
13707 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13708 write_back ? offset + i * 4 : 0));
13709 return true;
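/* Illustrative sketch (an assumption about a typical match, not taken from
   the original comments): the peephole feeds in constant loads together
   with the stores of those values, e.g.

     mov r3, #10     str r3, [r0]
     mov r3, #20     str r3, [r0, #4]

   Because the same temporary is reused, the code above allocates a
   distinct free register per constant where possible, orders the chosen
   registers to match the ascending memory locations, and emits the
   constant moves followed by a single store-multiple; it gives up when no
   free register is available or a reused value is still live after the
   sequence.  */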
13712 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13713 unaligned copies on processors which support unaligned semantics for those
13714 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13715 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13716 An interleave factor of 1 (the minimum) will perform no interleaving.
13717 Load/store multiple are used for aligned addresses where possible. */
13719 static void
13720 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13721 HOST_WIDE_INT length,
13722 unsigned int interleave_factor)
13724 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13725 int *regnos = XALLOCAVEC (int, interleave_factor);
13726 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13727 HOST_WIDE_INT i, j;
13728 HOST_WIDE_INT remaining = length, words;
13729 rtx halfword_tmp = NULL, byte_tmp = NULL;
13730 rtx dst, src;
13731 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13732 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13733 HOST_WIDE_INT srcoffset, dstoffset;
13734 HOST_WIDE_INT src_autoinc, dst_autoinc;
13735 rtx mem, addr;
13737 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13739 /* Use hard registers if we have aligned source or destination so we can use
13740 load/store multiple with contiguous registers. */
13741 if (dst_aligned || src_aligned)
13742 for (i = 0; i < interleave_factor; i++)
13743 regs[i] = gen_rtx_REG (SImode, i);
13744 else
13745 for (i = 0; i < interleave_factor; i++)
13746 regs[i] = gen_reg_rtx (SImode);
13748 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13749 src = copy_addr_to_reg (XEXP (srcbase, 0));
13751 srcoffset = dstoffset = 0;
13753 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13754 For copying the last bytes we want to subtract this offset again. */
13755 src_autoinc = dst_autoinc = 0;
13757 for (i = 0; i < interleave_factor; i++)
13758 regnos[i] = i;
13760 /* Copy BLOCK_SIZE_BYTES chunks. */
13762 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13764 /* Load words. */
13765 if (src_aligned && interleave_factor > 1)
13767 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13768 TRUE, srcbase, &srcoffset));
13769 src_autoinc += UNITS_PER_WORD * interleave_factor;
13771 else
13773 for (j = 0; j < interleave_factor; j++)
13775 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13776 - src_autoinc));
13777 mem = adjust_automodify_address (srcbase, SImode, addr,
13778 srcoffset + j * UNITS_PER_WORD);
13779 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13781 srcoffset += block_size_bytes;
13784 /* Store words. */
13785 if (dst_aligned && interleave_factor > 1)
13787 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13788 TRUE, dstbase, &dstoffset));
13789 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13791 else
13793 for (j = 0; j < interleave_factor; j++)
13795 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13796 - dst_autoinc));
13797 mem = adjust_automodify_address (dstbase, SImode, addr,
13798 dstoffset + j * UNITS_PER_WORD);
13799 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13801 dstoffset += block_size_bytes;
13804 remaining -= block_size_bytes;
13807 /* Copy any whole words left (note these aren't interleaved with any
13808 subsequent halfword/byte load/stores in the interests of simplicity). */
13810 words = remaining / UNITS_PER_WORD;
13812 gcc_assert (words < interleave_factor);
13814 if (src_aligned && words > 1)
13816 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13817 &srcoffset));
13818 src_autoinc += UNITS_PER_WORD * words;
13820 else
13822 for (j = 0; j < words; j++)
13824 addr = plus_constant (Pmode, src,
13825 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13826 mem = adjust_automodify_address (srcbase, SImode, addr,
13827 srcoffset + j * UNITS_PER_WORD);
13828 if (src_aligned)
13829 emit_move_insn (regs[j], mem);
13830 else
13831 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13833 srcoffset += words * UNITS_PER_WORD;
13836 if (dst_aligned && words > 1)
13838 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13839 &dstoffset));
13840 dst_autoinc += words * UNITS_PER_WORD;
13842 else
13844 for (j = 0; j < words; j++)
13846 addr = plus_constant (Pmode, dst,
13847 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13848 mem = adjust_automodify_address (dstbase, SImode, addr,
13849 dstoffset + j * UNITS_PER_WORD);
13850 if (dst_aligned)
13851 emit_move_insn (mem, regs[j]);
13852 else
13853 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13855 dstoffset += words * UNITS_PER_WORD;
13858 remaining -= words * UNITS_PER_WORD;
13860 gcc_assert (remaining < 4);
13862 /* Copy a halfword if necessary. */
13864 if (remaining >= 2)
13866 halfword_tmp = gen_reg_rtx (SImode);
13868 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13869 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13870 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13872 /* Either write out immediately, or delay until we've loaded the last
13873 byte, depending on interleave factor. */
13874 if (interleave_factor == 1)
13876 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13877 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13878 emit_insn (gen_unaligned_storehi (mem,
13879 gen_lowpart (HImode, halfword_tmp)));
13880 halfword_tmp = NULL;
13881 dstoffset += 2;
13884 remaining -= 2;
13885 srcoffset += 2;
13888 gcc_assert (remaining < 2);
13890 /* Copy last byte. */
13892 if ((remaining & 1) != 0)
13894 byte_tmp = gen_reg_rtx (SImode);
13896 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13897 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13898 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
13900 if (interleave_factor == 1)
13902 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13903 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13904 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13905 byte_tmp = NULL;
13906 dstoffset++;
13909 remaining--;
13910 srcoffset++;
13913 /* Store last halfword if we haven't done so already. */
13915 if (halfword_tmp)
13917 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13918 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13919 emit_insn (gen_unaligned_storehi (mem,
13920 gen_lowpart (HImode, halfword_tmp)));
13921 dstoffset += 2;
13924 /* Likewise for last byte. */
13926 if (byte_tmp)
13928 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13929 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13930 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13931 dstoffset++;
13934 gcc_assert (remaining == 0 && srcoffset == dstoffset);
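/* Worked example (illustrative, not from the original comments): with
   INTERLEAVE_FACTOR == 4 and LENGTH == 23, the routine above copies one
   16-byte block (four word loads then four word stores, or an ldm/stm when
   the corresponding side is word aligned), then one remaining word, then a
   halfword, and finally a single byte, ending with
   SRCOFFSET == DSTOFFSET == 23 as the closing assertion requires.  */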
13937 /* From mips_adjust_block_mem:
13939 Helper function for doing a loop-based block operation on memory
13940 reference MEM. Each iteration of the loop will operate on LENGTH
13941 bytes of MEM.
13943 Create a new base register for use within the loop and point it to
13944 the start of MEM. Create a new memory reference that uses this
13945 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13947 static void
13948 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
13949 rtx *loop_mem)
13951 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
13953 /* Although the new mem does not refer to a known location,
13954 it does keep up to LENGTH bytes of alignment. */
13955 *loop_mem = change_address (mem, BLKmode, *loop_reg);
13956 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
13959 /* From mips_block_move_loop:
13961 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
13962 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
13963 the memory regions do not overlap. */
13965 static void
13966 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
13967 unsigned int interleave_factor,
13968 HOST_WIDE_INT bytes_per_iter)
13970 rtx src_reg, dest_reg, final_src, test;
13971 HOST_WIDE_INT leftover;
13973 leftover = length % bytes_per_iter;
13974 length -= leftover;
13976 /* Create registers and memory references for use within the loop. */
13977 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
13978 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
13980 /* Calculate the value that SRC_REG should have after the last iteration of
13981 the loop. */
13982 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
13983 0, 0, OPTAB_WIDEN);
13985 /* Emit the start of the loop. */
13986 rtx_code_label *label = gen_label_rtx ();
13987 emit_label (label);
13989 /* Emit the loop body. */
13990 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
13991 interleave_factor);
13993 /* Move on to the next block. */
13994 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
13995 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
13997 /* Emit the loop condition. */
13998 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
13999 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14001 /* Mop up any left-over bytes. */
14002 if (leftover)
14003 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
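/* Worked example (illustrative, not from the original comments): for
   LENGTH == 100 and BYTES_PER_ITER == 16, LEFTOVER is 4; the loop above
   runs for six 16-byte iterations and the remaining 4 bytes are copied by
   the final straight-line call.  */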
14006 /* Emit a block move when either the source or destination is unaligned (not
14007 aligned to a four-byte boundary). This may need further tuning depending on
14008 core type, optimize_size setting, etc. */
14010 static int
14011 arm_movmemqi_unaligned (rtx *operands)
14013 HOST_WIDE_INT length = INTVAL (operands[2]);
14015 if (optimize_size)
14017 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14018 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14019 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14020 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14021 or dst_aligned though: allow more interleaving in those cases since the
14022 resulting code can be smaller. */
14023 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14024 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14026 if (length > 12)
14027 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14028 interleave_factor, bytes_per_iter);
14029 else
14030 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14031 interleave_factor);
14033 else
14035 /* Note that the loop created by arm_block_move_unaligned_loop may be
14036 subject to loop unrolling, which makes tuning this condition a little
14037 redundant. */
14038 if (length > 32)
14039 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14040 else
14041 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14044 return 1;
14047 int
14048 arm_gen_movmemqi (rtx *operands)
14050 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14051 HOST_WIDE_INT srcoffset, dstoffset;
14052 int i;
14053 rtx src, dst, srcbase, dstbase;
14054 rtx part_bytes_reg = NULL;
14055 rtx mem;
14057 if (!CONST_INT_P (operands[2])
14058 || !CONST_INT_P (operands[3])
14059 || INTVAL (operands[2]) > 64)
14060 return 0;
14062 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14063 return arm_movmemqi_unaligned (operands);
14065 if (INTVAL (operands[3]) & 3)
14066 return 0;
14068 dstbase = operands[0];
14069 srcbase = operands[1];
14071 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14072 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14074 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14075 out_words_to_go = INTVAL (operands[2]) / 4;
14076 last_bytes = INTVAL (operands[2]) & 3;
14077 dstoffset = srcoffset = 0;
14079 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14080 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14082 for (i = 0; in_words_to_go >= 2; i+=4)
14084 if (in_words_to_go > 4)
14085 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14086 TRUE, srcbase, &srcoffset));
14087 else
14088 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14089 src, FALSE, srcbase,
14090 &srcoffset));
14092 if (out_words_to_go)
14094 if (out_words_to_go > 4)
14095 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14096 TRUE, dstbase, &dstoffset));
14097 else if (out_words_to_go != 1)
14098 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14099 out_words_to_go, dst,
14100 (last_bytes == 0
14101 ? FALSE : TRUE),
14102 dstbase, &dstoffset));
14103 else
14105 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14106 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14107 if (last_bytes != 0)
14109 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14110 dstoffset += 4;
14115 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14116 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14119 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14120 if (out_words_to_go)
14122 rtx sreg;
14124 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14125 sreg = copy_to_reg (mem);
14127 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14128 emit_move_insn (mem, sreg);
14129 in_words_to_go--;
14131 gcc_assert (!in_words_to_go); /* Sanity check */
14134 if (in_words_to_go)
14136 gcc_assert (in_words_to_go > 0);
14138 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14139 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14142 gcc_assert (!last_bytes || part_bytes_reg);
14144 if (BYTES_BIG_ENDIAN && last_bytes)
14146 rtx tmp = gen_reg_rtx (SImode);
14148 /* The bytes we want are in the top end of the word. */
14149 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14150 GEN_INT (8 * (4 - last_bytes))));
14151 part_bytes_reg = tmp;
14153 while (last_bytes)
14155 mem = adjust_automodify_address (dstbase, QImode,
14156 plus_constant (Pmode, dst,
14157 last_bytes - 1),
14158 dstoffset + last_bytes - 1);
14159 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14161 if (--last_bytes)
14163 tmp = gen_reg_rtx (SImode);
14164 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14165 part_bytes_reg = tmp;
14170 else
14172 if (last_bytes > 1)
14174 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14175 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14176 last_bytes -= 2;
14177 if (last_bytes)
14179 rtx tmp = gen_reg_rtx (SImode);
14180 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14181 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14182 part_bytes_reg = tmp;
14183 dstoffset += 2;
14187 if (last_bytes)
14189 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14190 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14194 return 1;
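/* Illustrative example (an assumption about a typical case, not taken from
   the original comments): a 16-byte copy with both operands word aligned
   gives in_words_to_go == out_words_to_go == 4 and last_bytes == 0, so the
   routine above emits one load-multiple of four consecutive registers
   followed by the matching store-multiple and no trailing byte stores.  */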
14197 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14198 by mode size. */
14199 inline static rtx
14200 next_consecutive_mem (rtx mem)
14202 machine_mode mode = GET_MODE (mem);
14203 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14204 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14206 return adjust_automodify_address (mem, mode, addr, offset);
14209 /* Copy using LDRD/STRD instructions whenever possible.
14210 Returns true upon success. */
14211 bool
14212 gen_movmem_ldrd_strd (rtx *operands)
14214 unsigned HOST_WIDE_INT len;
14215 HOST_WIDE_INT align;
14216 rtx src, dst, base;
14217 rtx reg0;
14218 bool src_aligned, dst_aligned;
14219 bool src_volatile, dst_volatile;
14221 gcc_assert (CONST_INT_P (operands[2]));
14222 gcc_assert (CONST_INT_P (operands[3]));
14224 len = UINTVAL (operands[2]);
14225 if (len > 64)
14226 return false;
14228 /* Maximum alignment we can assume for both src and dst buffers. */
14229 align = INTVAL (operands[3]);
14231 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14232 return false;
14234 /* Place src and dst addresses in registers
14235 and update the corresponding mem rtx. */
14236 dst = operands[0];
14237 dst_volatile = MEM_VOLATILE_P (dst);
14238 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14239 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14240 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14242 src = operands[1];
14243 src_volatile = MEM_VOLATILE_P (src);
14244 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14245 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14246 src = adjust_automodify_address (src, VOIDmode, base, 0);
14248 if (!unaligned_access && !(src_aligned && dst_aligned))
14249 return false;
14251 if (src_volatile || dst_volatile)
14252 return false;
14254 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14255 if (!(dst_aligned || src_aligned))
14256 return arm_gen_movmemqi (operands);
14258 /* If either the src or dst is unaligned we'll be accessing it as pairs
14259 of unaligned SImode accesses. Otherwise we can generate DImode
14260 ldrd/strd instructions. */
14261 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14262 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14264 while (len >= 8)
14266 len -= 8;
14267 reg0 = gen_reg_rtx (DImode);
14268 rtx low_reg = NULL_RTX;
14269 rtx hi_reg = NULL_RTX;
14271 if (!src_aligned || !dst_aligned)
14273 low_reg = gen_lowpart (SImode, reg0);
14274 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14276 if (src_aligned)
14277 emit_move_insn (reg0, src);
14278 else
14280 emit_insn (gen_unaligned_loadsi (low_reg, src));
14281 src = next_consecutive_mem (src);
14282 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14285 if (dst_aligned)
14286 emit_move_insn (dst, reg0);
14287 else
14289 emit_insn (gen_unaligned_storesi (dst, low_reg));
14290 dst = next_consecutive_mem (dst);
14291 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14294 src = next_consecutive_mem (src);
14295 dst = next_consecutive_mem (dst);
14298 gcc_assert (len < 8);
14299 if (len >= 4)
14301 /* At least a word but less than a double-word to copy. Copy a word. */
14302 reg0 = gen_reg_rtx (SImode);
14303 src = adjust_address (src, SImode, 0);
14304 dst = adjust_address (dst, SImode, 0);
14305 if (src_aligned)
14306 emit_move_insn (reg0, src);
14307 else
14308 emit_insn (gen_unaligned_loadsi (reg0, src));
14310 if (dst_aligned)
14311 emit_move_insn (dst, reg0);
14312 else
14313 emit_insn (gen_unaligned_storesi (dst, reg0));
14315 src = next_consecutive_mem (src);
14316 dst = next_consecutive_mem (dst);
14317 len -= 4;
14320 if (len == 0)
14321 return true;
14323 /* Copy the remaining bytes. */
14324 if (len >= 2)
14326 dst = adjust_address (dst, HImode, 0);
14327 src = adjust_address (src, HImode, 0);
14328 reg0 = gen_reg_rtx (SImode);
14329 if (src_aligned)
14330 emit_insn (gen_zero_extendhisi2 (reg0, src));
14331 else
14332 emit_insn (gen_unaligned_loadhiu (reg0, src));
14334 if (dst_aligned)
14335 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14336 else
14337 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14339 src = next_consecutive_mem (src);
14340 dst = next_consecutive_mem (dst);
14341 if (len == 2)
14342 return true;
14345 dst = adjust_address (dst, QImode, 0);
14346 src = adjust_address (src, QImode, 0);
14347 reg0 = gen_reg_rtx (QImode);
14348 emit_move_insn (reg0, src);
14349 emit_move_insn (dst, reg0);
14350 return true;
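/* Illustrative example (a sketch, not from the original comments): copying
   15 bytes with both buffers word aligned proceeds as one DImode move
   (the LDRD/STRD case), then a single word, then a halfword, and finally
   one byte, handled by the successive len >= 8, >= 4 and >= 2 steps
   above.  */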
14353 /* Select a dominance comparison mode if possible for a test of the general
14354 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14355 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14356 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14357 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14358 In all cases OP will be either EQ or NE, but we don't need to know which
14359 here. If we are unable to support a dominance comparison we return
14360 CC mode. This will then fail to match for the RTL expressions that
14361 generate this call. */
14362 machine_mode
14363 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14365 enum rtx_code cond1, cond2;
14366 int swapped = 0;
14368 /* Currently we will probably get the wrong result if the individual
14369 comparisons are not simple. This also ensures that it is safe to
14370 reverse a comparison if necessary. */
14371 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14372 != CCmode)
14373 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14374 != CCmode))
14375 return CCmode;
14377 /* The if_then_else variant of this tests the second condition if the
14378 first passes, but is true if the first fails. Reverse the first
14379 condition to get a true "inclusive-or" expression. */
14380 if (cond_or == DOM_CC_NX_OR_Y)
14381 cond1 = reverse_condition (cond1);
14383 /* If the comparisons are not equal, and one doesn't dominate the other,
14384 then we can't do this. */
14385 if (cond1 != cond2
14386 && !comparison_dominates_p (cond1, cond2)
14387 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14388 return CCmode;
14390 if (swapped)
14391 std::swap (cond1, cond2);
14393 switch (cond1)
14395 case EQ:
14396 if (cond_or == DOM_CC_X_AND_Y)
14397 return CC_DEQmode;
14399 switch (cond2)
14401 case EQ: return CC_DEQmode;
14402 case LE: return CC_DLEmode;
14403 case LEU: return CC_DLEUmode;
14404 case GE: return CC_DGEmode;
14405 case GEU: return CC_DGEUmode;
14406 default: gcc_unreachable ();
14409 case LT:
14410 if (cond_or == DOM_CC_X_AND_Y)
14411 return CC_DLTmode;
14413 switch (cond2)
14415 case LT:
14416 return CC_DLTmode;
14417 case LE:
14418 return CC_DLEmode;
14419 case NE:
14420 return CC_DNEmode;
14421 default:
14422 gcc_unreachable ();
14425 case GT:
14426 if (cond_or == DOM_CC_X_AND_Y)
14427 return CC_DGTmode;
14429 switch (cond2)
14431 case GT:
14432 return CC_DGTmode;
14433 case GE:
14434 return CC_DGEmode;
14435 case NE:
14436 return CC_DNEmode;
14437 default:
14438 gcc_unreachable ();
14441 case LTU:
14442 if (cond_or == DOM_CC_X_AND_Y)
14443 return CC_DLTUmode;
14445 switch (cond2)
14447 case LTU:
14448 return CC_DLTUmode;
14449 case LEU:
14450 return CC_DLEUmode;
14451 case NE:
14452 return CC_DNEmode;
14453 default:
14454 gcc_unreachable ();
14457 case GTU:
14458 if (cond_or == DOM_CC_X_AND_Y)
14459 return CC_DGTUmode;
14461 switch (cond2)
14463 case GTU:
14464 return CC_DGTUmode;
14465 case GEU:
14466 return CC_DGEUmode;
14467 case NE:
14468 return CC_DNEmode;
14469 default:
14470 gcc_unreachable ();
14473 /* The remaining cases only occur when both comparisons are the
14474 same. */
14475 case NE:
14476 gcc_assert (cond1 == cond2);
14477 return CC_DNEmode;
14479 case LE:
14480 gcc_assert (cond1 == cond2);
14481 return CC_DLEmode;
14483 case GE:
14484 gcc_assert (cond1 == cond2);
14485 return CC_DGEmode;
14487 case LEU:
14488 gcc_assert (cond1 == cond2);
14489 return CC_DLEUmode;
14491 case GEU:
14492 gcc_assert (cond1 == cond2);
14493 return CC_DGEUmode;
14495 default:
14496 gcc_unreachable ();
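/* Example (illustrative, not part of the original comments): for a test of
   the form (x == 0 || x == 4), both component comparisons are EQ, so with
   COND_OR == DOM_CC_X_OR_Y the switch above returns CC_DEQmode; the
   patterns using it can then emit a conditional-compare sequence such as
   "cmp r0, #0" followed by "cmpne r0, #4" and a single conditional
   branch.  */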
14500 machine_mode
14501 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14503 /* All floating point compares return CCFP if it is an equality
14504 comparison, and CCFPE otherwise. */
14505 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14507 switch (op)
14509 case EQ:
14510 case NE:
14511 case UNORDERED:
14512 case ORDERED:
14513 case UNLT:
14514 case UNLE:
14515 case UNGT:
14516 case UNGE:
14517 case UNEQ:
14518 case LTGT:
14519 return CCFPmode;
14521 case LT:
14522 case LE:
14523 case GT:
14524 case GE:
14525 return CCFPEmode;
14527 default:
14528 gcc_unreachable ();
14532 /* A compare with a shifted operand. Because of canonicalization, the
14533 comparison will have to be swapped when we emit the assembler. */
14534 if (GET_MODE (y) == SImode
14535 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14536 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14537 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14538 || GET_CODE (x) == ROTATERT))
14539 return CC_SWPmode;
14541 /* This operation is performed swapped, but since we only rely on the Z
14542 flag we don't need an additional mode. */
14543 if (GET_MODE (y) == SImode
14544 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14545 && GET_CODE (x) == NEG
14546 && (op == EQ || op == NE))
14547 return CC_Zmode;
14549 /* This is a special case that is used by combine to allow a
14550 comparison of a shifted byte load to be split into a zero-extend
14551 followed by a comparison of the shifted integer (only valid for
14552 equalities and unsigned inequalities). */
14553 if (GET_MODE (x) == SImode
14554 && GET_CODE (x) == ASHIFT
14555 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14556 && GET_CODE (XEXP (x, 0)) == SUBREG
14557 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14558 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14559 && (op == EQ || op == NE
14560 || op == GEU || op == GTU || op == LTU || op == LEU)
14561 && CONST_INT_P (y))
14562 return CC_Zmode;
14564 /* A construct for a conditional compare, if the false arm contains
14565 0, then both conditions must be true, otherwise either condition
14566 must be true. Not all conditions are possible, so CCmode is
14567 returned if it can't be done. */
14568 if (GET_CODE (x) == IF_THEN_ELSE
14569 && (XEXP (x, 2) == const0_rtx
14570 || XEXP (x, 2) == const1_rtx)
14571 && COMPARISON_P (XEXP (x, 0))
14572 && COMPARISON_P (XEXP (x, 1)))
14573 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14574 INTVAL (XEXP (x, 2)));
14576 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14577 if (GET_CODE (x) == AND
14578 && (op == EQ || op == NE)
14579 && COMPARISON_P (XEXP (x, 0))
14580 && COMPARISON_P (XEXP (x, 1)))
14581 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14582 DOM_CC_X_AND_Y);
14584 if (GET_CODE (x) == IOR
14585 && (op == EQ || op == NE)
14586 && COMPARISON_P (XEXP (x, 0))
14587 && COMPARISON_P (XEXP (x, 1)))
14588 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14589 DOM_CC_X_OR_Y);
14591 /* An operation (on Thumb) where we want to test for a single bit.
14592 This is done by shifting that bit up into the top bit of a
14593 scratch register; we can then branch on the sign bit. */
14594 if (TARGET_THUMB1
14595 && GET_MODE (x) == SImode
14596 && (op == EQ || op == NE)
14597 && GET_CODE (x) == ZERO_EXTRACT
14598 && XEXP (x, 1) == const1_rtx)
14599 return CC_Nmode;
14601 /* For an operation that sets the condition codes as a side-effect, the
14602 V flag is not set correctly, so we can only use comparisons where
14603 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14604 instead.) */
14605 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14606 if (GET_MODE (x) == SImode
14607 && y == const0_rtx
14608 && (op == EQ || op == NE || op == LT || op == GE)
14609 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14610 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14611 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14612 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14613 || GET_CODE (x) == LSHIFTRT
14614 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14615 || GET_CODE (x) == ROTATERT
14616 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14617 return CC_NOOVmode;
14619 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14620 return CC_Zmode;
14622 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14623 && GET_CODE (x) == PLUS
14624 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14625 return CC_Cmode;
14627 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14629 switch (op)
14631 case EQ:
14632 case NE:
14633 /* A DImode comparison against zero can be implemented by
14634 or'ing the two halves together. */
14635 if (y == const0_rtx)
14636 return CC_Zmode;
14638 /* We can do an equality test in three Thumb instructions. */
14639 if (!TARGET_32BIT)
14640 return CC_Zmode;
14642 /* FALLTHROUGH */
14644 case LTU:
14645 case LEU:
14646 case GTU:
14647 case GEU:
14648 /* DImode unsigned comparisons can be implemented by cmp +
14649 cmpeq without a scratch register. Not worth doing in
14650 Thumb-2. */
14651 if (TARGET_32BIT)
14652 return CC_CZmode;
14654 /* FALLTHROUGH */
14656 case LT:
14657 case LE:
14658 case GT:
14659 case GE:
14660 /* DImode signed and unsigned comparisons can be implemented
14661 by cmp + sbcs with a scratch register, but that does not
14662 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14663 gcc_assert (op != EQ && op != NE);
14664 return CC_NCVmode;
14666 default:
14667 gcc_unreachable ();
14671 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14672 return GET_MODE (x);
14674 return CCmode;
14677 /* X and Y are two things to compare using CODE. Emit the compare insn and
14678 return the rtx for the CC register in the proper mode. SCRATCH is a
14679 scratch register that may be needed for DImode comparisons. */
14680 rtx
14681 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14683 machine_mode mode;
14684 rtx cc_reg;
14685 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14687 /* We might have X as a constant, Y as a register because of the predicates
14688 used for cmpdi. If so, force X to a register here. */
14689 if (dimode_comparison && !REG_P (x))
14690 x = force_reg (DImode, x);
14692 mode = SELECT_CC_MODE (code, x, y);
14693 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14695 if (dimode_comparison
14696 && mode != CC_CZmode)
14698 rtx clobber, set;
14700 /* To compare two non-zero values for equality, XOR them and
14701 then compare against zero. Not used for ARM mode; there
14702 CC_CZmode is cheaper. */
14703 if (mode == CC_Zmode && y != const0_rtx)
14705 gcc_assert (!reload_completed);
14706 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14707 y = const0_rtx;
14710 /* A scratch register is required. */
14711 if (reload_completed)
14712 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14713 else
14714 scratch = gen_rtx_SCRATCH (SImode);
14716 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14717 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14718 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14720 else
14721 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14723 return cc_reg;
14726 /* Generate a sequence of insns that will generate the correct return
14727 address mask depending on the physical architecture that the program
14728 is running on. */
14729 rtx
14730 arm_gen_return_addr_mask (void)
14732 rtx reg = gen_reg_rtx (Pmode);
14734 emit_insn (gen_return_addr_mask (reg));
14735 return reg;
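/* Handle loading a half-word from memory during reload by synthesizing it
   as two byte loads that are then combined with a shift and an IOR (see
   also arm_reload_out_hi below).  OPERANDS[1] is the half-word source
   (possibly a pseudo spilled to the stack), OPERANDS[0] is the destination
   register and OPERANDS[2] is a DImode scratch; one half or the other of
   the scratch is chosen so that the temporary never overlaps the
   destination.  */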
14738 void
14739 arm_reload_in_hi (rtx *operands)
14741 rtx ref = operands[1];
14742 rtx base, scratch;
14743 HOST_WIDE_INT offset = 0;
14745 if (GET_CODE (ref) == SUBREG)
14747 offset = SUBREG_BYTE (ref);
14748 ref = SUBREG_REG (ref);
14751 if (REG_P (ref))
14753 /* We have a pseudo which has been spilt onto the stack; there
14754 are two cases here: the first where there is a simple
14755 stack-slot replacement and a second where the stack-slot is
14756 out of range, or is used as a subreg. */
14757 if (reg_equiv_mem (REGNO (ref)))
14759 ref = reg_equiv_mem (REGNO (ref));
14760 base = find_replacement (&XEXP (ref, 0));
14762 else
14763 /* The slot is out of range, or was dressed up in a SUBREG. */
14764 base = reg_equiv_address (REGNO (ref));
14766 /* PR 62554: If there is no equivalent memory location then just move
14767 the value as an SImode register move. This happens when the target
14768 architecture variant does not have an HImode register move. */
14769 if (base == NULL)
14771 gcc_assert (REG_P (operands[0]));
14772 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14773 gen_rtx_SUBREG (SImode, ref, 0)));
14774 return;
14777 else
14778 base = find_replacement (&XEXP (ref, 0));
14780 /* Handle the case where the address is too complex to be offset by 1. */
14781 if (GET_CODE (base) == MINUS
14782 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14784 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14786 emit_set_insn (base_plus, base);
14787 base = base_plus;
14789 else if (GET_CODE (base) == PLUS)
14791 /* The addend must be CONST_INT, or we would have dealt with it above. */
14792 HOST_WIDE_INT hi, lo;
14794 offset += INTVAL (XEXP (base, 1));
14795 base = XEXP (base, 0);
14797 /* Rework the address into a legal sequence of insns. */
14798 /* Valid range for lo is -4095 -> 4095 */
14799 lo = (offset >= 0
14800 ? (offset & 0xfff)
14801 : -((-offset) & 0xfff));
14803 /* Corner case, if lo is the max offset then we would be out of range
14804 once we have added the additional 1 below, so bump the msb into the
14805 pre-loading insn(s). */
14806 if (lo == 4095)
14807 lo &= 0x7ff;
14809 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14810 ^ (HOST_WIDE_INT) 0x80000000)
14811 - (HOST_WIDE_INT) 0x80000000);
14813 gcc_assert (hi + lo == offset);
14815 if (hi != 0)
14817 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14819 /* Get the base address; addsi3 knows how to handle constants
14820 that require more than one insn. */
14821 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14822 base = base_plus;
14823 offset = lo;
14827 /* Operands[2] may overlap operands[0] (though it won't overlap
14828 operands[1]), that's why we asked for a DImode reg -- so we can
14829 use the bit that does not overlap. */
14830 if (REGNO (operands[2]) == REGNO (operands[0]))
14831 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14832 else
14833 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14835 emit_insn (gen_zero_extendqisi2 (scratch,
14836 gen_rtx_MEM (QImode,
14837 plus_constant (Pmode, base,
14838 offset))));
14839 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14840 gen_rtx_MEM (QImode,
14841 plus_constant (Pmode, base,
14842 offset + 1))));
14843 if (!BYTES_BIG_ENDIAN)
14844 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14845 gen_rtx_IOR (SImode,
14846 gen_rtx_ASHIFT
14847 (SImode,
14848 gen_rtx_SUBREG (SImode, operands[0], 0),
14849 GEN_INT (8)),
14850 scratch));
14851 else
14852 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14853 gen_rtx_IOR (SImode,
14854 gen_rtx_ASHIFT (SImode, scratch,
14855 GEN_INT (8)),
14856 gen_rtx_SUBREG (SImode, operands[0], 0)));
14859 /* Handle storing a half-word to memory during reload by synthesizing as two
14860 byte stores. Take care not to clobber the input values until after we
14861 have moved them somewhere safe. This code assumes that if the DImode
14862 scratch in operands[2] overlaps either the input value or output address
14863 in some way, then that value must die in this insn (we absolutely need
14864 two scratch registers for some corner cases). */
14865 void
14866 arm_reload_out_hi (rtx *operands)
14868 rtx ref = operands[0];
14869 rtx outval = operands[1];
14870 rtx base, scratch;
14871 HOST_WIDE_INT offset = 0;
14873 if (GET_CODE (ref) == SUBREG)
14875 offset = SUBREG_BYTE (ref);
14876 ref = SUBREG_REG (ref);
14879 if (REG_P (ref))
14881 /* We have a pseudo which has been spilt onto the stack; there
14882 are two cases here: the first where there is a simple
14883 stack-slot replacement and a second where the stack-slot is
14884 out of range, or is used as a subreg. */
14885 if (reg_equiv_mem (REGNO (ref)))
14887 ref = reg_equiv_mem (REGNO (ref));
14888 base = find_replacement (&XEXP (ref, 0));
14890 else
14891 /* The slot is out of range, or was dressed up in a SUBREG. */
14892 base = reg_equiv_address (REGNO (ref));
14894 /* PR 62254: If there is no equivalent memory location then just move
14895 the value as an SImode register move. This happens when the target
14896 architecture variant does not have an HImode register move. */
14897 if (base == NULL)
14899 gcc_assert (REG_P (outval) || SUBREG_P (outval));
14901 if (REG_P (outval))
14903 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14904 gen_rtx_SUBREG (SImode, outval, 0)));
14906 else /* SUBREG_P (outval) */
14908 if (GET_MODE (SUBREG_REG (outval)) == SImode)
14909 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14910 SUBREG_REG (outval)));
14911 else
14912 /* FIXME: Handle other cases ? */
14913 gcc_unreachable ();
14915 return;
14918 else
14919 base = find_replacement (&XEXP (ref, 0));
14921 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14923 /* Handle the case where the address is too complex to be offset by 1. */
14924 if (GET_CODE (base) == MINUS
14925 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14927 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14929 /* Be careful not to destroy OUTVAL. */
14930 if (reg_overlap_mentioned_p (base_plus, outval))
14932 /* Updating base_plus might destroy outval, see if we can
14933 swap the scratch and base_plus. */
14934 if (!reg_overlap_mentioned_p (scratch, outval))
14935 std::swap (scratch, base_plus);
14936 else
14938 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14940 /* Be conservative and copy OUTVAL into the scratch now,
14941 this should only be necessary if outval is a subreg
14942 of something larger than a word. */
14943 /* XXX Might this clobber base? I can't see how it can,
14944 since scratch is known to overlap with OUTVAL, and
14945 must be wider than a word. */
14946 emit_insn (gen_movhi (scratch_hi, outval));
14947 outval = scratch_hi;
14951 emit_set_insn (base_plus, base);
14952 base = base_plus;
14954 else if (GET_CODE (base) == PLUS)
14956 /* The addend must be CONST_INT, or we would have dealt with it above. */
14957 HOST_WIDE_INT hi, lo;
14959 offset += INTVAL (XEXP (base, 1));
14960 base = XEXP (base, 0);
14962 /* Rework the address into a legal sequence of insns. */
14963 /* Valid range for lo is -4095 -> 4095 */
14964 lo = (offset >= 0
14965 ? (offset & 0xfff)
14966 : -((-offset) & 0xfff));
14968 /* Corner case, if lo is the max offset then we would be out of range
14969 once we have added the additional 1 below, so bump the msb into the
14970 pre-loading insn(s). */
14971 if (lo == 4095)
14972 lo &= 0x7ff;
14974 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14975 ^ (HOST_WIDE_INT) 0x80000000)
14976 - (HOST_WIDE_INT) 0x80000000);
14978 gcc_assert (hi + lo == offset);
14980 if (hi != 0)
14982 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14984 /* Be careful not to destroy OUTVAL. */
14985 if (reg_overlap_mentioned_p (base_plus, outval))
14987 /* Updating base_plus might destroy outval, see if we
14988 can swap the scratch and base_plus. */
14989 if (!reg_overlap_mentioned_p (scratch, outval))
14990 std::swap (scratch, base_plus);
14991 else
14993 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14995 /* Be conservative and copy outval into scratch now,
14996 this should only be necessary if outval is a
14997 subreg of something larger than a word. */
14998 /* XXX Might this clobber base? I can't see how it
14999 can, since scratch is known to overlap with
15000 outval. */
15001 emit_insn (gen_movhi (scratch_hi, outval));
15002 outval = scratch_hi;
15006 /* Get the base address; addsi3 knows how to handle constants
15007 that require more than one insn. */
15008 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15009 base = base_plus;
15010 offset = lo;
15014 if (BYTES_BIG_ENDIAN)
15016 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15017 plus_constant (Pmode, base,
15018 offset + 1)),
15019 gen_lowpart (QImode, outval)));
15020 emit_insn (gen_lshrsi3 (scratch,
15021 gen_rtx_SUBREG (SImode, outval, 0),
15022 GEN_INT (8)));
15023 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15024 offset)),
15025 gen_lowpart (QImode, scratch)));
15027 else
15029 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15030 offset)),
15031 gen_lowpart (QImode, outval)));
15032 emit_insn (gen_lshrsi3 (scratch,
15033 gen_rtx_SUBREG (SImode, outval, 0),
15034 GEN_INT (8)));
15035 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15036 plus_constant (Pmode, base,
15037 offset + 1)),
15038 gen_lowpart (QImode, scratch)));
15042 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15043 (padded to the size of a word) should be passed in a register. */
15045 static bool
15046 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15048 if (TARGET_AAPCS_BASED)
15049 return must_pass_in_stack_var_size (mode, type);
15050 else
15051 return must_pass_in_stack_var_size_or_pad (mode, type);
15055 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15056 Return true if an argument passed on the stack should be padded upwards,
15057 i.e. if the least-significant byte has useful data.
15058 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15059 aggregate types are placed in the lowest memory address. */
15061 bool
15062 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15064 if (!TARGET_AAPCS_BASED)
15065 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15067 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15068 return false;
15070 return true;
15074 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15075 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15076 register has useful data, and return the opposite if the most
15077 significant byte does. */
15079 bool
15080 arm_pad_reg_upward (machine_mode mode,
15081 tree type, int first ATTRIBUTE_UNUSED)
15083 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15085 /* For AAPCS, small aggregates, small fixed-point types,
15086 and small complex types are always padded upwards. */
15087 if (type)
15089 if ((AGGREGATE_TYPE_P (type)
15090 || TREE_CODE (type) == COMPLEX_TYPE
15091 || FIXED_POINT_TYPE_P (type))
15092 && int_size_in_bytes (type) <= 4)
15093 return true;
15095 else
15097 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15098 && GET_MODE_SIZE (mode) <= 4)
15099 return true;
15103 /* Otherwise, use default padding. */
15104 return !BYTES_BIG_ENDIAN;
15107 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15108 assuming that the address in the base register is word aligned. */
15109 bool
15110 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15112 HOST_WIDE_INT max_offset;
15114 /* Offset must be a multiple of 4 in Thumb mode. */
15115 if (TARGET_THUMB2 && ((offset & 3) != 0))
15116 return false;
15118 if (TARGET_THUMB2)
15119 max_offset = 1020;
15120 else if (TARGET_ARM)
15121 max_offset = 255;
15122 else
15123 return false;
15125 return ((offset <= max_offset) && (offset >= -max_offset));
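/* A rough worked example of the ranges above: an offset of 1020 is
   accepted for Thumb-2 but rejected for ARM (limit 255); -252 is
   accepted for both; and an offset of 2 is rejected for Thumb-2,
   since it is not a multiple of 4, yet accepted for ARM.  */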
15128 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15129 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15130 Assumes that the address in the base register RN is word aligned. Pattern
15131 guarantees that both memory accesses use the same base register,
15132 the offsets are constants within the range, and the gap between the offsets is 4.
15133 If reload is complete then check that registers are legal. WBACK indicates whether
15134 address is updated. LOAD indicates whether memory access is load or store. */
15135 bool
15136 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15137 bool wback, bool load)
15139 unsigned int t, t2, n;
15141 if (!reload_completed)
15142 return true;
15144 if (!offset_ok_for_ldrd_strd (offset))
15145 return false;
15147 t = REGNO (rt);
15148 t2 = REGNO (rt2);
15149 n = REGNO (rn);
15151 if ((TARGET_THUMB2)
15152 && ((wback && (n == t || n == t2))
15153 || (t == SP_REGNUM)
15154 || (t == PC_REGNUM)
15155 || (t2 == SP_REGNUM)
15156 || (t2 == PC_REGNUM)
15157 || (!load && (n == PC_REGNUM))
15158 || (load && (t == t2))
15159 /* Triggers Cortex-M3 LDRD errata. */
15160 || (!wback && load && fix_cm3_ldrd && (n == t))))
15161 return false;
15163 if ((TARGET_ARM)
15164 && ((wback && (n == t || n == t2))
15165 || (t2 == PC_REGNUM)
15166 || (t % 2 != 0) /* First destination register is not even. */
15167 || (t2 != t + 1)
15168 /* PC can be used as base register (for offset addressing only),
15169 but it is deprecated. */
15170 || (n == PC_REGNUM)))
15171 return false;
15173 return true;
15176 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15177 operand MEM's address contains an immediate offset from the base
15178 register and has no side effects, in which case it sets BASE and
15179 OFFSET accordingly. */
15180 static bool
15181 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15183 rtx addr;
15185 gcc_assert (base != NULL && offset != NULL);
15187 /* TODO: Handle more general memory operand patterns, such as
15188 PRE_DEC and PRE_INC. */
15190 if (side_effects_p (mem))
15191 return false;
15193 /* Can't deal with subregs. */
15194 if (GET_CODE (mem) == SUBREG)
15195 return false;
15197 gcc_assert (MEM_P (mem));
15199 *offset = const0_rtx;
15201 addr = XEXP (mem, 0);
15203 /* If addr isn't valid for DImode, then we can't handle it. */
15204 if (!arm_legitimate_address_p (DImode, addr,
15205 reload_in_progress || reload_completed))
15206 return false;
15208 if (REG_P (addr))
15210 *base = addr;
15211 return true;
15213 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15215 *base = XEXP (addr, 0);
15216 *offset = XEXP (addr, 1);
15217 return (REG_P (*base) && CONST_INT_P (*offset));
15220 return false;
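/* Illustrative sketch of what the helper above accepts, assuming the
   address is also legitimate for DImode:
     (mem (reg r1))                        -> base = r1, offset = 0
     (mem (plus (reg r1) (const_int 8)))   -> base = r1, offset = 8
   whereas auto-modified addresses (e.g. POST_INC) and SUBREG memory
   operands are rejected.  */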
15223 /* Called from a peephole2 to replace two word-size accesses with a
15224 single LDRD/STRD instruction. Returns true iff we can generate a
15225 new instruction sequence. That is, both accesses use the same base
15226 register and the gap between constant offsets is 4. This function
15227 may reorder its operands to match ldrd/strd RTL templates.
15228 OPERANDS are the operands found by the peephole matcher;
15229 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15230 corresponding memory operands. LOAD indicates whether the access
15231 is load or store. CONST_STORE indicates a store of constant
15232 integer values held in OPERANDS[4,5] and assumes that the pattern
15233 is 4 insns long, for the purpose of checking dead registers.
15234 COMMUTE indicates that register operands may be reordered. */
15235 bool
15236 gen_operands_ldrd_strd (rtx *operands, bool load,
15237 bool const_store, bool commute)
15239 int nops = 2;
15240 HOST_WIDE_INT offsets[2], offset;
15241 rtx base = NULL_RTX;
15242 rtx cur_base, cur_offset, tmp;
15243 int i, gap;
15244 HARD_REG_SET regset;
15246 gcc_assert (!const_store || !load);
15247 /* Check that the memory references are immediate offsets from the
15248 same base register. Extract the base register, the destination
15249 registers, and the corresponding memory offsets. */
15250 for (i = 0; i < nops; i++)
15252 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15253 return false;
15255 if (i == 0)
15256 base = cur_base;
15257 else if (REGNO (base) != REGNO (cur_base))
15258 return false;
15260 offsets[i] = INTVAL (cur_offset);
15261 if (GET_CODE (operands[i]) == SUBREG)
15263 tmp = SUBREG_REG (operands[i]);
15264 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15265 operands[i] = tmp;
15269 /* Make sure there is no dependency between the individual loads. */
15270 if (load && REGNO (operands[0]) == REGNO (base))
15271 return false; /* RAW */
15273 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15274 return false; /* WAW */
15276 /* If the same input register is used in both stores
15277 when storing different constants, try to find a free register.
15278 For example, the code
15279 mov r0, 0
15280 str r0, [r2]
15281 mov r0, 1
15282 str r0, [r2, #4]
15283 can be transformed into
15284 mov r1, 0
15285 mov r0, 1
15286 strd r1, r0, [r2]
15287 in Thumb mode assuming that r1 is free.
15288 For ARM mode do the same but only if the starting register
15289 can be made to be even. */
15290 if (const_store
15291 && REGNO (operands[0]) == REGNO (operands[1])
15292 && INTVAL (operands[4]) != INTVAL (operands[5]))
15294 if (TARGET_THUMB2)
15296 CLEAR_HARD_REG_SET (regset);
15297 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15298 if (tmp == NULL_RTX)
15299 return false;
15301 /* Use the new register in the first load to ensure that
15302 if the original input register is not dead after peephole,
15303 then it will have the correct constant value. */
15304 operands[0] = tmp;
15306 else if (TARGET_ARM)
15308 int regno = REGNO (operands[0]);
15309 if (!peep2_reg_dead_p (4, operands[0]))
15311 /* When the input register is even and is not dead after the
15312 pattern, it has to hold the second constant but we cannot
15313 form a legal STRD in ARM mode with this register as the second
15314 register. */
15315 if (regno % 2 == 0)
15316 return false;
15318 /* Is regno-1 free? */
15319 SET_HARD_REG_SET (regset);
15320 CLEAR_HARD_REG_BIT(regset, regno - 1);
15321 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15322 if (tmp == NULL_RTX)
15323 return false;
15325 operands[0] = tmp;
15327 else
15329 /* Find a DImode register. */
15330 CLEAR_HARD_REG_SET (regset);
15331 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15332 if (tmp != NULL_RTX)
15334 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15335 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15337 else
15339 /* Can we use the input register to form a DI register? */
15340 SET_HARD_REG_SET (regset);
15341 CLEAR_HARD_REG_BIT(regset,
15342 regno % 2 == 0 ? regno + 1 : regno - 1);
15343 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15344 if (tmp == NULL_RTX)
15345 return false;
15346 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15350 gcc_assert (operands[0] != NULL_RTX);
15351 gcc_assert (operands[1] != NULL_RTX);
15352 gcc_assert (REGNO (operands[0]) % 2 == 0);
15353 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15357 /* Make sure the instructions are ordered with lower memory access first. */
15358 if (offsets[0] > offsets[1])
15360 gap = offsets[0] - offsets[1];
15361 offset = offsets[1];
15363 /* Swap the instructions such that lower memory is accessed first. */
15364 std::swap (operands[0], operands[1]);
15365 std::swap (operands[2], operands[3]);
15366 if (const_store)
15367 std::swap (operands[4], operands[5]);
15369 else
15371 gap = offsets[1] - offsets[0];
15372 offset = offsets[0];
15375 /* Make sure accesses are to consecutive memory locations. */
15376 if (gap != 4)
15377 return false;
15379 /* Make sure we generate legal instructions. */
15380 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15381 false, load))
15382 return true;
15384 /* In Thumb state, where registers are almost unconstrained, there
15385 is little hope to fix it. */
15386 if (TARGET_THUMB2)
15387 return false;
15389 if (load && commute)
15391 /* Try reordering registers. */
15392 std::swap (operands[0], operands[1]);
15393 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15394 false, load))
15395 return true;
15398 if (const_store)
15400 /* If input registers are dead after this pattern, they can be
15401 reordered or replaced by other registers that are free in the
15402 current pattern. */
15403 if (!peep2_reg_dead_p (4, operands[0])
15404 || !peep2_reg_dead_p (4, operands[1]))
15405 return false;
15407 /* Try to reorder the input registers. */
15408 /* For example, the code
15409 mov r0, 0
15410 mov r1, 1
15411 str r1, [r2]
15412 str r0, [r2, #4]
15413 can be transformed into
15414 mov r1, 0
15415 mov r0, 1
15416 strd r0, [r2]
15418 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15419 false, false))
15421 std::swap (operands[0], operands[1]);
15422 return true;
15425 /* Try to find a free DI register. */
15426 CLEAR_HARD_REG_SET (regset);
15427 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15428 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15429 while (true)
15431 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15432 if (tmp == NULL_RTX)
15433 return false;
15435 /* DREG must be an even-numbered register in DImode.
15436 Split it into SI registers. */
15437 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15438 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15439 gcc_assert (operands[0] != NULL_RTX);
15440 gcc_assert (operands[1] != NULL_RTX);
15441 gcc_assert (REGNO (operands[0]) % 2 == 0);
15442 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15444 return (operands_ok_ldrd_strd (operands[0], operands[1],
15445 base, offset,
15446 false, load));
15450 return false;
15456 /* Print a symbolic form of X to the debug file, F. */
15457 static void
15458 arm_print_value (FILE *f, rtx x)
15460 switch (GET_CODE (x))
15462 case CONST_INT:
15463 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15464 return;
15466 case CONST_DOUBLE:
15467 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15468 return;
15470 case CONST_VECTOR:
15472 int i;
15474 fprintf (f, "<");
15475 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15477 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15478 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15479 fputc (',', f);
15481 fprintf (f, ">");
15483 return;
15485 case CONST_STRING:
15486 fprintf (f, "\"%s\"", XSTR (x, 0));
15487 return;
15489 case SYMBOL_REF:
15490 fprintf (f, "`%s'", XSTR (x, 0));
15491 return;
15493 case LABEL_REF:
15494 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15495 return;
15497 case CONST:
15498 arm_print_value (f, XEXP (x, 0));
15499 return;
15501 case PLUS:
15502 arm_print_value (f, XEXP (x, 0));
15503 fprintf (f, "+");
15504 arm_print_value (f, XEXP (x, 1));
15505 return;
15507 case PC:
15508 fprintf (f, "pc");
15509 return;
15511 default:
15512 fprintf (f, "????");
15513 return;
15517 /* Routines for manipulation of the constant pool. */
15519 /* Arm instructions cannot load a large constant directly into a
15520 register; they have to come from a pc relative load. The constant
15521 must therefore be placed in the addressable range of the pc
15522 relative load. Depending on the precise pc relative load
15523 instruction the range is somewhere between 256 bytes and 4k. This
15524 means that we often have to dump a constant inside a function, and
15525 generate code to branch around it.
15527 It is important to minimize this, since the branches will slow
15528 things down and make the code larger.
15530 Normally we can hide the table after an existing unconditional
15531 branch so that there is no interruption of the flow, but in the
15532 worst case the code looks like this:
15534 ldr rn, L1
15536 b L2
15537 align
15538 L1: .long value
15542 ldr rn, L3
15544 b L4
15545 align
15546 L3: .long value
15550 We fix this by performing a scan after scheduling, which notices
15551 which instructions need to have their operands fetched from the
15552 constant table and builds the table.
15554 The algorithm starts by building a table of all the constants that
15555 need fixing up and all the natural barriers in the function (places
15556 where a constant table can be dropped without breaking the flow).
15557 For each fixup we note how far the pc-relative replacement will be
15558 able to reach and the offset of the instruction into the function.
15560 Having built the table we then group the fixes together to form
15561 tables that are as large as possible (subject to addressing
15562 constraints) and emit each table of constants after the last
15563 barrier that is within range of all the instructions in the group.
15564 If a group does not contain a barrier, then we forcibly create one
15565 by inserting a jump instruction into the flow. Once the table has
15566 been inserted, the insns are then modified to reference the
15567 relevant entry in the pool.
15569 Possible enhancements to the algorithm (not implemented) are:
15571 1) For some processors and object formats, there may be benefit in
15572 aligning the pools to the start of cache lines; this alignment
15573 would need to be taken into account when calculating addressability
15574 of a pool. */
15576 /* These typedefs are located at the start of this file, so that
15577 they can be used in the prototypes there. This comment is to
15578 remind readers of that fact so that the following structures
15579 can be understood more easily.
15581 typedef struct minipool_node Mnode;
15582 typedef struct minipool_fixup Mfix; */
15584 struct minipool_node
15586 /* Doubly linked chain of entries. */
15587 Mnode * next;
15588 Mnode * prev;
15589 /* The maximum offset into the code that this entry can be placed. While
15590 pushing fixes for forward references, all entries are sorted in order
15591 of increasing max_address. */
15592 HOST_WIDE_INT max_address;
15593 /* Similarly for an entry inserted for a backwards ref. */
15594 HOST_WIDE_INT min_address;
15595 /* The number of fixes referencing this entry. This can become zero
15596 if we "unpush" an entry. In this case we ignore the entry when we
15597 come to emit the code. */
15598 int refcount;
15599 /* The offset from the start of the minipool. */
15600 HOST_WIDE_INT offset;
15601 /* The value in table. */
15602 rtx value;
15603 /* The mode of value. */
15604 machine_mode mode;
15605 /* The size of the value. With iWMMXt enabled
15606 sizes > 4 also imply an alignment of 8-bytes. */
15607 int fix_size;
15610 struct minipool_fixup
15612 Mfix * next;
15613 rtx_insn * insn;
15614 HOST_WIDE_INT address;
15615 rtx * loc;
15616 machine_mode mode;
15617 int fix_size;
15618 rtx value;
15619 Mnode * minipool;
15620 HOST_WIDE_INT forwards;
15621 HOST_WIDE_INT backwards;
15624 /* Fixes less than a word need padding out to a word boundary. */
15625 #define MINIPOOL_FIX_SIZE(mode) \
15626 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
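/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   both evaluate to 4, while MINIPOOL_FIX_SIZE (DImode) is 8.  */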
15628 static Mnode * minipool_vector_head;
15629 static Mnode * minipool_vector_tail;
15630 static rtx_code_label *minipool_vector_label;
15631 static int minipool_pad;
15633 /* The linked list of all minipool fixes required for this function. */
15634 Mfix * minipool_fix_head;
15635 Mfix * minipool_fix_tail;
15636 /* The fix entry for the current minipool, once it has been placed. */
15637 Mfix * minipool_barrier;
15639 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15640 #define JUMP_TABLES_IN_TEXT_SECTION 0
15641 #endif
15643 static HOST_WIDE_INT
15644 get_jump_table_size (rtx_jump_table_data *insn)
15646 /* ADDR_VECs only take room if read-only data goes into the text
15647 section. */
15648 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15650 rtx body = PATTERN (insn);
15651 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15652 HOST_WIDE_INT size;
15653 HOST_WIDE_INT modesize;
15655 modesize = GET_MODE_SIZE (GET_MODE (body));
15656 size = modesize * XVECLEN (body, elt);
15657 switch (modesize)
15659 case 1:
15660 /* Round up size of TBB table to a halfword boundary. */
15661 size = (size + 1) & ~HOST_WIDE_INT_1;
15662 break;
15663 case 2:
15664 /* No padding necessary for TBH. */
15665 break;
15666 case 4:
15667 /* Add two bytes for alignment on Thumb. */
15668 if (TARGET_THUMB)
15669 size += 2;
15670 break;
15671 default:
15672 gcc_unreachable ();
15674 return size;
15677 return 0;
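/* A rough example of the sizing above, for a table with five entries
   placed in the text section: a QImode (TBB style) table occupies
   5 bytes, rounded up to 6; an HImode (TBH style) table occupies 10
   bytes with no padding; an SImode table occupies 20 bytes, plus 2
   bytes of alignment when compiling for Thumb.  */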
15680 /* Return the maximum amount of padding that will be inserted before
15681 label LABEL. */
15683 static HOST_WIDE_INT
15684 get_label_padding (rtx label)
15686 HOST_WIDE_INT align, min_insn_size;
15688 align = 1 << label_to_alignment (label);
15689 min_insn_size = TARGET_THUMB ? 2 : 4;
15690 return align > min_insn_size ? align - min_insn_size : 0;
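/* For instance, if LABEL is aligned to an 8-byte boundary, up to
   8 - 2 = 6 bytes of padding may precede it on Thumb targets, and up
   to 8 - 4 = 4 bytes on ARM targets.  */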
15693 /* Move a minipool fix MP from its current location to before MAX_MP.
15694 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15695 constraints may need updating. */
15696 static Mnode *
15697 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15698 HOST_WIDE_INT max_address)
15700 /* The code below assumes these are different. */
15701 gcc_assert (mp != max_mp);
15703 if (max_mp == NULL)
15705 if (max_address < mp->max_address)
15706 mp->max_address = max_address;
15708 else
15710 if (max_address > max_mp->max_address - mp->fix_size)
15711 mp->max_address = max_mp->max_address - mp->fix_size;
15712 else
15713 mp->max_address = max_address;
15715 /* Unlink MP from its current position. Since max_mp is non-null,
15716 mp->prev must be non-null. */
15717 mp->prev->next = mp->next;
15718 if (mp->next != NULL)
15719 mp->next->prev = mp->prev;
15720 else
15721 minipool_vector_tail = mp->prev;
15723 /* Re-insert it before MAX_MP. */
15724 mp->next = max_mp;
15725 mp->prev = max_mp->prev;
15726 max_mp->prev = mp;
15728 if (mp->prev != NULL)
15729 mp->prev->next = mp;
15730 else
15731 minipool_vector_head = mp;
15734 /* Save the new entry. */
15735 max_mp = mp;
15737 /* Scan over the preceding entries and adjust their addresses as
15738 required. */
15739 while (mp->prev != NULL
15740 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15742 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15743 mp = mp->prev;
15746 return max_mp;
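/* As a small illustration of the constraint tightening above: if
   MAX_MP may be placed no later than offset 1024 and MP occupies 8
   bytes, then MP, which must now come before MAX_MP, is limited to
   the smaller of MAX_ADDRESS and 1024 - 8.  */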
15749 /* Add a constant to the minipool for a forward reference. Returns the
15750 node added or NULL if the constant will not fit in this pool. */
15751 static Mnode *
15752 add_minipool_forward_ref (Mfix *fix)
15754 /* If set, max_mp is the first pool_entry that has a lower
15755 constraint than the one we are trying to add. */
15756 Mnode * max_mp = NULL;
15757 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15758 Mnode * mp;
15760 /* If the minipool starts before the end of FIX->INSN then this FIX
15761 cannot be placed into the current pool. Furthermore, adding the
15762 new constant pool entry may cause the pool to start FIX_SIZE bytes
15763 earlier. */
15764 if (minipool_vector_head &&
15765 (fix->address + get_attr_length (fix->insn)
15766 >= minipool_vector_head->max_address - fix->fix_size))
15767 return NULL;
15769 /* Scan the pool to see if a constant with the same value has
15770 already been added. While we are doing this, also note the
15771 location where we must insert the constant if it doesn't already
15772 exist. */
15773 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15775 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15776 && fix->mode == mp->mode
15777 && (!LABEL_P (fix->value)
15778 || (CODE_LABEL_NUMBER (fix->value)
15779 == CODE_LABEL_NUMBER (mp->value)))
15780 && rtx_equal_p (fix->value, mp->value))
15782 /* More than one fix references this entry. */
15783 mp->refcount++;
15784 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15787 /* Note the insertion point if necessary. */
15788 if (max_mp == NULL
15789 && mp->max_address > max_address)
15790 max_mp = mp;
15792 /* If we are inserting an 8-byte aligned quantity and
15793 we have not already found an insertion point, then
15794 make sure that all such 8-byte aligned quantities are
15795 placed at the start of the pool. */
15796 if (ARM_DOUBLEWORD_ALIGN
15797 && max_mp == NULL
15798 && fix->fix_size >= 8
15799 && mp->fix_size < 8)
15801 max_mp = mp;
15802 max_address = mp->max_address;
15806 /* The value is not currently in the minipool, so we need to create
15807 a new entry for it. If MAX_MP is NULL, the entry will be put on
15808 the end of the list since the placement is less constrained than
15809 any existing entry. Otherwise, we insert the new fix before
15810 MAX_MP and, if necessary, adjust the constraints on the other
15811 entries. */
15812 mp = XNEW (Mnode);
15813 mp->fix_size = fix->fix_size;
15814 mp->mode = fix->mode;
15815 mp->value = fix->value;
15816 mp->refcount = 1;
15817 /* Not yet required for a backwards ref. */
15818 mp->min_address = -65536;
15820 if (max_mp == NULL)
15822 mp->max_address = max_address;
15823 mp->next = NULL;
15824 mp->prev = minipool_vector_tail;
15826 if (mp->prev == NULL)
15828 minipool_vector_head = mp;
15829 minipool_vector_label = gen_label_rtx ();
15831 else
15832 mp->prev->next = mp;
15834 minipool_vector_tail = mp;
15836 else
15838 if (max_address > max_mp->max_address - mp->fix_size)
15839 mp->max_address = max_mp->max_address - mp->fix_size;
15840 else
15841 mp->max_address = max_address;
15843 mp->next = max_mp;
15844 mp->prev = max_mp->prev;
15845 max_mp->prev = mp;
15846 if (mp->prev != NULL)
15847 mp->prev->next = mp;
15848 else
15849 minipool_vector_head = mp;
15852 /* Save the new entry. */
15853 max_mp = mp;
15855 /* Scan over the preceding entries and adjust their addresses as
15856 required. */
15857 while (mp->prev != NULL
15858 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15860 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15861 mp = mp->prev;
15864 return max_mp;
15867 static Mnode *
15868 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15869 HOST_WIDE_INT min_address)
15871 HOST_WIDE_INT offset;
15873 /* The code below assumes these are different. */
15874 gcc_assert (mp != min_mp);
15876 if (min_mp == NULL)
15878 if (min_address > mp->min_address)
15879 mp->min_address = min_address;
15881 else
15883 /* We will adjust this below if it is too loose. */
15884 mp->min_address = min_address;
15886 /* Unlink MP from its current position. Since min_mp is non-null,
15887 mp->next must be non-null. */
15888 mp->next->prev = mp->prev;
15889 if (mp->prev != NULL)
15890 mp->prev->next = mp->next;
15891 else
15892 minipool_vector_head = mp->next;
15894 /* Reinsert it after MIN_MP. */
15895 mp->prev = min_mp;
15896 mp->next = min_mp->next;
15897 min_mp->next = mp;
15898 if (mp->next != NULL)
15899 mp->next->prev = mp;
15900 else
15901 minipool_vector_tail = mp;
15904 min_mp = mp;
15906 offset = 0;
15907 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15909 mp->offset = offset;
15910 if (mp->refcount > 0)
15911 offset += mp->fix_size;
15913 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
15914 mp->next->min_address = mp->min_address + mp->fix_size;
15917 return min_mp;
15920 /* Add a constant to the minipool for a backward reference. Returns the
15921 node added or NULL if the constant will not fit in this pool.
15923 Note that the code for insertion for a backwards reference can be
15924 somewhat confusing because the calculated offsets for each fix do
15925 not take into account the size of the pool (which is still under
15926 construction). */
15927 static Mnode *
15928 add_minipool_backward_ref (Mfix *fix)
15930 /* If set, min_mp is the last pool_entry that has a lower constraint
15931 than the one we are trying to add. */
15932 Mnode *min_mp = NULL;
15933 /* This can be negative, since it is only a constraint. */
15934 HOST_WIDE_INT min_address = fix->address - fix->backwards;
15935 Mnode *mp;
15937 /* If we can't reach the current pool from this insn, or if we can't
15938 insert this entry at the end of the pool without pushing other
15939 fixes out of range, then we don't try. This ensures that we
15940 can't fail later on. */
15941 if (min_address >= minipool_barrier->address
15942 || (minipool_vector_tail->min_address + fix->fix_size
15943 >= minipool_barrier->address))
15944 return NULL;
15946 /* Scan the pool to see if a constant with the same value has
15947 already been added. While we are doing this, also note the
15948 location where we must insert the constant if it doesn't already
15949 exist. */
15950 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
15952 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15953 && fix->mode == mp->mode
15954 && (!LABEL_P (fix->value)
15955 || (CODE_LABEL_NUMBER (fix->value)
15956 == CODE_LABEL_NUMBER (mp->value)))
15957 && rtx_equal_p (fix->value, mp->value)
15958 /* Check that there is enough slack to move this entry to the
15959 end of the table (this is conservative). */
15960 && (mp->max_address
15961 > (minipool_barrier->address
15962 + minipool_vector_tail->offset
15963 + minipool_vector_tail->fix_size)))
15965 mp->refcount++;
15966 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
15969 if (min_mp != NULL)
15970 mp->min_address += fix->fix_size;
15971 else
15973 /* Note the insertion point if necessary. */
15974 if (mp->min_address < min_address)
15976 /* For now, we do not allow the insertion of 8-byte alignment
15977 requiring nodes anywhere but at the start of the pool. */
15978 if (ARM_DOUBLEWORD_ALIGN
15979 && fix->fix_size >= 8 && mp->fix_size < 8)
15980 return NULL;
15981 else
15982 min_mp = mp;
15984 else if (mp->max_address
15985 < minipool_barrier->address + mp->offset + fix->fix_size)
15987 /* Inserting before this entry would push the fix beyond
15988 its maximum address (which can happen if we have
15989 re-located a forwards fix); force the new fix to come
15990 after it. */
15991 if (ARM_DOUBLEWORD_ALIGN
15992 && fix->fix_size >= 8 && mp->fix_size < 8)
15993 return NULL;
15994 else
15996 min_mp = mp;
15997 min_address = mp->min_address + fix->fix_size;
16000 /* Do not insert a non-8-byte aligned quantity before 8-byte
16001 aligned quantities. */
16002 else if (ARM_DOUBLEWORD_ALIGN
16003 && fix->fix_size < 8
16004 && mp->fix_size >= 8)
16006 min_mp = mp;
16007 min_address = mp->min_address + fix->fix_size;
16012 /* We need to create a new entry. */
16013 mp = XNEW (Mnode);
16014 mp->fix_size = fix->fix_size;
16015 mp->mode = fix->mode;
16016 mp->value = fix->value;
16017 mp->refcount = 1;
16018 mp->max_address = minipool_barrier->address + 65536;
16020 mp->min_address = min_address;
16022 if (min_mp == NULL)
16024 mp->prev = NULL;
16025 mp->next = minipool_vector_head;
16027 if (mp->next == NULL)
16029 minipool_vector_tail = mp;
16030 minipool_vector_label = gen_label_rtx ();
16032 else
16033 mp->next->prev = mp;
16035 minipool_vector_head = mp;
16037 else
16039 mp->next = min_mp->next;
16040 mp->prev = min_mp;
16041 min_mp->next = mp;
16043 if (mp->next != NULL)
16044 mp->next->prev = mp;
16045 else
16046 minipool_vector_tail = mp;
16049 /* Save the new entry. */
16050 min_mp = mp;
16052 if (mp->prev)
16053 mp = mp->prev;
16054 else
16055 mp->offset = 0;
16057 /* Scan over the following entries and adjust their offsets. */
16058 while (mp->next != NULL)
16060 if (mp->next->min_address < mp->min_address + mp->fix_size)
16061 mp->next->min_address = mp->min_address + mp->fix_size;
16063 if (mp->refcount)
16064 mp->next->offset = mp->offset + mp->fix_size;
16065 else
16066 mp->next->offset = mp->offset;
16068 mp = mp->next;
16071 return min_mp;
16074 static void
16075 assign_minipool_offsets (Mfix *barrier)
16077 HOST_WIDE_INT offset = 0;
16078 Mnode *mp;
16080 minipool_barrier = barrier;
16082 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16084 mp->offset = offset;
16086 if (mp->refcount > 0)
16087 offset += mp->fix_size;
16091 /* Output the literal table */
16092 static void
16093 dump_minipool (rtx_insn *scan)
16095 Mnode * mp;
16096 Mnode * nmp;
16097 int align64 = 0;
16099 if (ARM_DOUBLEWORD_ALIGN)
16100 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16101 if (mp->refcount > 0 && mp->fix_size >= 8)
16103 align64 = 1;
16104 break;
16107 if (dump_file)
16108 fprintf (dump_file,
16109 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16110 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16112 scan = emit_label_after (gen_label_rtx (), scan);
16113 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16114 scan = emit_label_after (minipool_vector_label, scan);
16116 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16118 if (mp->refcount > 0)
16120 if (dump_file)
16122 fprintf (dump_file,
16123 ";; Offset %u, min %ld, max %ld ",
16124 (unsigned) mp->offset, (unsigned long) mp->min_address,
16125 (unsigned long) mp->max_address);
16126 arm_print_value (dump_file, mp->value);
16127 fputc ('\n', dump_file);
16130 rtx val = copy_rtx (mp->value);
16132 switch (GET_MODE_SIZE (mp->mode))
16134 #ifdef HAVE_consttable_1
16135 case 1:
16136 scan = emit_insn_after (gen_consttable_1 (val), scan);
16137 break;
16139 #endif
16140 #ifdef HAVE_consttable_2
16141 case 2:
16142 scan = emit_insn_after (gen_consttable_2 (val), scan);
16143 break;
16145 #endif
16146 #ifdef HAVE_consttable_4
16147 case 4:
16148 scan = emit_insn_after (gen_consttable_4 (val), scan);
16149 break;
16151 #endif
16152 #ifdef HAVE_consttable_8
16153 case 8:
16154 scan = emit_insn_after (gen_consttable_8 (val), scan);
16155 break;
16157 #endif
16158 #ifdef HAVE_consttable_16
16159 case 16:
16160 scan = emit_insn_after (gen_consttable_16 (val), scan);
16161 break;
16163 #endif
16164 default:
16165 gcc_unreachable ();
16169 nmp = mp->next;
16170 free (mp);
16173 minipool_vector_head = minipool_vector_tail = NULL;
16174 scan = emit_insn_after (gen_consttable_end (), scan);
16175 scan = emit_barrier_after (scan);
16178 /* Return the cost of forcibly inserting a barrier after INSN. */
16179 static int
16180 arm_barrier_cost (rtx_insn *insn)
16182 /* Basing the location of the pool on the loop depth is preferable,
16183 but at the moment, the basic block information seems to be
16184 corrupted by this stage of the compilation. */
16185 int base_cost = 50;
16186 rtx_insn *next = next_nonnote_insn (insn);
16188 if (next != NULL && LABEL_P (next))
16189 base_cost -= 20;
16191 switch (GET_CODE (insn))
16193 case CODE_LABEL:
16194 /* It will always be better to place the table before the label, rather
16195 than after it. */
16196 return 50;
16198 case INSN:
16199 case CALL_INSN:
16200 return base_cost;
16202 case JUMP_INSN:
16203 return base_cost - 10;
16205 default:
16206 return base_cost + 10;
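/* For example, an unconditional branch (JUMP_INSN) that is immediately
   followed by a label scores (50 - 20) - 10 = 20, making it one of the
   cheapest places to force a barrier, whereas a CODE_LABEL itself
   always scores 50.  */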
16210 /* Find the best place in the insn stream in the range
16211 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16212 Create the barrier by inserting a jump and add a new fix entry for
16213 it. */
16214 static Mfix *
16215 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16217 HOST_WIDE_INT count = 0;
16218 rtx_barrier *barrier;
16219 rtx_insn *from = fix->insn;
16220 /* The instruction after which we will insert the jump. */
16221 rtx_insn *selected = NULL;
16222 int selected_cost;
16223 /* The address at which the jump instruction will be placed. */
16224 HOST_WIDE_INT selected_address;
16225 Mfix * new_fix;
16226 HOST_WIDE_INT max_count = max_address - fix->address;
16227 rtx_code_label *label = gen_label_rtx ();
16229 selected_cost = arm_barrier_cost (from);
16230 selected_address = fix->address;
16232 while (from && count < max_count)
16234 rtx_jump_table_data *tmp;
16235 int new_cost;
16237 /* This code shouldn't have been called if there was a natural barrier
16238 within range. */
16239 gcc_assert (!BARRIER_P (from));
16241 /* Count the length of this insn. This must stay in sync with the
16242 code that pushes minipool fixes. */
16243 if (LABEL_P (from))
16244 count += get_label_padding (from);
16245 else
16246 count += get_attr_length (from);
16248 /* If there is a jump table, add its length. */
16249 if (tablejump_p (from, NULL, &tmp))
16251 count += get_jump_table_size (tmp);
16253 /* Jump tables aren't in a basic block, so base the cost on
16254 the dispatch insn. If we select this location, we will
16255 still put the pool after the table. */
16256 new_cost = arm_barrier_cost (from);
16258 if (count < max_count
16259 && (!selected || new_cost <= selected_cost))
16261 selected = tmp;
16262 selected_cost = new_cost;
16263 selected_address = fix->address + count;
16266 /* Continue after the dispatch table. */
16267 from = NEXT_INSN (tmp);
16268 continue;
16271 new_cost = arm_barrier_cost (from);
16273 if (count < max_count
16274 && (!selected || new_cost <= selected_cost))
16276 selected = from;
16277 selected_cost = new_cost;
16278 selected_address = fix->address + count;
16281 from = NEXT_INSN (from);
16284 /* Make sure that we found a place to insert the jump. */
16285 gcc_assert (selected);
16287 /* Make sure we do not split a call and its corresponding
16288 CALL_ARG_LOCATION note. */
16289 if (CALL_P (selected))
16291 rtx_insn *next = NEXT_INSN (selected);
16292 if (next && NOTE_P (next)
16293 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16294 selected = next;
16297 /* Create a new JUMP_INSN that branches around a barrier. */
16298 from = emit_jump_insn_after (gen_jump (label), selected);
16299 JUMP_LABEL (from) = label;
16300 barrier = emit_barrier_after (from);
16301 emit_label_after (label, barrier);
16303 /* Create a minipool barrier entry for the new barrier. */
16304 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16305 new_fix->insn = barrier;
16306 new_fix->address = selected_address;
16307 new_fix->next = fix->next;
16308 fix->next = new_fix;
16310 return new_fix;
16313 /* Record that there is a natural barrier in the insn stream at
16314 ADDRESS. */
16315 static void
16316 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16318 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16320 fix->insn = insn;
16321 fix->address = address;
16323 fix->next = NULL;
16324 if (minipool_fix_head != NULL)
16325 minipool_fix_tail->next = fix;
16326 else
16327 minipool_fix_head = fix;
16329 minipool_fix_tail = fix;
16332 /* Record INSN, which will need fixing up to load a value from the
16333 minipool. ADDRESS is the offset of the insn since the start of the
16334 function; LOC is a pointer to the part of the insn which requires
16335 fixing; VALUE is the constant that must be loaded, which is of type
16336 MODE. */
16337 static void
16338 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16339 machine_mode mode, rtx value)
16341 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16343 fix->insn = insn;
16344 fix->address = address;
16345 fix->loc = loc;
16346 fix->mode = mode;
16347 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16348 fix->value = value;
16349 fix->forwards = get_attr_pool_range (insn);
16350 fix->backwards = get_attr_neg_pool_range (insn);
16351 fix->minipool = NULL;
16353 /* If an insn doesn't have a range defined for it, then it isn't
16354 expecting to be reworked by this code. Better to stop now than
16355 to generate duff assembly code. */
16356 gcc_assert (fix->forwards || fix->backwards);
16358 /* If an entry requires 8-byte alignment then assume all constant pools
16359 require 4 bytes of padding. Trying to do this later on a per-pool
16360 basis is awkward because existing pool entries have to be modified. */
16361 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16362 minipool_pad = 4;
16364 if (dump_file)
16366 fprintf (dump_file,
16367 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16368 GET_MODE_NAME (mode),
16369 INSN_UID (insn), (unsigned long) address,
16370 -1 * (long)fix->backwards, (long)fix->forwards);
16371 arm_print_value (dump_file, fix->value);
16372 fprintf (dump_file, "\n");
16375 /* Add it to the chain of fixes. */
16376 fix->next = NULL;
16378 if (minipool_fix_head != NULL)
16379 minipool_fix_tail->next = fix;
16380 else
16381 minipool_fix_head = fix;
16383 minipool_fix_tail = fix;
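/* As an illustration, an insn whose pool_range attribute is, say, 4096
   and whose neg_pool_range is 0 records FORWARDS == 4096 and
   BACKWARDS == 0, so it can only be satisfied by a pool placed after
   it, at most 4096 bytes (less MINIPOOL_PAD) beyond its address.  */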
16386 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16387 Returns the number of insns needed, or 99 if we always want to synthesize
16388 the value. */
16390 arm_max_const_double_inline_cost ()
16392 /* Let the value get synthesized to avoid the use of literal pools. */
16393 if (arm_disable_literal_pool)
16394 return 99;
16396 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16399 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16400 Returns the number of insns needed, or 99 if we don't know how to
16401 do it. */
16403 arm_const_double_inline_cost (rtx val)
16405 rtx lowpart, highpart;
16406 machine_mode mode;
16408 mode = GET_MODE (val);
16410 if (mode == VOIDmode)
16411 mode = DImode;
16413 gcc_assert (GET_MODE_SIZE (mode) == 8);
16415 lowpart = gen_lowpart (SImode, val);
16416 highpart = gen_highpart_mode (SImode, mode, val);
16418 gcc_assert (CONST_INT_P (lowpart));
16419 gcc_assert (CONST_INT_P (highpart));
16421 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16422 NULL_RTX, NULL_RTX, 0, 0)
16423 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16424 NULL_RTX, NULL_RTX, 0, 0));
16427 /* Cost of loading a SImode constant. */
16428 static inline int
16429 arm_const_inline_cost (enum rtx_code code, rtx val)
16431 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16432 NULL_RTX, NULL_RTX, 1, 0);
16435 /* Return true if it is worthwhile to split a 64-bit constant into two
16436 32-bit operations. This is the case if optimizing for size, or
16437 if we have load delay slots, or if one 32-bit part can be done with
16438 a single data operation. */
16439 bool
16440 arm_const_double_by_parts (rtx val)
16442 machine_mode mode = GET_MODE (val);
16443 rtx part;
16445 if (optimize_size || arm_ld_sched)
16446 return true;
16448 if (mode == VOIDmode)
16449 mode = DImode;
16451 part = gen_highpart_mode (SImode, mode, val);
16453 gcc_assert (CONST_INT_P (part));
16455 if (const_ok_for_arm (INTVAL (part))
16456 || const_ok_for_arm (~INTVAL (part)))
16457 return true;
16459 part = gen_lowpart (SImode, val);
16461 gcc_assert (CONST_INT_P (part));
16463 if (const_ok_for_arm (INTVAL (part))
16464 || const_ok_for_arm (~INTVAL (part)))
16465 return true;
16467 return false;
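/* As an example of the test above, with the constant 0x100000001 the
   high part is 1, which is a valid ARM immediate, so splitting into
   two 32-bit operations is considered worthwhile even when neither
   optimizing for size nor scheduling loads.  */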
16470 /* Return true if it is possible to inline both the high and low parts
16471 of a 64-bit constant into 32-bit data processing instructions. */
16472 bool
16473 arm_const_double_by_immediates (rtx val)
16475 machine_mode mode = GET_MODE (val);
16476 rtx part;
16478 if (mode == VOIDmode)
16479 mode = DImode;
16481 part = gen_highpart_mode (SImode, mode, val);
16483 gcc_assert (CONST_INT_P (part));
16485 if (!const_ok_for_arm (INTVAL (part)))
16486 return false;
16488 part = gen_lowpart (SImode, val);
16490 gcc_assert (CONST_INT_P (part));
16492 if (!const_ok_for_arm (INTVAL (part)))
16493 return false;
16495 return true;
16498 /* Scan INSN and note any of its operands that need fixing.
16499 If DO_PUSHES is false we do not actually push any of the fixups
16500 needed. */
16501 static void
16502 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16504 int opno;
16506 extract_constrain_insn (insn);
16508 if (recog_data.n_alternatives == 0)
16509 return;
16511 /* Fill in recog_op_alt with information about the constraints of
16512 this insn. */
16513 preprocess_constraints (insn);
16515 const operand_alternative *op_alt = which_op_alt ();
16516 for (opno = 0; opno < recog_data.n_operands; opno++)
16518 /* Things we need to fix can only occur in inputs. */
16519 if (recog_data.operand_type[opno] != OP_IN)
16520 continue;
16522 /* If this alternative is a memory reference, then any mention
16523 of constants in this alternative is really to fool reload
16524 into allowing us to accept one there. We need to fix them up
16525 now so that we output the right code. */
16526 if (op_alt[opno].memory_ok)
16528 rtx op = recog_data.operand[opno];
16530 if (CONSTANT_P (op))
16532 if (do_pushes)
16533 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16534 recog_data.operand_mode[opno], op);
16536 else if (MEM_P (op)
16537 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16538 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16540 if (do_pushes)
16542 rtx cop = avoid_constant_pool_reference (op);
16544 /* Casting the address of something to a mode narrower
16545 than a word can cause avoid_constant_pool_reference()
16546 to return the pool reference itself. That's no good to
16547 us here. Let's just hope that we can use the
16548 constant pool value directly. */
16549 if (op == cop)
16550 cop = get_pool_constant (XEXP (op, 0));
16552 push_minipool_fix (insn, address,
16553 recog_data.operand_loc[opno],
16554 recog_data.operand_mode[opno], cop);
16561 return;
16564 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16565 and unions in the context of ARMv8-M Security Extensions. It is used as a
16566 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16567 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16568 or four masks, depending on whether it is being computed for a
16569 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16570 respectively. The tree for the type of the argument or a field within an
16571 argument is passed in ARG_TYPE, the current register this argument or field
16572 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16573 argument or field starts at is passed in STARTING_BIT and the last used bit
16574 is kept in LAST_USED_BIT which is also updated accordingly. */
16576 static unsigned HOST_WIDE_INT
16577 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16578 uint32_t * padding_bits_to_clear,
16579 unsigned starting_bit, int * last_used_bit)
16582 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16584 if (TREE_CODE (arg_type) == RECORD_TYPE)
16586 unsigned current_bit = starting_bit;
16587 tree field;
16588 long int offset, size;
16591 field = TYPE_FIELDS (arg_type);
16592 while (field)
16594 /* The offset within a structure is always an offset from
16595 the start of that structure. Make sure we take that into the
16596 calculation of the register based offset that we use here. */
16597 offset = starting_bit;
16598 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16599 offset %= 32;
16601 /* This is the actual size of the field, for bitfields this is the
16602 bitfield width and not the container size. */
16603 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16605 if (*last_used_bit != offset)
16607 if (offset < *last_used_bit)
16609 /* This field's offset is before the 'last_used_bit', which
16610 means this field goes in the next register. So we need to
16611 pad the rest of the current register and increase the
16612 register number. */
16613 uint32_t mask;
16614 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16615 mask++;
16617 padding_bits_to_clear[*regno] |= mask;
16618 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16619 (*regno)++;
16621 else
16623 /* Otherwise we pad the bits between the last field's end and
16624 the start of the new field. */
16625 uint32_t mask;
16627 mask = ((uint32_t)-1) >> (32 - offset);
16628 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16629 padding_bits_to_clear[*regno] |= mask;
16631 current_bit = offset;
16634 /* Calculate further padding bits for inner structs/unions too. */
16635 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16637 *last_used_bit = current_bit;
16638 not_to_clear_reg_mask
16639 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16640 padding_bits_to_clear, offset,
16641 last_used_bit);
16643 else
16645 /* Update 'current_bit' with this field's size. If the
16646 'current_bit' lies in a subsequent register, update 'regno' and
16647 reset 'current_bit' to point to the current bit in that new
16648 register. */
16649 current_bit += size;
16650 while (current_bit >= 32)
16652 current_bit-=32;
16653 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16654 (*regno)++;
16656 *last_used_bit = current_bit;
16659 field = TREE_CHAIN (field);
16661 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16663 else if (TREE_CODE (arg_type) == UNION_TYPE)
16665 tree field, field_t;
16666 int i, regno_t, field_size;
16667 int max_reg = -1;
16668 int max_bit = -1;
16669 uint32_t mask;
16670 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16671 = {-1, -1, -1, -1};
16673 /* To compute the padding bits in a union we only consider bits as
16674 padding bits if they are always either a padding bit or fall outside a
16675 field's size for all fields in the union. */
16676 field = TYPE_FIELDS (arg_type);
16677 while (field)
16679 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16680 = {0U, 0U, 0U, 0U};
16681 int last_used_bit_t = *last_used_bit;
16682 regno_t = *regno;
16683 field_t = TREE_TYPE (field);
16685 /* If the field's type is either a record or a union make sure to
16686 compute their padding bits too. */
16687 if (RECORD_OR_UNION_TYPE_P (field_t))
16688 not_to_clear_reg_mask
16689 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16690 &padding_bits_to_clear_t[0],
16691 starting_bit, &last_used_bit_t);
16692 else
16694 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16695 regno_t = (field_size / 32) + *regno;
16696 last_used_bit_t = (starting_bit + field_size) % 32;
16699 for (i = *regno; i < regno_t; i++)
16701 /* For all but the last register used by this field only keep the
16702 padding bits that were padding bits in this field. */
16703 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16706 /* For the last register, keep all padding bits that were padding
16707 bits in this field and any padding bits that are still valid
16708 as padding bits but fall outside of this field's size. */
16709 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16710 padding_bits_to_clear_res[regno_t]
16711 &= padding_bits_to_clear_t[regno_t] | mask;
16713 /* Update the maximum size of the fields in terms of registers used
16714 ('max_reg') and the 'last_used_bit' in said register. */
16715 if (max_reg < regno_t)
16717 max_reg = regno_t;
16718 max_bit = last_used_bit_t;
16720 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16721 max_bit = last_used_bit_t;
16723 field = TREE_CHAIN (field);
16726 /* Update the current padding_bits_to_clear using the intersection of the
16727 padding bits of all the fields. */
16728 for (i=*regno; i < max_reg; i++)
16729 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16731 /* Do not keep trailing padding bits, we do not know yet whether this
16732 is the end of the argument. */
16733 mask = ((uint32_t) 1 << max_bit) - 1;
16734 padding_bits_to_clear[max_reg]
16735 |= padding_bits_to_clear_res[max_reg] & mask;
16737 *regno = max_reg;
16738 *last_used_bit = max_bit;
16740 else
16741 /* This function should only be used for structs and unions. */
16742 gcc_unreachable ();
16744 return not_to_clear_reg_mask;
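/* A rough worked example of the mask computation above: for
   struct { char a; short b; } passed in r0, field B starts at bit 16
   while the last used bit after A is 8, so the eight padding bits in
   between are recorded, i.e. padding_bits_to_clear[0] |= 0xff00.  */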
16747 /* In the context of ARMv8-M Security Extensions, this function is used for both
16748 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16749 registers are used when returning or passing arguments, which is then
16750 returned as a mask. It will also compute a mask to indicate padding/unused
16751 bits for each of these registers, and passes this through the
16752 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16753 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16754 the starting register used to pass this argument or return value is passed
16755 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16756 for struct and union types. */
16758 static unsigned HOST_WIDE_INT
16759 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16760 uint32_t * padding_bits_to_clear)
16763 int last_used_bit = 0;
16764 unsigned HOST_WIDE_INT not_to_clear_mask;
16766 if (RECORD_OR_UNION_TYPE_P (arg_type))
16768 not_to_clear_mask
16769 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16770 padding_bits_to_clear, 0,
16771 &last_used_bit);
16774 /* If the 'last_used_bit' is not zero, that means we are still using a
16775 part of the last 'regno'. In such cases we must clear the trailing
16776 bits. Otherwise we are not using regno and we should mark it as to
16777 clear. */
16778 if (last_used_bit != 0)
16779 padding_bits_to_clear[regno]
16780 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16781 else
16782 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16784 else
16786 not_to_clear_mask = 0;
16787 /* We are not dealing with structs or unions. So these arguments may be
16788 passed in floating point registers too. In some cases a BLKmode is
16789 used when returning or passing arguments in multiple VFP registers. */
16790 if (GET_MODE (arg_rtx) == BLKmode)
16792 int i, arg_regs;
16793 rtx reg;
16795 /* This should really only occur when dealing with the hard-float
16796 ABI. */
16797 gcc_assert (TARGET_HARD_FLOAT_ABI);
16799 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16801 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16802 gcc_assert (REG_P (reg));
16804 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16806 /* If we are dealing with DF mode, make sure we don't
16807 clear either of the registers it addresses. */
16808 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16809 if (arg_regs > 1)
16811 unsigned HOST_WIDE_INT mask;
16812 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16813 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16814 not_to_clear_mask |= mask;
16818 else
16820 /* Otherwise we can rely on the MODE to determine how many registers
16821 are being used by this argument. */
16822 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16823 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16824 if (arg_regs > 1)
16826 unsigned HOST_WIDE_INT
16827 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16828 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16829 not_to_clear_mask |= mask;
16834 return not_to_clear_mask;
16837 /* Saves the callee-saved registers, clears the callee-saved registers and the
16838 caller-saved registers not used to pass arguments before a cmse_nonsecure_call,
16839 and restores the callee-saved registers afterwards. */
16841 static void
16842 cmse_nonsecure_call_clear_caller_saved (void)
16844 basic_block bb;
16846 FOR_EACH_BB_FN (bb, cfun)
16848 rtx_insn *insn;
16850 FOR_BB_INSNS (bb, insn)
16852 uint64_t to_clear_mask, float_mask;
16853 rtx_insn *seq;
16854 rtx pat, call, unspec, reg, cleared_reg, tmp;
16855 unsigned int regno, maxregno;
16856 rtx address;
16857 CUMULATIVE_ARGS args_so_far_v;
16858 cumulative_args_t args_so_far;
16859 tree arg_type, fntype;
16860 bool using_r4, first_param = true;
16861 function_args_iterator args_iter;
16862 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16863 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16865 if (!NONDEBUG_INSN_P (insn))
16866 continue;
16868 if (!CALL_P (insn))
16869 continue;
16871 pat = PATTERN (insn);
16872 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16873 call = XVECEXP (pat, 0, 0);
16875 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16876 if (GET_CODE (call) == SET)
16877 call = SET_SRC (call);
16879 /* Check if it is a cmse_nonsecure_call. */
16880 unspec = XEXP (call, 0);
16881 if (GET_CODE (unspec) != UNSPEC
16882 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16883 continue;
16885 /* Determine the caller-saved registers we need to clear. */
16886 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16887 maxregno = NUM_ARG_REGS - 1;
16888 /* Only look at the caller-saved floating point registers in case of
16889 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16890 lazy store and loads which clear both caller- and callee-saved
16891 registers. */
16892 if (TARGET_HARD_FLOAT_ABI)
16894 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16895 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16896 to_clear_mask |= float_mask;
16897 maxregno = D7_VFP_REGNUM;
16900 /* Make sure the register used to hold the function address is not
16901 cleared. */
16902 address = RTVEC_ELT (XVEC (unspec, 0), 0);
16903 gcc_assert (MEM_P (address));
16904 gcc_assert (REG_P (XEXP (address, 0)));
16905 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
16907 /* Set basic block of call insn so that df rescan is performed on
16908 insns inserted here. */
16909 set_block_for_insn (insn, bb);
16910 df_set_flags (DF_DEFER_INSN_RESCAN);
16911 start_sequence ();
16913 /* Make sure the scheduler doesn't schedule other insns beyond
16914 here. */
16915 emit_insn (gen_blockage ());
16917 /* Walk through all arguments and clear registers appropriately. */
16919 fntype = TREE_TYPE (MEM_EXPR (address));
16920 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
16921 NULL_TREE);
16922 args_so_far = pack_cumulative_args (&args_so_far_v);
16923 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
16925 rtx arg_rtx;
16926 machine_mode arg_mode = TYPE_MODE (arg_type);
16928 if (VOID_TYPE_P (arg_type))
16929 continue;
16931 if (!first_param)
16932 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
16933 true);
16935 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
16936 true);
16937 gcc_assert (REG_P (arg_rtx));
16938 to_clear_mask
16939 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
16940 REGNO (arg_rtx),
16941 padding_bits_to_clear_ptr);
16943 first_param = false;
16946 /* Clear padding bits where needed. */
16947 cleared_reg = XEXP (address, 0);
16948 reg = gen_rtx_REG (SImode, IP_REGNUM);
16949 using_r4 = false;
16950 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
16952 if (padding_bits_to_clear[regno] == 0)
16953 continue;
16955 /* If this is a Thumb-1 target, copy the address of the function
16956 we are calling from 'r4' into 'ip' such that we can use r4 to
16957 clear the unused bits in the arguments. */
16958 if (TARGET_THUMB1 && !using_r4)
16960 using_r4 = true;
16961 reg = cleared_reg;
16962 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
16963 reg);
16966 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
16967 emit_move_insn (reg, tmp);
16968 /* Also fill the top half of the negated
16969 padding_bits_to_clear. */
16970 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
16972 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
16973 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
16974 GEN_INT (16),
16975 GEN_INT (16)),
16976 tmp));
16979 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
16980 gen_rtx_REG (SImode, regno),
16981 reg));
16984 if (using_r4)
16985 emit_move_insn (cleared_reg,
16986 gen_rtx_REG (SImode, IP_REGNUM));
16988 /* We use right shift and left shift to clear the LSB of the address
16989 we jump to instead of using bic, to avoid having to use an extra
16990 register on Thumb-1. */
16991 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
16992 emit_insn (gen_rtx_SET (cleared_reg, tmp));
16993 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
16994 emit_insn (gen_rtx_SET (cleared_reg, tmp));
16996 /* Clear all registers that could leak information before doing a non-secure
16997 call. */
16998 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17000 if (!(to_clear_mask & (1LL << regno)))
17001 continue;
17003 /* If regno is an even vfp register and its successor is also to
17004 be cleared, use vmov. */
17005 if (IS_VFP_REGNUM (regno))
17007 if (TARGET_VFP_DOUBLE
17008 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17009 && to_clear_mask & (1LL << (regno + 1)))
17010 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17011 CONST0_RTX (DFmode));
17012 else
17013 emit_move_insn (gen_rtx_REG (SFmode, regno),
17014 CONST0_RTX (SFmode));
17016 else
17017 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17020 seq = get_insns ();
17021 end_sequence ();
17022 emit_insn_before (seq, insn);
17028 /* Rewrite move insn into subtract of 0 if the condition codes will
17029 be useful in the next conditional jump insn. */
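/* An illustrative sketch (register numbers invented): a move such as
       (set (reg:SI 1) (reg:SI 0))
   feeding a cbranchsi4_insn that compares (reg:SI 1) against zero is
   rewritten below into
       (set (reg:SI 1) (minus:SI (reg:SI 0) (const_int 0)))
   i.e. a flag-setting SUBS, and the branch is changed to test the
   destination, so the explicit compare can usually be omitted when the
   branch is output.  */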
17031 static void
17032 thumb1_reorg (void)
17034 basic_block bb;
17036 FOR_EACH_BB_FN (bb, cfun)
17038 rtx dest, src;
17039 rtx cmp, op0, op1, set = NULL;
17040 rtx_insn *prev, *insn = BB_END (bb);
17041 bool insn_clobbered = false;
17043 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17044 insn = PREV_INSN (insn);
17046 /* Find the last cbranchsi4_insn in basic block BB. */
17047 if (insn == BB_HEAD (bb)
17048 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17049 continue;
17051 /* Get the register with which we are comparing. */
17052 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17053 op0 = XEXP (cmp, 0);
17054 op1 = XEXP (cmp, 1);
17056 /* Check that comparison is against ZERO. */
17057 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17058 continue;
17060 /* Find the first flag setting insn before INSN in basic block BB. */
17061 gcc_assert (insn != BB_HEAD (bb));
17062 for (prev = PREV_INSN (insn);
17063 (!insn_clobbered
17064 && prev != BB_HEAD (bb)
17065 && (NOTE_P (prev)
17066 || DEBUG_INSN_P (prev)
17067 || ((set = single_set (prev)) != NULL
17068 && get_attr_conds (prev) == CONDS_NOCOND)));
17069 prev = PREV_INSN (prev))
17071 if (reg_set_p (op0, prev))
17072 insn_clobbered = true;
17075 /* Skip if op0 is clobbered by insn other than prev. */
17076 if (insn_clobbered)
17077 continue;
17079 if (!set)
17080 continue;
17082 dest = SET_DEST (set);
17083 src = SET_SRC (set);
17084 if (!low_register_operand (dest, SImode)
17085 || !low_register_operand (src, SImode))
17086 continue;
17088 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17089 in INSN. Both src and dest of the move insn are checked. */
17090 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17092 dest = copy_rtx (dest);
17093 src = copy_rtx (src);
17094 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17095 PATTERN (prev) = gen_rtx_SET (dest, src);
17096 INSN_CODE (prev) = -1;
17097 /* Set test register in INSN to dest. */
17098 XEXP (cmp, 0) = copy_rtx (dest);
17099 INSN_CODE (insn) = -1;
17104 /* Convert instructions to their cc-clobbering variant if possible, since
17105 that allows us to use smaller encodings. */
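/* A rough example of the conversion done below: when the condition codes
   are dead after the insn,
       (set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))
   is wrapped as
       (parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))
                  (clobber (reg:CC CC_REGNUM))])
   which allows the 16-bit flag-setting "adds" encoding to be chosen
   instead of the 32-bit "add.w".  */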
17107 static void
17108 thumb2_reorg (void)
17110 basic_block bb;
17111 regset_head live;
17113 INIT_REG_SET (&live);
17115 /* We are freeing block_for_insn in the toplev to keep compatibility
17116 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17117 compute_bb_for_insn ();
17118 df_analyze ();
17120 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17122 FOR_EACH_BB_FN (bb, cfun)
17124 if ((current_tune->disparage_flag_setting_t16_encodings
17125 == tune_params::DISPARAGE_FLAGS_ALL)
17126 && optimize_bb_for_speed_p (bb))
17127 continue;
17129 rtx_insn *insn;
17130 Convert_Action action = SKIP;
17131 Convert_Action action_for_partial_flag_setting
17132 = ((current_tune->disparage_flag_setting_t16_encodings
17133 != tune_params::DISPARAGE_FLAGS_NEITHER)
17134 && optimize_bb_for_speed_p (bb))
17135 ? SKIP : CONV;
17137 COPY_REG_SET (&live, DF_LR_OUT (bb));
17138 df_simulate_initialize_backwards (bb, &live);
17139 FOR_BB_INSNS_REVERSE (bb, insn)
17141 if (NONJUMP_INSN_P (insn)
17142 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17143 && GET_CODE (PATTERN (insn)) == SET)
17145 action = SKIP;
17146 rtx pat = PATTERN (insn);
17147 rtx dst = XEXP (pat, 0);
17148 rtx src = XEXP (pat, 1);
17149 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17151 if (UNARY_P (src) || BINARY_P (src))
17152 op0 = XEXP (src, 0);
17154 if (BINARY_P (src))
17155 op1 = XEXP (src, 1);
17157 if (low_register_operand (dst, SImode))
17159 switch (GET_CODE (src))
17161 case PLUS:
17162 /* Adding two registers and storing the result
17163 in the first source is already a 16-bit
17164 operation. */
17165 if (rtx_equal_p (dst, op0)
17166 && register_operand (op1, SImode))
17167 break;
17169 if (low_register_operand (op0, SImode))
17171 /* ADDS <Rd>,<Rn>,<Rm> */
17172 if (low_register_operand (op1, SImode))
17173 action = CONV;
17174 /* ADDS <Rdn>,#<imm8> */
17175 /* SUBS <Rdn>,#<imm8> */
17176 else if (rtx_equal_p (dst, op0)
17177 && CONST_INT_P (op1)
17178 && IN_RANGE (INTVAL (op1), -255, 255))
17179 action = CONV;
17180 /* ADDS <Rd>,<Rn>,#<imm3> */
17181 /* SUBS <Rd>,<Rn>,#<imm3> */
17182 else if (CONST_INT_P (op1)
17183 && IN_RANGE (INTVAL (op1), -7, 7))
17184 action = CONV;
17186 /* ADCS <Rd>, <Rn> */
17187 else if (GET_CODE (XEXP (src, 0)) == PLUS
17188 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17189 && low_register_operand (XEXP (XEXP (src, 0), 1),
17190 SImode)
17191 && COMPARISON_P (op1)
17192 && cc_register (XEXP (op1, 0), VOIDmode)
17193 && maybe_get_arm_condition_code (op1) == ARM_CS
17194 && XEXP (op1, 1) == const0_rtx)
17195 action = CONV;
17196 break;
17198 case MINUS:
17199 /* RSBS <Rd>,<Rn>,#0
17200 Not handled here: see NEG below. */
17201 /* SUBS <Rd>,<Rn>,#<imm3>
17202 SUBS <Rdn>,#<imm8>
17203 Not handled here: see PLUS above. */
17204 /* SUBS <Rd>,<Rn>,<Rm> */
17205 if (low_register_operand (op0, SImode)
17206 && low_register_operand (op1, SImode))
17207 action = CONV;
17208 break;
17210 case MULT:
17211 /* MULS <Rdm>,<Rn>,<Rdm>
17212 As an exception to the rule, this is only used
17213 when optimizing for size since MULS is slow on all
17214 known implementations. We do not even want to use
17215 MULS in cold code, if optimizing for speed, so we
17216 test the global flag here. */
17217 if (!optimize_size)
17218 break;
17219 /* Fall through. */
17220 case AND:
17221 case IOR:
17222 case XOR:
17223 /* ANDS <Rdn>,<Rm> */
17224 if (rtx_equal_p (dst, op0)
17225 && low_register_operand (op1, SImode))
17226 action = action_for_partial_flag_setting;
17227 else if (rtx_equal_p (dst, op1)
17228 && low_register_operand (op0, SImode))
17229 action = action_for_partial_flag_setting == SKIP
17230 ? SKIP : SWAP_CONV;
17231 break;
17233 case ASHIFTRT:
17234 case ASHIFT:
17235 case LSHIFTRT:
17236 /* ASRS <Rdn>,<Rm> */
17237 /* LSRS <Rdn>,<Rm> */
17238 /* LSLS <Rdn>,<Rm> */
17239 if (rtx_equal_p (dst, op0)
17240 && low_register_operand (op1, SImode))
17241 action = action_for_partial_flag_setting;
17242 /* ASRS <Rd>,<Rm>,#<imm5> */
17243 /* LSRS <Rd>,<Rm>,#<imm5> */
17244 /* LSLS <Rd>,<Rm>,#<imm5> */
17245 else if (low_register_operand (op0, SImode)
17246 && CONST_INT_P (op1)
17247 && IN_RANGE (INTVAL (op1), 0, 31))
17248 action = action_for_partial_flag_setting;
17249 break;
17251 case ROTATERT:
17252 /* RORS <Rdn>,<Rm> */
17253 if (rtx_equal_p (dst, op0)
17254 && low_register_operand (op1, SImode))
17255 action = action_for_partial_flag_setting;
17256 break;
17258 case NOT:
17259 /* MVNS <Rd>,<Rm> */
17260 if (low_register_operand (op0, SImode))
17261 action = action_for_partial_flag_setting;
17262 break;
17264 case NEG:
17265 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17266 if (low_register_operand (op0, SImode))
17267 action = CONV;
17268 break;
17270 case CONST_INT:
17271 /* MOVS <Rd>,#<imm8> */
17272 if (CONST_INT_P (src)
17273 && IN_RANGE (INTVAL (src), 0, 255))
17274 action = action_for_partial_flag_setting;
17275 break;
17277 case REG:
17278 /* MOVS and MOV<c> with registers have different
17279 encodings, so are not relevant here. */
17280 break;
17282 default:
17283 break;
17287 if (action != SKIP)
17289 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17290 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17291 rtvec vec;
17293 if (action == SWAP_CONV)
17295 src = copy_rtx (src);
17296 XEXP (src, 0) = op1;
17297 XEXP (src, 1) = op0;
17298 pat = gen_rtx_SET (dst, src);
17299 vec = gen_rtvec (2, pat, clobber);
17301 else /* action == CONV */
17302 vec = gen_rtvec (2, pat, clobber);
17304 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17305 INSN_CODE (insn) = -1;
17309 if (NONDEBUG_INSN_P (insn))
17310 df_simulate_one_insn_backwards (bb, insn, &live);
17314 CLEAR_REG_SET (&live);
17317 /* GCC puts the pool in the wrong place for ARM, since we can only
17318 load addresses a limited distance around the pc. We do some
17319 special munging to move the constant pool values to the correct
17320 point in the code. */
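/* A simplified sketch of the result (label names invented): a constant
   that cannot be encoded as an immediate is loaded PC-relative from a
   minipool, and this pass places the pool after a barrier within range,
   creating the barrier (and a jump around the pool) when necessary:

       ldr     r0, .LPOOL0
       ...
       b       .Lafter_pool
   .LPOOL0:
       .word   0x12345678
   .Lafter_pool:
*/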
17321 static void
17322 arm_reorg (void)
17324 rtx_insn *insn;
17325 HOST_WIDE_INT address = 0;
17326 Mfix * fix;
17328 if (use_cmse)
17329 cmse_nonsecure_call_clear_caller_saved ();
17330 if (TARGET_THUMB1)
17331 thumb1_reorg ();
17332 else if (TARGET_THUMB2)
17333 thumb2_reorg ();
17335 /* Ensure all insns that must be split have been split at this point.
17336 Otherwise, the pool placement code below may compute incorrect
17337 insn lengths. Note that when optimizing, all insns have already
17338 been split at this point. */
17339 if (!optimize)
17340 split_all_insns_noflow ();
17342 minipool_fix_head = minipool_fix_tail = NULL;
17344 /* The first insn must always be a note, or the code below won't
17345 scan it properly. */
17346 insn = get_insns ();
17347 gcc_assert (NOTE_P (insn));
17348 minipool_pad = 0;
17350 /* Scan all the insns and record the operands that will need fixing. */
17351 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17353 if (BARRIER_P (insn))
17354 push_minipool_barrier (insn, address);
17355 else if (INSN_P (insn))
17357 rtx_jump_table_data *table;
17359 note_invalid_constants (insn, address, true);
17360 address += get_attr_length (insn);
17362 /* If the insn is a vector jump, add the size of the table
17363 and skip the table. */
17364 if (tablejump_p (insn, NULL, &table))
17366 address += get_jump_table_size (table);
17367 insn = table;
17370 else if (LABEL_P (insn))
17371 /* Add the worst-case padding due to alignment. We don't add
17372 the _current_ padding because the minipool insertions
17373 themselves might change it. */
17374 address += get_label_padding (insn);
17377 fix = minipool_fix_head;
17379 /* Now scan the fixups and perform the required changes. */
17380 while (fix)
17382 Mfix * ftmp;
17383 Mfix * fdel;
17384 Mfix * last_added_fix;
17385 Mfix * last_barrier = NULL;
17386 Mfix * this_fix;
17388 /* Skip any further barriers before the next fix. */
17389 while (fix && BARRIER_P (fix->insn))
17390 fix = fix->next;
17392 /* No more fixes. */
17393 if (fix == NULL)
17394 break;
17396 last_added_fix = NULL;
17398 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17400 if (BARRIER_P (ftmp->insn))
17402 if (ftmp->address >= minipool_vector_head->max_address)
17403 break;
17405 last_barrier = ftmp;
17407 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17408 break;
17410 last_added_fix = ftmp; /* Keep track of the last fix added. */
17413 /* If we found a barrier, drop back to that; any fixes that we
17414 could have reached but come after the barrier will now go in
17415 the next mini-pool. */
17416 if (last_barrier != NULL)
17418 /* Reduce the refcount for those fixes that won't go into this
17419 pool after all. */
17420 for (fdel = last_barrier->next;
17421 fdel && fdel != ftmp;
17422 fdel = fdel->next)
17424 fdel->minipool->refcount--;
17425 fdel->minipool = NULL;
17428 ftmp = last_barrier;
17430 else
17432 /* ftmp is the first fix that we can't fit into this pool and
17433 there are no natural barriers that we could use. Insert a
17434 new barrier in the code somewhere between the previous
17435 fix and this one, and arrange to jump around it. */
17436 HOST_WIDE_INT max_address;
17438 /* The last item on the list of fixes must be a barrier, so
17439 we can never run off the end of the list of fixes without
17440 last_barrier being set. */
17441 gcc_assert (ftmp);
17443 max_address = minipool_vector_head->max_address;
17444 /* Check that there isn't another fix that is in range that
17445 we couldn't fit into this pool because the pool was
17446 already too large: we need to put the pool before such an
17447 instruction. The pool itself may come just after the
17448 fix because create_fix_barrier also allows space for a
17449 jump instruction. */
17450 if (ftmp->address < max_address)
17451 max_address = ftmp->address + 1;
17453 last_barrier = create_fix_barrier (last_added_fix, max_address);
17456 assign_minipool_offsets (last_barrier);
17458 while (ftmp)
17460 if (!BARRIER_P (ftmp->insn)
17461 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17462 == NULL))
17463 break;
17465 ftmp = ftmp->next;
17468 /* Scan over the fixes we have identified for this pool, fixing them
17469 up and adding the constants to the pool itself. */
17470 for (this_fix = fix; this_fix && ftmp != this_fix;
17471 this_fix = this_fix->next)
17472 if (!BARRIER_P (this_fix->insn))
17474 rtx addr
17475 = plus_constant (Pmode,
17476 gen_rtx_LABEL_REF (VOIDmode,
17477 minipool_vector_label),
17478 this_fix->minipool->offset);
17479 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17482 dump_minipool (last_barrier->insn);
17483 fix = ftmp;
17486 /* From now on we must synthesize any constants that we can't handle
17487 directly. This can happen if the RTL gets split during final
17488 instruction generation. */
17489 cfun->machine->after_arm_reorg = 1;
17491 /* Free the minipool memory. */
17492 obstack_free (&minipool_obstack, minipool_startobj);
17495 /* Routines to output assembly language. */
17497 /* Return the string representation of the real value passed in. */
17498 static const char *
17499 fp_const_from_val (REAL_VALUE_TYPE *r)
17501 if (!fp_consts_inited)
17502 init_fp_table ();
17504 gcc_assert (real_equal (r, &value_fp0));
17505 return "0";
17508 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17509 OPERANDS[1] is the base register, RETURN_PC is true iff a return insn
17510 is in the list, UPDATE is true iff the list contains an explicit
17511 update of the base register. */
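/* For instance (a sketch rather than an exact dump): with OPERANDS[1]
   being sp, UPDATE true and r4, r5 and pc in the list, this emits
       pop     {r4, r5, pc}
   whereas a non-sp base register with no update falls back to an ldm
   form such as
       ldm     r0, {r4, r5, pc}
*/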
17512 void
17513 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17514 bool update)
17516 int i;
17517 char pattern[100];
17518 int offset;
17519 const char *conditional;
17520 int num_saves = XVECLEN (operands[0], 0);
17521 unsigned int regno;
17522 unsigned int regno_base = REGNO (operands[1]);
17523 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17525 offset = 0;
17526 offset += update ? 1 : 0;
17527 offset += return_pc ? 1 : 0;
17529 /* Is the base register in the list? */
17530 for (i = offset; i < num_saves; i++)
17532 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17533 /* If SP is in the list, then the base register must be SP. */
17534 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17535 /* If base register is in the list, there must be no explicit update. */
17536 if (regno == regno_base)
17537 gcc_assert (!update);
17540 conditional = reverse ? "%?%D0" : "%?%d0";
17541 /* Can't use POP if returning from an interrupt. */
17542 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17543 sprintf (pattern, "pop%s\t{", conditional);
17544 else
17546 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17547 It's just a convention; their semantics are identical.
17548 if (regno_base == SP_REGNUM)
17549 sprintf (pattern, "ldmfd%s\t", conditional);
17550 else if (update)
17551 sprintf (pattern, "ldmia%s\t", conditional);
17552 else
17553 sprintf (pattern, "ldm%s\t", conditional);
17555 strcat (pattern, reg_names[regno_base]);
17556 if (update)
17557 strcat (pattern, "!, {");
17558 else
17559 strcat (pattern, ", {");
17562 /* Output the first destination register. */
17563 strcat (pattern,
17564 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17566 /* Output the rest of the destination registers. */
17567 for (i = offset + 1; i < num_saves; i++)
17569 strcat (pattern, ", ");
17570 strcat (pattern,
17571 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17574 strcat (pattern, "}");
17576 if (interrupt_p && return_pc)
17577 strcat (pattern, "^");
17579 output_asm_insn (pattern, &cond);
17583 /* Output the assembly for a store multiple. */
17585 const char *
17586 vfp_output_vstmd (rtx * operands)
17588 char pattern[100];
17589 int p;
17590 int base;
17591 int i;
17592 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17593 ? XEXP (operands[0], 0)
17594 : XEXP (XEXP (operands[0], 0), 0);
17595 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17597 if (push_p)
17598 strcpy (pattern, "vpush%?.64\t{%P1");
17599 else
17600 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17602 p = strlen (pattern);
17604 gcc_assert (REG_P (operands[1]));
17606 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17607 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17609 p += sprintf (&pattern[p], ", d%d", base + i);
17611 strcpy (&pattern[p], "}");
17613 output_asm_insn (pattern, operands);
17614 return "";
17618 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17619 number of bytes pushed. */
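/* For example (a reading of the code below; in GCC's internal numbering
   each D register occupies two consecutive register numbers):
   vfp_emit_fstmd (FIRST_VFP_REGNUM + 16, 3) describes a push of d8-d10,
   attaches a REG_FRAME_RELATED_EXPR note for the unwinder, and returns
   24, i.e. 3 * 8 bytes.  */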
17621 static int
17622 vfp_emit_fstmd (int base_reg, int count)
17624 rtx par;
17625 rtx dwarf;
17626 rtx tmp, reg;
17627 int i;
17629 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17630 register pairs are stored by a store multiple insn. We avoid this
17631 by pushing an extra pair. */
17632 if (count == 2 && !arm_arch6)
17634 if (base_reg == LAST_VFP_REGNUM - 3)
17635 base_reg -= 2;
17636 count++;
17639 /* FSTMD may not store more than 16 doubleword registers at once. Split
17640 larger stores into multiple parts (up to a maximum of two, in
17641 practice). */
17642 if (count > 16)
17644 int saved;
17645 /* NOTE: base_reg is an internal register number, so each D register
17646 counts as 2. */
17647 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17648 saved += vfp_emit_fstmd (base_reg, 16);
17649 return saved;
17652 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17653 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17655 reg = gen_rtx_REG (DFmode, base_reg);
17656 base_reg += 2;
17658 XVECEXP (par, 0, 0)
17659 = gen_rtx_SET (gen_frame_mem
17660 (BLKmode,
17661 gen_rtx_PRE_MODIFY (Pmode,
17662 stack_pointer_rtx,
17663 plus_constant
17664 (Pmode, stack_pointer_rtx,
17665 - (count * 8)))
17667 gen_rtx_UNSPEC (BLKmode,
17668 gen_rtvec (1, reg),
17669 UNSPEC_PUSH_MULT));
17671 tmp = gen_rtx_SET (stack_pointer_rtx,
17672 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17673 RTX_FRAME_RELATED_P (tmp) = 1;
17674 XVECEXP (dwarf, 0, 0) = tmp;
17676 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17677 RTX_FRAME_RELATED_P (tmp) = 1;
17678 XVECEXP (dwarf, 0, 1) = tmp;
17680 for (i = 1; i < count; i++)
17682 reg = gen_rtx_REG (DFmode, base_reg);
17683 base_reg += 2;
17684 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17686 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17687 plus_constant (Pmode,
17688 stack_pointer_rtx,
17689 i * 8)),
17690 reg);
17691 RTX_FRAME_RELATED_P (tmp) = 1;
17692 XVECEXP (dwarf, 0, i + 1) = tmp;
17695 par = emit_insn (par);
17696 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17697 RTX_FRAME_RELATED_P (par) = 1;
17699 return count * 8;
17702 /* Return TRUE if -mcmse has been passed and the function pointed to by 'addr'
17703 has the cmse_nonsecure_call attribute; return FALSE otherwise. */
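/* The attribute lives on the function type; an illustrative declaration
   would be

       typedef void __attribute__ ((cmse_nonsecure_call)) ns_call_t (void);

   and calls made through a pointer of such a type are the ones reported
   as non-secure here.  */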
17705 bool
17706 detect_cmse_nonsecure_call (tree addr)
17708 if (!addr)
17709 return FALSE;
17711 tree fntype = TREE_TYPE (addr);
17712 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17713 TYPE_ATTRIBUTES (fntype)))
17714 return TRUE;
17715 return FALSE;
17719 /* Emit a call instruction with pattern PAT. ADDR is the address of
17720 the call target. */
17722 void
17723 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17725 rtx insn;
17727 insn = emit_call_insn (pat);
17729 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17730 If the call might use such an entry, add a use of the PIC register
17731 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17732 if (TARGET_VXWORKS_RTP
17733 && flag_pic
17734 && !sibcall
17735 && GET_CODE (addr) == SYMBOL_REF
17736 && (SYMBOL_REF_DECL (addr)
17737 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17738 : !SYMBOL_REF_LOCAL_P (addr)))
17740 require_pic_register ();
17741 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17744 if (TARGET_AAPCS_BASED)
17746 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17747 linker. We need to add an IP clobber to allow setting
17748 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17749 is not needed since it's a fixed register. */
17750 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17751 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17755 /* Output a 'call' insn. */
17756 const char *
17757 output_call (rtx *operands)
17759 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17761 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17762 if (REGNO (operands[0]) == LR_REGNUM)
17764 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17765 output_asm_insn ("mov%?\t%0, %|lr", operands);
17768 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17770 if (TARGET_INTERWORK || arm_arch4t)
17771 output_asm_insn ("bx%?\t%0", operands);
17772 else
17773 output_asm_insn ("mov%?\t%|pc, %0", operands);
17775 return "";
17778 /* Output a move from arm registers to arm registers of a long double.
17779 OPERANDS[0] is the destination.
17780 OPERANDS[1] is the source. */
17781 const char *
17782 output_mov_long_double_arm_from_arm (rtx *operands)
17784 /* We have to be careful here because the two might overlap. */
17785 int dest_start = REGNO (operands[0]);
17786 int src_start = REGNO (operands[1]);
17787 rtx ops[2];
17788 int i;
17790 if (dest_start < src_start)
17792 for (i = 0; i < 3; i++)
17794 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17795 ops[1] = gen_rtx_REG (SImode, src_start + i);
17796 output_asm_insn ("mov%?\t%0, %1", ops);
17799 else
17801 for (i = 2; i >= 0; i--)
17803 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17804 ops[1] = gen_rtx_REG (SImode, src_start + i);
17805 output_asm_insn ("mov%?\t%0, %1", ops);
17809 return "";
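/* Emit the pair of insns used to load a 32-bit value into DEST: a set of
   the low 16 bits followed, when the upper half is non-zero, by an
   insertion of the upper 16 bits; for a symbolic SRC a HIGH/LO_SUM pair
   is used instead.  Broadly this corresponds to a movw/movt pair at
   output time.  */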
17812 void
17813 arm_emit_movpair (rtx dest, rtx src)
17815 /* If the src is an immediate, simplify it. */
17816 if (CONST_INT_P (src))
17818 HOST_WIDE_INT val = INTVAL (src);
17819 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17820 if ((val >> 16) & 0x0000ffff)
17822 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17823 GEN_INT (16)),
17824 GEN_INT ((val >> 16) & 0x0000ffff));
17825 rtx_insn *insn = get_last_insn ();
17826 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17828 return;
17830 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17831 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17832 rtx_insn *insn = get_last_insn ();
17833 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17836 /* Output a move between double words. It must be REG<-MEM
17837 or MEM<-REG. */
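/* Typical outputs (illustrative only): a load through a plain register
   address becomes
       ldrd    r0, [r2]        @ when TARGET_LDRD is available
   or
       ldmia   r2, {r0, r1}    @ otherwise
   while the trickier addressing modes below may need two separate
   ldr/str instructions and report a length of two insns via *COUNT.  */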
17838 const char *
17839 output_move_double (rtx *operands, bool emit, int *count)
17841 enum rtx_code code0 = GET_CODE (operands[0]);
17842 enum rtx_code code1 = GET_CODE (operands[1]);
17843 rtx otherops[3];
17844 if (count)
17845 *count = 1;
17847 /* The only case when this might happen is when
17848 you are looking at the length of a DImode instruction
17849 that has an invalid constant in it. */
17850 if (code0 == REG && code1 != MEM)
17852 gcc_assert (!emit);
17853 *count = 2;
17854 return "";
17857 if (code0 == REG)
17859 unsigned int reg0 = REGNO (operands[0]);
17861 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17863 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17865 switch (GET_CODE (XEXP (operands[1], 0)))
17867 case REG:
17869 if (emit)
17871 if (TARGET_LDRD
17872 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17873 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17874 else
17875 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17877 break;
17879 case PRE_INC:
17880 gcc_assert (TARGET_LDRD);
17881 if (emit)
17882 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17883 break;
17885 case PRE_DEC:
17886 if (emit)
17888 if (TARGET_LDRD)
17889 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17890 else
17891 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17893 break;
17895 case POST_INC:
17896 if (emit)
17898 if (TARGET_LDRD)
17899 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
17900 else
17901 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
17903 break;
17905 case POST_DEC:
17906 gcc_assert (TARGET_LDRD);
17907 if (emit)
17908 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
17909 break;
17911 case PRE_MODIFY:
17912 case POST_MODIFY:
17913 /* Autoincrement addressing modes should never have overlapping
17914 base and destination registers, and overlapping index registers
17915 are already prohibited, so this doesn't need to worry about
17916 fix_cm3_ldrd. */
17917 otherops[0] = operands[0];
17918 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17919 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17921 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17923 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17925 /* Registers overlap so split out the increment. */
17926 if (emit)
17928 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17929 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
17931 if (count)
17932 *count = 2;
17934 else
17936 /* Use a single insn if we can.
17937 FIXME: IWMMXT allows offsets larger than ldrd can
17938 handle, fix these up with a pair of ldr. */
17939 if (TARGET_THUMB2
17940 || !CONST_INT_P (otherops[2])
17941 || (INTVAL (otherops[2]) > -256
17942 && INTVAL (otherops[2]) < 256))
17944 if (emit)
17945 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
17947 else
17949 if (emit)
17951 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17952 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17954 if (count)
17955 *count = 2;
17960 else
17962 /* Use a single insn if we can.
17963 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17964 fix these up with a pair of ldr. */
17965 if (TARGET_THUMB2
17966 || !CONST_INT_P (otherops[2])
17967 || (INTVAL (otherops[2]) > -256
17968 && INTVAL (otherops[2]) < 256))
17970 if (emit)
17971 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
17973 else
17975 if (emit)
17977 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17978 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17980 if (count)
17981 *count = 2;
17984 break;
17986 case LABEL_REF:
17987 case CONST:
17988 /* We might be able to use ldrd %0, %1 here. However the range is
17989 different to ldr/adr, and it is broken on some ARMv7-M
17990 implementations. */
17991 /* Use the second register of the pair to avoid problematic
17992 overlap. */
17993 otherops[1] = operands[1];
17994 if (emit)
17995 output_asm_insn ("adr%?\t%0, %1", otherops);
17996 operands[1] = otherops[0];
17997 if (emit)
17999 if (TARGET_LDRD)
18000 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18001 else
18002 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18005 if (count)
18006 *count = 2;
18007 break;
18009 /* ??? This needs checking for thumb2. */
18010 default:
18011 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18012 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18014 otherops[0] = operands[0];
18015 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18016 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18018 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18020 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18022 switch ((int) INTVAL (otherops[2]))
18024 case -8:
18025 if (emit)
18026 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18027 return "";
18028 case -4:
18029 if (TARGET_THUMB2)
18030 break;
18031 if (emit)
18032 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18033 return "";
18034 case 4:
18035 if (TARGET_THUMB2)
18036 break;
18037 if (emit)
18038 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18039 return "";
18042 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18043 operands[1] = otherops[0];
18044 if (TARGET_LDRD
18045 && (REG_P (otherops[2])
18046 || TARGET_THUMB2
18047 || (CONST_INT_P (otherops[2])
18048 && INTVAL (otherops[2]) > -256
18049 && INTVAL (otherops[2]) < 256)))
18051 if (reg_overlap_mentioned_p (operands[0],
18052 otherops[2]))
18054 /* Swap base and index registers over to
18055 avoid a conflict. */
18056 std::swap (otherops[1], otherops[2]);
18058 /* If both registers conflict, it will usually
18059 have been fixed by a splitter. */
18060 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18061 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18063 if (emit)
18065 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18066 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18068 if (count)
18069 *count = 2;
18071 else
18073 otherops[0] = operands[0];
18074 if (emit)
18075 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18077 return "";
18080 if (CONST_INT_P (otherops[2]))
18082 if (emit)
18084 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18085 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18086 else
18087 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18090 else
18092 if (emit)
18093 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18096 else
18098 if (emit)
18099 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18102 if (count)
18103 *count = 2;
18105 if (TARGET_LDRD)
18106 return "ldrd%?\t%0, [%1]";
18108 return "ldmia%?\t%1, %M0";
18110 else
18112 otherops[1] = adjust_address (operands[1], SImode, 4);
18113 /* Take care of overlapping base/data reg. */
18114 if (reg_mentioned_p (operands[0], operands[1]))
18116 if (emit)
18118 output_asm_insn ("ldr%?\t%0, %1", otherops);
18119 output_asm_insn ("ldr%?\t%0, %1", operands);
18121 if (count)
18122 *count = 2;
18125 else
18127 if (emit)
18129 output_asm_insn ("ldr%?\t%0, %1", operands);
18130 output_asm_insn ("ldr%?\t%0, %1", otherops);
18132 if (count)
18133 *count = 2;
18138 else
18140 /* Constraints should ensure this. */
18141 gcc_assert (code0 == MEM && code1 == REG);
18142 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18143 || (TARGET_ARM && TARGET_LDRD));
18145 switch (GET_CODE (XEXP (operands[0], 0)))
18147 case REG:
18148 if (emit)
18150 if (TARGET_LDRD)
18151 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18152 else
18153 output_asm_insn ("stm%?\t%m0, %M1", operands);
18155 break;
18157 case PRE_INC:
18158 gcc_assert (TARGET_LDRD);
18159 if (emit)
18160 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18161 break;
18163 case PRE_DEC:
18164 if (emit)
18166 if (TARGET_LDRD)
18167 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18168 else
18169 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18171 break;
18173 case POST_INC:
18174 if (emit)
18176 if (TARGET_LDRD)
18177 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18178 else
18179 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18181 break;
18183 case POST_DEC:
18184 gcc_assert (TARGET_LDRD);
18185 if (emit)
18186 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18187 break;
18189 case PRE_MODIFY:
18190 case POST_MODIFY:
18191 otherops[0] = operands[1];
18192 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18193 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18195 /* IWMMXT allows offsets larger than ldrd can handle,
18196 fix these up with a pair of ldr. */
18197 if (!TARGET_THUMB2
18198 && CONST_INT_P (otherops[2])
18199 && (INTVAL(otherops[2]) <= -256
18200 || INTVAL(otherops[2]) >= 256))
18202 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18204 if (emit)
18206 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18207 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18209 if (count)
18210 *count = 2;
18212 else
18214 if (emit)
18216 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18217 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18219 if (count)
18220 *count = 2;
18223 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18225 if (emit)
18226 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18228 else
18230 if (emit)
18231 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18233 break;
18235 case PLUS:
18236 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18237 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18239 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18241 case -8:
18242 if (emit)
18243 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18244 return "";
18246 case -4:
18247 if (TARGET_THUMB2)
18248 break;
18249 if (emit)
18250 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18251 return "";
18253 case 4:
18254 if (TARGET_THUMB2)
18255 break;
18256 if (emit)
18257 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18258 return "";
18261 if (TARGET_LDRD
18262 && (REG_P (otherops[2])
18263 || TARGET_THUMB2
18264 || (CONST_INT_P (otherops[2])
18265 && INTVAL (otherops[2]) > -256
18266 && INTVAL (otherops[2]) < 256)))
18268 otherops[0] = operands[1];
18269 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18270 if (emit)
18271 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18272 return "";
18274 /* Fall through */
18276 default:
18277 otherops[0] = adjust_address (operands[0], SImode, 4);
18278 otherops[1] = operands[1];
18279 if (emit)
18281 output_asm_insn ("str%?\t%1, %0", operands);
18282 output_asm_insn ("str%?\t%H1, %0", otherops);
18284 if (count)
18285 *count = 2;
18289 return "";
18292 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18293 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18295 const char *
18296 output_move_quad (rtx *operands)
18298 if (REG_P (operands[0]))
18300 /* Load, or reg->reg move. */
18302 if (MEM_P (operands[1]))
18304 switch (GET_CODE (XEXP (operands[1], 0)))
18306 case REG:
18307 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18308 break;
18310 case LABEL_REF:
18311 case CONST:
18312 output_asm_insn ("adr%?\t%0, %1", operands);
18313 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18314 break;
18316 default:
18317 gcc_unreachable ();
18320 else
18322 rtx ops[2];
18323 int dest, src, i;
18325 gcc_assert (REG_P (operands[1]));
18327 dest = REGNO (operands[0]);
18328 src = REGNO (operands[1]);
18330 /* This seems pretty dumb, but hopefully GCC won't try to do it
18331 very often. */
18332 if (dest < src)
18333 for (i = 0; i < 4; i++)
18335 ops[0] = gen_rtx_REG (SImode, dest + i);
18336 ops[1] = gen_rtx_REG (SImode, src + i);
18337 output_asm_insn ("mov%?\t%0, %1", ops);
18339 else
18340 for (i = 3; i >= 0; i--)
18342 ops[0] = gen_rtx_REG (SImode, dest + i);
18343 ops[1] = gen_rtx_REG (SImode, src + i);
18344 output_asm_insn ("mov%?\t%0, %1", ops);
18348 else
18350 gcc_assert (MEM_P (operands[0]));
18351 gcc_assert (REG_P (operands[1]));
18352 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18354 switch (GET_CODE (XEXP (operands[0], 0)))
18356 case REG:
18357 output_asm_insn ("stm%?\t%m0, %M1", operands);
18358 break;
18360 default:
18361 gcc_unreachable ();
18365 return "";
18368 /* Output a VFP load or store instruction. */
18370 const char *
18371 output_move_vfp (rtx *operands)
18373 rtx reg, mem, addr, ops[2];
18374 int load = REG_P (operands[0]);
18375 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18376 int sp = (!TARGET_VFP_FP16INST
18377 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18378 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18379 const char *templ;
18380 char buff[50];
18381 machine_mode mode;
18383 reg = operands[!load];
18384 mem = operands[load];
18386 mode = GET_MODE (reg);
18388 gcc_assert (REG_P (reg));
18389 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18390 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18391 || mode == SFmode
18392 || mode == DFmode
18393 || mode == HImode
18394 || mode == SImode
18395 || mode == DImode
18396 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18397 gcc_assert (MEM_P (mem));
18399 addr = XEXP (mem, 0);
18401 switch (GET_CODE (addr))
18403 case PRE_DEC:
18404 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18405 ops[0] = XEXP (addr, 0);
18406 ops[1] = reg;
18407 break;
18409 case POST_INC:
18410 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18411 ops[0] = XEXP (addr, 0);
18412 ops[1] = reg;
18413 break;
18415 default:
18416 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18417 ops[0] = reg;
18418 ops[1] = mem;
18419 break;
18422 sprintf (buff, templ,
18423 load ? "ld" : "st",
18424 dp ? "64" : sp ? "32" : "16",
18425 dp ? "P" : "",
18426 integer_p ? "\t%@ int" : "");
18427 output_asm_insn (buff, ops);
18429 return "";
18432 /* Output a Neon double-word or quad-word load or store, or a load
18433 or store for larger structure modes.
18435 WARNING: The ordering of elements is weird in big-endian mode,
18436 because the EABI requires that vectors stored in memory appear
18437 as though they were stored by a VSTM instruction.
18438 GCC RTL defines element ordering based on in-memory order.
18439 This can be different from the architectural ordering of elements
18440 within a NEON register. The intrinsics defined in arm_neon.h use the
18441 NEON register element ordering, not the GCC RTL element ordering.
18443 For example, the in-memory ordering of a big-endian quadword
18444 vector with 16-bit elements when stored from register pair {d0,d1}
18445 will be (lowest address first, d0[N] is NEON register element N):
18447 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18449 When necessary, quadword registers (dN, dN+1) are moved to ARM
18450 registers from rN in the order:
18452 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18454 So that STM/LDM can be used on vectors in ARM registers, and the
18455 same memory layout will result as if VSTM/VLDM were used.
18457 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18458 possible, which allows use of appropriate alignment tags.
18459 Note that the choice of "64" is independent of the actual vector
18460 element size; this size simply ensures that the behavior is
18461 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18463 Due to limitations of those instructions, use of VST1.64/VLD1.64
18464 is not possible if:
18465 - the address contains PRE_DEC, or
18466 - the mode refers to more than 4 double-word registers
18468 In those cases, it would be possible to replace VSTM/VLDM by a
18469 sequence of instructions; this is not currently implemented since
18470 this is not certain to actually improve performance. */
18472 const char *
18473 output_move_neon (rtx *operands)
18475 rtx reg, mem, addr, ops[2];
18476 int regno, nregs, load = REG_P (operands[0]);
18477 const char *templ;
18478 char buff[50];
18479 machine_mode mode;
18481 reg = operands[!load];
18482 mem = operands[load];
18484 mode = GET_MODE (reg);
18486 gcc_assert (REG_P (reg));
18487 regno = REGNO (reg);
18488 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18489 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18490 || NEON_REGNO_OK_FOR_QUAD (regno));
18491 gcc_assert (VALID_NEON_DREG_MODE (mode)
18492 || VALID_NEON_QREG_MODE (mode)
18493 || VALID_NEON_STRUCT_MODE (mode));
18494 gcc_assert (MEM_P (mem));
18496 addr = XEXP (mem, 0);
18498 /* Strip off const from addresses like (const (plus (...))). */
18499 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18500 addr = XEXP (addr, 0);
18502 switch (GET_CODE (addr))
18504 case POST_INC:
18505 /* We have to use vldm / vstm for too-large modes. */
18506 if (nregs > 4)
18508 templ = "v%smia%%?\t%%0!, %%h1";
18509 ops[0] = XEXP (addr, 0);
18511 else
18513 templ = "v%s1.64\t%%h1, %%A0";
18514 ops[0] = mem;
18516 ops[1] = reg;
18517 break;
18519 case PRE_DEC:
18520 /* We have to use vldm / vstm in this case, since there is no
18521 pre-decrement form of the vld1 / vst1 instructions. */
18522 templ = "v%smdb%%?\t%%0!, %%h1";
18523 ops[0] = XEXP (addr, 0);
18524 ops[1] = reg;
18525 break;
18527 case POST_MODIFY:
18528 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18529 gcc_unreachable ();
18531 case REG:
18532 /* We have to use vldm / vstm for too-large modes. */
18533 if (nregs > 1)
18535 if (nregs > 4)
18536 templ = "v%smia%%?\t%%m0, %%h1";
18537 else
18538 templ = "v%s1.64\t%%h1, %%A0";
18540 ops[0] = mem;
18541 ops[1] = reg;
18542 break;
18544 /* Fall through. */
18545 case LABEL_REF:
18546 case PLUS:
18548 int i;
18549 int overlap = -1;
18550 for (i = 0; i < nregs; i++)
18552 /* We're only using DImode here because it's a convenient size. */
18553 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18554 ops[1] = adjust_address (mem, DImode, 8 * i);
18555 if (reg_overlap_mentioned_p (ops[0], mem))
18557 gcc_assert (overlap == -1);
18558 overlap = i;
18560 else
18562 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18563 output_asm_insn (buff, ops);
18566 if (overlap != -1)
18568 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18569 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18570 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18571 output_asm_insn (buff, ops);
18574 return "";
18577 default:
18578 gcc_unreachable ();
18581 sprintf (buff, templ, load ? "ld" : "st");
18582 output_asm_insn (buff, ops);
18584 return "";
18587 /* Compute and return the length of neon_mov<mode>, where <mode> is
18588 one of the VSTRUCT modes: EI, OI, CI or XI. */
18590 arm_attr_length_move_neon (rtx_insn *insn)
18592 rtx reg, mem, addr;
18593 int load;
18594 machine_mode mode;
18596 extract_insn_cached (insn);
18598 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18600 mode = GET_MODE (recog_data.operand[0]);
18601 switch (mode)
18603 case EImode:
18604 case OImode:
18605 return 8;
18606 case CImode:
18607 return 12;
18608 case XImode:
18609 return 16;
18610 default:
18611 gcc_unreachable ();
18615 load = REG_P (recog_data.operand[0]);
18616 reg = recog_data.operand[!load];
18617 mem = recog_data.operand[load];
18619 gcc_assert (MEM_P (mem));
18621 mode = GET_MODE (reg);
18622 addr = XEXP (mem, 0);
18624 /* Strip off const from addresses like (const (plus (...))). */
18625 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18626 addr = XEXP (addr, 0);
18628 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18630 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18631 return insns * 4;
18633 else
18634 return 4;
18637 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18638 return zero. */
18641 arm_address_offset_is_imm (rtx_insn *insn)
18643 rtx mem, addr;
18645 extract_insn_cached (insn);
18647 if (REG_P (recog_data.operand[0]))
18648 return 0;
18650 mem = recog_data.operand[0];
18652 gcc_assert (MEM_P (mem));
18654 addr = XEXP (mem, 0);
18656 if (REG_P (addr)
18657 || (GET_CODE (addr) == PLUS
18658 && REG_P (XEXP (addr, 0))
18659 && CONST_INT_P (XEXP (addr, 1))))
18660 return 1;
18661 else
18662 return 0;
18665 /* Output an ADD r, s, #n where n may be too big for one instruction.
18666 If adding zero to one register, output nothing. */
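/* A worked example (value invented): for n = 0x1234 the constant is split
   into 8-bit chunks at even bit positions, giving roughly
       add     r0, r1, #0x234
       add     r0, r0, #0x1000
   each of which is a valid ARM rotated-immediate operand.  */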
18667 const char *
18668 output_add_immediate (rtx *operands)
18670 HOST_WIDE_INT n = INTVAL (operands[2]);
18672 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18674 if (n < 0)
18675 output_multi_immediate (operands,
18676 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18677 -n);
18678 else
18679 output_multi_immediate (operands,
18680 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18684 return "";
18687 /* Output a multiple immediate operation.
18688 OPERANDS is the vector of operands referred to in the output patterns.
18689 INSTR1 is the output pattern to use for the first constant.
18690 INSTR2 is the output pattern to use for subsequent constants.
18691 IMMED_OP is the index of the constant slot in OPERANDS.
18692 N is the constant value. */
18693 static const char *
18694 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18695 int immed_op, HOST_WIDE_INT n)
18697 #if HOST_BITS_PER_WIDE_INT > 32
18698 n &= 0xffffffff;
18699 #endif
18701 if (n == 0)
18703 /* Quick and easy output. */
18704 operands[immed_op] = const0_rtx;
18705 output_asm_insn (instr1, operands);
18707 else
18709 int i;
18710 const char * instr = instr1;
18712 /* Note that n is never zero here (which would give no output). */
18713 for (i = 0; i < 32; i += 2)
18715 if (n & (3 << i))
18717 operands[immed_op] = GEN_INT (n & (255 << i));
18718 output_asm_insn (instr, operands);
18719 instr = instr2;
18720 i += 6;
18725 return "";
18728 /* Return the name of a shifter operation. */
18729 static const char *
18730 arm_shift_nmem(enum rtx_code code)
18732 switch (code)
18734 case ASHIFT:
18735 return ARM_LSL_NAME;
18737 case ASHIFTRT:
18738 return "asr";
18740 case LSHIFTRT:
18741 return "lsr";
18743 case ROTATERT:
18744 return "ror";
18746 default:
18747 abort();
18751 /* Return the appropriate ARM instruction for the operation code.
18752 The returned result should not be overwritten. OP is the rtx of the
18753 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18754 was shifted. */
18755 const char *
18756 arithmetic_instr (rtx op, int shift_first_arg)
18758 switch (GET_CODE (op))
18760 case PLUS:
18761 return "add";
18763 case MINUS:
18764 return shift_first_arg ? "rsb" : "sub";
18766 case IOR:
18767 return "orr";
18769 case XOR:
18770 return "eor";
18772 case AND:
18773 return "and";
18775 case ASHIFT:
18776 case ASHIFTRT:
18777 case LSHIFTRT:
18778 case ROTATERT:
18779 return arm_shift_nmem(GET_CODE(op));
18781 default:
18782 gcc_unreachable ();
18786 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18787 for the operation code. The returned result should not be overwritten.
18788 OP is the rtx of the shift.
18789 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
18790 will hold the constant shift amount. */
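/* For instance (a sketch): a (mult:SI (reg) (const_int 8)) operand, which
   the recognizer accepts in place of a shift, comes back as ARM_LSL_NAME
   with *AMOUNTP set to 3, while (ashiftrt:SI (reg) (reg)) returns "asr"
   with *AMOUNTP set to -1.  */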
18791 static const char *
18792 shift_op (rtx op, HOST_WIDE_INT *amountp)
18794 const char * mnem;
18795 enum rtx_code code = GET_CODE (op);
18797 switch (code)
18799 case ROTATE:
18800 if (!CONST_INT_P (XEXP (op, 1)))
18802 output_operand_lossage ("invalid shift operand");
18803 return NULL;
18806 code = ROTATERT;
18807 *amountp = 32 - INTVAL (XEXP (op, 1));
18808 mnem = "ror";
18809 break;
18811 case ASHIFT:
18812 case ASHIFTRT:
18813 case LSHIFTRT:
18814 case ROTATERT:
18815 mnem = arm_shift_nmem(code);
18816 if (CONST_INT_P (XEXP (op, 1)))
18818 *amountp = INTVAL (XEXP (op, 1));
18820 else if (REG_P (XEXP (op, 1)))
18822 *amountp = -1;
18823 return mnem;
18825 else
18827 output_operand_lossage ("invalid shift operand");
18828 return NULL;
18830 break;
18832 case MULT:
18833 /* We never have to worry about the amount being other than a
18834 power of 2, since this case can never be reloaded from a reg. */
18835 if (!CONST_INT_P (XEXP (op, 1)))
18837 output_operand_lossage ("invalid shift operand");
18838 return NULL;
18841 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18843 /* Amount must be a power of two. */
18844 if (*amountp & (*amountp - 1))
18846 output_operand_lossage ("invalid shift operand");
18847 return NULL;
18850 *amountp = exact_log2 (*amountp);
18851 gcc_assert (IN_RANGE (*amountp, 0, 31));
18852 return ARM_LSL_NAME;
18854 default:
18855 output_operand_lossage ("invalid shift operand");
18856 return NULL;
18859 /* This is not 100% correct, but follows from the desire to merge
18860 multiplication by a power of 2 with the recognizer for a
18861 shift. >=32 is not a valid shift for "lsl", so we must try and
18862 output a shift that produces the correct arithmetical result.
18863 Using lsr #32 is identical except for the fact that the carry bit
18864 is not set correctly if we set the flags; but we never use the
18865 carry bit from such an operation, so we can ignore that. */
18866 if (code == ROTATERT)
18867 /* Rotate is just modulo 32. */
18868 *amountp &= 31;
18869 else if (*amountp != (*amountp & 31))
18871 if (code == ASHIFT)
18872 mnem = "lsr";
18873 *amountp = 32;
18876 /* Shifts of 0 are no-ops. */
18877 if (*amountp == 0)
18878 return NULL;
18880 return mnem;
18883 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18884 because /bin/as is horribly restrictive. The judgement about
18885 whether each character is 'printable' (and can be output as
18886 is) or not (and must be printed with an octal escape) must be made
18887 with reference to the *host* character set -- the situation is
18888 similar to that discussed in the comments above pp_c_char in
18889 c-pretty-print.c. */
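/* For example (illustrative): the bytes of the string  a"b  followed by a
   newline are emitted as
       .ascii  "a\"b\012"
   with the output split across several .ascii directives once a line
   reaches MAX_ASCII_LEN characters.  */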
18891 #define MAX_ASCII_LEN 51
18893 void
18894 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18896 int i;
18897 int len_so_far = 0;
18899 fputs ("\t.ascii\t\"", stream);
18901 for (i = 0; i < len; i++)
18903 int c = p[i];
18905 if (len_so_far >= MAX_ASCII_LEN)
18907 fputs ("\"\n\t.ascii\t\"", stream);
18908 len_so_far = 0;
18911 if (ISPRINT (c))
18913 if (c == '\\' || c == '\"')
18915 putc ('\\', stream);
18916 len_so_far++;
18918 putc (c, stream);
18919 len_so_far++;
18921 else
18923 fprintf (stream, "\\%03o", c);
18924 len_so_far += 4;
18928 fputs ("\"\n", stream);
18931 /* Whether a register is callee saved or not. This is necessary because high
18932 registers are marked as caller saved when optimizing for size on Thumb-1
18933 targets despite being callee saved, in order to avoid using them. */
18934 #define callee_saved_reg_p(reg) \
18935 (!call_used_regs[reg] \
18936 || (TARGET_THUMB1 && optimize_size \
18937 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18939 /* Compute the register save mask for registers 0 through 12
18940 inclusive. This code is used by arm_compute_save_reg_mask. */
18942 static unsigned long
18943 arm_compute_save_reg0_reg12_mask (void)
18945 unsigned long func_type = arm_current_func_type ();
18946 unsigned long save_reg_mask = 0;
18947 unsigned int reg;
18949 if (IS_INTERRUPT (func_type))
18951 unsigned int max_reg;
18952 /* Interrupt functions must not corrupt any registers,
18953 even call clobbered ones. If this is a leaf function
18954 we can just examine the registers used by the RTL, but
18955 otherwise we have to assume that whatever function is
18956 called might clobber anything, and so we have to save
18957 all the call-clobbered registers as well. */
18958 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18959 /* FIQ handlers have registers r8 - r12 banked, so
18960 we only need to check r0 - r7. Normal ISRs only
18961 bank r14 and r15, so we must check up to r12.
18962 r13 is the stack pointer which is always preserved,
18963 so we do not need to consider it here. */
18964 max_reg = 7;
18965 else
18966 max_reg = 12;
18968 for (reg = 0; reg <= max_reg; reg++)
18969 if (df_regs_ever_live_p (reg)
18970 || (! crtl->is_leaf && call_used_regs[reg]))
18971 save_reg_mask |= (1 << reg);
18973 /* Also save the pic base register if necessary. */
18974 if (flag_pic
18975 && !TARGET_SINGLE_PIC_BASE
18976 && arm_pic_register != INVALID_REGNUM
18977 && crtl->uses_pic_offset_table)
18978 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18980 else if (IS_VOLATILE(func_type))
18982 /* For noreturn functions we historically omitted register saves
18983 altogether. However, this really messes up debugging. As a
18984 compromise, save just the frame pointers. Combined with the link
18985 register saved elsewhere this should be sufficient to get
18986 a backtrace. */
18987 if (frame_pointer_needed)
18988 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18989 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18990 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18991 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18992 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18994 else
18996 /* In the normal case we only need to save those registers
18997 which are call saved and which are used by this function. */
18998 for (reg = 0; reg <= 11; reg++)
18999 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19000 save_reg_mask |= (1 << reg);
19002 /* Handle the frame pointer as a special case. */
19003 if (frame_pointer_needed)
19004 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19006 /* If we aren't loading the PIC register,
19007 don't stack it even though it may be live. */
19008 if (flag_pic
19009 && !TARGET_SINGLE_PIC_BASE
19010 && arm_pic_register != INVALID_REGNUM
19011 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19012 || crtl->uses_pic_offset_table))
19013 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19015 /* The prologue will copy SP into R0, so save it. */
19016 if (IS_STACKALIGN (func_type))
19017 save_reg_mask |= 1;
19020 /* Save registers so the exception handler can modify them. */
19021 if (crtl->calls_eh_return)
19023 unsigned int i;
19025 for (i = 0; ; i++)
19027 reg = EH_RETURN_DATA_REGNO (i);
19028 if (reg == INVALID_REGNUM)
19029 break;
19030 save_reg_mask |= 1 << reg;
19034 return save_reg_mask;
19037 /* Return true if r3 is live at the start of the function. */
19039 static bool
19040 arm_r3_live_at_start_p (void)
19042 /* Just look at cfg info, which is still close enough to correct at this
19043 point. This gives false positives for broken functions that might use
19044 uninitialized data that happens to be allocated in r3, but who cares? */
19045 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19048 /* Compute the number of bytes used to store the static chain register on the
19049 stack, above the stack frame. We need to know this accurately to get the
19050 alignment of the rest of the stack frame correct. */
19052 static int
19053 arm_compute_static_chain_stack_bytes (void)
19055 /* See the defining assertion in arm_expand_prologue. */
19056 if (IS_NESTED (arm_current_func_type ())
19057 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19058 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19059 && !df_regs_ever_live_p (LR_REGNUM)))
19060 && arm_r3_live_at_start_p ()
19061 && crtl->args.pretend_args_size == 0)
19062 return 4;
19064 return 0;
19067 /* Compute a bit mask of which registers need to be
19068 saved on the stack for the current function.
19069 This is used by arm_get_frame_offsets, which may add extra registers. */
19071 static unsigned long
19072 arm_compute_save_reg_mask (void)
19074 unsigned int save_reg_mask = 0;
19075 unsigned long func_type = arm_current_func_type ();
19076 unsigned int reg;
19078 if (IS_NAKED (func_type))
19079 /* This should never really happen. */
19080 return 0;
19082 /* If we are creating a stack frame, then we must save the frame pointer,
19083 IP (which will hold the old stack pointer), LR and the PC. */
19084 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19085 save_reg_mask |=
19086 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19087 | (1 << IP_REGNUM)
19088 | (1 << LR_REGNUM)
19089 | (1 << PC_REGNUM);
19091 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19093 /* Decide if we need to save the link register.
19094 Interrupt routines have their own banked link register,
19095 so they never need to save it.
19096 Otherwise if we do not use the link register we do not need to save
19097 it. If we are pushing other registers onto the stack however, we
19098 can save an instruction in the epilogue by pushing the link register
19099 now and then popping it back into the PC. This incurs extra memory
19100 accesses though, so we only do it when optimizing for size, and only
19101 if we know that we will not need a fancy return sequence. */
19102 if (df_regs_ever_live_p (LR_REGNUM)
19103 || (save_reg_mask
19104 && optimize_size
19105 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19106 && !crtl->tail_call_emit
19107 && !crtl->calls_eh_return))
19108 save_reg_mask |= 1 << LR_REGNUM;
19110 if (cfun->machine->lr_save_eliminated)
19111 save_reg_mask &= ~ (1 << LR_REGNUM);
19113 if (TARGET_REALLY_IWMMXT
19114 && ((bit_count (save_reg_mask)
19115 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19116 arm_compute_static_chain_stack_bytes())
19117 ) % 2) != 0)
19119 /* The total number of registers that are going to be pushed
19120 onto the stack is odd. We need to ensure that the stack
19121 is 64-bit aligned before we start to save iWMMXt registers,
19122 and also before we start to create locals. (A local variable
19123 might be a double or long long which we will load/store using
19124 an iWMMXt instruction). Therefore we need to push another
19125 ARM register, so that the stack will be 64-bit aligned. We
19126 try to avoid using the arg registers (r0 - r3) as they might be
19127 used to pass values in a tail call. */
19128 for (reg = 4; reg <= 12; reg++)
19129 if ((save_reg_mask & (1 << reg)) == 0)
19130 break;
19132 if (reg <= 12)
19133 save_reg_mask |= (1 << reg);
19134 else
19136 cfun->machine->sibcall_blocked = 1;
19137 save_reg_mask |= (1 << 3);
19141 /* We may need to push an additional register for use in initializing the
19142 PIC base register. */
19143 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19144 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19146 reg = thumb_find_work_register (1 << 4);
19147 if (!call_used_regs[reg])
19148 save_reg_mask |= (1 << reg);
19151 return save_reg_mask;
19154 /* Compute a bit mask of which registers need to be
19155 saved on the stack for the current function. */
19156 static unsigned long
19157 thumb1_compute_save_reg_mask (void)
19159 unsigned long mask;
19160 unsigned reg;
19162 mask = 0;
19163 for (reg = 0; reg < 12; reg ++)
19164 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19165 mask |= 1 << reg;
19167 /* Handle the frame pointer as a special case. */
19168 if (frame_pointer_needed)
19169 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19171 if (flag_pic
19172 && !TARGET_SINGLE_PIC_BASE
19173 && arm_pic_register != INVALID_REGNUM
19174 && crtl->uses_pic_offset_table)
19175 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19177 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19178 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19179 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19181 /* LR will also be pushed if any lo regs are pushed. */
19182 if (mask & 0xff || thumb_force_lr_save ())
19183 mask |= (1 << LR_REGNUM);
19185 /* Make sure we have a low work register if we need one.
19186 We will need one if we are going to push a high register,
19187 but we are not currently intending to push a low register. */
19188 if ((mask & 0xff) == 0
19189 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19191 /* Use thumb_find_work_register to choose which register
19192 we will use. If the register is live then we will
19193 have to push it. Use LAST_LO_REGNUM as our fallback
19194 choice for the register to select. */
19195 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19196 /* Make sure the register returned by thumb_find_work_register is
19197 not part of the return value. */
19198 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19199 reg = LAST_LO_REGNUM;
19201 if (callee_saved_reg_p (reg))
19202 mask |= 1 << reg;
19205 /* The 504 below is 8 bytes less than 512 because there are two possible
19206 alignment words. We can't tell here if they will be present or not so we
19207 have to play it safe and assume that they are. */
19208 if ((CALLER_INTERWORKING_SLOT_SIZE +
19209 ROUND_UP_WORD (get_frame_size ()) +
19210 crtl->outgoing_args_size) >= 504)
19212 /* This is the same as the code in thumb1_expand_prologue() which
19213 determines which register to use for stack decrement. */
19214 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19215 if (mask & (1 << reg))
19216 break;
19218 if (reg > LAST_LO_REGNUM)
19220 /* Make sure we have a register available for stack decrement. */
19221 mask |= 1 << LAST_LO_REGNUM;
19225 return mask;
19229 /* Return the number of bytes required to save VFP registers. */
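/* For instance, if only the call-saved pair d8-d9 is live this would
   normally amount to 2 * 8 = 16 bytes, plus one extra 8-byte slot on
   pre-ARMv6 cores because of the ARM10 VFPr1 workaround below.  */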
19230 static int
19231 arm_get_vfp_saved_size (void)
19233 unsigned int regno;
19234 int count;
19235 int saved;
19237 saved = 0;
19238 /* Space for saved VFP registers. */
19239 if (TARGET_HARD_FLOAT)
19241 count = 0;
19242 for (regno = FIRST_VFP_REGNUM;
19243 regno < LAST_VFP_REGNUM;
19244 regno += 2)
19246 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19247 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19249 if (count > 0)
19251 /* Workaround ARM10 VFPr1 bug. */
19252 if (count == 2 && !arm_arch6)
19253 count++;
19254 saved += count * 8;
19256 count = 0;
19258 else
19259 count++;
19261 if (count > 0)
19263 if (count == 2 && !arm_arch6)
19264 count++;
19265 saved += count * 8;
19268 return saved;
19272 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19273 everything bar the final return instruction. If simple_return is true,
19274 then do not output the epilogue, because it has already been emitted in RTL. */
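/* Depending on what was saved, the sequence printed here is typically
   something like "ldmfd sp!, {r4, r5, pc}" or "pop {r4, pc}", or just
   "bx lr" / "mov pc, lr" when no registers need restoring.  */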
19275 const char *
19276 output_return_instruction (rtx operand, bool really_return, bool reverse,
19277 bool simple_return)
19279 char conditional[10];
19280 char instr[100];
19281 unsigned reg;
19282 unsigned long live_regs_mask;
19283 unsigned long func_type;
19284 arm_stack_offsets *offsets;
19286 func_type = arm_current_func_type ();
19288 if (IS_NAKED (func_type))
19289 return "";
19291 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19293 /* If this function was declared non-returning, and we have
19294 found a tail call, then we have to trust that the called
19295 function won't return. */
19296 if (really_return)
19298 rtx ops[2];
19300 /* Otherwise, trap an attempted return by aborting. */
19301 ops[0] = operand;
19302 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19303 : "abort");
19304 assemble_external_libcall (ops[1]);
19305 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19308 return "";
19311 gcc_assert (!cfun->calls_alloca || really_return);
19313 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19315 cfun->machine->return_used_this_function = 1;
19317 offsets = arm_get_frame_offsets ();
19318 live_regs_mask = offsets->saved_regs_mask;
19320 if (!simple_return && live_regs_mask)
19322 const char * return_reg;
19324 /* If we do not have any special requirements for function exit
19325 (e.g. interworking) then we can load the return address
19326 directly into the PC. Otherwise we must load it into LR. */
19327 if (really_return
19328 && !IS_CMSE_ENTRY (func_type)
19329 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19330 return_reg = reg_names[PC_REGNUM];
19331 else
19332 return_reg = reg_names[LR_REGNUM];
19334 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19336 /* There are three possible reasons for the IP register
19337 being saved: 1) a stack frame was created, in which case
19338 IP contains the old stack pointer; 2) an ISR routine
19339 corrupted it; or 3) it was saved to align the stack on
19340 iWMMXt. In case 1, restore IP into SP, otherwise just
19341 restore IP. */
19342 if (frame_pointer_needed)
19344 live_regs_mask &= ~ (1 << IP_REGNUM);
19345 live_regs_mask |= (1 << SP_REGNUM);
19347 else
19348 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19351 /* On some ARM architectures it is faster to use LDR rather than
19352 LDM to load a single register. On other architectures, the
19353 cost is the same. In 26 bit mode, or for exception handlers,
19354 we have to use LDM to load the PC so that the CPSR is also
19355 restored. */
19356 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19357 if (live_regs_mask == (1U << reg))
19358 break;
19360 if (reg <= LAST_ARM_REGNUM
19361 && (reg != LR_REGNUM
19362 || ! really_return
19363 || ! IS_INTERRUPT (func_type)))
19365 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19366 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19368 else
19370 char *p;
19371 int first = 1;
19373 /* Generate the load multiple instruction to restore the
19374 registers. Note we can get here, even if
19375 frame_pointer_needed is true, but only if sp already
19376 points to the base of the saved core registers. */
19377 if (live_regs_mask & (1 << SP_REGNUM))
19379 unsigned HOST_WIDE_INT stack_adjust;
19381 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19382 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19384 if (stack_adjust && arm_arch5 && TARGET_ARM)
19385 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19386 else
19388 /* If we can't use ldmib (SA110 bug),
19389 then try to pop r3 instead. */
19390 if (stack_adjust)
19391 live_regs_mask |= 1 << 3;
19393 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19396 /* For interrupt returns we have to use an LDM rather than
19397 a POP so that we can use the exception return variant. */
19398 else if (IS_INTERRUPT (func_type))
19399 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19400 else
19401 sprintf (instr, "pop%s\t{", conditional);
19403 p = instr + strlen (instr);
19405 for (reg = 0; reg <= SP_REGNUM; reg++)
19406 if (live_regs_mask & (1 << reg))
19408 int l = strlen (reg_names[reg]);
19410 if (first)
19411 first = 0;
19412 else
19414 memcpy (p, ", ", 2);
19415 p += 2;
19418 memcpy (p, "%|", 2);
19419 memcpy (p + 2, reg_names[reg], l);
19420 p += l + 2;
19423 if (live_regs_mask & (1 << LR_REGNUM))
19425 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19426 /* If returning from an interrupt, restore the CPSR. */
19427 if (IS_INTERRUPT (func_type))
19428 strcat (p, "^");
19430 else
19431 strcpy (p, "}");
19434 output_asm_insn (instr, & operand);
19436 /* See if we need to generate an extra instruction to
19437 perform the actual function return. */
19438 if (really_return
19439 && func_type != ARM_FT_INTERWORKED
19440 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19442 /* The return has already been handled
19443 by loading the LR into the PC. */
19444 return "";
19448 if (really_return)
19450 switch ((int) ARM_FUNC_TYPE (func_type))
19452 case ARM_FT_ISR:
19453 case ARM_FT_FIQ:
19454 /* ??? This is wrong for unified assembly syntax. */
19455 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19456 break;
19458 case ARM_FT_INTERWORKED:
19459 gcc_assert (arm_arch5 || arm_arch4t);
19460 sprintf (instr, "bx%s\t%%|lr", conditional);
19461 break;
19463 case ARM_FT_EXCEPTION:
19464 /* ??? This is wrong for unified assembly syntax. */
19465 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19466 break;
19468 default:
19469 if (IS_CMSE_ENTRY (func_type))
19471 /* Check if we have to clear the 'GE bits' which is only used if
19472 parallel add and subtraction instructions are available. */
19473 if (TARGET_INT_SIMD)
19474 snprintf (instr, sizeof (instr),
19475 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19476 else
19477 snprintf (instr, sizeof (instr),
19478 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19480 output_asm_insn (instr, & operand);
19481 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19483 /* Clear the cumulative exception-status bits (0-4,7) and the
19484 condition code bits (28-31) of the FPSCR. We need to
19485 remember to clear the first scratch register used (IP) and
19486 save and restore the second (r4). */
19487 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19488 output_asm_insn (instr, & operand);
19489 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19490 output_asm_insn (instr, & operand);
19491 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19492 output_asm_insn (instr, & operand);
19493 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19494 output_asm_insn (instr, & operand);
19495 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19496 output_asm_insn (instr, & operand);
19497 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19498 output_asm_insn (instr, & operand);
19499 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19500 output_asm_insn (instr, & operand);
19501 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19502 output_asm_insn (instr, & operand);
19504 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19506 /* Use bx if it's available. */
19507 else if (arm_arch5 || arm_arch4t)
19508 sprintf (instr, "bx%s\t%%|lr", conditional);
19509 else
19510 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19511 break;
19514 output_asm_insn (instr, & operand);
19517 return "";
19520 /* Output in FILE asm statements needed to declare the NAME of the function
19521 defined by its DECL node. */
19523 void
19524 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19526 size_t cmse_name_len;
19527 char *cmse_name = 0;
19528 char cmse_prefix[] = "__acle_se_";
19530 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19531 extra function label for each function with the 'cmse_nonsecure_entry'
19532 attribute. This extra function label should be prepended with
19533 '__acle_se_', telling the linker that it needs to create secure gateway
19534 veneers for this function. */
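  /* For a function "foo" marked cmse_nonsecure_entry, the output would
     typically look something like (eliding other directives):

	.globl	__acle_se_foo
	.type	__acle_se_foo, %function
	.type	foo, %function
     foo:
     __acle_se_foo:  */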
19535 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19536 DECL_ATTRIBUTES (decl)))
19538 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19539 cmse_name = XALLOCAVEC (char, cmse_name_len);
19540 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19541 targetm.asm_out.globalize_label (file, cmse_name);
19543 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19544 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19547 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19548 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19549 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19550 ASM_OUTPUT_LABEL (file, name);
19552 if (cmse_name)
19553 ASM_OUTPUT_LABEL (file, cmse_name);
19555 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19558 /* Write the function name into the code section, directly preceding
19559 the function prologue.
19561 Code will be output similar to this:
19563 .ascii "arm_poke_function_name", 0
19564 .align
19566 .word 0xff000000 + (t1 - t0)
19567 arm_poke_function_name
19568 mov ip, sp
19569 stmfd sp!, {fp, ip, lr, pc}
19570 sub fp, ip, #4
19572 When performing a stack backtrace, code can inspect the value
19573 of 'pc' stored at 'fp' + 0. If the trace function then looks
19574 at location pc - 12 and the top 8 bits are set, then we know
19575 that there is a function name embedded immediately preceding this
19576 location, and that its length is ((pc[-3]) & ~0xff000000).
19578 We assume that pc is declared as a pointer to an unsigned long.
19580 It is of no benefit to output the function name if we are assembling
19581 a leaf function. These function types will not contain a stack
19582 backtrace structure, therefore it is not possible to determine the
19583 function name. */
19584 void
19585 arm_poke_function_name (FILE *stream, const char *name)
19587 unsigned long alignlength;
19588 unsigned long length;
19589 rtx x;
19591 length = strlen (name) + 1;
19592 alignlength = ROUND_UP_WORD (length);
19594 ASM_OUTPUT_ASCII (stream, name, length);
19595 ASM_OUTPUT_ALIGN (stream, 2);
19596 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19597 assemble_aligned_integer (UNITS_PER_WORD, x);
19600 /* Place some comments into the assembler stream
19601 describing the current function. */
19602 static void
19603 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19605 unsigned long func_type;
19607 /* Sanity check. */
19608 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19610 func_type = arm_current_func_type ();
19612 switch ((int) ARM_FUNC_TYPE (func_type))
19614 default:
19615 case ARM_FT_NORMAL:
19616 break;
19617 case ARM_FT_INTERWORKED:
19618 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19619 break;
19620 case ARM_FT_ISR:
19621 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19622 break;
19623 case ARM_FT_FIQ:
19624 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19625 break;
19626 case ARM_FT_EXCEPTION:
19627 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19628 break;
19631 if (IS_NAKED (func_type))
19632 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19634 if (IS_VOLATILE (func_type))
19635 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19637 if (IS_NESTED (func_type))
19638 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19639 if (IS_STACKALIGN (func_type))
19640 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19641 if (IS_CMSE_ENTRY (func_type))
19642 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19644 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19645 crtl->args.size,
19646 crtl->args.pretend_args_size, frame_size);
19648 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19649 frame_pointer_needed,
19650 cfun->machine->uses_anonymous_args);
19652 if (cfun->machine->lr_save_eliminated)
19653 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19655 if (crtl->calls_eh_return)
19656 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19660 static void
19661 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19662 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19664 arm_stack_offsets *offsets;
19666 if (TARGET_THUMB1)
19668 int regno;
19670 /* Emit any call-via-reg trampolines that are needed for v4t support
19671 of call_reg and call_value_reg type insns. */
19672 for (regno = 0; regno < LR_REGNUM; regno++)
19674 rtx label = cfun->machine->call_via[regno];
19676 if (label != NULL)
19678 switch_to_section (function_section (current_function_decl));
19679 targetm.asm_out.internal_label (asm_out_file, "L",
19680 CODE_LABEL_NUMBER (label));
19681 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19685 /* ??? Probably not safe to set this here, since it assumes that a
19686 function will be emitted as assembly immediately after we generate
19687 RTL for it. This does not happen for inline functions. */
19688 cfun->machine->return_used_this_function = 0;
19690 else /* TARGET_32BIT */
19692 /* We need to take into account any stack-frame rounding. */
19693 offsets = arm_get_frame_offsets ();
19695 gcc_assert (!use_return_insn (FALSE, NULL)
19696 || (cfun->machine->return_used_this_function != 0)
19697 || offsets->saved_regs == offsets->outgoing_args
19698 || frame_pointer_needed);
19702 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19703 STR and STRD. If an even number of registers is being pushed, an
19704 STRD pattern is created for each register pair. If an
19705 odd number of registers is pushed, emit an initial STR followed by
19706 as many STRD instructions as are needed. This works best when the
19707 stack is initially 64-bit aligned (the normal case), since it
19708 ensures that each STRD is also 64-bit aligned. */
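/* For example, pushing {r4, r5, r6} (an odd count) would typically produce
   something equivalent to:

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]  */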
19709 static void
19710 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19712 int num_regs = 0;
19713 int i;
19714 int regno;
19715 rtx par = NULL_RTX;
19716 rtx dwarf = NULL_RTX;
19717 rtx tmp;
19718 bool first = true;
19720 num_regs = bit_count (saved_regs_mask);
19722 /* Must be at least one register to save, and can't save SP or PC. */
19723 gcc_assert (num_regs > 0 && num_regs <= 14);
19724 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19725 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19727 /* Create sequence for DWARF info. All the frame-related data for
19728 debugging is held in this wrapper. */
19729 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19731 /* Describe the stack adjustment. */
19732 tmp = gen_rtx_SET (stack_pointer_rtx,
19733 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19734 RTX_FRAME_RELATED_P (tmp) = 1;
19735 XVECEXP (dwarf, 0, 0) = tmp;
19737 /* Find the first register. */
19738 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19741 i = 0;
19743 /* If there's an odd number of registers to push, start off by
19744 pushing a single register. This ensures that subsequent strd
19745 operations are dword aligned (assuming that SP was originally
19746 64-bit aligned). */
19747 if ((num_regs & 1) != 0)
19749 rtx reg, mem, insn;
19751 reg = gen_rtx_REG (SImode, regno);
19752 if (num_regs == 1)
19753 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19754 stack_pointer_rtx));
19755 else
19756 mem = gen_frame_mem (Pmode,
19757 gen_rtx_PRE_MODIFY
19758 (Pmode, stack_pointer_rtx,
19759 plus_constant (Pmode, stack_pointer_rtx,
19760 -4 * num_regs)));
19762 tmp = gen_rtx_SET (mem, reg);
19763 RTX_FRAME_RELATED_P (tmp) = 1;
19764 insn = emit_insn (tmp);
19765 RTX_FRAME_RELATED_P (insn) = 1;
19766 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19767 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19768 RTX_FRAME_RELATED_P (tmp) = 1;
19769 i++;
19770 regno++;
19771 XVECEXP (dwarf, 0, i) = tmp;
19772 first = false;
19775 while (i < num_regs)
19776 if (saved_regs_mask & (1 << regno))
19778 rtx reg1, reg2, mem1, mem2;
19779 rtx tmp0, tmp1, tmp2;
19780 int regno2;
19782 /* Find the register to pair with this one. */
19783 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19784 regno2++)
19787 reg1 = gen_rtx_REG (SImode, regno);
19788 reg2 = gen_rtx_REG (SImode, regno2);
19790 if (first)
19792 rtx insn;
19794 first = false;
19795 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19796 stack_pointer_rtx,
19797 -4 * num_regs));
19798 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19799 stack_pointer_rtx,
19800 -4 * (num_regs - 1)));
19801 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19802 plus_constant (Pmode, stack_pointer_rtx,
19803 -4 * (num_regs)));
19804 tmp1 = gen_rtx_SET (mem1, reg1);
19805 tmp2 = gen_rtx_SET (mem2, reg2);
19806 RTX_FRAME_RELATED_P (tmp0) = 1;
19807 RTX_FRAME_RELATED_P (tmp1) = 1;
19808 RTX_FRAME_RELATED_P (tmp2) = 1;
19809 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19810 XVECEXP (par, 0, 0) = tmp0;
19811 XVECEXP (par, 0, 1) = tmp1;
19812 XVECEXP (par, 0, 2) = tmp2;
19813 insn = emit_insn (par);
19814 RTX_FRAME_RELATED_P (insn) = 1;
19815 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19817 else
19819 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19820 stack_pointer_rtx,
19821 4 * i));
19822 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19823 stack_pointer_rtx,
19824 4 * (i + 1)));
19825 tmp1 = gen_rtx_SET (mem1, reg1);
19826 tmp2 = gen_rtx_SET (mem2, reg2);
19827 RTX_FRAME_RELATED_P (tmp1) = 1;
19828 RTX_FRAME_RELATED_P (tmp2) = 1;
19829 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19830 XVECEXP (par, 0, 0) = tmp1;
19831 XVECEXP (par, 0, 1) = tmp2;
19832 emit_insn (par);
19835 /* Create unwind information. This is an approximation. */
19836 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19837 plus_constant (Pmode,
19838 stack_pointer_rtx,
19839 4 * i)),
19840 reg1);
19841 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19842 plus_constant (Pmode,
19843 stack_pointer_rtx,
19844 4 * (i + 1))),
19845 reg2);
19847 RTX_FRAME_RELATED_P (tmp1) = 1;
19848 RTX_FRAME_RELATED_P (tmp2) = 1;
19849 XVECEXP (dwarf, 0, i + 1) = tmp1;
19850 XVECEXP (dwarf, 0, i + 2) = tmp2;
19851 i += 2;
19852 regno = regno2 + 1;
19854 else
19855 regno++;
19857 return;
19860 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19861 whenever possible, otherwise it emits single-word stores. The first store
19862 also allocates stack space for all saved registers, using writeback with
19863 pre-indexed addressing. All other stores use offset addressing. If no STRD
19864 can be emitted, this function emits a sequence of single-word stores,
19865 and not an STM as before, because single-word stores provide more freedom
19866 for scheduling and can be turned into an STM by peephole optimizations. */
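/* For example, pushing {r4, r5, r7} would typically come out as:

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]  */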
19867 static void
19868 arm_emit_strd_push (unsigned long saved_regs_mask)
19870 int num_regs = 0;
19871 int i, j, dwarf_index = 0;
19872 int offset = 0;
19873 rtx dwarf = NULL_RTX;
19874 rtx insn = NULL_RTX;
19875 rtx tmp, mem;
19877 /* TODO: More efficient code could be emitted by changing the
19878 layout, e.g., first push all pairs that can use STRD to keep the
19879 stack aligned, and then push all other registers. */
19880 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19881 if (saved_regs_mask & (1 << i))
19882 num_regs++;
19884 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19885 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19886 gcc_assert (num_regs > 0);
19888 /* Create sequence for DWARF info. */
19889 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19891 /* For dwarf info, we generate explicit stack update. */
19892 tmp = gen_rtx_SET (stack_pointer_rtx,
19893 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19894 RTX_FRAME_RELATED_P (tmp) = 1;
19895 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19897 /* Save registers. */
19898 offset = - 4 * num_regs;
19899 j = 0;
19900 while (j <= LAST_ARM_REGNUM)
19901 if (saved_regs_mask & (1 << j))
19903 if ((j % 2 == 0)
19904 && (saved_regs_mask & (1 << (j + 1))))
19906 /* Current register and previous register form register pair for
19907 which STRD can be generated. */
19908 if (offset < 0)
19910 /* Allocate stack space for all saved registers. */
19911 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19912 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19913 mem = gen_frame_mem (DImode, tmp);
19914 offset = 0;
19916 else if (offset > 0)
19917 mem = gen_frame_mem (DImode,
19918 plus_constant (Pmode,
19919 stack_pointer_rtx,
19920 offset));
19921 else
19922 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19924 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19925 RTX_FRAME_RELATED_P (tmp) = 1;
19926 tmp = emit_insn (tmp);
19928 /* Record the first store insn. */
19929 if (dwarf_index == 1)
19930 insn = tmp;
19932 /* Generate dwarf info. */
19933 mem = gen_frame_mem (SImode,
19934 plus_constant (Pmode,
19935 stack_pointer_rtx,
19936 offset));
19937 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19938 RTX_FRAME_RELATED_P (tmp) = 1;
19939 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19941 mem = gen_frame_mem (SImode,
19942 plus_constant (Pmode,
19943 stack_pointer_rtx,
19944 offset + 4));
19945 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
19946 RTX_FRAME_RELATED_P (tmp) = 1;
19947 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19949 offset += 8;
19950 j += 2;
19952 else
19954 /* Emit a single word store. */
19955 if (offset < 0)
19957 /* Allocate stack space for all saved registers. */
19958 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19959 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19960 mem = gen_frame_mem (SImode, tmp);
19961 offset = 0;
19963 else if (offset > 0)
19964 mem = gen_frame_mem (SImode,
19965 plus_constant (Pmode,
19966 stack_pointer_rtx,
19967 offset));
19968 else
19969 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19971 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19972 RTX_FRAME_RELATED_P (tmp) = 1;
19973 tmp = emit_insn (tmp);
19975 /* Record the first store insn. */
19976 if (dwarf_index == 1)
19977 insn = tmp;
19979 /* Generate dwarf info. */
19980 mem = gen_frame_mem (SImode,
19981 plus_constant(Pmode,
19982 stack_pointer_rtx,
19983 offset));
19984 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19985 RTX_FRAME_RELATED_P (tmp) = 1;
19986 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19988 offset += 4;
19989 j += 1;
19992 else
19993 j++;
19995 /* Attach dwarf info to the first insn we generate. */
19996 gcc_assert (insn != NULL_RTX);
19997 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19998 RTX_FRAME_RELATED_P (insn) = 1;
20001 /* Generate and emit an insn that we will recognize as a push_multi.
20002 Unfortunately, since this insn does not reflect very well the actual
20003 semantics of the operation, we need to annotate the insn for the benefit
20004 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20005 MASK for registers that should be annotated for DWARF2 frame unwind
20006 information. */
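/* A call such as emit_multi_reg_push (0x4070, 0x4070) would typically end up
   assembling as "push {r4, r5, r6, lr}" (or the equivalent stmfd), with the
   DWARF note describing the four stores and the single SP decrement.  */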
20007 static rtx
20008 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20010 int num_regs = 0;
20011 int num_dwarf_regs = 0;
20012 int i, j;
20013 rtx par;
20014 rtx dwarf;
20015 int dwarf_par_index;
20016 rtx tmp, reg;
20018 /* We don't record the PC in the dwarf frame information. */
20019 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20021 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20023 if (mask & (1 << i))
20024 num_regs++;
20025 if (dwarf_regs_mask & (1 << i))
20026 num_dwarf_regs++;
20029 gcc_assert (num_regs && num_regs <= 16);
20030 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20032 /* For the body of the insn we are going to generate an UNSPEC in
20033 parallel with several USEs. This allows the insn to be recognized
20034 by the push_multi pattern in the arm.md file.
20036 The body of the insn looks something like this:
20038 (parallel [
20039 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20040 (const_int:SI <num>)))
20041 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20042 (use (reg:SI XX))
20043 (use (reg:SI YY))
20047 For the frame note however, we try to be more explicit and actually
20048 show each register being stored into the stack frame, plus a (single)
20049 decrement of the stack pointer. We do it this way in order to be
20050 friendly to the stack unwinding code, which only wants to see a single
20051 stack decrement per instruction. The RTL we generate for the note looks
20052 something like this:
20054 (sequence [
20055 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20056 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20057 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20058 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20062 FIXME: In an ideal world the PRE_MODIFY would not exist and
20063 instead we'd have a parallel expression detailing all
20064 the stores to the various memory addresses so that debug
20065 information is more up-to-date. Remember however while writing
20066 this to take care of the constraints with the push instruction.
20068 Note also that this has to be taken care of for the VFP registers.
20070 For more see PR43399. */
20072 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20073 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20074 dwarf_par_index = 1;
20076 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20078 if (mask & (1 << i))
20080 reg = gen_rtx_REG (SImode, i);
20082 XVECEXP (par, 0, 0)
20083 = gen_rtx_SET (gen_frame_mem
20084 (BLKmode,
20085 gen_rtx_PRE_MODIFY (Pmode,
20086 stack_pointer_rtx,
20087 plus_constant
20088 (Pmode, stack_pointer_rtx,
20089 -4 * num_regs))
20091 gen_rtx_UNSPEC (BLKmode,
20092 gen_rtvec (1, reg),
20093 UNSPEC_PUSH_MULT));
20095 if (dwarf_regs_mask & (1 << i))
20097 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20098 reg);
20099 RTX_FRAME_RELATED_P (tmp) = 1;
20100 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20103 break;
20107 for (j = 1, i++; j < num_regs; i++)
20109 if (mask & (1 << i))
20111 reg = gen_rtx_REG (SImode, i);
20113 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20115 if (dwarf_regs_mask & (1 << i))
20118 = gen_rtx_SET (gen_frame_mem
20119 (SImode,
20120 plus_constant (Pmode, stack_pointer_rtx,
20121 4 * j)),
20122 reg);
20123 RTX_FRAME_RELATED_P (tmp) = 1;
20124 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20127 j++;
20131 par = emit_insn (par);
20133 tmp = gen_rtx_SET (stack_pointer_rtx,
20134 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20135 RTX_FRAME_RELATED_P (tmp) = 1;
20136 XVECEXP (dwarf, 0, 0) = tmp;
20138 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20140 return par;
20143 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20144 SIZE is the offset to be adjusted.
20145 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20146 static void
20147 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20149 rtx dwarf;
20151 RTX_FRAME_RELATED_P (insn) = 1;
20152 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20153 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20156 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20157 SAVED_REGS_MASK shows which registers need to be restored.
20159 Unfortunately, since this insn does not reflect very well the actual
20160 semantics of the operation, we need to annotate the insn for the benefit
20161 of DWARF2 frame unwind information. */
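/* For instance, restoring {r4, r5, pc} would typically assemble as
   "pop {r4, r5, pc}", which also performs the function return since PC is
   in the register list.  */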
20162 static void
20163 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20165 int num_regs = 0;
20166 int i, j;
20167 rtx par;
20168 rtx dwarf = NULL_RTX;
20169 rtx tmp, reg;
20170 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20171 int offset_adj;
20172 int emit_update;
20174 offset_adj = return_in_pc ? 1 : 0;
20175 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20176 if (saved_regs_mask & (1 << i))
20177 num_regs++;
20179 gcc_assert (num_regs && num_regs <= 16);
20181 /* If SP is in the reglist, then we don't emit an SP update insn. */
20182 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20184 /* The parallel needs to hold num_regs SETs
20185 and one SET for the stack update. */
20186 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20188 if (return_in_pc)
20189 XVECEXP (par, 0, 0) = ret_rtx;
20191 if (emit_update)
20193 /* Increment the stack pointer, based on there being
20194 num_regs 4-byte registers to restore. */
20195 tmp = gen_rtx_SET (stack_pointer_rtx,
20196 plus_constant (Pmode,
20197 stack_pointer_rtx,
20198 4 * num_regs));
20199 RTX_FRAME_RELATED_P (tmp) = 1;
20200 XVECEXP (par, 0, offset_adj) = tmp;
20203 /* Now restore every reg, which may include PC. */
20204 for (j = 0, i = 0; j < num_regs; i++)
20205 if (saved_regs_mask & (1 << i))
20207 reg = gen_rtx_REG (SImode, i);
20208 if ((num_regs == 1) && emit_update && !return_in_pc)
20210 /* Emit single load with writeback. */
20211 tmp = gen_frame_mem (SImode,
20212 gen_rtx_POST_INC (Pmode,
20213 stack_pointer_rtx));
20214 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20215 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20216 return;
20219 tmp = gen_rtx_SET (reg,
20220 gen_frame_mem
20221 (SImode,
20222 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20223 RTX_FRAME_RELATED_P (tmp) = 1;
20224 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20226 /* We need to maintain a sequence for DWARF info too. As dwarf info
20227 should not have PC, skip PC. */
20228 if (i != PC_REGNUM)
20229 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20231 j++;
20234 if (return_in_pc)
20235 par = emit_jump_insn (par);
20236 else
20237 par = emit_insn (par);
20239 REG_NOTES (par) = dwarf;
20240 if (!return_in_pc)
20241 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20242 stack_pointer_rtx, stack_pointer_rtx);
20245 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20246 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20248 Unfortunately, since this insn does not reflect very well the actual
20249 semantics of the operation, we need to annotate the insn for the benefit
20250 of DWARF2 frame unwind information. */
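/* Restoring d8-d11 from the stack pointer, for example, would typically
   assemble as something like "vldmia sp!, {d8-d11}", adjusting BASE_REG by
   8 * NUM_REGS as a side effect.  */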
20251 static void
20252 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20254 int i, j;
20255 rtx par;
20256 rtx dwarf = NULL_RTX;
20257 rtx tmp, reg;
20259 gcc_assert (num_regs && num_regs <= 32);
20261 /* Workaround ARM10 VFPr1 bug. */
20262 if (num_regs == 2 && !arm_arch6)
20264 if (first_reg == 15)
20265 first_reg--;
20267 num_regs++;
20270 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20271 there could be up to 32 D-registers to restore.
20272 If there are more than 16 D-registers, make two recursive calls,
20273 each of which emits one pop_multi instruction. */
20274 if (num_regs > 16)
20276 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20277 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20278 return;
20281 /* The parallel needs to hold num_regs SETs
20282 and one SET for the stack update. */
20283 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20285 /* Increment the stack pointer, based on there being
20286 num_regs 8-byte registers to restore. */
20287 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20288 RTX_FRAME_RELATED_P (tmp) = 1;
20289 XVECEXP (par, 0, 0) = tmp;
20291 /* Now show every reg that will be restored, using a SET for each. */
20292 for (j = 0, i=first_reg; j < num_regs; i += 2)
20294 reg = gen_rtx_REG (DFmode, i);
20296 tmp = gen_rtx_SET (reg,
20297 gen_frame_mem
20298 (DFmode,
20299 plus_constant (Pmode, base_reg, 8 * j)));
20300 RTX_FRAME_RELATED_P (tmp) = 1;
20301 XVECEXP (par, 0, j + 1) = tmp;
20303 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20305 j++;
20308 par = emit_insn (par);
20309 REG_NOTES (par) = dwarf;
20311 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
20312 if (REGNO (base_reg) == IP_REGNUM)
20314 RTX_FRAME_RELATED_P (par) = 1;
20315 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20317 else
20318 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20319 base_reg, base_reg);
20322 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20323 even number of registers is being popped, multiple LDRD patterns are created for
20324 all register pairs. If an odd number of registers is popped, the last register is
20325 loaded using an LDR pattern. */
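/* For example, popping {r4, r5, r6} would typically become:

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4  */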
20326 static void
20327 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20329 int num_regs = 0;
20330 int i, j;
20331 rtx par = NULL_RTX;
20332 rtx dwarf = NULL_RTX;
20333 rtx tmp, reg, tmp1;
20334 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20336 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20337 if (saved_regs_mask & (1 << i))
20338 num_regs++;
20340 gcc_assert (num_regs && num_regs <= 16);
20342 /* We cannot generate ldrd for PC, so reduce the count if PC is
20343 to be popped. Thus, if num_regs was even, it now becomes odd,
20344 and we can generate a pop with PC. If num_regs was odd, it is now
20345 even, and an ldr with return can be generated for PC. */
20346 if (return_in_pc)
20347 num_regs--;
20349 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20351 /* Var j iterates over all the registers in saved_regs_mask. Var i gives
20352 the index of each saved register in the stack frame.
20353 A PARALLEL RTX describing a register pair is created here, so that the
20354 pattern for LDRD can be matched. As PC is always the last register to be
20355 popped, and num_regs has already been decremented when PC is to be popped,
20356 we don't have to worry about PC in this loop. */
20357 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20358 if (saved_regs_mask & (1 << j))
20360 /* Create RTX for memory load. */
20361 reg = gen_rtx_REG (SImode, j);
20362 tmp = gen_rtx_SET (reg,
20363 gen_frame_mem (SImode,
20364 plus_constant (Pmode,
20365 stack_pointer_rtx, 4 * i)));
20366 RTX_FRAME_RELATED_P (tmp) = 1;
20368 if (i % 2 == 0)
20370 /* When saved-register index (i) is even, the RTX to be emitted is
20371 yet to be created. Hence create it first. The LDRD pattern we
20372 are generating is :
20373 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20374 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20375 where target registers need not be consecutive. */
20376 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20377 dwarf = NULL_RTX;
20380 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20381 added as the 0th element, and if i is odd, reg_i is added as the 1st element
20382 of the LDRD pattern shown above. */
20383 XVECEXP (par, 0, (i % 2)) = tmp;
20384 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20386 if ((i % 2) == 1)
20388 /* When saved-register index (i) is odd, RTXs for both the registers
20389 to be loaded are generated in the LDRD pattern given above, and the
20390 pattern can be emitted now. */
20391 par = emit_insn (par);
20392 REG_NOTES (par) = dwarf;
20393 RTX_FRAME_RELATED_P (par) = 1;
20396 i++;
20399 /* If the number of registers pushed is odd AND return_in_pc is false, OR
20400 the number of registers is even AND return_in_pc is true, the last register
20401 is popped using LDR. It can be PC as well. Hence, adjust the stack first and
20402 then LDR with post increment. */
20404 /* Increment the stack pointer, based on there being
20405 num_regs 4-byte registers to restore. */
20406 tmp = gen_rtx_SET (stack_pointer_rtx,
20407 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20408 RTX_FRAME_RELATED_P (tmp) = 1;
20409 tmp = emit_insn (tmp);
20410 if (!return_in_pc)
20412 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20413 stack_pointer_rtx, stack_pointer_rtx);
20416 dwarf = NULL_RTX;
20418 if (((num_regs % 2) == 1 && !return_in_pc)
20419 || ((num_regs % 2) == 0 && return_in_pc))
20421 /* Scan for the single register to be popped. Skip until the saved
20422 register is found. */
20423 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20425 /* Gen LDR with post increment here. */
20426 tmp1 = gen_rtx_MEM (SImode,
20427 gen_rtx_POST_INC (SImode,
20428 stack_pointer_rtx));
20429 set_mem_alias_set (tmp1, get_frame_alias_set ());
20431 reg = gen_rtx_REG (SImode, j);
20432 tmp = gen_rtx_SET (reg, tmp1);
20433 RTX_FRAME_RELATED_P (tmp) = 1;
20434 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20436 if (return_in_pc)
20438 /* If return_in_pc, j must be PC_REGNUM. */
20439 gcc_assert (j == PC_REGNUM);
20440 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20441 XVECEXP (par, 0, 0) = ret_rtx;
20442 XVECEXP (par, 0, 1) = tmp;
20443 par = emit_jump_insn (par);
20445 else
20447 par = emit_insn (tmp);
20448 REG_NOTES (par) = dwarf;
20449 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20450 stack_pointer_rtx, stack_pointer_rtx);
20454 else if ((num_regs % 2) == 1 && return_in_pc)
20456 /* There are 2 registers to be popped. So, generate the pattern
20457 pop_multiple_with_stack_update_and_return to pop in PC. */
20458 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20461 return;
20464 /* LDRD in ARM mode needs consecutive registers as operands. This function
20465 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20466 offset addressing and then generates one separate stack update. This provides
20467 more scheduling freedom, compared to writeback on every load. However,
20468 if the function returns using a load into PC directly
20469 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20470 before the last load. TODO: Add a peephole optimization to recognize
20471 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20472 a peephole optimization to merge the load at stack-offset zero
20473 with the stack update instruction using load with writeback
20474 in post-index addressing mode. */
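/* For example, popping {r4, r5, r7} would typically come out as:

	ldrd	r4, r5, [sp]
	ldr	r7, [sp, #8]
	add	sp, sp, #12  */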
20475 static void
20476 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20478 int j = 0;
20479 int offset = 0;
20480 rtx par = NULL_RTX;
20481 rtx dwarf = NULL_RTX;
20482 rtx tmp, mem;
20484 /* Restore saved registers. */
20485 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20486 j = 0;
20487 while (j <= LAST_ARM_REGNUM)
20488 if (saved_regs_mask & (1 << j))
20490 if ((j % 2) == 0
20491 && (saved_regs_mask & (1 << (j + 1)))
20492 && (j + 1) != PC_REGNUM)
20494 /* Current register and next register form register pair for which
20495 LDRD can be generated. PC is always the last register popped, and
20496 we handle it separately. */
20497 if (offset > 0)
20498 mem = gen_frame_mem (DImode,
20499 plus_constant (Pmode,
20500 stack_pointer_rtx,
20501 offset));
20502 else
20503 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20505 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20506 tmp = emit_insn (tmp);
20507 RTX_FRAME_RELATED_P (tmp) = 1;
20509 /* Generate dwarf info. */
20511 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20512 gen_rtx_REG (SImode, j),
20513 NULL_RTX);
20514 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20515 gen_rtx_REG (SImode, j + 1),
20516 dwarf);
20518 REG_NOTES (tmp) = dwarf;
20520 offset += 8;
20521 j += 2;
20523 else if (j != PC_REGNUM)
20525 /* Emit a single word load. */
20526 if (offset > 0)
20527 mem = gen_frame_mem (SImode,
20528 plus_constant (Pmode,
20529 stack_pointer_rtx,
20530 offset));
20531 else
20532 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20534 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20535 tmp = emit_insn (tmp);
20536 RTX_FRAME_RELATED_P (tmp) = 1;
20538 /* Generate dwarf info. */
20539 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20540 gen_rtx_REG (SImode, j),
20541 NULL_RTX);
20543 offset += 4;
20544 j += 1;
20546 else /* j == PC_REGNUM */
20547 j++;
20549 else
20550 j++;
20552 /* Update the stack. */
20553 if (offset > 0)
20555 tmp = gen_rtx_SET (stack_pointer_rtx,
20556 plus_constant (Pmode,
20557 stack_pointer_rtx,
20558 offset));
20559 tmp = emit_insn (tmp);
20560 arm_add_cfa_adjust_cfa_note (tmp, offset,
20561 stack_pointer_rtx, stack_pointer_rtx);
20562 offset = 0;
20565 if (saved_regs_mask & (1 << PC_REGNUM))
20567 /* Only PC is to be popped. */
20568 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20569 XVECEXP (par, 0, 0) = ret_rtx;
20570 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20571 gen_frame_mem (SImode,
20572 gen_rtx_POST_INC (SImode,
20573 stack_pointer_rtx)));
20574 RTX_FRAME_RELATED_P (tmp) = 1;
20575 XVECEXP (par, 0, 1) = tmp;
20576 par = emit_jump_insn (par);
20578 /* Generate dwarf info. */
20579 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20580 gen_rtx_REG (SImode, PC_REGNUM),
20581 NULL_RTX);
20582 REG_NOTES (par) = dwarf;
20583 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20584 stack_pointer_rtx, stack_pointer_rtx);
20588 /* Calculate the size of the return value that is passed in registers. */
20589 static unsigned
20590 arm_size_return_regs (void)
20592 machine_mode mode;
20594 if (crtl->return_rtx != 0)
20595 mode = GET_MODE (crtl->return_rtx);
20596 else
20597 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20599 return GET_MODE_SIZE (mode);
20602 /* Return true if the current function needs to save/restore LR. */
20603 static bool
20604 thumb_force_lr_save (void)
20606 return !cfun->machine->lr_save_eliminated
20607 && (!crtl->is_leaf
20608 || thumb_far_jump_used_p ()
20609 || df_regs_ever_live_p (LR_REGNUM));
20612 /* Return true if CALL is an indirect tail call; if so, we do not
20613 know whether r3 will be available, because the target address is
20614 held in a register in this particular case. */
20615 static bool
20616 is_indirect_tailcall_p (rtx call)
20618 rtx pat = PATTERN (call);
20620 /* Indirect tail call. */
20621 pat = XVECEXP (pat, 0, 0);
20622 if (GET_CODE (pat) == SET)
20623 pat = SET_SRC (pat);
20625 pat = XEXP (XEXP (pat, 0), 0);
20626 return REG_P (pat);
20629 /* Return true if r3 is used by any of the tail call insns in the
20630 current function. */
20631 static bool
20632 any_sibcall_could_use_r3 (void)
20634 edge_iterator ei;
20635 edge e;
20637 if (!crtl->tail_call_emit)
20638 return false;
20639 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20640 if (e->flags & EDGE_SIBCALL)
20642 rtx_insn *call = BB_END (e->src);
20643 if (!CALL_P (call))
20644 call = prev_nonnote_nondebug_insn (call);
20645 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20646 if (find_regno_fusage (call, USE, 3)
20647 || is_indirect_tailcall_p (call))
20648 return true;
20650 return false;
20654 /* Compute the distance from register FROM to register TO.
20655 These can be the arg pointer (26), the soft frame pointer (25),
20656 the stack pointer (13) or the hard frame pointer (11).
20657 In thumb mode r7 is used as the soft frame pointer, if needed.
20658 Typical stack layout looks like this:
20660 old stack pointer -> | |
20661 ----
20662 | | \
20663 | | saved arguments for
20664 | | vararg functions
20665 | | /
20667 hard FP & arg pointer -> | | \
20668 | | stack
20669 | | frame
20670 | | /
20672 | | \
20673 | | call saved
20674 | | registers
20675 soft frame pointer -> | | /
20677 | | \
20678 | | local
20679 | | variables
20680 locals base pointer -> | | /
20682 | | \
20683 | | outgoing
20684 | | arguments
20685 current stack pointer -> | | /
20688 For a given function some or all of these stack components
20689 may not be needed, giving rise to the possibility of
20690 eliminating some of the registers.
20692 The values returned by this function must reflect the behavior
20693 of arm_expand_prologue() and arm_compute_save_reg_mask().
20695 The sign of the number returned reflects the direction of stack
20696 growth, so the values are positive for all eliminations except
20697 from the soft frame pointer to the hard frame pointer.
20699 SFP may point just inside the local variables block to ensure correct
20700 alignment. */
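/* In terms of the arm_stack_offsets fields computed below, the layout above
   roughly corresponds to the ordering
   saved_args <= frame <= saved_regs <= soft_frame <= locals_base
   <= outgoing_args.  */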
20703 /* Calculate stack offsets. These are used to calculate register elimination
20704 offsets and in prologue/epilogue code. Also calculates which registers
20705 should be saved. */
20707 static arm_stack_offsets *
20708 arm_get_frame_offsets (void)
20710 struct arm_stack_offsets *offsets;
20711 unsigned long func_type;
20712 int saved;
20713 int core_saved;
20714 HOST_WIDE_INT frame_size;
20715 int i;
20717 offsets = &cfun->machine->stack_offsets;
20719 if (reload_completed)
20720 return offsets;
20722 /* Initially this is the size of the local variables. It will be translated
20723 into an offset once we have determined the size of preceding data. */
20724 frame_size = ROUND_UP_WORD (get_frame_size ());
20726 /* Space for variadic functions. */
20727 offsets->saved_args = crtl->args.pretend_args_size;
20729 /* In Thumb mode this is incorrect, but never used. */
20730 offsets->frame
20731 = (offsets->saved_args
20732 + arm_compute_static_chain_stack_bytes ()
20733 + (frame_pointer_needed ? 4 : 0));
20735 if (TARGET_32BIT)
20737 unsigned int regno;
20739 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20740 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20741 saved = core_saved;
20743 /* We know that SP will be doubleword aligned on entry, and we must
20744 preserve that condition at any subroutine call. We also require the
20745 soft frame pointer to be doubleword aligned. */
20747 if (TARGET_REALLY_IWMMXT)
20749 /* Check for the call-saved iWMMXt registers. */
20750 for (regno = FIRST_IWMMXT_REGNUM;
20751 regno <= LAST_IWMMXT_REGNUM;
20752 regno++)
20753 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20754 saved += 8;
20757 func_type = arm_current_func_type ();
20758 /* Space for saved VFP registers. */
20759 if (! IS_VOLATILE (func_type)
20760 && TARGET_HARD_FLOAT)
20761 saved += arm_get_vfp_saved_size ();
20763 else /* TARGET_THUMB1 */
20765 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20766 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20767 saved = core_saved;
20768 if (TARGET_BACKTRACE)
20769 saved += 16;
20772 /* Saved registers include the stack frame. */
20773 offsets->saved_regs
20774 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20775 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20777 /* A leaf function does not need any stack alignment if it has nothing
20778 on the stack. */
20779 if (crtl->is_leaf && frame_size == 0
20780 /* However if it calls alloca(), we have a dynamically allocated
20781 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20782 && ! cfun->calls_alloca)
20784 offsets->outgoing_args = offsets->soft_frame;
20785 offsets->locals_base = offsets->soft_frame;
20786 return offsets;
20789 /* Ensure SFP has the correct alignment. */
20790 if (ARM_DOUBLEWORD_ALIGN
20791 && (offsets->soft_frame & 7))
20793 offsets->soft_frame += 4;
20794 /* Try to align stack by pushing an extra reg. Don't bother doing this
20795 when there is a stack frame as the alignment will be rolled into
20796 the normal stack adjustment. */
20797 if (frame_size + crtl->outgoing_args_size == 0)
20799 int reg = -1;
20801 /* Register r3 is caller-saved. Normally it does not need to be
20802 saved on entry by the prologue. However if we choose to save
20803 it for padding then we may confuse the compiler into thinking
20804 a prologue sequence is required when in fact it is not. This
20805 will occur when shrink-wrapping if r3 is used as a scratch
20806 register and there are no other callee-saved writes.
20808 This situation can be avoided when other callee-saved registers
20809 are available and r3 is not mandatory if we choose a callee-saved
20810 register for padding. */
20811 bool prefer_callee_reg_p = false;
20813 /* If it is safe to use r3, then do so. This sometimes
20814 generates better code on Thumb-2 by avoiding the need to
20815 use 32-bit push/pop instructions. */
20816 if (! any_sibcall_could_use_r3 ()
20817 && arm_size_return_regs () <= 12
20818 && (offsets->saved_regs_mask & (1 << 3)) == 0
20819 && (TARGET_THUMB2
20820 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20822 reg = 3;
20823 if (!TARGET_THUMB2)
20824 prefer_callee_reg_p = true;
20826 if (reg == -1
20827 || prefer_callee_reg_p)
20829 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20831 /* Avoid fixed registers; they may be changed at
20832 arbitrary times so it's unsafe to restore them
20833 during the epilogue. */
20834 if (!fixed_regs[i]
20835 && (offsets->saved_regs_mask & (1 << i)) == 0)
20837 reg = i;
20838 break;
20843 if (reg != -1)
20845 offsets->saved_regs += 4;
20846 offsets->saved_regs_mask |= (1 << reg);
20851 offsets->locals_base = offsets->soft_frame + frame_size;
20852 offsets->outgoing_args = (offsets->locals_base
20853 + crtl->outgoing_args_size);
20855 if (ARM_DOUBLEWORD_ALIGN)
20857 /* Ensure SP remains doubleword aligned. */
20858 if (offsets->outgoing_args & 7)
20859 offsets->outgoing_args += 4;
20860 gcc_assert (!(offsets->outgoing_args & 7));
20863 return offsets;
20867 /* Calculate the relative offsets for the different stack pointers. Positive
20868 offsets are in the direction of stack growth. */
20870 HOST_WIDE_INT
20871 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20873 arm_stack_offsets *offsets;
20875 offsets = arm_get_frame_offsets ();
20877 /* OK, now we have enough information to compute the distances.
20878 There must be an entry in these switch tables for each pair
20879 of registers in ELIMINABLE_REGS, even if some of the entries
20880 seem to be redundant or useless. */
20881 switch (from)
20883 case ARG_POINTER_REGNUM:
20884 switch (to)
20886 case THUMB_HARD_FRAME_POINTER_REGNUM:
20887 return 0;
20889 case FRAME_POINTER_REGNUM:
20890 /* This is the reverse of the soft frame pointer
20891 to hard frame pointer elimination below. */
20892 return offsets->soft_frame - offsets->saved_args;
20894 case ARM_HARD_FRAME_POINTER_REGNUM:
20895 /* This is only non-zero in the case where the static chain register
20896 is stored above the frame. */
20897 return offsets->frame - offsets->saved_args - 4;
20899 case STACK_POINTER_REGNUM:
20900 /* If nothing has been pushed on the stack at all
20901 then this will return -4. This *is* correct! */
20902 return offsets->outgoing_args - (offsets->saved_args + 4);
20904 default:
20905 gcc_unreachable ();
20907 gcc_unreachable ();
20909 case FRAME_POINTER_REGNUM:
20910 switch (to)
20912 case THUMB_HARD_FRAME_POINTER_REGNUM:
20913 return 0;
20915 case ARM_HARD_FRAME_POINTER_REGNUM:
20916 /* The hard frame pointer points to the top entry in the
20917 stack frame. The soft frame pointer to the bottom entry
20918 in the stack frame. If there is no stack frame at all,
20919 then they are identical. */
20921 return offsets->frame - offsets->soft_frame;
20923 case STACK_POINTER_REGNUM:
20924 return offsets->outgoing_args - offsets->soft_frame;
20926 default:
20927 gcc_unreachable ();
20929 gcc_unreachable ();
20931 default:
20932 /* You cannot eliminate from the stack pointer.
20933 In theory you could eliminate from the hard frame
20934 pointer to the stack pointer, but this will never
20935 happen, since if a stack frame is not needed the
20936 hard frame pointer will never be used. */
20937 gcc_unreachable ();
20941 /* Given FROM and TO register numbers, say whether this elimination is
20942 allowed. Frame pointer elimination is automatically handled.
20944 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20945 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20946 pointer, we must eliminate FRAME_POINTER_REGNUM into
20947 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20948 ARG_POINTER_REGNUM. */
20950 bool
20951 arm_can_eliminate (const int from, const int to)
20953 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20954 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20955 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20956 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20957 true);
20960 /* Emit RTL to save coprocessor registers on function entry. Returns the
20961 number of bytes pushed. */
20963 static int
20964 arm_save_coproc_regs(void)
20966 int saved_size = 0;
20967 unsigned reg;
20968 unsigned start_reg;
20969 rtx insn;
20971 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20972 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20974 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20975 insn = gen_rtx_MEM (V2SImode, insn);
20976 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20977 RTX_FRAME_RELATED_P (insn) = 1;
20978 saved_size += 8;
20981 if (TARGET_HARD_FLOAT)
20983 start_reg = FIRST_VFP_REGNUM;
20985 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20987 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20988 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20990 if (start_reg != reg)
20991 saved_size += vfp_emit_fstmd (start_reg,
20992 (reg - start_reg) / 2);
20993 start_reg = reg + 2;
20996 if (start_reg != reg)
20997 saved_size += vfp_emit_fstmd (start_reg,
20998 (reg - start_reg) / 2);
21000 return saved_size;
21004 /* Set the Thumb frame pointer from the stack pointer. */
21006 static void
21007 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21009 HOST_WIDE_INT amount;
21010 rtx insn, dwarf;
21012 amount = offsets->outgoing_args - offsets->locals_base;
21013 if (amount < 1024)
21014 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21015 stack_pointer_rtx, GEN_INT (amount)));
21016 else
21018 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21019 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21020 expects the first two operands to be the same. */
21021 if (TARGET_THUMB2)
21023 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21024 stack_pointer_rtx,
21025 hard_frame_pointer_rtx));
21027 else
21029 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21030 hard_frame_pointer_rtx,
21031 stack_pointer_rtx));
21033 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21034 plus_constant (Pmode, stack_pointer_rtx, amount));
21035 RTX_FRAME_RELATED_P (dwarf) = 1;
21036 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21039 RTX_FRAME_RELATED_P (insn) = 1;
21042 struct scratch_reg {
21043 rtx reg;
21044 bool saved;
21047 /* Return a short-lived scratch register for use as a 2nd scratch register on
21048 function entry after the registers are saved in the prologue. This register
21049 must be released by means of release_scratch_register_on_entry. IP is not
21050 considered since it is always used as the 1st scratch register if available.
21052 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21053 mask of live registers. */
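/* (A live LR is tried first because the prologue has already pushed it,
   so it can be clobbered here without an extra save and restore.)  */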
21055 static void
21056 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21057 unsigned long live_regs)
21059 int regno = -1;
21061 sr->saved = false;
21063 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21064 regno = LR_REGNUM;
21065 else
21067 unsigned int i;
21069 for (i = 4; i < 11; i++)
21070 if (regno1 != i && (live_regs & (1 << i)) != 0)
21072 regno = i;
21073 break;
21076 if (regno < 0)
21078 /* If IP is used as the 1st scratch register for a nested function,
21079 then either r3 wasn't available or is used to preserve IP. */
21080 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21081 regno1 = 3;
21082 regno = (regno1 == 3 ? 2 : 3);
21083 sr->saved
21084 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21085 regno);
21089 sr->reg = gen_rtx_REG (SImode, regno);
21090 if (sr->saved)
21092 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21093 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21094 rtx x = gen_rtx_SET (stack_pointer_rtx,
21095 plus_constant (Pmode, stack_pointer_rtx, -4));
21096 RTX_FRAME_RELATED_P (insn) = 1;
21097 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21101 /* Release a scratch register obtained from the preceding function. */
21103 static void
21104 release_scratch_register_on_entry (struct scratch_reg *sr)
21106 if (sr->saved)
21108 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21109 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21110 rtx x = gen_rtx_SET (stack_pointer_rtx,
21111 plus_constant (Pmode, stack_pointer_rtx, 4));
21112 RTX_FRAME_RELATED_P (insn) = 1;
21113 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21117 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21119 #if PROBE_INTERVAL > 4096
21120 #error Cannot use indexed addressing mode for stack probing
21121 #endif
21123 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21124 inclusive. These are offsets from the current stack pointer. REGNO1
21125 is the index number of the 1st scratch register and LIVE_REGS is the
21126 mask of live registers. */
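/* For illustration: with PROBE_INTERVAL == 4096, FIRST == 0 and
   SIZE == 16384, this emits probes at SP - 4096, SP - 8192, SP - 12288
   and SP - 16384.  */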
21128 static void
21129 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21130 unsigned int regno1, unsigned long live_regs)
21132 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21134 /* See if we have a constant small number of probes to generate. If so,
21135 that's the easy case. */
21136 if (size <= PROBE_INTERVAL)
21138 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21139 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21140 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21143 /* The run-time loop is made up of 10 insns in the generic case while the
21144 compile-time (unrolled) sequence is made up of 4+2*(n-2) insns for n intervals, so unrolling is used below for up to 5 intervals. */
21145 else if (size <= 5 * PROBE_INTERVAL)
21147 HOST_WIDE_INT i, rem;
21149 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21150 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21151 emit_stack_probe (reg1);
21153 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21154 it exceeds SIZE. If only two probes are needed, this will not
21155 generate any code. Then probe at FIRST + SIZE. */
21156 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21158 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21159 emit_stack_probe (reg1);
21162 rem = size - (i - PROBE_INTERVAL);
21163 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21165 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21166 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21168 else
21169 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21172 /* Otherwise, do the same as above, but in a loop. Note that we must be
21173 extra careful with variables wrapping around because we might be at
21174 the very top (or the very bottom) of the address space and we have
21175 to be able to handle this case properly; in particular, we use an
21176 equality test for the loop condition. */
21177 else
21179 HOST_WIDE_INT rounded_size;
21180 struct scratch_reg sr;
21182 get_scratch_register_on_entry (&sr, regno1, live_regs);
21184 emit_move_insn (reg1, GEN_INT (first));
21187 /* Step 1: round SIZE to the previous multiple of the interval. */
21189 rounded_size = size & -PROBE_INTERVAL;
21190 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21193 /* Step 2: compute initial and final value of the loop counter. */
21195 /* TEST_ADDR = SP + FIRST. */
21196 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21198 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21199 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21202 /* Step 3: the loop
21206 do { TEST_ADDR = TEST_ADDR + PROBE_INTERVAL;
21207 probe at TEST_ADDR; }
21209 while (TEST_ADDR != LAST_ADDR)
21211 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21212 until it is equal to ROUNDED_SIZE. */
21214 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21217 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21218 that SIZE is equal to ROUNDED_SIZE. */
21220 if (size != rounded_size)
21222 HOST_WIDE_INT rem = size - rounded_size;
21224 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21226 emit_set_insn (sr.reg,
21227 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21228 emit_stack_probe (plus_constant (Pmode, sr.reg,
21229 PROBE_INTERVAL - rem));
21231 else
21232 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21235 release_scratch_register_on_entry (&sr);
21238 /* Make sure nothing is scheduled before we are done. */
21239 emit_insn (gen_blockage ());
21242 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21243 absolute addresses. */
21245 const char *
21246 output_probe_stack_range (rtx reg1, rtx reg2)
21248 static int labelno = 0;
21249 char loop_lab[32];
21250 rtx xops[2];
21252 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21254 /* Loop. */
21255 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21257 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21258 xops[0] = reg1;
21259 xops[1] = GEN_INT (PROBE_INTERVAL);
21260 output_asm_insn ("sub\t%0, %0, %1", xops);
21262 /* Probe at TEST_ADDR. */
21263 output_asm_insn ("str\tr0, [%0, #0]", xops);
21265 /* Test if TEST_ADDR == LAST_ADDR. */
21266 xops[1] = reg2;
21267 output_asm_insn ("cmp\t%0, %1", xops);
21269 /* Branch. */
21270 fputs ("\tbne\t", asm_out_file);
21271 assemble_name_raw (asm_out_file, loop_lab);
21272 fputc ('\n', asm_out_file);
21274 return "";
21277 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21278 function. */
21279 void
21280 arm_expand_prologue (void)
21282 rtx amount;
21283 rtx insn;
21284 rtx ip_rtx;
21285 unsigned long live_regs_mask;
21286 unsigned long func_type;
21287 int fp_offset = 0;
21288 int saved_pretend_args = 0;
21289 int saved_regs = 0;
21290 unsigned HOST_WIDE_INT args_to_push;
21291 HOST_WIDE_INT size;
21292 arm_stack_offsets *offsets;
21293 bool clobber_ip;
21295 func_type = arm_current_func_type ();
21297 /* Naked functions don't have prologues. */
21298 if (IS_NAKED (func_type))
21300 if (flag_stack_usage_info)
21301 current_function_static_stack_size = 0;
21302 return;
21305 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21306 args_to_push = crtl->args.pretend_args_size;
21308 /* Compute which registers we will have to save onto the stack. */
21309 offsets = arm_get_frame_offsets ();
21310 live_regs_mask = offsets->saved_regs_mask;
21312 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21314 if (IS_STACKALIGN (func_type))
21316 rtx r0, r1;
21318 /* Handle a word-aligned stack pointer. We generate the following:
21320 mov r0, sp
21321 bic r1, r0, #7
21322 mov sp, r1
21323 <save and restore r0 in normal prologue/epilogue>
21324 mov sp, r0
21325 bx lr
21327 The unwinder doesn't need to know about the stack realignment.
21328 Just tell it we saved SP in r0. */
21329 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21331 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21332 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21334 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21335 RTX_FRAME_RELATED_P (insn) = 1;
21336 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21338 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21340 /* ??? The CFA changes here, which may cause GDB to conclude that it
21341 has entered a different function. That said, the unwind info is
21342 correct, individually, before and after this instruction because
21343 we've described the save of SP, which will override the default
21344 handling of SP as restoring from the CFA. */
21345 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21348 /* The static chain register is the same as the IP register. If it is
21349 clobbered when creating the frame, we need to save and restore it. */
21350 clobber_ip = IS_NESTED (func_type)
21351 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21352 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21353 && !df_regs_ever_live_p (LR_REGNUM)
21354 && arm_r3_live_at_start_p ()));
21356 /* Find somewhere to store IP whilst the frame is being created.
21357 We try the following places in order:
21359 1. The last argument register r3 if it is available.
21360 2. A slot on the stack above the frame if there are no
21361 arguments to push onto the stack.
21362 3. Register r3 again, after pushing the argument registers
21363 onto the stack, if this is a varargs function.
21364 4. The last slot on the stack created for the arguments to
21365 push, if this isn't a varargs function.
21367 Note - we only need to tell the dwarf2 backend about the SP
21368 adjustment in the second variant; the static chain register
21369 doesn't need to be unwound, as it doesn't contain a value
21370 inherited from the caller. */
21371 if (clobber_ip)
21373 if (!arm_r3_live_at_start_p ())
21374 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21375 else if (args_to_push == 0)
21377 rtx addr, dwarf;
21379 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21380 saved_regs += 4;
21382 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21383 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21384 fp_offset = 4;
21386 /* Just tell the dwarf backend that we adjusted SP. */
21387 dwarf = gen_rtx_SET (stack_pointer_rtx,
21388 plus_constant (Pmode, stack_pointer_rtx,
21389 -fp_offset));
21390 RTX_FRAME_RELATED_P (insn) = 1;
21391 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21393 else
21395 /* Store the args on the stack. */
21396 if (cfun->machine->uses_anonymous_args)
21398 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21399 (0xf0 >> (args_to_push / 4)) & 0xf);
21400 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21401 saved_pretend_args = 1;
21403 else
21405 rtx addr, dwarf;
21407 if (args_to_push == 4)
21408 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21409 else
21410 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21411 plus_constant (Pmode,
21412 stack_pointer_rtx,
21413 -args_to_push));
21415 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21417 /* Just tell the dwarf backend that we adjusted SP. */
21418 dwarf = gen_rtx_SET (stack_pointer_rtx,
21419 plus_constant (Pmode, stack_pointer_rtx,
21420 -args_to_push));
21421 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21424 RTX_FRAME_RELATED_P (insn) = 1;
21425 fp_offset = args_to_push;
21426 args_to_push = 0;
21430 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21432 if (IS_INTERRUPT (func_type))
21434 /* Interrupt functions must not corrupt any registers.
21435 Creating a frame pointer, however, corrupts the IP
21436 register, so we must push it first. */
21437 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21439 /* Do not set RTX_FRAME_RELATED_P on this insn.
21440 The dwarf stack unwinding code only wants to see one
21441 stack decrement per function, and this is not it. If
21442 this instruction is labeled as being part of the frame
21443 creation sequence then dwarf2out_frame_debug_expr will
21444 die when it encounters the assignment of IP to FP
21445 later on, since the use of SP here establishes SP as
21446 the CFA register and not IP.
21448 Anyway this instruction is not really part of the stack
21449 frame creation although it is part of the prologue. */
21452 insn = emit_set_insn (ip_rtx,
21453 plus_constant (Pmode, stack_pointer_rtx,
21454 fp_offset));
21455 RTX_FRAME_RELATED_P (insn) = 1;
21458 if (args_to_push)
21460 /* Push the argument registers, or reserve space for them. */
21461 if (cfun->machine->uses_anonymous_args)
21462 insn = emit_multi_reg_push
21463 ((0xf0 >> (args_to_push / 4)) & 0xf,
21464 (0xf0 >> (args_to_push / 4)) & 0xf);
21465 else
21466 insn = emit_insn
21467 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21468 GEN_INT (- args_to_push)));
21469 RTX_FRAME_RELATED_P (insn) = 1;
21472 /* If this is an interrupt service routine, and the link register
21473 is going to be pushed, and we're not generating extra
21474 push of IP (needed when a frame is needed and the frame layout is APCS),
21475 subtracting four from LR now will mean that the function return
21476 can be done with a single instruction. */
21477 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21478 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21479 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21480 && TARGET_ARM)
21482 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21484 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21487 if (live_regs_mask)
21489 unsigned long dwarf_regs_mask = live_regs_mask;
21491 saved_regs += bit_count (live_regs_mask) * 4;
21492 if (optimize_size && !frame_pointer_needed
21493 && saved_regs == offsets->saved_regs - offsets->saved_args)
21495 /* If no coprocessor registers are being pushed and we don't have
21496 to worry about a frame pointer then push extra registers to
21497 create the stack frame. This is done in a way that does not
21498 alter the frame layout, so it is independent of the epilogue. */
21499 int n;
21500 int frame;
21501 n = 0;
21502 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21503 n++;
21504 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21505 if (frame && n * 4 >= frame)
21507 n = frame / 4;
21508 live_regs_mask |= (1 << n) - 1;
21509 saved_regs += frame;
21513 if (TARGET_LDRD
21514 && current_tune->prefer_ldrd_strd
21515 && !optimize_function_for_size_p (cfun))
21517 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21518 if (TARGET_THUMB2)
21519 thumb2_emit_strd_push (live_regs_mask);
21520 else if (TARGET_ARM
21521 && !TARGET_APCS_FRAME
21522 && !IS_INTERRUPT (func_type))
21523 arm_emit_strd_push (live_regs_mask);
21524 else
21526 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21527 RTX_FRAME_RELATED_P (insn) = 1;
21530 else
21532 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21533 RTX_FRAME_RELATED_P (insn) = 1;
21537 if (! IS_VOLATILE (func_type))
21538 saved_regs += arm_save_coproc_regs ();
21540 if (frame_pointer_needed && TARGET_ARM)
21542 /* Create the new frame pointer. */
21543 if (TARGET_APCS_FRAME)
21545 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21546 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21547 RTX_FRAME_RELATED_P (insn) = 1;
21549 else
21551 insn = GEN_INT (saved_regs - (4 + fp_offset));
21552 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21553 stack_pointer_rtx, insn));
21554 RTX_FRAME_RELATED_P (insn) = 1;
21558 size = offsets->outgoing_args - offsets->saved_args;
21559 if (flag_stack_usage_info)
21560 current_function_static_stack_size = size;
21562 /* If this isn't an interrupt service routine and we have a frame, then do
21563 stack checking. We use IP as the first scratch register, except for the
21564 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21565 if (!IS_INTERRUPT (func_type)
21566 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21568 unsigned int regno;
21570 if (!IS_NESTED (func_type) || clobber_ip)
21571 regno = IP_REGNUM;
21572 else if (df_regs_ever_live_p (LR_REGNUM))
21573 regno = LR_REGNUM;
21574 else
21575 regno = 3;
21577 if (crtl->is_leaf && !cfun->calls_alloca)
21579 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21580 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21581 size - STACK_CHECK_PROTECT,
21582 regno, live_regs_mask);
21584 else if (size > 0)
21585 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21586 regno, live_regs_mask);
21589 /* Recover the static chain register. */
21590 if (clobber_ip)
21592 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21593 insn = gen_rtx_REG (SImode, 3);
21594 else
21596 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21597 insn = gen_frame_mem (SImode, insn);
21599 emit_set_insn (ip_rtx, insn);
21600 emit_insn (gen_force_register_use (ip_rtx));
21603 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21605 /* This add can produce multiple insns for a large constant, so we
21606 need to get tricky. */
21607 rtx_insn *last = get_last_insn ();
21609 amount = GEN_INT (offsets->saved_args + saved_regs
21610 - offsets->outgoing_args);
21612 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21613 amount));
21616 last = last ? NEXT_INSN (last) : get_insns ();
21617 RTX_FRAME_RELATED_P (last) = 1;
21619 while (last != insn);
21621 /* If the frame pointer is needed, emit a special barrier that
21622 will prevent the scheduler from moving stores to the frame
21623 before the stack adjustment. */
21624 if (frame_pointer_needed)
21625 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21626 hard_frame_pointer_rtx));
21630 if (frame_pointer_needed && TARGET_THUMB2)
21631 thumb_set_frame_pointer (offsets);
21633 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21635 unsigned long mask;
21637 mask = live_regs_mask;
21638 mask &= THUMB2_WORK_REGS;
21639 if (!IS_NESTED (func_type))
21640 mask |= (1 << IP_REGNUM);
21641 arm_load_pic_register (mask);
21644 /* If we are profiling, make sure no instructions are scheduled before
21645 the call to mcount. Similarly if the user has requested no
21646 scheduling in the prolog. Similarly if we want non-call exceptions
21647 using the EABI unwinder, to prevent faulting instructions from being
21648 swapped with a stack adjustment. */
21649 if (crtl->profile || !TARGET_SCHED_PROLOG
21650 || (arm_except_unwind_info (&global_options) == UI_TARGET
21651 && cfun->can_throw_non_call_exceptions))
21652 emit_insn (gen_blockage ());
21654 /* If the link register is being kept alive, with the return address in it,
21655 then make sure that it does not get reused by the ce2 pass. */
21656 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21657 cfun->machine->lr_save_eliminated = 1;
21660 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21661 static void
21662 arm_print_condition (FILE *stream)
21664 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21666 /* Branch conversion is not implemented for Thumb-2. */
21667 if (TARGET_THUMB)
21669 output_operand_lossage ("predicated Thumb instruction");
21670 return;
21672 if (current_insn_predicate != NULL)
21674 output_operand_lossage
21675 ("predicated instruction in conditional sequence");
21676 return;
21679 fputs (arm_condition_codes[arm_current_cc], stream);
21681 else if (current_insn_predicate)
21683 enum arm_cond_code code;
21685 if (TARGET_THUMB1)
21687 output_operand_lossage ("predicated Thumb instruction");
21688 return;
21691 code = get_arm_condition_code (current_insn_predicate);
21692 fputs (arm_condition_codes[code], stream);
21697 /* Globally reserved letters: acln
21698 Punctuation letters currently used: @_|?().!#
21699 Lower case letters currently used: bcdefhimpqtvwxyz
21700 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21701 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21703 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21705 If CODE is 'd', then the X is a condition operand and the instruction
21706 should only be executed if the condition is true.
21707 If CODE is 'D', then the X is a condition operand and the instruction
21708 should only be executed if the condition is false: however, if the mode
21709 of the comparison is CCFPEmode, then always execute the instruction -- we
21710 do this because in these circumstances !GE does not necessarily imply LT;
21711 in these cases the instruction pattern will take care to make sure that
21712 an instruction containing %d will follow, thereby undoing the effects of
21713 doing this instruction unconditionally.
21714 If CODE is 'N' then X is a floating point operand that must be negated
21715 before output.
21716 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21717 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
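/* For example, "%B" applied to (const_int 5) prints -6, and "%M" applied
   to r0 holding a DImode value prints "{r0-r1}".  */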
21718 static void
21719 arm_print_operand (FILE *stream, rtx x, int code)
21721 switch (code)
21723 case '@':
21724 fputs (ASM_COMMENT_START, stream);
21725 return;
21727 case '_':
21728 fputs (user_label_prefix, stream);
21729 return;
21731 case '|':
21732 fputs (REGISTER_PREFIX, stream);
21733 return;
21735 case '?':
21736 arm_print_condition (stream);
21737 return;
21739 case '.':
21740 /* The current condition code for a condition code setting instruction.
21741 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21742 fputc('s', stream);
21743 arm_print_condition (stream);
21744 return;
21746 case '!':
21747 /* If the instruction is conditionally executed then print
21748 the current condition code, otherwise print 's'. */
21749 gcc_assert (TARGET_THUMB2);
21750 if (current_insn_predicate)
21751 arm_print_condition (stream);
21752 else
21753 fputc('s', stream);
21754 break;
21756 /* %# is a "break" sequence. It doesn't output anything, but is used to
21757 separate e.g. operand numbers from following text, if that text consists
21758 of further digits which we don't want to be part of the operand
21759 number. */
21760 case '#':
21761 return;
21763 case 'N':
21765 REAL_VALUE_TYPE r;
21766 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21767 fprintf (stream, "%s", fp_const_from_val (&r));
21769 return;
21771 /* An integer or symbol address without a preceding # sign. */
21772 case 'c':
21773 switch (GET_CODE (x))
21775 case CONST_INT:
21776 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21777 break;
21779 case SYMBOL_REF:
21780 output_addr_const (stream, x);
21781 break;
21783 case CONST:
21784 if (GET_CODE (XEXP (x, 0)) == PLUS
21785 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21787 output_addr_const (stream, x);
21788 break;
21790 /* Fall through. */
21792 default:
21793 output_operand_lossage ("Unsupported operand for code '%c'", code);
21795 return;
21797 /* An integer that we want to print in HEX. */
21798 case 'x':
21799 switch (GET_CODE (x))
21801 case CONST_INT:
21802 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21803 break;
21805 default:
21806 output_operand_lossage ("Unsupported operand for code '%c'", code);
21808 return;
21810 case 'B':
21811 if (CONST_INT_P (x))
21813 HOST_WIDE_INT val;
21814 val = ARM_SIGN_EXTEND (~INTVAL (x));
21815 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21817 else
21819 putc ('~', stream);
21820 output_addr_const (stream, x);
21822 return;
21824 case 'b':
21825 /* Print the log2 of a CONST_INT. */
21827 HOST_WIDE_INT val;
21829 if (!CONST_INT_P (x)
21830 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21831 output_operand_lossage ("Unsupported operand for code '%c'", code);
21832 else
21833 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21835 return;
21837 case 'L':
21838 /* The low 16 bits of an immediate constant. */
21839 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21840 return;
21842 case 'i':
21843 fprintf (stream, "%s", arithmetic_instr (x, 1));
21844 return;
21846 case 'I':
21847 fprintf (stream, "%s", arithmetic_instr (x, 0));
21848 return;
21850 case 'S':
21852 HOST_WIDE_INT val;
21853 const char *shift;
21855 shift = shift_op (x, &val);
21857 if (shift)
21859 fprintf (stream, ", %s ", shift);
21860 if (val == -1)
21861 arm_print_operand (stream, XEXP (x, 1), 0);
21862 else
21863 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21866 return;
21868 /* An explanation of the 'Q', 'R' and 'H' register operands:
21870 In a pair of registers containing a DI or DF value the 'Q'
21871 operand returns the register number of the register containing
21872 the least significant part of the value. The 'R' operand returns
21873 the register number of the register containing the most
21874 significant part of the value.
21876 The 'H' operand returns the higher of the two register numbers.
21877 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21878 same as the 'Q' operand, since the most significant part of the
21879 value is held in the lower number register. The reverse is true
21880 on systems where WORDS_BIG_ENDIAN is false.
21882 The purpose of these operands is to distinguish between cases
21883 where the endian-ness of the values is important (for example
21884 when they are added together), and cases where the endian-ness
21885 is irrelevant, but the order of register operations is important.
21886 For example when loading a value from memory into a register
21887 pair, the endian-ness does not matter. Provided that the value
21888 from the lower memory address is put into the lower numbered
21889 register, and the value from the higher address is put into the
21890 higher numbered register, the load will work regardless of whether
21891 the value being loaded is big-wordian or little-wordian. The
21892 order of the two register loads can matter however, if the address
21893 of the memory location is actually held in one of the registers
21894 being overwritten by the load.
21896 The 'Q' and 'R' constraints are also available for 64-bit
21897 constants. */
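/* For example, for a DImode value held in {r0, r1} on a little-endian
   target, %Q prints r0, %R prints r1 and %H prints r1; when
   WORDS_BIG_ENDIAN, %Q and %H print r1 while %R prints r0.  */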
21898 case 'Q':
21899 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21901 rtx part = gen_lowpart (SImode, x);
21902 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21903 return;
21906 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21908 output_operand_lossage ("invalid operand for code '%c'", code);
21909 return;
21912 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21913 return;
21915 case 'R':
21916 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21918 machine_mode mode = GET_MODE (x);
21919 rtx part;
21921 if (mode == VOIDmode)
21922 mode = DImode;
21923 part = gen_highpart_mode (SImode, mode, x);
21924 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21925 return;
21928 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21930 output_operand_lossage ("invalid operand for code '%c'", code);
21931 return;
21934 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21935 return;
21937 case 'H':
21938 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21940 output_operand_lossage ("invalid operand for code '%c'", code);
21941 return;
21944 asm_fprintf (stream, "%r", REGNO (x) + 1);
21945 return;
21947 case 'J':
21948 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21950 output_operand_lossage ("invalid operand for code '%c'", code);
21951 return;
21954 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21955 return;
21957 case 'K':
21958 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21960 output_operand_lossage ("invalid operand for code '%c'", code);
21961 return;
21964 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21965 return;
21967 case 'm':
21968 asm_fprintf (stream, "%r",
21969 REG_P (XEXP (x, 0))
21970 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21971 return;
21973 case 'M':
21974 asm_fprintf (stream, "{%r-%r}",
21975 REGNO (x),
21976 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21977 return;
21979 /* Like 'M', but writing doubleword vector registers, for use by Neon
21980 insns. */
21981 case 'h':
21983 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21984 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21985 if (numregs == 1)
21986 asm_fprintf (stream, "{d%d}", regno);
21987 else
21988 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21990 return;
21992 case 'd':
21993 /* CONST_TRUE_RTX means always -- that's the default. */
21994 if (x == const_true_rtx)
21995 return;
21997 if (!COMPARISON_P (x))
21999 output_operand_lossage ("invalid operand for code '%c'", code);
22000 return;
22003 fputs (arm_condition_codes[get_arm_condition_code (x)],
22004 stream);
22005 return;
22007 case 'D':
22008 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22009 want to do that. */
22010 if (x == const_true_rtx)
22012 output_operand_lossage ("instruction never executed");
22013 return;
22015 if (!COMPARISON_P (x))
22017 output_operand_lossage ("invalid operand for code '%c'", code);
22018 return;
22021 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22022 (get_arm_condition_code (x))],
22023 stream);
22024 return;
22026 case 's':
22027 case 'V':
22028 case 'W':
22029 case 'X':
22030 case 'Y':
22031 case 'Z':
22032 /* Former Maverick support, removed after GCC-4.7. */
22033 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22034 return;
22036 case 'U':
22037 if (!REG_P (x)
22038 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22039 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22040 /* Bad value for wCG register number. */
22042 output_operand_lossage ("invalid operand for code '%c'", code);
22043 return;
22046 else
22047 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22048 return;
22050 /* Print an iWMMXt control register name. */
22051 case 'w':
22052 if (!CONST_INT_P (x)
22053 || INTVAL (x) < 0
22054 || INTVAL (x) >= 16)
22055 /* Bad value for wC register number. */
22057 output_operand_lossage ("invalid operand for code '%c'", code);
22058 return;
22061 else
22063 static const char * wc_reg_names [16] =
22065 "wCID", "wCon", "wCSSF", "wCASF",
22066 "wC4", "wC5", "wC6", "wC7",
22067 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22068 "wC12", "wC13", "wC14", "wC15"
22071 fputs (wc_reg_names [INTVAL (x)], stream);
22073 return;
22075 /* Print the high single-precision register of a VFP double-precision
22076 register. */
22077 case 'p':
22079 machine_mode mode = GET_MODE (x);
22080 int regno;
22082 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22084 output_operand_lossage ("invalid operand for code '%c'", code);
22085 return;
22088 regno = REGNO (x);
22089 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22091 output_operand_lossage ("invalid operand for code '%c'", code);
22092 return;
22095 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22097 return;
22099 /* Print a VFP/Neon double precision or quad precision register name. */
22100 case 'P':
22101 case 'q':
22103 machine_mode mode = GET_MODE (x);
22104 int is_quad = (code == 'q');
22105 int regno;
22107 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22109 output_operand_lossage ("invalid operand for code '%c'", code);
22110 return;
22113 if (!REG_P (x)
22114 || !IS_VFP_REGNUM (REGNO (x)))
22116 output_operand_lossage ("invalid operand for code '%c'", code);
22117 return;
22120 regno = REGNO (x);
22121 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22122 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22124 output_operand_lossage ("invalid operand for code '%c'", code);
22125 return;
22128 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22129 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22131 return;
22133 /* These two codes print the low/high doubleword register of a Neon quad
22134 register, respectively. For pair-structure types, can also print
22135 low/high quadword registers. */
22136 case 'e':
22137 case 'f':
22139 machine_mode mode = GET_MODE (x);
22140 int regno;
22142 if ((GET_MODE_SIZE (mode) != 16
22143 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22145 output_operand_lossage ("invalid operand for code '%c'", code);
22146 return;
22149 regno = REGNO (x);
22150 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22152 output_operand_lossage ("invalid operand for code '%c'", code);
22153 return;
22156 if (GET_MODE_SIZE (mode) == 16)
22157 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22158 + (code == 'f' ? 1 : 0));
22159 else
22160 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22161 + (code == 'f' ? 1 : 0));
22163 return;
22165 /* Print a VFPv3 floating-point constant, represented as an integer
22166 index. */
22167 case 'G':
22169 int index = vfp3_const_double_index (x);
22170 gcc_assert (index != -1);
22171 fprintf (stream, "%d", index);
22173 return;
22175 /* Print bits representing opcode features for Neon.
22177 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22178 and polynomials as unsigned.
22180 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22182 Bit 2 is 1 for rounding functions, 0 otherwise. */
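/* For example, a bits value of 5 (signed, rounding) prints 's' for %T,
   'i' for %F and 'r' for %O.  */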
22184 /* Identify the type as 's', 'u', 'p' or 'f'. */
22185 case 'T':
22187 HOST_WIDE_INT bits = INTVAL (x);
22188 fputc ("uspf"[bits & 3], stream);
22190 return;
22192 /* Likewise, but signed and unsigned integers are both 'i'. */
22193 case 'F':
22195 HOST_WIDE_INT bits = INTVAL (x);
22196 fputc ("iipf"[bits & 3], stream);
22198 return;
22200 /* As for 'T', but emit 'u' instead of 'p'. */
22201 case 't':
22203 HOST_WIDE_INT bits = INTVAL (x);
22204 fputc ("usuf"[bits & 3], stream);
22206 return;
22208 /* Bit 2: rounding (vs none). */
22209 case 'O':
22211 HOST_WIDE_INT bits = INTVAL (x);
22212 fputs ((bits & 4) != 0 ? "r" : "", stream);
22214 return;
22216 /* Memory operand for vld1/vst1 instruction. */
22217 case 'A':
22219 rtx addr;
22220 bool postinc = FALSE;
22221 rtx postinc_reg = NULL;
22222 unsigned align, memsize, align_bits;
22224 gcc_assert (MEM_P (x));
22225 addr = XEXP (x, 0);
22226 if (GET_CODE (addr) == POST_INC)
22228 postinc = 1;
22229 addr = XEXP (addr, 0);
22231 if (GET_CODE (addr) == POST_MODIFY)
22233 postinc_reg = XEXP (XEXP (addr, 1), 1);
22234 addr = XEXP (addr, 0);
22236 asm_fprintf (stream, "[%r", REGNO (addr));
22238 /* We know the alignment of this access, so we can emit a hint in the
22239 instruction (for some alignments) as an aid to the memory subsystem
22240 of the target. */
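/* For example, a 16-byte access known to be 128-bit aligned is printed
   with the hint ":128", e.g. "vld1.64 {d0, d1}, [r0:128]".  */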
22241 align = MEM_ALIGN (x) >> 3;
22242 memsize = MEM_SIZE (x);
22244 /* Only certain alignment specifiers are supported by the hardware. */
22245 if (memsize == 32 && (align % 32) == 0)
22246 align_bits = 256;
22247 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22248 align_bits = 128;
22249 else if (memsize >= 8 && (align % 8) == 0)
22250 align_bits = 64;
22251 else
22252 align_bits = 0;
22254 if (align_bits != 0)
22255 asm_fprintf (stream, ":%d", align_bits);
22257 asm_fprintf (stream, "]");
22259 if (postinc)
22260 fputs("!", stream);
22261 if (postinc_reg)
22262 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22264 return;
22266 case 'C':
22268 rtx addr;
22270 gcc_assert (MEM_P (x));
22271 addr = XEXP (x, 0);
22272 gcc_assert (REG_P (addr));
22273 asm_fprintf (stream, "[%r]", REGNO (addr));
22275 return;
22277 /* Translate an S register number into a D register number and element index. */
22278 case 'y':
22280 machine_mode mode = GET_MODE (x);
22281 int regno;
22283 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22285 output_operand_lossage ("invalid operand for code '%c'", code);
22286 return;
22289 regno = REGNO (x);
22290 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22292 output_operand_lossage ("invalid operand for code '%c'", code);
22293 return;
22296 regno = regno - FIRST_VFP_REGNUM;
22297 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22299 return;
22301 case 'v':
22302 gcc_assert (CONST_DOUBLE_P (x));
22303 int result;
22304 result = vfp3_const_double_for_fract_bits (x);
22305 if (result == 0)
22306 result = vfp3_const_double_for_bits (x);
22307 fprintf (stream, "#%d", result);
22308 return;
22310 /* Register specifier for vld1.16/vst1.16. Translate the S register
22311 number into a D register number and element index. */
22312 case 'z':
22314 machine_mode mode = GET_MODE (x);
22315 int regno;
22317 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22319 output_operand_lossage ("invalid operand for code '%c'", code);
22320 return;
22323 regno = REGNO (x);
22324 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22326 output_operand_lossage ("invalid operand for code '%c'", code);
22327 return;
22330 regno = regno - FIRST_VFP_REGNUM;
22331 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22333 return;
22335 default:
22336 if (x == 0)
22338 output_operand_lossage ("missing operand");
22339 return;
22342 switch (GET_CODE (x))
22344 case REG:
22345 asm_fprintf (stream, "%r", REGNO (x));
22346 break;
22348 case MEM:
22349 output_address (GET_MODE (x), XEXP (x, 0));
22350 break;
22352 case CONST_DOUBLE:
22354 char fpstr[20];
22355 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22356 sizeof (fpstr), 0, 1);
22357 fprintf (stream, "#%s", fpstr);
22359 break;
22361 default:
22362 gcc_assert (GET_CODE (x) != NEG);
22363 fputc ('#', stream);
22364 if (GET_CODE (x) == HIGH)
22366 fputs (":lower16:", stream);
22367 x = XEXP (x, 0);
22370 output_addr_const (stream, x);
22371 break;
22376 /* Target hook for printing a memory address. */
22377 static void
22378 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22380 if (TARGET_32BIT)
22382 int is_minus = GET_CODE (x) == MINUS;
22384 if (REG_P (x))
22385 asm_fprintf (stream, "[%r]", REGNO (x));
22386 else if (GET_CODE (x) == PLUS || is_minus)
22388 rtx base = XEXP (x, 0);
22389 rtx index = XEXP (x, 1);
22390 HOST_WIDE_INT offset = 0;
22391 if (!REG_P (base)
22392 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22394 /* Ensure that BASE is a register. */
22395 /* (one of them must be). */
22396 /* Also ensure that SP is not used as an index register. */
22397 std::swap (base, index);
22399 switch (GET_CODE (index))
22401 case CONST_INT:
22402 offset = INTVAL (index);
22403 if (is_minus)
22404 offset = -offset;
22405 asm_fprintf (stream, "[%r, #%wd]",
22406 REGNO (base), offset);
22407 break;
22409 case REG:
22410 asm_fprintf (stream, "[%r, %s%r]",
22411 REGNO (base), is_minus ? "-" : "",
22412 REGNO (index));
22413 break;
22415 case MULT:
22416 case ASHIFTRT:
22417 case LSHIFTRT:
22418 case ASHIFT:
22419 case ROTATERT:
22421 asm_fprintf (stream, "[%r, %s%r",
22422 REGNO (base), is_minus ? "-" : "",
22423 REGNO (XEXP (index, 0)));
22424 arm_print_operand (stream, index, 'S');
22425 fputs ("]", stream);
22426 break;
22429 default:
22430 gcc_unreachable ();
22433 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22434 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22436 gcc_assert (REG_P (XEXP (x, 0)));
22438 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22439 asm_fprintf (stream, "[%r, #%s%d]!",
22440 REGNO (XEXP (x, 0)),
22441 GET_CODE (x) == PRE_DEC ? "-" : "",
22442 GET_MODE_SIZE (mode));
22443 else
22444 asm_fprintf (stream, "[%r], #%s%d",
22445 REGNO (XEXP (x, 0)),
22446 GET_CODE (x) == POST_DEC ? "-" : "",
22447 GET_MODE_SIZE (mode));
22449 else if (GET_CODE (x) == PRE_MODIFY)
22451 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22452 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22453 asm_fprintf (stream, "#%wd]!",
22454 INTVAL (XEXP (XEXP (x, 1), 1)));
22455 else
22456 asm_fprintf (stream, "%r]!",
22457 REGNO (XEXP (XEXP (x, 1), 1)));
22459 else if (GET_CODE (x) == POST_MODIFY)
22461 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22462 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22463 asm_fprintf (stream, "#%wd",
22464 INTVAL (XEXP (XEXP (x, 1), 1)));
22465 else
22466 asm_fprintf (stream, "%r",
22467 REGNO (XEXP (XEXP (x, 1), 1)));
22469 else output_addr_const (stream, x);
22471 else
22473 if (REG_P (x))
22474 asm_fprintf (stream, "[%r]", REGNO (x));
22475 else if (GET_CODE (x) == POST_INC)
22476 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22477 else if (GET_CODE (x) == PLUS)
22479 gcc_assert (REG_P (XEXP (x, 0)));
22480 if (CONST_INT_P (XEXP (x, 1)))
22481 asm_fprintf (stream, "[%r, #%wd]",
22482 REGNO (XEXP (x, 0)),
22483 INTVAL (XEXP (x, 1)));
22484 else
22485 asm_fprintf (stream, "[%r, %r]",
22486 REGNO (XEXP (x, 0)),
22487 REGNO (XEXP (x, 1)));
22489 else
22490 output_addr_const (stream, x);
22494 /* Target hook for indicating whether a punctuation character for
22495 TARGET_PRINT_OPERAND is valid. */
22496 static bool
22497 arm_print_operand_punct_valid_p (unsigned char code)
22499 return (code == '@' || code == '|' || code == '.'
22500 || code == '(' || code == ')' || code == '#'
22501 || (TARGET_32BIT && (code == '?'))
22502 || (TARGET_THUMB2 && (code == '!'))
22503 || (TARGET_THUMB && (code == '_')));
22506 /* Target hook for assembling integer objects. The ARM version needs to
22507 handle word-sized values specially. */
22508 static bool
22509 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22511 machine_mode mode;
22513 if (size == UNITS_PER_WORD && aligned_p)
22515 fputs ("\t.word\t", asm_out_file);
22516 output_addr_const (asm_out_file, x);
22518 /* Mark symbols as position independent. We only do this in the
22519 .text segment, not in the .data segment. */
22520 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22521 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22523 /* See legitimize_pic_address for an explanation of the
22524 TARGET_VXWORKS_RTP check. */
22525 /* References to weak symbols cannot be resolved locally:
22526 they may be overridden by a non-weak definition at link
22527 time. */
22528 if (!arm_pic_data_is_text_relative
22529 || (GET_CODE (x) == SYMBOL_REF
22530 && (!SYMBOL_REF_LOCAL_P (x)
22531 || (SYMBOL_REF_DECL (x)
22532 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22533 fputs ("(GOT)", asm_out_file);
22534 else
22535 fputs ("(GOTOFF)", asm_out_file);
22537 fputc ('\n', asm_out_file);
22538 return true;
22541 mode = GET_MODE (x);
22543 if (arm_vector_mode_supported_p (mode))
22545 int i, units;
22547 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22549 units = CONST_VECTOR_NUNITS (x);
22550 size = GET_MODE_UNIT_SIZE (mode);
22552 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22553 for (i = 0; i < units; i++)
22555 rtx elt = CONST_VECTOR_ELT (x, i);
22556 assemble_integer
22557 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22559 else
22560 for (i = 0; i < units; i++)
22562 rtx elt = CONST_VECTOR_ELT (x, i);
22563 assemble_real
22564 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22565 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22568 return true;
22571 return default_assemble_integer (x, size, aligned_p);
22574 static void
22575 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22577 section *s;
22579 if (!TARGET_AAPCS_BASED)
22581 (is_ctor ?
22582 default_named_section_asm_out_constructor
22583 : default_named_section_asm_out_destructor) (symbol, priority);
22584 return;
22587 /* Put these in the .init_array section, using a special relocation. */
22588 if (priority != DEFAULT_INIT_PRIORITY)
22590 char buf[18];
22591 sprintf (buf, "%s.%.5u",
22592 is_ctor ? ".init_array" : ".fini_array",
22593 priority);
22594 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22596 else if (is_ctor)
22597 s = ctors_section;
22598 else
22599 s = dtors_section;
22601 switch_to_section (s);
22602 assemble_align (POINTER_SIZE);
22603 fputs ("\t.word\t", asm_out_file);
22604 output_addr_const (asm_out_file, symbol);
22605 fputs ("(target1)\n", asm_out_file);
22608 /* Add a function to the list of static constructors. */
22610 static void
22611 arm_elf_asm_constructor (rtx symbol, int priority)
22613 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22616 /* Add a function to the list of static destructors. */
22618 static void
22619 arm_elf_asm_destructor (rtx symbol, int priority)
22621 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22624 /* A finite state machine takes care of noticing whether or not instructions
22625 can be conditionally executed, thus decreasing execution time and code
22626 size by deleting branch instructions. The fsm is controlled by
22627 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22629 /* The states of the fsm controlling condition codes are:
22630 0: normal, do nothing special
22631 1: make ASM_OUTPUT_OPCODE not output this instruction
22632 2: make ASM_OUTPUT_OPCODE not output this instruction
22633 3: make instructions conditional
22634 4: make instructions conditional
22636 State transitions (state->state by whom under condition):
22637 0 -> 1 final_prescan_insn if the `target' is a label
22638 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22639 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22640 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22641 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22642 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22643 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22644 (the target insn is arm_target_insn).
22646 If the jump clobbers the conditions then we use states 2 and 4.
22648 A similar thing can be done with conditional return insns.
22650 XXX In case the `target' is an unconditional branch, this conditionalising
22651 of the instructions always reduces code size, but not always execution
22652 time. But then, I want to reduce the code size to somewhere near what
22653 /bin/cc produces. */
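/* For example, a branch such as "bne .L1" over two simple insns,
   followed by ".L1:", can be removed and the skipped insns output with
   the inverse condition, e.g. as "addeq"/"moveq".  */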
22655 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22656 instructions. When a COND_EXEC instruction is seen the subsequent
22657 instructions are scanned so that multiple conditional instructions can be
22658 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22659 specify the length and true/false mask for the IT block. These will be
22660 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
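/* For example, four consecutive insns predicated EQ, EQ, NE, EQ fit in
   one block and are output behind a single "ITTET EQ".  */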
22662 /* Returns the index of the ARM condition code string in
22663 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22664 COMPARISON should be an rtx like `(eq (...) (...))'. */
22666 enum arm_cond_code
22667 maybe_get_arm_condition_code (rtx comparison)
22669 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22670 enum arm_cond_code code;
22671 enum rtx_code comp_code = GET_CODE (comparison);
22673 if (GET_MODE_CLASS (mode) != MODE_CC)
22674 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22675 XEXP (comparison, 1));
22677 switch (mode)
22679 case CC_DNEmode: code = ARM_NE; goto dominance;
22680 case CC_DEQmode: code = ARM_EQ; goto dominance;
22681 case CC_DGEmode: code = ARM_GE; goto dominance;
22682 case CC_DGTmode: code = ARM_GT; goto dominance;
22683 case CC_DLEmode: code = ARM_LE; goto dominance;
22684 case CC_DLTmode: code = ARM_LT; goto dominance;
22685 case CC_DGEUmode: code = ARM_CS; goto dominance;
22686 case CC_DGTUmode: code = ARM_HI; goto dominance;
22687 case CC_DLEUmode: code = ARM_LS; goto dominance;
22688 case CC_DLTUmode: code = ARM_CC;
22690 dominance:
22691 if (comp_code == EQ)
22692 return ARM_INVERSE_CONDITION_CODE (code);
22693 if (comp_code == NE)
22694 return code;
22695 return ARM_NV;
22697 case CC_NOOVmode:
22698 switch (comp_code)
22700 case NE: return ARM_NE;
22701 case EQ: return ARM_EQ;
22702 case GE: return ARM_PL;
22703 case LT: return ARM_MI;
22704 default: return ARM_NV;
22707 case CC_Zmode:
22708 switch (comp_code)
22710 case NE: return ARM_NE;
22711 case EQ: return ARM_EQ;
22712 default: return ARM_NV;
22715 case CC_Nmode:
22716 switch (comp_code)
22718 case NE: return ARM_MI;
22719 case EQ: return ARM_PL;
22720 default: return ARM_NV;
22723 case CCFPEmode:
22724 case CCFPmode:
22725 /* We can handle all cases except UNEQ and LTGT. */
22726 switch (comp_code)
22728 case GE: return ARM_GE;
22729 case GT: return ARM_GT;
22730 case LE: return ARM_LS;
22731 case LT: return ARM_MI;
22732 case NE: return ARM_NE;
22733 case EQ: return ARM_EQ;
22734 case ORDERED: return ARM_VC;
22735 case UNORDERED: return ARM_VS;
22736 case UNLT: return ARM_LT;
22737 case UNLE: return ARM_LE;
22738 case UNGT: return ARM_HI;
22739 case UNGE: return ARM_PL;
22740 /* UNEQ and LTGT do not have a representation. */
22741 case UNEQ: /* Fall through. */
22742 case LTGT: /* Fall through. */
22743 default: return ARM_NV;
22746 case CC_SWPmode:
22747 switch (comp_code)
22749 case NE: return ARM_NE;
22750 case EQ: return ARM_EQ;
22751 case GE: return ARM_LE;
22752 case GT: return ARM_LT;
22753 case LE: return ARM_GE;
22754 case LT: return ARM_GT;
22755 case GEU: return ARM_LS;
22756 case GTU: return ARM_CC;
22757 case LEU: return ARM_CS;
22758 case LTU: return ARM_HI;
22759 default: return ARM_NV;
22762 case CC_Cmode:
22763 switch (comp_code)
22765 case LTU: return ARM_CS;
22766 case GEU: return ARM_CC;
22767 case NE: return ARM_CS;
22768 case EQ: return ARM_CC;
22769 default: return ARM_NV;
22772 case CC_CZmode:
22773 switch (comp_code)
22775 case NE: return ARM_NE;
22776 case EQ: return ARM_EQ;
22777 case GEU: return ARM_CS;
22778 case GTU: return ARM_HI;
22779 case LEU: return ARM_LS;
22780 case LTU: return ARM_CC;
22781 default: return ARM_NV;
22784 case CC_NCVmode:
22785 switch (comp_code)
22787 case GE: return ARM_GE;
22788 case LT: return ARM_LT;
22789 case GEU: return ARM_CS;
22790 case LTU: return ARM_CC;
22791 default: return ARM_NV;
22794 case CC_Vmode:
22795 switch (comp_code)
22797 case NE: return ARM_VS;
22798 case EQ: return ARM_VC;
22799 default: return ARM_NV;
22802 case CCmode:
22803 switch (comp_code)
22805 case NE: return ARM_NE;
22806 case EQ: return ARM_EQ;
22807 case GE: return ARM_GE;
22808 case GT: return ARM_GT;
22809 case LE: return ARM_LE;
22810 case LT: return ARM_LT;
22811 case GEU: return ARM_CS;
22812 case GTU: return ARM_HI;
22813 case LEU: return ARM_LS;
22814 case LTU: return ARM_CC;
22815 default: return ARM_NV;
22818 default: gcc_unreachable ();
22822 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22823 static enum arm_cond_code
22824 get_arm_condition_code (rtx comparison)
22826 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22827 gcc_assert (code != ARM_NV);
22828 return code;
22831 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22832 instructions. */
22833 void
22834 thumb2_final_prescan_insn (rtx_insn *insn)
22836 rtx_insn *first_insn = insn;
22837 rtx body = PATTERN (insn);
22838 rtx predicate;
22839 enum arm_cond_code code;
22840 int n;
22841 int mask;
22842 int max;
22844 /* max_insns_skipped in the tune was already taken into account in the
22845 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22846 just emit the IT blocks as large as we can. It does not make sense to split
22847 the IT blocks. */
22848 max = MAX_INSN_PER_IT_BLOCK;
22850 /* Remove the previous insn from the count of insns to be output. */
22851 if (arm_condexec_count)
22852 arm_condexec_count--;
22854 /* Nothing to do if we are already inside a conditional block. */
22855 if (arm_condexec_count)
22856 return;
22858 if (GET_CODE (body) != COND_EXEC)
22859 return;
22861 /* Conditional jumps are implemented directly. */
22862 if (JUMP_P (insn))
22863 return;
22865 predicate = COND_EXEC_TEST (body);
22866 arm_current_cc = get_arm_condition_code (predicate);
22868 n = get_attr_ce_count (insn);
22869 arm_condexec_count = 1;
22870 arm_condexec_mask = (1 << n) - 1;
22871 arm_condexec_masklen = n;
22872 /* See if subsequent instructions can be combined into the same block. */
22873 for (;;)
22875 insn = next_nonnote_insn (insn);
22877 /* Jumping into the middle of an IT block is illegal, so a label or
22878 barrier terminates the block. */
22879 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22880 break;
22882 body = PATTERN (insn);
22883 /* USE and CLOBBER aren't really insns, so just skip them. */
22884 if (GET_CODE (body) == USE
22885 || GET_CODE (body) == CLOBBER)
22886 continue;
22888 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22889 if (GET_CODE (body) != COND_EXEC)
22890 break;
22891 /* Maximum number of conditionally executed instructions in a block. */
22892 n = get_attr_ce_count (insn);
22893 if (arm_condexec_masklen + n > max)
22894 break;
22896 predicate = COND_EXEC_TEST (body);
22897 code = get_arm_condition_code (predicate);
22898 mask = (1 << n) - 1;
22899 if (arm_current_cc == code)
22900 arm_condexec_mask |= (mask << arm_condexec_masklen);
22901 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22902 break;
22904 arm_condexec_count++;
22905 arm_condexec_masklen += n;
22907 /* A jump must be the last instruction in a conditional block. */
22908 if (JUMP_P (insn))
22909 break;
22911 /* Restore recog_data (getting the attributes of other insns can
22912 destroy this array, but final.c assumes that it remains intact
22913 across this call). */
22914 extract_constrain_insn_cached (first_insn);
22917 void
22918 arm_final_prescan_insn (rtx_insn *insn)
22920 /* BODY will hold the body of INSN. */
22921 rtx body = PATTERN (insn);
22923 /* This will be 1 if trying to repeat the trick, and things need to be
22924 reversed if it appears to fail. */
22925 int reverse = 0;
22927 /* If we start with a return insn, we only succeed if we find another one. */
22928 int seeking_return = 0;
22929 enum rtx_code return_code = UNKNOWN;
22931 /* START_INSN will hold the insn from where we start looking. This is the
22932 first insn after the following code_label if REVERSE is true. */
22933 rtx_insn *start_insn = insn;
22935 /* If in state 4, check if the target branch is reached, in order to
22936 change back to state 0. */
22937 if (arm_ccfsm_state == 4)
22939 if (insn == arm_target_insn)
22941 arm_target_insn = NULL;
22942 arm_ccfsm_state = 0;
22944 return;
22947 /* If in state 3, it is possible to repeat the trick, if this insn is an
22948 unconditional branch to a label, and immediately following this branch
22949 is the previous target label which is only used once, and the label this
22950 branch jumps to is not too far off. */
22951 if (arm_ccfsm_state == 3)
22953 if (simplejump_p (insn))
22955 start_insn = next_nonnote_insn (start_insn);
22956 if (BARRIER_P (start_insn))
22958 /* XXX Isn't this always a barrier? */
22959 start_insn = next_nonnote_insn (start_insn);
22961 if (LABEL_P (start_insn)
22962 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22963 && LABEL_NUSES (start_insn) == 1)
22964 reverse = TRUE;
22965 else
22966 return;
22968 else if (ANY_RETURN_P (body))
22970 start_insn = next_nonnote_insn (start_insn);
22971 if (BARRIER_P (start_insn))
22972 start_insn = next_nonnote_insn (start_insn);
22973 if (LABEL_P (start_insn)
22974 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22975 && LABEL_NUSES (start_insn) == 1)
22977 reverse = TRUE;
22978 seeking_return = 1;
22979 return_code = GET_CODE (body);
22981 else
22982 return;
22984 else
22985 return;
22988 gcc_assert (!arm_ccfsm_state || reverse);
22989 if (!JUMP_P (insn))
22990 return;
22992 /* This jump might be paralleled with a clobber of the condition codes;
22993 the jump should always come first. */
22994 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22995 body = XVECEXP (body, 0, 0);
22997 if (reverse
22998 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22999 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23001 int insns_skipped;
23002 int fail = FALSE, succeed = FALSE;
23003 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23004 int then_not_else = TRUE;
23005 rtx_insn *this_insn = start_insn;
23006 rtx label = 0;
23008 /* Register the insn jumped to. */
23009 if (reverse)
23011 if (!seeking_return)
23012 label = XEXP (SET_SRC (body), 0);
23014 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23015 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23016 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23018 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23019 then_not_else = FALSE;
23021 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23023 seeking_return = 1;
23024 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23026 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23028 seeking_return = 1;
23029 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23030 then_not_else = FALSE;
23032 else
23033 gcc_unreachable ();
23035 /* See how many insns this branch skips, and what kind of insns. If all
23036 insns are okay, and the label or unconditional branch to the same
23037 label is not too far away, succeed. */
23038 for (insns_skipped = 0;
23039 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23041 rtx scanbody;
23043 this_insn = next_nonnote_insn (this_insn);
23044 if (!this_insn)
23045 break;
23047 switch (GET_CODE (this_insn))
23049 case CODE_LABEL:
23050 /* Succeed if it is the target label, otherwise fail since
23051 control falls in from somewhere else. */
23052 if (this_insn == label)
23054 arm_ccfsm_state = 1;
23055 succeed = TRUE;
23057 else
23058 fail = TRUE;
23059 break;
23061 case BARRIER:
23062 /* Succeed if the following insn is the target label.
23063 Otherwise fail.
23064 If return insns are used then the last insn in a function
23065 will be a barrier. */
23066 this_insn = next_nonnote_insn (this_insn);
23067 if (this_insn && this_insn == label)
23069 arm_ccfsm_state = 1;
23070 succeed = TRUE;
23072 else
23073 fail = TRUE;
23074 break;
23076 case CALL_INSN:
23077 /* The AAPCS says that conditional calls should not be
23078 used since they make interworking inefficient (the
23079 linker can't transform BL<cond> into BLX). That's
23080 only a problem if the machine has BLX. */
23081 if (arm_arch5)
23083 fail = TRUE;
23084 break;
23087 /* Succeed if the following insn is the target label, or
23088 if the following two insns are a barrier and the
23089 target label. */
23090 this_insn = next_nonnote_insn (this_insn);
23091 if (this_insn && BARRIER_P (this_insn))
23092 this_insn = next_nonnote_insn (this_insn);
23094 if (this_insn && this_insn == label
23095 && insns_skipped < max_insns_skipped)
23097 arm_ccfsm_state = 1;
23098 succeed = TRUE;
23100 else
23101 fail = TRUE;
23102 break;
23104 case JUMP_INSN:
23105 /* If this is an unconditional branch to the same label, succeed.
23106 If it is to another label, do nothing. If it is conditional,
23107 fail. */
23108 /* XXX Probably, the tests for SET and the PC are
23109 unnecessary. */
23111 scanbody = PATTERN (this_insn);
23112 if (GET_CODE (scanbody) == SET
23113 && GET_CODE (SET_DEST (scanbody)) == PC)
23115 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23116 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23118 arm_ccfsm_state = 2;
23119 succeed = TRUE;
23121 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23122 fail = TRUE;
23124 /* Fail if a conditional return is undesirable (e.g. on a
23125 StrongARM), but still allow this if optimizing for size. */
23126 else if (GET_CODE (scanbody) == return_code
23127 && !use_return_insn (TRUE, NULL)
23128 && !optimize_size)
23129 fail = TRUE;
23130 else if (GET_CODE (scanbody) == return_code)
23132 arm_ccfsm_state = 2;
23133 succeed = TRUE;
23135 else if (GET_CODE (scanbody) == PARALLEL)
23137 switch (get_attr_conds (this_insn))
23139 case CONDS_NOCOND:
23140 break;
23141 default:
23142 fail = TRUE;
23143 break;
23146 else
23147 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23149 break;
23151 case INSN:
23152 /* Instructions using or affecting the condition codes make it
23153 fail. */
23154 scanbody = PATTERN (this_insn);
23155 if (!(GET_CODE (scanbody) == SET
23156 || GET_CODE (scanbody) == PARALLEL)
23157 || get_attr_conds (this_insn) != CONDS_NOCOND)
23158 fail = TRUE;
23159 break;
23161 default:
23162 break;
23165 if (succeed)
23167 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23168 arm_target_label = CODE_LABEL_NUMBER (label);
23169 else
23171 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23173 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23175 this_insn = next_nonnote_insn (this_insn);
23176 gcc_assert (!this_insn
23177 || (!BARRIER_P (this_insn)
23178 && !LABEL_P (this_insn)));
23180 if (!this_insn)
23182 /* Oh, dear! We ran off the end... give up. */
23183 extract_constrain_insn_cached (insn);
23184 arm_ccfsm_state = 0;
23185 arm_target_insn = NULL;
23186 return;
23188 arm_target_insn = this_insn;
23191 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23192 what it was. */
23193 if (!reverse)
23194 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23196 if (reverse || then_not_else)
23197 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23200 /* Restore recog_data (getting the attributes of other insns can
23201 destroy this array, but final.c assumes that it remains intact
23202 across this call). */
23203 extract_constrain_insn_cached (insn);
23207 /* Output IT instructions. */
23208 void
23209 thumb2_asm_output_opcode (FILE * stream)
23211 char buff[5];
23212 int n;
23214 if (arm_condexec_mask)
23216 for (n = 0; n < arm_condexec_masklen; n++)
23217 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23218 buff[n] = 0;
23219 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23220 arm_condition_codes[arm_current_cc]);
23221 arm_condexec_mask = 0;
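/* Illustrative example (values chosen for exposition): with
   arm_current_cc == ARM_EQ, arm_condexec_masklen == 3 and
   arm_condexec_mask == 0x3, BUFF becomes "tte" and the block is
   prefixed with

	itte	eq

   so the three following instructions execute as EQ, EQ and NE
   respectively.  */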
23225 /* Returns true if REGNO is a valid register
23226 for holding a quantity of type MODE. */
23228 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23230 if (GET_MODE_CLASS (mode) == MODE_CC)
23231 return (regno == CC_REGNUM
23232 || (TARGET_HARD_FLOAT
23233 && regno == VFPCC_REGNUM));
23235 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23236 return false;
23238 if (TARGET_THUMB1)
23239 /* For the Thumb we only allow values bigger than SImode in
23240 registers 0 - 6, so that there is always a second low
23241 register available to hold the upper part of the value.
23242 We probably ought to ensure that the register is the
23243 start of an even-numbered register pair. */
23244 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23246 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23248 if (mode == SFmode || mode == SImode)
23249 return VFP_REGNO_OK_FOR_SINGLE (regno);
23251 if (mode == DFmode)
23252 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23254 if (mode == HFmode)
23255 return VFP_REGNO_OK_FOR_SINGLE (regno);
23257 /* VFP registers can hold HImode values. */
23258 if (mode == HImode)
23259 return VFP_REGNO_OK_FOR_SINGLE (regno);
23261 if (TARGET_NEON)
23262 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23263 || (VALID_NEON_QREG_MODE (mode)
23264 && NEON_REGNO_OK_FOR_QUAD (regno))
23265 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23266 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23267 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23268 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23269 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23271 return FALSE;
23274 if (TARGET_REALLY_IWMMXT)
23276 if (IS_IWMMXT_GR_REGNUM (regno))
23277 return mode == SImode;
23279 if (IS_IWMMXT_REGNUM (regno))
23280 return VALID_IWMMXT_REG_MODE (mode);
23283 /* We allow almost any value to be stored in the general registers.
23284 Restrict doubleword quantities to even register pairs in ARM state
23285 so that we can use ldrd. Do not allow very large Neon structure
23286 opaque modes in general registers; they would use too many. */
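/* For example (illustrative registers), with TARGET_LDRD in ARM state a
   DImode value may live in {r0, r1} or {r2, r3} but not in {r1, r2}:
   doubleword values must start on an even register so that LDRD/STRD
   remain usable.  */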
23287 if (regno <= LAST_ARM_REGNUM)
23289 if (ARM_NUM_REGS (mode) > 4)
23290 return FALSE;
23292 if (TARGET_THUMB2)
23293 return TRUE;
23295 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23298 if (regno == FRAME_POINTER_REGNUM
23299 || regno == ARG_POINTER_REGNUM)
23300 /* We only allow integers in the fake hard registers. */
23301 return GET_MODE_CLASS (mode) == MODE_INT;
23303 return FALSE;
23306 /* Implement MODES_TIEABLE_P. */
23308 bool
23309 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23311 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23312 return true;
23314 /* We specifically want to allow elements of "structure" modes to
23315 be tieable to the structure. This more general condition allows
23316 other rarer situations too. */
23317 if (TARGET_NEON
23318 && (VALID_NEON_DREG_MODE (mode1)
23319 || VALID_NEON_QREG_MODE (mode1)
23320 || VALID_NEON_STRUCT_MODE (mode1))
23321 && (VALID_NEON_DREG_MODE (mode2)
23322 || VALID_NEON_QREG_MODE (mode2)
23323 || VALID_NEON_STRUCT_MODE (mode2)))
23324 return true;
23326 return false;
23329 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23330 not used in arm mode. */
23332 enum reg_class
23333 arm_regno_class (int regno)
23335 if (regno == PC_REGNUM)
23336 return NO_REGS;
23338 if (TARGET_THUMB1)
23340 if (regno == STACK_POINTER_REGNUM)
23341 return STACK_REG;
23342 if (regno == CC_REGNUM)
23343 return CC_REG;
23344 if (regno < 8)
23345 return LO_REGS;
23346 return HI_REGS;
23349 if (TARGET_THUMB2 && regno < 8)
23350 return LO_REGS;
23352 if ( regno <= LAST_ARM_REGNUM
23353 || regno == FRAME_POINTER_REGNUM
23354 || regno == ARG_POINTER_REGNUM)
23355 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23357 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23358 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23360 if (IS_VFP_REGNUM (regno))
23362 if (regno <= D7_VFP_REGNUM)
23363 return VFP_D0_D7_REGS;
23364 else if (regno <= LAST_LO_VFP_REGNUM)
23365 return VFP_LO_REGS;
23366 else
23367 return VFP_HI_REGS;
23370 if (IS_IWMMXT_REGNUM (regno))
23371 return IWMMXT_REGS;
23373 if (IS_IWMMXT_GR_REGNUM (regno))
23374 return IWMMXT_GR_REGS;
23376 return NO_REGS;
23379 /* Handle a special case when computing the offset
23380 of an argument from the frame pointer. */
23382 arm_debugger_arg_offset (int value, rtx addr)
23384 rtx_insn *insn;
23386 /* We are only interested if dbxout_parms() failed to compute the offset. */
23387 if (value != 0)
23388 return 0;
23390 /* We can only cope with the case where the address is held in a register. */
23391 if (!REG_P (addr))
23392 return 0;
23394 /* If we are using the frame pointer to point at the argument, then
23395 an offset of 0 is correct. */
23396 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23397 return 0;
23399 /* If we are using the stack pointer to point at the
23400 argument, then an offset of 0 is correct. */
23401 /* ??? Check this is consistent with thumb2 frame layout. */
23402 if ((TARGET_THUMB || !frame_pointer_needed)
23403 && REGNO (addr) == SP_REGNUM)
23404 return 0;
23406 /* Oh dear. The argument is pointed to by a register rather
23407 than being held in a register, or being stored at a known
23408 offset from the frame pointer. Since GDB only understands
23409 those two kinds of argument we must translate the address
23410 held in the register into an offset from the frame pointer.
23411 We do this by searching through the insns for the function
23412 looking to see where this register gets its value. If the
23413 register is initialized from the frame pointer plus an offset
23414 then we are in luck and we can continue, otherwise we give up.
23416 This code is exercised by producing debugging information
23417 for a function with arguments like this:
23419 double func (double a, double b, int c, double d) {return d;}
23421 Without this code the stab for parameter 'd' will be set to
23422 an offset of 0 from the frame pointer, rather than 8. */
23424 /* The if() statement says:
23426 If the insn is a normal instruction
23427 and if the insn is setting the value in a register
23428 and if the register being set is the register holding the address of the argument
23429 and if the address is computed by an addition
23430 that involves adding to a register
23431 which is the frame pointer
23432 a constant integer
23434 then... */
23436 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23438 if ( NONJUMP_INSN_P (insn)
23439 && GET_CODE (PATTERN (insn)) == SET
23440 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23441 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23442 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23443 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23444 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23447 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23449 break;
23453 if (value == 0)
23455 debug_rtx (addr);
23456 warning (0, "unable to compute real location of stacked parameter");
23457 value = 8; /* XXX magic hack */
23460 return value;
23463 /* Implement TARGET_PROMOTED_TYPE. */
23465 static tree
23466 arm_promoted_type (const_tree t)
23468 if (SCALAR_FLOAT_TYPE_P (t)
23469 && TYPE_PRECISION (t) == 16
23470 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23471 return float_type_node;
23472 return NULL_TREE;
23475 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23476 This simply adds HFmode as a supported mode; even though we don't
23477 implement arithmetic on this type directly, it's supported by
23478 optabs conversions, much the way the double-word arithmetic is
23479 special-cased in the default hook. */
23481 static bool
23482 arm_scalar_mode_supported_p (machine_mode mode)
23484 if (mode == HFmode)
23485 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23486 else if (ALL_FIXED_POINT_MODE_P (mode))
23487 return true;
23488 else
23489 return default_scalar_mode_supported_p (mode);
23492 /* Set the value of FLT_EVAL_METHOD.
23493 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23495 0: evaluate all operations and constants, whose semantic type has at
23496 most the range and precision of type float, to the range and
23497 precision of float; evaluate all other operations and constants to
23498 the range and precision of the semantic type;
23500 N, where _FloatN is a supported interchange floating type
23501 evaluate all operations and constants, whose semantic type has at
23502 most the range and precision of _FloatN type, to the range and
23503 precision of the _FloatN type; evaluate all other operations and
23504 constants to the range and precision of the semantic type;
23506 If we have the ARMv8.2-A extensions then we support _Float16 in native
23507 precision, so we should set this to 16. Otherwise, we support the type,
23508 but want to evaluate expressions in float precision, so set this to
23509 0. */
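/* As an illustration (hypothetical source fragment):

     _Float16 a, b, c;
     float r = a + b * c;

   With the ARMv8.2-A FP16 instructions available, the multiply and add
   may be evaluated directly in _Float16 precision (FLT_EVAL_METHOD 16);
   otherwise both operations are carried out in float precision
   (FLT_EVAL_METHOD 0).  */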
23511 static enum flt_eval_method
23512 arm_excess_precision (enum excess_precision_type type)
23514 switch (type)
23516 case EXCESS_PRECISION_TYPE_FAST:
23517 case EXCESS_PRECISION_TYPE_STANDARD:
23518 /* We can calculate either in 16-bit range and precision or
23519 32-bit range and precision. Make that decision based on whether
23520 we have native support for the ARMv8.2-A 16-bit floating-point
23521 instructions or not. */
23522 return (TARGET_VFP_FP16INST
23523 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23524 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23525 case EXCESS_PRECISION_TYPE_IMPLICIT:
23526 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23527 default:
23528 gcc_unreachable ();
23530 return FLT_EVAL_METHOD_UNPREDICTABLE;
23534 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23535 _Float16 if we are using anything other than ieee format for 16-bit
23536 floating point. Otherwise, punt to the default implementation. */
23537 static machine_mode
23538 arm_floatn_mode (int n, bool extended)
23540 if (!extended && n == 16)
23541 return arm_fp16_format == ARM_FP16_FORMAT_IEEE ? HFmode : VOIDmode;
23543 return default_floatn_mode (n, extended);
23547 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23548 not to early-clobber SRC registers in the process.
23550 We assume that the operands described by SRC and DEST represent a
23551 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23552 number of components into which the copy has been decomposed. */
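/* For example (illustrative registers): a two-component copy whose
   source occupies {d1, d2} and whose destination occupies {d2, d3}
   overlaps with REGNO (operands[0]) > REGNO (operands[1]), so the
   components are ordered d3 <- d2 followed by d2 <- d1; emitting
   d2 <- d1 first would clobber the source of the second move.  */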
23553 void
23554 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23556 unsigned int i;
23558 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23559 || REGNO (operands[0]) < REGNO (operands[1]))
23561 for (i = 0; i < count; i++)
23563 operands[2 * i] = dest[i];
23564 operands[2 * i + 1] = src[i];
23567 else
23569 for (i = 0; i < count; i++)
23571 operands[2 * i] = dest[count - i - 1];
23572 operands[2 * i + 1] = src[count - i - 1];
23577 /* Split operands into moves from op[1] + op[2] into op[0]. */
23579 void
23580 neon_split_vcombine (rtx operands[3])
23582 unsigned int dest = REGNO (operands[0]);
23583 unsigned int src1 = REGNO (operands[1]);
23584 unsigned int src2 = REGNO (operands[2]);
23585 machine_mode halfmode = GET_MODE (operands[1]);
23586 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23587 rtx destlo, desthi;
23589 if (src1 == dest && src2 == dest + halfregs)
23591 /* No-op move. Can't split to nothing; emit something. */
23592 emit_note (NOTE_INSN_DELETED);
23593 return;
23596 /* Preserve register attributes for variable tracking. */
23597 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23598 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23599 GET_MODE_SIZE (halfmode));
23601 /* Special case of reversed high/low parts. Use VSWP. */
23602 if (src2 == dest && src1 == dest + halfregs)
23604 rtx x = gen_rtx_SET (destlo, operands[1]);
23605 rtx y = gen_rtx_SET (desthi, operands[2]);
23606 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23607 return;
23610 if (!reg_overlap_mentioned_p (operands[2], destlo))
23612 /* Try to avoid unnecessary moves if part of the result
23613 is in the right place already. */
23614 if (src1 != dest)
23615 emit_move_insn (destlo, operands[1]);
23616 if (src2 != dest + halfregs)
23617 emit_move_insn (desthi, operands[2]);
23619 else
23621 if (src2 != dest + halfregs)
23622 emit_move_insn (desthi, operands[2]);
23623 if (src1 != dest)
23624 emit_move_insn (destlo, operands[1]);
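/* For example (illustrative registers): combining d0 and d1 into q0 is
   already a no-op, so only a deleted-insn note is emitted; combining d1
   and d0 into q0 hits the reversed case and uses a single VSWP;
   otherwise one or two ordinary moves are emitted in an order chosen so
   that neither move overwrites a still-needed source.  */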
23628 /* Return the number (counting from 0) of
23629 the least significant set bit in MASK. */
23631 inline static int
23632 number_of_first_bit_set (unsigned mask)
23634 return ctz_hwi (mask);
23637 /* Like emit_multi_reg_push, but allowing for a different set of
23638 registers to be described as saved. MASK is the set of registers
23639 to be saved; REAL_REGS is the set of registers to be described as
23640 saved. If REAL_REGS is 0, only describe the stack adjustment. */
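/* For example (illustrative values): MASK covering r4, r5 and LR
   generates the RTL for a single "push {r4, r5, lr}".  With REAL_REGS
   equal to MASK the unwind note describes the 12-byte stack adjustment
   and all three saves; with REAL_REGS of 0 it describes only the stack
   adjustment.  */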
23642 static rtx_insn *
23643 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23645 unsigned long regno;
23646 rtx par[10], tmp, reg;
23647 rtx_insn *insn;
23648 int i, j;
23650 /* Build the parallel of the registers actually being stored. */
23651 for (i = 0; mask; ++i, mask &= mask - 1)
23653 regno = ctz_hwi (mask);
23654 reg = gen_rtx_REG (SImode, regno);
23656 if (i == 0)
23657 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23658 else
23659 tmp = gen_rtx_USE (VOIDmode, reg);
23661 par[i] = tmp;
23664 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23665 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23666 tmp = gen_frame_mem (BLKmode, tmp);
23667 tmp = gen_rtx_SET (tmp, par[0]);
23668 par[0] = tmp;
23670 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23671 insn = emit_insn (tmp);
23673 /* Always build the stack adjustment note for unwind info. */
23674 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23675 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23676 par[0] = tmp;
23678 /* Build the parallel of the registers recorded as saved for unwind. */
23679 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23681 regno = ctz_hwi (real_regs);
23682 reg = gen_rtx_REG (SImode, regno);
23684 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23685 tmp = gen_frame_mem (SImode, tmp);
23686 tmp = gen_rtx_SET (tmp, reg);
23687 RTX_FRAME_RELATED_P (tmp) = 1;
23688 par[j + 1] = tmp;
23691 if (j == 0)
23692 tmp = par[0];
23693 else
23695 RTX_FRAME_RELATED_P (par[0]) = 1;
23696 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23699 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23701 return insn;
23704 /* Emit code to push or pop registers to or from the stack. F is the
23705 assembly file. MASK is the registers to pop. */
23706 static void
23707 thumb_pop (FILE *f, unsigned long mask)
23709 int regno;
23710 int lo_mask = mask & 0xFF;
23711 int pushed_words = 0;
23713 gcc_assert (mask);
23715 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23717 /* Special case. Do not generate a POP PC statement here, do it in
23718 thumb_exit(). */
23719 thumb_exit (f, -1);
23720 return;
23723 fprintf (f, "\tpop\t{");
23725 /* Look at the low registers first. */
23726 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23728 if (lo_mask & 1)
23730 asm_fprintf (f, "%r", regno);
23732 if ((lo_mask & ~1) != 0)
23733 fprintf (f, ", ");
23735 pushed_words++;
23739 if (mask & (1 << PC_REGNUM))
23741 /* Catch popping the PC. */
23742 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23743 || IS_CMSE_ENTRY (arm_current_func_type ()))
23745 /* The PC is never popped directly; instead
23746 it is popped into r3 and then BX is used. */
23747 fprintf (f, "}\n");
23749 thumb_exit (f, -1);
23751 return;
23753 else
23755 if (mask & 0xFF)
23756 fprintf (f, ", ");
23758 asm_fprintf (f, "%r", PC_REGNUM);
23762 fprintf (f, "}\n");
23765 /* Generate code to return from a thumb function.
23766 If 'reg_containing_return_addr' is -1, then the return address is
23767 actually on the stack, at the stack pointer. */
23768 static void
23769 thumb_exit (FILE *f, int reg_containing_return_addr)
23771 unsigned regs_available_for_popping;
23772 unsigned regs_to_pop;
23773 int pops_needed;
23774 unsigned available;
23775 unsigned required;
23776 machine_mode mode;
23777 int size;
23778 int restore_a4 = FALSE;
23780 /* Compute the registers we need to pop. */
23781 regs_to_pop = 0;
23782 pops_needed = 0;
23784 if (reg_containing_return_addr == -1)
23786 regs_to_pop |= 1 << LR_REGNUM;
23787 ++pops_needed;
23790 if (TARGET_BACKTRACE)
23792 /* Restore the (ARM) frame pointer and stack pointer. */
23793 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23794 pops_needed += 2;
23797 /* If there is nothing to pop then just emit the BX instruction and
23798 return. */
23799 if (pops_needed == 0)
23801 if (crtl->calls_eh_return)
23802 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23804 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23806 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23807 reg_containing_return_addr);
23808 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23810 else
23811 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23812 return;
23814 /* Otherwise if we are not supporting interworking and we have not created
23815 a backtrace structure and the function was not entered in ARM mode then
23816 just pop the return address straight into the PC. */
23817 else if (!TARGET_INTERWORK
23818 && !TARGET_BACKTRACE
23819 && !is_called_in_ARM_mode (current_function_decl)
23820 && !crtl->calls_eh_return
23821 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23823 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23824 return;
23827 /* Find out how many of the (return) argument registers we can corrupt. */
23828 regs_available_for_popping = 0;
23830 /* If returning via __builtin_eh_return, the bottom three registers
23831 all contain information needed for the return. */
23832 if (crtl->calls_eh_return)
23833 size = 12;
23834 else
23836 /* Deduce the registers used from the function's
23837 return value. This is more reliable than examining
23838 df_regs_ever_live_p () because that will be set if the register is
23839 ever used in the function, not just if the register is used
23840 to hold a return value. */
23842 if (crtl->return_rtx != 0)
23843 mode = GET_MODE (crtl->return_rtx);
23844 else
23845 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23847 size = GET_MODE_SIZE (mode);
23849 if (size == 0)
23851 /* In a void function we can use any argument register.
23852 In a function that returns a structure on the stack
23853 we can use the second and third argument registers. */
23854 if (mode == VOIDmode)
23855 regs_available_for_popping =
23856 (1 << ARG_REGISTER (1))
23857 | (1 << ARG_REGISTER (2))
23858 | (1 << ARG_REGISTER (3));
23859 else
23860 regs_available_for_popping =
23861 (1 << ARG_REGISTER (2))
23862 | (1 << ARG_REGISTER (3));
23864 else if (size <= 4)
23865 regs_available_for_popping =
23866 (1 << ARG_REGISTER (2))
23867 | (1 << ARG_REGISTER (3));
23868 else if (size <= 8)
23869 regs_available_for_popping =
23870 (1 << ARG_REGISTER (3));
23873 /* Match registers to be popped with registers into which we pop them. */
23874 for (available = regs_available_for_popping,
23875 required = regs_to_pop;
23876 required != 0 && available != 0;
23877 available &= ~(available & - available),
23878 required &= ~(required & - required))
23879 -- pops_needed;
23881 /* If we have any popping registers left over, remove them. */
23882 if (available > 0)
23883 regs_available_for_popping &= ~available;
23885 /* Otherwise if we need another popping register we can use
23886 the fourth argument register. */
23887 else if (pops_needed)
23889 /* If we have not found any free argument registers and
23890 reg a4 contains the return address, we must move it. */
23891 if (regs_available_for_popping == 0
23892 && reg_containing_return_addr == LAST_ARG_REGNUM)
23894 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23895 reg_containing_return_addr = LR_REGNUM;
23897 else if (size > 12)
23899 /* Register a4 is being used to hold part of the return value,
23900 but we have dire need of a free, low register. */
23901 restore_a4 = TRUE;
23903 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23906 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23908 /* The fourth argument register is available. */
23909 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23911 --pops_needed;
23915 /* Pop as many registers as we can. */
23916 thumb_pop (f, regs_available_for_popping);
23918 /* Process the registers we popped. */
23919 if (reg_containing_return_addr == -1)
23921 /* The return address was popped into the lowest numbered register. */
23922 regs_to_pop &= ~(1 << LR_REGNUM);
23924 reg_containing_return_addr =
23925 number_of_first_bit_set (regs_available_for_popping);
23927 /* Remove this register from the mask of available registers, so that
23928 the return address will not be corrupted by further pops. */
23929 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23932 /* If we popped other registers then handle them here. */
23933 if (regs_available_for_popping)
23935 int frame_pointer;
23937 /* Work out which register currently contains the frame pointer. */
23938 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23940 /* Move it into the correct place. */
23941 asm_fprintf (f, "\tmov\t%r, %r\n",
23942 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23944 /* (Temporarily) remove it from the mask of popped registers. */
23945 regs_available_for_popping &= ~(1 << frame_pointer);
23946 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23948 if (regs_available_for_popping)
23950 int stack_pointer;
23952 /* We popped the stack pointer as well,
23953 find the register that contains it. */
23954 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23956 /* Move it into the stack register. */
23957 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23959 /* At this point we have popped all necessary registers, so
23960 do not worry about restoring regs_available_for_popping
23961 to its correct value:
23963 assert (pops_needed == 0)
23964 assert (regs_available_for_popping == (1 << frame_pointer))
23965 assert (regs_to_pop == (1 << STACK_POINTER)) */
23967 else
23969 /* Since we have just moved the popped value into the frame
23970 pointer, the popping register is available for reuse, and
23971 we know that we still have the stack pointer left to pop. */
23972 regs_available_for_popping |= (1 << frame_pointer);
23976 /* If we still have registers left on the stack, but we no longer have
23977 any registers into which we can pop them, then we must move the return
23978 address into the link register and make available the register that
23979 contained it. */
23980 if (regs_available_for_popping == 0 && pops_needed > 0)
23982 regs_available_for_popping |= 1 << reg_containing_return_addr;
23984 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23985 reg_containing_return_addr);
23987 reg_containing_return_addr = LR_REGNUM;
23990 /* If we have registers left on the stack then pop some more.
23991 We know that at most we will want to pop FP and SP. */
23992 if (pops_needed > 0)
23994 int popped_into;
23995 int move_to;
23997 thumb_pop (f, regs_available_for_popping);
23999 /* We have popped either FP or SP.
24000 Move whichever one it is into the correct register. */
24001 popped_into = number_of_first_bit_set (regs_available_for_popping);
24002 move_to = number_of_first_bit_set (regs_to_pop);
24004 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24006 regs_to_pop &= ~(1 << move_to);
24008 --pops_needed;
24011 /* If we still have not popped everything then we must have only
24012 had one register available to us and we are now popping the SP. */
24013 if (pops_needed > 0)
24015 int popped_into;
24017 thumb_pop (f, regs_available_for_popping);
24019 popped_into = number_of_first_bit_set (regs_available_for_popping);
24021 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24023 assert (regs_to_pop == (1 << STACK_POINTER))
24024 assert (pops_needed == 1)
24028 /* If necessary restore the a4 register. */
24029 if (restore_a4)
24031 if (reg_containing_return_addr != LR_REGNUM)
24033 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24034 reg_containing_return_addr = LR_REGNUM;
24037 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24040 if (crtl->calls_eh_return)
24041 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24043 /* Return to caller. */
24044 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24046 /* This is for the cases where LR is not being used to contain the return
24047 address. It may therefore contain information that we might not want
24048 to leak, hence it must be cleared. The value in R0 will never be a
24049 secret at this point, so it is safe to use it; see the clearing code
24050 in 'cmse_nonsecure_entry_clear_before_return'. */
24051 if (reg_containing_return_addr != LR_REGNUM)
24052 asm_fprintf (f, "\tmov\tlr, r0\n");
24054 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24055 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24057 else
24058 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24061 /* Scan INSN just before assembler is output for it.
24062 For Thumb-1, we track the status of the condition codes; this
24063 information is used in the cbranchsi4_insn pattern. */
24064 void
24065 thumb1_final_prescan_insn (rtx_insn *insn)
24067 if (flag_print_asm_name)
24068 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24069 INSN_ADDRESSES (INSN_UID (insn)));
24070 /* Don't overwrite the previous setter when we get to a cbranch. */
24071 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24073 enum attr_conds conds;
24075 if (cfun->machine->thumb1_cc_insn)
24077 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24078 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24079 CC_STATUS_INIT;
24081 conds = get_attr_conds (insn);
24082 if (conds == CONDS_SET)
24084 rtx set = single_set (insn);
24085 cfun->machine->thumb1_cc_insn = insn;
24086 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24087 cfun->machine->thumb1_cc_op1 = const0_rtx;
24088 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24089 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24091 rtx src1 = XEXP (SET_SRC (set), 1);
24092 if (src1 == const0_rtx)
24093 cfun->machine->thumb1_cc_mode = CCmode;
24095 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24097 /* Record the src register operand instead of dest because
24098 cprop_hardreg pass propagates src. */
24099 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24102 else if (conds != CONDS_NOCOND)
24103 cfun->machine->thumb1_cc_insn = NULL_RTX;
24106 /* Check if unexpected far jump is used. */
24107 if (cfun->machine->lr_save_eliminated
24108 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24109 internal_error("Unexpected thumb1 far jump");
24113 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24115 unsigned HOST_WIDE_INT mask = 0xff;
24116 int i;
24118 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24119 if (val == 0) /* XXX */
24120 return 0;
24122 for (i = 0; i < 25; i++)
24123 if ((val & (mask << i)) == val)
24124 return 1;
24126 return 0;
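/* For example, 0x000ff000 (0xff << 12) and 0x00000370 (0x37 << 4) are
   accepted, while 0x00100001 is rejected because its set bits do not fit
   within any single 8-bit window.  */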
24129 /* Returns nonzero if the current function contains,
24130 or might contain a far jump. */
24131 static int
24132 thumb_far_jump_used_p (void)
24134 rtx_insn *insn;
24135 bool far_jump = false;
24136 unsigned int func_size = 0;
24138 /* If we have already decided that far jumps may be used,
24139 do not bother checking again, and always return true even if
24140 it turns out that they are not being used. Once we have made
24141 the decision that far jumps are present (and that hence the link
24142 register will be pushed onto the stack) we cannot go back on it. */
24143 if (cfun->machine->far_jump_used)
24144 return 1;
24146 /* If this function is not being called from the prologue/epilogue
24147 generation code then it must be being called from the
24148 INITIAL_ELIMINATION_OFFSET macro. */
24149 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24151 /* In this case we know that we are being asked about the elimination
24152 of the arg pointer register. If that register is not being used,
24153 then there are no arguments on the stack, and we do not have to
24154 worry that a far jump might force the prologue to push the link
24155 register, changing the stack offsets. In this case we can just
24156 return false, since the presence of far jumps in the function will
24157 not affect stack offsets.
24159 If the arg pointer is live (or if it was live, but has now been
24160 eliminated and so set to dead) then we do have to test to see if
24161 the function might contain a far jump. This test can lead to some
24162 false negatives, since before reload is completed the length of
24163 branch instructions is not known, so gcc defaults to returning their
24164 longest length, which in turn sets the far jump attribute to true.
24166 A false negative will not result in bad code being generated, but it
24167 will result in a needless push and pop of the link register. We
24168 hope that this does not occur too often.
24170 If we need doubleword stack alignment this could affect the other
24171 elimination offsets so we can't risk getting it wrong. */
24172 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24173 cfun->machine->arg_pointer_live = 1;
24174 else if (!cfun->machine->arg_pointer_live)
24175 return 0;
24178 /* We should not change far_jump_used during or after reload, as there is
24179 no chance to change stack frame layout. */
24180 if (reload_in_progress || reload_completed)
24181 return 0;
24183 /* Check to see if the function contains a branch
24184 insn with the far jump attribute set. */
24185 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24187 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24189 far_jump = true;
24191 func_size += get_attr_length (insn);
24194 /* The far_jump attribute will always be true for thumb1 before the
24195 shorten_branch pass, so checking the far_jump attribute before
24196 shorten_branch isn't very useful.
24198 The following heuristic tries to estimate more accurately whether a far
24199 jump may finally be used. The heuristic is very conservative, as there is
24200 no chance to roll back the decision not to use far jumps.
24202 A Thumb-1 long branch offset is -2048 to 2046. The worst case is that
24203 each 2-byte insn is associated with a 4-byte constant pool entry. Using
24204 function size 2048/3 as the threshold is conservative enough. */
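/* Illustrative arithmetic: a function of about 700 Thumb-1 instructions
   has a FUNC_SIZE of roughly 1400 bytes; 1400 * 3 = 4200 >= 2048, so a
   branch is assumed to possibly need more than the +/- 2 KiB range and
   far_jump_used is recorded.  */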
24205 if (far_jump)
24207 if ((func_size * 3) >= 2048)
24209 /* Record the fact that we have decided that
24210 the function does use far jumps. */
24211 cfun->machine->far_jump_used = 1;
24212 return 1;
24216 return 0;
24219 /* Return nonzero if FUNC must be entered in ARM mode. */
24220 static bool
24221 is_called_in_ARM_mode (tree func)
24223 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24225 /* Ignore the problem about functions whose address is taken. */
24226 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24227 return true;
24229 #ifdef ARM_PE
24230 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24231 #else
24232 return false;
24233 #endif
24236 /* Given the stack offsets and register mask in OFFSETS, decide how
24237 many additional registers to push instead of subtracting a constant
24238 from SP. For epilogues the principle is the same except we use pop.
24239 FOR_PROLOGUE indicates which we're generating. */
24240 static int
24241 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24243 HOST_WIDE_INT amount;
24244 unsigned long live_regs_mask = offsets->saved_regs_mask;
24245 /* Extract a mask of the ones we can give to the Thumb's push/pop
24246 instruction. */
24247 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24248 /* Then count how many other high registers will need to be pushed. */
24249 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24250 int n_free, reg_base, size;
24252 if (!for_prologue && frame_pointer_needed)
24253 amount = offsets->locals_base - offsets->saved_regs;
24254 else
24255 amount = offsets->outgoing_args - offsets->saved_regs;
24257 /* If the stack frame size is 512 exactly, we can save one load
24258 instruction, which should make this a win even when optimizing
24259 for speed. */
24260 if (!optimize_size && amount != 512)
24261 return 0;
24263 /* Can't do this if there are high registers to push. */
24264 if (high_regs_pushed != 0)
24265 return 0;
24267 /* Shouldn't do it in the prologue if no registers would normally
24268 be pushed at all. In the epilogue, also allow it if we'll have
24269 a pop insn for the PC. */
24270 if (l_mask == 0
24271 && (for_prologue
24272 || TARGET_BACKTRACE
24273 || (live_regs_mask & 1 << LR_REGNUM) == 0
24274 || TARGET_INTERWORK
24275 || crtl->args.pretend_args_size != 0))
24276 return 0;
24278 /* Don't do this if thumb_expand_prologue wants to emit instructions
24279 between the push and the stack frame allocation. */
24280 if (for_prologue
24281 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24282 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24283 return 0;
24285 reg_base = 0;
24286 n_free = 0;
24287 if (!for_prologue)
24289 size = arm_size_return_regs ();
24290 reg_base = ARM_NUM_INTS (size);
24291 live_regs_mask >>= reg_base;
24294 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24295 && (for_prologue || call_used_regs[reg_base + n_free]))
24297 live_regs_mask >>= 1;
24298 n_free++;
24301 if (n_free == 0)
24302 return 0;
24303 gcc_assert (amount / 4 * 4 == amount);
24305 if (amount >= 512 && (amount - n_free * 4) < 512)
24306 return (amount - 508) / 4;
24307 if (amount <= n_free * 4)
24308 return amount / 4;
24309 return 0;
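/* Worked example (illustrative numbers): AMOUNT == 516 with three free
   low registers gives 516 - 3 * 4 = 504 < 512, so (516 - 508) / 4 == 2
   extra registers are pushed and the remaining adjustment of 508 still
   fits in a single SUB SP immediate.  */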
24312 /* The bits which aren't usefully expanded as rtl. */
24313 const char *
24314 thumb1_unexpanded_epilogue (void)
24316 arm_stack_offsets *offsets;
24317 int regno;
24318 unsigned long live_regs_mask = 0;
24319 int high_regs_pushed = 0;
24320 int extra_pop;
24321 int had_to_push_lr;
24322 int size;
24324 if (cfun->machine->return_used_this_function != 0)
24325 return "";
24327 if (IS_NAKED (arm_current_func_type ()))
24328 return "";
24330 offsets = arm_get_frame_offsets ();
24331 live_regs_mask = offsets->saved_regs_mask;
24332 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24334 /* Deduce the registers used from the function's return value.
24335 This is more reliable than examining df_regs_ever_live_p () because that
24336 will be set if the register is ever used in the function, not just if
24337 the register is used to hold a return value. */
24338 size = arm_size_return_regs ();
24340 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24341 if (extra_pop > 0)
24343 unsigned long extra_mask = (1 << extra_pop) - 1;
24344 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24347 /* The prolog may have pushed some high registers to use as
24348 work registers. For example, the testsuite file:
24349 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24350 compiles to produce:
24351 push {r4, r5, r6, r7, lr}
24352 mov r7, r9
24353 mov r6, r8
24354 push {r6, r7}
24355 as part of the prolog. We have to undo that pushing here. */
24357 if (high_regs_pushed)
24359 unsigned long mask = live_regs_mask & 0xff;
24360 int next_hi_reg;
24362 /* The available low registers depend on the size of the value we are
24363 returning. */
24364 if (size <= 12)
24365 mask |= 1 << 3;
24366 if (size <= 8)
24367 mask |= 1 << 2;
24369 if (mask == 0)
24370 /* Oh dear! We have no low registers into which we can pop
24371 high registers! */
24372 internal_error
24373 ("no low registers available for popping high registers");
24375 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24376 if (live_regs_mask & (1 << next_hi_reg))
24377 break;
24379 while (high_regs_pushed)
24381 /* Find lo register(s) into which the high register(s) can
24382 be popped. */
24383 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24385 if (mask & (1 << regno))
24386 high_regs_pushed--;
24387 if (high_regs_pushed == 0)
24388 break;
24391 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24393 /* Pop the values into the low register(s). */
24394 thumb_pop (asm_out_file, mask);
24396 /* Move the value(s) into the high registers. */
24397 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24399 if (mask & (1 << regno))
24401 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24402 regno);
24404 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24405 if (live_regs_mask & (1 << next_hi_reg))
24406 break;
24410 live_regs_mask &= ~0x0f00;
24413 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24414 live_regs_mask &= 0xff;
24416 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24418 /* Pop the return address into the PC. */
24419 if (had_to_push_lr)
24420 live_regs_mask |= 1 << PC_REGNUM;
24422 /* Either no argument registers were pushed or a backtrace
24423 structure was created which includes an adjusted stack
24424 pointer, so just pop everything. */
24425 if (live_regs_mask)
24426 thumb_pop (asm_out_file, live_regs_mask);
24428 /* We have either just popped the return address into the
24429 PC or it was kept in LR for the entire function.
24430 Note that thumb_pop has already called thumb_exit if the
24431 PC was in the list. */
24432 if (!had_to_push_lr)
24433 thumb_exit (asm_out_file, LR_REGNUM);
24435 else
24437 /* Pop everything but the return address. */
24438 if (live_regs_mask)
24439 thumb_pop (asm_out_file, live_regs_mask);
24441 if (had_to_push_lr)
24443 if (size > 12)
24445 /* We have no free low regs, so save one. */
24446 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24447 LAST_ARG_REGNUM);
24450 /* Get the return address into a temporary register. */
24451 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24453 if (size > 12)
24455 /* Move the return address to lr. */
24456 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24457 LAST_ARG_REGNUM);
24458 /* Restore the low register. */
24459 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24460 IP_REGNUM);
24461 regno = LR_REGNUM;
24463 else
24464 regno = LAST_ARG_REGNUM;
24466 else
24467 regno = LR_REGNUM;
24469 /* Remove the argument registers that were pushed onto the stack. */
24470 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24471 SP_REGNUM, SP_REGNUM,
24472 crtl->args.pretend_args_size);
24474 thumb_exit (asm_out_file, regno);
24477 return "";
24480 /* Functions to save and restore machine-specific function data. */
24481 static struct machine_function *
24482 arm_init_machine_status (void)
24484 struct machine_function *machine;
24485 machine = ggc_cleared_alloc<machine_function> ();
24487 #if ARM_FT_UNKNOWN != 0
24488 machine->func_type = ARM_FT_UNKNOWN;
24489 #endif
24490 return machine;
24493 /* Return an RTX indicating where the return address to the
24494 calling function can be found. */
24496 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24498 if (count != 0)
24499 return NULL_RTX;
24501 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24504 /* Do anything needed before RTL is emitted for each function. */
24505 void
24506 arm_init_expanders (void)
24508 /* Arrange to initialize and mark the machine per-function status. */
24509 init_machine_status = arm_init_machine_status;
24511 /* This is to stop the combine pass optimizing away the alignment
24512 adjustment of va_arg. */
24513 /* ??? It is claimed that this should not be necessary. */
24514 if (cfun)
24515 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24518 /* Check that FUNC is called with a different mode. */
24520 bool
24521 arm_change_mode_p (tree func)
24523 if (TREE_CODE (func) != FUNCTION_DECL)
24524 return false;
24526 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24528 if (!callee_tree)
24529 callee_tree = target_option_default_node;
24531 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24532 int flags = callee_opts->x_target_flags;
24534 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24537 /* Like arm_compute_initial_elimination_offset. Simpler because there
24538 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24539 to point at the base of the local variables after static stack
24540 space for a function has been allocated. */
24542 HOST_WIDE_INT
24543 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24545 arm_stack_offsets *offsets;
24547 offsets = arm_get_frame_offsets ();
24549 switch (from)
24551 case ARG_POINTER_REGNUM:
24552 switch (to)
24554 case STACK_POINTER_REGNUM:
24555 return offsets->outgoing_args - offsets->saved_args;
24557 case FRAME_POINTER_REGNUM:
24558 return offsets->soft_frame - offsets->saved_args;
24560 case ARM_HARD_FRAME_POINTER_REGNUM:
24561 return offsets->saved_regs - offsets->saved_args;
24563 case THUMB_HARD_FRAME_POINTER_REGNUM:
24564 return offsets->locals_base - offsets->saved_args;
24566 default:
24567 gcc_unreachable ();
24569 break;
24571 case FRAME_POINTER_REGNUM:
24572 switch (to)
24574 case STACK_POINTER_REGNUM:
24575 return offsets->outgoing_args - offsets->soft_frame;
24577 case ARM_HARD_FRAME_POINTER_REGNUM:
24578 return offsets->saved_regs - offsets->soft_frame;
24580 case THUMB_HARD_FRAME_POINTER_REGNUM:
24581 return offsets->locals_base - offsets->soft_frame;
24583 default:
24584 gcc_unreachable ();
24586 break;
24588 default:
24589 gcc_unreachable ();
24593 /* Generate the function's prologue. */
24595 void
24596 thumb1_expand_prologue (void)
24598 rtx_insn *insn;
24600 HOST_WIDE_INT amount;
24601 HOST_WIDE_INT size;
24602 arm_stack_offsets *offsets;
24603 unsigned long func_type;
24604 int regno;
24605 unsigned long live_regs_mask;
24606 unsigned long l_mask;
24607 unsigned high_regs_pushed = 0;
24608 bool lr_needs_saving;
24610 func_type = arm_current_func_type ();
24612 /* Naked functions don't have prologues. */
24613 if (IS_NAKED (func_type))
24615 if (flag_stack_usage_info)
24616 current_function_static_stack_size = 0;
24617 return;
24620 if (IS_INTERRUPT (func_type))
24622 error ("interrupt Service Routines cannot be coded in Thumb mode");
24623 return;
24626 if (is_called_in_ARM_mode (current_function_decl))
24627 emit_insn (gen_prologue_thumb1_interwork ());
24629 offsets = arm_get_frame_offsets ();
24630 live_regs_mask = offsets->saved_regs_mask;
24631 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24633 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24634 l_mask = live_regs_mask & 0x40ff;
24635 /* Then count how many other high registers will need to be pushed. */
24636 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24638 if (crtl->args.pretend_args_size)
24640 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24642 if (cfun->machine->uses_anonymous_args)
24644 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24645 unsigned long mask;
24647 mask = 1ul << (LAST_ARG_REGNUM + 1);
24648 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24650 insn = thumb1_emit_multi_reg_push (mask, 0);
24652 else
24654 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24655 stack_pointer_rtx, x));
24657 RTX_FRAME_RELATED_P (insn) = 1;
24660 if (TARGET_BACKTRACE)
24662 HOST_WIDE_INT offset = 0;
24663 unsigned work_register;
24664 rtx work_reg, x, arm_hfp_rtx;
24666 /* We have been asked to create a stack backtrace structure.
24667 The code looks like this:
24669 0 .align 2
24670 0 func:
24671 0 sub SP, #16 Reserve space for 4 registers.
24672 2 push {R7} Push low registers.
24673 4 add R7, SP, #20 Get the stack pointer before the push.
24674 6 str R7, [SP, #8] Store the stack pointer
24675 (before reserving the space).
24676 8 mov R7, PC Get hold of the start of this code + 12.
24677 10 str R7, [SP, #16] Store it.
24678 12 mov R7, FP Get hold of the current frame pointer.
24679 14 str R7, [SP, #4] Store it.
24680 16 mov R7, LR Get hold of the current return address.
24681 18 str R7, [SP, #12] Store it.
24682 20 add R7, SP, #16 Point at the start of the
24683 backtrace structure.
24684 22 mov FP, R7 Put this value into the frame pointer. */
24686 work_register = thumb_find_work_register (live_regs_mask);
24687 work_reg = gen_rtx_REG (SImode, work_register);
24688 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24690 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24691 stack_pointer_rtx, GEN_INT (-16)));
24692 RTX_FRAME_RELATED_P (insn) = 1;
24694 if (l_mask)
24696 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24697 RTX_FRAME_RELATED_P (insn) = 1;
24698 lr_needs_saving = false;
24700 offset = bit_count (l_mask) * UNITS_PER_WORD;
24703 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24704 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24706 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24707 x = gen_frame_mem (SImode, x);
24708 emit_move_insn (x, work_reg);
24710 /* Make sure that the instruction fetching the PC is in the right place
24711 to calculate "start of backtrace creation code + 12". */
24712 /* ??? The stores using the common WORK_REG ought to be enough to
24713 prevent the scheduler from doing anything weird. Failing that
24714 we could always move all of the following into an UNSPEC_VOLATILE. */
24715 if (l_mask)
24717 x = gen_rtx_REG (SImode, PC_REGNUM);
24718 emit_move_insn (work_reg, x);
24720 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24721 x = gen_frame_mem (SImode, x);
24722 emit_move_insn (x, work_reg);
24724 emit_move_insn (work_reg, arm_hfp_rtx);
24726 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24727 x = gen_frame_mem (SImode, x);
24728 emit_move_insn (x, work_reg);
24730 else
24732 emit_move_insn (work_reg, arm_hfp_rtx);
24734 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24735 x = gen_frame_mem (SImode, x);
24736 emit_move_insn (x, work_reg);
24738 x = gen_rtx_REG (SImode, PC_REGNUM);
24739 emit_move_insn (work_reg, x);
24741 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24742 x = gen_frame_mem (SImode, x);
24743 emit_move_insn (x, work_reg);
24746 x = gen_rtx_REG (SImode, LR_REGNUM);
24747 emit_move_insn (work_reg, x);
24749 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24750 x = gen_frame_mem (SImode, x);
24751 emit_move_insn (x, work_reg);
24753 x = GEN_INT (offset + 12);
24754 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24756 emit_move_insn (arm_hfp_rtx, work_reg);
24758 /* Optimization: If we are not pushing any low registers but we are going
24759 to push some high registers then delay our first push. This will just
24760 be a push of LR and we can combine it with the push of the first high
24761 register. */
24762 else if ((l_mask & 0xff) != 0
24763 || (high_regs_pushed == 0 && lr_needs_saving))
24765 unsigned long mask = l_mask;
24766 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24767 insn = thumb1_emit_multi_reg_push (mask, mask);
24768 RTX_FRAME_RELATED_P (insn) = 1;
24769 lr_needs_saving = false;
24772 if (high_regs_pushed)
24774 unsigned pushable_regs;
24775 unsigned next_hi_reg;
24776 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24777 : crtl->args.info.nregs;
24778 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24780 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24781 if (live_regs_mask & (1 << next_hi_reg))
24782 break;
24784 /* Here we need to mask out registers used for passing arguments
24785 even if they can be pushed. This is to avoid using them to stash the high
24786 registers, since such stashing could clobber incoming argument values. */
24787 pushable_regs = l_mask & (~arg_regs_mask);
24788 if (lr_needs_saving)
24789 pushable_regs &= ~(1 << LR_REGNUM);
24791 if (pushable_regs == 0)
24792 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24794 while (high_regs_pushed > 0)
24796 unsigned long real_regs_mask = 0;
24797 unsigned long push_mask = 0;
24799 for (regno = LR_REGNUM; regno >= 0; regno --)
24801 if (pushable_regs & (1 << regno))
24803 emit_move_insn (gen_rtx_REG (SImode, regno),
24804 gen_rtx_REG (SImode, next_hi_reg));
24806 high_regs_pushed --;
24807 real_regs_mask |= (1 << next_hi_reg);
24808 push_mask |= (1 << regno);
24810 if (high_regs_pushed)
24812 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24813 next_hi_reg --)
24814 if (live_regs_mask & (1 << next_hi_reg))
24815 break;
24817 else
24818 break;
24822 /* If we had to find a work register and we have not yet
24823 saved the LR then add it to the list of regs to push. */
24824 if (lr_needs_saving)
24826 push_mask |= 1 << LR_REGNUM;
24827 real_regs_mask |= 1 << LR_REGNUM;
24828 lr_needs_saving = false;
24831 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24832 RTX_FRAME_RELATED_P (insn) = 1;
24836 /* Load the pic register before setting the frame pointer,
24837 so we can use r7 as a temporary work register. */
24838 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24839 arm_load_pic_register (live_regs_mask);
24841 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24842 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24843 stack_pointer_rtx);
24845 size = offsets->outgoing_args - offsets->saved_args;
24846 if (flag_stack_usage_info)
24847 current_function_static_stack_size = size;
24849 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24850 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24851 sorry ("-fstack-check=specific for Thumb-1");
24853 amount = offsets->outgoing_args - offsets->saved_regs;
24854 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
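/* Note: a Thumb-1 SUB SP, SP, #imm only encodes a 7-bit immediate scaled by
   4 (at most 508 bytes), hence the 512 threshold below; larger adjustments
   are made through a scratch register instead.  */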
24855 if (amount)
24857 if (amount < 512)
24859 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24860 GEN_INT (- amount)));
24861 RTX_FRAME_RELATED_P (insn) = 1;
24863 else
24865 rtx reg, dwarf;
24867 /* The stack decrement is too big for an immediate value in a single
24868 insn. In theory we could issue multiple subtracts, but after
24869 three of them it becomes more space efficient to place the full
24870 value in the constant pool and load into a register. (Also the
24871 ARM debugger really likes to see only one stack decrement per
24872 function). So instead we look for a scratch register into which
24873 we can load the decrement, and then we subtract this from the
24874 stack pointer. Unfortunately on the thumb the only available
24875 scratch registers are the argument registers, and we cannot use
24876 these as they may hold arguments to the function. Instead we
24877 attempt to locate a call preserved register which is used by this
24878 function. If we can find one, then we know that it will have
24879 been pushed at the start of the prologue and so we can corrupt
24880 it now. */
24881 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24882 if (live_regs_mask & (1 << regno))
24883 break;
24885 gcc_assert(regno <= LAST_LO_REGNUM);
24887 reg = gen_rtx_REG (SImode, regno);
24889 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24891 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24892 stack_pointer_rtx, reg));
24894 dwarf = gen_rtx_SET (stack_pointer_rtx,
24895 plus_constant (Pmode, stack_pointer_rtx,
24896 -amount));
24897 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24898 RTX_FRAME_RELATED_P (insn) = 1;
24902 if (frame_pointer_needed)
24903 thumb_set_frame_pointer (offsets);
24905 /* If we are profiling, make sure no instructions are scheduled before
24906 the call to mcount. Similarly if the user has requested no
24907 scheduling in the prolog. Similarly if we want non-call exceptions
24908 using the EABI unwinder, to prevent faulting instructions from being
24909 swapped with a stack adjustment. */
24910 if (crtl->profile || !TARGET_SCHED_PROLOG
24911 || (arm_except_unwind_info (&global_options) == UI_TARGET
24912 && cfun->can_throw_non_call_exceptions))
24913 emit_insn (gen_blockage ());
24915 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24916 if (live_regs_mask & 0xff)
24917 cfun->machine->lr_save_eliminated = 0;
24920 /* Clear caller saved registers not used to pass return values and leaked
24921 condition flags before exiting a cmse_nonsecure_entry function. */
24923 void
24924 cmse_nonsecure_entry_clear_before_return (void)
24926 uint64_t to_clear_mask[2];
24927 uint32_t padding_bits_to_clear = 0;
24928 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
24929 int regno, maxregno = IP_REGNUM;
24930 tree result_type;
24931 rtx result_rtl;
24933 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
24934 to_clear_mask[0] |= (1ULL << IP_REGNUM);
24936 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
24937 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
24938 to make sure the instructions used to clear them are present. */
24939 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
24941 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
24942 maxregno = LAST_VFP_REGNUM;
24944 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
24945 to_clear_mask[0] |= float_mask;
24947 float_mask = (1ULL << (maxregno - 63)) - 1;
24948 to_clear_mask[1] = float_mask;
24950 /* Make sure we don't clear the two scratch registers used to clear the
24951 relevant FPSCR bits in output_return_instruction. */
24952 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
24953 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
24954 emit_use (gen_rtx_REG (SImode, 4));
24955 to_clear_mask[0] &= ~(1ULL << 4);
24958 /* If the user has defined registers to be caller saved, these are no longer
24959 restored by the function before returning and must thus be cleared for
24960 security purposes. */
24961 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
24963 /* We do not touch registers that can be used to pass arguments as per
24964 the AAPCS, since these should never be made callee-saved by user
24965 options. */
24966 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
24967 continue;
24968 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
24969 continue;
24970 if (call_used_regs[regno])
24971 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
24974 /* Make sure we do not clear the registers used to return the result. */
24975 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
24976 if (!VOID_TYPE_P (result_type))
24978 result_rtl = arm_function_value (result_type, current_function_decl, 0);
24980 /* No need to check that we return in registers, because we don't
24981 support returning on stack yet. */
24982 to_clear_mask[0]
24983 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
24984 padding_bits_to_clear_ptr);
24987 if (padding_bits_to_clear != 0)
24989 rtx reg_rtx;
24990 /* Padding bits to clear is not 0, so we know we are dealing with
24991 returning a composite type, which only uses r0. Let's make sure that
24992 r1-r3 are cleared too; we will use r1 as a scratch register. */
24993 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
24995 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
24997 /* Fill the lower half of the negated padding_bits_to_clear. */
24998 emit_move_insn (reg_rtx,
24999 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25001 /* Also fill the top half of the negated padding_bits_to_clear. */
25002 if (((~padding_bits_to_clear) >> 16) > 0)
25003 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25004 GEN_INT (16),
25005 GEN_INT (16)),
25006 GEN_INT ((~padding_bits_to_clear) >> 16)));
25008 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25009 gen_rtx_REG (SImode, R0_REGNUM),
25010 reg_rtx));
25013 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25015 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25016 continue;
25018 if (IS_VFP_REGNUM (regno))
25020 /* If regno is an even vfp register and its successor is also to
25021 be cleared, use vmov. */
25022 if (TARGET_VFP_DOUBLE
25023 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25024 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25026 emit_move_insn (gen_rtx_REG (DFmode, regno),
25027 CONST1_RTX (DFmode));
25028 emit_use (gen_rtx_REG (DFmode, regno));
25029 regno++;
25031 else
25033 emit_move_insn (gen_rtx_REG (SFmode, regno),
25034 CONST1_RTX (SFmode));
25035 emit_use (gen_rtx_REG (SFmode, regno));
25038 else
25040 if (TARGET_THUMB1)
25042 if (regno == R0_REGNUM)
25043 emit_move_insn (gen_rtx_REG (SImode, regno),
25044 const0_rtx);
25045 else
25046 /* R0 has either been cleared before, see code above, or it
25047 holds a return value, either way it is not secret
25048 information. */
25049 emit_move_insn (gen_rtx_REG (SImode, regno),
25050 gen_rtx_REG (SImode, R0_REGNUM));
25051 emit_use (gen_rtx_REG (SImode, regno));
25053 else
25055 emit_move_insn (gen_rtx_REG (SImode, regno),
25056 gen_rtx_REG (SImode, LR_REGNUM));
25057 emit_use (gen_rtx_REG (SImode, regno));
25063 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
25064 POP instruction can be generated. LR should be replaced by PC. All
25065 the checks required are already done by USE_RETURN_INSN (). Hence,
25066 all we really need to check here is whether a single register or
25067 multiple registers are to be popped. */
25068 void
25069 thumb2_expand_return (bool simple_return)
25071 int i, num_regs;
25072 unsigned long saved_regs_mask;
25073 arm_stack_offsets *offsets;
25075 offsets = arm_get_frame_offsets ();
25076 saved_regs_mask = offsets->saved_regs_mask;
25078 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25079 if (saved_regs_mask & (1 << i))
25080 num_regs++;
25082 if (!simple_return && saved_regs_mask)
25084 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25085 functions, or adapt the code to handle it according to the ACLE. This
25086 path should not be reachable for cmse_nonsecure_entry functions, but we
25087 prefer to assert it for now to ensure that future code changes do not
25088 silently change this behavior. */
25089 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25090 if (num_regs == 1)
25092 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25093 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25094 rtx addr = gen_rtx_MEM (SImode,
25095 gen_rtx_POST_INC (SImode,
25096 stack_pointer_rtx));
25097 set_mem_alias_set (addr, get_frame_alias_set ());
25098 XVECEXP (par, 0, 0) = ret_rtx;
25099 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25100 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25101 emit_jump_insn (par);
25103 else
25105 saved_regs_mask &= ~ (1 << LR_REGNUM);
25106 saved_regs_mask |= (1 << PC_REGNUM);
25107 arm_emit_multi_reg_pop (saved_regs_mask);
25110 else
25112 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25113 cmse_nonsecure_entry_clear_before_return ();
25114 emit_jump_insn (simple_return_rtx);
25118 void
25119 thumb1_expand_epilogue (void)
25121 HOST_WIDE_INT amount;
25122 arm_stack_offsets *offsets;
25123 int regno;
25125 /* Naked functions don't have prologues. */
25126 if (IS_NAKED (arm_current_func_type ()))
25127 return;
25129 offsets = arm_get_frame_offsets ();
25130 amount = offsets->outgoing_args - offsets->saved_regs;
25132 if (frame_pointer_needed)
25134 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25135 amount = offsets->locals_base - offsets->saved_regs;
25137 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25139 gcc_assert (amount >= 0);
25140 if (amount)
25142 emit_insn (gen_blockage ());
25144 if (amount < 512)
25145 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25146 GEN_INT (amount)));
25147 else
25149 /* r3 is always free in the epilogue. */
25150 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25152 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25153 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25157 /* Emit a USE (stack_pointer_rtx), so that
25158 the stack adjustment will not be deleted. */
25159 emit_insn (gen_force_register_use (stack_pointer_rtx));
25161 if (crtl->profile || !TARGET_SCHED_PROLOG)
25162 emit_insn (gen_blockage ());
25164 /* Emit a clobber for each insn that will be restored in the epilogue,
25165 so that flow2 will get register lifetimes correct. */
25166 for (regno = 0; regno < 13; regno++)
25167 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25168 emit_clobber (gen_rtx_REG (SImode, regno));
25170 if (! df_regs_ever_live_p (LR_REGNUM))
25171 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25173 /* Clear all caller-saved regs that are not used to return. */
25174 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25175 cmse_nonsecure_entry_clear_before_return ();
25178 /* Epilogue code for APCS frame. */
25179 static void
25180 arm_expand_epilogue_apcs_frame (bool really_return)
25182 unsigned long func_type;
25183 unsigned long saved_regs_mask;
25184 int num_regs = 0;
25185 int i;
25186 int floats_from_frame = 0;
25187 arm_stack_offsets *offsets;
25189 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25190 func_type = arm_current_func_type ();
25192 /* Get frame offsets for ARM. */
25193 offsets = arm_get_frame_offsets ();
25194 saved_regs_mask = offsets->saved_regs_mask;
25196 /* Find the offset of the floating-point save area in the frame. */
25197 floats_from_frame
25198 = (offsets->saved_args
25199 + arm_compute_static_chain_stack_bytes ()
25200 - offsets->frame);
25202 /* Compute how many core registers are saved and how far away the floats are. */
25203 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25204 if (saved_regs_mask & (1 << i))
25206 num_regs++;
25207 floats_from_frame += 4;
25210 if (TARGET_HARD_FLOAT)
25212 int start_reg;
25213 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25215 /* The offset is from IP_REGNUM. */
25216 int saved_size = arm_get_vfp_saved_size ();
25217 if (saved_size > 0)
25219 rtx_insn *insn;
25220 floats_from_frame += saved_size;
25221 insn = emit_insn (gen_addsi3 (ip_rtx,
25222 hard_frame_pointer_rtx,
25223 GEN_INT (-floats_from_frame)));
25224 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25225 ip_rtx, hard_frame_pointer_rtx);
25228 /* Generate VFP register multi-pop. */
25229 start_reg = FIRST_VFP_REGNUM;
25231 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25232 /* Look for a case where a reg does not need restoring. */
25233 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25234 && (!df_regs_ever_live_p (i + 1)
25235 || call_used_regs[i + 1]))
25237 if (start_reg != i)
25238 arm_emit_vfp_multi_reg_pop (start_reg,
25239 (i - start_reg) / 2,
25240 gen_rtx_REG (SImode,
25241 IP_REGNUM));
25242 start_reg = i + 2;
25245 /* Restore the remaining regs that we have discovered (or possibly
25246 even all of them, if the conditional in the for loop never
25247 fired). */
25248 if (start_reg != i)
25249 arm_emit_vfp_multi_reg_pop (start_reg,
25250 (i - start_reg) / 2,
25251 gen_rtx_REG (SImode, IP_REGNUM));
25254 if (TARGET_IWMMXT)
25256 /* The frame pointer is guaranteed to be non-double-word aligned, as
25257 it is set to double-word-aligned old_stack_pointer - 4. */
25258 rtx_insn *insn;
25259 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25261 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25262 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25264 rtx addr = gen_frame_mem (V2SImode,
25265 plus_constant (Pmode, hard_frame_pointer_rtx,
25266 - lrm_count * 4));
25267 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25268 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25269 gen_rtx_REG (V2SImode, i),
25270 NULL_RTX);
25271 lrm_count += 2;
25275 /* saved_regs_mask should contain IP, which holds the old stack pointer
25276 from the time the activation record was created. Since SP and IP are
25277 adjacent registers, we can restore the value directly into SP. */
25278 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25279 saved_regs_mask &= ~(1 << IP_REGNUM);
25280 saved_regs_mask |= (1 << SP_REGNUM);
25282 /* There are two registers left in saved_regs_mask - LR and PC. We
25283 only need to restore LR (the return address), but to
25284 save time we can load it directly into PC, unless we need a
25285 special function exit sequence, or we are not really returning. */
25286 if (really_return
25287 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25288 && !crtl->calls_eh_return)
25289 /* Delete LR from the register mask, so that LR on
25290 the stack is loaded into the PC in the register mask. */
25291 saved_regs_mask &= ~(1 << LR_REGNUM);
25292 else
25293 saved_regs_mask &= ~(1 << PC_REGNUM);
25295 num_regs = bit_count (saved_regs_mask);
25296 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25298 rtx_insn *insn;
25299 emit_insn (gen_blockage ());
25300 /* Unwind the stack to just below the saved registers. */
25301 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25302 hard_frame_pointer_rtx,
25303 GEN_INT (- 4 * num_regs)));
25305 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25306 stack_pointer_rtx, hard_frame_pointer_rtx);
25309 arm_emit_multi_reg_pop (saved_regs_mask);
25311 if (IS_INTERRUPT (func_type))
25313 /* Interrupt handlers will have pushed the
25314 IP onto the stack, so restore it now. */
25315 rtx_insn *insn;
25316 rtx addr = gen_rtx_MEM (SImode,
25317 gen_rtx_POST_INC (SImode,
25318 stack_pointer_rtx));
25319 set_mem_alias_set (addr, get_frame_alias_set ());
25320 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25321 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25322 gen_rtx_REG (SImode, IP_REGNUM),
25323 NULL_RTX);
25326 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25327 return;
25329 if (crtl->calls_eh_return)
25330 emit_insn (gen_addsi3 (stack_pointer_rtx,
25331 stack_pointer_rtx,
25332 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25334 if (IS_STACKALIGN (func_type))
25335 /* Restore the original stack pointer. Before prologue, the stack was
25336 realigned and the original stack pointer saved in r0. For details,
25337 see comment in arm_expand_prologue. */
25338 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25340 emit_jump_insn (simple_return_rtx);
25343 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25344 function is not a sibcall. */
25345 void
25346 arm_expand_epilogue (bool really_return)
25348 unsigned long func_type;
25349 unsigned long saved_regs_mask;
25350 int num_regs = 0;
25351 int i;
25352 int amount;
25353 arm_stack_offsets *offsets;
25355 func_type = arm_current_func_type ();
25357 /* Naked functions don't have an epilogue. Hence, generate a return pattern
25358 and let output_return_instruction take care of instruction emission, if any. */
25359 if (IS_NAKED (func_type)
25360 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25362 if (really_return)
25363 emit_jump_insn (simple_return_rtx);
25364 return;
25367 /* If we are throwing an exception, then we really must be doing a
25368 return, so we can't tail-call. */
25369 gcc_assert (!crtl->calls_eh_return || really_return);
25371 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25373 arm_expand_epilogue_apcs_frame (really_return);
25374 return;
25377 /* Get frame offsets for ARM. */
25378 offsets = arm_get_frame_offsets ();
25379 saved_regs_mask = offsets->saved_regs_mask;
25380 num_regs = bit_count (saved_regs_mask);
25382 if (frame_pointer_needed)
25384 rtx_insn *insn;
25385 /* Restore stack pointer if necessary. */
25386 if (TARGET_ARM)
25388 /* In ARM mode, frame pointer points to first saved register.
25389 Restore stack pointer to last saved register. */
25390 amount = offsets->frame - offsets->saved_regs;
25392 /* Force out any pending memory operations that reference stacked data
25393 before stack de-allocation occurs. */
25394 emit_insn (gen_blockage ());
25395 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25396 hard_frame_pointer_rtx,
25397 GEN_INT (amount)));
25398 arm_add_cfa_adjust_cfa_note (insn, amount,
25399 stack_pointer_rtx,
25400 hard_frame_pointer_rtx);
25402 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25403 deleted. */
25404 emit_insn (gen_force_register_use (stack_pointer_rtx));
25406 else
25408 /* In Thumb-2 mode, the frame pointer points to the last saved
25409 register. */
25410 amount = offsets->locals_base - offsets->saved_regs;
25411 if (amount)
25413 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25414 hard_frame_pointer_rtx,
25415 GEN_INT (amount)));
25416 arm_add_cfa_adjust_cfa_note (insn, amount,
25417 hard_frame_pointer_rtx,
25418 hard_frame_pointer_rtx);
25421 /* Force out any pending memory operations that reference stacked data
25422 before stack de-allocation occurs. */
25423 emit_insn (gen_blockage ());
25424 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25425 hard_frame_pointer_rtx));
25426 arm_add_cfa_adjust_cfa_note (insn, 0,
25427 stack_pointer_rtx,
25428 hard_frame_pointer_rtx);
25429 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25430 deleted. */
25431 emit_insn (gen_force_register_use (stack_pointer_rtx));
25434 else
25436 /* Pop off outgoing args and local frame to adjust stack pointer to
25437 last saved register. */
25438 amount = offsets->outgoing_args - offsets->saved_regs;
25439 if (amount)
25441 rtx_insn *tmp;
25442 /* Force out any pending memory operations that reference stacked data
25443 before stack de-allocation occurs. */
25444 emit_insn (gen_blockage ());
25445 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25446 stack_pointer_rtx,
25447 GEN_INT (amount)));
25448 arm_add_cfa_adjust_cfa_note (tmp, amount,
25449 stack_pointer_rtx, stack_pointer_rtx);
25450 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25451 not deleted. */
25452 emit_insn (gen_force_register_use (stack_pointer_rtx));
25456 if (TARGET_HARD_FLOAT)
25458 /* Generate VFP register multi-pop. */
25459 int end_reg = LAST_VFP_REGNUM + 1;
25461 /* Scan the registers in reverse order. We need to match
25462 any groupings made in the prologue and generate matching
25463 vldm operations. The need to match groups is because,
25464 unlike pop, vldm can only do consecutive regs. */
25465 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25466 /* Look for a case where a reg does not need restoring. */
25467 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25468 && (!df_regs_ever_live_p (i + 1)
25469 || call_used_regs[i + 1]))
25471 /* Restore the regs discovered so far (from reg+2 to
25472 end_reg). */
25473 if (end_reg > i + 2)
25474 arm_emit_vfp_multi_reg_pop (i + 2,
25475 (end_reg - (i + 2)) / 2,
25476 stack_pointer_rtx);
25477 end_reg = i;
25480 /* Restore the remaining regs that we have discovered (or possibly
25481 even all of them, if the conditional in the for loop never
25482 fired). */
25483 if (end_reg > i + 2)
25484 arm_emit_vfp_multi_reg_pop (i + 2,
25485 (end_reg - (i + 2)) / 2,
25486 stack_pointer_rtx);
25489 if (TARGET_IWMMXT)
25490 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25491 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25493 rtx_insn *insn;
25494 rtx addr = gen_rtx_MEM (V2SImode,
25495 gen_rtx_POST_INC (SImode,
25496 stack_pointer_rtx));
25497 set_mem_alias_set (addr, get_frame_alias_set ());
25498 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25499 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25500 gen_rtx_REG (V2SImode, i),
25501 NULL_RTX);
25502 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25503 stack_pointer_rtx, stack_pointer_rtx);
25506 if (saved_regs_mask)
25508 rtx insn;
25509 bool return_in_pc = false;
25511 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25512 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25513 && !IS_CMSE_ENTRY (func_type)
25514 && !IS_STACKALIGN (func_type)
25515 && really_return
25516 && crtl->args.pretend_args_size == 0
25517 && saved_regs_mask & (1 << LR_REGNUM)
25518 && !crtl->calls_eh_return)
25520 saved_regs_mask &= ~(1 << LR_REGNUM);
25521 saved_regs_mask |= (1 << PC_REGNUM);
25522 return_in_pc = true;
25525 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25527 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25528 if (saved_regs_mask & (1 << i))
25530 rtx addr = gen_rtx_MEM (SImode,
25531 gen_rtx_POST_INC (SImode,
25532 stack_pointer_rtx));
25533 set_mem_alias_set (addr, get_frame_alias_set ());
25535 if (i == PC_REGNUM)
25537 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25538 XVECEXP (insn, 0, 0) = ret_rtx;
25539 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25540 addr);
25541 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25542 insn = emit_jump_insn (insn);
25544 else
25546 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25547 addr));
25548 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25549 gen_rtx_REG (SImode, i),
25550 NULL_RTX);
25551 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25552 stack_pointer_rtx,
25553 stack_pointer_rtx);
25557 else
25559 if (TARGET_LDRD
25560 && current_tune->prefer_ldrd_strd
25561 && !optimize_function_for_size_p (cfun))
25563 if (TARGET_THUMB2)
25564 thumb2_emit_ldrd_pop (saved_regs_mask);
25565 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25566 arm_emit_ldrd_pop (saved_regs_mask);
25567 else
25568 arm_emit_multi_reg_pop (saved_regs_mask);
25570 else
25571 arm_emit_multi_reg_pop (saved_regs_mask);
25574 if (return_in_pc)
25575 return;
25578 amount
25579 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25580 if (amount)
25582 int i, j;
25583 rtx dwarf = NULL_RTX;
25584 rtx_insn *tmp =
25585 emit_insn (gen_addsi3 (stack_pointer_rtx,
25586 stack_pointer_rtx,
25587 GEN_INT (amount)));
25589 RTX_FRAME_RELATED_P (tmp) = 1;
25591 if (cfun->machine->uses_anonymous_args)
25593 /* Restore pretend args. Refer to arm_expand_prologue for how
25594 pretend_args are saved on the stack. */
25595 int num_regs = crtl->args.pretend_args_size / 4;
25596 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
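/* For example, with two anonymous-arg registers (num_regs == 2) the mask
   computed above is 0xc, i.e. r2 and r3, the registers the prologue
   pushed as pretend args.  */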
25597 for (j = 0, i = 0; j < num_regs; i++)
25598 if (saved_regs_mask & (1 << i))
25600 rtx reg = gen_rtx_REG (SImode, i);
25601 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25602 j++;
25604 REG_NOTES (tmp) = dwarf;
25606 arm_add_cfa_adjust_cfa_note (tmp, amount,
25607 stack_pointer_rtx, stack_pointer_rtx);
25610 /* Clear all caller-saved regs that are not used to return. */
25611 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25613 /* CMSE_ENTRY always returns. */
25614 gcc_assert (really_return);
25615 cmse_nonsecure_entry_clear_before_return ();
25618 if (!really_return)
25619 return;
25621 if (crtl->calls_eh_return)
25622 emit_insn (gen_addsi3 (stack_pointer_rtx,
25623 stack_pointer_rtx,
25624 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25626 if (IS_STACKALIGN (func_type))
25627 /* Restore the original stack pointer. Before prologue, the stack was
25628 realigned and the original stack pointer saved in r0. For details,
25629 see comment in arm_expand_prologue. */
25630 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25632 emit_jump_insn (simple_return_rtx);
25635 /* Implementation of insn prologue_thumb1_interwork. This is the first
25636 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25638 const char *
25639 thumb1_output_interwork (void)
25641 const char * name;
25642 FILE *f = asm_out_file;
25644 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25645 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25646 == SYMBOL_REF);
25647 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25649 /* Generate code sequence to switch us into Thumb mode. */
25650 /* The .code 32 directive has already been emitted by
25651 ASM_DECLARE_FUNCTION_NAME. */
25652 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25653 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25655 /* Generate a label, so that the debugger will notice the
25656 change in instruction sets. This label is also used by
25657 the assembler to bypass the ARM code when this function
25658 is called from a Thumb encoded function elsewhere in the
25659 same file. Hence the definition of STUB_NAME here must
25660 agree with the definition in gas/config/tc-arm.c. */
25662 #define STUB_NAME ".real_start_of"
25664 fprintf (f, "\t.code\t16\n");
25665 #ifdef ARM_PE
25666 if (arm_dllexport_name_p (name))
25667 name = arm_strip_name_encoding (name);
25668 #endif
25669 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25670 fprintf (f, "\t.thumb_func\n");
25671 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25673 return "";
25676 /* Handle the case of a double word load into a low register from
25677 a computed memory address. The computed address may involve a
25678 register which is overwritten by the load. */
25679 const char *
25680 thumb_load_double_from_address (rtx *operands)
25682 rtx addr;
25683 rtx base;
25684 rtx offset;
25685 rtx arg1;
25686 rtx arg2;
25688 gcc_assert (REG_P (operands[0]));
25689 gcc_assert (MEM_P (operands[1]));
25691 /* Get the memory address. */
25692 addr = XEXP (operands[1], 0);
25694 /* Work out how the memory address is computed. */
25695 switch (GET_CODE (addr))
25697 case REG:
25698 operands[2] = adjust_address (operands[1], SImode, 4);
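/* If the destination's low word register is also the address register, load
   the high word first so the address is not clobbered by the first load.  */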
25700 if (REGNO (operands[0]) == REGNO (addr))
25702 output_asm_insn ("ldr\t%H0, %2", operands);
25703 output_asm_insn ("ldr\t%0, %1", operands);
25705 else
25707 output_asm_insn ("ldr\t%0, %1", operands);
25708 output_asm_insn ("ldr\t%H0, %2", operands);
25710 break;
25712 case CONST:
25713 /* Compute <address> + 4 for the high order load. */
25714 operands[2] = adjust_address (operands[1], SImode, 4);
25716 output_asm_insn ("ldr\t%0, %1", operands);
25717 output_asm_insn ("ldr\t%H0, %2", operands);
25718 break;
25720 case PLUS:
25721 arg1 = XEXP (addr, 0);
25722 arg2 = XEXP (addr, 1);
25724 if (CONSTANT_P (arg1))
25725 base = arg2, offset = arg1;
25726 else
25727 base = arg1, offset = arg2;
25729 gcc_assert (REG_P (base));
25731 /* Catch the case of <address> = <reg> + <reg> */
25732 if (REG_P (offset))
25734 int reg_offset = REGNO (offset);
25735 int reg_base = REGNO (base);
25736 int reg_dest = REGNO (operands[0]);
25738 /* Add the base and offset registers together into the
25739 higher destination register. */
25740 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25741 reg_dest + 1, reg_base, reg_offset);
25743 /* Load the lower destination register from the address in
25744 the higher destination register. */
25745 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25746 reg_dest, reg_dest + 1);
25748 /* Load the higher destination register from its own address
25749 plus 4. */
25750 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25751 reg_dest + 1, reg_dest + 1);
25753 else
25755 /* Compute <address> + 4 for the high order load. */
25756 operands[2] = adjust_address (operands[1], SImode, 4);
25758 /* If the computed address is held in the low order register
25759 then load the high order register first, otherwise always
25760 load the low order register first. */
25761 if (REGNO (operands[0]) == REGNO (base))
25763 output_asm_insn ("ldr\t%H0, %2", operands);
25764 output_asm_insn ("ldr\t%0, %1", operands);
25766 else
25768 output_asm_insn ("ldr\t%0, %1", operands);
25769 output_asm_insn ("ldr\t%H0, %2", operands);
25772 break;
25774 case LABEL_REF:
25775 /* With no registers to worry about we can just load the value
25776 directly. */
25777 operands[2] = adjust_address (operands[1], SImode, 4);
25779 output_asm_insn ("ldr\t%H0, %2", operands);
25780 output_asm_insn ("ldr\t%0, %1", operands);
25781 break;
25783 default:
25784 gcc_unreachable ();
25787 return "";
25790 const char *
25791 thumb_output_move_mem_multiple (int n, rtx *operands)
25793 switch (n)
25795 case 2:
25796 if (REGNO (operands[4]) > REGNO (operands[5]))
25797 std::swap (operands[4], operands[5]);
25799 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25800 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25801 break;
25803 case 3:
25804 if (REGNO (operands[4]) > REGNO (operands[5]))
25805 std::swap (operands[4], operands[5]);
25806 if (REGNO (operands[5]) > REGNO (operands[6]))
25807 std::swap (operands[5], operands[6]);
25808 if (REGNO (operands[4]) > REGNO (operands[5]))
25809 std::swap (operands[4], operands[5]);
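/* The three conditional swaps above sort operands[4..6] into ascending
   register order, as expected for an ldmia/stmia register list.  */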
25811 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25812 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25813 break;
25815 default:
25816 gcc_unreachable ();
25819 return "";
25822 /* Output a call-via instruction for thumb state. */
25823 const char *
25824 thumb_call_via_reg (rtx reg)
25826 int regno = REGNO (reg);
25827 rtx *labelp;
25829 gcc_assert (regno < LR_REGNUM);
25831 /* If we are in the normal text section we can use a single instance
25832 per compilation unit. If we are doing function sections, then we need
25833 an entry per section, since we can't rely on reachability. */
25834 if (in_section == text_section)
25836 thumb_call_reg_needed = 1;
25838 if (thumb_call_via_label[regno] == NULL)
25839 thumb_call_via_label[regno] = gen_label_rtx ();
25840 labelp = thumb_call_via_label + regno;
25842 else
25844 if (cfun->machine->call_via[regno] == NULL)
25845 cfun->machine->call_via[regno] = gen_label_rtx ();
25846 labelp = cfun->machine->call_via + regno;
25849 output_asm_insn ("bl\t%a0", labelp);
25850 return "";
25853 /* Routines for generating rtl. */
25854 void
25855 thumb_expand_movmemqi (rtx *operands)
25857 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25858 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25859 HOST_WIDE_INT len = INTVAL (operands[2]);
25860 HOST_WIDE_INT offset = 0;
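/* Copy in decreasing chunk sizes: 12- and 8-byte ldmia/stmia blocks first,
   then a word, a halfword and finally a single byte for any remainder.  */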
25862 while (len >= 12)
25864 emit_insn (gen_movmem12b (out, in, out, in));
25865 len -= 12;
25868 if (len >= 8)
25870 emit_insn (gen_movmem8b (out, in, out, in));
25871 len -= 8;
25874 if (len >= 4)
25876 rtx reg = gen_reg_rtx (SImode);
25877 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25878 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25879 len -= 4;
25880 offset += 4;
25883 if (len >= 2)
25885 rtx reg = gen_reg_rtx (HImode);
25886 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25887 plus_constant (Pmode, in,
25888 offset))));
25889 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25890 offset)),
25891 reg));
25892 len -= 2;
25893 offset += 2;
25896 if (len)
25898 rtx reg = gen_reg_rtx (QImode);
25899 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25900 plus_constant (Pmode, in,
25901 offset))));
25902 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25903 offset)),
25904 reg));
25908 void
25909 thumb_reload_out_hi (rtx *operands)
25911 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25914 /* Return the length of a function name prefix
25915 that starts with the character 'c'. */
25916 static int
25917 arm_get_strip_length (int c)
25919 switch (c)
25921 ARM_NAME_ENCODING_LENGTHS
25922 default: return 0;
25926 /* Return a pointer to a function's name with any
25927 and all prefix encodings stripped from it. */
25928 const char *
25929 arm_strip_name_encoding (const char *name)
25931 int skip;
25933 while ((skip = arm_get_strip_length (* name)))
25934 name += skip;
25936 return name;
25939 /* If there is a '*' anywhere in the name's prefix, then
25940 emit the stripped name verbatim, otherwise prepend an
25941 underscore if leading underscores are being used. */
25942 void
25943 arm_asm_output_labelref (FILE *stream, const char *name)
25945 int skip;
25946 int verbatim = 0;
25948 while ((skip = arm_get_strip_length (* name)))
25950 verbatim |= (*name == '*');
25951 name += skip;
25954 if (verbatim)
25955 fputs (name, stream);
25956 else
25957 asm_fprintf (stream, "%U%s", name);
25960 /* This function is used to emit an EABI tag and its associated value.
25961 We emit the numerical value of the tag in case the assembler does not
25962 support textual tags (e.g. gas prior to 2.20). If requested we include
25963 the tag name in a comment so that anyone reading the assembler output
25964 will know which tag is being set.
25966 This function is not static because arm-c.c needs it too. */
25968 void
25969 arm_emit_eabi_attribute (const char *name, int num, int val)
25971 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25972 if (flag_verbose_asm || flag_debug_asm)
25973 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25974 asm_fprintf (asm_out_file, "\n");
25977 /* This function is used to print CPU tuning information as comment
25978 in assembler file. Pointers are not printed for now. */
25980 void
25981 arm_print_tune_info (void)
25983 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
25984 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
25985 current_tune->constant_limit);
25986 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25987 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
25988 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25989 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
25990 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25991 "prefetch.l1_cache_size:\t%d\n",
25992 current_tune->prefetch.l1_cache_size);
25993 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25994 "prefetch.l1_cache_line_size:\t%d\n",
25995 current_tune->prefetch.l1_cache_line_size);
25996 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25997 "prefer_constant_pool:\t%d\n",
25998 (int) current_tune->prefer_constant_pool);
25999 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26000 "branch_cost:\t(s:speed, p:predictable)\n");
26001 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26002 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26003 current_tune->branch_cost (false, false));
26004 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26005 current_tune->branch_cost (false, true));
26006 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26007 current_tune->branch_cost (true, false));
26008 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26009 current_tune->branch_cost (true, true));
26010 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26011 "prefer_ldrd_strd:\t%d\n",
26012 (int) current_tune->prefer_ldrd_strd);
26013 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26014 "logical_op_non_short_circuit:\t[%d,%d]\n",
26015 (int) current_tune->logical_op_non_short_circuit_thumb,
26016 (int) current_tune->logical_op_non_short_circuit_arm);
26017 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26018 "prefer_neon_for_64bits:\t%d\n",
26019 (int) current_tune->prefer_neon_for_64bits);
26020 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26021 "disparage_flag_setting_t16_encodings:\t%d\n",
26022 (int) current_tune->disparage_flag_setting_t16_encodings);
26023 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26024 "string_ops_prefer_neon:\t%d\n",
26025 (int) current_tune->string_ops_prefer_neon);
26026 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26027 "max_insns_inline_memset:\t%d\n",
26028 current_tune->max_insns_inline_memset);
26029 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26030 current_tune->fusible_ops);
26031 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26032 (int) current_tune->sched_autopref);
26035 static void
26036 arm_file_start (void)
26038 int val;
26040 if (TARGET_BPABI)
26042 /* We don't have a specified CPU. Use the architecture to
26043 generate the tags.
26045 Note: it might be better to do this unconditionally, then the
26046 assembler would not need to know about all new CPU names as
26047 they are added. */
26048 if (!arm_active_target.core_name)
26050 /* armv7ve doesn't support any extensions. */
26051 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26053 /* Keep backward compatibility for assemblers
26054 which don't support armv7ve. */
26055 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26056 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26057 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26058 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26059 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26061 else
26063 const char* pos = strchr (arm_active_target.arch_name, '+');
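/* Architecture strings may carry an extension suffix (e.g. "armv8-a+crc");
   split at the '+' so that the base name is emitted with .arch and the
   suffix with .arch_extension.  */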
26064 if (pos)
26066 char buf[32];
26067 gcc_assert (strlen (arm_active_target.arch_name)
26068 <= sizeof (buf) / sizeof (*pos));
26069 strncpy (buf, arm_active_target.arch_name,
26070 (pos - arm_active_target.arch_name) * sizeof (*pos));
26071 buf[pos - arm_active_target.arch_name] = '\0';
26072 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
26073 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
26075 else
26076 asm_fprintf (asm_out_file, "\t.arch %s\n",
26077 arm_active_target.arch_name);
26080 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26081 asm_fprintf (asm_out_file, "\t.arch %s\n",
26082 arm_active_target.core_name + 8);
26083 else
26085 const char* truncated_name
26086 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26087 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26090 if (print_tune_info)
26091 arm_print_tune_info ();
26093 if (! TARGET_SOFT_FLOAT)
26095 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26096 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26098 if (TARGET_HARD_FLOAT_ABI)
26099 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26102 /* Some of these attributes only apply when the corresponding features
26103 are used. However we don't have any easy way of figuring this out.
26104 Conservatively record the setting that would have been used. */
26106 if (flag_rounding_math)
26107 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26109 if (!flag_unsafe_math_optimizations)
26111 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26112 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26114 if (flag_signaling_nans)
26115 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26117 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26118 flag_finite_math_only ? 1 : 3);
26120 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26121 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26122 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26123 flag_short_enums ? 1 : 2);
26125 /* Tag_ABI_optimization_goals. */
26126 if (optimize_size)
26127 val = 4;
26128 else if (optimize >= 2)
26129 val = 2;
26130 else if (optimize)
26131 val = 1;
26132 else
26133 val = 6;
26134 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26136 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26137 unaligned_access);
26139 if (arm_fp16_format)
26140 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26141 (int) arm_fp16_format);
26143 if (arm_lang_output_object_attributes_hook)
26144 arm_lang_output_object_attributes_hook();
26147 default_file_start ();
26150 static void
26151 arm_file_end (void)
26153 int regno;
26155 if (NEED_INDICATE_EXEC_STACK)
26156 /* Add .note.GNU-stack. */
26157 file_end_indicate_exec_stack ();
26159 if (! thumb_call_reg_needed)
26160 return;
26162 switch_to_section (text_section);
26163 asm_fprintf (asm_out_file, "\t.code 16\n");
26164 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26166 for (regno = 0; regno < LR_REGNUM; regno++)
26168 rtx label = thumb_call_via_label[regno];
26170 if (label != 0)
26172 targetm.asm_out.internal_label (asm_out_file, "L",
26173 CODE_LABEL_NUMBER (label));
26174 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26179 #ifndef ARM_PE
26180 /* Symbols in the text segment can be accessed without indirecting via the
26181 constant pool; it may take an extra binary operation, but this is still
26182 faster than indirecting via memory. Don't do this when not optimizing,
26183 since we won't be calculating all of the offsets necessary to do this
26184 simplification. */
26186 static void
26187 arm_encode_section_info (tree decl, rtx rtl, int first)
26189 if (optimize > 0 && TREE_CONSTANT (decl))
26190 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26192 default_encode_section_info (decl, rtl, first);
26194 #endif /* !ARM_PE */
26196 static void
26197 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26199 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26200 && !strcmp (prefix, "L"))
26202 arm_ccfsm_state = 0;
26203 arm_target_insn = NULL;
26205 default_internal_label (stream, prefix, labelno);
26208 /* Output code to add DELTA to the first argument, and then jump
26209 to FUNCTION. Used for C++ multiple inheritance. */
26211 static void
26212 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26213 HOST_WIDE_INT, tree function)
26215 static int thunk_label = 0;
26216 char label[256];
26217 char labelpc[256];
26218 int mi_delta = delta;
26219 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26220 int shift = 0;
26221 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26222 ? 1 : 0);
26223 if (mi_delta < 0)
26224 mi_delta = - mi_delta;
26226 final_start_function (emit_barrier (), file, 1);
26228 if (TARGET_THUMB1)
26230 int labelno = thunk_label++;
26231 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26232 /* Thunks are entered in ARM mode when available. */
26233 if (TARGET_THUMB1_ONLY)
26235 /* push r3 so we can use it as a temporary. */
26236 /* TODO: Omit this save if r3 is not used. */
26237 fputs ("\tpush {r3}\n", file);
26238 fputs ("\tldr\tr3, ", file);
26240 else
26242 fputs ("\tldr\tr12, ", file);
26244 assemble_name (file, label);
26245 fputc ('\n', file);
26246 if (flag_pic)
26248 /* If we are generating PIC, the ldr instruction below loads
26249 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26250 the address of the add + 8, so we have:
26252 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26253 = target + 1.
26255 Note that we have "+ 1" because some versions of GNU ld
26256 don't set the low bit of the result for R_ARM_REL32
26257 relocations against thumb function symbols.
26258 On ARMv6M this is +4, not +8. */
26259 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26260 assemble_name (file, labelpc);
26261 fputs (":\n", file);
26262 if (TARGET_THUMB1_ONLY)
26264 /* This is 2 insns after the start of the thunk, so we know it
26265 is 4-byte aligned. */
26266 fputs ("\tadd\tr3, pc, r3\n", file);
26267 fputs ("\tmov r12, r3\n", file);
26269 else
26270 fputs ("\tadd\tr12, pc, r12\n", file);
26272 else if (TARGET_THUMB1_ONLY)
26273 fputs ("\tmov r12, r3\n", file);
26275 if (TARGET_THUMB1_ONLY)
26277 if (mi_delta > 255)
26279 fputs ("\tldr\tr3, ", file);
26280 assemble_name (file, label);
26281 fputs ("+4\n", file);
26282 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26283 mi_op, this_regno, this_regno);
26285 else if (mi_delta != 0)
26287 /* Thumb1 unified syntax requires s suffix in instruction name when
26288 one of the operands is immediate. */
26289 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26290 mi_op, this_regno, this_regno,
26291 mi_delta);
26294 else
26296 /* TODO: Use movw/movt for large constants when available. */
26297 while (mi_delta != 0)
26299 if ((mi_delta & (3 << shift)) == 0)
26300 shift += 2;
26301 else
26303 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26304 mi_op, this_regno, this_regno,
26305 mi_delta & (0xff << shift));
26306 mi_delta &= ~(0xff << shift);
26307 shift += 8;
26311 if (TARGET_THUMB1)
26313 if (TARGET_THUMB1_ONLY)
26314 fputs ("\tpop\t{r3}\n", file);
26316 fprintf (file, "\tbx\tr12\n");
26317 ASM_OUTPUT_ALIGN (file, 2);
26318 assemble_name (file, label);
26319 fputs (":\n", file);
26320 if (flag_pic)
26322 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26323 rtx tem = XEXP (DECL_RTL (function), 0);
26324 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26325 pipeline offset is four rather than eight. Adjust the offset
26326 accordingly. */
26327 tem = plus_constant (GET_MODE (tem), tem,
26328 TARGET_THUMB1_ONLY ? -3 : -7);
26329 tem = gen_rtx_MINUS (GET_MODE (tem),
26330 tem,
26331 gen_rtx_SYMBOL_REF (Pmode,
26332 ggc_strdup (labelpc)));
26333 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26335 else
26336 /* Output ".word .LTHUNKn". */
26337 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26339 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26340 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26342 else
26344 fputs ("\tb\t", file);
26345 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26346 if (NEED_PLT_RELOC)
26347 fputs ("(PLT)", file);
26348 fputc ('\n', file);
26351 final_end_function ();
26354 /* MI thunk handling for TARGET_32BIT. */
26356 static void
26357 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26358 HOST_WIDE_INT vcall_offset, tree function)
26360 /* On ARM, this_regno is R0 or R1 depending on
26361 whether the function returns an aggregate or not.
26363 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26364 function)
26365 ? R1_REGNUM : R0_REGNUM);
26367 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26368 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26369 reload_completed = 1;
26370 emit_note (NOTE_INSN_PROLOGUE_END);
26372 /* Add DELTA to THIS_RTX. */
26373 if (delta != 0)
26374 arm_split_constant (PLUS, Pmode, NULL_RTX,
26375 delta, this_rtx, this_rtx, false);
26377 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26378 if (vcall_offset != 0)
26380 /* Load *THIS_RTX. */
26381 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26382 /* Compute *THIS_RTX + VCALL_OFFSET. */
26383 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26384 false);
26385 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26386 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26387 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26390 /* Generate a tail call to the target function. */
26391 if (!TREE_USED (function))
26393 assemble_external (function);
26394 TREE_USED (function) = 1;
26396 rtx funexp = XEXP (DECL_RTL (function), 0);
26397 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26398 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26399 SIBLING_CALL_P (insn) = 1;
26401 insn = get_insns ();
26402 shorten_branches (insn);
26403 final_start_function (insn, file, 1);
26404 final (insn, file, 1);
26405 final_end_function ();
26407 /* Stop pretending this is a post-reload pass. */
26408 reload_completed = 0;
26411 /* Output code to add DELTA to the first argument, and then jump
26412 to FUNCTION. Used for C++ multiple inheritance. */
26414 static void
26415 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26416 HOST_WIDE_INT vcall_offset, tree function)
26418 if (TARGET_32BIT)
26419 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26420 else
26421 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26425 arm_emit_vector_const (FILE *file, rtx x)
26427 int i;
26428 const char * pattern;
26430 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26432 switch (GET_MODE (x))
26434 case V2SImode: pattern = "%08x"; break;
26435 case V4HImode: pattern = "%04x"; break;
26436 case V8QImode: pattern = "%02x"; break;
26437 default: gcc_unreachable ();
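/* Elements are printed from the highest-numbered one downwards, each
   zero-padded to its element width, so these 64-bit vector modes always
   produce 16 hex digits after the "0x" prefix.  */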
26440 fprintf (file, "0x");
26441 for (i = CONST_VECTOR_NUNITS (x); i--;)
26443 rtx element;
26445 element = CONST_VECTOR_ELT (x, i);
26446 fprintf (file, pattern, INTVAL (element));
26449 return 1;
26452 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26453 HFmode constant pool entries are actually loaded with ldr. */
26454 void
26455 arm_emit_fp16_const (rtx c)
26457 long bits;
26459 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26460 if (WORDS_BIG_ENDIAN)
26461 assemble_zeros (2);
26462 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26463 if (!WORDS_BIG_ENDIAN)
26464 assemble_zeros (2);
26467 const char *
26468 arm_output_load_gr (rtx *operands)
26470 rtx reg;
26471 rtx offset;
26472 rtx wcgr;
26473 rtx sum;
26475 if (!MEM_P (operands [1])
26476 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26477 || !REG_P (reg = XEXP (sum, 0))
26478 || !CONST_INT_P (offset = XEXP (sum, 1))
26479 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26480 return "wldrw%?\t%0, %1";
26482 /* Fix up an out-of-range load of a GR register. */
26483 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26484 wcgr = operands[0];
26485 operands[0] = reg;
26486 output_asm_insn ("ldr%?\t%0, %1", operands);
26488 operands[0] = wcgr;
26489 operands[1] = reg;
26490 output_asm_insn ("tmcr%?\t%0, %1", operands);
26491 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26493 return "";
26496 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26498 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26499 named arg and all anonymous args onto the stack.
26500 XXX I know the prologue shouldn't be pushing registers, but it is faster
26501 that way. */
26503 static void
26504 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26505 machine_mode mode,
26506 tree type,
26507 int *pretend_size,
26508 int second_time ATTRIBUTE_UNUSED)
26510 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26511 int nregs;
26513 cfun->machine->uses_anonymous_args = 1;
26514 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26516 nregs = pcum->aapcs_ncrn;
26517 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26518 nregs++;
26520 else
26521 nregs = pcum->nregs;
26523 if (nregs < NUM_ARG_REGS)
26524 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
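/* E.g. if only r0 carries a named argument, nregs is 1 and the pretend
   area covers r1-r3, i.e. 12 bytes.  */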
26527 /* We can't rely on the caller doing the proper promotion when
26528 using APCS or ATPCS. */
26530 static bool
26531 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26533 return !TARGET_AAPCS_BASED;
26536 static machine_mode
26537 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26538 machine_mode mode,
26539 int *punsignedp ATTRIBUTE_UNUSED,
26540 const_tree fntype ATTRIBUTE_UNUSED,
26541 int for_return ATTRIBUTE_UNUSED)
26543 if (GET_MODE_CLASS (mode) == MODE_INT
26544 && GET_MODE_SIZE (mode) < 4)
26545 return SImode;
26547 return mode;
26550 /* AAPCS based ABIs use short enums by default. */
26552 static bool
26553 arm_default_short_enums (void)
26555 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26559 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26561 static bool
26562 arm_align_anon_bitfield (void)
26564 return TARGET_AAPCS_BASED;
26568 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26570 static tree
26571 arm_cxx_guard_type (void)
26573 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26577 /* The EABI says test the least significant bit of a guard variable. */
26579 static bool
26580 arm_cxx_guard_mask_bit (void)
26582 return TARGET_AAPCS_BASED;
26586 /* The EABI specifies that all array cookies are 8 bytes long. */
26588 static tree
26589 arm_get_cookie_size (tree type)
26591 tree size;
26593 if (!TARGET_AAPCS_BASED)
26594 return default_cxx_get_cookie_size (type);
26596 size = build_int_cst (sizetype, 8);
26597 return size;
26601 /* The EABI says that array cookies should also contain the element size. */
26603 static bool
26604 arm_cookie_has_size (void)
26606 return TARGET_AAPCS_BASED;
26610 /* The EABI says constructors and destructors should return a pointer to
26611 the object constructed/destroyed. */
26613 static bool
26614 arm_cxx_cdtor_returns_this (void)
26616 return TARGET_AAPCS_BASED;
26619 /* The EABI says that an inline function may never be the key
26620 method. */
26622 static bool
26623 arm_cxx_key_method_may_be_inline (void)
26625 return !TARGET_AAPCS_BASED;
26628 static void
26629 arm_cxx_determine_class_data_visibility (tree decl)
26631 if (!TARGET_AAPCS_BASED
26632 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26633 return;
26635 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26636 is exported. However, on systems without dynamic vague linkage,
26637 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26638 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26639 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26640 else
26641 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26642 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26645 static bool
26646 arm_cxx_class_data_always_comdat (void)
26648 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26649 vague linkage if the class has no key function. */
26650 return !TARGET_AAPCS_BASED;
26654 /* The EABI says __aeabi_atexit should be used to register static
26655 destructors. */
26657 static bool
26658 arm_cxx_use_aeabi_atexit (void)
26660 return TARGET_AAPCS_BASED;
26664 void
26665 arm_set_return_address (rtx source, rtx scratch)
26667 arm_stack_offsets *offsets;
26668 HOST_WIDE_INT delta;
26669 rtx addr;
26670 unsigned long saved_regs;
26672 offsets = arm_get_frame_offsets ();
26673 saved_regs = offsets->saved_regs_mask;
26675 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26676 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26677 else
26679 if (frame_pointer_needed)
26680 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26681 else
26683 /* LR will be the first saved register. */
26684 delta = offsets->outgoing_args - (offsets->frame + 4);
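/* An ARM-state word load/store encodes only a 12-bit immediate offset, so
   deltas of 4096 or more are split into a base adjustment via the scratch
   register plus a residual offset below 4096.  */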
26687 if (delta >= 4096)
26689 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26690 GEN_INT (delta & ~4095)));
26691 addr = scratch;
26692 delta &= 4095;
26694 else
26695 addr = stack_pointer_rtx;
26697 addr = plus_constant (Pmode, addr, delta);
26699 /* The store needs to be marked as frame related in order to prevent
26700 DSE from deleting it as dead if it is based on fp. */
26701 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26702 RTX_FRAME_RELATED_P (insn) = 1;
26703 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26708 void
26709 thumb_set_return_address (rtx source, rtx scratch)
26711 arm_stack_offsets *offsets;
26712 HOST_WIDE_INT delta;
26713 HOST_WIDE_INT limit;
26714 int reg;
26715 rtx addr;
26716 unsigned long mask;
26718 emit_use (source);
26720 offsets = arm_get_frame_offsets ();
26721 mask = offsets->saved_regs_mask;
26722 if (mask & (1 << LR_REGNUM))
26724 limit = 1024;
26725 /* Find the saved regs. */
26726 if (frame_pointer_needed)
26728 delta = offsets->soft_frame - offsets->saved_args;
26729 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26730 if (TARGET_THUMB1)
26731 limit = 128;
26733 else
26735 delta = offsets->outgoing_args - offsets->saved_args;
26736 reg = SP_REGNUM;
26738 /* Allow for the stack frame. */
26739 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26740 delta -= 16;
26741 /* The link register is always the first saved register. */
26742 delta -= 4;
26744 /* Construct the address. */
26745 addr = gen_rtx_REG (SImode, reg);
26746 if (delta > limit)
26748 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26749 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26750 addr = scratch;
26752 else
26753 addr = plus_constant (Pmode, addr, delta);
26755 /* The store needs to be marked as frame related in order to prevent
26756 DSE from deleting it as dead if it is based on fp. */
26757 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26758 RTX_FRAME_RELATED_P (insn) = 1;
26759 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26761 else
26762 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26765 /* Implements target hook vector_mode_supported_p. */
26766 bool
26767 arm_vector_mode_supported_p (machine_mode mode)
26769 /* Neon also supports V2SImode, etc. listed in the clause below. */
26770 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26771 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26772 || mode == V2DImode || mode == V8HFmode))
26773 return true;
26775 if ((TARGET_NEON || TARGET_IWMMXT)
26776 && ((mode == V2SImode)
26777 || (mode == V4HImode)
26778 || (mode == V8QImode)))
26779 return true;
26781 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26782 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26783 || mode == V2HAmode))
26784 return true;
26786 return false;
26789 /* Implements target hook array_mode_supported_p. */
26791 static bool
26792 arm_array_mode_supported_p (machine_mode mode,
26793 unsigned HOST_WIDE_INT nelems)
26795 if (TARGET_NEON
26796 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26797 && (nelems >= 2 && nelems <= 4))
26798 return true;
26800 return false;
26803 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26804 registers when autovectorizing for Neon, at least until multiple vector
26805 widths are supported properly by the middle-end. */
26807 static machine_mode
26808 arm_preferred_simd_mode (machine_mode mode)
26810 if (TARGET_NEON)
26811 switch (mode)
26813 case SFmode:
26814 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26815 case SImode:
26816 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26817 case HImode:
26818 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26819 case QImode:
26820 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26821 case DImode:
26822 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26823 return V2DImode;
26824 break;
26826 default:;
26829 if (TARGET_REALLY_IWMMXT)
26830 switch (mode)
26832 case SImode:
26833 return V2SImode;
26834 case HImode:
26835 return V4HImode;
26836 case QImode:
26837 return V8QImode;
26839 default:;
26842 return word_mode;
26845 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26847 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26848 using r0-r4 for function arguments, r7 for the stack frame and not have
26849 enough left over to do doubleword arithmetic. For Thumb-2 all the
26850 potentially problematic instructions accept high registers so this is not
26851 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26852 that require many low registers. */
26853 static bool
26854 arm_class_likely_spilled_p (reg_class_t rclass)
26856 if ((TARGET_THUMB1 && rclass == LO_REGS)
26857 || rclass == CC_REG)
26858 return true;
26860 return false;
26863 /* Implements target hook small_register_classes_for_mode_p. */
26864 bool
26865 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26867 return TARGET_THUMB1;
26870 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26871 ARM insns and therefore guarantee that the shift count is modulo 256.
26872 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26873 guarantee no particular behavior for out-of-range counts. */
26875 static unsigned HOST_WIDE_INT
26876 arm_shift_truncation_mask (machine_mode mode)
26878 return mode == SImode ? 255 : 0;
26882 /* Map internal gcc register numbers to DWARF2 register numbers. */
26884 unsigned int
26885 arm_dbx_register_number (unsigned int regno)
26887 if (regno < 16)
26888 return regno;
26890 if (IS_VFP_REGNUM (regno))
26892 /* See comment in arm_dwarf_register_span. */
26893 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26894 return 64 + regno - FIRST_VFP_REGNUM;
26895 else
26896 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26899 if (IS_IWMMXT_GR_REGNUM (regno))
26900 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26902 if (IS_IWMMXT_REGNUM (regno))
26903 return 112 + regno - FIRST_IWMMXT_REGNUM;
26905 return DWARF_FRAME_REGISTERS;
26908 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26909 GCC models them as 64 32-bit registers, so we need to describe this to
26910 the DWARF generation code. Other registers can use the default. */
26911 static rtx
26912 arm_dwarf_register_span (rtx rtl)
26914 machine_mode mode;
26915 unsigned regno;
26916 rtx parts[16];
26917 int nregs;
26918 int i;
26920 regno = REGNO (rtl);
26921 if (!IS_VFP_REGNUM (regno))
26922 return NULL_RTX;
26924 /* XXX FIXME: The EABI defines two VFP register ranges:
26925 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26926 256-287: D0-D31
26927 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26928 corresponding D register. Until GDB supports this, we shall use the
26929 legacy encodings. We also use these encodings for D0-D15 for
26930 compatibility with older debuggers. */
26931 mode = GET_MODE (rtl);
26932 if (GET_MODE_SIZE (mode) < 8)
26933 return NULL_RTX;
26935 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26937 nregs = GET_MODE_SIZE (mode) / 4;
26938 for (i = 0; i < nregs; i += 2)
26939 if (TARGET_BIG_END)
26941 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26942 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26944 else
26946 parts[i] = gen_rtx_REG (SImode, regno + i);
26947 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26950 else
26952 nregs = GET_MODE_SIZE (mode) / 8;
26953 for (i = 0; i < nregs; i++)
26954 parts[i] = gen_rtx_REG (DImode, regno + i);
26957 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26960 #if ARM_UNWIND_INFO
26961 /* Emit unwind directives for a store-multiple instruction or stack pointer
26962 push during alignment.
26963 These should only ever be generated by the function prologue code, so
26964 expect them to have a particular form.
26965 The store-multiple instruction sometimes pushes pc as the last register,
26966 although it should not be tracked into unwind information, or for -Os
26967 sometimes pushes some dummy registers before the first register that needs
26968 to be tracked in unwind information; such dummy registers are there just
26969 to avoid separate stack adjustment, and will not be restored in the
26970 epilogue. */
26972 static void
26973 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26975 int i;
26976 HOST_WIDE_INT offset;
26977 HOST_WIDE_INT nregs;
26978 int reg_size;
26979 unsigned reg;
26980 unsigned lastreg;
26981 unsigned padfirst = 0, padlast = 0;
26982 rtx e;
26984 e = XVECEXP (p, 0, 0);
26985 gcc_assert (GET_CODE (e) == SET);
26987 /* First insn will adjust the stack pointer. */
26988 gcc_assert (GET_CODE (e) == SET
26989 && REG_P (SET_DEST (e))
26990 && REGNO (SET_DEST (e)) == SP_REGNUM
26991 && GET_CODE (SET_SRC (e)) == PLUS);
26993 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26994 nregs = XVECLEN (p, 0) - 1;
26995 gcc_assert (nregs);
26997 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26998 if (reg < 16)
27000 /* For -Os dummy registers can be pushed at the beginning to
27001 avoid separate stack pointer adjustment. */
27002 e = XVECEXP (p, 0, 1);
27003 e = XEXP (SET_DEST (e), 0);
27004 if (GET_CODE (e) == PLUS)
27005 padfirst = INTVAL (XEXP (e, 1));
27006 gcc_assert (padfirst == 0 || optimize_size);
27007 /* The function prologue may also push pc, but not annotate it as it is
27008 never restored. We turn this into a stack pointer adjustment. */
27009 e = XVECEXP (p, 0, nregs);
27010 e = XEXP (SET_DEST (e), 0);
27011 if (GET_CODE (e) == PLUS)
27012 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27013 else
27014 padlast = offset - 4;
27015 gcc_assert (padlast == 0 || padlast == 4);
27016 if (padlast == 4)
27017 fprintf (asm_out_file, "\t.pad #4\n");
27018 reg_size = 4;
27019 fprintf (asm_out_file, "\t.save {");
27021 else if (IS_VFP_REGNUM (reg))
27023 reg_size = 8;
27024 fprintf (asm_out_file, "\t.vsave {");
27026 else
27027 /* Unknown register type. */
27028 gcc_unreachable ();
27030 /* If the stack increment doesn't match the size of the saved registers,
27031 something has gone horribly wrong. */
27032 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27034 offset = padfirst;
27035 lastreg = 0;
27036 /* The remaining insns will describe the stores. */
27037 for (i = 1; i <= nregs; i++)
27039 /* Expect (set (mem <addr>) (reg)).
27040 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27041 e = XVECEXP (p, 0, i);
27042 gcc_assert (GET_CODE (e) == SET
27043 && MEM_P (SET_DEST (e))
27044 && REG_P (SET_SRC (e)));
27046 reg = REGNO (SET_SRC (e));
27047 gcc_assert (reg >= lastreg);
27049 if (i != 1)
27050 fprintf (asm_out_file, ", ");
27051 /* We can't use %r for vfp because we need to use the
27052 double precision register names. */
27053 if (IS_VFP_REGNUM (reg))
27054 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27055 else
27056 asm_fprintf (asm_out_file, "%r", reg);
27058 if (flag_checking)
27060 /* Check that the addresses are consecutive. */
27061 e = XEXP (SET_DEST (e), 0);
27062 if (GET_CODE (e) == PLUS)
27063 gcc_assert (REG_P (XEXP (e, 0))
27064 && REGNO (XEXP (e, 0)) == SP_REGNUM
27065 && CONST_INT_P (XEXP (e, 1))
27066 && offset == INTVAL (XEXP (e, 1)));
27067 else
27068 gcc_assert (i == 1
27069 && REG_P (e)
27070 && REGNO (e) == SP_REGNUM);
27071 offset += reg_size;
27074 fprintf (asm_out_file, "}\n");
27075 if (padfirst)
27076 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27079 /* Emit unwind directives for a SET. */
27081 static void
27082 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27084 rtx e0;
27085 rtx e1;
27086 unsigned reg;
27088 e0 = XEXP (p, 0);
27089 e1 = XEXP (p, 1);
27090 switch (GET_CODE (e0))
27092 case MEM:
27093 /* Pushing a single register. */
27094 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27095 || !REG_P (XEXP (XEXP (e0, 0), 0))
27096 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27097 abort ();
27099 asm_fprintf (asm_out_file, "\t.save ");
27100 if (IS_VFP_REGNUM (REGNO (e1)))
27101 asm_fprintf(asm_out_file, "{d%d}\n",
27102 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27103 else
27104 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27105 break;
27107 case REG:
27108 if (REGNO (e0) == SP_REGNUM)
27110 /* A stack increment. */
27111 if (GET_CODE (e1) != PLUS
27112 || !REG_P (XEXP (e1, 0))
27113 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27114 || !CONST_INT_P (XEXP (e1, 1)))
27115 abort ();
27117 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27118 -INTVAL (XEXP (e1, 1)));
27120 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27122 HOST_WIDE_INT offset;
27124 if (GET_CODE (e1) == PLUS)
27126 if (!REG_P (XEXP (e1, 0))
27127 || !CONST_INT_P (XEXP (e1, 1)))
27128 abort ();
27129 reg = REGNO (XEXP (e1, 0));
27130 offset = INTVAL (XEXP (e1, 1));
27131 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27132 HARD_FRAME_POINTER_REGNUM, reg,
27133 offset);
27135 else if (REG_P (e1))
27137 reg = REGNO (e1);
27138 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27139 HARD_FRAME_POINTER_REGNUM, reg);
27141 else
27142 abort ();
27144 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27146 /* Move from sp to reg. */
27147 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27149 else if (GET_CODE (e1) == PLUS
27150 && REG_P (XEXP (e1, 0))
27151 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27152 && CONST_INT_P (XEXP (e1, 1)))
27154 /* Set reg to offset from sp. */
27155 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27156 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27158 else
27159 abort ();
27160 break;
27162 default:
27163 abort ();
27168 /* Emit unwind directives for the given insn. */
27170 static void
27171 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27173 rtx note, pat;
27174 bool handled_one = false;
27176 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27177 return;
27179 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27180 && (TREE_NOTHROW (current_function_decl)
27181 || crtl->all_throwers_are_sibcalls))
27182 return;
27184 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27185 return;
27187 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27189 switch (REG_NOTE_KIND (note))
27191 case REG_FRAME_RELATED_EXPR:
27192 pat = XEXP (note, 0);
27193 goto found;
27195 case REG_CFA_REGISTER:
27196 pat = XEXP (note, 0);
27197 if (pat == NULL)
27199 pat = PATTERN (insn);
27200 if (GET_CODE (pat) == PARALLEL)
27201 pat = XVECEXP (pat, 0, 0);
27204 /* Only emitted for IS_STACKALIGN re-alignment. */
27206 rtx dest, src;
27207 unsigned reg;
27209 src = SET_SRC (pat);
27210 dest = SET_DEST (pat);
27212 gcc_assert (src == stack_pointer_rtx);
27213 reg = REGNO (dest);
27214 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27215 reg + 0x90, reg);
27217 handled_one = true;
27218 break;
27220 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27221 to get correct dwarf information for shrink-wrap. We should not
27222 emit unwind information for it because these are used either for
27223 pretend arguments or notes to adjust sp and restore registers from
27224 the stack. */
27225 case REG_CFA_DEF_CFA:
27226 case REG_CFA_ADJUST_CFA:
27227 case REG_CFA_RESTORE:
27228 return;
27230 case REG_CFA_EXPRESSION:
27231 case REG_CFA_OFFSET:
27232 /* ??? Only handling here what we actually emit. */
27233 gcc_unreachable ();
27235 default:
27236 break;
27239 if (handled_one)
27240 return;
27241 pat = PATTERN (insn);
27242 found:
27244 switch (GET_CODE (pat))
27246 case SET:
27247 arm_unwind_emit_set (asm_out_file, pat);
27248 break;
27250 case SEQUENCE:
27251 /* Store multiple. */
27252 arm_unwind_emit_sequence (asm_out_file, pat);
27253 break;
27255 default:
27256 abort();
27261 /* Output a reference from a function exception table to the type_info
27262 object X. The EABI specifies that the symbol should be relocated by
27263 an R_ARM_TARGET2 relocation. */
27265 static bool
27266 arm_output_ttype (rtx x)
27268 fputs ("\t.word\t", asm_out_file);
27269 output_addr_const (asm_out_file, x);
27270 /* Use special relocations for symbol references. */
27271 if (!CONST_INT_P (x))
27272 fputs ("(TARGET2)", asm_out_file);
27273 fputc ('\n', asm_out_file);
27275 return TRUE;
27278 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27280 static void
27281 arm_asm_emit_except_personality (rtx personality)
27283 fputs ("\t.personality\t", asm_out_file);
27284 output_addr_const (asm_out_file, personality);
27285 fputc ('\n', asm_out_file);
27287 #endif /* ARM_UNWIND_INFO */
27289 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27291 static void
27292 arm_asm_init_sections (void)
27294 #if ARM_UNWIND_INFO
27295 exception_section = get_unnamed_section (0, output_section_asm_op,
27296 "\t.handlerdata");
27297 #endif /* ARM_UNWIND_INFO */
27299 #ifdef OBJECT_FORMAT_ELF
27300 if (target_pure_code)
27301 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27302 #endif
27305 /* Output unwind directives for the start/end of a function. */
27307 void
27308 arm_output_fn_unwind (FILE * f, bool prologue)
27310 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27311 return;
27313 if (prologue)
27314 fputs ("\t.fnstart\n", f);
27315 else
27317 /* If this function will never be unwound, then mark it as such.
27318 The same condition is used in arm_unwind_emit to suppress
27319 the frame annotations. */
27320 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27321 && (TREE_NOTHROW (current_function_decl)
27322 || crtl->all_throwers_are_sibcalls))
27323 fputs("\t.cantunwind\n", f);
27325 fputs ("\t.fnend\n", f);
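/* Print to FP the operand of the UNSPEC_TLS expression X followed by the
   assembler decoration ("(tlsgd)", "(tpoff)", ...) selected by its TLS
   relocation kind.  */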
27329 static bool
27330 arm_emit_tls_decoration (FILE *fp, rtx x)
27332 enum tls_reloc reloc;
27333 rtx val;
27335 val = XVECEXP (x, 0, 0);
27336 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27338 output_addr_const (fp, val);
27340 switch (reloc)
27342 case TLS_GD32:
27343 fputs ("(tlsgd)", fp);
27344 break;
27345 case TLS_LDM32:
27346 fputs ("(tlsldm)", fp);
27347 break;
27348 case TLS_LDO32:
27349 fputs ("(tlsldo)", fp);
27350 break;
27351 case TLS_IE32:
27352 fputs ("(gottpoff)", fp);
27353 break;
27354 case TLS_LE32:
27355 fputs ("(tpoff)", fp);
27356 break;
27357 case TLS_DESCSEQ:
27358 fputs ("(tlsdesc)", fp);
27359 break;
27360 default:
27361 gcc_unreachable ();
27364 switch (reloc)
27366 case TLS_GD32:
27367 case TLS_LDM32:
27368 case TLS_IE32:
27369 case TLS_DESCSEQ:
27370 fputs (" + (. - ", fp);
27371 output_addr_const (fp, XVECEXP (x, 0, 2));
27372 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27373 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27374 output_addr_const (fp, XVECEXP (x, 0, 3));
27375 fputc (')', fp);
27376 break;
27377 default:
27378 break;
27381 return TRUE;
27384 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27386 static void
27387 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27389 gcc_assert (size == 4);
27390 fputs ("\t.word\t", file);
27391 output_addr_const (file, x);
27392 fputs ("(tlsldo)", file);
27395 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27397 static bool
27398 arm_output_addr_const_extra (FILE *fp, rtx x)
27400 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27401 return arm_emit_tls_decoration (fp, x);
27402 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27404 char label[256];
27405 int labelno = INTVAL (XVECEXP (x, 0, 0));
27407 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27408 assemble_name_raw (fp, label);
27410 return TRUE;
27412 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27414 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27415 if (GOT_PCREL)
27416 fputs ("+.", fp);
27417 fputs ("-(", fp);
27418 output_addr_const (fp, XVECEXP (x, 0, 0));
27419 fputc (')', fp);
27420 return TRUE;
27422 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27424 output_addr_const (fp, XVECEXP (x, 0, 0));
27425 if (GOT_PCREL)
27426 fputs ("+.", fp);
27427 fputs ("-(", fp);
27428 output_addr_const (fp, XVECEXP (x, 0, 1));
27429 fputc (')', fp);
27430 return TRUE;
27432 else if (GET_CODE (x) == CONST_VECTOR)
27433 return arm_emit_vector_const (fp, x);
27435 return FALSE;
27438 /* Output assembly for a shift instruction.
27439 SET_FLAGS determines how the instruction modifies the condition codes.
27440 0 - Do not set condition codes.
27441 1 - Set condition codes.
27442 2 - Use smallest instruction. */
27443 const char *
27444 arm_output_shift(rtx * operands, int set_flags)
27446 char pattern[100];
27447 static const char flag_chars[3] = {'?', '.', '!'};
27448 const char *shift;
27449 HOST_WIDE_INT val;
27450 char c;
27452 c = flag_chars[set_flags];
27453 shift = shift_op(operands[3], &val);
27454 if (shift)
27456 if (val != -1)
27457 operands[2] = GEN_INT(val);
27458 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27460 else
27461 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27463 output_asm_insn (pattern, operands);
27464 return "";
27467 /* Output assembly for a WMMX immediate shift instruction. */
27468 const char *
27469 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27471 int shift = INTVAL (operands[2]);
27472 char templ[50];
27473 machine_mode opmode = GET_MODE (operands[0]);
27475 gcc_assert (shift >= 0);
27477 /* Handle specially the case where the shift value in the register versions
27478 is > 63 (for D qualifier), 31 (for W qualifier) or 15 (for H qualifier). */
27479 if (((opmode == V4HImode) && (shift > 15))
27480 || ((opmode == V2SImode) && (shift > 31))
27481 || ((opmode == DImode) && (shift > 63)))
27483 if (wror_or_wsra)
27485 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27486 output_asm_insn (templ, operands);
27487 if (opmode == DImode)
27489 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27490 output_asm_insn (templ, operands);
27493 else
27495 /* The destination register will contain all zeros. */
27496 sprintf (templ, "wzero\t%%0");
27497 output_asm_insn (templ, operands);
27499 return "";
27502 if ((opmode == DImode) && (shift > 32))
27504 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27505 output_asm_insn (templ, operands);
27506 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27507 output_asm_insn (templ, operands);
27509 else
27511 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27512 output_asm_insn (templ, operands);
27514 return "";
27517 /* Output assembly for a WMMX tinsr instruction. */
27518 const char *
27519 arm_output_iwmmxt_tinsr (rtx *operands)
27521 int mask = INTVAL (operands[3]);
27522 int i;
27523 char templ[50];
27524 int units = mode_nunits[GET_MODE (operands[0])];
27525 gcc_assert ((mask & (mask - 1)) == 0);
27526 for (i = 0; i < units; ++i)
27528 if ((mask & 0x01) == 1)
27530 break;
27532 mask >>= 1;
27534 gcc_assert (i < units);
27536 switch (GET_MODE (operands[0]))
27538 case V8QImode:
27539 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27540 break;
27541 case V4HImode:
27542 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27543 break;
27544 case V2SImode:
27545 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27546 break;
27547 default:
27548 gcc_unreachable ();
27549 break;
27551 output_asm_insn (templ, operands);
27553 return "";
27556 /* Output a Thumb-1 casesi dispatch sequence. */
27557 const char *
27558 thumb1_output_casesi (rtx *operands)
27560 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27562 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27564 switch (GET_MODE(diff_vec))
27566 case QImode:
27567 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27568 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27569 case HImode:
27570 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27571 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27572 case SImode:
27573 return "bl\t%___gnu_thumb1_case_si";
27574 default:
27575 gcc_unreachable ();
27579 /* Output a Thumb-2 casesi instruction. */
27580 const char *
27581 thumb2_output_casesi (rtx *operands)
27583 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27585 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27587 output_asm_insn ("cmp\t%0, %1", operands);
27588 output_asm_insn ("bhi\t%l3", operands);
27589 switch (GET_MODE(diff_vec))
27591 case QImode:
27592 return "tbb\t[%|pc, %0]";
27593 case HImode:
27594 return "tbh\t[%|pc, %0, lsl #1]";
27595 case SImode:
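/* With -fpic the dispatch table holds offsets from the table start, so
   compute the absolute address before branching; otherwise the table
   holds absolute addresses and pc can be loaded directly.  */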
27596 if (flag_pic)
27598 output_asm_insn ("adr\t%4, %l2", operands);
27599 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27600 output_asm_insn ("add\t%4, %4, %5", operands);
27601 return "bx\t%4";
27603 else
27605 output_asm_insn ("adr\t%4, %l2", operands);
27606 return "ldr\t%|pc, [%4, %0, lsl #2]";
27608 default:
27609 gcc_unreachable ();
27613 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27614 per-core tuning structs. */
27615 static int
27616 arm_issue_rate (void)
27618 return current_tune->issue_rate;
27621 /* Return how many instructions the scheduler should look ahead to choose
27622 the best one. */
27623 static int
27624 arm_first_cycle_multipass_dfa_lookahead (void)
27626 int issue_rate = arm_issue_rate ();
27628 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27631 /* Enable modeling of L2 auto-prefetcher. */
27632 static int
27633 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27635 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
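/* Return the AAPCS-specific name mangling for TYPE (used for __va_list,
   half-precision float and the Neon builtin types), or NULL to fall back
   to the default mangling.  */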
27638 const char *
27639 arm_mangle_type (const_tree type)
27641 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27642 has to be mangled as if it is in the "std" namespace. */
27643 if (TARGET_AAPCS_BASED
27644 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27645 return "St9__va_list";
27647 /* Half-precision float. */
27648 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27649 return "Dh";
27651 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27652 builtin type. */
27653 if (TYPE_NAME (type) != NULL)
27654 return arm_mangle_builtin_type (type);
27656 /* Use the default mangling. */
27657 return NULL;
27660 /* Order of allocation of core registers for Thumb: this allocation is
27661 written over the corresponding initial entries of the array
27662 initialized with REG_ALLOC_ORDER. We allocate all low registers
27663 first. Saving and restoring a low register is usually cheaper than
27664 using a call-clobbered high register. */
27666 static const int thumb_core_reg_alloc_order[] =
27668 3, 2, 1, 0, 4, 5, 6, 7,
27669 12, 14, 8, 9, 10, 11
27672 /* Adjust register allocation order when compiling for Thumb. */
27674 void
27675 arm_order_regs_for_local_alloc (void)
27677 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27678 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27679 if (TARGET_THUMB)
27680 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27681 sizeof (thumb_core_reg_alloc_order));
27684 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27686 bool
27687 arm_frame_pointer_required (void)
27689 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27690 return true;
27692 /* If the function receives nonlocal gotos, it needs to save the frame
27693 pointer in the nonlocal_goto_save_area object. */
27694 if (cfun->has_nonlocal_label)
27695 return true;
27697 /* The frame pointer is required for non-leaf APCS frames. */
27698 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27699 return true;
27701 /* If we are probing the stack in the prologue, we will have a faulting
27702 instruction prior to the stack adjustment and this requires a frame
27703 pointer if we want to catch the exception using the EABI unwinder. */
27704 if (!IS_INTERRUPT (arm_current_func_type ())
27705 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27706 && arm_except_unwind_info (&global_options) == UI_TARGET
27707 && cfun->can_throw_non_call_exceptions)
27709 HOST_WIDE_INT size = get_frame_size ();
27711 /* That's irrelevant if there is no stack adjustment. */
27712 if (size <= 0)
27713 return false;
27715 /* That's relevant only if there is a stack probe. */
27716 if (crtl->is_leaf && !cfun->calls_alloca)
27718 /* We don't have the final size of the frame so adjust. */
27719 size += 32 * UNITS_PER_WORD;
27720 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27721 return true;
27723 else
27724 return true;
27727 return false;
27730 /* Thumb-1 is the only target without conditional execution, so return
27731 true if the target is not Thumb-1. */
27732 static bool
27733 arm_have_conditional_execution (void)
27735 return !TARGET_THUMB1;
27738 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27739 static HOST_WIDE_INT
27740 arm_vector_alignment (const_tree type)
27742 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27744 if (TARGET_AAPCS_BASED)
27745 align = MIN (align, 64);
27747 return align;
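/* Return the vector sizes, in bytes, that the auto-vectorizer should try:
   both quad-word (16) and double-word (8) vectors, unless
   -mvectorize-with-neon-double restricts us to the preferred mode only.  */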
27750 static unsigned int
27751 arm_autovectorize_vector_sizes (void)
27753 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27756 static bool
27757 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27759 /* Vectors which aren't in packed structures will not be less aligned than
27760 the natural alignment of their element type, so this is safe. */
27761 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27762 return !is_packed;
27764 return default_builtin_vector_alignment_reachable (type, is_packed);
27767 static bool
27768 arm_builtin_support_vector_misalignment (machine_mode mode,
27769 const_tree type, int misalignment,
27770 bool is_packed)
27772 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27774 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27776 if (is_packed)
27777 return align == 1;
27779 /* If the misalignment is unknown, we should be able to handle the access
27780 so long as it is not to a member of a packed data structure. */
27781 if (misalignment == -1)
27782 return true;
27784 /* Return true if the misalignment is a multiple of the natural alignment
27785 of the vector's element type. This is probably always going to be
27786 true in practice, since we've already established that this isn't a
27787 packed access. */
27788 return ((misalignment % align) == 0);
27791 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27792 is_packed);
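/* Adjust the sets of fixed and call-used registers for the selected
   target options: Thumb-1 high registers, the link register, VFP and
   iWMMXt registers, the PIC register and the APCS frame pointer.  */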
27795 static void
27796 arm_conditional_register_usage (void)
27798 int regno;
27800 if (TARGET_THUMB1 && optimize_size)
27802 /* When optimizing for size on Thumb-1, it's better not
27803 to use the HI regs, because of the overhead of
27804 stacking them. */
27805 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27806 fixed_regs[regno] = call_used_regs[regno] = 1;
27809 /* The link register can be clobbered by any branch insn,
27810 but we have no way to track that at present, so mark
27811 it as unavailable. */
27812 if (TARGET_THUMB1)
27813 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27815 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27817 /* VFPv3 registers are disabled when earlier VFP
27818 versions are selected due to the definition of
27819 LAST_VFP_REGNUM. */
27820 for (regno = FIRST_VFP_REGNUM;
27821 regno <= LAST_VFP_REGNUM; ++ regno)
27823 fixed_regs[regno] = 0;
27824 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27825 || regno >= FIRST_VFP_REGNUM + 32;
27829 if (TARGET_REALLY_IWMMXT)
27831 regno = FIRST_IWMMXT_GR_REGNUM;
27832 /* The 2002/10/09 revision of the XScale ABI has wCG0
27833 and wCG1 as call-preserved registers. The 2002/11/21
27834 revision changed this so that all wCG registers are
27835 scratch registers. */
27836 for (regno = FIRST_IWMMXT_GR_REGNUM;
27837 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27838 fixed_regs[regno] = 0;
27839 /* The XScale ABI has wR0 - wR9 as scratch registers,
27840 the rest as call-preserved registers. */
27841 for (regno = FIRST_IWMMXT_REGNUM;
27842 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27844 fixed_regs[regno] = 0;
27845 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27849 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27851 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27852 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27854 else if (TARGET_APCS_STACK)
27856 fixed_regs[10] = 1;
27857 call_used_regs[10] = 1;
27859 /* -mcaller-super-interworking reserves r11 for calls to
27860 _interwork_r11_call_via_rN(). Making the register global
27861 is an easy way of ensuring that it remains valid for all
27862 calls. */
27863 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27864 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27866 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27867 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27868 if (TARGET_CALLER_INTERWORKING)
27869 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27871 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27874 static reg_class_t
27875 arm_preferred_rename_class (reg_class_t rclass)
27877 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27878 using GENERIC_REGS. During the register rename pass, we prefer LO_REGS
27879 so that code size can be reduced. */
27880 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27881 return LO_REGS;
27882 else
27883 return NO_REGS;
27886 /* Compute the attribute "length" of insn "*push_multi".
27887 So this function MUST be kept in sync with that insn pattern. */
27889 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27891 int i, regno, hi_reg;
27892 int num_saves = XVECLEN (parallel_op, 0);
27894 /* ARM mode. */
27895 if (TARGET_ARM)
27896 return 4;
27897 /* Thumb1 mode. */
27898 if (TARGET_THUMB1)
27899 return 2;
27901 /* Thumb2 mode. */
27902 regno = REGNO (first_op);
27903 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
27904 list is 8-bit. Normally this means all registers in the list must be
27905 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use 32-bit
27906 encodings. There is one exception for PUSH: LR, although in HI_REGS, can be
27907 used with the 16-bit encoding. */
27908 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27909 for (i = 1; i < num_saves && !hi_reg; i++)
27911 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27912 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27915 if (!hi_reg)
27916 return 2;
27917 return 4;
27920 /* Compute the attribute "length" of insn. Currently, this function is used
27921 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27922 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27923 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
27924 true if OPERANDS contains an insn which explicitly updates the base register. */
27927 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
27929 /* ARM mode. */
27930 if (TARGET_ARM)
27931 return 4;
27932 /* Thumb1 mode. */
27933 if (TARGET_THUMB1)
27934 return 2;
27936 rtx parallel_op = operands[0];
27937 /* Start from the last element of the PARALLEL. */
27938 unsigned indx = XVECLEN (parallel_op, 0) - 1;
27939 /* The base register of the pop/LDM. */
27940 unsigned regno = REGNO (operands[1]);
27941 /* Skip the return and write back patterns; only the register pop
27942 patterns are needed for the analysis below. */
27943 unsigned first_indx = 0;
27944 first_indx += return_pc ? 1 : 0;
27945 first_indx += write_back_p ? 1 : 0;
27947 /* A pop operation can be done through LDM or POP. If the base register is SP
27948 and write back is used, then an LDM is an alias of POP. */
27949 bool pop_p = (regno == SP_REGNUM && write_back_p);
27950 bool ldm_p = !pop_p;
27952 /* Check base register for LDM. */
27953 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
27954 return 4;
27956 /* Check each register in the list. */
27957 for (; indx >= first_indx; indx--)
27959 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
27960 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
27961 comment in arm_attr_length_push_multi. */
27962 if (REGNO_REG_CLASS (regno) == HI_REGS
27963 && (regno != PC_REGNUM || ldm_p))
27964 return 4;
27967 return 2;
27970 /* Compute the number of instructions emitted by output_move_double. */
27972 arm_count_output_move_double_insns (rtx *operands)
27974 int count;
27975 rtx ops[2];
27976 /* output_move_double may modify the operands array, so call it
27977 here on a copy of the array. */
27978 ops[0] = operands[0];
27979 ops[1] = operands[1];
27980 output_move_double (ops, false, &count);
27981 return count;
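/* If OPERAND is a CONST_DOUBLE equal to 1 / 2^N for some N in [0, 31],
   return N (the number of fractional bits); otherwise return 0.  */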
27985 vfp3_const_double_for_fract_bits (rtx operand)
27987 REAL_VALUE_TYPE r0;
27989 if (!CONST_DOUBLE_P (operand))
27990 return 0;
27992 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
27993 if (exact_real_inverse (DFmode, &r0)
27994 && !REAL_VALUE_NEGATIVE (r0))
27996 if (exact_real_truncate (DFmode, &r0))
27998 HOST_WIDE_INT value = real_to_integer (&r0);
27999 value = value & 0xffffffff;
28000 if ((value != 0) && ( (value & (value - 1)) == 0))
28002 int ret = exact_log2 (value);
28003 gcc_assert (IN_RANGE (ret, 0, 31));
28004 return ret;
28008 return 0;
28011 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28012 log2 is in [1, 32], return that log2. Otherwise return -1.
28013 This is used in the patterns for vcvt.s32.f32 floating-point to
28014 fixed-point conversions. */
28017 vfp3_const_double_for_bits (rtx x)
28019 const REAL_VALUE_TYPE *r;
28021 if (!CONST_DOUBLE_P (x))
28022 return -1;
28024 r = CONST_DOUBLE_REAL_VALUE (x);
28026 if (REAL_VALUE_NEGATIVE (*r)
28027 || REAL_VALUE_ISNAN (*r)
28028 || REAL_VALUE_ISINF (*r)
28029 || !real_isinteger (r, SFmode))
28030 return -1;
28032 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28034 /* The exact_log2 above will have returned -1 if this is
28035 not an exact log2. */
28036 if (!IN_RANGE (hwint, 1, 32))
28037 return -1;
28039 return hwint;
28043 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28045 static void
28046 arm_pre_atomic_barrier (enum memmodel model)
28048 if (need_atomic_barrier_p (model, true))
28049 emit_insn (gen_memory_barrier ());
28052 static void
28053 arm_post_atomic_barrier (enum memmodel model)
28055 if (need_atomic_barrier_p (model, false))
28056 emit_insn (gen_memory_barrier ());
28059 /* Emit the load-exclusive and store-exclusive instructions.
28060 Use acquire and release versions if necessary. */
28062 static void
28063 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28065 rtx (*gen) (rtx, rtx);
28067 if (acq)
28069 switch (mode)
28071 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28072 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28073 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28074 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28075 default:
28076 gcc_unreachable ();
28079 else
28081 switch (mode)
28083 case QImode: gen = gen_arm_load_exclusiveqi; break;
28084 case HImode: gen = gen_arm_load_exclusivehi; break;
28085 case SImode: gen = gen_arm_load_exclusivesi; break;
28086 case DImode: gen = gen_arm_load_exclusivedi; break;
28087 default:
28088 gcc_unreachable ();
28092 emit_insn (gen (rval, mem));
28095 static void
28096 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28097 rtx mem, bool rel)
28099 rtx (*gen) (rtx, rtx, rtx);
28101 if (rel)
28103 switch (mode)
28105 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
28106 case HImode: gen = gen_arm_store_release_exclusivehi; break;
28107 case SImode: gen = gen_arm_store_release_exclusivesi; break;
28108 case DImode: gen = gen_arm_store_release_exclusivedi; break;
28109 default:
28110 gcc_unreachable ();
28113 else
28115 switch (mode)
28117 case QImode: gen = gen_arm_store_exclusiveqi; break;
28118 case HImode: gen = gen_arm_store_exclusivehi; break;
28119 case SImode: gen = gen_arm_store_exclusivesi; break;
28120 case DImode: gen = gen_arm_store_exclusivedi; break;
28121 default:
28122 gcc_unreachable ();
28126 emit_insn (gen (bval, rval, mem));
28129 /* Mark the previous jump instruction as unlikely. */
28131 static void
28132 emit_unlikely_jump (rtx insn)
28134 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
28136 rtx_insn *jump = emit_jump_insn (insn);
28137 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
28140 /* Expand a compare and swap pattern. */
28142 void
28143 arm_expand_compare_and_swap (rtx operands[])
28145 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28146 machine_mode mode;
28147 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28149 bval = operands[0];
28150 rval = operands[1];
28151 mem = operands[2];
28152 oldval = operands[3];
28153 newval = operands[4];
28154 is_weak = operands[5];
28155 mod_s = operands[6];
28156 mod_f = operands[7];
28157 mode = GET_MODE (mem);
28159 /* Normally the succ memory model must be stronger than fail, but in the
28160 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28161 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28163 if (TARGET_HAVE_LDACQ
28164 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28165 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28166 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28168 switch (mode)
28170 case QImode:
28171 case HImode:
28172 /* For narrow modes, we're going to perform the comparison in SImode,
28173 so do the zero-extension now. */
28174 rval = gen_reg_rtx (SImode);
28175 oldval = convert_modes (SImode, mode, oldval, true);
28176 /* FALLTHRU */
28178 case SImode:
28179 /* Force the value into a register if needed. We waited until after
28180 the zero-extension above to do this properly. */
28181 if (!arm_add_operand (oldval, SImode))
28182 oldval = force_reg (SImode, oldval);
28183 break;
28185 case DImode:
28186 if (!cmpdi_operand (oldval, mode))
28187 oldval = force_reg (mode, oldval);
28188 break;
28190 default:
28191 gcc_unreachable ();
28194 switch (mode)
28196 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
28197 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
28198 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
28199 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
28200 default:
28201 gcc_unreachable ();
28204 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CCmode, CC_REGNUM);
28205 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
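/* For narrow modes the comparison was performed in SImode; copy the low
   part of the result back into the caller-visible output operand.  */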
28207 if (mode == QImode || mode == HImode)
28208 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28210 /* In all cases, we arrange for success to be signaled by Z set.
28211 This arrangement allows for the boolean result to be used directly
28212 in a subsequent branch, post optimization. For Thumb-1 targets, the
28213 boolean negation of the result is also stored in bval because the Thumb-1
28214 backend lacks dependency tracking for the CC flag, as flag-setting is not
28215 represented at the RTL level. */
28216 if (TARGET_THUMB1)
28217 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28218 else
28220 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28221 emit_insn (gen_rtx_SET (bval, x));
28225 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28226 another memory store between the load-exclusive and store-exclusive can
28227 reset the monitor from Exclusive to Open state. This means we must wait
28228 until after reload to split the pattern, lest we get a register spill in
28229 the middle of the atomic sequence. Success of the compare and swap is
28230 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
28231 for Thumb-1 targets (i.e. the negation of the boolean value returned by
28232 the atomic_compare_and_swap<mode> standard pattern in operand 0). */
28234 void
28235 arm_split_compare_and_swap (rtx operands[])
28237 rtx rval, mem, oldval, newval, neg_bval;
28238 machine_mode mode;
28239 enum memmodel mod_s, mod_f;
28240 bool is_weak;
28241 rtx_code_label *label1, *label2;
28242 rtx x, cond;
28244 rval = operands[1];
28245 mem = operands[2];
28246 oldval = operands[3];
28247 newval = operands[4];
28248 is_weak = (operands[5] != const0_rtx);
28249 mod_s = memmodel_from_int (INTVAL (operands[6]));
28250 mod_f = memmodel_from_int (INTVAL (operands[7]));
28251 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28252 mode = GET_MODE (mem);
28254 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28256 bool use_acquire = TARGET_HAVE_LDACQ
28257 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28258 || is_mm_release (mod_s));
28260 bool use_release = TARGET_HAVE_LDACQ
28261 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28262 || is_mm_acquire (mod_s));
28264 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28265 a full barrier is emitted after the store-release. */
28266 if (is_armv8_sync)
28267 use_acquire = false;
28269 /* Checks whether a barrier is needed and emits one accordingly. */
28270 if (!(use_acquire || use_release))
28271 arm_pre_atomic_barrier (mod_s);
28273 label1 = NULL;
28274 if (!is_weak)
28276 label1 = gen_label_rtx ();
28277 emit_label (label1);
28279 label2 = gen_label_rtx ();
28281 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28283 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28284 as required to communicate with arm_expand_compare_and_swap. */
28285 if (TARGET_32BIT)
28287 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28288 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28289 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28290 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28291 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28293 else
28295 emit_move_insn (neg_bval, const1_rtx);
28296 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28297 if (thumb1_cmpneg_operand (oldval, SImode))
28298 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28299 label2, cond));
28300 else
28301 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28304 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28306 /* Weak or strong, we want EQ to be true for success, so that we
28307 match the flags that we got from the compare above. */
28308 if (TARGET_32BIT)
28310 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28311 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28312 emit_insn (gen_rtx_SET (cond, x));
28315 if (!is_weak)
28317 /* Z is set to boolean value of !neg_bval, as required to communicate
28318 with arm_expand_compare_and_swap. */
28319 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28320 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28323 if (!is_mm_relaxed (mod_f))
28324 emit_label (label2);
28326 /* Checks whether a barrier is needed and emits one accordingly. */
28327 if (is_armv8_sync
28328 || !(use_acquire || use_release))
28329 arm_post_atomic_barrier (mod_s);
28331 if (is_mm_relaxed (mod_f))
28332 emit_label (label2);
28335 /* Split an atomic operation pattern. Operation is given by CODE and is one
28336 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28337 operation). Operation is performed on the content at MEM and on VALUE
28338 following the memory model MODEL_RTX. The content at MEM before and after
28339 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28340 success of the operation is returned in COND. Using a scratch register or
28341 an operand register for these determines what result is returned for that
28342 pattern. */
28344 void
28345 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28346 rtx value, rtx model_rtx, rtx cond)
28348 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28349 machine_mode mode = GET_MODE (mem);
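/* Narrow (QImode/HImode) operations are carried out in SImode; DImode
   keeps its own width.  */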
28350 machine_mode wmode = (mode == DImode ? DImode : SImode);
28351 rtx_code_label *label;
28352 bool all_low_regs, bind_old_new;
28353 rtx x;
28355 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28357 bool use_acquire = TARGET_HAVE_LDACQ
28358 && !(is_mm_relaxed (model) || is_mm_consume (model)
28359 || is_mm_release (model));
28361 bool use_release = TARGET_HAVE_LDACQ
28362 && !(is_mm_relaxed (model) || is_mm_consume (model)
28363 || is_mm_acquire (model));
28365 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28366 a full barrier is emitted after the store-release. */
28367 if (is_armv8_sync)
28368 use_acquire = false;
28370 /* Checks whether a barrier is needed and emits one accordingly. */
28371 if (!(use_acquire || use_release))
28372 arm_pre_atomic_barrier (model);
28374 label = gen_label_rtx ();
28375 emit_label (label);
28377 if (new_out)
28378 new_out = gen_lowpart (wmode, new_out);
28379 if (old_out)
28380 old_out = gen_lowpart (wmode, old_out);
28381 else
28382 old_out = new_out;
28383 value = simplify_gen_subreg (wmode, value, mode, 0);
28385 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28387 /* Does the operation require destination and first operand to use the same
28388 register? This is decided by register constraints of relevant insn
28389 patterns in thumb1.md. */
28390 gcc_assert (!new_out || REG_P (new_out));
28391 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28392 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28393 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28394 bind_old_new =
28395 (TARGET_THUMB1
28396 && code != SET
28397 && code != MINUS
28398 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28400 /* We want to return the old value while putting the result of the operation
28401 in the same register as the old value so copy the old value over to the
28402 destination register and use that register for the operation. */
28403 if (old_out && bind_old_new)
28405 emit_move_insn (new_out, old_out);
28406 old_out = new_out;
28409 switch (code)
28411 case SET:
28412 new_out = value;
28413 break;
28415 case NOT:
28416 x = gen_rtx_AND (wmode, old_out, value);
28417 emit_insn (gen_rtx_SET (new_out, x));
28418 x = gen_rtx_NOT (wmode, new_out);
28419 emit_insn (gen_rtx_SET (new_out, x));
28420 break;
28422 case MINUS:
28423 if (CONST_INT_P (value))
28425 value = GEN_INT (-INTVAL (value));
28426 code = PLUS;
28428 /* FALLTHRU */
28430 case PLUS:
28431 if (mode == DImode)
28433 /* DImode plus/minus need to clobber flags. */
28434 /* The adddi3 and subdi3 patterns are incorrectly written so that
28435 they require matching operands, even when we could easily support
28436 three operands. Thankfully, this can be fixed up post-splitting,
28437 as the individual add+adc patterns do accept three operands and
28438 post-reload cprop can make these moves go away. */
28439 emit_move_insn (new_out, old_out);
28440 if (code == PLUS)
28441 x = gen_adddi3 (new_out, new_out, value);
28442 else
28443 x = gen_subdi3 (new_out, new_out, value);
28444 emit_insn (x);
28445 break;
28447 /* FALLTHRU */
28449 default:
28450 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28451 emit_insn (gen_rtx_SET (new_out, x));
28452 break;
28455 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28456 use_release);
28458 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28459 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28461 /* Checks whether a barrier is needed and emits one accordingly. */
28462 if (is_armv8_sync
28463 || !(use_acquire || use_release))
28464 arm_post_atomic_barrier (model);
28467 #define MAX_VECT_LEN 16
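/* Description of a constant vector permutation: the target and input
   operands, the permutation indices, the element count, whether both
   inputs are the same vector, and whether we are only testing that the
   permutation can be expanded.  */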
28469 struct expand_vec_perm_d
28471 rtx target, op0, op1;
28472 unsigned char perm[MAX_VECT_LEN];
28473 machine_mode vmode;
28474 unsigned char nelt;
28475 bool one_vector_p;
28476 bool testing_p;
28479 /* Generate a variable permutation. */
28481 static void
28482 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28484 machine_mode vmode = GET_MODE (target);
28485 bool one_vector_p = rtx_equal_p (op0, op1);
28487 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28488 gcc_checking_assert (GET_MODE (op0) == vmode);
28489 gcc_checking_assert (GET_MODE (op1) == vmode);
28490 gcc_checking_assert (GET_MODE (sel) == vmode);
28491 gcc_checking_assert (TARGET_NEON);
28493 if (one_vector_p)
28495 if (vmode == V8QImode)
28496 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28497 else
28498 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28500 else
28502 rtx pair;
28504 if (vmode == V8QImode)
28506 pair = gen_reg_rtx (V16QImode);
28507 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28508 pair = gen_lowpart (TImode, pair);
28509 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28511 else
28513 pair = gen_reg_rtx (OImode);
28514 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28515 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28520 void
28521 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28523 machine_mode vmode = GET_MODE (target);
28524 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28525 bool one_vector_p = rtx_equal_p (op0, op1);
28526 rtx rmask[MAX_VECT_LEN], mask;
28528 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28529 numbering of elements for big-endian, we must reverse the order. */
28530 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28532 /* The VTBL instruction does not use a modulo index, so we must take care
28533 of that ourselves. */
28534 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28535 for (i = 0; i < nelt; ++i)
28536 rmask[i] = mask;
28537 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28538 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28540 arm_expand_vec_perm_1 (target, op0, op1, sel);
28543 /* Map lane ordering between architectural lane order, and GCC lane order,
28544 taking into account ABI. See comment above output_move_neon for details. */
28546 static int
28547 neon_endian_lane_map (machine_mode mode, int lane)
28549 if (BYTES_BIG_ENDIAN)
28551 int nelems = GET_MODE_NUNITS (mode);
28552 /* Reverse lane order. */
28553 lane = (nelems - 1 - lane);
28554 /* Reverse D register order, to match ABI. */
28555 if (GET_MODE_SIZE (mode) == 16)
28556 lane = lane ^ (nelems / 2);
28558 return lane;
28561 /* Some permutations index into pairs of vectors, this is a helper function
28562 to map indexes into those pairs of vectors. */
28564 static int
28565 neon_pair_endian_lane_map (machine_mode mode, int lane)
28567 int nelem = GET_MODE_NUNITS (mode);
28568 if (BYTES_BIG_ENDIAN)
28569 lane =
28570 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28571 return lane;
28574 /* Generate or test for an insn that supports a constant permutation. */
28576 /* Recognize patterns for the VUZP insns. */
28578 static bool
28579 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28581 unsigned int i, odd, mask, nelt = d->nelt;
28582 rtx out0, out1, in0, in1;
28583 rtx (*gen)(rtx, rtx, rtx, rtx);
28584 int first_elem;
28585 int swap_nelt;
28587 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28588 return false;
28590 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28591 big endian pattern on 64 bit vectors, so we correct for that. */
28592 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28593 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28595 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28597 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28598 odd = 0;
28599 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28600 odd = 1;
28601 else
28602 return false;
28603 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28605 for (i = 0; i < nelt; i++)
28607 unsigned elt =
28608 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28609 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28610 return false;
28613 /* Success! */
28614 if (d->testing_p)
28615 return true;
28617 switch (d->vmode)
28619 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28620 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28621 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28622 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28623 case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28624 case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28625 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28626 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28627 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28628 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28629 default:
28630 gcc_unreachable ();
28633 in0 = d->op0;
28634 in1 = d->op1;
28635 if (swap_nelt != 0)
28636 std::swap (in0, in1);
28638 out0 = d->target;
28639 out1 = gen_reg_rtx (d->vmode);
28640 if (odd)
28641 std::swap (out0, out1);
28643 emit_insn (gen (out0, in0, in1, out1));
28644 return true;
28647 /* Recognize patterns for the VZIP insns. */
28649 static bool
28650 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28652 unsigned int i, high, mask, nelt = d->nelt;
28653 rtx out0, out1, in0, in1;
28654 rtx (*gen)(rtx, rtx, rtx, rtx);
28655 int first_elem;
28656 bool is_swapped;
28658 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28659 return false;
28661 is_swapped = BYTES_BIG_ENDIAN;
28663 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28665 high = nelt / 2;
28666 if (first_elem == neon_endian_lane_map (d->vmode, high))
28668 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28669 high = 0;
28670 else
28671 return false;
28672 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28674 for (i = 0; i < nelt / 2; i++)
28676 unsigned elt =
28677 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28678 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28679 != elt)
28680 return false;
28681 elt =
28682 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28683 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28684 != elt)
28685 return false;
28688 /* Success! */
28689 if (d->testing_p)
28690 return true;
28692 switch (d->vmode)
28694 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28695 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28696 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28697 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28698 case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28699 case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28700 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28701 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28702 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28703 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28704 default:
28705 gcc_unreachable ();
28708 in0 = d->op0;
28709 in1 = d->op1;
28710 if (is_swapped)
28711 std::swap (in0, in1);
28713 out0 = d->target;
28714 out1 = gen_reg_rtx (d->vmode);
28715 if (high)
28716 std::swap (out0, out1);
28718 emit_insn (gen (out0, in0, in1, out1));
28719 return true;
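/* Illustrative sketch, not GCC code: the VZIP selector shape with the endian
   corrections stripped out.  A "zip" interleaves corresponding lanes from
   the low (or high) halves of the two inputs, so for nelt == 4 and
   high == 0 the selector is { 0, 4, 1, 5 }, and for high == 2 it is
   { 2, 6, 3, 7 }.  */
#if 0
static bool
is_zip_selector_sketch (const unsigned char *perm, unsigned nelt,
                        unsigned high, bool one_vector)
{
  unsigned mask = one_vector ? nelt - 1 : 2 * nelt - 1;
  for (unsigned i = 0; i < nelt / 2; i++)
    {
      if (perm[2 * i] != ((i + high) & mask))
        return false;
      if (perm[2 * i + 1] != ((i + nelt + high) & mask))
        return false;
    }
  return true;
}
#endif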
28722 /* Recognize patterns for the VREV insns. */
28724 static bool
28725 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28727 unsigned int i, j, diff, nelt = d->nelt;
28728 rtx (*gen)(rtx, rtx);
28730 if (!d->one_vector_p)
28731 return false;
28733 diff = d->perm[0];
28734 switch (diff)
28736 case 7:
28737 switch (d->vmode)
28739 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28740 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28741 default:
28742 return false;
28744 break;
28745 case 3:
28746 switch (d->vmode)
28748 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28749 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28750 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28751 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28752 case V8HFmode: gen = gen_neon_vrev64v8hf; break;
28753 case V4HFmode: gen = gen_neon_vrev64v4hf; break;
28754 default:
28755 return false;
28757 break;
28758 case 1:
28759 switch (d->vmode)
28761 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28762 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28763 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28764 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28765 case V4SImode: gen = gen_neon_vrev64v4si; break;
28766 case V2SImode: gen = gen_neon_vrev64v2si; break;
28767 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28768 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28769 default:
28770 return false;
28772 break;
28773 default:
28774 return false;
28777 for (i = 0; i < nelt ; i += diff + 1)
28778 for (j = 0; j <= diff; j += 1)
28780 /* This is guaranteed to be true as the value of diff
28781 is 7, 3 or 1 and we should have enough elements in the
28782 queue to generate this. Getting a vector mask with a
28783 value of diff other than these values implies that
28784 something is wrong by the time we get here. */
28785 gcc_assert (i + j < nelt);
28786 if (d->perm[i + j] != i + diff - j)
28787 return false;
28790 /* Success! */
28791 if (d->testing_p)
28792 return true;
28794 emit_insn (gen (d->target, d->op0));
28795 return true;
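/* Illustrative sketch, not GCC code: the VREV selector shape.  VREV reverses
   the elements inside each group of (diff + 1) elements, where diff is 1, 3
   or 7 depending on the element and group size; e.g. a v8qi VREV32 has
   diff == 3 and selector { 3, 2, 1, 0, 7, 6, 5, 4 }.  */
#if 0
static bool
is_rev_selector_sketch (const unsigned char *perm, unsigned nelt,
                        unsigned diff)
{
  for (unsigned i = 0; i < nelt; i += diff + 1)
    for (unsigned j = 0; j <= diff; j++)
      if (perm[i + j] != i + diff - j)
        return false;
  return true;
}
#endif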
28798 /* Recognize patterns for the VTRN insns. */
28800 static bool
28801 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28803 unsigned int i, odd, mask, nelt = d->nelt;
28804 rtx out0, out1, in0, in1;
28805 rtx (*gen)(rtx, rtx, rtx, rtx);
28807 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28808 return false;
28810 /* Note that these are little-endian tests. Adjust for big-endian later. */
28811 if (d->perm[0] == 0)
28812 odd = 0;
28813 else if (d->perm[0] == 1)
28814 odd = 1;
28815 else
28816 return false;
28817 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28819 for (i = 0; i < nelt; i += 2)
28821 if (d->perm[i] != i + odd)
28822 return false;
28823 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28824 return false;
28827 /* Success! */
28828 if (d->testing_p)
28829 return true;
28831 switch (d->vmode)
28833 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28834 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28835 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28836 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28837 case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
28838 case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
28839 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28840 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28841 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28842 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28843 default:
28844 gcc_unreachable ();
28847 in0 = d->op0;
28848 in1 = d->op1;
28849 if (BYTES_BIG_ENDIAN)
28851 std::swap (in0, in1);
28852 odd = !odd;
28855 out0 = d->target;
28856 out1 = gen_reg_rtx (d->vmode);
28857 if (odd)
28858 std::swap (out0, out1);
28860 emit_insn (gen (out0, in0, in1, out1));
28861 return true;
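/* Illustrative sketch, not GCC code: the VTRN selector in its little-endian
   form, before the big-endian operand swap above.  VTRN transposes each 2x2
   block of adjacent elements across the two inputs, so for nelt == 4 the
   selector is { 0, 4, 2, 6 } when odd == 0 and { 1, 5, 3, 7 } when
   odd == 1.  */
#if 0
static bool
is_trn_selector_sketch (const unsigned char *perm, unsigned nelt,
                        unsigned odd, bool one_vector)
{
  unsigned mask = one_vector ? nelt - 1 : 2 * nelt - 1;
  for (unsigned i = 0; i < nelt; i += 2)
    {
      if (perm[i] != i + odd)
        return false;
      if (perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }
  return true;
}
#endif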
28864 /* Recognize patterns for the VEXT insns. */
28866 static bool
28867 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28869 unsigned int i, nelt = d->nelt;
28870 rtx (*gen) (rtx, rtx, rtx, rtx);
28871 rtx offset;
28873 unsigned int location;
28875 unsigned int next = d->perm[0] + 1;
28877 /* TODO: Handle GCC's numbering of elements for big-endian. */
28878 if (BYTES_BIG_ENDIAN)
28879 return false;
28881 /* Check if the extracted indexes are increasing by one. */
28882 for (i = 1; i < nelt; next++, i++)
28884 /* If we hit the most significant element of the 2nd vector in
28885 the previous iteration, no need to test further. */
28886 if (next == 2 * nelt)
28887 return false;
28889 /* If we are operating on only one vector: it could be a
28890 rotation. If there are only two elements of size < 64, let
28891 arm_evpc_neon_vrev catch it. */
28892 if (d->one_vector_p && (next == nelt))
28894 if ((nelt == 2) && (d->vmode != V2DImode))
28895 return false;
28896 else
28897 next = 0;
28900 if (d->perm[i] != next)
28901 return false;
28904 location = d->perm[0];
28906 switch (d->vmode)
28908 case V16QImode: gen = gen_neon_vextv16qi; break;
28909 case V8QImode: gen = gen_neon_vextv8qi; break;
28910 case V4HImode: gen = gen_neon_vextv4hi; break;
28911 case V8HImode: gen = gen_neon_vextv8hi; break;
28912 case V2SImode: gen = gen_neon_vextv2si; break;
28913 case V4SImode: gen = gen_neon_vextv4si; break;
28914 case V4HFmode: gen = gen_neon_vextv4hf; break;
28915 case V8HFmode: gen = gen_neon_vextv8hf; break;
28916 case V2SFmode: gen = gen_neon_vextv2sf; break;
28917 case V4SFmode: gen = gen_neon_vextv4sf; break;
28918 case V2DImode: gen = gen_neon_vextv2di; break;
28919 default:
28920 return false;
28923 /* Success! */
28924 if (d->testing_p)
28925 return true;
28927 offset = GEN_INT (location);
28928 emit_insn (gen (d->target, d->op0, d->op1, offset));
28929 return true;
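/* Illustrative sketch, not GCC code: the VEXT selector is simply a run of
   consecutive indices starting at some location, e.g. for nelt == 8 and a
   starting index of 3 it is { 3, 4, 5, 6, 7, 8, 9, 10 }: the tail of the
   first vector followed by the head of the second.  A single-vector
   rotation wraps around at nelt instead; the real code above additionally
   leaves two-element rotations to the VREV path.  */
#if 0
static bool
is_ext_selector_sketch (const unsigned char *perm, unsigned nelt,
                        bool one_vector)
{
  unsigned next = perm[0] + 1;
  for (unsigned i = 1; i < nelt; i++, next++)
    {
      if (next == 2 * nelt)
        return false;
      if (one_vector && next == nelt)
        next = 0;
      if (perm[i] != next)
        return false;
    }
  return true;
}
#endif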
28932 /* The NEON VTBL instruction is a fully variable permutation that's even
28933 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28934 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28935 can do slightly better by expanding this as a constant where we don't
28936 have to apply a mask. */
28938 static bool
28939 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28941 rtx rperm[MAX_VECT_LEN], sel;
28942 machine_mode vmode = d->vmode;
28943 unsigned int i, nelt = d->nelt;
28945 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28946 numbering of elements for big-endian, we must reverse the order. */
28947 if (BYTES_BIG_ENDIAN)
28948 return false;
28950 if (d->testing_p)
28951 return true;
28953 /* Generic code will try constant permutation twice. Once with the
28954 original mode and again with the elements lowered to QImode.
28955 So wait and don't do the selector expansion ourselves. */
28956 if (vmode != V8QImode && vmode != V16QImode)
28957 return false;
28959 for (i = 0; i < nelt; ++i)
28960 rperm[i] = GEN_INT (d->perm[i]);
28961 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28962 sel = force_reg (vmode, sel);
28964 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28965 return true;
28968 static bool
28969 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28971 /* Check if the input mask matches vext before reordering the
28972 operands. */
28973 if (TARGET_NEON)
28974 if (arm_evpc_neon_vext (d))
28975 return true;
28977 /* The pattern matching functions above are written to look for a small
28978 number to begin the sequence (0, 1, N/2). If we begin with an index
28979 from the second operand, we can swap the operands. */
28980 if (d->perm[0] >= d->nelt)
28982 unsigned i, nelt = d->nelt;
28984 for (i = 0; i < nelt; ++i)
28985 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28987 std::swap (d->op0, d->op1);
28990 if (TARGET_NEON)
28992 if (arm_evpc_neon_vuzp (d))
28993 return true;
28994 if (arm_evpc_neon_vzip (d))
28995 return true;
28996 if (arm_evpc_neon_vrev (d))
28997 return true;
28998 if (arm_evpc_neon_vtrn (d))
28999 return true;
29000 return arm_evpc_neon_vtbl (d);
29002 return false;
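/* Illustrative sketch, not GCC code, of the operand-swapping normalization
   above: if the first selector element indexes the second input, exchange
   the two inputs and rewrite every index modulo 2 * nelt so that the
   pattern matchers can still begin from a small index.  For example
   { 4, 0, 5, 1 } on (op0, op1) becomes { 0, 4, 1, 5 } on (op1, op0), which
   then matches as a VZIP.  */
#if 0
static void
swap_perm_operands_sketch (unsigned char *perm, unsigned nelt)
{
  for (unsigned i = 0; i < nelt; i++)
    perm[i] = (perm[i] + nelt) & (2 * nelt - 1);
  /* ...then exchange the two operand rtxes, as std::swap does above.  */
}
#endif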
29005 /* Expand a vec_perm_const pattern. */
29007 bool
29008 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29010 struct expand_vec_perm_d d;
29011 int i, nelt, which;
29013 d.target = target;
29014 d.op0 = op0;
29015 d.op1 = op1;
29017 d.vmode = GET_MODE (target);
29018 gcc_assert (VECTOR_MODE_P (d.vmode));
29019 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29020 d.testing_p = false;
29022 for (i = which = 0; i < nelt; ++i)
29024 rtx e = XVECEXP (sel, 0, i);
29025 int ei = INTVAL (e) & (2 * nelt - 1);
29026 which |= (ei < nelt ? 1 : 2);
29027 d.perm[i] = ei;
29030 switch (which)
29032 default:
29033 gcc_unreachable();
29035 case 3:
29036 d.one_vector_p = false;
29037 if (!rtx_equal_p (op0, op1))
29038 break;
29040 /* The elements of PERM do not suggest that only the first operand
29041 is used, but both operands are identical. Allow easier matching
29042 of the permutation by folding the permutation into the single
29043 input vector. */
29044 /* FALLTHRU */
29045 case 2:
29046 for (i = 0; i < nelt; ++i)
29047 d.perm[i] &= nelt - 1;
29048 d.op0 = op1;
29049 d.one_vector_p = true;
29050 break;
29052 case 1:
29053 d.op1 = op0;
29054 d.one_vector_p = true;
29055 break;
29058 return arm_expand_vec_perm_const_1 (&d);
29061 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29063 static bool
29064 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29065 const unsigned char *sel)
29067 struct expand_vec_perm_d d;
29068 unsigned int i, nelt, which;
29069 bool ret;
29071 d.vmode = vmode;
29072 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29073 d.testing_p = true;
29074 memcpy (d.perm, sel, nelt);
29076 /* Categorize the set of elements in the selector. */
29077 for (i = which = 0; i < nelt; ++i)
29079 unsigned char e = d.perm[i];
29080 gcc_assert (e < 2 * nelt);
29081 which |= (e < nelt ? 1 : 2);
29084 /* For all elements from second vector, fold the elements to first. */
29085 if (which == 2)
29086 for (i = 0; i < nelt; ++i)
29087 d.perm[i] -= nelt;
29089 /* Check whether the mask can be applied to the vector type. */
29090 d.one_vector_p = (which != 3);
29092 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29093 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29094 if (!d.one_vector_p)
29095 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29097 start_sequence ();
29098 ret = arm_expand_vec_perm_const_1 (&d);
29099 end_sequence ();
29101 return ret;
29104 bool
29105 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29107 /* If we are soft float and we do not have ldrd
29108 then all auto increment forms are ok. */
29109 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29110 return true;
29112 switch (code)
29114 /* Post increment and Pre Decrement are supported for all
29115 instruction forms except for vector forms. */
29116 case ARM_POST_INC:
29117 case ARM_PRE_DEC:
29118 if (VECTOR_MODE_P (mode))
29120 if (code != ARM_PRE_DEC)
29121 return true;
29122 else
29123 return false;
29126 return true;
29128 case ARM_POST_DEC:
29129 case ARM_PRE_INC:
29130 /* Without LDRD and mode size greater than
29131 word size, there is no point in auto-incrementing
29132 because ldm and stm will not have these forms. */
29133 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29134 return false;
29136 /* Vector and floating point modes do not support
29137 these auto increment forms. */
29138 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29139 return false;
29141 return true;
29143 default:
29144 return false;
29148 return false;
29151 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29152 on ARM, since we know that shifts by negative amounts are no-ops.
29153 Additionally, the default expansion code is not available or suitable
29154 for post-reload insn splits (this can occur when the register allocator
29155 chooses not to do a shift in NEON).
29157 This function is used in both initial expand and post-reload splits, and
29158 handles all kinds of 64-bit shifts.
29160 Input requirements:
29161 - It is safe for the input and output to be the same register, but
29162 early-clobber rules apply for the shift amount and scratch registers.
29163 - Shift by register requires both scratch registers. In all other cases
29164 the scratch registers may be NULL.
29165 - Ashiftrt by a register also clobbers the CC register. */
29166 void
29167 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29168 rtx amount, rtx scratch1, rtx scratch2)
29170 rtx out_high = gen_highpart (SImode, out);
29171 rtx out_low = gen_lowpart (SImode, out);
29172 rtx in_high = gen_highpart (SImode, in);
29173 rtx in_low = gen_lowpart (SImode, in);
29175 /* Terminology:
29176 in = the register pair containing the input value.
29177 out = the destination register pair.
29178 up = the high- or low-part of each pair.
29179 down = the opposite part to "up".
29180 In a shift, we can consider bits to shift from "up"-stream to
29181 "down"-stream, so in a left-shift "up" is the low-part and "down"
29182 is the high-part of each register pair. */
29184 rtx out_up = code == ASHIFT ? out_low : out_high;
29185 rtx out_down = code == ASHIFT ? out_high : out_low;
29186 rtx in_up = code == ASHIFT ? in_low : in_high;
29187 rtx in_down = code == ASHIFT ? in_high : in_low;
29189 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29190 gcc_assert (out
29191 && (REG_P (out) || GET_CODE (out) == SUBREG)
29192 && GET_MODE (out) == DImode);
29193 gcc_assert (in
29194 && (REG_P (in) || GET_CODE (in) == SUBREG)
29195 && GET_MODE (in) == DImode);
29196 gcc_assert (amount
29197 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29198 && GET_MODE (amount) == SImode)
29199 || CONST_INT_P (amount)));
29200 gcc_assert (scratch1 == NULL
29201 || (GET_CODE (scratch1) == SCRATCH)
29202 || (GET_MODE (scratch1) == SImode
29203 && REG_P (scratch1)));
29204 gcc_assert (scratch2 == NULL
29205 || (GET_CODE (scratch2) == SCRATCH)
29206 || (GET_MODE (scratch2) == SImode
29207 && REG_P (scratch2)));
29208 gcc_assert (!REG_P (out) || !REG_P (amount)
29209 || !HARD_REGISTER_P (out)
29210 || (REGNO (out) != REGNO (amount)
29211 && REGNO (out) + 1 != REGNO (amount)));
29213 /* Macros to make following code more readable. */
29214 #define SUB_32(DEST,SRC) \
29215 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29216 #define RSB_32(DEST,SRC) \
29217 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29218 #define SUB_S_32(DEST,SRC) \
29219 gen_addsi3_compare0 ((DEST), (SRC), \
29220 GEN_INT (-32))
29221 #define SET(DEST,SRC) \
29222 gen_rtx_SET ((DEST), (SRC))
29223 #define SHIFT(CODE,SRC,AMOUNT) \
29224 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29225 #define LSHIFT(CODE,SRC,AMOUNT) \
29226 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29227 SImode, (SRC), (AMOUNT))
29228 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29229 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29230 SImode, (SRC), (AMOUNT))
29231 #define ORR(A,B) \
29232 gen_rtx_IOR (SImode, (A), (B))
29233 #define BRANCH(COND,LABEL) \
29234 gen_arm_cond_branch ((LABEL), \
29235 gen_rtx_ ## COND (CCmode, cc_reg, \
29236 const0_rtx), \
29237 cc_reg)
29239 /* Shifts by register and shifts by constant are handled separately. */
29240 if (CONST_INT_P (amount))
29242 /* We have a shift-by-constant. */
29244 /* First, handle out-of-range shift amounts.
29245 In both cases we try to match the result an ARM instruction in a
29246 shift-by-register would give. This helps reduce execution
29247 differences between optimization levels, but it won't stop other
29248 parts of the compiler doing different things. This is "undefined
29249 behavior", in any case. */
29250 if (INTVAL (amount) <= 0)
29251 emit_insn (gen_movdi (out, in));
29252 else if (INTVAL (amount) >= 64)
29254 if (code == ASHIFTRT)
29256 rtx const31_rtx = GEN_INT (31);
29257 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29258 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29260 else
29261 emit_insn (gen_movdi (out, const0_rtx));
29264 /* Now handle valid shifts. */
29265 else if (INTVAL (amount) < 32)
29267 /* Shifts by a constant less than 32. */
29268 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29270 /* Clearing the out register in DImode first avoids lots
29271 of spilling and results in less stack usage.
29272 Later this redundant insn is completely removed.
29273 Do that only if "in" and "out" are different registers. */
29274 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29275 emit_insn (SET (out, const0_rtx));
29276 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29277 emit_insn (SET (out_down,
29278 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29279 out_down)));
29280 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29282 else
29284 /* Shifts by a constant greater than 31. */
29285 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29287 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29288 emit_insn (SET (out, const0_rtx));
29289 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29290 if (code == ASHIFTRT)
29291 emit_insn (gen_ashrsi3 (out_up, in_up,
29292 GEN_INT (31)));
29293 else
29294 emit_insn (SET (out_up, const0_rtx));
29297 else
29299 /* We have a shift-by-register. */
29300 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29302 /* This alternative requires the scratch registers. */
29303 gcc_assert (scratch1 && REG_P (scratch1));
29304 gcc_assert (scratch2 && REG_P (scratch2));
29306 /* We will need the values "amount-32" and "32-amount" later.
29307 Swapping them around now allows the later code to be more general. */
29308 switch (code)
29310 case ASHIFT:
29311 emit_insn (SUB_32 (scratch1, amount));
29312 emit_insn (RSB_32 (scratch2, amount));
29313 break;
29314 case ASHIFTRT:
29315 emit_insn (RSB_32 (scratch1, amount));
29316 /* Also set CC = amount > 32. */
29317 emit_insn (SUB_S_32 (scratch2, amount));
29318 break;
29319 case LSHIFTRT:
29320 emit_insn (RSB_32 (scratch1, amount));
29321 emit_insn (SUB_32 (scratch2, amount));
29322 break;
29323 default:
29324 gcc_unreachable ();
29327 /* Emit code like this:
29329 arithmetic-left:
29330 out_down = in_down << amount;
29331 out_down = (in_up << (amount - 32)) | out_down;
29332 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29333 out_up = in_up << amount;
29335 arithmetic-right:
29336 out_down = in_down >> amount;
29337 out_down = (in_up << (32 - amount)) | out_down;
29338 if (amount < 32)
29339 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29340 out_up = in_up << amount;
29342 logical-right:
29343 out_down = in_down >> amount;
29344 out_down = (in_up << (32 - amount)) | out_down;
29345 if (amount < 32)
29346 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29347 out_up = in_up << amount;
29349 The ARM and Thumb2 variants are the same but implemented slightly
29350 differently. If this were only called during expand we could just
29351 use the Thumb2 case and let combine do the right thing, but this
29352 can also be called from post-reload splitters. */
29354 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29356 if (!TARGET_THUMB2)
29358 /* Emit code for ARM mode. */
29359 emit_insn (SET (out_down,
29360 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29361 if (code == ASHIFTRT)
29363 rtx_code_label *done_label = gen_label_rtx ();
29364 emit_jump_insn (BRANCH (LT, done_label));
29365 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29366 out_down)));
29367 emit_label (done_label);
29369 else
29370 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29371 out_down)));
29373 else
29375 /* Emit code for Thumb2 mode.
29376 Thumb2 can't do shift and or in one insn. */
29377 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29378 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29380 if (code == ASHIFTRT)
29382 rtx_code_label *done_label = gen_label_rtx ();
29383 emit_jump_insn (BRANCH (LT, done_label));
29384 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29385 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29386 emit_label (done_label);
29388 else
29390 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29391 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29395 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29398 #undef SUB_32
29399 #undef RSB_32
29400 #undef SUB_S_32
29401 #undef SET
29402 #undef SHIFT
29403 #undef LSHIFT
29404 #undef REV_LSHIFT
29405 #undef ORR
29406 #undef BRANCH
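/* A minimal standalone sketch, not GCC code, of the shift-by-constant
   decomposition the function above emits, written as plain C on a pair of
   32-bit halves.  Only the left-shift case is shown; the right shifts
   mirror it with the roles of the halves exchanged, and amounts of 64 or
   more are handled separately above.  */
#if 0
#include <stdint.h>

static void
shl64_sketch (uint32_t in_low, uint32_t in_high, unsigned amount,
              uint32_t *out_low, uint32_t *out_high)
{
  if (amount == 0)
    {
      *out_low = in_low;
      *out_high = in_high;
    }
  else if (amount < 32)
    {
      /* Bits flow from the low half into the high half.  */
      *out_high = (in_high << amount) | (in_low >> (32 - amount));
      *out_low = in_low << amount;
    }
  else
    {
      /* Amounts of 32..63: the low half shifts straight into the high
         half and the low half becomes zero.  */
      *out_high = in_low << (amount - 32);
      *out_low = 0;
    }
}
#endif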
29409 /* Returns true if the pattern is a valid symbolic address, which is either a
29410 symbol_ref or (symbol_ref + addend).
29412 According to the ARM ELF ABI, the initial addend of REL-type relocations
29413 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29414 literal field of the instruction as a 16-bit signed value in the range
29415 -32768 <= A < 32768. */
29417 bool
29418 arm_valid_symbolic_address_p (rtx addr)
29420 rtx xop0, xop1 = NULL_RTX;
29421 rtx tmp = addr;
29423 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29424 return true;
29426 /* (const (plus: symbol_ref const_int)) */
29427 if (GET_CODE (addr) == CONST)
29428 tmp = XEXP (addr, 0);
29430 if (GET_CODE (tmp) == PLUS)
29432 xop0 = XEXP (tmp, 0);
29433 xop1 = XEXP (tmp, 1);
29435 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29436 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29439 return false;
29442 /* Returns true if this is a valid comparison operation, and puts
29443 the operands into a form that is valid. */
29444 bool
29445 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29447 enum rtx_code code = GET_CODE (*comparison);
29448 int code_int;
29449 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29450 ? GET_MODE (*op2) : GET_MODE (*op1);
29452 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29454 if (code == UNEQ || code == LTGT)
29455 return false;
29457 code_int = (int)code;
29458 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29459 PUT_CODE (*comparison, (enum rtx_code)code_int);
29461 switch (mode)
29463 case SImode:
29464 if (!arm_add_operand (*op1, mode))
29465 *op1 = force_reg (mode, *op1);
29466 if (!arm_add_operand (*op2, mode))
29467 *op2 = force_reg (mode, *op2);
29468 return true;
29470 case DImode:
29471 if (!cmpdi_operand (*op1, mode))
29472 *op1 = force_reg (mode, *op1);
29473 if (!cmpdi_operand (*op2, mode))
29474 *op2 = force_reg (mode, *op2);
29475 return true;
29477 case HFmode:
29478 if (!TARGET_VFP_FP16INST)
29479 break;
29480 /* FP16 comparisons are done in SF mode. */
29481 mode = SFmode;
29482 *op1 = convert_to_mode (mode, *op1, 1);
29483 *op2 = convert_to_mode (mode, *op2, 1);
29484 /* Fall through. */
29485 case SFmode:
29486 case DFmode:
29487 if (!vfp_compare_operand (*op1, mode))
29488 *op1 = force_reg (mode, *op1);
29489 if (!vfp_compare_operand (*op2, mode))
29490 *op2 = force_reg (mode, *op2);
29491 return true;
29492 default:
29493 break;
29496 return false;
29500 /* Maximum number of instructions to set block of memory. */
29501 static int
29502 arm_block_set_max_insns (void)
29504 if (optimize_function_for_size_p (cfun))
29505 return 4;
29506 else
29507 return current_tune->max_insns_inline_memset;
29510 /* Return TRUE if it's profitable to set block of memory for
29511 non-vectorized case. VAL is the value to set the memory
29512 with. LENGTH is the number of bytes to set. ALIGN is the
29513 alignment of the destination memory in bytes. UNALIGNED_P
29514 is TRUE if we can only set the memory with instructions
29515 meeting alignment requirements. USE_STRD_P is TRUE if we
29516 can use strd to set the memory. */
29517 static bool
29518 arm_block_set_non_vect_profit_p (rtx val,
29519 unsigned HOST_WIDE_INT length,
29520 unsigned HOST_WIDE_INT align,
29521 bool unaligned_p, bool use_strd_p)
29523 int num = 0;
29524 /* For a leftover of 0-7 bytes, we can set the memory block using
29525 strb/strh/str with the minimum number of instructions. */
29526 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29528 if (unaligned_p)
29530 num = arm_const_inline_cost (SET, val);
29531 num += length / align + length % align;
29533 else if (use_strd_p)
29535 num = arm_const_double_inline_cost (val);
29536 num += (length >> 3) + leftover[length & 7];
29538 else
29540 num = arm_const_inline_cost (SET, val);
29541 num += (length >> 2) + leftover[length & 3];
29544 /* We may be able to combine the last STRH/STRB pair into a single STR
29545 by shifting one byte back. */
29546 if (unaligned_access && length > 3 && (length & 3) == 3)
29547 num--;
29549 return (num <= arm_block_set_max_insns ());
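/* A worked example, not GCC code, of the store-count estimate above.  The
   helper name and plain parameters are assumptions; the real function also
   adds the cost of materializing the constant and may subtract one store
   when a trailing strh/strb pair can be merged.  */
#if 0
static int
memset_store_count_sketch (unsigned length, int use_strd)
{
  /* Stores needed for a 0-7 byte leftover (strb/strh/str combinations).  */
  static const int leftover[8] = { 0, 1, 1, 2, 1, 2, 2, 3 };
  return use_strd
         ? (int) (length >> 3) + leftover[length & 7]
         : (int) (length >> 2) + leftover[length & 3];
}

/* Example: memset_store_count_sketch (14, 0) == 3 + 1 == 4, i.e. three word
   stores followed by one halfword store.  */
#endif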
29552 /* Return TRUE if it's profitable to set block of memory for
29553 vectorized case. LENGTH is the number of bytes to set.
29554 ALIGN is the alignment of destination memory in bytes.
29555 MODE is the vector mode used to set the memory. */
29556 static bool
29557 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29558 unsigned HOST_WIDE_INT align,
29559 machine_mode mode)
29561 int num;
29562 bool unaligned_p = ((align & 3) != 0);
29563 unsigned int nelt = GET_MODE_NUNITS (mode);
29565 /* Instruction loading constant value. */
29566 num = 1;
29567 /* Instructions storing the memory. */
29568 num += (length + nelt - 1) / nelt;
29569 /* Instructions adjusting the address expression. We only need to
29570 adjust the address if the destination is 4-byte aligned but the
29571 leftover bytes can only be stored by a misaligned store instruction. */
29572 if (!unaligned_p && (length & 3) != 0)
29573 num++;
29575 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29576 if (!unaligned_p && mode == V16QImode)
29577 num--;
29579 return (num <= arm_block_set_max_insns ());
29582 /* Set a block of memory using vectorization instructions for the
29583 unaligned case. We fill the first LENGTH bytes of the memory
29584 area starting from DSTBASE with byte constant VALUE. ALIGN is
29585 the alignment requirement of memory. Return TRUE if succeeded. */
29586 static bool
29587 arm_block_set_unaligned_vect (rtx dstbase,
29588 unsigned HOST_WIDE_INT length,
29589 unsigned HOST_WIDE_INT value,
29590 unsigned HOST_WIDE_INT align)
29592 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29593 rtx dst, mem;
29594 rtx val_elt, val_vec, reg;
29595 rtx rval[MAX_VECT_LEN];
29596 rtx (*gen_func) (rtx, rtx);
29597 machine_mode mode;
29598 unsigned HOST_WIDE_INT v = value;
29599 unsigned int offset = 0;
29600 gcc_assert ((align & 0x3) != 0);
29601 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29602 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29603 if (length >= nelt_v16)
29605 mode = V16QImode;
29606 gen_func = gen_movmisalignv16qi;
29608 else
29610 mode = V8QImode;
29611 gen_func = gen_movmisalignv8qi;
29613 nelt_mode = GET_MODE_NUNITS (mode);
29614 gcc_assert (length >= nelt_mode);
29615 /* Skip if it isn't profitable. */
29616 if (!arm_block_set_vect_profit_p (length, align, mode))
29617 return false;
29619 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29620 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29622 v = sext_hwi (v, BITS_PER_WORD);
29623 val_elt = GEN_INT (v);
29624 for (j = 0; j < nelt_mode; j++)
29625 rval[j] = val_elt;
29627 reg = gen_reg_rtx (mode);
29628 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29629 /* Emit instruction loading the constant value. */
29630 emit_move_insn (reg, val_vec);
29632 /* Handle nelt_mode bytes in a vector. */
29633 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29635 emit_insn ((*gen_func) (mem, reg));
29636 if (i + 2 * nelt_mode <= length)
29638 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29639 offset += nelt_mode;
29640 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29644 /* If there are at least nelt_v8 bytes leftover, we must be in
29645 V16QImode. */
29646 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29648 /* Handle (8, 16) bytes leftover. */
29649 if (i + nelt_v8 < length)
29651 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29652 offset += length - i;
29653 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29655 /* We are shifting bytes back, set the alignment accordingly. */
29656 if ((length & 1) != 0 && align >= 2)
29657 set_mem_align (mem, BITS_PER_UNIT);
29659 emit_insn (gen_movmisalignv16qi (mem, reg));
29661 /* Handle (0, 8] bytes leftover. */
29662 else if (i < length && i + nelt_v8 >= length)
29664 if (mode == V16QImode)
29665 reg = gen_lowpart (V8QImode, reg);
29667 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29668 + (nelt_mode - nelt_v8))));
29669 offset += (length - i) + (nelt_mode - nelt_v8);
29670 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29672 /* We are shifting bytes back, set the alignment accordingly. */
29673 if ((length & 1) != 0 && align >= 2)
29674 set_mem_align (mem, BITS_PER_UNIT);
29676 emit_insn (gen_movmisalignv8qi (mem, reg));
29679 return true;
29682 /* Set a block of memory using vectorization instructions for the
29683 aligned case. We fill the first LENGTH bytes of the memory area
29684 starting from DSTBASE with byte constant VALUE. ALIGN is the
29685 alignment requirement of memory. Return TRUE if succeeded. */
29686 static bool
29687 arm_block_set_aligned_vect (rtx dstbase,
29688 unsigned HOST_WIDE_INT length,
29689 unsigned HOST_WIDE_INT value,
29690 unsigned HOST_WIDE_INT align)
29692 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29693 rtx dst, addr, mem;
29694 rtx val_elt, val_vec, reg;
29695 rtx rval[MAX_VECT_LEN];
29696 machine_mode mode;
29697 unsigned HOST_WIDE_INT v = value;
29698 unsigned int offset = 0;
29700 gcc_assert ((align & 0x3) == 0);
29701 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29702 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29703 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29704 mode = V16QImode;
29705 else
29706 mode = V8QImode;
29708 nelt_mode = GET_MODE_NUNITS (mode);
29709 gcc_assert (length >= nelt_mode);
29710 /* Skip if it isn't profitable. */
29711 if (!arm_block_set_vect_profit_p (length, align, mode))
29712 return false;
29714 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29716 v = sext_hwi (v, BITS_PER_WORD);
29717 val_elt = GEN_INT (v);
29718 for (j = 0; j < nelt_mode; j++)
29719 rval[j] = val_elt;
29721 reg = gen_reg_rtx (mode);
29722 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29723 /* Emit instruction loading the constant value. */
29724 emit_move_insn (reg, val_vec);
29726 i = 0;
29727 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29728 if (mode == V16QImode)
29730 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29731 emit_insn (gen_movmisalignv16qi (mem, reg));
29732 i += nelt_mode;
29733 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29734 if (i + nelt_v8 < length && i + nelt_v16 > length)
29736 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29737 offset += length - nelt_mode;
29738 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29739 /* We are shifting bytes back, set the alignment accordingly. */
29740 if ((length & 0x3) == 0)
29741 set_mem_align (mem, BITS_PER_UNIT * 4);
29742 else if ((length & 0x1) == 0)
29743 set_mem_align (mem, BITS_PER_UNIT * 2);
29744 else
29745 set_mem_align (mem, BITS_PER_UNIT);
29747 emit_insn (gen_movmisalignv16qi (mem, reg));
29748 return true;
29750 /* Fall through for bytes leftover. */
29751 mode = V8QImode;
29752 nelt_mode = GET_MODE_NUNITS (mode);
29753 reg = gen_lowpart (V8QImode, reg);
29756 /* Handle 8 bytes in a vector. */
29757 for (; (i + nelt_mode <= length); i += nelt_mode)
29759 addr = plus_constant (Pmode, dst, i);
29760 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29761 emit_move_insn (mem, reg);
29764 /* Handle single word leftover by shifting 4 bytes back. We can
29765 use aligned access for this case. */
29766 if (i + UNITS_PER_WORD == length)
29768 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29769 offset += i - UNITS_PER_WORD;
29770 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29771 /* We are shifting 4 bytes back, set the alignment accordingly. */
29772 if (align > UNITS_PER_WORD)
29773 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29775 emit_move_insn (mem, reg);
29777 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29778 We have to use unaligned access for this case. */
29779 else if (i < length)
29781 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29782 offset += length - nelt_mode;
29783 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29784 /* We are shifting bytes back, set the alignment accordingly. */
29785 if ((length & 1) == 0)
29786 set_mem_align (mem, BITS_PER_UNIT * 2);
29787 else
29788 set_mem_align (mem, BITS_PER_UNIT);
29790 emit_insn (gen_movmisalignv8qi (mem, reg));
29793 return true;
29796 /* Set a block of memory using plain strh/strb instructions, only
29797 using instructions allowed by ALIGN on the processor. We fill the
29798 first LENGTH bytes of the memory area starting from DSTBASE
29799 with byte constant VALUE. ALIGN is the alignment requirement
29800 of memory. */
29801 static bool
29802 arm_block_set_unaligned_non_vect (rtx dstbase,
29803 unsigned HOST_WIDE_INT length,
29804 unsigned HOST_WIDE_INT value,
29805 unsigned HOST_WIDE_INT align)
29807 unsigned int i;
29808 rtx dst, addr, mem;
29809 rtx val_exp, val_reg, reg;
29810 machine_mode mode;
29811 HOST_WIDE_INT v = value;
29813 gcc_assert (align == 1 || align == 2);
29815 if (align == 2)
29816 v |= (value << BITS_PER_UNIT);
29818 v = sext_hwi (v, BITS_PER_WORD);
29819 val_exp = GEN_INT (v);
29820 /* Skip if it isn't profitable. */
29821 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29822 align, true, false))
29823 return false;
29825 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29826 mode = (align == 2 ? HImode : QImode);
29827 val_reg = force_reg (SImode, val_exp);
29828 reg = gen_lowpart (mode, val_reg);
29830 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29832 addr = plus_constant (Pmode, dst, i);
29833 mem = adjust_automodify_address (dstbase, mode, addr, i);
29834 emit_move_insn (mem, reg);
29837 /* Handle single byte leftover. */
29838 if (i + 1 == length)
29840 reg = gen_lowpart (QImode, val_reg);
29841 addr = plus_constant (Pmode, dst, i);
29842 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29843 emit_move_insn (mem, reg);
29844 i++;
29847 gcc_assert (i == length);
29848 return true;
29851 /* Set a block of memory using plain strd/str/strh/strb instructions,
29852 to permit unaligned copies on processors which support unaligned
29853 semantics for those instructions. We fill the first LENGTH bytes
29854 of the memory area starting from DSTBASE with byte constant VALUE.
29855 ALIGN is the alignment requirement of memory. */
29856 static bool
29857 arm_block_set_aligned_non_vect (rtx dstbase,
29858 unsigned HOST_WIDE_INT length,
29859 unsigned HOST_WIDE_INT value,
29860 unsigned HOST_WIDE_INT align)
29862 unsigned int i;
29863 rtx dst, addr, mem;
29864 rtx val_exp, val_reg, reg;
29865 unsigned HOST_WIDE_INT v;
29866 bool use_strd_p;
29868 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29869 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29871 v = (value | (value << 8) | (value << 16) | (value << 24));
29872 if (length < UNITS_PER_WORD)
29873 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29875 if (use_strd_p)
29876 v |= (v << BITS_PER_WORD);
29877 else
29878 v = sext_hwi (v, BITS_PER_WORD);
29880 val_exp = GEN_INT (v);
29881 /* Skip if it isn't profitable. */
29882 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29883 align, false, use_strd_p))
29885 if (!use_strd_p)
29886 return false;
29888 /* Try without strd. */
29889 v = (v >> BITS_PER_WORD);
29890 v = sext_hwi (v, BITS_PER_WORD);
29891 val_exp = GEN_INT (v);
29892 use_strd_p = false;
29893 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29894 align, false, use_strd_p))
29895 return false;
29898 i = 0;
29899 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29900 /* Handle double words using strd if possible. */
29901 if (use_strd_p)
29903 val_reg = force_reg (DImode, val_exp);
29904 reg = val_reg;
29905 for (; (i + 8 <= length); i += 8)
29907 addr = plus_constant (Pmode, dst, i);
29908 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29909 emit_move_insn (mem, reg);
29912 else
29913 val_reg = force_reg (SImode, val_exp);
29915 /* Handle words. */
29916 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29917 for (; (i + 4 <= length); i += 4)
29919 addr = plus_constant (Pmode, dst, i);
29920 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29921 if ((align & 3) == 0)
29922 emit_move_insn (mem, reg);
29923 else
29924 emit_insn (gen_unaligned_storesi (mem, reg));
29927 /* Merge last pair of STRH and STRB into a STR if possible. */
29928 if (unaligned_access && i > 0 && (i + 3) == length)
29930 addr = plus_constant (Pmode, dst, i - 1);
29931 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29932 /* We are shifting one byte back, set the alignment accordingly. */
29933 if ((align & 1) == 0)
29934 set_mem_align (mem, BITS_PER_UNIT);
29936 /* Most likely this is an unaligned access, and we can't tell at
29937 compilation time. */
29938 emit_insn (gen_unaligned_storesi (mem, reg));
29939 return true;
29942 /* Handle half word leftover. */
29943 if (i + 2 <= length)
29945 reg = gen_lowpart (HImode, val_reg);
29946 addr = plus_constant (Pmode, dst, i);
29947 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29948 if ((align & 1) == 0)
29949 emit_move_insn (mem, reg);
29950 else
29951 emit_insn (gen_unaligned_storehi (mem, reg));
29953 i += 2;
29956 /* Handle single byte leftover. */
29957 if (i + 1 == length)
29959 reg = gen_lowpart (QImode, val_reg);
29960 addr = plus_constant (Pmode, dst, i);
29961 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29962 emit_move_insn (mem, reg);
29965 return true;
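/* Illustrative sketch, not GCC code, of the overlapping-store trick used
   above for a 3-byte tail: when unaligned word stores are available, step
   back one byte and emit a single word store instead of an strh/strb pair.
   The byte that gets rewritten already holds the fill value, so the result
   is unchanged (the code above only does this when at least one word has
   already been stored).  memcpy stands in for the unaligned store here.  */
#if 0
#include <stdint.h>
#include <string.h>

static void
memset_tail3_sketch (unsigned char *dst, unsigned length, uint32_t word)
{
  /* Word stores have already covered bytes 0 .. (length & ~3u) - 1.  */
  unsigned i = length & ~3u;          /* length - i == 3 here.  */
  memcpy (dst + i - 1, &word, 4);     /* One store covering i-1 .. i+2.  */
}
#endif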
29968 /* Set a block of memory using vectorization instructions for both
29969 aligned and unaligned cases. We fill the first LENGTH bytes of
29970 the memory area starting from DSTBASE with byte constant VALUE.
29971 ALIGN is the alignment requirement of memory. */
29972 static bool
29973 arm_block_set_vect (rtx dstbase,
29974 unsigned HOST_WIDE_INT length,
29975 unsigned HOST_WIDE_INT value,
29976 unsigned HOST_WIDE_INT align)
29978 /* Check whether we need to use unaligned store instruction. */
29979 if (((align & 3) != 0 || (length & 3) != 0)
29980 /* Check whether unaligned store instruction is available. */
29981 && (!unaligned_access || BYTES_BIG_ENDIAN))
29982 return false;
29984 if ((align & 3) == 0)
29985 return arm_block_set_aligned_vect (dstbase, length, value, align);
29986 else
29987 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29990 /* Expand a string store operation. First we try to do it using
29991 vectorization instructions, then try ARM unaligned access and
29992 double-word stores if profitable. OPERANDS[0] is the destination,
29993 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29994 initialize the memory with, OPERANDS[3] is the known alignment of the
29995 destination. */
29996 bool
29997 arm_gen_setmem (rtx *operands)
29999 rtx dstbase = operands[0];
30000 unsigned HOST_WIDE_INT length;
30001 unsigned HOST_WIDE_INT value;
30002 unsigned HOST_WIDE_INT align;
30004 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30005 return false;
30007 length = UINTVAL (operands[1]);
30008 if (length > 64)
30009 return false;
30011 value = (UINTVAL (operands[2]) & 0xFF);
30012 align = UINTVAL (operands[3]);
30013 if (TARGET_NEON && length >= 8
30014 && current_tune->string_ops_prefer_neon
30015 && arm_block_set_vect (dstbase, length, value, align))
30016 return true;
30018 if (!unaligned_access && (align & 3) != 0)
30019 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30021 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30025 static bool
30026 arm_macro_fusion_p (void)
30028 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30031 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30032 for MOVW / MOVT macro fusion. */
30034 static bool
30035 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30037 /* We are trying to fuse
30038 movw imm / movt imm
30039 instructions as a group that gets scheduled together. */
30041 rtx set_dest = SET_DEST (curr_set);
30043 if (GET_MODE (set_dest) != SImode)
30044 return false;
30046 /* We are trying to match:
30047 prev (movw) == (set (reg r0) (const_int imm16))
30048 curr (movt) == (set (zero_extract (reg r0)
30049 (const_int 16)
30050 (const_int 16))
30051 (const_int imm16_1))
30053 prev (movw) == (set (reg r1)
30054 (high (symbol_ref ("SYM"))))
30055 curr (movt) == (set (reg r0)
30056 (lo_sum (reg r1)
30057 (symbol_ref ("SYM")))) */
30059 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30061 if (CONST_INT_P (SET_SRC (curr_set))
30062 && CONST_INT_P (SET_SRC (prev_set))
30063 && REG_P (XEXP (set_dest, 0))
30064 && REG_P (SET_DEST (prev_set))
30065 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30066 return true;
30069 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30070 && REG_P (SET_DEST (curr_set))
30071 && REG_P (SET_DEST (prev_set))
30072 && GET_CODE (SET_SRC (prev_set)) == HIGH
30073 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30074 return true;
30076 return false;
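/* A worked example, not GCC code, of the instruction pair being fused: a
   32-bit constant is built by a MOVW writing the low 16 bits and a MOVT
   writing the high 16 bits of the same register, so 0x12345678 becomes a
   MOVW of 0x5678 followed by a MOVT of 0x1234 into the same register.
   Keeping the two adjacent lets cores that fuse the pair treat them as one
   constant-forming operation.  The helper below just shows the split; its
   name is an assumption for the example.  */
#if 0
#include <stdint.h>

static void
split_movw_movt_sketch (uint32_t imm, uint16_t *movw_imm, uint16_t *movt_imm)
{
  *movw_imm = imm & 0xffff;   /* Low half, written by MOVW.  */
  *movt_imm = imm >> 16;      /* High half, written by MOVT.  */
}
#endif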
30079 static bool
30080 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30082 rtx prev_set = single_set (prev);
30083 rtx curr_set = single_set (curr);
30085 if (!prev_set
30086 || !curr_set)
30087 return false;
30089 if (any_condjump_p (curr))
30090 return false;
30092 if (!arm_macro_fusion_p ())
30093 return false;
30095 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30096 && aarch_crypto_can_dual_issue (prev, curr))
30097 return true;
30099 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30100 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30101 return true;
30103 return false;
30106 /* Return true iff the instruction fusion described by OP is enabled. */
30107 bool
30108 arm_fusion_enabled_p (tune_params::fuse_ops op)
30110 return current_tune->fusible_ops & op;
30113 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30114 scheduled for speculative execution. Reject the long-running division
30115 and square-root instructions. */
30117 static bool
30118 arm_sched_can_speculate_insn (rtx_insn *insn)
30120 switch (get_attr_type (insn))
30122 case TYPE_SDIV:
30123 case TYPE_UDIV:
30124 case TYPE_FDIVS:
30125 case TYPE_FDIVD:
30126 case TYPE_FSQRTS:
30127 case TYPE_FSQRTD:
30128 case TYPE_NEON_FP_SQRT_S:
30129 case TYPE_NEON_FP_SQRT_D:
30130 case TYPE_NEON_FP_SQRT_S_Q:
30131 case TYPE_NEON_FP_SQRT_D_Q:
30132 case TYPE_NEON_FP_DIV_S:
30133 case TYPE_NEON_FP_DIV_D:
30134 case TYPE_NEON_FP_DIV_S_Q:
30135 case TYPE_NEON_FP_DIV_D_Q:
30136 return false;
30137 default:
30138 return true;
30142 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30144 static unsigned HOST_WIDE_INT
30145 arm_asan_shadow_offset (void)
30147 return HOST_WIDE_INT_1U << 29;
30151 /* This is a temporary fix for PR60655. Ideally we need
30152 to handle most of these cases in the generic part but
30153 currently we reject minus (..) (sym_ref). We try to
30154 ameliorate the case with minus (sym_ref1) (sym_ref2)
30155 where they are in the same section. */
30157 static bool
30158 arm_const_not_ok_for_debug_p (rtx p)
30160 tree decl_op0 = NULL;
30161 tree decl_op1 = NULL;
30163 if (GET_CODE (p) == MINUS)
30165 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30167 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30168 if (decl_op1
30169 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30170 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30172 if ((VAR_P (decl_op1)
30173 || TREE_CODE (decl_op1) == CONST_DECL)
30174 && (VAR_P (decl_op0)
30175 || TREE_CODE (decl_op0) == CONST_DECL))
30176 return (get_variable_section (decl_op1, false)
30177 != get_variable_section (decl_op0, false));
30179 if (TREE_CODE (decl_op1) == LABEL_DECL
30180 && TREE_CODE (decl_op0) == LABEL_DECL)
30181 return (DECL_CONTEXT (decl_op1)
30182 != DECL_CONTEXT (decl_op0));
30185 return true;
30189 return false;
30192 /* Return TRUE if X is a reference to a value in a constant pool. */
30193 extern bool
30194 arm_is_constant_pool_ref (rtx x)
30196 return (MEM_P (x)
30197 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30198 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30201 /* Remember the last target of arm_set_current_function. */
30202 static GTY(()) tree arm_previous_fndecl;
30204 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30206 void
30207 save_restore_target_globals (tree new_tree)
30209 /* If we have a previous state, use it. */
30210 if (TREE_TARGET_GLOBALS (new_tree))
30211 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30212 else if (new_tree == target_option_default_node)
30213 restore_target_globals (&default_target_globals);
30214 else
30216 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30217 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30220 arm_option_params_internal ();
30223 /* Invalidate arm_previous_fndecl. */
30225 void
30226 arm_reset_previous_fndecl (void)
30228 arm_previous_fndecl = NULL_TREE;
30231 /* Establish appropriate back-end context for processing the function
30232 FNDECL. The argument might be NULL to indicate processing at top
30233 level, outside of any function scope. */
30235 static void
30236 arm_set_current_function (tree fndecl)
30238 if (!fndecl || fndecl == arm_previous_fndecl)
30239 return;
30241 tree old_tree = (arm_previous_fndecl
30242 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30243 : NULL_TREE);
30245 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30247 /* If current function has no attributes but previous one did,
30248 use the default node. */
30249 if (! new_tree && old_tree)
30250 new_tree = target_option_default_node;
30252 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30253 the default have been handled by save_restore_target_globals from
30254 arm_pragma_target_parse. */
30255 if (old_tree == new_tree)
30256 return;
30258 arm_previous_fndecl = fndecl;
30260 /* First set the target options. */
30261 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30263 save_restore_target_globals (new_tree);
30266 /* Implement TARGET_OPTION_PRINT. */
30268 static void
30269 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30271 int flags = ptr->x_target_flags;
30272 const char *fpu_name;
30274 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30275 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30277 fprintf (file, "%*sselected arch %s\n", indent, "",
30278 TARGET_THUMB2_P (flags) ? "thumb2" :
30279 TARGET_THUMB_P (flags) ? "thumb1" :
30280 "arm");
30282 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30285 /* Hook to determine if one function can safely inline another. */
30287 static bool
30288 arm_can_inline_p (tree caller, tree callee)
30290 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30291 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30292 bool can_inline = true;
30294 struct cl_target_option *caller_opts
30295 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30296 : target_option_default_node);
30298 struct cl_target_option *callee_opts
30299 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30300 : target_option_default_node);
30302 if (callee_opts == caller_opts)
30303 return true;
30305 /* Callee's ISA features should be a subset of the caller's. */
30306 struct arm_build_target caller_target;
30307 struct arm_build_target callee_target;
30308 caller_target.isa = sbitmap_alloc (isa_num_bits);
30309 callee_target.isa = sbitmap_alloc (isa_num_bits);
30311 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30312 false);
30313 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30314 false);
30315 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30316 can_inline = false;
30318 sbitmap_free (caller_target.isa);
30319 sbitmap_free (callee_target.isa);
30321 /* OK to inline between different modes.
30322 Function with mode-specific instructions, e.g. using asm,
30323 must be explicitly protected with noinline. */
30324 return can_inline;
30327 /* Hook to fix function's alignment affected by target attribute. */
30329 static void
30330 arm_relayout_function (tree fndecl)
30332 if (DECL_USER_ALIGN (fndecl))
30333 return;
30335 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30337 if (!callee_tree)
30338 callee_tree = target_option_default_node;
30340 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30341 SET_DECL_ALIGN
30342 (fndecl,
30343 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30346 /* Inner function to process the attribute((target(...))): take an argument and
30347 set the current options from it. If we have a list, recursively
30348 go over the list. */
30350 static bool
30351 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30353 if (TREE_CODE (args) == TREE_LIST)
30355 bool ret = true;
30357 for (; args; args = TREE_CHAIN (args))
30358 if (TREE_VALUE (args)
30359 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30360 ret = false;
30361 return ret;
30364 else if (TREE_CODE (args) != STRING_CST)
30366 error ("attribute %<target%> argument not a string");
30367 return false;
30370 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30371 char *q;
30373 while ((q = strtok (argstr, ",")) != NULL)
30375 while (ISSPACE (*q)) ++q;
30377 argstr = NULL;
30378 if (!strncmp (q, "thumb", 5))
30379 opts->x_target_flags |= MASK_THUMB;
30381 else if (!strncmp (q, "arm", 3))
30382 opts->x_target_flags &= ~MASK_THUMB;
30384 else if (!strncmp (q, "fpu=", 4))
30386 int fpu_index;
30387 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30388 &fpu_index, CL_TARGET))
30390 error ("invalid fpu for attribute(target(\"%s\"))", q);
30391 return false;
30393 if (fpu_index == TARGET_FPU_auto)
30395 /* This doesn't really make sense until we support
30396 general dynamic selection of the architecture and all
30397 sub-features. */
30398 sorry ("auto fpu selection not currently permitted here");
30399 return false;
30401 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30403 else
30405 error ("attribute(target(\"%s\")) is unknown", q);
30406 return false;
30410 return true;
30413 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30415 tree
30416 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30417 struct gcc_options *opts_set)
30419 struct cl_target_option cl_opts;
30421 if (!arm_valid_target_attribute_rec (args, opts))
30422 return NULL_TREE;
30424 cl_target_option_save (&cl_opts, opts);
30425 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30426 arm_option_check_internal (opts);
30427 /* Do any overrides, such as global options arch=xxx. */
30428 arm_option_override_internal (opts, opts_set);
30430 return build_target_option_node (opts);
30433 static void
30434 add_attribute (const char * mode, tree *attributes)
30436 size_t len = strlen (mode);
30437 tree value = build_string (len, mode);
30439 TREE_TYPE (value) = build_array_type (char_type_node,
30440 build_index_type (size_int (len)));
30442 *attributes = tree_cons (get_identifier ("target"),
30443 build_tree_list (NULL_TREE, value),
30444 *attributes);
30447 /* For testing. Insert thumb or arm modes alternately on functions. */
30449 static void
30450 arm_insert_attributes (tree fndecl, tree * attributes)
30452 const char *mode;
30454 if (! TARGET_FLIP_THUMB)
30455 return;
30457 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30458 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30459 return;
30461 /* Nested definitions must inherit mode. */
30462 if (current_function_decl)
30464 mode = TARGET_THUMB ? "thumb" : "arm";
30465 add_attribute (mode, attributes);
30466 return;
30469 /* If there is already a setting don't change it. */
30470 if (lookup_attribute ("target", *attributes) != NULL)
30471 return;
30473 mode = thumb_flipper ? "thumb" : "arm";
30474 add_attribute (mode, attributes);
30476 thumb_flipper = !thumb_flipper;
30479 /* Hook to validate attribute((target("string"))). */
30481 static bool
30482 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30483 tree args, int ARG_UNUSED (flags))
30485 bool ret = true;
30486 struct gcc_options func_options;
30487 tree cur_tree, new_optimize;
30488 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30490 /* Get the optimization options of the current function. */
30491 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30493 /* If the function changed the optimization levels as well as setting target
30494 options, start with the optimizations specified. */
30495 if (!func_optimize)
30496 func_optimize = optimization_default_node;
30498 /* Init func_options. */
30499 memset (&func_options, 0, sizeof (func_options));
30500 init_options_struct (&func_options, NULL);
30501 lang_hooks.init_options_struct (&func_options);
30503 /* Initialize func_options to the defaults. */
30504 cl_optimization_restore (&func_options,
30505 TREE_OPTIMIZATION (func_optimize));
30507 cl_target_option_restore (&func_options,
30508 TREE_TARGET_OPTION (target_option_default_node));
30510 /* Set func_options flags with new target mode. */
30511 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30512 &global_options_set);
30514 if (cur_tree == NULL_TREE)
30515 ret = false;
30517 new_optimize = build_optimization_node (&func_options);
30519 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30521 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30523 finalize_options_struct (&func_options);
30525 return ret;
30528 /* Match an ISA feature bitmap to a named FPU. We always use the
30529 first entry that exactly matches the feature set, so that we
30530 effectively canonicalize the FPU name for the assembler. */
30531 static const char*
30532 arm_identify_fpu_from_isa (sbitmap isa)
30534 auto_sbitmap fpubits (isa_num_bits);
30535 auto_sbitmap cand_fpubits (isa_num_bits);
30537 bitmap_and (fpubits, isa, isa_all_fpubits);
30539 /* If there are no ISA feature bits relating to the FPU, we must be
30540 doing soft-float. */
30541 if (bitmap_empty_p (fpubits))
30542 return "softvfp";
30544 for (unsigned int i = 0; i < ARRAY_SIZE (all_fpus); i++)
30546 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30547 if (bitmap_equal_p (fpubits, cand_fpubits))
30548 return all_fpus[i].name;
30550 /* We must find an entry, or things have gone wrong. */
30551 gcc_unreachable ();

void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
	      && cfun->is_thunk))
	fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  asm_fprintf (asm_out_file, "\t.fpu %s\n",
	       (TARGET_SOFT_FLOAT
		? "softvfp"
		: arm_identify_fpu_from_isa (arm_active_target.isa)));

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
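
/* For illustration (hypothetical output): for a Thumb-2, soft-float
   function "foo" this hook emits directives along the lines of

	.syntax unified
	.thumb
	.thumb_func
	.fpu softvfp

   before the function label, whereas an ARM-mode function gets ".arm"
   instead of the two Thumb directives.  */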

/* If MEM is in the form of [base+offset], extract the two parts of the
   address and store them in BASE and OFFSET; otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
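
/* For illustration (hypothetical RTL): given a MEM such as
   (mem:SI (plus:SI (reg:SI 4) (const_int 8))), the helper above sets
   *BASE to (reg:SI 4) and *OFFSET to (const_int 8); for a plain
   (mem:SI (reg:SI 4)) it sets *OFFSET to const0_rtx.  */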

/* If INSN is a load or store whose address is in the form of
   [base+offset], extract the two parts and store them in BASE and
   OFFSET.  IS_LOAD is set to TRUE if it is a load.  Return TRUE if
   INSN is such an instruction, otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
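
/* For illustration (hypothetical RTL): an insn whose pattern is
   (set (reg:SI 0) (mem:SI (plus:SI (reg:SI 4) (const_int 4)))) is
   recognized as a load with BASE = (reg:SI 4) and OFFSET = (const_int 4);
   the mirror-image store pattern yields *IS_LOAD == false.  */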

/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
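
/* Worked example, for illustration (hypothetical insns): two loads from
   [r4, #4] and [r4, #8] both get FUSION_PRI == max_pri - 2, so the
   scheduler groups them; their PRI values then differ only in the offset
   term (tmp -= off_val), so the load with the smaller offset gets the
   larger PRI and sorts first within the fusion group.  */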

/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target must be described using
   a mask selecting GCC high-lanes.

		 Big-Endian		Little-Endian

   GCC		  0   1   2   3		 3   2   1   0
		| x | x | x | x |	| x | x | x | x |
   Architecture   3   2   1   0		 3   2   1   0

   Low Mask:	    { 2, 3 }		   { 0, 1 }
   High Mask:	    { 0, 1 }		   { 2, 3 }
*/

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
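
/* Worked example, for illustration: for V4SImode with HIGH == true, the
   loop above produces (parallel [(const_int 2) (const_int 3)]) on a
   little-endian target but (parallel [(const_int 0) (const_int 1)]) on a
   big-endian one, matching the High Mask row of the diagram.  */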

/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
				     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
	  || INTVAL (elt_ideal) != INTVAL (elt_op))
	return false;
    }
  return true;
}

/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */
static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
			 const_tree)
{
  /* For now, we punt and do not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
			  rtx label_ref)
{
  rtx x;
  x = gen_rtx_fmt_ee (code, VOIDmode,
		      gen_rtx_REG (cc_mode, CC_REGNUM),
		      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
			    pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
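
/* For illustration (hypothetical operands): with CODE == NE and
   CC_MODE == CCmode this emits RTL of the shape

     (set (pc) (if_then_else (ne (reg:CC CC_REGNUM) (const_int 0))
			     (label_ref LABEL) (pc)))

   with a very low branch probability note attached by emit_unlikely_jump,
   so the branch is treated as not taken.  */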

/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;

      if (!(flags & SECTION_DEBUG))
	*num |= 0x2;
      if (flags & SECTION_EXCLUDE)
	*num |= 0x80000000;
      if (flags & SECTION_WRITE)
	*num |= 0x1;
      if (flags & SECTION_CODE)
	*num |= 0x4;
      if (flags & SECTION_MERGE)
	*num |= 0x10;
      if (flags & SECTION_STRINGS)
	*num |= 0x20;
      if (flags & SECTION_TLS)
	*num |= 0x400;
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	*num |= 0x200;

      return true;
    }

  return false;
}
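
/* Worked example, for illustration: a pure-code, executable section that is
   neither a debug nor an excluded section ends up with
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. (assuming the usual ELF flag
   encodings) SHF_ARM_PURECODE | SHF_ALLOC | SHF_EXECINSTR.  */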

/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */

static section *
arm_function_section (tree decl, enum node_frequency freq,
		      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
							exit);

      /* If default_sec is not null, then it must be a special section like for
	 example .text.startup.  We set the pure-code attribute and return the
	 same section to preserve existing behavior.  */
      if (default_sec)
	default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise check whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case, passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}
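
/* For illustration (hypothetical source): under -mpure-code a function
   placed with __attribute__ ((section ("fast_code"))) still gets
   SECTION_ARM_PURECODE set on the named section returned above, so the
   resulting "fast_code" section carries SHF_ARM_PURECODE just like the
   default text section.  */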

/* Implement the TARGET_SECTION_TYPE_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer
   may contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}

/* Generate call to __aeabi_[mode]divmod (op0, op1).  */

static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
			   rtx op0, rtx op1,
			   rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode),
						     MODE_INT);

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
					libval_mode, 2,
					op0, GET_MODE (op0),
					op1, GET_MODE (op1));

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
				       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
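
/* Worked example, for illustration: for MODE == SImode, LIBVAL_MODE is
   DImode and the call might be to __aeabi_idivmod; the quotient is then
   extracted as the DImode result's subreg at byte offset 0 and the
   remainder as the subreg at byte offset GET_MODE_SIZE (SImode) == 4.  */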

/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented it will trigger gcc_unreachable.  */

bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
    case VUNSPEC_CDP:
    case VUNSPEC_LDC:
    case VUNSPEC_LDCL:
    case VUNSPEC_STC:
    case VUNSPEC_STCL:
    case VUNSPEC_MCR:
    case VUNSPEC_MRC:
      if (arm_arch4)
	return true;
      break;
    case VUNSPEC_CDP2:
    case VUNSPEC_LDC2:
    case VUNSPEC_LDC2L:
    case VUNSPEC_STC2:
    case VUNSPEC_STC2L:
    case VUNSPEC_MCR2:
    case VUNSPEC_MRC2:
      /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
	 ARMv8-{A,M}.  */
      if (arm_arch5)
	return true;
      break;
    case VUNSPEC_MCRR:
    case VUNSPEC_MRRC:
      /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
	 ARMv8-{A,M}.  */
      if (arm_arch6 || arm_arch5te)
	return true;
      break;
    case VUNSPEC_MCRR2:
    case VUNSPEC_MRRC2:
      if (arm_arch6)
	return true;
      break;
    default:
      gcc_unreachable ();
    }
  return false;
}
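
/* For illustration: a builtin expanding to VUNSPEC_MCR is reported as
   available whenever arm_arch4 is set (ARMv4 or later), while its MCR2
   variant needs arm_arch5 (ARMv5 or later), mirroring when the underlying
   coprocessor instructions were introduced.  */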

/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */

bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;
  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch (GET_CODE (op))
    {
    case PLUS:
      {
	/* Or registers with an offset.  */
	if (!REG_P (XEXP (op, 0)))
	  return false;

	op = XEXP (op, 1);

	/* The offset must be an immediate though.  */
	if (!CONST_INT_P (op))
	  return false;

	range = INTVAL (op);

	/* Within the range of [-1020,1020].  */
	if (!IN_RANGE (range, -1020, 1020))
	  return false;

	/* And a multiple of 4.  */
	return (range % 4) == 0;
      }
    case PRE_INC:
    case POST_INC:
    case PRE_DEC:
    case POST_DEC:
      return REG_P (XEXP (op, 0));
    default:
      gcc_unreachable ();
    }
  return false;
}
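
/* For illustration (hypothetical operands): addresses such as [r0],
   [r1, #-1020] or [r2, #512] are accepted above, while [r3, #2] is
   rejected because the offset is not a multiple of 4 and [r4, #1024]
   is rejected because it lies outside [-1020, 1020].  */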

#include "gt-arm.h"