[ARM][2/2] Remove old rtx costs
[official-gcc.git] / gcc / config / arm / arm.c
blob: 13f61f49ff8f4c460529772283be282a354264e4
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "reload.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60 #include "dumpfile.h"
61 #include "target-globals.h"
62 #include "builtins.h"
63 #include "tm-constrs.h"
64 #include "rtl-iter.h"
65 #include "optabs-libfuncs.h"
67 /* This file should be included last. */
68 #include "target-def.h"
70 /* Forward definitions of types. */
71 typedef struct minipool_node Mnode;
72 typedef struct minipool_fixup Mfix;
74 void (*arm_lang_output_object_attributes_hook)(void);
76 struct four_ints
78 int i[4];
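/* (Its role, inferred from the declarations below: i[] holds the up-to-four
   immediates that optimal_immediate_sequence returns when a constant is
   split into a short instruction sequence.)  */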
81 /* Forward function declarations. */
82 static bool arm_const_not_ok_for_debug_p (rtx);
83 static bool arm_needs_doubleword_align (machine_mode, const_tree);
84 static int arm_compute_static_chain_stack_bytes (void);
85 static arm_stack_offsets *arm_get_frame_offsets (void);
86 static void arm_add_gc_roots (void);
87 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
88 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
89 static unsigned bit_count (unsigned long);
90 static unsigned feature_count (const arm_feature_set*);
91 static int arm_address_register_rtx_p (rtx, int);
92 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
93 static bool is_called_in_ARM_mode (tree);
94 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
95 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
96 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
97 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
98 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
99 inline static int thumb1_index_register_rtx_p (rtx, int);
100 static int thumb_far_jump_used_p (void);
101 static bool thumb_force_lr_save (void);
102 static unsigned arm_size_return_regs (void);
103 static bool arm_assemble_integer (rtx, unsigned int, int);
104 static void arm_print_operand (FILE *, rtx, int);
105 static void arm_print_operand_address (FILE *, machine_mode, rtx);
106 static bool arm_print_operand_punct_valid_p (unsigned char code);
107 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
108 static arm_cc get_arm_condition_code (rtx);
109 static const char *output_multi_immediate (rtx *, const char *, const char *,
110 int, HOST_WIDE_INT);
111 static const char *shift_op (rtx, HOST_WIDE_INT *);
112 static struct machine_function *arm_init_machine_status (void);
113 static void thumb_exit (FILE *, int);
114 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
115 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
116 static Mnode *add_minipool_forward_ref (Mfix *);
117 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
118 static Mnode *add_minipool_backward_ref (Mfix *);
119 static void assign_minipool_offsets (Mfix *);
120 static void arm_print_value (FILE *, rtx);
121 static void dump_minipool (rtx_insn *);
122 static int arm_barrier_cost (rtx_insn *);
123 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
124 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
125 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
126 machine_mode, rtx);
127 static void arm_reorg (void);
128 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
129 static unsigned long arm_compute_save_reg0_reg12_mask (void);
130 static unsigned long arm_compute_save_reg_mask (void);
131 static unsigned long arm_isr_value (tree);
132 static unsigned long arm_compute_func_type (void);
133 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
134 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
135 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
136 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
137 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
138 #endif
139 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
140 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
141 static int arm_comp_type_attributes (const_tree, const_tree);
142 static void arm_set_default_type_attributes (tree);
143 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
144 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
145 static int optimal_immediate_sequence (enum rtx_code code,
146 unsigned HOST_WIDE_INT val,
147 struct four_ints *return_sequence);
148 static int optimal_immediate_sequence_1 (enum rtx_code code,
149 unsigned HOST_WIDE_INT val,
150 struct four_ints *return_sequence,
151 int i);
152 static int arm_get_strip_length (int);
153 static bool arm_function_ok_for_sibcall (tree, tree);
154 static machine_mode arm_promote_function_mode (const_tree,
155 machine_mode, int *,
156 const_tree, int);
157 static bool arm_return_in_memory (const_tree, const_tree);
158 static rtx arm_function_value (const_tree, const_tree, bool);
159 static rtx arm_libcall_value_1 (machine_mode);
160 static rtx arm_libcall_value (machine_mode, const_rtx);
161 static bool arm_function_value_regno_p (const unsigned int);
162 static void arm_internal_label (FILE *, const char *, unsigned long);
163 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
164 tree);
165 static bool arm_have_conditional_execution (void);
166 static bool arm_cannot_force_const_mem (machine_mode, rtx);
167 static bool arm_legitimate_constant_p (machine_mode, rtx);
168 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
169 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
170 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
171 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
172 static void emit_constant_insn (rtx cond, rtx pattern);
173 static rtx_insn *emit_set_insn (rtx, rtx);
174 static rtx emit_multi_reg_push (unsigned long, unsigned long);
175 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
176 tree, bool);
177 static rtx arm_function_arg (cumulative_args_t, machine_mode,
178 const_tree, bool);
179 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
180 const_tree, bool);
181 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
182 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
183 const_tree);
184 static rtx aapcs_libcall_value (machine_mode);
185 static int aapcs_select_return_coproc (const_tree, const_tree);
187 #ifdef OBJECT_FORMAT_ELF
188 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
189 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
190 #endif
191 #ifndef ARM_PE
192 static void arm_encode_section_info (tree, rtx, int);
193 #endif
195 static void arm_file_end (void);
196 static void arm_file_start (void);
197 static void arm_insert_attributes (tree, tree *);
199 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
200 tree, int *, int);
201 static bool arm_pass_by_reference (cumulative_args_t,
202 machine_mode, const_tree, bool);
203 static bool arm_promote_prototypes (const_tree);
204 static bool arm_default_short_enums (void);
205 static bool arm_align_anon_bitfield (void);
206 static bool arm_return_in_msb (const_tree);
207 static bool arm_must_pass_in_stack (machine_mode, const_tree);
208 static bool arm_return_in_memory (const_tree, const_tree);
209 #if ARM_UNWIND_INFO
210 static void arm_unwind_emit (FILE *, rtx_insn *);
211 static bool arm_output_ttype (rtx);
212 static void arm_asm_emit_except_personality (rtx);
213 #endif
214 static void arm_asm_init_sections (void);
215 static rtx arm_dwarf_register_span (rtx);
217 static tree arm_cxx_guard_type (void);
218 static bool arm_cxx_guard_mask_bit (void);
219 static tree arm_get_cookie_size (tree);
220 static bool arm_cookie_has_size (void);
221 static bool arm_cxx_cdtor_returns_this (void);
222 static bool arm_cxx_key_method_may_be_inline (void);
223 static void arm_cxx_determine_class_data_visibility (tree);
224 static bool arm_cxx_class_data_always_comdat (void);
225 static bool arm_cxx_use_aeabi_atexit (void);
226 static void arm_init_libfuncs (void);
227 static tree arm_build_builtin_va_list (void);
228 static void arm_expand_builtin_va_start (tree, rtx);
229 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
230 static void arm_option_override (void);
231 static void arm_override_options_after_change (void);
232 static void arm_option_print (FILE *, int, struct cl_target_option *);
233 static void arm_set_current_function (tree);
234 static bool arm_can_inline_p (tree, tree);
235 static void arm_relayout_function (tree);
236 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
237 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
238 static bool arm_macro_fusion_p (void);
239 static bool arm_cannot_copy_insn_p (rtx_insn *);
240 static int arm_issue_rate (void);
241 static int arm_first_cycle_multipass_dfa_lookahead (void);
242 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
243 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
244 static bool arm_output_addr_const_extra (FILE *, rtx);
245 static bool arm_allocate_stack_slots_for_args (void);
246 static bool arm_warn_func_return (tree);
247 static tree arm_promoted_type (const_tree t);
248 static tree arm_convert_to_type (tree type, tree expr);
249 static bool arm_scalar_mode_supported_p (machine_mode);
250 static bool arm_frame_pointer_required (void);
251 static bool arm_can_eliminate (const int, const int);
252 static void arm_asm_trampoline_template (FILE *);
253 static void arm_trampoline_init (rtx, tree, rtx);
254 static rtx arm_trampoline_adjust_address (rtx);
255 static rtx arm_pic_static_addr (rtx orig, rtx reg);
256 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
257 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
258 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
259 static bool arm_array_mode_supported_p (machine_mode,
260 unsigned HOST_WIDE_INT);
261 static machine_mode arm_preferred_simd_mode (machine_mode);
262 static bool arm_class_likely_spilled_p (reg_class_t);
263 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
264 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
265 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
266 const_tree type,
267 int misalignment,
268 bool is_packed);
269 static void arm_conditional_register_usage (void);
270 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
271 static unsigned int arm_autovectorize_vector_sizes (void);
272 static int arm_default_branch_cost (bool, bool);
273 static int arm_cortex_a5_branch_cost (bool, bool);
274 static int arm_cortex_m_branch_cost (bool, bool);
275 static int arm_cortex_m7_branch_cost (bool, bool);
277 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
278 const unsigned char *sel);
280 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
282 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
283 tree vectype,
284 int misalign ATTRIBUTE_UNUSED);
285 static unsigned arm_add_stmt_cost (void *data, int count,
286 enum vect_cost_for_stmt kind,
287 struct _stmt_vec_info *stmt_info,
288 int misalign,
289 enum vect_cost_model_location where);
291 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
292 bool op0_preserve_value);
293 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
295 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
296 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
297 const_tree);
298 static section *arm_function_section (tree, enum node_frequency, bool, bool);
299 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
300 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
301 int reloc);
302 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
304 /* Table of machine attributes. */
305 static const struct attribute_spec arm_attribute_table[] =
307 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
308 affects_type_identity } */
309 /* Function calls made to this symbol must be done indirectly, because
310 it may lie outside of the 26 bit addressing range of a normal function
311 call. */
312 { "long_call", 0, 0, false, true, true, NULL, false },
313 /* Whereas these functions are always known to reside within the 26 bit
314 addressing range. */
315 { "short_call", 0, 0, false, true, true, NULL, false },
316 /* Specify the procedure call conventions for a function. */
317 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
318 false },
319 /* Interrupt Service Routines have special prologue and epilogue requirements. */
320 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
321 false },
322 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
323 false },
324 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
325 false },
326 #ifdef ARM_PE
327 /* ARM/PE has three new attributes:
328 interfacearm - ?
329 dllexport - for exporting a function/variable that will live in a dll
330 dllimport - for importing a function/variable from a dll
332 Microsoft allows multiple declspecs in one __declspec, separating
333 them with spaces. We do NOT support this. Instead, use __declspec
334 multiple times.
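   As an illustrative sketch only (hypothetical declaration, attribute names
   taken from the table below), the supported spelling is
     __declspec (dllexport) __declspec (interfacearm) void f (void);
   rather than the single combined form __declspec (dllexport interfacearm).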
336 { "dllimport", 0, 0, true, false, false, NULL, false },
337 { "dllexport", 0, 0, true, false, false, NULL, false },
338 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
339 false },
340 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
342 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
343 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
344 false },
345 #endif
346 { NULL, 0, 0, false, false, false, NULL, false }
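/* An illustrative sketch of how these attributes appear in user code (the
   declarations are hypothetical, not taken from this file):

     extern void far_away (void) __attribute__ ((long_call));
     void timer_handler (void) __attribute__ ((interrupt ("IRQ")));

   The first forces an indirect call sequence so the callee may sit beyond
   the normal branch range; the second is routed through
   arm_handle_isr_attribute and marks the function as an IRQ service
   routine.  */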
349 /* Initialize the GCC target structure. */
350 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
351 #undef TARGET_MERGE_DECL_ATTRIBUTES
352 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
353 #endif
355 #undef TARGET_LEGITIMIZE_ADDRESS
356 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
358 #undef TARGET_ATTRIBUTE_TABLE
359 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
361 #undef TARGET_INSERT_ATTRIBUTES
362 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
364 #undef TARGET_ASM_FILE_START
365 #define TARGET_ASM_FILE_START arm_file_start
366 #undef TARGET_ASM_FILE_END
367 #define TARGET_ASM_FILE_END arm_file_end
369 #undef TARGET_ASM_ALIGNED_SI_OP
370 #define TARGET_ASM_ALIGNED_SI_OP NULL
371 #undef TARGET_ASM_INTEGER
372 #define TARGET_ASM_INTEGER arm_assemble_integer
374 #undef TARGET_PRINT_OPERAND
375 #define TARGET_PRINT_OPERAND arm_print_operand
376 #undef TARGET_PRINT_OPERAND_ADDRESS
377 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
378 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
379 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
381 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
382 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
384 #undef TARGET_ASM_FUNCTION_PROLOGUE
385 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
387 #undef TARGET_ASM_FUNCTION_EPILOGUE
388 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
390 #undef TARGET_CAN_INLINE_P
391 #define TARGET_CAN_INLINE_P arm_can_inline_p
393 #undef TARGET_RELAYOUT_FUNCTION
394 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
396 #undef TARGET_OPTION_OVERRIDE
397 #define TARGET_OPTION_OVERRIDE arm_option_override
399 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
400 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
402 #undef TARGET_OPTION_PRINT
403 #define TARGET_OPTION_PRINT arm_option_print
405 #undef TARGET_COMP_TYPE_ATTRIBUTES
406 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
408 #undef TARGET_SCHED_MACRO_FUSION_P
409 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
411 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
412 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
414 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
415 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
417 #undef TARGET_SCHED_ADJUST_COST
418 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
420 #undef TARGET_SET_CURRENT_FUNCTION
421 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
423 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
424 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
426 #undef TARGET_SCHED_REORDER
427 #define TARGET_SCHED_REORDER arm_sched_reorder
429 #undef TARGET_REGISTER_MOVE_COST
430 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
432 #undef TARGET_MEMORY_MOVE_COST
433 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
435 #undef TARGET_ENCODE_SECTION_INFO
436 #ifdef ARM_PE
437 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
438 #else
439 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
440 #endif
442 #undef TARGET_STRIP_NAME_ENCODING
443 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
445 #undef TARGET_ASM_INTERNAL_LABEL
446 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
448 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
449 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
451 #undef TARGET_FUNCTION_VALUE
452 #define TARGET_FUNCTION_VALUE arm_function_value
454 #undef TARGET_LIBCALL_VALUE
455 #define TARGET_LIBCALL_VALUE arm_libcall_value
457 #undef TARGET_FUNCTION_VALUE_REGNO_P
458 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
460 #undef TARGET_ASM_OUTPUT_MI_THUNK
461 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
462 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
463 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
465 #undef TARGET_RTX_COSTS
466 #define TARGET_RTX_COSTS arm_rtx_costs
467 #undef TARGET_ADDRESS_COST
468 #define TARGET_ADDRESS_COST arm_address_cost
470 #undef TARGET_SHIFT_TRUNCATION_MASK
471 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
472 #undef TARGET_VECTOR_MODE_SUPPORTED_P
473 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
474 #undef TARGET_ARRAY_MODE_SUPPORTED_P
475 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
476 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
477 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
478 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
479 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
480 arm_autovectorize_vector_sizes
482 #undef TARGET_MACHINE_DEPENDENT_REORG
483 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
485 #undef TARGET_INIT_BUILTINS
486 #define TARGET_INIT_BUILTINS arm_init_builtins
487 #undef TARGET_EXPAND_BUILTIN
488 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
489 #undef TARGET_BUILTIN_DECL
490 #define TARGET_BUILTIN_DECL arm_builtin_decl
492 #undef TARGET_INIT_LIBFUNCS
493 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
495 #undef TARGET_PROMOTE_FUNCTION_MODE
496 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
497 #undef TARGET_PROMOTE_PROTOTYPES
498 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
499 #undef TARGET_PASS_BY_REFERENCE
500 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
501 #undef TARGET_ARG_PARTIAL_BYTES
502 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
503 #undef TARGET_FUNCTION_ARG
504 #define TARGET_FUNCTION_ARG arm_function_arg
505 #undef TARGET_FUNCTION_ARG_ADVANCE
506 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
507 #undef TARGET_FUNCTION_ARG_BOUNDARY
508 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
510 #undef TARGET_SETUP_INCOMING_VARARGS
511 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
513 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
514 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
516 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
517 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
518 #undef TARGET_TRAMPOLINE_INIT
519 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
520 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
521 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
523 #undef TARGET_WARN_FUNC_RETURN
524 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
526 #undef TARGET_DEFAULT_SHORT_ENUMS
527 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
529 #undef TARGET_ALIGN_ANON_BITFIELD
530 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
532 #undef TARGET_NARROW_VOLATILE_BITFIELD
533 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
535 #undef TARGET_CXX_GUARD_TYPE
536 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
538 #undef TARGET_CXX_GUARD_MASK_BIT
539 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
541 #undef TARGET_CXX_GET_COOKIE_SIZE
542 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
544 #undef TARGET_CXX_COOKIE_HAS_SIZE
545 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
547 #undef TARGET_CXX_CDTOR_RETURNS_THIS
548 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
550 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
551 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
553 #undef TARGET_CXX_USE_AEABI_ATEXIT
554 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
556 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
557 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
558 arm_cxx_determine_class_data_visibility
560 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
561 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
563 #undef TARGET_RETURN_IN_MSB
564 #define TARGET_RETURN_IN_MSB arm_return_in_msb
566 #undef TARGET_RETURN_IN_MEMORY
567 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
569 #undef TARGET_MUST_PASS_IN_STACK
570 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
572 #if ARM_UNWIND_INFO
573 #undef TARGET_ASM_UNWIND_EMIT
574 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
576 /* EABI unwinding tables use a different format for the typeinfo tables. */
577 #undef TARGET_ASM_TTYPE
578 #define TARGET_ASM_TTYPE arm_output_ttype
580 #undef TARGET_ARM_EABI_UNWINDER
581 #define TARGET_ARM_EABI_UNWINDER true
583 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
584 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
586 #undef TARGET_ASM_INIT_SECTIONS
587 #endif /* ARM_UNWIND_INFO */
588 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
590 #undef TARGET_DWARF_REGISTER_SPAN
591 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
593 #undef TARGET_CANNOT_COPY_INSN_P
594 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
596 #ifdef HAVE_AS_TLS
597 #undef TARGET_HAVE_TLS
598 #define TARGET_HAVE_TLS true
599 #endif
601 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
602 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
604 #undef TARGET_LEGITIMATE_CONSTANT_P
605 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
607 #undef TARGET_CANNOT_FORCE_CONST_MEM
608 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
610 #undef TARGET_MAX_ANCHOR_OFFSET
611 #define TARGET_MAX_ANCHOR_OFFSET 4095
613 /* The minimum is set such that the total size of the block
614 for a particular anchor is -4088 + 1 + 4095 bytes, which is
615 divisible by eight, ensuring natural spacing of anchors. */
616 #undef TARGET_MIN_ANCHOR_OFFSET
617 #define TARGET_MIN_ANCHOR_OFFSET -4088
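/* Spelled out: 4088 + 1 + 4095 = 8184 bytes of block per anchor, and
   8184 = 8 * 1023, so consecutive anchors remain naturally aligned.  */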
619 #undef TARGET_SCHED_ISSUE_RATE
620 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
622 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
623 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
624 arm_first_cycle_multipass_dfa_lookahead
626 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
627 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
628 arm_first_cycle_multipass_dfa_lookahead_guard
630 #undef TARGET_MANGLE_TYPE
631 #define TARGET_MANGLE_TYPE arm_mangle_type
633 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
634 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
636 #undef TARGET_BUILD_BUILTIN_VA_LIST
637 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
638 #undef TARGET_EXPAND_BUILTIN_VA_START
639 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
640 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
641 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
643 #ifdef HAVE_AS_TLS
644 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
645 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
646 #endif
648 #undef TARGET_LEGITIMATE_ADDRESS_P
649 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
651 #undef TARGET_PREFERRED_RELOAD_CLASS
652 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
654 #undef TARGET_PROMOTED_TYPE
655 #define TARGET_PROMOTED_TYPE arm_promoted_type
657 #undef TARGET_CONVERT_TO_TYPE
658 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
660 #undef TARGET_SCALAR_MODE_SUPPORTED_P
661 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
663 #undef TARGET_FRAME_POINTER_REQUIRED
664 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
666 #undef TARGET_CAN_ELIMINATE
667 #define TARGET_CAN_ELIMINATE arm_can_eliminate
669 #undef TARGET_CONDITIONAL_REGISTER_USAGE
670 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
672 #undef TARGET_CLASS_LIKELY_SPILLED_P
673 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
675 #undef TARGET_VECTORIZE_BUILTINS
676 #define TARGET_VECTORIZE_BUILTINS
678 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
679 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
680 arm_builtin_vectorized_function
682 #undef TARGET_VECTOR_ALIGNMENT
683 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
685 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
686 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
687 arm_vector_alignment_reachable
689 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
690 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
691 arm_builtin_support_vector_misalignment
693 #undef TARGET_PREFERRED_RENAME_CLASS
694 #define TARGET_PREFERRED_RENAME_CLASS \
695 arm_preferred_rename_class
697 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
698 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
699 arm_vectorize_vec_perm_const_ok
701 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
702 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
703 arm_builtin_vectorization_cost
704 #undef TARGET_VECTORIZE_ADD_STMT_COST
705 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
707 #undef TARGET_CANONICALIZE_COMPARISON
708 #define TARGET_CANONICALIZE_COMPARISON \
709 arm_canonicalize_comparison
711 #undef TARGET_ASAN_SHADOW_OFFSET
712 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
714 #undef MAX_INSN_PER_IT_BLOCK
715 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
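/* In other words: with -mrestrict-it (the ARMv8-style restricted IT blocks)
   an IT block may cover only a single conditional instruction; otherwise up
   to four instructions may share one IT block.  */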
717 #undef TARGET_CAN_USE_DOLOOP_P
718 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
720 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
721 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
723 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
724 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
726 #undef TARGET_SCHED_FUSION_PRIORITY
727 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
729 #undef TARGET_ASM_FUNCTION_SECTION
730 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
732 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
733 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
735 #undef TARGET_SECTION_TYPE_FLAGS
736 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
738 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
739 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
741 struct gcc_target targetm = TARGET_INITIALIZER;
743 /* Obstack for minipool constant handling. */
744 static struct obstack minipool_obstack;
745 static char * minipool_startobj;
747 /* The maximum number of insns skipped which
748 will be conditionalised if possible. */
749 static int max_insns_skipped = 5;
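/* (This is only the initial default; the per-core tune_params below carry a
   "Max cond insns" field from which this is presumably re-set once the
   target options have been processed.)  */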
751 extern FILE * asm_out_file;
753 /* True if we are currently building a constant table. */
754 int making_const_table;
756 /* The processor for which instructions should be scheduled. */
757 enum processor_type arm_tune = arm_none;
759 /* The current tuning set. */
760 const struct tune_params *current_tune;
762 /* Which floating point hardware to schedule for. */
763 int arm_fpu_attr;
765 /* Used for Thumb call_via trampolines. */
766 rtx thumb_call_via_label[14];
767 static int thumb_call_reg_needed;
769 /* The bits in this mask specify which
770 instructions we are allowed to generate. */
771 arm_feature_set insn_flags = ARM_FSET_EMPTY;
773 /* The bits in this mask specify which instruction scheduling options should
774 be used. */
775 arm_feature_set tune_flags = ARM_FSET_EMPTY;
777 /* The highest ARM architecture version supported by the
778 target. */
779 enum base_architecture arm_base_arch = BASE_ARCH_0;
781 /* The following are used in the arm.md file as equivalents to bits
782 in the above two flag variables. */
784 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
785 int arm_arch3m = 0;
787 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
788 int arm_arch4 = 0;
790 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
791 int arm_arch4t = 0;
793 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
794 int arm_arch5 = 0;
796 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
797 int arm_arch5e = 0;
799 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
800 int arm_arch6 = 0;
802 /* Nonzero if this chip supports the ARM 6K extensions. */
803 int arm_arch6k = 0;
805 /* Nonzero if this chip supports the ARM 6KZ extensions. */
806 int arm_arch6kz = 0;
808 /* Nonzero if instructions present in ARMv6-M can be used. */
809 int arm_arch6m = 0;
811 /* Nonzero if this chip supports the ARM 7 extensions. */
812 int arm_arch7 = 0;
814 /* Nonzero if instructions not present in the 'M' profile can be used. */
815 int arm_arch_notm = 0;
817 /* Nonzero if instructions present in ARMv7E-M can be used. */
818 int arm_arch7em = 0;
820 /* Nonzero if instructions present in ARMv8 can be used. */
821 int arm_arch8 = 0;
823 /* Nonzero if this chip supports the ARMv8.1 extensions. */
824 int arm_arch8_1 = 0;
826 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
827 int arm_arch8_2 = 0;
829 /* Nonzero if this chip supports the FP16 instructions extension of ARM
830 Architecture 8.2. */
831 int arm_fp16_inst = 0;
833 /* Nonzero if this chip can benefit from load scheduling. */
834 int arm_ld_sched = 0;
836 /* Nonzero if this chip is a StrongARM. */
837 int arm_tune_strongarm = 0;
839 /* Nonzero if this chip supports Intel Wireless MMX technology. */
840 int arm_arch_iwmmxt = 0;
842 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
843 int arm_arch_iwmmxt2 = 0;
845 /* Nonzero if this chip is an XScale. */
846 int arm_arch_xscale = 0;
848 /* Nonzero if tuning for XScale. */
849 int arm_tune_xscale = 0;
851 /* Nonzero if we want to tune for stores that access the write-buffer.
852 This typically means an ARM6 or ARM7 with MMU or MPU. */
853 int arm_tune_wbuf = 0;
855 /* Nonzero if tuning for Cortex-A9. */
856 int arm_tune_cortex_a9 = 0;
858 /* Nonzero if we should define __THUMB_INTERWORK__ in the
859 preprocessor.
860 XXX This is a bit of a hack; it's intended to help work around
861 problems in GLD, which doesn't understand that armv5t code is
862 interworking clean. */
863 int arm_cpp_interwork = 0;
865 /* Nonzero if chip supports Thumb 1. */
866 int arm_arch_thumb1;
868 /* Nonzero if chip supports Thumb 2. */
869 int arm_arch_thumb2;
871 /* Nonzero if chip supports integer division instruction. */
872 int arm_arch_arm_hwdiv;
873 int arm_arch_thumb_hwdiv;
875 /* Nonzero if chip disallows volatile memory access in IT block. */
876 int arm_arch_no_volatile_ce;
878 /* Nonzero if we should use Neon to handle 64-bit operations rather
879 than core registers. */
880 int prefer_neon_for_64bits = 0;
882 /* Nonzero if we shouldn't use literal pools. */
883 bool arm_disable_literal_pool = false;
885 /* The register number to be used for the PIC offset register. */
886 unsigned arm_pic_register = INVALID_REGNUM;
888 enum arm_pcs arm_pcs_default;
890 /* For an explanation of these variables, see final_prescan_insn below. */
891 int arm_ccfsm_state;
892 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
893 enum arm_cond_code arm_current_cc;
895 rtx arm_target_insn;
896 int arm_target_label;
897 /* The number of conditionally executed insns, including the current insn. */
898 int arm_condexec_count = 0;
899 /* A bitmask specifying the patterns for the IT block.
900 Zero means do not output an IT block before this insn. */
901 int arm_condexec_mask = 0;
902 /* The number of bits used in arm_condexec_mask. */
903 int arm_condexec_masklen = 0;
905 /* Nonzero if chip supports the ARMv8 CRC instructions. */
906 int arm_arch_crc = 0;
909 /* Nonzero if the core has a very small, high-latency multiply unit. */
909 int arm_m_profile_small_mul = 0;
911 /* The condition codes of the ARM, and the inverse function. */
912 static const char * const arm_condition_codes[] =
914 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
915 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
918 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
919 int arm_regs_in_sequence[] =
921 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
924 #define ARM_LSL_NAME "lsl"
925 #define streq(string1, string2) (strcmp (string1, string2) == 0)
927 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
928 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
929 | (1 << PIC_OFFSET_TABLE_REGNUM)))
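/* A reading of the mask above (assuming the usual numbering, where the low
   registers are r0-r7): 0xff selects r0-r7, and the Thumb hard frame
   pointer, the stack pointer, the program counter and the PIC register are
   then excluded; SP and PC sit above r7, so removing them is defensive.  */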
931 /* Initialization code. */
933 struct processors
935 const char *const name;
936 enum processor_type core;
937 const char *arch;
938 enum base_architecture base_arch;
939 const arm_feature_set flags;
940 const struct tune_params *const tune;
944 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
945 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
947 num_slots, \
948 l1_size, \
949 l1_line_size \
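/* For example (illustrative values only, not any particular core), a
   processor entry might use ARM_PREFETCH_BENEFICIAL (4, 32768, 64), which
   expands to the initializer { 4, 32768, 64 }: four prefetch slots, a 32kB
   L1 cache and 64-byte cache lines.  */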
952 /* arm generic vectorizer costs. */
953 static const
954 struct cpu_vec_costs arm_default_vec_cost = {
955 1, /* scalar_stmt_cost. */
956 1, /* scalar load_cost. */
957 1, /* scalar_store_cost. */
958 1, /* vec_stmt_cost. */
959 1, /* vec_to_scalar_cost. */
960 1, /* scalar_to_vec_cost. */
961 1, /* vec_align_load_cost. */
962 1, /* vec_unalign_load_cost. */
963 1, /* vec_unalign_store_cost. */
964 1, /* vec_store_cost. */
965 3, /* cond_taken_branch_cost. */
966 1, /* cond_not_taken_branch_cost. */
969 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
970 #include "aarch-cost-tables.h"
974 const struct cpu_cost_table cortexa9_extra_costs =
976 /* ALU */
978 0, /* arith. */
979 0, /* logical. */
980 0, /* shift. */
981 COSTS_N_INSNS (1), /* shift_reg. */
982 COSTS_N_INSNS (1), /* arith_shift. */
983 COSTS_N_INSNS (2), /* arith_shift_reg. */
984 0, /* log_shift. */
985 COSTS_N_INSNS (1), /* log_shift_reg. */
986 COSTS_N_INSNS (1), /* extend. */
987 COSTS_N_INSNS (2), /* extend_arith. */
988 COSTS_N_INSNS (1), /* bfi. */
989 COSTS_N_INSNS (1), /* bfx. */
990 0, /* clz. */
991 0, /* rev. */
992 0, /* non_exec. */
993 true /* non_exec_costs_exec. */
996 /* MULT SImode */
998 COSTS_N_INSNS (3), /* simple. */
999 COSTS_N_INSNS (3), /* flag_setting. */
1000 COSTS_N_INSNS (2), /* extend. */
1001 COSTS_N_INSNS (3), /* add. */
1002 COSTS_N_INSNS (2), /* extend_add. */
1003 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1005 /* MULT DImode */
1007 0, /* simple (N/A). */
1008 0, /* flag_setting (N/A). */
1009 COSTS_N_INSNS (4), /* extend. */
1010 0, /* add (N/A). */
1011 COSTS_N_INSNS (4), /* extend_add. */
1012 0 /* idiv (N/A). */
1015 /* LD/ST */
1017 COSTS_N_INSNS (2), /* load. */
1018 COSTS_N_INSNS (2), /* load_sign_extend. */
1019 COSTS_N_INSNS (2), /* ldrd. */
1020 COSTS_N_INSNS (2), /* ldm_1st. */
1021 1, /* ldm_regs_per_insn_1st. */
1022 2, /* ldm_regs_per_insn_subsequent. */
1023 COSTS_N_INSNS (5), /* loadf. */
1024 COSTS_N_INSNS (5), /* loadd. */
1025 COSTS_N_INSNS (1), /* load_unaligned. */
1026 COSTS_N_INSNS (2), /* store. */
1027 COSTS_N_INSNS (2), /* strd. */
1028 COSTS_N_INSNS (2), /* stm_1st. */
1029 1, /* stm_regs_per_insn_1st. */
1030 2, /* stm_regs_per_insn_subsequent. */
1031 COSTS_N_INSNS (1), /* storef. */
1032 COSTS_N_INSNS (1), /* stored. */
1033 COSTS_N_INSNS (1), /* store_unaligned. */
1034 COSTS_N_INSNS (1), /* loadv. */
1035 COSTS_N_INSNS (1) /* storev. */
1038 /* FP SFmode */
1040 COSTS_N_INSNS (14), /* div. */
1041 COSTS_N_INSNS (4), /* mult. */
1042 COSTS_N_INSNS (7), /* mult_addsub. */
1043 COSTS_N_INSNS (30), /* fma. */
1044 COSTS_N_INSNS (3), /* addsub. */
1045 COSTS_N_INSNS (1), /* fpconst. */
1046 COSTS_N_INSNS (1), /* neg. */
1047 COSTS_N_INSNS (3), /* compare. */
1048 COSTS_N_INSNS (3), /* widen. */
1049 COSTS_N_INSNS (3), /* narrow. */
1050 COSTS_N_INSNS (3), /* toint. */
1051 COSTS_N_INSNS (3), /* fromint. */
1052 COSTS_N_INSNS (3) /* roundint. */
1054 /* FP DFmode */
1056 COSTS_N_INSNS (24), /* div. */
1057 COSTS_N_INSNS (5), /* mult. */
1058 COSTS_N_INSNS (8), /* mult_addsub. */
1059 COSTS_N_INSNS (30), /* fma. */
1060 COSTS_N_INSNS (3), /* addsub. */
1061 COSTS_N_INSNS (1), /* fpconst. */
1062 COSTS_N_INSNS (1), /* neg. */
1063 COSTS_N_INSNS (3), /* compare. */
1064 COSTS_N_INSNS (3), /* widen. */
1065 COSTS_N_INSNS (3), /* narrow. */
1066 COSTS_N_INSNS (3), /* toint. */
1067 COSTS_N_INSNS (3), /* fromint. */
1068 COSTS_N_INSNS (3) /* roundint. */
1071 /* Vector */
1073 COSTS_N_INSNS (1) /* alu. */
1077 const struct cpu_cost_table cortexa8_extra_costs =
1079 /* ALU */
1081 0, /* arith. */
1082 0, /* logical. */
1083 COSTS_N_INSNS (1), /* shift. */
1084 0, /* shift_reg. */
1085 COSTS_N_INSNS (1), /* arith_shift. */
1086 0, /* arith_shift_reg. */
1087 COSTS_N_INSNS (1), /* log_shift. */
1088 0, /* log_shift_reg. */
1089 0, /* extend. */
1090 0, /* extend_arith. */
1091 0, /* bfi. */
1092 0, /* bfx. */
1093 0, /* clz. */
1094 0, /* rev. */
1095 0, /* non_exec. */
1096 true /* non_exec_costs_exec. */
1099 /* MULT SImode */
1101 COSTS_N_INSNS (1), /* simple. */
1102 COSTS_N_INSNS (1), /* flag_setting. */
1103 COSTS_N_INSNS (1), /* extend. */
1104 COSTS_N_INSNS (1), /* add. */
1105 COSTS_N_INSNS (1), /* extend_add. */
1106 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1108 /* MULT DImode */
1110 0, /* simple (N/A). */
1111 0, /* flag_setting (N/A). */
1112 COSTS_N_INSNS (2), /* extend. */
1113 0, /* add (N/A). */
1114 COSTS_N_INSNS (2), /* extend_add. */
1115 0 /* idiv (N/A). */
1118 /* LD/ST */
1120 COSTS_N_INSNS (1), /* load. */
1121 COSTS_N_INSNS (1), /* load_sign_extend. */
1122 COSTS_N_INSNS (1), /* ldrd. */
1123 COSTS_N_INSNS (1), /* ldm_1st. */
1124 1, /* ldm_regs_per_insn_1st. */
1125 2, /* ldm_regs_per_insn_subsequent. */
1126 COSTS_N_INSNS (1), /* loadf. */
1127 COSTS_N_INSNS (1), /* loadd. */
1128 COSTS_N_INSNS (1), /* load_unaligned. */
1129 COSTS_N_INSNS (1), /* store. */
1130 COSTS_N_INSNS (1), /* strd. */
1131 COSTS_N_INSNS (1), /* stm_1st. */
1132 1, /* stm_regs_per_insn_1st. */
1133 2, /* stm_regs_per_insn_subsequent. */
1134 COSTS_N_INSNS (1), /* storef. */
1135 COSTS_N_INSNS (1), /* stored. */
1136 COSTS_N_INSNS (1), /* store_unaligned. */
1137 COSTS_N_INSNS (1), /* loadv. */
1138 COSTS_N_INSNS (1) /* storev. */
1141 /* FP SFmode */
1143 COSTS_N_INSNS (36), /* div. */
1144 COSTS_N_INSNS (11), /* mult. */
1145 COSTS_N_INSNS (20), /* mult_addsub. */
1146 COSTS_N_INSNS (30), /* fma. */
1147 COSTS_N_INSNS (9), /* addsub. */
1148 COSTS_N_INSNS (3), /* fpconst. */
1149 COSTS_N_INSNS (3), /* neg. */
1150 COSTS_N_INSNS (6), /* compare. */
1151 COSTS_N_INSNS (4), /* widen. */
1152 COSTS_N_INSNS (4), /* narrow. */
1153 COSTS_N_INSNS (8), /* toint. */
1154 COSTS_N_INSNS (8), /* fromint. */
1155 COSTS_N_INSNS (8) /* roundint. */
1157 /* FP DFmode */
1159 COSTS_N_INSNS (64), /* div. */
1160 COSTS_N_INSNS (16), /* mult. */
1161 COSTS_N_INSNS (25), /* mult_addsub. */
1162 COSTS_N_INSNS (30), /* fma. */
1163 COSTS_N_INSNS (9), /* addsub. */
1164 COSTS_N_INSNS (3), /* fpconst. */
1165 COSTS_N_INSNS (3), /* neg. */
1166 COSTS_N_INSNS (6), /* compare. */
1167 COSTS_N_INSNS (6), /* widen. */
1168 COSTS_N_INSNS (6), /* narrow. */
1169 COSTS_N_INSNS (8), /* toint. */
1170 COSTS_N_INSNS (8), /* fromint. */
1171 COSTS_N_INSNS (8) /* roundint. */
1174 /* Vector */
1176 COSTS_N_INSNS (1) /* alu. */
1180 const struct cpu_cost_table cortexa5_extra_costs =
1182 /* ALU */
1184 0, /* arith. */
1185 0, /* logical. */
1186 COSTS_N_INSNS (1), /* shift. */
1187 COSTS_N_INSNS (1), /* shift_reg. */
1188 COSTS_N_INSNS (1), /* arith_shift. */
1189 COSTS_N_INSNS (1), /* arith_shift_reg. */
1190 COSTS_N_INSNS (1), /* log_shift. */
1191 COSTS_N_INSNS (1), /* log_shift_reg. */
1192 COSTS_N_INSNS (1), /* extend. */
1193 COSTS_N_INSNS (1), /* extend_arith. */
1194 COSTS_N_INSNS (1), /* bfi. */
1195 COSTS_N_INSNS (1), /* bfx. */
1196 COSTS_N_INSNS (1), /* clz. */
1197 COSTS_N_INSNS (1), /* rev. */
1198 0, /* non_exec. */
1199 true /* non_exec_costs_exec. */
1203 /* MULT SImode */
1205 0, /* simple. */
1206 COSTS_N_INSNS (1), /* flag_setting. */
1207 COSTS_N_INSNS (1), /* extend. */
1208 COSTS_N_INSNS (1), /* add. */
1209 COSTS_N_INSNS (1), /* extend_add. */
1210 COSTS_N_INSNS (7) /* idiv. */
1212 /* MULT DImode */
1214 0, /* simple (N/A). */
1215 0, /* flag_setting (N/A). */
1216 COSTS_N_INSNS (1), /* extend. */
1217 0, /* add. */
1218 COSTS_N_INSNS (2), /* extend_add. */
1219 0 /* idiv (N/A). */
1222 /* LD/ST */
1224 COSTS_N_INSNS (1), /* load. */
1225 COSTS_N_INSNS (1), /* load_sign_extend. */
1226 COSTS_N_INSNS (6), /* ldrd. */
1227 COSTS_N_INSNS (1), /* ldm_1st. */
1228 1, /* ldm_regs_per_insn_1st. */
1229 2, /* ldm_regs_per_insn_subsequent. */
1230 COSTS_N_INSNS (2), /* loadf. */
1231 COSTS_N_INSNS (4), /* loadd. */
1232 COSTS_N_INSNS (1), /* load_unaligned. */
1233 COSTS_N_INSNS (1), /* store. */
1234 COSTS_N_INSNS (3), /* strd. */
1235 COSTS_N_INSNS (1), /* stm_1st. */
1236 1, /* stm_regs_per_insn_1st. */
1237 2, /* stm_regs_per_insn_subsequent. */
1238 COSTS_N_INSNS (2), /* storef. */
1239 COSTS_N_INSNS (2), /* stored. */
1240 COSTS_N_INSNS (1), /* store_unaligned. */
1241 COSTS_N_INSNS (1), /* loadv. */
1242 COSTS_N_INSNS (1) /* storev. */
1245 /* FP SFmode */
1247 COSTS_N_INSNS (15), /* div. */
1248 COSTS_N_INSNS (3), /* mult. */
1249 COSTS_N_INSNS (7), /* mult_addsub. */
1250 COSTS_N_INSNS (7), /* fma. */
1251 COSTS_N_INSNS (3), /* addsub. */
1252 COSTS_N_INSNS (3), /* fpconst. */
1253 COSTS_N_INSNS (3), /* neg. */
1254 COSTS_N_INSNS (3), /* compare. */
1255 COSTS_N_INSNS (3), /* widen. */
1256 COSTS_N_INSNS (3), /* narrow. */
1257 COSTS_N_INSNS (3), /* toint. */
1258 COSTS_N_INSNS (3), /* fromint. */
1259 COSTS_N_INSNS (3) /* roundint. */
1261 /* FP DFmode */
1263 COSTS_N_INSNS (30), /* div. */
1264 COSTS_N_INSNS (6), /* mult. */
1265 COSTS_N_INSNS (10), /* mult_addsub. */
1266 COSTS_N_INSNS (7), /* fma. */
1267 COSTS_N_INSNS (3), /* addsub. */
1268 COSTS_N_INSNS (3), /* fpconst. */
1269 COSTS_N_INSNS (3), /* neg. */
1270 COSTS_N_INSNS (3), /* compare. */
1271 COSTS_N_INSNS (3), /* widen. */
1272 COSTS_N_INSNS (3), /* narrow. */
1273 COSTS_N_INSNS (3), /* toint. */
1274 COSTS_N_INSNS (3), /* fromint. */
1275 COSTS_N_INSNS (3) /* roundint. */
1278 /* Vector */
1280 COSTS_N_INSNS (1) /* alu. */
1285 const struct cpu_cost_table cortexa7_extra_costs =
1287 /* ALU */
1289 0, /* arith. */
1290 0, /* logical. */
1291 COSTS_N_INSNS (1), /* shift. */
1292 COSTS_N_INSNS (1), /* shift_reg. */
1293 COSTS_N_INSNS (1), /* arith_shift. */
1294 COSTS_N_INSNS (1), /* arith_shift_reg. */
1295 COSTS_N_INSNS (1), /* log_shift. */
1296 COSTS_N_INSNS (1), /* log_shift_reg. */
1297 COSTS_N_INSNS (1), /* extend. */
1298 COSTS_N_INSNS (1), /* extend_arith. */
1299 COSTS_N_INSNS (1), /* bfi. */
1300 COSTS_N_INSNS (1), /* bfx. */
1301 COSTS_N_INSNS (1), /* clz. */
1302 COSTS_N_INSNS (1), /* rev. */
1303 0, /* non_exec. */
1304 true /* non_exec_costs_exec. */
1308 /* MULT SImode */
1310 0, /* simple. */
1311 COSTS_N_INSNS (1), /* flag_setting. */
1312 COSTS_N_INSNS (1), /* extend. */
1313 COSTS_N_INSNS (1), /* add. */
1314 COSTS_N_INSNS (1), /* extend_add. */
1315 COSTS_N_INSNS (7) /* idiv. */
1317 /* MULT DImode */
1319 0, /* simple (N/A). */
1320 0, /* flag_setting (N/A). */
1321 COSTS_N_INSNS (1), /* extend. */
1322 0, /* add. */
1323 COSTS_N_INSNS (2), /* extend_add. */
1324 0 /* idiv (N/A). */
1327 /* LD/ST */
1329 COSTS_N_INSNS (1), /* load. */
1330 COSTS_N_INSNS (1), /* load_sign_extend. */
1331 COSTS_N_INSNS (3), /* ldrd. */
1332 COSTS_N_INSNS (1), /* ldm_1st. */
1333 1, /* ldm_regs_per_insn_1st. */
1334 2, /* ldm_regs_per_insn_subsequent. */
1335 COSTS_N_INSNS (2), /* loadf. */
1336 COSTS_N_INSNS (2), /* loadd. */
1337 COSTS_N_INSNS (1), /* load_unaligned. */
1338 COSTS_N_INSNS (1), /* store. */
1339 COSTS_N_INSNS (3), /* strd. */
1340 COSTS_N_INSNS (1), /* stm_1st. */
1341 1, /* stm_regs_per_insn_1st. */
1342 2, /* stm_regs_per_insn_subsequent. */
1343 COSTS_N_INSNS (2), /* storef. */
1344 COSTS_N_INSNS (2), /* stored. */
1345 COSTS_N_INSNS (1), /* store_unaligned. */
1346 COSTS_N_INSNS (1), /* loadv. */
1347 COSTS_N_INSNS (1) /* storev. */
1350 /* FP SFmode */
1352 COSTS_N_INSNS (15), /* div. */
1353 COSTS_N_INSNS (3), /* mult. */
1354 COSTS_N_INSNS (7), /* mult_addsub. */
1355 COSTS_N_INSNS (7), /* fma. */
1356 COSTS_N_INSNS (3), /* addsub. */
1357 COSTS_N_INSNS (3), /* fpconst. */
1358 COSTS_N_INSNS (3), /* neg. */
1359 COSTS_N_INSNS (3), /* compare. */
1360 COSTS_N_INSNS (3), /* widen. */
1361 COSTS_N_INSNS (3), /* narrow. */
1362 COSTS_N_INSNS (3), /* toint. */
1363 COSTS_N_INSNS (3), /* fromint. */
1364 COSTS_N_INSNS (3) /* roundint. */
1366 /* FP DFmode */
1368 COSTS_N_INSNS (30), /* div. */
1369 COSTS_N_INSNS (6), /* mult. */
1370 COSTS_N_INSNS (10), /* mult_addsub. */
1371 COSTS_N_INSNS (7), /* fma. */
1372 COSTS_N_INSNS (3), /* addsub. */
1373 COSTS_N_INSNS (3), /* fpconst. */
1374 COSTS_N_INSNS (3), /* neg. */
1375 COSTS_N_INSNS (3), /* compare. */
1376 COSTS_N_INSNS (3), /* widen. */
1377 COSTS_N_INSNS (3), /* narrow. */
1378 COSTS_N_INSNS (3), /* toint. */
1379 COSTS_N_INSNS (3), /* fromint. */
1380 COSTS_N_INSNS (3) /* roundint. */
1383 /* Vector */
1385 COSTS_N_INSNS (1) /* alu. */
1389 const struct cpu_cost_table cortexa12_extra_costs =
1391 /* ALU */
1393 0, /* arith. */
1394 0, /* logical. */
1395 0, /* shift. */
1396 COSTS_N_INSNS (1), /* shift_reg. */
1397 COSTS_N_INSNS (1), /* arith_shift. */
1398 COSTS_N_INSNS (1), /* arith_shift_reg. */
1399 COSTS_N_INSNS (1), /* log_shift. */
1400 COSTS_N_INSNS (1), /* log_shift_reg. */
1401 0, /* extend. */
1402 COSTS_N_INSNS (1), /* extend_arith. */
1403 0, /* bfi. */
1404 COSTS_N_INSNS (1), /* bfx. */
1405 COSTS_N_INSNS (1), /* clz. */
1406 COSTS_N_INSNS (1), /* rev. */
1407 0, /* non_exec. */
1408 true /* non_exec_costs_exec. */
1410 /* MULT SImode */
1413 COSTS_N_INSNS (2), /* simple. */
1414 COSTS_N_INSNS (3), /* flag_setting. */
1415 COSTS_N_INSNS (2), /* extend. */
1416 COSTS_N_INSNS (3), /* add. */
1417 COSTS_N_INSNS (2), /* extend_add. */
1418 COSTS_N_INSNS (18) /* idiv. */
1420 /* MULT DImode */
1422 0, /* simple (N/A). */
1423 0, /* flag_setting (N/A). */
1424 COSTS_N_INSNS (3), /* extend. */
1425 0, /* add (N/A). */
1426 COSTS_N_INSNS (3), /* extend_add. */
1427 0 /* idiv (N/A). */
1430 /* LD/ST */
1432 COSTS_N_INSNS (3), /* load. */
1433 COSTS_N_INSNS (3), /* load_sign_extend. */
1434 COSTS_N_INSNS (3), /* ldrd. */
1435 COSTS_N_INSNS (3), /* ldm_1st. */
1436 1, /* ldm_regs_per_insn_1st. */
1437 2, /* ldm_regs_per_insn_subsequent. */
1438 COSTS_N_INSNS (3), /* loadf. */
1439 COSTS_N_INSNS (3), /* loadd. */
1440 0, /* load_unaligned. */
1441 0, /* store. */
1442 0, /* strd. */
1443 0, /* stm_1st. */
1444 1, /* stm_regs_per_insn_1st. */
1445 2, /* stm_regs_per_insn_subsequent. */
1446 COSTS_N_INSNS (2), /* storef. */
1447 COSTS_N_INSNS (2), /* stored. */
1448 0, /* store_unaligned. */
1449 COSTS_N_INSNS (1), /* loadv. */
1450 COSTS_N_INSNS (1) /* storev. */
1453 /* FP SFmode */
1455 COSTS_N_INSNS (17), /* div. */
1456 COSTS_N_INSNS (4), /* mult. */
1457 COSTS_N_INSNS (8), /* mult_addsub. */
1458 COSTS_N_INSNS (8), /* fma. */
1459 COSTS_N_INSNS (4), /* addsub. */
1460 COSTS_N_INSNS (2), /* fpconst. */
1461 COSTS_N_INSNS (2), /* neg. */
1462 COSTS_N_INSNS (2), /* compare. */
1463 COSTS_N_INSNS (4), /* widen. */
1464 COSTS_N_INSNS (4), /* narrow. */
1465 COSTS_N_INSNS (4), /* toint. */
1466 COSTS_N_INSNS (4), /* fromint. */
1467 COSTS_N_INSNS (4) /* roundint. */
1469 /* FP DFmode */
1471 COSTS_N_INSNS (31), /* div. */
1472 COSTS_N_INSNS (4), /* mult. */
1473 COSTS_N_INSNS (8), /* mult_addsub. */
1474 COSTS_N_INSNS (8), /* fma. */
1475 COSTS_N_INSNS (4), /* addsub. */
1476 COSTS_N_INSNS (2), /* fpconst. */
1477 COSTS_N_INSNS (2), /* neg. */
1478 COSTS_N_INSNS (2), /* compare. */
1479 COSTS_N_INSNS (4), /* widen. */
1480 COSTS_N_INSNS (4), /* narrow. */
1481 COSTS_N_INSNS (4), /* toint. */
1482 COSTS_N_INSNS (4), /* fromint. */
1483 COSTS_N_INSNS (4) /* roundint. */
1486 /* Vector */
1488 COSTS_N_INSNS (1) /* alu. */
1492 const struct cpu_cost_table cortexa15_extra_costs =
1494 /* ALU */
1496 0, /* arith. */
1497 0, /* logical. */
1498 0, /* shift. */
1499 0, /* shift_reg. */
1500 COSTS_N_INSNS (1), /* arith_shift. */
1501 COSTS_N_INSNS (1), /* arith_shift_reg. */
1502 COSTS_N_INSNS (1), /* log_shift. */
1503 COSTS_N_INSNS (1), /* log_shift_reg. */
1504 0, /* extend. */
1505 COSTS_N_INSNS (1), /* extend_arith. */
1506 COSTS_N_INSNS (1), /* bfi. */
1507 0, /* bfx. */
1508 0, /* clz. */
1509 0, /* rev. */
1510 0, /* non_exec. */
1511 true /* non_exec_costs_exec. */
1513 /* MULT SImode */
1516 COSTS_N_INSNS (2), /* simple. */
1517 COSTS_N_INSNS (3), /* flag_setting. */
1518 COSTS_N_INSNS (2), /* extend. */
1519 COSTS_N_INSNS (2), /* add. */
1520 COSTS_N_INSNS (2), /* extend_add. */
1521 COSTS_N_INSNS (18) /* idiv. */
1523 /* MULT DImode */
1525 0, /* simple (N/A). */
1526 0, /* flag_setting (N/A). */
1527 COSTS_N_INSNS (3), /* extend. */
1528 0, /* add (N/A). */
1529 COSTS_N_INSNS (3), /* extend_add. */
1530 0 /* idiv (N/A). */
1533 /* LD/ST */
1535 COSTS_N_INSNS (3), /* load. */
1536 COSTS_N_INSNS (3), /* load_sign_extend. */
1537 COSTS_N_INSNS (3), /* ldrd. */
1538 COSTS_N_INSNS (4), /* ldm_1st. */
1539 1, /* ldm_regs_per_insn_1st. */
1540 2, /* ldm_regs_per_insn_subsequent. */
1541 COSTS_N_INSNS (4), /* loadf. */
1542 COSTS_N_INSNS (4), /* loadd. */
1543 0, /* load_unaligned. */
1544 0, /* store. */
1545 0, /* strd. */
1546 COSTS_N_INSNS (1), /* stm_1st. */
1547 1, /* stm_regs_per_insn_1st. */
1548 2, /* stm_regs_per_insn_subsequent. */
1549 0, /* storef. */
1550 0, /* stored. */
1551 0, /* store_unaligned. */
1552 COSTS_N_INSNS (1), /* loadv. */
1553 COSTS_N_INSNS (1) /* storev. */
1556 /* FP SFmode */
1558 COSTS_N_INSNS (17), /* div. */
1559 COSTS_N_INSNS (4), /* mult. */
1560 COSTS_N_INSNS (8), /* mult_addsub. */
1561 COSTS_N_INSNS (8), /* fma. */
1562 COSTS_N_INSNS (4), /* addsub. */
1563 COSTS_N_INSNS (2), /* fpconst. */
1564 COSTS_N_INSNS (2), /* neg. */
1565 COSTS_N_INSNS (5), /* compare. */
1566 COSTS_N_INSNS (4), /* widen. */
1567 COSTS_N_INSNS (4), /* narrow. */
1568 COSTS_N_INSNS (4), /* toint. */
1569 COSTS_N_INSNS (4), /* fromint. */
1570 COSTS_N_INSNS (4) /* roundint. */
1572 /* FP DFmode */
1574 COSTS_N_INSNS (31), /* div. */
1575 COSTS_N_INSNS (4), /* mult. */
1576 COSTS_N_INSNS (8), /* mult_addsub. */
1577 COSTS_N_INSNS (8), /* fma. */
1578 COSTS_N_INSNS (4), /* addsub. */
1579 COSTS_N_INSNS (2), /* fpconst. */
1580 COSTS_N_INSNS (2), /* neg. */
1581 COSTS_N_INSNS (2), /* compare. */
1582 COSTS_N_INSNS (4), /* widen. */
1583 COSTS_N_INSNS (4), /* narrow. */
1584 COSTS_N_INSNS (4), /* toint. */
1585 COSTS_N_INSNS (4), /* fromint. */
1586 COSTS_N_INSNS (4) /* roundint. */
1589 /* Vector */
1591 COSTS_N_INSNS (1) /* alu. */
1595 const struct cpu_cost_table v7m_extra_costs =
1597 /* ALU */
1599 0, /* arith. */
1600 0, /* logical. */
1601 0, /* shift. */
1602 0, /* shift_reg. */
1603 0, /* arith_shift. */
1604 COSTS_N_INSNS (1), /* arith_shift_reg. */
1605 0, /* log_shift. */
1606 COSTS_N_INSNS (1), /* log_shift_reg. */
1607 0, /* extend. */
1608 COSTS_N_INSNS (1), /* extend_arith. */
1609 0, /* bfi. */
1610 0, /* bfx. */
1611 0, /* clz. */
1612 0, /* rev. */
1613 COSTS_N_INSNS (1), /* non_exec. */
1614 false /* non_exec_costs_exec. */
1617 /* MULT SImode */
1619 COSTS_N_INSNS (1), /* simple. */
1620 COSTS_N_INSNS (1), /* flag_setting. */
1621 COSTS_N_INSNS (2), /* extend. */
1622 COSTS_N_INSNS (1), /* add. */
1623 COSTS_N_INSNS (3), /* extend_add. */
1624 COSTS_N_INSNS (8) /* idiv. */
1626 /* MULT DImode */
1628 0, /* simple (N/A). */
1629 0, /* flag_setting (N/A). */
1630 COSTS_N_INSNS (2), /* extend. */
1631 0, /* add (N/A). */
1632 COSTS_N_INSNS (3), /* extend_add. */
1633 0 /* idiv (N/A). */
1636 /* LD/ST */
1638 COSTS_N_INSNS (2), /* load. */
1639 0, /* load_sign_extend. */
1640 COSTS_N_INSNS (3), /* ldrd. */
1641 COSTS_N_INSNS (2), /* ldm_1st. */
1642 1, /* ldm_regs_per_insn_1st. */
1643 1, /* ldm_regs_per_insn_subsequent. */
1644 COSTS_N_INSNS (2), /* loadf. */
1645 COSTS_N_INSNS (3), /* loadd. */
1646 COSTS_N_INSNS (1), /* load_unaligned. */
1647 COSTS_N_INSNS (2), /* store. */
1648 COSTS_N_INSNS (3), /* strd. */
1649 COSTS_N_INSNS (2), /* stm_1st. */
1650 1, /* stm_regs_per_insn_1st. */
1651 1, /* stm_regs_per_insn_subsequent. */
1652 COSTS_N_INSNS (2), /* storef. */
1653 COSTS_N_INSNS (3), /* stored. */
1654 COSTS_N_INSNS (1), /* store_unaligned. */
1655 COSTS_N_INSNS (1), /* loadv. */
1656 COSTS_N_INSNS (1) /* storev. */
1659 /* FP SFmode */
1661 COSTS_N_INSNS (7), /* div. */
1662 COSTS_N_INSNS (2), /* mult. */
1663 COSTS_N_INSNS (5), /* mult_addsub. */
1664 COSTS_N_INSNS (3), /* fma. */
1665 COSTS_N_INSNS (1), /* addsub. */
1666 0, /* fpconst. */
1667 0, /* neg. */
1668 0, /* compare. */
1669 0, /* widen. */
1670 0, /* narrow. */
1671 0, /* toint. */
1672 0, /* fromint. */
1673 0 /* roundint. */
1675 /* FP DFmode */
1677 COSTS_N_INSNS (15), /* div. */
1678 COSTS_N_INSNS (5), /* mult. */
1679 COSTS_N_INSNS (7), /* mult_addsub. */
1680 COSTS_N_INSNS (7), /* fma. */
1681 COSTS_N_INSNS (3), /* addsub. */
1682 0, /* fpconst. */
1683 0, /* neg. */
1684 0, /* compare. */
1685 0, /* widen. */
1686 0, /* narrow. */
1687 0, /* toint. */
1688 0, /* fromint. */
1689 0 /* roundint. */
1692 /* Vector */
1694 COSTS_N_INSNS (1) /* alu. */
1698 const struct tune_params arm_slowmul_tune =
1700 &generic_extra_costs, /* Insn extra costs. */
1701 NULL, /* Sched adj cost. */
1702 arm_default_branch_cost,
1703 &arm_default_vec_cost,
1704 3, /* Constant limit. */
1705 5, /* Max cond insns. */
1706 8, /* Memset max inline. */
1707 1, /* Issue rate. */
1708 ARM_PREFETCH_NOT_BENEFICIAL,
1709 tune_params::PREF_CONST_POOL_TRUE,
1710 tune_params::PREF_LDRD_FALSE,
1711 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1712 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1713 tune_params::DISPARAGE_FLAGS_NEITHER,
1714 tune_params::PREF_NEON_64_FALSE,
1715 tune_params::PREF_NEON_STRINGOPS_FALSE,
1716 tune_params::FUSE_NOTHING,
1717 tune_params::SCHED_AUTOPREF_OFF
1720 const struct tune_params arm_fastmul_tune =
1722 &generic_extra_costs, /* Insn extra costs. */
1723 NULL, /* Sched adj cost. */
1724 arm_default_branch_cost,
1725 &arm_default_vec_cost,
1726 1, /* Constant limit. */
1727 5, /* Max cond insns. */
1728 8, /* Memset max inline. */
1729 1, /* Issue rate. */
1730 ARM_PREFETCH_NOT_BENEFICIAL,
1731 tune_params::PREF_CONST_POOL_TRUE,
1732 tune_params::PREF_LDRD_FALSE,
1733 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1734 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1735 tune_params::DISPARAGE_FLAGS_NEITHER,
1736 tune_params::PREF_NEON_64_FALSE,
1737 tune_params::PREF_NEON_STRINGOPS_FALSE,
1738 tune_params::FUSE_NOTHING,
1739 tune_params::SCHED_AUTOPREF_OFF
1742 /* StrongARM has early execution of branches, so a sequence that is worth
1743 skipping is shorter. Set max_insns_skipped to a lower value. */
1745 const struct tune_params arm_strongarm_tune =
1747 &generic_extra_costs, /* Insn extra costs. */
1748 NULL, /* Sched adj cost. */
1749 arm_default_branch_cost,
1750 &arm_default_vec_cost,
1751 1, /* Constant limit. */
1752 3, /* Max cond insns. */
1753 8, /* Memset max inline. */
1754 1, /* Issue rate. */
1755 ARM_PREFETCH_NOT_BENEFICIAL,
1756 tune_params::PREF_CONST_POOL_TRUE,
1757 tune_params::PREF_LDRD_FALSE,
1758 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1759 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1760 tune_params::DISPARAGE_FLAGS_NEITHER,
1761 tune_params::PREF_NEON_64_FALSE,
1762 tune_params::PREF_NEON_STRINGOPS_FALSE,
1763 tune_params::FUSE_NOTHING,
1764 tune_params::SCHED_AUTOPREF_OFF
1767 const struct tune_params arm_xscale_tune =
1769 &generic_extra_costs, /* Insn extra costs. */
1770 xscale_sched_adjust_cost,
1771 arm_default_branch_cost,
1772 &arm_default_vec_cost,
1773 2, /* Constant limit. */
1774 3, /* Max cond insns. */
1775 8, /* Memset max inline. */
1776 1, /* Issue rate. */
1777 ARM_PREFETCH_NOT_BENEFICIAL,
1778 tune_params::PREF_CONST_POOL_TRUE,
1779 tune_params::PREF_LDRD_FALSE,
1780 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1781 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1782 tune_params::DISPARAGE_FLAGS_NEITHER,
1783 tune_params::PREF_NEON_64_FALSE,
1784 tune_params::PREF_NEON_STRINGOPS_FALSE,
1785 tune_params::FUSE_NOTHING,
1786 tune_params::SCHED_AUTOPREF_OFF
1789 const struct tune_params arm_9e_tune =
1791 &generic_extra_costs, /* Insn extra costs. */
1792 NULL, /* Sched adj cost. */
1793 arm_default_branch_cost,
1794 &arm_default_vec_cost,
1795 1, /* Constant limit. */
1796 5, /* Max cond insns. */
1797 8, /* Memset max inline. */
1798 1, /* Issue rate. */
1799 ARM_PREFETCH_NOT_BENEFICIAL,
1800 tune_params::PREF_CONST_POOL_TRUE,
1801 tune_params::PREF_LDRD_FALSE,
1802 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1803 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1804 tune_params::DISPARAGE_FLAGS_NEITHER,
1805 tune_params::PREF_NEON_64_FALSE,
1806 tune_params::PREF_NEON_STRINGOPS_FALSE,
1807 tune_params::FUSE_NOTHING,
1808 tune_params::SCHED_AUTOPREF_OFF
1811 const struct tune_params arm_marvell_pj4_tune =
1813 &generic_extra_costs, /* Insn extra costs. */
1814 NULL, /* Sched adj cost. */
1815 arm_default_branch_cost,
1816 &arm_default_vec_cost,
1817 1, /* Constant limit. */
1818 5, /* Max cond insns. */
1819 8, /* Memset max inline. */
1820 2, /* Issue rate. */
1821 ARM_PREFETCH_NOT_BENEFICIAL,
1822 tune_params::PREF_CONST_POOL_TRUE,
1823 tune_params::PREF_LDRD_FALSE,
1824 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1825 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1826 tune_params::DISPARAGE_FLAGS_NEITHER,
1827 tune_params::PREF_NEON_64_FALSE,
1828 tune_params::PREF_NEON_STRINGOPS_FALSE,
1829 tune_params::FUSE_NOTHING,
1830 tune_params::SCHED_AUTOPREF_OFF
1833 const struct tune_params arm_v6t2_tune =
1835 &generic_extra_costs, /* Insn extra costs. */
1836 NULL, /* Sched adj cost. */
1837 arm_default_branch_cost,
1838 &arm_default_vec_cost,
1839 1, /* Constant limit. */
1840 5, /* Max cond insns. */
1841 8, /* Memset max inline. */
1842 1, /* Issue rate. */
1843 ARM_PREFETCH_NOT_BENEFICIAL,
1844 tune_params::PREF_CONST_POOL_FALSE,
1845 tune_params::PREF_LDRD_FALSE,
1846 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1847 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1848 tune_params::DISPARAGE_FLAGS_NEITHER,
1849 tune_params::PREF_NEON_64_FALSE,
1850 tune_params::PREF_NEON_STRINGOPS_FALSE,
1851 tune_params::FUSE_NOTHING,
1852 tune_params::SCHED_AUTOPREF_OFF
1856 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1857 const struct tune_params arm_cortex_tune =
1859 &generic_extra_costs,
1860 NULL, /* Sched adj cost. */
1861 arm_default_branch_cost,
1862 &arm_default_vec_cost,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 2, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL,
1868 tune_params::PREF_CONST_POOL_FALSE,
1869 tune_params::PREF_LDRD_FALSE,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER,
1873 tune_params::PREF_NEON_64_FALSE,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE,
1875 tune_params::FUSE_NOTHING,
1876 tune_params::SCHED_AUTOPREF_OFF
1879 const struct tune_params arm_cortex_a8_tune =
1881 &cortexa8_extra_costs,
1882 NULL, /* Sched adj cost. */
1883 arm_default_branch_cost,
1884 &arm_default_vec_cost,
1885 1, /* Constant limit. */
1886 5, /* Max cond insns. */
1887 8, /* Memset max inline. */
1888 2, /* Issue rate. */
1889 ARM_PREFETCH_NOT_BENEFICIAL,
1890 tune_params::PREF_CONST_POOL_FALSE,
1891 tune_params::PREF_LDRD_FALSE,
1892 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1894 tune_params::DISPARAGE_FLAGS_NEITHER,
1895 tune_params::PREF_NEON_64_FALSE,
1896 tune_params::PREF_NEON_STRINGOPS_TRUE,
1897 tune_params::FUSE_NOTHING,
1898 tune_params::SCHED_AUTOPREF_OFF
1901 const struct tune_params arm_cortex_a7_tune =
1903 &cortexa7_extra_costs,
1904 NULL, /* Sched adj cost. */
1905 arm_default_branch_cost,
1906 &arm_default_vec_cost,
1907 1, /* Constant limit. */
1908 5, /* Max cond insns. */
1909 8, /* Memset max inline. */
1910 2, /* Issue rate. */
1911 ARM_PREFETCH_NOT_BENEFICIAL,
1912 tune_params::PREF_CONST_POOL_FALSE,
1913 tune_params::PREF_LDRD_FALSE,
1914 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1915 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1916 tune_params::DISPARAGE_FLAGS_NEITHER,
1917 tune_params::PREF_NEON_64_FALSE,
1918 tune_params::PREF_NEON_STRINGOPS_TRUE,
1919 tune_params::FUSE_NOTHING,
1920 tune_params::SCHED_AUTOPREF_OFF
1923 const struct tune_params arm_cortex_a15_tune =
1925 &cortexa15_extra_costs,
1926 NULL, /* Sched adj cost. */
1927 arm_default_branch_cost,
1928 &arm_default_vec_cost,
1929 1, /* Constant limit. */
1930 2, /* Max cond insns. */
1931 8, /* Memset max inline. */
1932 3, /* Issue rate. */
1933 ARM_PREFETCH_NOT_BENEFICIAL,
1934 tune_params::PREF_CONST_POOL_FALSE,
1935 tune_params::PREF_LDRD_TRUE,
1936 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1937 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1938 tune_params::DISPARAGE_FLAGS_ALL,
1939 tune_params::PREF_NEON_64_FALSE,
1940 tune_params::PREF_NEON_STRINGOPS_TRUE,
1941 tune_params::FUSE_NOTHING,
1942 tune_params::SCHED_AUTOPREF_FULL
1945 const struct tune_params arm_cortex_a35_tune =
1947 &cortexa53_extra_costs,
1948 NULL, /* Sched adj cost. */
1949 arm_default_branch_cost,
1950 &arm_default_vec_cost,
1951 1, /* Constant limit. */
1952 5, /* Max cond insns. */
1953 8, /* Memset max inline. */
1954 1, /* Issue rate. */
1955 ARM_PREFETCH_NOT_BENEFICIAL,
1956 tune_params::PREF_CONST_POOL_FALSE,
1957 tune_params::PREF_LDRD_FALSE,
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1959 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1960 tune_params::DISPARAGE_FLAGS_NEITHER,
1961 tune_params::PREF_NEON_64_FALSE,
1962 tune_params::PREF_NEON_STRINGOPS_TRUE,
1963 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1964 tune_params::SCHED_AUTOPREF_OFF
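/* The FUSE_OPS (...) entries used by some of these tune structures OR
   together fusible-operation bits (e.g. FUSE_MOVW_MOVT, FUSE_AES_AESMC);
   cores that set them ask the scheduler's macro-fusion hooks to keep such
   instruction pairs adjacent so the hardware can fuse them.  (Summary
   note, assuming the usual macro-fusion mechanism; see the fusion hooks
   later in this file.)  */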
1967 const struct tune_params arm_cortex_a53_tune =
1969 &cortexa53_extra_costs,
1970 NULL, /* Sched adj cost. */
1971 arm_default_branch_cost,
1972 &arm_default_vec_cost,
1973 1, /* Constant limit. */
1974 5, /* Max cond insns. */
1975 8, /* Memset max inline. */
1976 2, /* Issue rate. */
1977 ARM_PREFETCH_NOT_BENEFICIAL,
1978 tune_params::PREF_CONST_POOL_FALSE,
1979 tune_params::PREF_LDRD_FALSE,
1980 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1981 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1982 tune_params::DISPARAGE_FLAGS_NEITHER,
1983 tune_params::PREF_NEON_64_FALSE,
1984 tune_params::PREF_NEON_STRINGOPS_TRUE,
1985 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
1986 tune_params::SCHED_AUTOPREF_OFF
1989 const struct tune_params arm_cortex_a57_tune =
1991 &cortexa57_extra_costs,
1992 NULL, /* Sched adj cost. */
1993 arm_default_branch_cost,
1994 &arm_default_vec_cost,
1995 1, /* Constant limit. */
1996 2, /* Max cond insns. */
1997 8, /* Memset max inline. */
1998 3, /* Issue rate. */
1999 ARM_PREFETCH_NOT_BENEFICIAL,
2000 tune_params::PREF_CONST_POOL_FALSE,
2001 tune_params::PREF_LDRD_TRUE,
2002 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2003 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2004 tune_params::DISPARAGE_FLAGS_ALL,
2005 tune_params::PREF_NEON_64_FALSE,
2006 tune_params::PREF_NEON_STRINGOPS_TRUE,
2007 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2008 tune_params::SCHED_AUTOPREF_FULL
2011 const struct tune_params arm_exynosm1_tune =
2013 &exynosm1_extra_costs,
2014 NULL, /* Sched adj cost. */
2015 arm_default_branch_cost,
2016 &arm_default_vec_cost,
2017 1, /* Constant limit. */
2018 2, /* Max cond insns. */
2019 8, /* Memset max inline. */
2020 3, /* Issue rate. */
2021 ARM_PREFETCH_NOT_BENEFICIAL,
2022 tune_params::PREF_CONST_POOL_FALSE,
2023 tune_params::PREF_LDRD_TRUE,
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2025 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2026 tune_params::DISPARAGE_FLAGS_ALL,
2027 tune_params::PREF_NEON_64_FALSE,
2028 tune_params::PREF_NEON_STRINGOPS_TRUE,
2029 tune_params::FUSE_NOTHING,
2030 tune_params::SCHED_AUTOPREF_OFF
2033 const struct tune_params arm_xgene1_tune =
2035 &xgene1_extra_costs,
2036 NULL, /* Sched adj cost. */
2037 arm_default_branch_cost,
2038 &arm_default_vec_cost,
2039 1, /* Constant limit. */
2040 2, /* Max cond insns. */
2041 32, /* Memset max inline. */
2042 4, /* Issue rate. */
2043 ARM_PREFETCH_NOT_BENEFICIAL,
2044 tune_params::PREF_CONST_POOL_FALSE,
2045 tune_params::PREF_LDRD_TRUE,
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2047 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2048 tune_params::DISPARAGE_FLAGS_ALL,
2049 tune_params::PREF_NEON_64_FALSE,
2050 tune_params::PREF_NEON_STRINGOPS_FALSE,
2051 tune_params::FUSE_NOTHING,
2052 tune_params::SCHED_AUTOPREF_OFF
2055 const struct tune_params arm_qdf24xx_tune =
2057 &qdf24xx_extra_costs,
2058 NULL, /* Scheduler cost adjustment. */
2059 arm_default_branch_cost,
2060 &arm_default_vec_cost, /* Vectorizer costs. */
2061 1, /* Constant limit. */
2062 2, /* Max cond insns. */
2063 8, /* Memset max inline. */
2064 4, /* Issue rate. */
2065 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2066 tune_params::PREF_CONST_POOL_FALSE,
2067 tune_params::PREF_LDRD_TRUE,
2068 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2069 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2070 tune_params::DISPARAGE_FLAGS_ALL,
2071 tune_params::PREF_NEON_64_FALSE,
2072 tune_params::PREF_NEON_STRINGOPS_TRUE,
2073 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2074 tune_params::SCHED_AUTOPREF_FULL
2077 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2078 less appealing. Set max_insns_skipped to a low value. */
2080 const struct tune_params arm_cortex_a5_tune =
2082 &cortexa5_extra_costs,
2083 NULL, /* Sched adj cost. */
2084 arm_cortex_a5_branch_cost,
2085 &arm_default_vec_cost,
2086 1, /* Constant limit. */
2087 1, /* Max cond insns. */
2088 8, /* Memset max inline. */
2089 2, /* Issue rate. */
2090 ARM_PREFETCH_NOT_BENEFICIAL,
2091 tune_params::PREF_CONST_POOL_FALSE,
2092 tune_params::PREF_LDRD_FALSE,
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2094 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2095 tune_params::DISPARAGE_FLAGS_NEITHER,
2096 tune_params::PREF_NEON_64_FALSE,
2097 tune_params::PREF_NEON_STRINGOPS_TRUE,
2098 tune_params::FUSE_NOTHING,
2099 tune_params::SCHED_AUTOPREF_OFF
2102 const struct tune_params arm_cortex_a9_tune =
2104 &cortexa9_extra_costs,
2105 cortex_a9_sched_adjust_cost,
2106 arm_default_branch_cost,
2107 &arm_default_vec_cost,
2108 1, /* Constant limit. */
2109 5, /* Max cond insns. */
2110 8, /* Memset max inline. */
2111 2, /* Issue rate. */
2112 ARM_PREFETCH_BENEFICIAL(4,32,32),
2113 tune_params::PREF_CONST_POOL_FALSE,
2114 tune_params::PREF_LDRD_FALSE,
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2116 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2117 tune_params::DISPARAGE_FLAGS_NEITHER,
2118 tune_params::PREF_NEON_64_FALSE,
2119 tune_params::PREF_NEON_STRINGOPS_FALSE,
2120 tune_params::FUSE_NOTHING,
2121 tune_params::SCHED_AUTOPREF_OFF
2124 const struct tune_params arm_cortex_a12_tune =
2126 &cortexa12_extra_costs,
2127 NULL, /* Sched adj cost. */
2128 arm_default_branch_cost,
2129 &arm_default_vec_cost, /* Vectorizer costs. */
2130 1, /* Constant limit. */
2131 2, /* Max cond insns. */
2132 8, /* Memset max inline. */
2133 2, /* Issue rate. */
2134 ARM_PREFETCH_NOT_BENEFICIAL,
2135 tune_params::PREF_CONST_POOL_FALSE,
2136 tune_params::PREF_LDRD_TRUE,
2137 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2138 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2139 tune_params::DISPARAGE_FLAGS_ALL,
2140 tune_params::PREF_NEON_64_FALSE,
2141 tune_params::PREF_NEON_STRINGOPS_TRUE,
2142 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2143 tune_params::SCHED_AUTOPREF_OFF
2146 const struct tune_params arm_cortex_a73_tune =
2148 &cortexa57_extra_costs,
2149 NULL, /* Sched adj cost. */
2150 arm_default_branch_cost,
2151 &arm_default_vec_cost, /* Vectorizer costs. */
2152 1, /* Constant limit. */
2153 2, /* Max cond insns. */
2154 8, /* Memset max inline. */
2155 2, /* Issue rate. */
2156 ARM_PREFETCH_NOT_BENEFICIAL,
2157 tune_params::PREF_CONST_POOL_FALSE,
2158 tune_params::PREF_LDRD_TRUE,
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2160 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2161 tune_params::DISPARAGE_FLAGS_ALL,
2162 tune_params::PREF_NEON_64_FALSE,
2163 tune_params::PREF_NEON_STRINGOPS_TRUE,
2164 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2165 tune_params::SCHED_AUTOPREF_FULL
2168 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2169 cycle to execute each. An LDR from the constant pool also takes two cycles
2170 to execute, but mildly increases pipelining opportunity (consecutive
2171 loads/stores can be pipelined together, saving one cycle), and may also
2172 improve icache utilisation. Hence we prefer the constant pool for such
2173 processors. */
2175 const struct tune_params arm_v7m_tune =
2177 &v7m_extra_costs,
2178 NULL, /* Sched adj cost. */
2179 arm_cortex_m_branch_cost,
2180 &arm_default_vec_cost,
2181 1, /* Constant limit. */
2182 2, /* Max cond insns. */
2183 8, /* Memset max inline. */
2184 1, /* Issue rate. */
2185 ARM_PREFETCH_NOT_BENEFICIAL,
2186 tune_params::PREF_CONST_POOL_TRUE,
2187 tune_params::PREF_LDRD_FALSE,
2188 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2189 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2190 tune_params::DISPARAGE_FLAGS_NEITHER,
2191 tune_params::PREF_NEON_64_FALSE,
2192 tune_params::PREF_NEON_STRINGOPS_FALSE,
2193 tune_params::FUSE_NOTHING,
2194 tune_params::SCHED_AUTOPREF_OFF
2197 /* Cortex-M7 tuning. */
2199 const struct tune_params arm_cortex_m7_tune =
2201 &v7m_extra_costs,
2202 NULL, /* Sched adj cost. */
2203 arm_cortex_m7_branch_cost,
2204 &arm_default_vec_cost,
2205 0, /* Constant limit. */
2206 1, /* Max cond insns. */
2207 8, /* Memset max inline. */
2208 2, /* Issue rate. */
2209 ARM_PREFETCH_NOT_BENEFICIAL,
2210 tune_params::PREF_CONST_POOL_TRUE,
2211 tune_params::PREF_LDRD_FALSE,
2212 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2213 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2214 tune_params::DISPARAGE_FLAGS_NEITHER,
2215 tune_params::PREF_NEON_64_FALSE,
2216 tune_params::PREF_NEON_STRINGOPS_FALSE,
2217 tune_params::FUSE_NOTHING,
2218 tune_params::SCHED_AUTOPREF_OFF
2221 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2222 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2223 cortex-m23. */
2224 const struct tune_params arm_v6m_tune =
2226 &generic_extra_costs, /* Insn extra costs. */
2227 NULL, /* Sched adj cost. */
2228 arm_default_branch_cost,
2229 &arm_default_vec_cost, /* Vectorizer costs. */
2230 1, /* Constant limit. */
2231 5, /* Max cond insns. */
2232 8, /* Memset max inline. */
2233 1, /* Issue rate. */
2234 ARM_PREFETCH_NOT_BENEFICIAL,
2235 tune_params::PREF_CONST_POOL_FALSE,
2236 tune_params::PREF_LDRD_FALSE,
2237 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2238 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2239 tune_params::DISPARAGE_FLAGS_NEITHER,
2240 tune_params::PREF_NEON_64_FALSE,
2241 tune_params::PREF_NEON_STRINGOPS_FALSE,
2242 tune_params::FUSE_NOTHING,
2243 tune_params::SCHED_AUTOPREF_OFF
2246 const struct tune_params arm_fa726te_tune =
2248 &generic_extra_costs, /* Insn extra costs. */
2249 fa726te_sched_adjust_cost,
2250 arm_default_branch_cost,
2251 &arm_default_vec_cost,
2252 1, /* Constant limit. */
2253 5, /* Max cond insns. */
2254 8, /* Memset max inline. */
2255 2, /* Issue rate. */
2256 ARM_PREFETCH_NOT_BENEFICIAL,
2257 tune_params::PREF_CONST_POOL_TRUE,
2258 tune_params::PREF_LDRD_FALSE,
2259 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2260 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2261 tune_params::DISPARAGE_FLAGS_NEITHER,
2262 tune_params::PREF_NEON_64_FALSE,
2263 tune_params::PREF_NEON_STRINGOPS_FALSE,
2264 tune_params::FUSE_NOTHING,
2265 tune_params::SCHED_AUTOPREF_OFF
2269 /* Not all of these give usefully different compilation alternatives,
2270 but there is no simple way of generalizing them. */
2271 static const struct processors all_cores[] =
2273 /* ARM Cores */
2274 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2275 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2276 FLAGS, &arm_##COSTS##_tune},
2277 #include "arm-cores.def"
2278 #undef ARM_CORE
2279 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
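/* As an illustration of the expansion above, a hypothetical arm-cores.def
   entry of the form
     ARM_CORE ("cortex-a8", cortexa8, cortexa8, 7A, <flags>, cortex_a8)
   would produce the initializer
     {"cortex-a8", cortexa8, "7A", BASE_ARCH_7A, <flags>, &arm_cortex_a8_tune},
   (the <flags> field is elided here; see arm-cores.def for the real
   entries).  */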
2282 static const struct processors all_architectures[] =
2284 /* ARM Architectures */
2285 /* We don't specify tuning costs here as it will be figured out
2286 from the core. */
2288 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2289 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2290 #include "arm-arches.def"
2291 #undef ARM_ARCH
2292 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
2296 /* These are populated as commandline arguments are processed, or NULL
2297 if not specified. */
2298 static const struct processors *arm_selected_arch;
2299 static const struct processors *arm_selected_cpu;
2300 static const struct processors *arm_selected_tune;
2302 /* The name of the preprocessor macro to define for this architecture. PROFILE
2303 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2304 is thus chosen to be big enough to hold the longest architecture name. */
2306 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2308 /* Available values for -mfpu=. */
2310 const struct arm_fpu_desc all_fpus[] =
2312 #define ARM_FPU(NAME, REV, VFP_REGS, FEATURES) \
2313 { NAME, REV, VFP_REGS, FEATURES },
2314 #include "arm-fpus.def"
2315 #undef ARM_FPU
2318 /* Supported TLS relocations. */
2320 enum tls_reloc {
2321 TLS_GD32,
2322 TLS_LDM32,
2323 TLS_LDO32,
2324 TLS_IE32,
2325 TLS_LE32,
2326 TLS_DESCSEQ /* GNU scheme */
2329 /* The maximum number of insns to be used when loading a constant. */
2330 inline static int
2331 arm_constant_limit (bool size_p)
2333 return size_p ? 1 : current_tune->constant_limit;
2336 /* Emit an insn that's a simple single-set. Both the operands must be known
2337 to be valid. */
2338 inline static rtx_insn *
2339 emit_set_insn (rtx x, rtx y)
2341 return emit_insn (gen_rtx_SET (x, y));
2344 /* Return the number of bits set in VALUE. */
2345 static unsigned
2346 bit_count (unsigned long value)
2348 unsigned long count = 0;
2350 while (value)
2352 count++;
2353 value &= value - 1; /* Clear the least-significant set bit. */
2356 return count;
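/* For example, bit_count (0x13) returns 3: each iteration of the loop
   clears exactly one set bit (0x13 -> 0x12 -> 0x10 -> 0), so the loop
   runs once per bit set in VALUE.  */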
2359 /* Return the number of features in feature-set SET. */
2360 static unsigned
2361 feature_count (const arm_feature_set * set)
2363 return (bit_count (ARM_FSET_CPU1 (*set))
2364 + bit_count (ARM_FSET_CPU2 (*set)));
2367 typedef struct
2369 machine_mode mode;
2370 const char *name;
2371 } arm_fixed_mode_set;
2373 /* A small helper for setting fixed-point library libfuncs. */
2375 static void
2376 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2377 const char *funcname, const char *modename,
2378 int num_suffix)
2380 char buffer[50];
2382 if (num_suffix == 0)
2383 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2384 else
2385 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2387 set_optab_libfunc (optable, mode, buffer);
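/* For example, the call
     arm_set_fixed_optab_libfunc (ssadd_optab, SAmode, "ssadd", "sa", 3);
   (as made from arm_init_libfuncs below) registers "__gnu_ssaddsa3" as
   the saturating-add libcall for SAmode.  (Illustrative; the trailing
   digit follows the usual libgcc operand-count naming convention.)  */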
2390 static void
2391 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2392 machine_mode from, const char *funcname,
2393 const char *toname, const char *fromname)
2395 char buffer[50];
2396 const char *maybe_suffix_2 = "";
2398 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2399 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2400 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2401 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2402 maybe_suffix_2 = "2";
2404 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2405 maybe_suffix_2);
2407 set_conv_libfunc (optable, to, from, buffer);
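/* For example, a conversion from SQmode to DQmode (both signed fract
   modes) gets the "2" suffix and is registered as "__gnu_fractsqdq2",
   whereas SFmode to SQmode yields "__gnu_fractsfsq" with no suffix.
   (Illustrative names derived from the sprintf format above.)  */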
2410 /* Set up library functions unique to ARM. */
2412 static void
2413 arm_init_libfuncs (void)
2415 /* For Linux, we have access to kernel support for atomic operations. */
2416 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2417 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2419 /* There are no special library functions unless we are using the
2420 ARM BPABI. */
2421 if (!TARGET_BPABI)
2422 return;
2424 /* The functions below are described in Section 4 of the "Run-Time
2425 ABI for the ARM architecture", Version 1.0. */
2427 /* Double-precision floating-point arithmetic. Table 2. */
2428 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2429 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2430 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2431 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2432 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2434 /* Double-precision comparisons. Table 3. */
2435 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2436 set_optab_libfunc (ne_optab, DFmode, NULL);
2437 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2438 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2439 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2440 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2441 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2443 /* Single-precision floating-point arithmetic. Table 4. */
2444 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2445 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2446 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2447 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2448 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2450 /* Single-precision comparisons. Table 5. */
2451 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2452 set_optab_libfunc (ne_optab, SFmode, NULL);
2453 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2454 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2455 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2456 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2457 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2459 /* Floating-point to integer conversions. Table 6. */
2460 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2461 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2462 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2463 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2464 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2465 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2466 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2467 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2469 /* Conversions between floating types. Table 7. */
2470 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2471 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2473 /* Integer to floating-point conversions. Table 8. */
2474 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2475 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2476 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2477 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2478 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2479 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2480 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2481 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2483 /* Long long. Table 9. */
2484 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2485 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2486 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2487 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2488 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2489 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2490 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2491 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2493 /* Integer (32/32->32) division. \S 4.3.1. */
2494 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2495 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2497 /* The divmod functions are designed so that they can be used for
2498 plain division, even though they return both the quotient and the
2499 remainder. The quotient is returned in the usual location (i.e.,
2500 r0 for SImode, {r0, r1} for DImode), just as would be expected
2501 for an ordinary division routine. Because the AAPCS calling
2502 conventions specify that all of { r0, r1, r2, r3 } are
2503 call-clobbered registers, there is no need to tell the compiler
2504 explicitly that those registers are clobbered by these
2505 routines. */
2506 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2507 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2509 /* For SImode division the ABI provides div-without-mod routines,
2510 which are faster. */
2511 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2512 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2514 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2515 divmod libcalls instead. */
2516 set_optab_libfunc (smod_optab, DImode, NULL);
2517 set_optab_libfunc (umod_optab, DImode, NULL);
2518 set_optab_libfunc (smod_optab, SImode, NULL);
2519 set_optab_libfunc (umod_optab, SImode, NULL);
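/* With the mod libcalls cleared above, a SImode "a % b" is expanded via
   the divmod libcall instead: __aeabi_idivmod returns the quotient in r0
   and the remainder in r1, so the remainder is simply taken from r1 after
   the call.  (Descriptive note based on the AEABI run-time ABI.)  */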
2521 /* Half-precision float operations. The compiler handles all operations
2522 with NULL libfuncs by converting to SFmode. */
2523 switch (arm_fp16_format)
2525 case ARM_FP16_FORMAT_IEEE:
2526 case ARM_FP16_FORMAT_ALTERNATIVE:
2528 /* Conversions. */
2529 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2530 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2531 ? "__gnu_f2h_ieee"
2532 : "__gnu_f2h_alternative"));
2533 set_conv_libfunc (sext_optab, SFmode, HFmode,
2534 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2535 ? "__gnu_h2f_ieee"
2536 : "__gnu_h2f_alternative"));
2538 /* Arithmetic. */
2539 set_optab_libfunc (add_optab, HFmode, NULL);
2540 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2541 set_optab_libfunc (smul_optab, HFmode, NULL);
2542 set_optab_libfunc (neg_optab, HFmode, NULL);
2543 set_optab_libfunc (sub_optab, HFmode, NULL);
2545 /* Comparisons. */
2546 set_optab_libfunc (eq_optab, HFmode, NULL);
2547 set_optab_libfunc (ne_optab, HFmode, NULL);
2548 set_optab_libfunc (lt_optab, HFmode, NULL);
2549 set_optab_libfunc (le_optab, HFmode, NULL);
2550 set_optab_libfunc (ge_optab, HFmode, NULL);
2551 set_optab_libfunc (gt_optab, HFmode, NULL);
2552 set_optab_libfunc (unord_optab, HFmode, NULL);
2553 break;
2555 default:
2556 break;
2559 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2561 const arm_fixed_mode_set fixed_arith_modes[] =
2563 { QQmode, "qq" },
2564 { UQQmode, "uqq" },
2565 { HQmode, "hq" },
2566 { UHQmode, "uhq" },
2567 { SQmode, "sq" },
2568 { USQmode, "usq" },
2569 { DQmode, "dq" },
2570 { UDQmode, "udq" },
2571 { TQmode, "tq" },
2572 { UTQmode, "utq" },
2573 { HAmode, "ha" },
2574 { UHAmode, "uha" },
2575 { SAmode, "sa" },
2576 { USAmode, "usa" },
2577 { DAmode, "da" },
2578 { UDAmode, "uda" },
2579 { TAmode, "ta" },
2580 { UTAmode, "uta" }
2582 const arm_fixed_mode_set fixed_conv_modes[] =
2584 { QQmode, "qq" },
2585 { UQQmode, "uqq" },
2586 { HQmode, "hq" },
2587 { UHQmode, "uhq" },
2588 { SQmode, "sq" },
2589 { USQmode, "usq" },
2590 { DQmode, "dq" },
2591 { UDQmode, "udq" },
2592 { TQmode, "tq" },
2593 { UTQmode, "utq" },
2594 { HAmode, "ha" },
2595 { UHAmode, "uha" },
2596 { SAmode, "sa" },
2597 { USAmode, "usa" },
2598 { DAmode, "da" },
2599 { UDAmode, "uda" },
2600 { TAmode, "ta" },
2601 { UTAmode, "uta" },
2602 { QImode, "qi" },
2603 { HImode, "hi" },
2604 { SImode, "si" },
2605 { DImode, "di" },
2606 { TImode, "ti" },
2607 { SFmode, "sf" },
2608 { DFmode, "df" }
2610 unsigned int i, j;
2612 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2614 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2615 "add", fixed_arith_modes[i].name, 3);
2616 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2617 "ssadd", fixed_arith_modes[i].name, 3);
2618 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2619 "usadd", fixed_arith_modes[i].name, 3);
2620 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2621 "sub", fixed_arith_modes[i].name, 3);
2622 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2623 "sssub", fixed_arith_modes[i].name, 3);
2624 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2625 "ussub", fixed_arith_modes[i].name, 3);
2626 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2627 "mul", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2629 "ssmul", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2631 "usmul", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2633 "div", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2635 "udiv", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2637 "ssdiv", fixed_arith_modes[i].name, 3);
2638 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2639 "usdiv", fixed_arith_modes[i].name, 3);
2640 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2641 "neg", fixed_arith_modes[i].name, 2);
2642 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2643 "ssneg", fixed_arith_modes[i].name, 2);
2644 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2645 "usneg", fixed_arith_modes[i].name, 2);
2646 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2647 "ashl", fixed_arith_modes[i].name, 3);
2648 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2649 "ashr", fixed_arith_modes[i].name, 3);
2650 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2651 "lshr", fixed_arith_modes[i].name, 3);
2652 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2653 "ssashl", fixed_arith_modes[i].name, 3);
2654 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2655 "usashl", fixed_arith_modes[i].name, 3);
2656 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2657 "cmp", fixed_arith_modes[i].name, 2);
2660 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2661 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2663 if (i == j
2664 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2665 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2666 continue;
2668 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2669 fixed_conv_modes[j].mode, "fract",
2670 fixed_conv_modes[i].name,
2671 fixed_conv_modes[j].name);
2672 arm_set_fixed_conv_libfunc (satfract_optab,
2673 fixed_conv_modes[i].mode,
2674 fixed_conv_modes[j].mode, "satfract",
2675 fixed_conv_modes[i].name,
2676 fixed_conv_modes[j].name);
2677 arm_set_fixed_conv_libfunc (fractuns_optab,
2678 fixed_conv_modes[i].mode,
2679 fixed_conv_modes[j].mode, "fractuns",
2680 fixed_conv_modes[i].name,
2681 fixed_conv_modes[j].name);
2682 arm_set_fixed_conv_libfunc (satfractuns_optab,
2683 fixed_conv_modes[i].mode,
2684 fixed_conv_modes[j].mode, "satfractuns",
2685 fixed_conv_modes[i].name,
2686 fixed_conv_modes[j].name);
2690 if (TARGET_AAPCS_BASED)
2691 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2694 /* On AAPCS systems, this is the "struct __va_list". */
2695 static GTY(()) tree va_list_type;
2697 /* Return the type to use as __builtin_va_list. */
2698 static tree
2699 arm_build_builtin_va_list (void)
2701 tree va_list_name;
2702 tree ap_field;
2704 if (!TARGET_AAPCS_BASED)
2705 return std_build_builtin_va_list ();
2707 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2708 defined as:
2710 struct __va_list
2712 void *__ap;
2715 The C Library ABI further reinforces this definition in \S
2716 4.1.
2718 We must follow this definition exactly. The structure tag
2719 name is visible in C++ mangled names, and thus forms a part
2720 of the ABI. The field name may be used by people who
2721 #include <stdarg.h>. */
2722 /* Create the type. */
2723 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2724 /* Give it the required name. */
2725 va_list_name = build_decl (BUILTINS_LOCATION,
2726 TYPE_DECL,
2727 get_identifier ("__va_list"),
2728 va_list_type);
2729 DECL_ARTIFICIAL (va_list_name) = 1;
2730 TYPE_NAME (va_list_type) = va_list_name;
2731 TYPE_STUB_DECL (va_list_type) = va_list_name;
2732 /* Create the __ap field. */
2733 ap_field = build_decl (BUILTINS_LOCATION,
2734 FIELD_DECL,
2735 get_identifier ("__ap"),
2736 ptr_type_node);
2737 DECL_ARTIFICIAL (ap_field) = 1;
2738 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2739 TYPE_FIELDS (va_list_type) = ap_field;
2740 /* Compute its layout. */
2741 layout_type (va_list_type);
2743 return va_list_type;
2746 /* Return an expression of type "void *" pointing to the next
2747 available argument in a variable-argument list. VALIST is the
2748 user-level va_list object, of type __builtin_va_list. */
2749 static tree
2750 arm_extract_valist_ptr (tree valist)
2752 if (TREE_TYPE (valist) == error_mark_node)
2753 return error_mark_node;
2755 /* On an AAPCS target, the pointer is stored within "struct
2756 va_list". */
2757 if (TARGET_AAPCS_BASED)
2759 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2760 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2761 valist, ap_field, NULL_TREE);
2764 return valist;
2767 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2768 static void
2769 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2771 valist = arm_extract_valist_ptr (valist);
2772 std_expand_builtin_va_start (valist, nextarg);
2775 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2776 static tree
2777 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2778 gimple_seq *post_p)
2780 valist = arm_extract_valist_ptr (valist);
2781 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2784 /* Check any incompatible options that the user has specified. */
2785 static void
2786 arm_option_check_internal (struct gcc_options *opts)
2788 int flags = opts->x_target_flags;
2789 const struct arm_fpu_desc *fpu_desc = &all_fpus[opts->x_arm_fpu_index];
2791 /* iWMMXt and NEON are incompatible. */
2792 if (TARGET_IWMMXT
2793 && ARM_FPU_FSET_HAS (fpu_desc->features, FPU_FL_NEON))
2794 error ("iWMMXt and NEON are incompatible");
2796 /* Make sure that the processor choice does not conflict with any of the
2797 other command line choices. */
2798 if (TARGET_ARM_P (flags) && !ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM))
2799 error ("target CPU does not support ARM mode");
2801 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2802 from here where no function is being compiled currently. */
2803 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2804 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2806 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2807 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2809 /* If this target is normally configured to use APCS frames, warn if they
2810 are turned off and debugging is turned on. */
2811 if (TARGET_ARM_P (flags)
2812 && write_symbols != NO_DEBUG
2813 && !TARGET_APCS_FRAME
2814 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2815 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2817 /* iWMMXt unsupported under Thumb mode. */
2818 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2819 error ("iWMMXt unsupported under Thumb mode");
2821 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2822 error ("can not use -mtp=cp15 with 16-bit Thumb");
2824 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2826 error ("RTP PIC is incompatible with Thumb");
2827 flag_pic = 0;
2830 /* We only support -mslow-flash-data on armv7-m targets. */
2831 if (target_slow_flash_data
2832 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2833 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2834 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2836 /* We only support pure-code on Thumb-2 M-profile targets. */
2837 if (target_pure_code
2838 && (!arm_arch_thumb2 || arm_arch_notm || flag_pic || TARGET_NEON))
2839 error ("-mpure-code only supports non-pic code on armv7-m targets");
2843 /* Recompute the global settings depending on target attribute options. */
2845 static void
2846 arm_option_params_internal (void)
2848 /* If we are not using the default (ARM mode) section anchor offset
2849 ranges, then set the correct ranges now. */
2850 if (TARGET_THUMB1)
2852 /* Thumb-1 LDR instructions cannot have negative offsets.
2853 Permissible positive offset ranges are 5-bit (for byte loads),
2854 6-bit (for halfword loads), or 7-bit (for word loads).
2855 Empirical results suggest a 7-bit anchor range gives the best
2856 overall code size. */
2857 targetm.min_anchor_offset = 0;
2858 targetm.max_anchor_offset = 127;
2860 else if (TARGET_THUMB2)
2862 /* The minimum is set such that the total size of the block
2863 for a particular anchor is 248 + 1 + 4095 bytes, which is
2864 divisible by eight, ensuring natural spacing of anchors. */
2865 targetm.min_anchor_offset = -248;
2866 targetm.max_anchor_offset = 4095;
2868 else
2870 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2871 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2874 if (optimize_size)
2876 /* If optimizing for size, bump the number of instructions that we
2877 are prepared to conditionally execute (even on a StrongARM). */
2878 max_insns_skipped = 6;
2880 /* For THUMB2, we limit the conditional sequence to one IT block. */
2881 if (TARGET_THUMB2)
2882 max_insns_skipped = arm_restrict_it ? 1 : 4;
2884 else
2885 /* When -mrestrict-it is in use, tone down the if-conversion. */
2886 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2887 ? 1 : current_tune->max_insns_skipped;
2890 /* True if -mflip-thumb should next add an attribute for the default
2891 mode, false if it should next add an attribute for the opposite mode. */
2892 static GTY(()) bool thumb_flipper;
2894 /* Options after initial target override. */
2895 static GTY(()) tree init_optimize;
2897 static void
2898 arm_override_options_after_change_1 (struct gcc_options *opts)
2900 if (opts->x_align_functions <= 0)
2901 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2902 && opts->x_optimize_size ? 2 : 4;
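/* I.e. default to 2-byte function alignment when compiling Thumb code for
   size (Thumb instructions only need 2-byte alignment) and to 4-byte
   alignment otherwise.  (Descriptive note.)  */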
2905 /* Implement targetm.override_options_after_change. */
2907 static void
2908 arm_override_options_after_change (void)
2910 arm_override_options_after_change_1 (&global_options);
2913 /* Reset options between modes that the user has specified. */
2914 static void
2915 arm_option_override_internal (struct gcc_options *opts,
2916 struct gcc_options *opts_set)
2918 arm_override_options_after_change_1 (opts);
2920 if (TARGET_INTERWORK && !ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB))
2922 /* The default is to enable interworking, so this warning message would
2923 be confusing to users who have just compiled with, eg, -march=armv3. */
2924 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2925 opts->x_target_flags &= ~MASK_INTERWORK;
2928 if (TARGET_THUMB_P (opts->x_target_flags)
2929 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB)))
2931 warning (0, "target CPU does not support THUMB instructions");
2932 opts->x_target_flags &= ~MASK_THUMB;
2935 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2937 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2938 opts->x_target_flags &= ~MASK_APCS_FRAME;
2941 /* Callee super interworking implies thumb interworking. Adding
2942 this to the flags here simplifies the logic elsewhere. */
2943 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2944 opts->x_target_flags |= MASK_INTERWORK;
2946 /* Need to remember initial values so combinations of options like
2947 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2948 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2950 if (! opts_set->x_arm_restrict_it)
2951 opts->x_arm_restrict_it = arm_arch8;
2953 /* ARM execution state and M profile don't have [restrict] IT. */
2954 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2955 opts->x_arm_restrict_it = 0;
2957 /* Enable -munaligned-access by default for
2958 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2959 i.e. Thumb2 and ARM state only.
2960 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2961 - ARMv8 architecture-based processors.
2963 Disable -munaligned-access by default for
2964 - all pre-ARMv6 architecture-based processors
2965 - ARMv6-M architecture-based processors
2966 - ARMv8-M Baseline processors. */
2968 if (! opts_set->x_unaligned_access)
2970 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2971 && arm_arch6 && (arm_arch_notm || arm_arch7));
2973 else if (opts->x_unaligned_access == 1
2974 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2976 warning (0, "target CPU does not support unaligned accesses");
2977 opts->x_unaligned_access = 0;
2980 /* Don't warn since it's on by default in -O2. */
2981 if (TARGET_THUMB1_P (opts->x_target_flags))
2982 opts->x_flag_schedule_insns = 0;
2983 else
2984 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
2986 /* Disable shrink-wrap when optimizing function for size, since it tends to
2987 generate additional returns. */
2988 if (optimize_function_for_size_p (cfun)
2989 && TARGET_THUMB2_P (opts->x_target_flags))
2990 opts->x_flag_shrink_wrap = false;
2991 else
2992 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
2994 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2995 - epilogue_insns - does not accurately model the corresponding insns
2996 emitted in the asm file. In particular, see the comment in thumb_exit
2997 'Find out how many of the (return) argument registers we can corrupt'.
2998 As a consequence, the epilogue may clobber registers without fipa-ra
2999 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3000 TODO: Accurately model clobbers for epilogue_insns and reenable
3001 fipa-ra. */
3002 if (TARGET_THUMB1_P (opts->x_target_flags))
3003 opts->x_flag_ipa_ra = 0;
3004 else
3005 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3007 /* Thumb2 inline assembly code should always use unified syntax.
3008 This will apply to ARM and Thumb1 eventually. */
3009 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3011 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3012 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3013 #endif
3016 /* Fix up any incompatible options that the user has specified. */
3017 static void
3018 arm_option_override (void)
3020 arm_selected_arch = NULL;
3021 arm_selected_cpu = NULL;
3022 arm_selected_tune = NULL;
3024 if (global_options_set.x_arm_arch_option)
3025 arm_selected_arch = &all_architectures[arm_arch_option];
3027 if (global_options_set.x_arm_cpu_option)
3029 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
3030 arm_selected_tune = &all_cores[(int) arm_cpu_option];
3033 if (global_options_set.x_arm_tune_option)
3034 arm_selected_tune = &all_cores[(int) arm_tune_option];
3036 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3037 SUBTARGET_OVERRIDE_OPTIONS;
3038 #endif
3040 if (arm_selected_arch)
3042 if (arm_selected_cpu)
3044 const arm_feature_set tuning_flags = ARM_FSET_MAKE_CPU1 (FL_TUNE);
3045 arm_feature_set selected_flags;
3046 ARM_FSET_XOR (selected_flags, arm_selected_cpu->flags,
3047 arm_selected_arch->flags);
3048 ARM_FSET_EXCLUDE (selected_flags, selected_flags, tuning_flags);
3049 /* Check for conflict between mcpu and march. */
3050 if (!ARM_FSET_IS_EMPTY (selected_flags))
3052 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3053 arm_selected_cpu->name, arm_selected_arch->name);
3054 /* -march wins for code generation.
3055 -mcpu wins for default tuning. */
3056 if (!arm_selected_tune)
3057 arm_selected_tune = arm_selected_cpu;
3059 arm_selected_cpu = arm_selected_arch;
3061 else
3062 /* -mcpu wins. */
3063 arm_selected_arch = NULL;
3065 else
3066 /* Pick a CPU based on the architecture. */
3067 arm_selected_cpu = arm_selected_arch;
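/* For example (hypothetical command lines): with -mcpu=cortex-a8
   -march=armv7-a the two flag sets differ only in tuning bits, so no
   warning is given and the CPU selection stands; with -mcpu=arm9
   -march=armv7-a the flags conflict, a warning is issued, -march wins
   for code generation and -mcpu still provides the default tuning.  */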
3070 /* If the user did not specify a processor, choose one for them. */
3071 if (!arm_selected_cpu)
3073 const struct processors * sel;
3074 arm_feature_set sought = ARM_FSET_EMPTY;
3076 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3077 gcc_assert (arm_selected_cpu->name);
3079 sel = arm_selected_cpu;
3080 insn_flags = sel->flags;
3082 /* Now check to see if the user has specified some command line
3083 switches that require certain abilities from the cpu. */
3085 if (TARGET_INTERWORK || TARGET_THUMB)
3087 ARM_FSET_ADD_CPU1 (sought, FL_THUMB);
3088 ARM_FSET_ADD_CPU1 (sought, FL_MODE32);
3090 /* There are no ARM processors that support both APCS-26 and
3091 interworking. Therefore we force FL_MODE26 to be removed
3092 from insn_flags here (if it was set), so that the search
3093 below will always be able to find a compatible processor. */
3094 ARM_FSET_DEL_CPU1 (insn_flags, FL_MODE26);
3097 if (!ARM_FSET_IS_EMPTY (sought)
3098 && !(ARM_FSET_CPU_SUBSET (sought, insn_flags)))
3100 /* Try to locate a CPU type that supports all of the abilities
3101 of the default CPU, plus the extra abilities requested by
3102 the user. */
3103 for (sel = all_cores; sel->name != NULL; sel++)
3104 if (ARM_FSET_CPU_SUBSET (sought, sel->flags))
3105 break;
3107 if (sel->name == NULL)
3109 unsigned current_bit_count = 0;
3110 const struct processors * best_fit = NULL;
3112 /* Ideally we would like to issue an error message here
3113 saying that it was not possible to find a CPU compatible
3114 with the default CPU, but which also supports the command
3115 line options specified by the programmer, and so they
3116 ought to use the -mcpu=<name> command line option to
3117 override the default CPU type.
3119 If we cannot find a cpu that has both the
3120 characteristics of the default cpu and the given
3121 command line options we scan the array again looking
3122 for a best match. */
3123 for (sel = all_cores; sel->name != NULL; sel++)
3125 arm_feature_set required = ARM_FSET_EMPTY;
3126 ARM_FSET_UNION (required, sought, insn_flags);
3127 if (ARM_FSET_CPU_SUBSET (required, sel->flags))
3129 unsigned count;
3130 arm_feature_set flags;
3131 ARM_FSET_INTER (flags, sel->flags, insn_flags);
3132 count = feature_count (&flags);
3134 if (count >= current_bit_count)
3136 best_fit = sel;
3137 current_bit_count = count;
3141 gcc_assert (best_fit);
3142 sel = best_fit;
3145 arm_selected_cpu = sel;
3149 gcc_assert (arm_selected_cpu);
3150 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3151 if (!arm_selected_tune)
3152 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3154 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
3155 insn_flags = arm_selected_cpu->flags;
3156 arm_base_arch = arm_selected_cpu->base_arch;
3158 arm_tune = arm_selected_tune->core;
3159 tune_flags = arm_selected_tune->flags;
3160 current_tune = arm_selected_tune->tune;
3162 /* TBD: Dwarf info for apcs frame is not handled yet. */
3163 if (TARGET_APCS_FRAME)
3164 flag_shrink_wrap = false;
3166 /* BPABI targets use linker tricks to allow interworking on cores
3167 without thumb support. */
3168 if (TARGET_INTERWORK
3169 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB) || TARGET_BPABI))
3171 warning (0, "target CPU does not support interworking" );
3172 target_flags &= ~MASK_INTERWORK;
3175 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3177 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3178 target_flags |= MASK_APCS_FRAME;
3181 if (TARGET_POKE_FUNCTION_NAME)
3182 target_flags |= MASK_APCS_FRAME;
3184 if (TARGET_APCS_REENT && flag_pic)
3185 error ("-fpic and -mapcs-reent are incompatible");
3187 if (TARGET_APCS_REENT)
3188 warning (0, "APCS reentrant code not supported. Ignored");
3190 if (TARGET_APCS_FLOAT)
3191 warning (0, "passing floating point arguments in fp regs not yet supported");
3193 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3194 arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M);
3195 arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4);
3196 arm_arch4t = arm_arch4 && (ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB));
3197 arm_arch5 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5);
3198 arm_arch5e = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5E);
3199 arm_arch6 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6);
3200 arm_arch6k = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6K);
3201 arm_arch6kz = arm_arch6k && ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6KZ);
3202 arm_arch_notm = ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM);
3203 arm_arch6m = arm_arch6 && !arm_arch_notm;
3204 arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7);
3205 arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM);
3206 arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8);
3207 arm_arch8_1 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_1);
3208 arm_arch8_2 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_2);
3209 arm_arch_thumb1 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB);
3210 arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2);
3211 arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE);
3213 arm_ld_sched = ARM_FSET_HAS_CPU1 (tune_flags, FL_LDSCHED);
3214 arm_tune_strongarm = ARM_FSET_HAS_CPU1 (tune_flags, FL_STRONG);
3215 arm_tune_wbuf = ARM_FSET_HAS_CPU1 (tune_flags, FL_WBUF);
3216 arm_tune_xscale = ARM_FSET_HAS_CPU1 (tune_flags, FL_XSCALE);
3217 arm_arch_iwmmxt = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT);
3218 arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2);
3219 arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV);
3220 arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV);
3221 arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
3222 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
3223 arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
3224 arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL);
3225 arm_fp16_inst = ARM_FSET_HAS_CPU2 (insn_flags, FL2_FP16INST);
3226 if (arm_fp16_inst)
3228 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3229 error ("selected fp16 options are incompatible.");
3230 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3233 /* V5 code we generate is completely interworking capable, so we turn off
3234 TARGET_INTERWORK here to avoid many tests later on. */
3236 /* XXX However, we must pass the right pre-processor defines to CPP
3237 or GLD can get confused. This is a hack. */
3238 if (TARGET_INTERWORK)
3239 arm_cpp_interwork = 1;
3241 if (arm_arch5)
3242 target_flags &= ~MASK_INTERWORK;
3244 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3245 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3247 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3248 error ("iwmmxt abi requires an iwmmxt capable cpu");
3250 if (!global_options_set.x_arm_fpu_index)
3252 const char *target_fpu_name;
3253 bool ok;
3255 #ifdef FPUTYPE_DEFAULT
3256 target_fpu_name = FPUTYPE_DEFAULT;
3257 #else
3258 target_fpu_name = "vfp";
3259 #endif
3261 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
3262 CL_TARGET);
3263 gcc_assert (ok);
3266 /* If soft-float is specified then don't use FPU. */
3267 if (TARGET_SOFT_FLOAT)
3268 arm_fpu_attr = FPU_NONE;
3269 else
3270 arm_fpu_attr = FPU_VFP;
3272 if (TARGET_AAPCS_BASED)
3274 if (TARGET_CALLER_INTERWORKING)
3275 error ("AAPCS does not support -mcaller-super-interworking");
3276 else
3277 if (TARGET_CALLEE_INTERWORKING)
3278 error ("AAPCS does not support -mcallee-super-interworking");
3281 /* __fp16 support currently assumes the core has ldrh. */
3282 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3283 sorry ("__fp16 and no ldrh");
3285 if (TARGET_AAPCS_BASED)
3287 if (arm_abi == ARM_ABI_IWMMXT)
3288 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3289 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3290 && TARGET_HARD_FLOAT)
3291 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3292 else
3293 arm_pcs_default = ARM_PCS_AAPCS;
3295 else
3297 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3298 sorry ("-mfloat-abi=hard and VFP");
3300 if (arm_abi == ARM_ABI_APCS)
3301 arm_pcs_default = ARM_PCS_APCS;
3302 else
3303 arm_pcs_default = ARM_PCS_ATPCS;
3306 /* For arm2/3 there is no need to do any scheduling if we are doing
3307 software floating-point. */
3308 if (TARGET_SOFT_FLOAT && !ARM_FSET_HAS_CPU1 (tune_flags, FL_MODE32))
3309 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3311 /* Use the cp15 method if it is available. */
3312 if (target_thread_pointer == TP_AUTO)
3314 if (arm_arch6k && !TARGET_THUMB1)
3315 target_thread_pointer = TP_CP15;
3316 else
3317 target_thread_pointer = TP_SOFT;
3320 /* Override the default structure alignment for AAPCS ABI. */
3321 if (!global_options_set.x_arm_structure_size_boundary)
3323 if (TARGET_AAPCS_BASED)
3324 arm_structure_size_boundary = 8;
3326 else
3328 if (arm_structure_size_boundary != 8
3329 && arm_structure_size_boundary != 32
3330 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3332 if (ARM_DOUBLEWORD_ALIGN)
3333 warning (0,
3334 "structure size boundary can only be set to 8, 32 or 64");
3335 else
3336 warning (0, "structure size boundary can only be set to 8 or 32");
3337 arm_structure_size_boundary
3338 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3342 if (TARGET_VXWORKS_RTP)
3344 if (!global_options_set.x_arm_pic_data_is_text_relative)
3345 arm_pic_data_is_text_relative = 0;
3347 else if (flag_pic
3348 && !arm_pic_data_is_text_relative
3349 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3350 /* When text & data segments don't have a fixed displacement, the
3351 intended use is with a single, read-only PIC base register.
3352 Unless the user explicitly requested not to do that, set
3353 it. */
3354 target_flags |= MASK_SINGLE_PIC_BASE;
3356 /* If stack checking is disabled, we can use r10 as the PIC register,
3357 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3358 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3360 if (TARGET_VXWORKS_RTP)
3361 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3362 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3365 if (flag_pic && TARGET_VXWORKS_RTP)
3366 arm_pic_register = 9;
3368 if (arm_pic_register_string != NULL)
3370 int pic_register = decode_reg_name (arm_pic_register_string);
3372 if (!flag_pic)
3373 warning (0, "-mpic-register= is useless without -fpic");
3375 /* Prevent the user from choosing an obviously stupid PIC register. */
3376 else if (pic_register < 0 || call_used_regs[pic_register]
3377 || pic_register == HARD_FRAME_POINTER_REGNUM
3378 || pic_register == STACK_POINTER_REGNUM
3379 || pic_register >= PC_REGNUM
3380 || (TARGET_VXWORKS_RTP
3381 && (unsigned int) pic_register != arm_pic_register))
3382 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3383 else
3384 arm_pic_register = pic_register;
3387 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3388 if (fix_cm3_ldrd == 2)
3390 if (arm_selected_cpu->core == cortexm3)
3391 fix_cm3_ldrd = 1;
3392 else
3393 fix_cm3_ldrd = 0;
3396 /* Hot/Cold partitioning is not currently supported, since we can't
3397 handle literal pool placement in that case. */
3398 if (flag_reorder_blocks_and_partition)
3400 inform (input_location,
3401 "-freorder-blocks-and-partition not supported on this architecture");
3402 flag_reorder_blocks_and_partition = 0;
3403 flag_reorder_blocks = 1;
3406 if (flag_pic)
3407 /* Hoisting PIC address calculations more aggressively provides a small,
3408 but measurable, size reduction for PIC code. Therefore, we decrease
3409 the bar for unrestricted expression hoisting to the cost of PIC address
3410 calculation, which is 2 instructions. */
3411 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3412 global_options.x_param_values,
3413 global_options_set.x_param_values);
3415 /* ARM EABI defaults to strict volatile bitfields. */
3416 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3417 && abi_version_at_least(2))
3418 flag_strict_volatile_bitfields = 1;
3420 /* Enable software prefetching at -O3 for CPUs that have prefetch, and we
3421 have deemed it beneficial (signified by setting
3422 prefetch.num_slots to 1 or more). */
3423 if (flag_prefetch_loop_arrays < 0
3424 && HAVE_prefetch
3425 && optimize >= 3
3426 && current_tune->prefetch.num_slots > 0)
3427 flag_prefetch_loop_arrays = 1;
3429 /* Set up parameters to be used in prefetching algorithm. Do not
3430 override the defaults unless we are tuning for a core we have
3431 researched values for. */
3432 if (current_tune->prefetch.num_slots > 0)
3433 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3434 current_tune->prefetch.num_slots,
3435 global_options.x_param_values,
3436 global_options_set.x_param_values);
3437 if (current_tune->prefetch.l1_cache_line_size >= 0)
3438 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3439 current_tune->prefetch.l1_cache_line_size,
3440 global_options.x_param_values,
3441 global_options_set.x_param_values);
3442 if (current_tune->prefetch.l1_cache_size >= 0)
3443 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3444 current_tune->prefetch.l1_cache_size,
3445 global_options.x_param_values,
3446 global_options_set.x_param_values);
3448 /* Use Neon to perform 64-bit operations rather than core
3449 registers. */
3450 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3451 if (use_neon_for_64bits == 1)
3452 prefer_neon_for_64bits = true;
3454 /* Use the alternative scheduling-pressure algorithm by default. */
3455 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3456 global_options.x_param_values,
3457 global_options_set.x_param_values);
3459 /* Look through the ready list and all of the queue for instructions
3460 relevant to the L2 auto-prefetcher. */
3461 int param_sched_autopref_queue_depth;
3463 switch (current_tune->sched_autopref)
3465 case tune_params::SCHED_AUTOPREF_OFF:
3466 param_sched_autopref_queue_depth = -1;
3467 break;
3469 case tune_params::SCHED_AUTOPREF_RANK:
3470 param_sched_autopref_queue_depth = 0;
3471 break;
3473 case tune_params::SCHED_AUTOPREF_FULL:
3474 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3475 break;
3477 default:
3478 gcc_unreachable ();
3481 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3482 param_sched_autopref_queue_depth,
3483 global_options.x_param_values,
3484 global_options_set.x_param_values);
3486 /* Currently, for slow flash data, we just disable literal pools. We also
3487 disable them for pure-code. */
3488 if (target_slow_flash_data || target_pure_code)
3489 arm_disable_literal_pool = true;
3491 /* Disable scheduling fusion by default if the target is not an ARMv7
3492 processor or does not prefer ldrd/strd. */
3493 if (flag_schedule_fusion == 2
3494 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3495 flag_schedule_fusion = 0;
3497 /* Need to remember initial options before they are overridden. */
3498 init_optimize = build_optimization_node (&global_options);
3500 arm_option_override_internal (&global_options, &global_options_set);
3501 arm_option_check_internal (&global_options);
3502 arm_option_params_internal ();
3504 /* Register global variables with the garbage collector. */
3505 arm_add_gc_roots ();
3507 /* Save the initial options in case the user does function specific
3508 options or #pragma target. */
3509 target_option_default_node = target_option_current_node
3510 = build_target_option_node (&global_options);
3512 /* Init initial mode for testing. */
3513 thumb_flipper = TARGET_THUMB;
3516 static void
3517 arm_add_gc_roots (void)
3519 gcc_obstack_init(&minipool_obstack);
3520 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3523 /* A table of known ARM exception types.
3524 For use with the interrupt function attribute. */
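/* For example, a handler declared as
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
   is matched against this table by arm_isr_value below and classified
   as ARM_FT_ISR.  */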
3526 typedef struct
3528 const char *const arg;
3529 const unsigned long return_value;
3531 isr_attribute_arg;
3533 static const isr_attribute_arg isr_attribute_args [] =
3535 { "IRQ", ARM_FT_ISR },
3536 { "irq", ARM_FT_ISR },
3537 { "FIQ", ARM_FT_FIQ },
3538 { "fiq", ARM_FT_FIQ },
3539 { "ABORT", ARM_FT_ISR },
3540 { "abort", ARM_FT_ISR },
3541 { "ABORT", ARM_FT_ISR },
3542 { "abort", ARM_FT_ISR },
3543 { "UNDEF", ARM_FT_EXCEPTION },
3544 { "undef", ARM_FT_EXCEPTION },
3545 { "SWI", ARM_FT_EXCEPTION },
3546 { "swi", ARM_FT_EXCEPTION },
3547 { NULL, ARM_FT_NORMAL }
3550 /* Returns the (interrupt) function type of the current
3551 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3553 static unsigned long
3554 arm_isr_value (tree argument)
3556 const isr_attribute_arg * ptr;
3557 const char * arg;
3559 if (!arm_arch_notm)
3560 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3562 /* No argument - default to IRQ. */
3563 if (argument == NULL_TREE)
3564 return ARM_FT_ISR;
3566 /* Get the value of the argument. */
3567 if (TREE_VALUE (argument) == NULL_TREE
3568 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3569 return ARM_FT_UNKNOWN;
3571 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3573 /* Check it against the list of known arguments. */
3574 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3575 if (streq (arg, ptr->arg))
3576 return ptr->return_value;
3578 /* An unrecognized interrupt type. */
3579 return ARM_FT_UNKNOWN;
3582 /* Computes the type of the current function. */
3584 static unsigned long
3585 arm_compute_func_type (void)
3587 unsigned long type = ARM_FT_UNKNOWN;
3588 tree a;
3589 tree attr;
3591 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3593 /* Decide if the current function is volatile. Such functions
3594 never return, and many memory cycles can be saved by not storing
3595 register values that will never be needed again. This optimization
3596 was added to speed up context switching in a kernel application. */
3597 if (optimize > 0
3598 && (TREE_NOTHROW (current_function_decl)
3599 || !(flag_unwind_tables
3600 || (flag_exceptions
3601 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3602 && TREE_THIS_VOLATILE (current_function_decl))
3603 type |= ARM_FT_VOLATILE;
3605 if (cfun->static_chain_decl != NULL)
3606 type |= ARM_FT_NESTED;
3608 attr = DECL_ATTRIBUTES (current_function_decl);
3610 a = lookup_attribute ("naked", attr);
3611 if (a != NULL_TREE)
3612 type |= ARM_FT_NAKED;
3614 a = lookup_attribute ("isr", attr);
3615 if (a == NULL_TREE)
3616 a = lookup_attribute ("interrupt", attr);
3618 if (a == NULL_TREE)
3619 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3620 else
3621 type |= arm_isr_value (TREE_VALUE (a));
3623 return type;
3626 /* Returns the type of the current function. */
3628 unsigned long
3629 arm_current_func_type (void)
3631 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3632 cfun->machine->func_type = arm_compute_func_type ();
3634 return cfun->machine->func_type;
3637 bool
3638 arm_allocate_stack_slots_for_args (void)
3640 /* Naked functions should not allocate stack slots for arguments. */
3641 return !IS_NAKED (arm_current_func_type ());
3644 static bool
3645 arm_warn_func_return (tree decl)
3647 /* Naked functions are implemented entirely in assembly, including the
3648 return sequence, so suppress warnings about this. */
3649 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3653 /* Output assembler code for a block containing the constant parts
3654 of a trampoline, leaving space for the variable parts.
3656 On the ARM, (if r8 is the static chain regnum, and remembering that
3657 referencing pc adds an offset of 8) the trampoline looks like:
3658 ldr r8, [pc, #0]
3659 ldr pc, [pc]
3660 .word static chain value
3661 .word function's address
3662 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
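/* arm_trampoline_init, below, fills in the two .word slots (at offsets 8
   and 12 for 32-bit targets) with the static chain value and the target
   function's address, and then invokes __clear_cache on the whole block.  */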
3664 static void
3665 arm_asm_trampoline_template (FILE *f)
3667 fprintf (f, "\t.syntax unified\n");
3669 if (TARGET_ARM)
3671 fprintf (f, "\t.arm\n");
3672 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3673 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3675 else if (TARGET_THUMB2)
3677 fprintf (f, "\t.thumb\n");
3678 /* The Thumb-2 trampoline is similar to the arm implementation.
3679 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3680 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3681 STATIC_CHAIN_REGNUM, PC_REGNUM);
3682 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3684 else
3686 ASM_OUTPUT_ALIGN (f, 2);
3687 fprintf (f, "\t.code\t16\n");
3688 fprintf (f, ".Ltrampoline_start:\n");
3689 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3690 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3691 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3692 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3693 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3694 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3696 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3697 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3700 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3702 static void
3703 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3705 rtx fnaddr, mem, a_tramp;
3707 emit_block_move (m_tramp, assemble_trampoline_template (),
3708 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3710 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3711 emit_move_insn (mem, chain_value);
3713 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3714 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3715 emit_move_insn (mem, fnaddr);
3717 a_tramp = XEXP (m_tramp, 0);
3718 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3719 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3720 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3723 /* Thumb trampolines should be entered in thumb mode, so set
3724 the bottom bit of the address. */
3726 static rtx
3727 arm_trampoline_adjust_address (rtx addr)
3729 if (TARGET_THUMB)
3730 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3731 NULL, 0, OPTAB_LIB_WIDEN);
3732 return addr;
3735 /* Return 1 if it is possible to return using a single instruction.
3736 If SIBLING is non-null, this is a test for a return before a sibling
3737 call. SIBLING is the call insn, so we can examine its register usage. */
3740 use_return_insn (int iscond, rtx sibling)
3742 int regno;
3743 unsigned int func_type;
3744 unsigned long saved_int_regs;
3745 unsigned HOST_WIDE_INT stack_adjust;
3746 arm_stack_offsets *offsets;
3748 /* Never use a return instruction before reload has run. */
3749 if (!reload_completed)
3750 return 0;
3752 func_type = arm_current_func_type ();
3754 /* Naked, volatile and stack alignment functions need special
3755 consideration. */
3756 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3757 return 0;
3759 /* So do interrupt functions that use the frame pointer and Thumb
3760 interrupt functions. */
3761 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3762 return 0;
3764 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3765 && !optimize_function_for_size_p (cfun))
3766 return 0;
3768 offsets = arm_get_frame_offsets ();
3769 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3771 /* As do variadic functions. */
3772 if (crtl->args.pretend_args_size
3773 || cfun->machine->uses_anonymous_args
3774 /* Or if the function calls __builtin_eh_return () */
3775 || crtl->calls_eh_return
3776 /* Or if the function calls alloca */
3777 || cfun->calls_alloca
3778 /* Or if there is a stack adjustment. However, if the stack pointer
3779 is saved on the stack, we can use a pre-incrementing stack load. */
3780 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3781 && stack_adjust == 4))
3782 /* Or if the static chain register was saved above the frame, under the
3783 assumption that the stack pointer isn't saved on the stack. */
3784 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3785 && arm_compute_static_chain_stack_bytes() != 0))
3786 return 0;
3788 saved_int_regs = offsets->saved_regs_mask;
3790 /* Unfortunately, the insn
3792 ldmib sp, {..., sp, ...}
3794 triggers a bug on most SA-110 based devices, such that the stack
3795 pointer won't be correctly restored if the instruction takes a
3796 page fault. We work around this problem by popping r3 along with
3797 the other registers, since that is never slower than executing
3798 another instruction.
3800 We test for !arm_arch5 here, because code for any architecture
3801 less than this could potentially be run on one of the buggy
3802 chips. */
3803 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3805 /* Validate that r3 is a call-clobbered register (always true in
3806 the default abi) ... */
3807 if (!call_used_regs[3])
3808 return 0;
3810 /* ... that it isn't being used for a return value ... */
3811 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3812 return 0;
3814 /* ... or for a tail-call argument ... */
3815 if (sibling)
3817 gcc_assert (CALL_P (sibling));
3819 if (find_regno_fusage (sibling, USE, 3))
3820 return 0;
3823 /* ... and that there are no call-saved registers in r0-r2
3824 (always true in the default ABI). */
3825 if (saved_int_regs & 0x7)
3826 return 0;
3829 /* Can't be done if interworking with Thumb, and any registers have been
3830 stacked. */
3831 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3832 return 0;
3834 /* On StrongARM, conditional returns are expensive if they aren't
3835 taken and multiple registers have been stacked. */
3836 if (iscond && arm_tune_strongarm)
3838 /* Conditional return when just the LR is stored is a simple
3839 conditional-load instruction, that's not expensive. */
3840 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3841 return 0;
3843 if (flag_pic
3844 && arm_pic_register != INVALID_REGNUM
3845 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3846 return 0;
3849 /* If there are saved registers but the LR isn't saved, then we need
3850 two instructions for the return. */
3851 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3852 return 0;
3854 /* Can't be done if any of the VFP regs are pushed,
3855 since this also requires an insn. */
3856 if (TARGET_HARD_FLOAT)
3857 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3858 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3859 return 0;
3861 if (TARGET_REALLY_IWMMXT)
3862 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3863 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3864 return 0;
3866 return 1;
3869 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3870 shrink-wrapping if possible. This is the case if we need to emit a
3871 prologue, which we can test by looking at the offsets. */
3872 bool
3873 use_simple_return_p (void)
3875 arm_stack_offsets *offsets;
3877 offsets = arm_get_frame_offsets ();
3878 return offsets->outgoing_args != 0;
3881 /* Return TRUE if int I is a valid immediate ARM constant. */
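/* As a rough guide: in ARM mode any 8-bit value rotated right by an even
   amount is acceptable, so 0x3fc0 (0xff << 6) and 0xf000000f (0xff rotated
   right by 4) pass, while 0x101 does not.  In Thumb-2 the replicated
   patterns 0x00XY00XY, 0xXY00XY00 and 0xXYXYXYXY (e.g. 0x00120012 or
   0x5a5a5a5a) are also accepted.  */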
3884 const_ok_for_arm (HOST_WIDE_INT i)
3886 int lowbit;
3888 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3889 be all zero, or all one. */
3890 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3891 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3892 != ((~(unsigned HOST_WIDE_INT) 0)
3893 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3894 return FALSE;
3896 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3898 /* Fast return for 0 and small values. We must do this for zero, since
3899 the code below can't handle that one case. */
3900 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3901 return TRUE;
3903 /* Get the number of trailing zeros. */
3904 lowbit = ffs((int) i) - 1;
3906 /* Only even shifts are allowed in ARM mode so round down to the
3907 nearest even number. */
3908 if (TARGET_ARM)
3909 lowbit &= ~1;
3911 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3912 return TRUE;
3914 if (TARGET_ARM)
3916 /* Allow rotated constants in ARM mode. */
3917 if (lowbit <= 4
3918 && ((i & ~0xc000003f) == 0
3919 || (i & ~0xf000000f) == 0
3920 || (i & ~0xfc000003) == 0))
3921 return TRUE;
3923 else
3925 HOST_WIDE_INT v;
3927 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3928 v = i & 0xff;
3929 v |= v << 16;
3930 if (i == v || i == (v | (v << 8)))
3931 return TRUE;
3933 /* Allow repeated pattern 0xXY00XY00. */
3934 v = i & 0xff00;
3935 v |= v << 16;
3936 if (i == v)
3937 return TRUE;
3940 return FALSE;
3943 /* Return true if I is a valid constant for the operation CODE. */
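/* For example, PLUS accepts -256 even though -256 is not itself a valid
   immediate, because the negated value 256 is (an ADD can be emitted as a
   SUB); likewise AND accepts any constant whose bitwise inverse is a valid
   immediate, since the operation can then be done with BIC.  */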
3945 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3947 if (const_ok_for_arm (i))
3948 return 1;
3950 switch (code)
3952 case SET:
3953 /* See if we can use movw. */
3954 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
3955 return 1;
3956 else
3957 /* Otherwise, try mvn. */
3958 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3960 case PLUS:
3961 /* See if we can use addw or subw. */
3962 if (TARGET_THUMB2
3963 && ((i & 0xfffff000) == 0
3964 || ((-i) & 0xfffff000) == 0))
3965 return 1;
3966 /* Fall through. */
3967 case COMPARE:
3968 case EQ:
3969 case NE:
3970 case GT:
3971 case LE:
3972 case LT:
3973 case GE:
3974 case GEU:
3975 case LTU:
3976 case GTU:
3977 case LEU:
3978 case UNORDERED:
3979 case ORDERED:
3980 case UNEQ:
3981 case UNGE:
3982 case UNLT:
3983 case UNGT:
3984 case UNLE:
3985 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3987 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3988 case XOR:
3989 return 0;
3991 case IOR:
3992 if (TARGET_THUMB2)
3993 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3994 return 0;
3996 case AND:
3997 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3999 default:
4000 gcc_unreachable ();
4004 /* Return true if I is a valid di mode constant for the operation CODE. */
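/* For example, an AND with 0xffffffff000000ff is accepted: the high word
   is all ones, so that half needs no instruction, and 0xff is a valid
   immediate for the low word.  */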
4006 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4008 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4009 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4010 rtx hi = GEN_INT (hi_val);
4011 rtx lo = GEN_INT (lo_val);
4013 if (TARGET_THUMB1)
4014 return 0;
4016 switch (code)
4018 case AND:
4019 case IOR:
4020 case XOR:
4021 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4022 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4023 case PLUS:
4024 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4026 default:
4027 return 0;
4031 /* Emit a sequence of insns to handle a large constant.
4032 CODE is the code of the operation required, it can be any of SET, PLUS,
4033 IOR, AND, XOR, MINUS;
4034 MODE is the mode in which the operation is being performed;
4035 VAL is the integer to operate on;
4036 SOURCE is the other operand (a register, or a null-pointer for SET);
4037 SUBTARGETS means it is safe to create scratch registers if that will
4038 either produce a simpler sequence, or we will want to cse the values.
4039 Return value is the number of insns emitted. */
4041 /* ??? Tweak this for thumb2. */
4043 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4044 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4046 rtx cond;
4048 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4049 cond = COND_EXEC_TEST (PATTERN (insn));
4050 else
4051 cond = NULL_RTX;
4053 if (subtargets || code == SET
4054 || (REG_P (target) && REG_P (source)
4055 && REGNO (target) != REGNO (source)))
4057 /* After arm_reorg has been called, we can't fix up expensive
4058 constants by pushing them into memory so we must synthesize
4059 them in-line, regardless of the cost. This is only likely to
4060 be more costly on chips that have load delay slots and we are
4061 compiling without running the scheduler (so no splitting
4062 occurred before the final instruction emission).
4064 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4066 if (!cfun->machine->after_arm_reorg
4067 && !cond
4068 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4069 1, 0)
4070 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4071 + (code != SET))))
4073 if (code == SET)
4075 /* Currently SET is the only monadic value for CODE, all
4076 the rest are dyadic. */
4077 if (TARGET_USE_MOVT)
4078 arm_emit_movpair (target, GEN_INT (val));
4079 else
4080 emit_set_insn (target, GEN_INT (val));
4082 return 1;
4084 else
4086 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4088 if (TARGET_USE_MOVT)
4089 arm_emit_movpair (temp, GEN_INT (val));
4090 else
4091 emit_set_insn (temp, GEN_INT (val));
4093 /* For MINUS, the constant is the minuend (SOURCE is subtracted
4094 from it), since we never have subtraction of a constant. */
4095 if (code == MINUS)
4096 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4097 else
4098 emit_set_insn (target,
4099 gen_rtx_fmt_ee (code, mode, source, temp));
4100 return 2;
4105 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4109 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4110 ARM/THUMB2 immediates, and add up to VAL.
4111 The function return value gives the number of insns required. */
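/* A small worked example: in ARM mode, VAL == 0x00ffff00 with CODE == SET
   is covered by the two immediates 0x00ff0000 and 0x0000ff00, so the
   function returns 2 (a MOV of one value followed by an ADD of the
   other).  */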
4112 static int
4113 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4114 struct four_ints *return_sequence)
4116 int best_consecutive_zeros = 0;
4117 int i;
4118 int best_start = 0;
4119 int insns1, insns2;
4120 struct four_ints tmp_sequence;
4122 /* If we aren't targeting ARM, the best place to start is always at
4123 the bottom, otherwise look more closely. */
4124 if (TARGET_ARM)
4126 for (i = 0; i < 32; i += 2)
4128 int consecutive_zeros = 0;
4130 if (!(val & (3 << i)))
4132 while ((i < 32) && !(val & (3 << i)))
4134 consecutive_zeros += 2;
4135 i += 2;
4137 if (consecutive_zeros > best_consecutive_zeros)
4139 best_consecutive_zeros = consecutive_zeros;
4140 best_start = i - consecutive_zeros;
4142 i -= 2;
4147 /* So long as it won't require any more insns to do so, it's
4148 desirable to emit a small constant (in bits 0...9) in the last
4149 insn. This way there is more chance that it can be combined with
4150 a later addressing insn to form a pre-indexed load or store
4151 operation. Consider:
4153 *((volatile int *)0xe0000100) = 1;
4154 *((volatile int *)0xe0000110) = 2;
4156 We want this to wind up as:
4158 mov rA, #0xe0000000
4159 mov rB, #1
4160 str rB, [rA, #0x100]
4161 mov rB, #2
4162 str rB, [rA, #0x110]
4164 rather than having to synthesize both large constants from scratch.
4166 Therefore, we calculate how many insns would be required to emit
4167 the constant starting from `best_start', and also starting from
4168 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4169 yield a shorter sequence, we may as well use zero. */
4170 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4171 if (best_start != 0
4172 && ((HOST_WIDE_INT_1U << best_start) < val))
4174 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4175 if (insns2 <= insns1)
4177 *return_sequence = tmp_sequence;
4178 insns1 = insns2;
4182 return insns1;
4185 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4186 static int
4187 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4188 struct four_ints *return_sequence, int i)
4190 int remainder = val & 0xffffffff;
4191 int insns = 0;
4193 /* Try and find a way of doing the job in either two or three
4194 instructions.
4196 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4197 location. We start at position I. This may be the MSB, or
4198 optimal_immediate_sequence may have positioned it at the largest block
4199 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4200 wrapping around to the top of the word when we drop off the bottom.
4201 In the worst case this code should produce no more than four insns.
4203 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4204 constants, shifted to any arbitrary location. We should always start
4205 at the MSB. */
4208 int end;
4209 unsigned int b1, b2, b3, b4;
4210 unsigned HOST_WIDE_INT result;
4211 int loc;
4213 gcc_assert (insns < 4);
4215 if (i <= 0)
4216 i += 32;
4218 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4219 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4221 loc = i;
4222 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4223 /* We can use addw/subw for the last 12 bits. */
4224 result = remainder;
4225 else
4227 /* Use an 8-bit shifted/rotated immediate. */
4228 end = i - 8;
4229 if (end < 0)
4230 end += 32;
4231 result = remainder & ((0x0ff << end)
4232 | ((i < end) ? (0xff >> (32 - end))
4233 : 0));
4234 i -= 8;
4237 else
4239 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4240 arbitrary shifts. */
4241 i -= TARGET_ARM ? 2 : 1;
4242 continue;
4245 /* Next, see if we can do a better job with a thumb2 replicated
4246 constant.
4248 We do it this way around to catch the cases like 0x01F001E0 where
4249 two 8-bit immediates would work, but a replicated constant would
4250 make it worse.
4252 TODO: 16-bit constants that don't clear all the bits, but still win.
4253 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4254 if (TARGET_THUMB2)
4256 b1 = (remainder & 0xff000000) >> 24;
4257 b2 = (remainder & 0x00ff0000) >> 16;
4258 b3 = (remainder & 0x0000ff00) >> 8;
4259 b4 = remainder & 0xff;
4261 if (loc > 24)
4263 /* The 8-bit immediate already found clears b1 (and maybe b2),
4264 but must leave b3 and b4 alone. */
4266 /* First try to find a 32-bit replicated constant that clears
4267 almost everything. We can assume that we can't do it in one,
4268 or else we wouldn't be here. */
4269 unsigned int tmp = b1 & b2 & b3 & b4;
4270 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4271 + (tmp << 24);
4272 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4273 + (tmp == b3) + (tmp == b4);
4274 if (tmp
4275 && (matching_bytes >= 3
4276 || (matching_bytes == 2
4277 && const_ok_for_op (remainder & ~tmp2, code))))
4279 /* At least 3 of the bytes match, and the fourth has at
4280 least as many bits set, or two of the bytes match
4281 and it will only require one more insn to finish. */
4282 result = tmp2;
4283 i = tmp != b1 ? 32
4284 : tmp != b2 ? 24
4285 : tmp != b3 ? 16
4286 : 8;
4289 /* Second, try to find a 16-bit replicated constant that can
4290 leave three of the bytes clear. If b2 or b4 is already
4291 zero, then we can. If the 8-bit from above would not
4292 clear b2 anyway, then we still win. */
4293 else if (b1 == b3 && (!b2 || !b4
4294 || (remainder & 0x00ff0000 & ~result)))
4296 result = remainder & 0xff00ff00;
4297 i = 24;
4300 else if (loc > 16)
4302 /* The 8-bit immediate already found clears b2 (and maybe b3)
4303 and we don't get here unless b1 is already clear, but it will
4304 leave b4 unchanged. */
4306 /* If we can clear b2 and b4 at once, then we win, since the
4307 8-bits couldn't possibly reach that far. */
4308 if (b2 == b4)
4310 result = remainder & 0x00ff00ff;
4311 i = 16;
4316 return_sequence->i[insns++] = result;
4317 remainder &= ~result;
4319 if (code == SET || code == MINUS)
4320 code = PLUS;
4322 while (remainder);
4324 return insns;
4327 /* Emit an instruction with the indicated PATTERN. If COND is
4328 non-NULL, conditionalize the execution of the instruction on COND
4329 being true. */
4331 static void
4332 emit_constant_insn (rtx cond, rtx pattern)
4334 if (cond)
4335 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4336 emit_insn (pattern);
4339 /* As above, but extra parameter GENERATE which, if clear, suppresses
4340 RTL generation. */
4342 static int
4343 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4344 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4345 int subtargets, int generate)
4347 int can_invert = 0;
4348 int can_negate = 0;
4349 int final_invert = 0;
4350 int i;
4351 int set_sign_bit_copies = 0;
4352 int clear_sign_bit_copies = 0;
4353 int clear_zero_bit_copies = 0;
4354 int set_zero_bit_copies = 0;
4355 int insns = 0, neg_insns, inv_insns;
4356 unsigned HOST_WIDE_INT temp1, temp2;
4357 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4358 struct four_ints *immediates;
4359 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4361 /* Find out which operations are safe for a given CODE. Also do a quick
4362 check for degenerate cases; these can occur when DImode operations
4363 are split. */
4364 switch (code)
4366 case SET:
4367 can_invert = 1;
4368 break;
4370 case PLUS:
4371 can_negate = 1;
4372 break;
4374 case IOR:
4375 if (remainder == 0xffffffff)
4377 if (generate)
4378 emit_constant_insn (cond,
4379 gen_rtx_SET (target,
4380 GEN_INT (ARM_SIGN_EXTEND (val))));
4381 return 1;
4384 if (remainder == 0)
4386 if (reload_completed && rtx_equal_p (target, source))
4387 return 0;
4389 if (generate)
4390 emit_constant_insn (cond, gen_rtx_SET (target, source));
4391 return 1;
4393 break;
4395 case AND:
4396 if (remainder == 0)
4398 if (generate)
4399 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4400 return 1;
4402 if (remainder == 0xffffffff)
4404 if (reload_completed && rtx_equal_p (target, source))
4405 return 0;
4406 if (generate)
4407 emit_constant_insn (cond, gen_rtx_SET (target, source));
4408 return 1;
4410 can_invert = 1;
4411 break;
4413 case XOR:
4414 if (remainder == 0)
4416 if (reload_completed && rtx_equal_p (target, source))
4417 return 0;
4418 if (generate)
4419 emit_constant_insn (cond, gen_rtx_SET (target, source));
4420 return 1;
4423 if (remainder == 0xffffffff)
4425 if (generate)
4426 emit_constant_insn (cond,
4427 gen_rtx_SET (target,
4428 gen_rtx_NOT (mode, source)));
4429 return 1;
4431 final_invert = 1;
4432 break;
4434 case MINUS:
4435 /* We treat MINUS as (val - source), since (source - val) is always
4436 passed as (source + (-val)). */
4437 if (remainder == 0)
4439 if (generate)
4440 emit_constant_insn (cond,
4441 gen_rtx_SET (target,
4442 gen_rtx_NEG (mode, source)));
4443 return 1;
4445 if (const_ok_for_arm (val))
4447 if (generate)
4448 emit_constant_insn (cond,
4449 gen_rtx_SET (target,
4450 gen_rtx_MINUS (mode, GEN_INT (val),
4451 source)));
4452 return 1;
4455 break;
4457 default:
4458 gcc_unreachable ();
4461 /* If we can do it in one insn get out quickly. */
4462 if (const_ok_for_op (val, code))
4464 if (generate)
4465 emit_constant_insn (cond,
4466 gen_rtx_SET (target,
4467 (source
4468 ? gen_rtx_fmt_ee (code, mode, source,
4469 GEN_INT (val))
4470 : GEN_INT (val))));
4471 return 1;
4474 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4475 insn. */
4476 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4477 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4479 if (generate)
4481 if (mode == SImode && i == 16)
4482 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4483 smaller insn. */
4484 emit_constant_insn (cond,
4485 gen_zero_extendhisi2
4486 (target, gen_lowpart (HImode, source)));
4487 else
4488 /* Extz only supports SImode, but we can coerce the operands
4489 into that mode. */
4490 emit_constant_insn (cond,
4491 gen_extzv_t2 (gen_lowpart (SImode, target),
4492 gen_lowpart (SImode, source),
4493 GEN_INT (i), const0_rtx));
4496 return 1;
4499 /* Calculate a few attributes that may be useful for specific
4500 optimizations. */
4501 /* Count number of leading zeros. */
4502 for (i = 31; i >= 0; i--)
4504 if ((remainder & (1 << i)) == 0)
4505 clear_sign_bit_copies++;
4506 else
4507 break;
4510 /* Count number of leading 1's. */
4511 for (i = 31; i >= 0; i--)
4513 if ((remainder & (1 << i)) != 0)
4514 set_sign_bit_copies++;
4515 else
4516 break;
4519 /* Count number of trailing zeros. */
4520 for (i = 0; i <= 31; i++)
4522 if ((remainder & (1 << i)) == 0)
4523 clear_zero_bit_copies++;
4524 else
4525 break;
4528 /* Count number of trailing 1's. */
4529 for (i = 0; i <= 31; i++)
4531 if ((remainder & (1 << i)) != 0)
4532 set_zero_bit_copies++;
4533 else
4534 break;
4537 switch (code)
4539 case SET:
4540 /* See if we can do this by sign_extending a constant that is known
4541 to be negative. This is a good way of doing it, since the shift
4542 may well merge into a subsequent insn. */
4543 if (set_sign_bit_copies > 1)
4545 if (const_ok_for_arm
4546 (temp1 = ARM_SIGN_EXTEND (remainder
4547 << (set_sign_bit_copies - 1))))
4549 if (generate)
4551 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4552 emit_constant_insn (cond,
4553 gen_rtx_SET (new_src, GEN_INT (temp1)));
4554 emit_constant_insn (cond,
4555 gen_ashrsi3 (target, new_src,
4556 GEN_INT (set_sign_bit_copies - 1)));
4558 return 2;
4560 /* For an inverted constant, we will need to set the low bits;
4561 these will be shifted out of harm's way. */
4562 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4563 if (const_ok_for_arm (~temp1))
4565 if (generate)
4567 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4568 emit_constant_insn (cond,
4569 gen_rtx_SET (new_src, GEN_INT (temp1)));
4570 emit_constant_insn (cond,
4571 gen_ashrsi3 (target, new_src,
4572 GEN_INT (set_sign_bit_copies - 1)));
4574 return 2;
4578 /* See if we can calculate the value as the difference between two
4579 valid immediates. */
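/* E.g. loading 0x00fffff0 can be done as a MOV of 0x01000000 followed by
   subtracting 0x10, since both of those are valid immediates while
   0x00fffff0 itself is not.  */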
4580 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4582 int topshift = clear_sign_bit_copies & ~1;
4584 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4585 & (0xff000000 >> topshift));
4587 /* If temp1 is zero, then that means the 9 most significant
4588 bits of remainder were 1 and we've caused it to overflow.
4589 When topshift is 0 we don't need to do anything since we
4590 can borrow from 'bit 32'. */
4591 if (temp1 == 0 && topshift != 0)
4592 temp1 = 0x80000000 >> (topshift - 1);
4594 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4596 if (const_ok_for_arm (temp2))
4598 if (generate)
4600 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4601 emit_constant_insn (cond,
4602 gen_rtx_SET (new_src, GEN_INT (temp1)));
4603 emit_constant_insn (cond,
4604 gen_addsi3 (target, new_src,
4605 GEN_INT (-temp2)));
4608 return 2;
4612 /* See if we can generate this by setting the bottom (or the top)
4613 16 bits, and then shifting these into the other half of the
4614 word. We only look for the simplest cases, to do more would cost
4615 too much. Be careful, however, not to generate this when the
4616 alternative would take fewer insns. */
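/* E.g. 0x12341234 can be synthesized as something like
     mov rA, #0x1200
     add rA, rA, #0x34
     orr rB, rA, rA, lsl #16
   three instructions rather than the four needed to build the constant
   directly from 8-bit pieces.  */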
4617 if (val & 0xffff0000)
4619 temp1 = remainder & 0xffff0000;
4620 temp2 = remainder & 0x0000ffff;
4622 /* Overlaps outside this range are best done using other methods. */
4623 for (i = 9; i < 24; i++)
4625 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4626 && !const_ok_for_arm (temp2))
4628 rtx new_src = (subtargets
4629 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4630 : target);
4631 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4632 source, subtargets, generate);
4633 source = new_src;
4634 if (generate)
4635 emit_constant_insn
4636 (cond,
4637 gen_rtx_SET
4638 (target,
4639 gen_rtx_IOR (mode,
4640 gen_rtx_ASHIFT (mode, source,
4641 GEN_INT (i)),
4642 source)));
4643 return insns + 1;
4647 /* Don't duplicate cases already considered. */
4648 for (i = 17; i < 24; i++)
4650 if (((temp1 | (temp1 >> i)) == remainder)
4651 && !const_ok_for_arm (temp1))
4653 rtx new_src = (subtargets
4654 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4655 : target);
4656 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4657 source, subtargets, generate);
4658 source = new_src;
4659 if (generate)
4660 emit_constant_insn
4661 (cond,
4662 gen_rtx_SET (target,
4663 gen_rtx_IOR
4664 (mode,
4665 gen_rtx_LSHIFTRT (mode, source,
4666 GEN_INT (i)),
4667 source)));
4668 return insns + 1;
4672 break;
4674 case IOR:
4675 case XOR:
4676 /* If we have IOR or XOR, and the constant can be loaded in a
4677 single instruction, and we can find a temporary to put it in,
4678 then this can be done in two instructions instead of 3-4. */
4679 if (subtargets
4680 /* TARGET can't be NULL if SUBTARGETS is 0 */
4681 || (reload_completed && !reg_mentioned_p (target, source)))
4683 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4685 if (generate)
4687 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4689 emit_constant_insn (cond,
4690 gen_rtx_SET (sub, GEN_INT (val)));
4691 emit_constant_insn (cond,
4692 gen_rtx_SET (target,
4693 gen_rtx_fmt_ee (code, mode,
4694 source, sub)));
4696 return 2;
4700 if (code == XOR)
4701 break;
4703 /* Convert.
4704 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4705 and the remainder 0s for e.g. 0xfff00000)
4706 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4708 This can be done in 2 instructions by using shifts with mov or mvn.
4709 e.g. for
4710 x = x | 0xfff00000;
4711 we generate.
4712 mvn r0, r0, asl #12
4713 mvn r0, r0, lsr #12 */
4714 if (set_sign_bit_copies > 8
4715 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4717 if (generate)
4719 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4720 rtx shift = GEN_INT (set_sign_bit_copies);
4722 emit_constant_insn
4723 (cond,
4724 gen_rtx_SET (sub,
4725 gen_rtx_NOT (mode,
4726 gen_rtx_ASHIFT (mode,
4727 source,
4728 shift))));
4729 emit_constant_insn
4730 (cond,
4731 gen_rtx_SET (target,
4732 gen_rtx_NOT (mode,
4733 gen_rtx_LSHIFTRT (mode, sub,
4734 shift))));
4736 return 2;
4739 /* Convert
4740 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4742 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4744 E.g. for r0 = r0 | 0xfff
4745 mvn r0, r0, lsr #12
4746 mvn r0, r0, asl #12
4749 if (set_zero_bit_copies > 8
4750 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4752 if (generate)
4754 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4755 rtx shift = GEN_INT (set_zero_bit_copies);
4757 emit_constant_insn
4758 (cond,
4759 gen_rtx_SET (sub,
4760 gen_rtx_NOT (mode,
4761 gen_rtx_LSHIFTRT (mode,
4762 source,
4763 shift))));
4764 emit_constant_insn
4765 (cond,
4766 gen_rtx_SET (target,
4767 gen_rtx_NOT (mode,
4768 gen_rtx_ASHIFT (mode, sub,
4769 shift))));
4771 return 2;
4774 /* This will never be reached for Thumb2 because orn is a valid
4775 instruction. This is for Thumb1 and the ARM 32 bit cases.
4777 x = y | constant (such that ~constant is a valid constant)
4778 Transform this to
4779 x = ~(~y & ~constant).
4781 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4783 if (generate)
4785 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4786 emit_constant_insn (cond,
4787 gen_rtx_SET (sub,
4788 gen_rtx_NOT (mode, source)));
4789 source = sub;
4790 if (subtargets)
4791 sub = gen_reg_rtx (mode);
4792 emit_constant_insn (cond,
4793 gen_rtx_SET (sub,
4794 gen_rtx_AND (mode, source,
4795 GEN_INT (temp1))));
4796 emit_constant_insn (cond,
4797 gen_rtx_SET (target,
4798 gen_rtx_NOT (mode, sub)));
4800 return 3;
4802 break;
4804 case AND:
4805 /* See if two shifts will do two or more insns' worth of work. */
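/* E.g. on a core without uxth, x &= 0x0000ffff can be done as a left
   shift by 16 followed by a logical right shift by 16.  */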
4806 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4808 HOST_WIDE_INT shift_mask = ((0xffffffff
4809 << (32 - clear_sign_bit_copies))
4810 & 0xffffffff);
4812 if ((remainder | shift_mask) != 0xffffffff)
4814 HOST_WIDE_INT new_val
4815 = ARM_SIGN_EXTEND (remainder | shift_mask);
4817 if (generate)
4819 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4820 insns = arm_gen_constant (AND, SImode, cond, new_val,
4821 new_src, source, subtargets, 1);
4822 source = new_src;
4824 else
4826 rtx targ = subtargets ? NULL_RTX : target;
4827 insns = arm_gen_constant (AND, mode, cond, new_val,
4828 targ, source, subtargets, 0);
4832 if (generate)
4834 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4835 rtx shift = GEN_INT (clear_sign_bit_copies);
4837 emit_insn (gen_ashlsi3 (new_src, source, shift));
4838 emit_insn (gen_lshrsi3 (target, new_src, shift));
4841 return insns + 2;
4844 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4846 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4848 if ((remainder | shift_mask) != 0xffffffff)
4850 HOST_WIDE_INT new_val
4851 = ARM_SIGN_EXTEND (remainder | shift_mask);
4852 if (generate)
4854 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4856 insns = arm_gen_constant (AND, mode, cond, new_val,
4857 new_src, source, subtargets, 1);
4858 source = new_src;
4860 else
4862 rtx targ = subtargets ? NULL_RTX : target;
4864 insns = arm_gen_constant (AND, mode, cond, new_val,
4865 targ, source, subtargets, 0);
4869 if (generate)
4871 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4872 rtx shift = GEN_INT (clear_zero_bit_copies);
4874 emit_insn (gen_lshrsi3 (new_src, source, shift));
4875 emit_insn (gen_ashlsi3 (target, new_src, shift));
4878 return insns + 2;
4881 break;
4883 default:
4884 break;
4887 /* Calculate what the instruction sequences would be if we generated it
4888 normally, negated, or inverted. */
4889 if (code == AND)
4890 /* AND cannot be split into multiple insns, so invert and use BIC. */
4891 insns = 99;
4892 else
4893 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4895 if (can_negate)
4896 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4897 &neg_immediates);
4898 else
4899 neg_insns = 99;
4901 if (can_invert || final_invert)
4902 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4903 &inv_immediates);
4904 else
4905 inv_insns = 99;
4907 immediates = &pos_immediates;
4909 /* Is the negated immediate sequence more efficient? */
4910 if (neg_insns < insns && neg_insns <= inv_insns)
4912 insns = neg_insns;
4913 immediates = &neg_immediates;
4915 else
4916 can_negate = 0;
4918 /* Is the inverted immediate sequence more efficient?
4919 We must allow for an extra NOT instruction for XOR operations, although
4920 there is some chance that the final 'mvn' will get optimized later. */
4921 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4923 insns = inv_insns;
4924 immediates = &inv_immediates;
4926 else
4928 can_invert = 0;
4929 final_invert = 0;
4932 /* Now output the chosen sequence as instructions. */
4933 if (generate)
4935 for (i = 0; i < insns; i++)
4937 rtx new_src, temp1_rtx;
4939 temp1 = immediates->i[i];
4941 if (code == SET || code == MINUS)
4942 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4943 else if ((final_invert || i < (insns - 1)) && subtargets)
4944 new_src = gen_reg_rtx (mode);
4945 else
4946 new_src = target;
4948 if (can_invert)
4949 temp1 = ~temp1;
4950 else if (can_negate)
4951 temp1 = -temp1;
4953 temp1 = trunc_int_for_mode (temp1, mode);
4954 temp1_rtx = GEN_INT (temp1);
4956 if (code == SET)
4958 else if (code == MINUS)
4959 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4960 else
4961 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4963 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4964 source = new_src;
4966 if (code == SET)
4968 can_negate = can_invert;
4969 can_invert = 0;
4970 code = PLUS;
4972 else if (code == MINUS)
4973 code = PLUS;
4977 if (final_invert)
4979 if (generate)
4980 emit_constant_insn (cond, gen_rtx_SET (target,
4981 gen_rtx_NOT (mode, source)));
4982 insns++;
4985 return insns;
4988 /* Canonicalize a comparison so that we are more likely to recognize it.
4989 This can be done for a few constant compares, where we can make the
4990 immediate value easier to load. */
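/* For example, for SImode (GT x 1023) is rewritten as (GE x 1024), since
   1024 is a valid immediate for CMP while 1023 is not.  */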
4992 static void
4993 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4994 bool op0_preserve_value)
4996 machine_mode mode;
4997 unsigned HOST_WIDE_INT i, maxval;
4999 mode = GET_MODE (*op0);
5000 if (mode == VOIDmode)
5001 mode = GET_MODE (*op1);
5003 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5005 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5006 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5007 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5008 for GTU/LEU in Thumb mode. */
5009 if (mode == DImode)
5012 if (*code == GT || *code == LE
5013 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5015 /* Missing comparison. First try to use an available
5016 comparison. */
5017 if (CONST_INT_P (*op1))
5019 i = INTVAL (*op1);
5020 switch (*code)
5022 case GT:
5023 case LE:
5024 if (i != maxval
5025 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5027 *op1 = GEN_INT (i + 1);
5028 *code = *code == GT ? GE : LT;
5029 return;
5031 break;
5032 case GTU:
5033 case LEU:
5034 if (i != ~((unsigned HOST_WIDE_INT) 0)
5035 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5037 *op1 = GEN_INT (i + 1);
5038 *code = *code == GTU ? GEU : LTU;
5039 return;
5041 break;
5042 default:
5043 gcc_unreachable ();
5047 /* If that did not work, reverse the condition. */
5048 if (!op0_preserve_value)
5050 std::swap (*op0, *op1);
5051 *code = (int)swap_condition ((enum rtx_code)*code);
5054 return;
5057 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5058 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5059 to facilitate possible combining with a cmp into 'ands'. */
5060 if (mode == SImode
5061 && GET_CODE (*op0) == ZERO_EXTEND
5062 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5063 && GET_MODE (XEXP (*op0, 0)) == QImode
5064 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5065 && subreg_lowpart_p (XEXP (*op0, 0))
5066 && *op1 == const0_rtx)
5067 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5068 GEN_INT (255));
5070 /* Comparisons smaller than DImode. Only adjust comparisons against
5071 an out-of-range constant. */
5072 if (!CONST_INT_P (*op1)
5073 || const_ok_for_arm (INTVAL (*op1))
5074 || const_ok_for_arm (- INTVAL (*op1)))
5075 return;
5077 i = INTVAL (*op1);
5079 switch (*code)
5081 case EQ:
5082 case NE:
5083 return;
5085 case GT:
5086 case LE:
5087 if (i != maxval
5088 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5090 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5091 *code = *code == GT ? GE : LT;
5092 return;
5094 break;
5096 case GE:
5097 case LT:
5098 if (i != ~maxval
5099 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5101 *op1 = GEN_INT (i - 1);
5102 *code = *code == GE ? GT : LE;
5103 return;
5105 break;
5107 case GTU:
5108 case LEU:
5109 if (i != ~((unsigned HOST_WIDE_INT) 0)
5110 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5112 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5113 *code = *code == GTU ? GEU : LTU;
5114 return;
5116 break;
5118 case GEU:
5119 case LTU:
5120 if (i != 0
5121 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5123 *op1 = GEN_INT (i - 1);
5124 *code = *code == GEU ? GTU : LEU;
5125 return;
5127 break;
5129 default:
5130 gcc_unreachable ();
5135 /* Define how to find the value returned by a function. */
5137 static rtx
5138 arm_function_value(const_tree type, const_tree func,
5139 bool outgoing ATTRIBUTE_UNUSED)
5141 machine_mode mode;
5142 int unsignedp ATTRIBUTE_UNUSED;
5143 rtx r ATTRIBUTE_UNUSED;
5145 mode = TYPE_MODE (type);
5147 if (TARGET_AAPCS_BASED)
5148 return aapcs_allocate_return_reg (mode, type, func);
5150 /* Promote integer types. */
5151 if (INTEGRAL_TYPE_P (type))
5152 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5154 /* Promotes small structs returned in a register to full-word size
5155 for big-endian AAPCS. */
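/* For example, a 6-byte structure has its size rounded up to 8 bytes
   here, giving DImode, so the value ends up at the most significant end
   of the returned register pair.  */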
5156 if (arm_return_in_msb (type))
5158 HOST_WIDE_INT size = int_size_in_bytes (type);
5159 if (size % UNITS_PER_WORD != 0)
5161 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5162 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5166 return arm_libcall_value_1 (mode);
5169 /* libcall hashtable helpers. */
5171 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5173 static inline hashval_t hash (const rtx_def *);
5174 static inline bool equal (const rtx_def *, const rtx_def *);
5175 static inline void remove (rtx_def *);
5178 inline bool
5179 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5181 return rtx_equal_p (p1, p2);
5184 inline hashval_t
5185 libcall_hasher::hash (const rtx_def *p1)
5187 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5190 typedef hash_table<libcall_hasher> libcall_table_type;
5192 static void
5193 add_libcall (libcall_table_type *htab, rtx libcall)
5195 *htab->find_slot (libcall, INSERT) = libcall;
5198 static bool
5199 arm_libcall_uses_aapcs_base (const_rtx libcall)
5201 static bool init_done = false;
5202 static libcall_table_type *libcall_htab = NULL;
5204 if (!init_done)
5206 init_done = true;
5208 libcall_htab = new libcall_table_type (31);
5209 add_libcall (libcall_htab,
5210 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5211 add_libcall (libcall_htab,
5212 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5213 add_libcall (libcall_htab,
5214 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5215 add_libcall (libcall_htab,
5216 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5218 add_libcall (libcall_htab,
5219 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5220 add_libcall (libcall_htab,
5221 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5222 add_libcall (libcall_htab,
5223 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5224 add_libcall (libcall_htab,
5225 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5227 add_libcall (libcall_htab,
5228 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5229 add_libcall (libcall_htab,
5230 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5231 add_libcall (libcall_htab,
5232 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5233 add_libcall (libcall_htab,
5234 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5235 add_libcall (libcall_htab,
5236 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5237 add_libcall (libcall_htab,
5238 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5239 add_libcall (libcall_htab,
5240 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5241 add_libcall (libcall_htab,
5242 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5244 /* Values from double-precision helper functions are returned in core
5245 registers if the selected core only supports single-precision
5246 arithmetic, even if we are using the hard-float ABI. The same is
5247 true for single-precision helpers, but we will never be using the
5248 hard-float ABI on a CPU which doesn't support single-precision
5249 operations in hardware. */
5250 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5251 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5252 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5253 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5254 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5255 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5256 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5257 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5258 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5259 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5260 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5261 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5262 SFmode));
5263 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5264 DFmode));
5267 return libcall && libcall_htab->find (libcall) != NULL;
5270 static rtx
5271 arm_libcall_value_1 (machine_mode mode)
5273 if (TARGET_AAPCS_BASED)
5274 return aapcs_libcall_value (mode);
5275 else if (TARGET_IWMMXT_ABI
5276 && arm_vector_mode_supported_p (mode))
5277 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5278 else
5279 return gen_rtx_REG (mode, ARG_REGISTER (1));
5282 /* Define how to find the value returned by a library function
5283 assuming the value has mode MODE. */
5285 static rtx
5286 arm_libcall_value (machine_mode mode, const_rtx libcall)
5288 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5289 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5291 /* The following libcalls return their result in integer registers,
5292 even though they return a floating point value. */
5293 if (arm_libcall_uses_aapcs_base (libcall))
5294 return gen_rtx_REG (mode, ARG_REGISTER(1));
5298 return arm_libcall_value_1 (mode);
5301 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5303 static bool
5304 arm_function_value_regno_p (const unsigned int regno)
5306 if (regno == ARG_REGISTER (1)
5307 || (TARGET_32BIT
5308 && TARGET_AAPCS_BASED
5309 && TARGET_HARD_FLOAT
5310 && regno == FIRST_VFP_REGNUM)
5311 || (TARGET_IWMMXT_ABI
5312 && regno == FIRST_IWMMXT_REGNUM))
5313 return true;
5315 return false;
5318 /* Determine the amount of memory needed to store the possible return
5319 registers of an untyped call. */
5321 arm_apply_result_size (void)
5323 int size = 16;
5325 if (TARGET_32BIT)
5327 if (TARGET_HARD_FLOAT_ABI)
5328 size += 32;
5329 if (TARGET_IWMMXT_ABI)
5330 size += 8;
5333 return size;
5336 /* Decide whether TYPE should be returned in memory (true)
5337 or in a register (false). FNTYPE is the type of the function making
5338 the call. */
5339 static bool
5340 arm_return_in_memory (const_tree type, const_tree fntype)
5342 HOST_WIDE_INT size;
5344 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5346 if (TARGET_AAPCS_BASED)
5348 /* Simple, non-aggregate types (i.e. not including vectors and
5349 complex) are always returned in a register (or registers).
5350 We don't care about which register here, so we can short-cut
5351 some of the detail. */
5352 if (!AGGREGATE_TYPE_P (type)
5353 && TREE_CODE (type) != VECTOR_TYPE
5354 && TREE_CODE (type) != COMPLEX_TYPE)
5355 return false;
5357 /* Any return value that is no larger than one word can be
5358 returned in r0. */
5359 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5360 return false;
5362 /* Check any available co-processors to see if they accept the
5363 type as a register candidate (VFP, for example, can return
5364 some aggregates in consecutive registers). These aren't
5365 available if the call is variadic. */
5366 if (aapcs_select_return_coproc (type, fntype) >= 0)
5367 return false;
5369 /* Vector values should be returned using ARM registers, not
5370 memory (unless they're over 16 bytes, which will break since
5371 we only have four call-clobbered registers to play with). */
5372 if (TREE_CODE (type) == VECTOR_TYPE)
5373 return (size < 0 || size > (4 * UNITS_PER_WORD));
5375 /* The rest go in memory. */
5376 return true;
5379 if (TREE_CODE (type) == VECTOR_TYPE)
5380 return (size < 0 || size > (4 * UNITS_PER_WORD));
5382 if (!AGGREGATE_TYPE_P (type) &&
5383 (TREE_CODE (type) != VECTOR_TYPE))
5384 /* All simple types are returned in registers. */
5385 return false;
5387 if (arm_abi != ARM_ABI_APCS)
5389 /* ATPCS and later return aggregate types in memory only if they are
5390 larger than a word (or are variable size). */
5391 return (size < 0 || size > UNITS_PER_WORD);
5394 /* For the arm-wince targets we choose to be compatible with Microsoft's
5395 ARM and Thumb compilers, which always return aggregates in memory. */
5396 #ifndef ARM_WINCE
5397 /* All structures/unions bigger than one word are returned in memory.
5398 Also catch the case where int_size_in_bytes returns -1. In this case
5399 the aggregate is either huge or of variable size, and in either case
5400 we will want to return it via memory and not in a register. */
5401 if (size < 0 || size > UNITS_PER_WORD)
5402 return true;
5404 if (TREE_CODE (type) == RECORD_TYPE)
5406 tree field;
5408 /* For a struct the APCS says that we only return in a register
5409 if the type is 'integer like' and every addressable element
5410 has an offset of zero. For practical purposes this means
5411 that the structure can have at most one non bit-field element
5412 and that this element must be the first one in the structure. */
5414 /* Find the first field, ignoring non FIELD_DECL things which will
5415 have been created by C++. */
5416 for (field = TYPE_FIELDS (type);
5417 field && TREE_CODE (field) != FIELD_DECL;
5418 field = DECL_CHAIN (field))
5419 continue;
5421 if (field == NULL)
5422 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5424 /* Check that the first field is valid for returning in a register. */
5426 /* ... Floats are not allowed */
5427 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5428 return true;
5430 /* ... Aggregates that are not themselves valid for returning in
5431 a register are not allowed. */
5432 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5433 return true;
5435 /* Now check the remaining fields, if any. Only bitfields are allowed,
5436 since they are not addressable. */
5437 for (field = DECL_CHAIN (field);
5438 field;
5439 field = DECL_CHAIN (field))
5441 if (TREE_CODE (field) != FIELD_DECL)
5442 continue;
5444 if (!DECL_BIT_FIELD_TYPE (field))
5445 return true;
5448 return false;
5451 if (TREE_CODE (type) == UNION_TYPE)
5453 tree field;
5455 /* Unions can be returned in registers if every element is
5456 integral, or can be returned in an integer register. */
5457 for (field = TYPE_FIELDS (type);
5458 field;
5459 field = DECL_CHAIN (field))
5461 if (TREE_CODE (field) != FIELD_DECL)
5462 continue;
5464 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5465 return true;
5467 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5468 return true;
5471 return false;
5473 #endif /* not ARM_WINCE */
5475 /* Return all other types in memory. */
5476 return true;
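/* As an illustration of the rules above (a sketch only; these hypothetical
   declarations are not used anywhere in the compiler):  */
#if 0
int        f1 (void);          /* Scalar: returned in r0.  */
struct s4  { char c; short h; };
struct s4  f2 (void);          /* 4 bytes <= one word: returned in r0.  */
struct hfa { float x, y; };
struct hfa f3 (void);          /* 8 bytes: memory under the base AAPCS, but
                                  s0/s1 when the VFP co-processor return
                                  rules apply (hard-float, non-variadic).  */
struct big { int a[8]; };
struct big f4 (void);          /* 32 bytes: always memory.  */
#endif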
5479 const struct pcs_attribute_arg
5481 const char *arg;
5482 enum arm_pcs value;
5483 } pcs_attribute_args[] =
5485 {"aapcs", ARM_PCS_AAPCS},
5486 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5487 #if 0
5488 /* We could recognize these, but changes would be needed elsewhere
5489 * to implement them. */
5490 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5491 {"atpcs", ARM_PCS_ATPCS},
5492 {"apcs", ARM_PCS_APCS},
5493 #endif
5494 {NULL, ARM_PCS_UNKNOWN}
5497 static enum arm_pcs
5498 arm_pcs_from_attribute (tree attr)
5500 const struct pcs_attribute_arg *ptr;
5501 const char *arg;
5503 /* Get the value of the argument. */
5504 if (TREE_VALUE (attr) == NULL_TREE
5505 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5506 return ARM_PCS_UNKNOWN;
5508 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5510 /* Check it against the list of known arguments. */
5511 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5512 if (streq (arg, ptr->arg))
5513 return ptr->value;
5515 /* An unrecognized PCS variant. */
5516 return ARM_PCS_UNKNOWN;
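/* For reference, the attribute parsed above is spelled in source code as,
   e.g. (a hypothetical declaration):

     extern double dist (float, float) __attribute__ ((pcs ("aapcs-vfp")));

   Unrecognized strings fall through to ARM_PCS_UNKNOWN and are diagnosed
   by arm_handle_pcs_attribute below.  */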
5519 /* Get the PCS variant to use for this call. TYPE is the function's type
5520 specification, DECL is the specific declaration. DECL may be null if
5521 the call could be indirect or if this is a library call. */
5522 static enum arm_pcs
5523 arm_get_pcs_model (const_tree type, const_tree decl)
5525 bool user_convention = false;
5526 enum arm_pcs user_pcs = arm_pcs_default;
5527 tree attr;
5529 gcc_assert (type);
5531 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5532 if (attr)
5534 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5535 user_convention = true;
5538 if (TARGET_AAPCS_BASED)
5540 /* Detect varargs functions. These always use the base rules
5541 (no argument is ever a candidate for a co-processor
5542 register). */
5543 bool base_rules = stdarg_p (type);
5545 if (user_convention)
5547 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5548 sorry ("non-AAPCS derived PCS variant");
5549 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5550 error ("variadic functions must use the base AAPCS variant");
5553 if (base_rules)
5554 return ARM_PCS_AAPCS;
5555 else if (user_convention)
5556 return user_pcs;
5557 else if (decl && flag_unit_at_a_time)
5559 /* Local functions never leak outside this compilation unit,
5560 so we are free to use whatever conventions are
5561 appropriate. */
5562 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5563 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5564 if (i && i->local)
5565 return ARM_PCS_AAPCS_LOCAL;
5568 else if (user_convention && user_pcs != arm_pcs_default)
5569 sorry ("PCS variant");
5571 /* For everything else we use the target's default. */
5572 return arm_pcs_default;
5576 static void
5577 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5578 const_tree fntype ATTRIBUTE_UNUSED,
5579 rtx libcall ATTRIBUTE_UNUSED,
5580 const_tree fndecl ATTRIBUTE_UNUSED)
5582 /* Record the unallocated VFP registers. */
5583 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5584 pcum->aapcs_vfp_reg_alloc = 0;
5587 /* Walk down the type tree of TYPE counting consecutive base elements.
5588 If *MODEP is VOIDmode, then set it to the first valid floating point
5589 type. If a non-floating point type is found, or if a floating point
5590 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5591 otherwise return the count in the sub-tree. */
5592 static int
5593 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5595 machine_mode mode;
5596 HOST_WIDE_INT size;
5598 switch (TREE_CODE (type))
5600 case REAL_TYPE:
5601 mode = TYPE_MODE (type);
5602 if (mode != DFmode && mode != SFmode && mode != HFmode)
5603 return -1;
5605 if (*modep == VOIDmode)
5606 *modep = mode;
5608 if (*modep == mode)
5609 return 1;
5611 break;
5613 case COMPLEX_TYPE:
5614 mode = TYPE_MODE (TREE_TYPE (type));
5615 if (mode != DFmode && mode != SFmode)
5616 return -1;
5618 if (*modep == VOIDmode)
5619 *modep = mode;
5621 if (*modep == mode)
5622 return 2;
5624 break;
5626 case VECTOR_TYPE:
5627 /* Use V2SImode and V4SImode as representatives of all 64-bit
5628 and 128-bit vector types, whether or not those modes are
5629 supported with the present options. */
5630 size = int_size_in_bytes (type);
5631 switch (size)
5633 case 8:
5634 mode = V2SImode;
5635 break;
5636 case 16:
5637 mode = V4SImode;
5638 break;
5639 default:
5640 return -1;
5643 if (*modep == VOIDmode)
5644 *modep = mode;
5646 /* Vector modes are considered to be opaque: two vectors are
5647 equivalent for the purposes of being homogeneous aggregates
5648 if they are the same size. */
5649 if (*modep == mode)
5650 return 1;
5652 break;
5654 case ARRAY_TYPE:
5656 int count;
5657 tree index = TYPE_DOMAIN (type);
5659 /* Can't handle incomplete types nor sizes that are not
5660 fixed. */
5661 if (!COMPLETE_TYPE_P (type)
5662 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5663 return -1;
5665 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5666 if (count == -1
5667 || !index
5668 || !TYPE_MAX_VALUE (index)
5669 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5670 || !TYPE_MIN_VALUE (index)
5671 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5672 || count < 0)
5673 return -1;
5675 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5676 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5678 /* There must be no padding. */
5679 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5680 return -1;
5682 return count;
5685 case RECORD_TYPE:
5687 int count = 0;
5688 int sub_count;
5689 tree field;
5691 /* Can't handle incomplete types nor sizes that are not
5692 fixed. */
5693 if (!COMPLETE_TYPE_P (type)
5694 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5695 return -1;
5697 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5699 if (TREE_CODE (field) != FIELD_DECL)
5700 continue;
5702 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5703 if (sub_count < 0)
5704 return -1;
5705 count += sub_count;
5708 /* There must be no padding. */
5709 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5710 return -1;
5712 return count;
5715 case UNION_TYPE:
5716 case QUAL_UNION_TYPE:
5718 /* These aren't very interesting except in a degenerate case. */
5719 int count = 0;
5720 int sub_count;
5721 tree field;
5723 /* Can't handle incomplete types nor sizes that are not
5724 fixed. */
5725 if (!COMPLETE_TYPE_P (type)
5726 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5727 return -1;
5729 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5731 if (TREE_CODE (field) != FIELD_DECL)
5732 continue;
5734 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5735 if (sub_count < 0)
5736 return -1;
5737 count = count > sub_count ? count : sub_count;
5740 /* There must be no padding. */
5741 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5742 return -1;
5744 return count;
5747 default:
5748 break;
5751 return -1;
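/* A few examples of the counting performed above (hypothetical types,
   assuming *MODEP starts out as VOIDmode):

     struct { float x, y, z; }       -> 3  (SFmode elements)
     struct { double d[2]; }         -> 2  (DFmode elements)
     struct { _Complex double c; }   -> 2  (DFmode elements)
     struct { float f; double d; }   -> -1 (mixed base modes)
     struct { float f[5]; }          -> 5  (counted here, but rejected by
                                            the caller because an HFA may
                                            contain at most four elements).  */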
5754 /* Return true if PCS_VARIANT should use VFP registers. */
5755 static bool
5756 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5758 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5760 static bool seen_thumb1_vfp = false;
5762 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5764 sorry ("Thumb-1 hard-float VFP ABI");
5765 /* sorry() is not immediately fatal, so only display this once. */
5766 seen_thumb1_vfp = true;
5769 return true;
5772 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5773 return false;
5775 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5776 (TARGET_VFP_DOUBLE || !is_double));
5779 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5780 suitable for passing or returning in VFP registers for the PCS
5781 variant selected. If it is, then *BASE_MODE is updated to contain
5782 a machine mode describing each element of the argument's type and
5783 *COUNT to hold the number of such elements. */
5784 static bool
5785 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5786 machine_mode mode, const_tree type,
5787 machine_mode *base_mode, int *count)
5789 machine_mode new_mode = VOIDmode;
5791 /* If we have the type information, prefer that to working things
5792 out from the mode. */
5793 if (type)
5795 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5797 if (ag_count > 0 && ag_count <= 4)
5798 *count = ag_count;
5799 else
5800 return false;
5802 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5803 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5804 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5806 *count = 1;
5807 new_mode = mode;
5809 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5811 *count = 2;
5812 new_mode = (mode == DCmode ? DFmode : SFmode);
5814 else
5815 return false;
5818 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5819 return false;
5821 *base_mode = new_mode;
5822 return true;
5825 static bool
5826 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5827 machine_mode mode, const_tree type)
5829 int count ATTRIBUTE_UNUSED;
5830 machine_mode ag_mode ATTRIBUTE_UNUSED;
5832 if (!use_vfp_abi (pcs_variant, false))
5833 return false;
5834 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5835 &ag_mode, &count);
5838 static bool
5839 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5840 const_tree type)
5842 if (!use_vfp_abi (pcum->pcs_variant, false))
5843 return false;
5845 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5846 &pcum->aapcs_vfp_rmode,
5847 &pcum->aapcs_vfp_rcount);
5850 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
5851 for the behaviour of this function. */
5853 static bool
5854 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5855 const_tree type ATTRIBUTE_UNUSED)
5857 int rmode_size
5858 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
5859 int shift = rmode_size / GET_MODE_SIZE (SFmode);
5860 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5861 int regno;
5863 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5864 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5866 pcum->aapcs_vfp_reg_alloc = mask << regno;
5867 if (mode == BLKmode
5868 || (mode == TImode && ! TARGET_NEON)
5869 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5871 int i;
5872 int rcount = pcum->aapcs_vfp_rcount;
5873 int rshift = shift;
5874 machine_mode rmode = pcum->aapcs_vfp_rmode;
5875 rtx par;
5876 if (!TARGET_NEON)
5878 /* Avoid using unsupported vector modes. */
5879 if (rmode == V2SImode)
5880 rmode = DImode;
5881 else if (rmode == V4SImode)
5883 rmode = DImode;
5884 rcount *= 2;
5885 rshift /= 2;
5888 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5889 for (i = 0; i < rcount; i++)
5891 rtx tmp = gen_rtx_REG (rmode,
5892 FIRST_VFP_REGNUM + regno + i * rshift);
5893 tmp = gen_rtx_EXPR_LIST
5894 (VOIDmode, tmp,
5895 GEN_INT (i * GET_MODE_SIZE (rmode)));
5896 XVECEXP (par, 0, i) = tmp;
5899 pcum->aapcs_reg = par;
5901 else
5902 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5903 return true;
5905 return false;
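/* A worked example of the allocation above: for an argument that is a
   homogeneous aggregate of two doubles (aapcs_vfp_rmode == DFmode,
   aapcs_vfp_rcount == 2):

     rmode_size = 8,  shift = 8 / 4 = 2,  mask = (1 << (2 * 2)) - 1 = 0xf

   so the loop scans regno = 0, 2, 4, ... for four consecutive free
   single-precision registers; a hit at regno == 4, say, allocates s4-s7
   (d2-d3) and records mask << 4 in aapcs_vfp_reg_alloc.  */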
5908 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
5909 comment there for the behaviour of this function. */
5911 static rtx
5912 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5913 machine_mode mode,
5914 const_tree type ATTRIBUTE_UNUSED)
5916 if (!use_vfp_abi (pcs_variant, false))
5917 return NULL;
5919 if (mode == BLKmode
5920 || (GET_MODE_CLASS (mode) == MODE_INT
5921 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
5922 && !TARGET_NEON))
5924 int count;
5925 machine_mode ag_mode;
5926 int i;
5927 rtx par;
5928 int shift;
5930 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5931 &ag_mode, &count);
5933 if (!TARGET_NEON)
5935 if (ag_mode == V2SImode)
5936 ag_mode = DImode;
5937 else if (ag_mode == V4SImode)
5939 ag_mode = DImode;
5940 count *= 2;
5943 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5944 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5945 for (i = 0; i < count; i++)
5947 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5948 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5949 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5950 XVECEXP (par, 0, i) = tmp;
5953 return par;
5956 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5959 static void
5960 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5961 machine_mode mode ATTRIBUTE_UNUSED,
5962 const_tree type ATTRIBUTE_UNUSED)
5964 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5965 pcum->aapcs_vfp_reg_alloc = 0;
5966 return;
5969 #define AAPCS_CP(X) \
5971 aapcs_ ## X ## _cum_init, \
5972 aapcs_ ## X ## _is_call_candidate, \
5973 aapcs_ ## X ## _allocate, \
5974 aapcs_ ## X ## _is_return_candidate, \
5975 aapcs_ ## X ## _allocate_return_reg, \
5976 aapcs_ ## X ## _advance \
5979 /* Table of co-processors that can be used to pass arguments in
5980 registers. Ideally no argument should be a candidate for more than
5981 one co-processor table entry, but the table is processed in order
5982 and stops after the first match. If that entry then fails to put
5983 the argument into a co-processor register, the argument will go on
5984 the stack. */
5985 static struct
5987 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5988 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5990 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5991 BLKmode) is a candidate for this co-processor's registers; this
5992 function should ignore any position-dependent state in
5993 CUMULATIVE_ARGS and only use call-type dependent information. */
5994 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5996 /* Return true if the argument does get a co-processor register; it
5997 should set aapcs_reg to an RTX of the register allocated as is
5998 required for a return from FUNCTION_ARG. */
5999 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6001 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6002 be returned in this co-processor's registers. */
6003 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6005 /* Allocate and return an RTX element to hold the return type of a call. This
6006 routine must not fail and will only be called if is_return_candidate
6007 returned true with the same parameters. */
6008 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6010 /* Finish processing this argument and prepare to start processing
6011 the next one. */
6012 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6013 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6015 AAPCS_CP(vfp)
6018 #undef AAPCS_CP
6020 static int
6021 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6022 const_tree type)
6024 int i;
6026 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6027 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6028 return i;
6030 return -1;
6033 static int
6034 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6036 /* We aren't passed a decl, so we can't check that a call is local.
6037 However, it isn't clear that that would be a win anyway, since it
6038 might limit some tail-calling opportunities. */
6039 enum arm_pcs pcs_variant;
6041 if (fntype)
6043 const_tree fndecl = NULL_TREE;
6045 if (TREE_CODE (fntype) == FUNCTION_DECL)
6047 fndecl = fntype;
6048 fntype = TREE_TYPE (fntype);
6051 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6053 else
6054 pcs_variant = arm_pcs_default;
6056 if (pcs_variant != ARM_PCS_AAPCS)
6058 int i;
6060 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6061 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6062 TYPE_MODE (type),
6063 type))
6064 return i;
6066 return -1;
6069 static rtx
6070 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6071 const_tree fntype)
6073 /* We aren't passed a decl, so we can't check that a call is local.
6074 However, it isn't clear that that would be a win anyway, since it
6075 might limit some tail-calling opportunities. */
6076 enum arm_pcs pcs_variant;
6077 int unsignedp ATTRIBUTE_UNUSED;
6079 if (fntype)
6081 const_tree fndecl = NULL_TREE;
6083 if (TREE_CODE (fntype) == FUNCTION_DECL)
6085 fndecl = fntype;
6086 fntype = TREE_TYPE (fntype);
6089 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6091 else
6092 pcs_variant = arm_pcs_default;
6094 /* Promote integer types. */
6095 if (type && INTEGRAL_TYPE_P (type))
6096 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6098 if (pcs_variant != ARM_PCS_AAPCS)
6100 int i;
6102 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6103 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6104 type))
6105 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6106 mode, type);
6109 /* Promotes small structs returned in a register to full-word size
6110 for big-endian AAPCS. */
6111 if (type && arm_return_in_msb (type))
6113 HOST_WIDE_INT size = int_size_in_bytes (type);
6114 if (size % UNITS_PER_WORD != 0)
6116 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6117 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6121 return gen_rtx_REG (mode, R0_REGNUM);
6124 static rtx
6125 aapcs_libcall_value (machine_mode mode)
6127 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6128 && GET_MODE_SIZE (mode) <= 4)
6129 mode = SImode;
6131 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6134 /* Lay out a function argument using the AAPCS rules. The rule
6135 numbers referred to here are those in the AAPCS. */
6136 static void
6137 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6138 const_tree type, bool named)
6140 int nregs, nregs2;
6141 int ncrn;
6143 /* We only need to do this once per argument. */
6144 if (pcum->aapcs_arg_processed)
6145 return;
6147 pcum->aapcs_arg_processed = true;
6149 /* Special case: if named is false then we are handling an incoming
6150 anonymous argument which is on the stack. */
6151 if (!named)
6152 return;
6154 /* Is this a potential co-processor register candidate? */
6155 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6157 int slot = aapcs_select_call_coproc (pcum, mode, type);
6158 pcum->aapcs_cprc_slot = slot;
6160 /* We don't have to apply any of the rules from part B of the
6161 preparation phase, these are handled elsewhere in the
6162 compiler. */
6164 if (slot >= 0)
6166 /* A Co-processor register candidate goes either in its own
6167 class of registers or on the stack. */
6168 if (!pcum->aapcs_cprc_failed[slot])
6170 /* C1.cp - Try to allocate the argument to co-processor
6171 registers. */
6172 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6173 return;
6175 /* C2.cp - Put the argument on the stack and note that we
6176 can't assign any more candidates in this slot. We also
6177 need to note that we have allocated stack space, so that
6178 we won't later try to split a non-cprc candidate between
6179 core registers and the stack. */
6180 pcum->aapcs_cprc_failed[slot] = true;
6181 pcum->can_split = false;
6184 /* We didn't get a register, so this argument goes on the
6185 stack. */
6186 gcc_assert (pcum->can_split == false);
6187 return;
6191 /* C3 - For double-word aligned arguments, round the NCRN up to the
6192 next even number. */
6193 ncrn = pcum->aapcs_ncrn;
6194 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6195 ncrn++;
6197 nregs = ARM_NUM_REGS2(mode, type);
6199 /* Sigh, this test should really assert that nregs > 0, but a GCC
6200 extension allows empty structs and then gives them empty size; it
6201 then allows such a structure to be passed by value. For some of
6202 the code below we have to pretend that such an argument has
6203 non-zero size so that we 'locate' it correctly either in
6204 registers or on the stack. */
6205 gcc_assert (nregs >= 0);
6207 nregs2 = nregs ? nregs : 1;
6209 /* C4 - Argument fits entirely in core registers. */
6210 if (ncrn + nregs2 <= NUM_ARG_REGS)
6212 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6213 pcum->aapcs_next_ncrn = ncrn + nregs;
6214 return;
6217 /* C5 - Some core registers left and there are no arguments already
6218 on the stack: split this argument between the remaining core
6219 registers and the stack. */
6220 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6222 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6223 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6224 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6225 return;
6228 /* C6 - NCRN is set to 4. */
6229 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6231 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6232 return;
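/* A worked example of rules C3-C5 above, for a soft-float call to the
   hypothetical prototype

     void f (int a, double d, int b);

   'a' is allocated r0 (NCRN 0 -> 1).  'd' needs doubleword alignment, so C3
   rounds the NCRN up from 1 to 2 (leaving r1 unused) and C4 places it in
   r2/r3 (NCRN -> 4).  'b' then finds no core registers left, so it goes on
   the stack (C7/C8).  */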
6235 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6236 for a call to a function whose data type is FNTYPE.
6237 For a library call, FNTYPE is NULL. */
6238 void
6239 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6240 rtx libname,
6241 tree fndecl ATTRIBUTE_UNUSED)
6243 /* Determine the PCS variant to use for this call. */
6244 if (fntype)
6245 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6246 else
6247 pcum->pcs_variant = arm_pcs_default;
6249 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6251 if (arm_libcall_uses_aapcs_base (libname))
6252 pcum->pcs_variant = ARM_PCS_AAPCS;
6254 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6255 pcum->aapcs_reg = NULL_RTX;
6256 pcum->aapcs_partial = 0;
6257 pcum->aapcs_arg_processed = false;
6258 pcum->aapcs_cprc_slot = -1;
6259 pcum->can_split = true;
6261 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6263 int i;
6265 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6267 pcum->aapcs_cprc_failed[i] = false;
6268 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6271 return;
6274 /* Legacy ABIs */
6276 /* On the ARM, the offset starts at 0. */
6277 pcum->nregs = 0;
6278 pcum->iwmmxt_nregs = 0;
6279 pcum->can_split = true;
6281 /* Varargs vectors are treated the same as long long.
6282 named_count avoids having to change the way arm handles 'named' */
6283 pcum->named_count = 0;
6284 pcum->nargs = 0;
6286 if (TARGET_REALLY_IWMMXT && fntype)
6288 tree fn_arg;
6290 for (fn_arg = TYPE_ARG_TYPES (fntype);
6291 fn_arg;
6292 fn_arg = TREE_CHAIN (fn_arg))
6293 pcum->named_count += 1;
6295 if (! pcum->named_count)
6296 pcum->named_count = INT_MAX;
6300 /* Return true if mode/type need doubleword alignment. */
6301 static bool
6302 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6304 if (!type)
6305 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6307 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6308 if (!AGGREGATE_TYPE_P (type))
6309 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6311 /* Array types: Use member alignment of element type. */
6312 if (TREE_CODE (type) == ARRAY_TYPE)
6313 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6315 /* Record/aggregate types: Use greatest member alignment of any member. */
6316 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6317 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6318 return true;
6320 return false;
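/* For example, with the default 32-bit PARM_BOUNDARY:

     int, float, struct { char c[8]; }        -> false (32-bit alignment or less)
     long long, double, struct { double d; }  -> true  (64-bit alignment)  */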
6324 /* Determine where to put an argument to a function.
6325 Value is zero to push the argument on the stack,
6326 or a hard register in which to store the argument.
6328 MODE is the argument's machine mode.
6329 TYPE is the data type of the argument (as a tree).
6330 This is null for libcalls where that information may
6331 not be available.
6332 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6333 the preceding args and about the function being called.
6334 NAMED is nonzero if this argument is a named parameter
6335 (otherwise it is an extra parameter matching an ellipsis).
6337 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6338 other arguments are passed on the stack. If (NAMED == 0) (which happens
6339 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6340 defined), say it is passed in the stack (function_prologue will
6341 indeed make it pass in the stack if necessary). */
6343 static rtx
6344 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6345 const_tree type, bool named)
6347 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6348 int nregs;
6350 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6351 a call insn (op3 of a call_value insn). */
6352 if (mode == VOIDmode)
6353 return const0_rtx;
6355 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6357 aapcs_layout_arg (pcum, mode, type, named);
6358 return pcum->aapcs_reg;
6361 /* Varargs vectors are treated the same as long long.
6362 named_count avoids having to change the way arm handles 'named' */
6363 if (TARGET_IWMMXT_ABI
6364 && arm_vector_mode_supported_p (mode)
6365 && pcum->named_count > pcum->nargs + 1)
6367 if (pcum->iwmmxt_nregs <= 9)
6368 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6369 else
6371 pcum->can_split = false;
6372 return NULL_RTX;
6376 /* Put doubleword aligned quantities in even register pairs. */
6377 if (pcum->nregs & 1
6378 && ARM_DOUBLEWORD_ALIGN
6379 && arm_needs_doubleword_align (mode, type))
6380 pcum->nregs++;
6382 /* Only allow splitting an arg between regs and memory if all preceding
6383 args were allocated to regs. For args passed by reference we only count
6384 the reference pointer. */
6385 if (pcum->can_split)
6386 nregs = 1;
6387 else
6388 nregs = ARM_NUM_REGS2 (mode, type);
6390 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6391 return NULL_RTX;
6393 return gen_rtx_REG (mode, pcum->nregs);
6396 static unsigned int
6397 arm_function_arg_boundary (machine_mode mode, const_tree type)
6399 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6400 ? DOUBLEWORD_ALIGNMENT
6401 : PARM_BOUNDARY);
6404 static int
6405 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6406 tree type, bool named)
6408 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6409 int nregs = pcum->nregs;
6411 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6413 aapcs_layout_arg (pcum, mode, type, named);
6414 return pcum->aapcs_partial;
6417 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6418 return 0;
6420 if (NUM_ARG_REGS > nregs
6421 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6422 && pcum->can_split)
6423 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6425 return 0;
6428 /* Update the data in PCUM to advance over an argument
6429 of mode MODE and data type TYPE.
6430 (TYPE is null for libcalls where that information may not be available.) */
6432 static void
6433 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6434 const_tree type, bool named)
6436 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6438 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6440 aapcs_layout_arg (pcum, mode, type, named);
6442 if (pcum->aapcs_cprc_slot >= 0)
6444 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6445 type);
6446 pcum->aapcs_cprc_slot = -1;
6449 /* Generic stuff. */
6450 pcum->aapcs_arg_processed = false;
6451 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6452 pcum->aapcs_reg = NULL_RTX;
6453 pcum->aapcs_partial = 0;
6455 else
6457 pcum->nargs += 1;
6458 if (arm_vector_mode_supported_p (mode)
6459 && pcum->named_count > pcum->nargs
6460 && TARGET_IWMMXT_ABI)
6461 pcum->iwmmxt_nregs += 1;
6462 else
6463 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6467 /* Variable sized types are passed by reference. This is a GCC
6468 extension to the ARM ABI. */
6470 static bool
6471 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6472 machine_mode mode ATTRIBUTE_UNUSED,
6473 const_tree type, bool named ATTRIBUTE_UNUSED)
6475 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6478 /* Encode the current state of the #pragma [no_]long_calls. */
6479 typedef enum
6481 OFF, /* No #pragma [no_]long_calls is in effect. */
6482 LONG, /* #pragma long_calls is in effect. */
6483 SHORT /* #pragma no_long_calls is in effect. */
6484 } arm_pragma_enum;
6486 static arm_pragma_enum arm_pragma_long_calls = OFF;
6488 void
6489 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6491 arm_pragma_long_calls = LONG;
6494 void
6495 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6497 arm_pragma_long_calls = SHORT;
6500 void
6501 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6503 arm_pragma_long_calls = OFF;
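/* The pragmas handled above appear in source code as, for example
   (a hypothetical header fragment):  */
#if 0
#pragma long_calls
extern void far_away (void);   /* Calls to this will normally use a
                                  long-call sequence.  */
#pragma long_calls_off
#endif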
6506 /* Handle an attribute requiring a FUNCTION_DECL;
6507 arguments as in struct attribute_spec.handler. */
6508 static tree
6509 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6510 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6512 if (TREE_CODE (*node) != FUNCTION_DECL)
6514 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6515 name);
6516 *no_add_attrs = true;
6519 return NULL_TREE;
6522 /* Handle an "interrupt" or "isr" attribute;
6523 arguments as in struct attribute_spec.handler. */
6524 static tree
6525 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6526 bool *no_add_attrs)
6528 if (DECL_P (*node))
6530 if (TREE_CODE (*node) != FUNCTION_DECL)
6532 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6533 name);
6534 *no_add_attrs = true;
6536 /* FIXME: the argument if any is checked for type attributes;
6537 should it be checked for decl ones? */
6539 else
6541 if (TREE_CODE (*node) == FUNCTION_TYPE
6542 || TREE_CODE (*node) == METHOD_TYPE)
6544 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6546 warning (OPT_Wattributes, "%qE attribute ignored",
6547 name);
6548 *no_add_attrs = true;
6551 else if (TREE_CODE (*node) == POINTER_TYPE
6552 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6553 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6554 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6556 *node = build_variant_type_copy (*node);
6557 TREE_TYPE (*node) = build_type_attribute_variant
6558 (TREE_TYPE (*node),
6559 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6560 *no_add_attrs = true;
6562 else
6564 /* Possibly pass this attribute on from the type to a decl. */
6565 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6566 | (int) ATTR_FLAG_FUNCTION_NEXT
6567 | (int) ATTR_FLAG_ARRAY_NEXT))
6569 *no_add_attrs = true;
6570 return tree_cons (name, args, NULL_TREE);
6572 else
6574 warning (OPT_Wattributes, "%qE attribute ignored",
6575 name);
6580 return NULL_TREE;
6583 /* Handle a "pcs" attribute; arguments as in struct
6584 attribute_spec.handler. */
6585 static tree
6586 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6587 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6589 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6591 warning (OPT_Wattributes, "%qE attribute ignored", name);
6592 *no_add_attrs = true;
6594 return NULL_TREE;
6597 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6598 /* Handle the "notshared" attribute. This attribute is another way of
6599 requesting hidden visibility. ARM's compiler supports
6600 "__declspec(notshared)"; we support the same thing via an
6601 attribute. */
6603 static tree
6604 arm_handle_notshared_attribute (tree *node,
6605 tree name ATTRIBUTE_UNUSED,
6606 tree args ATTRIBUTE_UNUSED,
6607 int flags ATTRIBUTE_UNUSED,
6608 bool *no_add_attrs)
6610 tree decl = TYPE_NAME (*node);
6612 if (decl)
6614 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6615 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6616 *no_add_attrs = false;
6618 return NULL_TREE;
6620 #endif
6622 /* Return 0 if the attributes for two types are incompatible, 1 if they
6623 are compatible, and 2 if they are nearly compatible (which causes a
6624 warning to be generated). */
6625 static int
6626 arm_comp_type_attributes (const_tree type1, const_tree type2)
6628 int l1, l2, s1, s2;
6630 /* Check for mismatch of non-default calling convention. */
6631 if (TREE_CODE (type1) != FUNCTION_TYPE)
6632 return 1;
6634 /* Check for mismatched call attributes. */
6635 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6636 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6637 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6638 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6640 /* Only bother to check if an attribute is defined. */
6641 if (l1 | l2 | s1 | s2)
6643 /* If one type has an attribute, the other must have the same attribute. */
6644 if ((l1 != l2) || (s1 != s2))
6645 return 0;
6647 /* Disallow mixed attributes. */
6648 if ((l1 & s2) || (l2 & s1))
6649 return 0;
6652 /* Check for mismatched ISR attribute. */
6653 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6654 if (! l1)
6655 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6656 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6657 if (! l2)
6658 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6659 if (l1 != l2)
6660 return 0;
6662 return 1;
6665 /* Assigns default attributes to newly defined type. This is used to
6666 set short_call/long_call attributes for function types of
6667 functions defined inside corresponding #pragma scopes. */
6668 static void
6669 arm_set_default_type_attributes (tree type)
6671 /* Add __attribute__ ((long_call)) to all functions, when
6672 inside #pragma long_calls or __attribute__ ((short_call)),
6673 when inside #pragma no_long_calls. */
6674 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6676 tree type_attr_list, attr_name;
6677 type_attr_list = TYPE_ATTRIBUTES (type);
6679 if (arm_pragma_long_calls == LONG)
6680 attr_name = get_identifier ("long_call");
6681 else if (arm_pragma_long_calls == SHORT)
6682 attr_name = get_identifier ("short_call");
6683 else
6684 return;
6686 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6687 TYPE_ATTRIBUTES (type) = type_attr_list;
6691 /* Return true if DECL is known to be linked into section SECTION. */
6693 static bool
6694 arm_function_in_section_p (tree decl, section *section)
6696 /* We can only be certain about the prevailing symbol definition. */
6697 if (!decl_binds_to_current_def_p (decl))
6698 return false;
6700 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6701 if (!DECL_SECTION_NAME (decl))
6703 /* Make sure that we will not create a unique section for DECL. */
6704 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6705 return false;
6708 return function_section (decl) == section;
6711 /* Return nonzero if a 32-bit "long_call" should be generated for
6712 a call from the current function to DECL. We generate a long_call
6713 if the function:
6715 a. has an __attribute__ ((long_call))
6716 or b. is within the scope of a #pragma long_calls
6717 or c. the -mlong-calls command line switch has been specified
6719 However we do not generate a long call if the function:
6721 d. has an __attribute__ ((short_call))
6722 or e. is inside the scope of a #pragma no_long_calls
6723 or f. is defined in the same section as the current function. */
6725 bool
6726 arm_is_long_call_p (tree decl)
6728 tree attrs;
6730 if (!decl)
6731 return TARGET_LONG_CALLS;
6733 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6734 if (lookup_attribute ("short_call", attrs))
6735 return false;
6737 /* For "f", be conservative, and only cater for cases in which the
6738 whole of the current function is placed in the same section. */
6739 if (!flag_reorder_blocks_and_partition
6740 && TREE_CODE (decl) == FUNCTION_DECL
6741 && arm_function_in_section_p (decl, current_function_section ()))
6742 return false;
6744 if (lookup_attribute ("long_call", attrs))
6745 return true;
6747 return TARGET_LONG_CALLS;
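/* In source terms, cases a and d above correspond to declarations such as
   (hypothetical):  */
#if 0
extern void far_func (void)  __attribute__ ((long_call));
extern void near_func (void) __attribute__ ((short_call));
#endif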
6750 /* Return nonzero if it is ok to make a tail-call to DECL. */
6751 static bool
6752 arm_function_ok_for_sibcall (tree decl, tree exp)
6754 unsigned long func_type;
6756 if (cfun->machine->sibcall_blocked)
6757 return false;
6759 /* Never tailcall something if we are generating code for Thumb-1. */
6760 if (TARGET_THUMB1)
6761 return false;
6763 /* The PIC register is live on entry to VxWorks PLT entries, so we
6764 must make the call before restoring the PIC register. */
6765 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
6766 return false;
6768 /* If we are interworking and the function is not declared static
6769 then we can't tail-call it unless we know that it exists in this
6770 compilation unit (since it might be a Thumb routine). */
6771 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6772 && !TREE_ASM_WRITTEN (decl))
6773 return false;
6775 func_type = arm_current_func_type ();
6776 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6777 if (IS_INTERRUPT (func_type))
6778 return false;
6780 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6782 /* Check that the return value locations are the same. For
6783 example that we aren't returning a value from the sibling in
6784 a VFP register but then need to transfer it to a core
6785 register. */
6786 rtx a, b;
6787 tree decl_or_type = decl;
6789 /* If it is an indirect function pointer, get the function type. */
6790 if (!decl)
6791 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
6793 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
6794 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6795 cfun->decl, false);
6796 if (!rtx_equal_p (a, b))
6797 return false;
6800 /* Never tailcall if function may be called with a misaligned SP. */
6801 if (IS_STACKALIGN (func_type))
6802 return false;
6804 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6805 references should become a NOP. Don't convert such calls into
6806 sibling calls. */
6807 if (TARGET_AAPCS_BASED
6808 && arm_abi == ARM_ABI_AAPCS
6809 && decl
6810 && DECL_WEAK (decl))
6811 return false;
6813 /* Everything else is ok. */
6814 return true;
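/* An example of the return-location check above: with -mfloat-abi=hard, a
   variadic caller returning 'float' uses the base AAPCS and so returns in
   r0, while a non-variadic callee returning 'float' uses the VFP variant
   and returns in s0; the mismatch makes rtx_equal_p fail and the tail call
   is rejected.  */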
6818 /* Addressing mode support functions. */
6820 /* Return nonzero if X is a legitimate immediate operand when compiling
6821 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6822 int
6823 legitimate_pic_operand_p (rtx x)
6825 if (GET_CODE (x) == SYMBOL_REF
6826 || (GET_CODE (x) == CONST
6827 && GET_CODE (XEXP (x, 0)) == PLUS
6828 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6829 return 0;
6831 return 1;
6834 /* Record that the current function needs a PIC register. Initialize
6835 cfun->machine->pic_reg if we have not already done so. */
6837 static void
6838 require_pic_register (void)
6840 /* A lot of the logic here is made obscure by the fact that this
6841 routine gets called as part of the rtx cost estimation process.
6842 We don't want those calls to affect any assumptions about the real
6843 function; and further, we can't call entry_of_function() until we
6844 start the real expansion process. */
6845 if (!crtl->uses_pic_offset_table)
6847 gcc_assert (can_create_pseudo_p ());
6848 if (arm_pic_register != INVALID_REGNUM
6849 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6851 if (!cfun->machine->pic_reg)
6852 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6854 /* Play games to avoid marking the function as needing pic
6855 if we are being called as part of the cost-estimation
6856 process. */
6857 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6858 crtl->uses_pic_offset_table = 1;
6860 else
6862 rtx_insn *seq, *insn;
6864 if (!cfun->machine->pic_reg)
6865 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6867 /* Play games to avoid marking the function as needing pic
6868 if we are being called as part of the cost-estimation
6869 process. */
6870 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6872 crtl->uses_pic_offset_table = 1;
6873 start_sequence ();
6875 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6876 && arm_pic_register > LAST_LO_REGNUM)
6877 emit_move_insn (cfun->machine->pic_reg,
6878 gen_rtx_REG (Pmode, arm_pic_register));
6879 else
6880 arm_load_pic_register (0UL);
6882 seq = get_insns ();
6883 end_sequence ();
6885 for (insn = seq; insn; insn = NEXT_INSN (insn))
6886 if (INSN_P (insn))
6887 INSN_LOCATION (insn) = prologue_location;
6889 /* We can be called during expansion of PHI nodes, where
6890 we can't yet emit instructions directly in the final
6891 insn stream. Queue the insns on the entry edge, they will
6892 be committed after everything else is expanded. */
6893 insert_insn_on_edge (seq,
6894 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6900 rtx
6901 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6903 if (GET_CODE (orig) == SYMBOL_REF
6904 || GET_CODE (orig) == LABEL_REF)
6906 rtx insn;
6908 if (reg == 0)
6910 gcc_assert (can_create_pseudo_p ());
6911 reg = gen_reg_rtx (Pmode);
6914 /* VxWorks does not impose a fixed gap between segments; the run-time
6915 gap can be different from the object-file gap. We therefore can't
6916 use GOTOFF unless we are absolutely sure that the symbol is in the
6917 same segment as the GOT. Unfortunately, the flexibility of linker
6918 scripts means that we can't be sure of that in general, so assume
6919 that GOTOFF is never valid on VxWorks. */
6920 if ((GET_CODE (orig) == LABEL_REF
6921 || (GET_CODE (orig) == SYMBOL_REF &&
6922 SYMBOL_REF_LOCAL_P (orig)))
6923 && NEED_GOT_RELOC
6924 && arm_pic_data_is_text_relative)
6925 insn = arm_pic_static_addr (orig, reg);
6926 else
6928 rtx pat;
6929 rtx mem;
6931 /* If this function doesn't have a pic register, create one now. */
6932 require_pic_register ();
6934 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6936 /* Make the MEM as close to a constant as possible. */
6937 mem = SET_SRC (pat);
6938 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6939 MEM_READONLY_P (mem) = 1;
6940 MEM_NOTRAP_P (mem) = 1;
6942 insn = emit_insn (pat);
6945 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6946 by loop. */
6947 set_unique_reg_note (insn, REG_EQUAL, orig);
6949 return reg;
6951 else if (GET_CODE (orig) == CONST)
6953 rtx base, offset;
6955 if (GET_CODE (XEXP (orig, 0)) == PLUS
6956 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6957 return orig;
6959 /* Handle the case where we have: const (UNSPEC_TLS). */
6960 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6961 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6962 return orig;
6964 /* Handle the case where we have:
6965 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6966 CONST_INT. */
6967 if (GET_CODE (XEXP (orig, 0)) == PLUS
6968 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6969 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6971 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6972 return orig;
6975 if (reg == 0)
6977 gcc_assert (can_create_pseudo_p ());
6978 reg = gen_reg_rtx (Pmode);
6981 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6983 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6984 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6985 base == reg ? 0 : reg);
6987 if (CONST_INT_P (offset))
6989 /* The base register doesn't really matter, we only want to
6990 test the index for the appropriate mode. */
6991 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6993 gcc_assert (can_create_pseudo_p ());
6994 offset = force_reg (Pmode, offset);
6997 if (CONST_INT_P (offset))
6998 return plus_constant (Pmode, base, INTVAL (offset));
7001 if (GET_MODE_SIZE (mode) > 4
7002 && (GET_MODE_CLASS (mode) == MODE_INT
7003 || TARGET_SOFT_FLOAT))
7005 emit_insn (gen_addsi3 (reg, base, offset));
7006 return reg;
7009 return gen_rtx_PLUS (Pmode, base, offset);
7012 return orig;
7016 /* Find a spare register to use during the prolog of a function. */
7018 static int
7019 thumb_find_work_register (unsigned long pushed_regs_mask)
7021 int reg;
7023 /* Check the argument registers first as these are call-used. The
7024 register allocation order means that sometimes r3 might be used
7025 but earlier argument registers might not, so check them all. */
7026 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7027 if (!df_regs_ever_live_p (reg))
7028 return reg;
7030 /* Before going on to check the call-saved registers we can try a couple
7031 more ways of deducing that r3 is available. The first is when we are
7032 pushing anonymous arguments onto the stack and we have less than 4
7033 registers worth of fixed arguments(*). In this case r3 will be part of
7034 the variable argument list and so we can be sure that it will be
7035 pushed right at the start of the function. Hence it will be available
7036 for the rest of the prologue.
7037 (*): ie crtl->args.pretend_args_size is greater than 0. */
7038 if (cfun->machine->uses_anonymous_args
7039 && crtl->args.pretend_args_size > 0)
7040 return LAST_ARG_REGNUM;
7042 /* The other case is when we have fixed arguments but less than 4 registers
7043 worth. In this case r3 might be used in the body of the function, but
7044 it is not being used to convey an argument into the function. In theory
7045 we could just check crtl->args.size to see how many bytes are
7046 being passed in argument registers, but it seems that it is unreliable.
7047 Sometimes it will have the value 0 when in fact arguments are being
7048 passed. (See testcase execute/20021111-1.c for an example). So we also
7049 check the args_info.nregs field as well. The problem with this field is
7050 that it makes no allowances for arguments that are passed to the
7051 function but which are not used. Hence we could miss an opportunity
7052 when a function has an unused argument in r3. But it is better to be
7053 safe than to be sorry. */
7054 if (! cfun->machine->uses_anonymous_args
7055 && crtl->args.size >= 0
7056 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7057 && (TARGET_AAPCS_BASED
7058 ? crtl->args.info.aapcs_ncrn < 4
7059 : crtl->args.info.nregs < 4))
7060 return LAST_ARG_REGNUM;
7062 /* Otherwise look for a call-saved register that is going to be pushed. */
7063 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7064 if (pushed_regs_mask & (1 << reg))
7065 return reg;
7067 if (TARGET_THUMB2)
7069 /* Thumb-2 can use high regs. */
7070 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7071 if (pushed_regs_mask & (1 << reg))
7072 return reg;
7074 /* Something went wrong - thumb_compute_save_reg_mask()
7075 should have arranged for a suitable register to be pushed. */
7076 gcc_unreachable ();
7079 static GTY(()) int pic_labelno;
7081 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7082 low register. */
7084 void
7085 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7087 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7089 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7090 return;
7092 gcc_assert (flag_pic);
7094 pic_reg = cfun->machine->pic_reg;
7095 if (TARGET_VXWORKS_RTP)
7097 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7098 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7099 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7101 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7103 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7104 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7106 else
7108 /* We use an UNSPEC rather than a LABEL_REF because this label
7109 never appears in the code stream. */
7111 labelno = GEN_INT (pic_labelno++);
7112 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7113 l1 = gen_rtx_CONST (VOIDmode, l1);
7115 /* On the ARM the PC register contains 'dot + 8' at the time of the
7116 addition, on the Thumb it is 'dot + 4'. */
7117 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7118 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7119 UNSPEC_GOTSYM_OFF);
7120 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7122 if (TARGET_32BIT)
7124 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7126 else /* TARGET_THUMB1 */
7128 if (arm_pic_register != INVALID_REGNUM
7129 && REGNO (pic_reg) > LAST_LO_REGNUM)
7131 /* We will have pushed the pic register, so we should always be
7132 able to find a work register. */
7133 pic_tmp = gen_rtx_REG (SImode,
7134 thumb_find_work_register (saved_regs));
7135 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7136 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7137 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7139 else if (arm_pic_register != INVALID_REGNUM
7140 && arm_pic_register > LAST_LO_REGNUM
7141 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7143 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7144 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7145 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7147 else
7148 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7152 /* Need to emit this whether or not we obey regdecls,
7153 since setjmp/longjmp can cause life info to screw up. */
7154 emit_use (pic_reg);
7157 /* Generate code to load the address of a static var when flag_pic is set. */
7158 static rtx
7159 arm_pic_static_addr (rtx orig, rtx reg)
7161 rtx l1, labelno, offset_rtx, insn;
7163 gcc_assert (flag_pic);
7165 /* We use an UNSPEC rather than a LABEL_REF because this label
7166 never appears in the code stream. */
7167 labelno = GEN_INT (pic_labelno++);
7168 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7169 l1 = gen_rtx_CONST (VOIDmode, l1);
7171 /* On the ARM the PC register contains 'dot + 8' at the time of the
7172 addition, on the Thumb it is 'dot + 4'. */
7173 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7174 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7175 UNSPEC_SYMBOL_OFFSET);
7176 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7178 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7179 return insn;
7182 /* Return nonzero if X is valid as an ARM state addressing register. */
7183 static int
7184 arm_address_register_rtx_p (rtx x, int strict_p)
7186 int regno;
7188 if (!REG_P (x))
7189 return 0;
7191 regno = REGNO (x);
7193 if (strict_p)
7194 return ARM_REGNO_OK_FOR_BASE_P (regno);
7196 return (regno <= LAST_ARM_REGNUM
7197 || regno >= FIRST_PSEUDO_REGISTER
7198 || regno == FRAME_POINTER_REGNUM
7199 || regno == ARG_POINTER_REGNUM);
7202 /* Return TRUE if this rtx is the difference of a symbol and a label,
7203 and will reduce to a PC-relative relocation in the object file.
7204 Expressions like this can be left alone when generating PIC, rather
7205 than forced through the GOT. */
7206 static int
7207 pcrel_constant_p (rtx x)
7209 if (GET_CODE (x) == MINUS)
7210 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7212 return FALSE;
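/* For instance, an RTX of the form

     (minus (symbol_ref "some_sym") (label_ref L1))

   satisfies this test and can be left as a PC-relative reference even when
   generating PIC, instead of being forced through the GOT.  */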
7215 /* Return true if X will surely end up in an index register after next
7216 splitting pass. */
7217 static bool
7218 will_be_in_index_register (const_rtx x)
7220 /* arm.md: calculate_pic_address will split this into a register. */
7221 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7224 /* Return nonzero if X is a valid ARM state address operand. */
7225 int
7226 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7227 int strict_p)
7229 bool use_ldrd;
7230 enum rtx_code code = GET_CODE (x);
7232 if (arm_address_register_rtx_p (x, strict_p))
7233 return 1;
7235 use_ldrd = (TARGET_LDRD
7236 && (mode == DImode || mode == DFmode));
7238 if (code == POST_INC || code == PRE_DEC
7239 || ((code == PRE_INC || code == POST_DEC)
7240 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7241 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7243 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7244 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7245 && GET_CODE (XEXP (x, 1)) == PLUS
7246 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7248 rtx addend = XEXP (XEXP (x, 1), 1);
7250 /* Don't allow ldrd post increment by register because it's hard
7251 to fixup invalid register choices. */
7252 if (use_ldrd
7253 && GET_CODE (x) == POST_MODIFY
7254 && REG_P (addend))
7255 return 0;
7257 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7258 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7261 /* After reload constants split into minipools will have addresses
7262 from a LABEL_REF. */
7263 else if (reload_completed
7264 && (code == LABEL_REF
7265 || (code == CONST
7266 && GET_CODE (XEXP (x, 0)) == PLUS
7267 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7268 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7269 return 1;
7271 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7272 return 0;
7274 else if (code == PLUS)
7276 rtx xop0 = XEXP (x, 0);
7277 rtx xop1 = XEXP (x, 1);
7279 return ((arm_address_register_rtx_p (xop0, strict_p)
7280 && ((CONST_INT_P (xop1)
7281 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7282 || (!strict_p && will_be_in_index_register (xop1))))
7283 || (arm_address_register_rtx_p (xop1, strict_p)
7284 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7287 #if 0
7288 /* Reload currently can't handle MINUS, so disable this for now */
7289 else if (GET_CODE (x) == MINUS)
7291 rtx xop0 = XEXP (x, 0);
7292 rtx xop1 = XEXP (x, 1);
7294 return (arm_address_register_rtx_p (xop0, strict_p)
7295 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7297 #endif
7299 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7300 && code == SYMBOL_REF
7301 && CONSTANT_POOL_ADDRESS_P (x)
7302 && ! (flag_pic
7303 && symbol_mentioned_p (get_pool_constant (x))
7304 && ! pcrel_constant_p (get_pool_constant (x))))
7305 return 1;
7307 return 0;
7310 /* Return nonzero if X is a valid Thumb-2 address operand. */
7311 static int
7312 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7314 bool use_ldrd;
7315 enum rtx_code code = GET_CODE (x);
7317 if (arm_address_register_rtx_p (x, strict_p))
7318 return 1;
7320 use_ldrd = (TARGET_LDRD
7321 && (mode == DImode || mode == DFmode));
7323 if (code == POST_INC || code == PRE_DEC
7324 || ((code == PRE_INC || code == POST_DEC)
7325 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7326 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7328 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7329 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7330 && GET_CODE (XEXP (x, 1)) == PLUS
7331 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7333 /* Thumb-2 only has autoincrement by constant. */
7334 rtx addend = XEXP (XEXP (x, 1), 1);
7335 HOST_WIDE_INT offset;
7337 if (!CONST_INT_P (addend))
7338 return 0;
7340 offset = INTVAL (addend);
7341 if (GET_MODE_SIZE (mode) <= 4)
7342 return (offset > -256 && offset < 256);
7344 return (use_ldrd && offset > -1024 && offset < 1024
7345 && (offset & 3) == 0);
7348 /* After reload constants split into minipools will have addresses
7349 from a LABEL_REF. */
7350 else if (reload_completed
7351 && (code == LABEL_REF
7352 || (code == CONST
7353 && GET_CODE (XEXP (x, 0)) == PLUS
7354 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7355 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7356 return 1;
7358 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7359 return 0;
7361 else if (code == PLUS)
7363 rtx xop0 = XEXP (x, 0);
7364 rtx xop1 = XEXP (x, 1);
7366 return ((arm_address_register_rtx_p (xop0, strict_p)
7367 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7368 || (!strict_p && will_be_in_index_register (xop1))))
7369 || (arm_address_register_rtx_p (xop1, strict_p)
7370 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7373 /* Normally we can assign constant values to target registers without
7374 the help of the constant pool. But there are cases where we have to use the
7375 constant pool, such as:
7376 1) assigning a label to a register.
7377 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7379 A constant pool access of the form:
7380 (set (reg r0) (mem (symbol_ref (".LC0"))))
7381 will cause the use of the literal pool (later, in function arm_reorg).
7382 So here we mark such a form as invalid; the compiler
7383 will then adjust it into:
7384 (set (reg r0) (symbol_ref (".LC0")))
7385 (set (reg r0) (mem (reg r0))).
7386 No extra register is required, and (mem (reg r0)) won't cause the use
7387 of literal pools. */
7388 else if (arm_disable_literal_pool && code == SYMBOL_REF
7389 && CONSTANT_POOL_ADDRESS_P (x))
7390 return 0;
7392 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7393 && code == SYMBOL_REF
7394 && CONSTANT_POOL_ADDRESS_P (x)
7395 && ! (flag_pic
7396 && symbol_mentioned_p (get_pool_constant (x))
7397 && ! pcrel_constant_p (get_pool_constant (x))))
7398 return 1;
7400 return 0;
7403 /* Return nonzero if INDEX is valid for an address index operand in
7404 ARM state. */
7405 static int
7406 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7407 int strict_p)
7409 HOST_WIDE_INT range;
7410 enum rtx_code code = GET_CODE (index);
7412 /* Standard coprocessor addressing modes. */
7413 if (TARGET_HARD_FLOAT
7414 && (mode == SFmode || mode == DFmode))
7415 return (code == CONST_INT && INTVAL (index) < 1024
7416 && INTVAL (index) > -1024
7417 && (INTVAL (index) & 3) == 0);
7419 /* For quad modes, we restrict the constant offset to be slightly less
7420 than what the instruction format permits. We do this because for
7421 quad mode moves, we will actually decompose them into two separate
7422 double-mode reads or writes. INDEX must therefore be a valid
7423 (double-mode) offset and so should INDEX+8. */
7424 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7425 return (code == CONST_INT
7426 && INTVAL (index) < 1016
7427 && INTVAL (index) > -1024
7428 && (INTVAL (index) & 3) == 0);
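/* Worked example (added for illustration): a Q-register move is decomposed
   into two D-register accesses at INDEX and INDEX+8, so with the limit above
   the largest accepted quad offset is 1012, making the second access use
   offset 1020, which still fits the D-register range (-1024, 1024) with
   4-byte alignment.  */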
7430 /* We have no such constraint on double mode offsets, so we permit the
7431 full range of the instruction format. */
7432 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7433 return (code == CONST_INT
7434 && INTVAL (index) < 1024
7435 && INTVAL (index) > -1024
7436 && (INTVAL (index) & 3) == 0);
7438 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7439 return (code == CONST_INT
7440 && INTVAL (index) < 1024
7441 && INTVAL (index) > -1024
7442 && (INTVAL (index) & 3) == 0);
7444 if (arm_address_register_rtx_p (index, strict_p)
7445 && (GET_MODE_SIZE (mode) <= 4))
7446 return 1;
7448 if (mode == DImode || mode == DFmode)
7450 if (code == CONST_INT)
7452 HOST_WIDE_INT val = INTVAL (index);
7454 if (TARGET_LDRD)
7455 return val > -256 && val < 256;
7456 else
7457 return val > -4096 && val < 4092;
7460 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7463 if (GET_MODE_SIZE (mode) <= 4
7464 && ! (arm_arch4
7465 && (mode == HImode
7466 || mode == HFmode
7467 || (mode == QImode && outer == SIGN_EXTEND))))
7469 if (code == MULT)
7471 rtx xiop0 = XEXP (index, 0);
7472 rtx xiop1 = XEXP (index, 1);
7474 return ((arm_address_register_rtx_p (xiop0, strict_p)
7475 && power_of_two_operand (xiop1, SImode))
7476 || (arm_address_register_rtx_p (xiop1, strict_p)
7477 && power_of_two_operand (xiop0, SImode)));
7479 else if (code == LSHIFTRT || code == ASHIFTRT
7480 || code == ASHIFT || code == ROTATERT)
7482 rtx op = XEXP (index, 1);
7484 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7485 && CONST_INT_P (op)
7486 && INTVAL (op) > 0
7487 && INTVAL (op) <= 31);
7491 /* For ARM v4 we may be doing a sign-extend operation during the
7492 load. */
7493 if (arm_arch4)
7495 if (mode == HImode
7496 || mode == HFmode
7497 || (outer == SIGN_EXTEND && mode == QImode))
7498 range = 256;
7499 else
7500 range = 4096;
7502 else
7503 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7505 return (code == CONST_INT
7506 && INTVAL (index) < range
7507 && INTVAL (index) > -range);
7510 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
7511 index operand, i.e. 1, 2, 4 or 8. */
7512 static bool
7513 thumb2_index_mul_operand (rtx op)
7515 HOST_WIDE_INT val;
7517 if (!CONST_INT_P (op))
7518 return false;
7520 val = INTVAL (op);
7521 return (val == 1 || val == 2 || val == 4 || val == 8);
7524 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7525 static int
7526 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7528 enum rtx_code code = GET_CODE (index);
7530 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7531 /* Standard coprocessor addressing modes. */
7532 if (TARGET_HARD_FLOAT
7533 && (mode == SFmode || mode == DFmode))
7534 return (code == CONST_INT && INTVAL (index) < 1024
7535 /* Thumb-2 allows only a > -256 index range for its core register
7536 load/stores. Since we allow SF/DF in core registers, we have
7537 to use the intersection between -256~4096 (core) and -1024~1024
7538 (coprocessor). */
7539 && INTVAL (index) > -256
7540 && (INTVAL (index) & 3) == 0);
7542 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7544 /* For DImode assume values will usually live in core regs
7545 and only allow LDRD addressing modes. */
7546 if (!TARGET_LDRD || mode != DImode)
7547 return (code == CONST_INT
7548 && INTVAL (index) < 1024
7549 && INTVAL (index) > -1024
7550 && (INTVAL (index) & 3) == 0);
7553 /* For quad modes, we restrict the constant offset to be slightly less
7554 than what the instruction format permits. We do this because for
7555 quad mode moves, we will actually decompose them into two separate
7556 double-mode reads or writes. INDEX must therefore be a valid
7557 (double-mode) offset and so should INDEX+8. */
7558 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7559 return (code == CONST_INT
7560 && INTVAL (index) < 1016
7561 && INTVAL (index) > -1024
7562 && (INTVAL (index) & 3) == 0);
7564 /* We have no such constraint on double mode offsets, so we permit the
7565 full range of the instruction format. */
7566 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7567 return (code == CONST_INT
7568 && INTVAL (index) < 1024
7569 && INTVAL (index) > -1024
7570 && (INTVAL (index) & 3) == 0);
7572 if (arm_address_register_rtx_p (index, strict_p)
7573 && (GET_MODE_SIZE (mode) <= 4))
7574 return 1;
7576 if (mode == DImode || mode == DFmode)
7578 if (code == CONST_INT)
7580 HOST_WIDE_INT val = INTVAL (index);
7581 /* ??? Can we assume ldrd for thumb2? */
7582 /* Thumb-2 ldrd only has reg+const addressing modes. */
7583 /* ldrd supports offsets of +-1020.
7584 However the ldr fallback does not. */
7585 return val > -256 && val < 256 && (val & 3) == 0;
7587 else
7588 return 0;
7591 if (code == MULT)
7593 rtx xiop0 = XEXP (index, 0);
7594 rtx xiop1 = XEXP (index, 1);
7596 return ((arm_address_register_rtx_p (xiop0, strict_p)
7597 && thumb2_index_mul_operand (xiop1))
7598 || (arm_address_register_rtx_p (xiop1, strict_p)
7599 && thumb2_index_mul_operand (xiop0)));
7601 else if (code == ASHIFT)
7603 rtx op = XEXP (index, 1);
7605 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7606 && CONST_INT_P (op)
7607 && INTVAL (op) > 0
7608 && INTVAL (op) <= 3);
7611 return (code == CONST_INT
7612 && INTVAL (index) < 4096
7613 && INTVAL (index) > -256);
7616 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7617 static int
7618 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7620 int regno;
7622 if (!REG_P (x))
7623 return 0;
7625 regno = REGNO (x);
7627 if (strict_p)
7628 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7630 return (regno <= LAST_LO_REGNUM
7631 || regno > LAST_VIRTUAL_REGISTER
7632 || regno == FRAME_POINTER_REGNUM
7633 || (GET_MODE_SIZE (mode) >= 4
7634 && (regno == STACK_POINTER_REGNUM
7635 || regno >= FIRST_PSEUDO_REGISTER
7636 || x == hard_frame_pointer_rtx
7637 || x == arg_pointer_rtx)));
7640 /* Return nonzero if x is a legitimate index register. This is the case
7641 for any base register that can access a QImode object. */
7642 inline static int
7643 thumb1_index_register_rtx_p (rtx x, int strict_p)
7645 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7648 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7650 The AP may be eliminated to either the SP or the FP, so we use the
7651 least common denominator, e.g. SImode, and offsets from 0 to 64.
7653 ??? Verify whether the above is the right approach.
7655 ??? Also, the FP may be eliminated to the SP, so perhaps that
7656 needs special handling also.
7658 ??? Look at how the mips16 port solves this problem. It probably uses
7659 better ways to solve some of these problems.
7661 Although it is not incorrect, we don't accept QImode and HImode
7662 addresses based on the frame pointer or arg pointer until the
7663 reload pass starts. This is so that eliminating such addresses
7664 into stack-based ones won't produce impossible code. */
7666 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7668 /* ??? Not clear if this is right. Experiment. */
7669 if (GET_MODE_SIZE (mode) < 4
7670 && !(reload_in_progress || reload_completed)
7671 && (reg_mentioned_p (frame_pointer_rtx, x)
7672 || reg_mentioned_p (arg_pointer_rtx, x)
7673 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7674 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7675 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7676 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7677 return 0;
7679 /* Accept any base register. SP only in SImode or larger. */
7680 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7681 return 1;
7683 /* This is PC relative data before arm_reorg runs. */
7684 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7685 && GET_CODE (x) == SYMBOL_REF
7686 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7687 return 1;
7689 /* This is PC relative data after arm_reorg runs. */
7690 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7691 && reload_completed
7692 && (GET_CODE (x) == LABEL_REF
7693 || (GET_CODE (x) == CONST
7694 && GET_CODE (XEXP (x, 0)) == PLUS
7695 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7696 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7697 return 1;
7699 /* Post-inc indexing is only supported for SImode and larger. */
7700 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7701 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7702 return 1;
7704 else if (GET_CODE (x) == PLUS)
7706 /* REG+REG address can be any two index registers. */
7707 /* We disallow FRAME+REG addressing since we know that FRAME
7708 will be replaced with STACK, and SP relative addressing only
7709 permits SP+OFFSET. */
7710 if (GET_MODE_SIZE (mode) <= 4
7711 && XEXP (x, 0) != frame_pointer_rtx
7712 && XEXP (x, 1) != frame_pointer_rtx
7713 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7714 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7715 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7716 return 1;
7718 /* REG+const has 5-7 bit offset for non-SP registers. */
7719 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7720 || XEXP (x, 0) == arg_pointer_rtx)
7721 && CONST_INT_P (XEXP (x, 1))
7722 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7723 return 1;
7725 /* REG+const has 10-bit offset for SP, but only SImode and
7726 larger are supported. */
7727 /* ??? Should probably check for DI/DFmode overflow here
7728 just like GO_IF_LEGITIMATE_OFFSET does. */
7729 else if (REG_P (XEXP (x, 0))
7730 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7731 && GET_MODE_SIZE (mode) >= 4
7732 && CONST_INT_P (XEXP (x, 1))
7733 && INTVAL (XEXP (x, 1)) >= 0
7734 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7735 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7736 return 1;
7738 else if (REG_P (XEXP (x, 0))
7739 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7740 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7741 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7742 && REGNO (XEXP (x, 0))
7743 <= LAST_VIRTUAL_POINTER_REGISTER))
7744 && GET_MODE_SIZE (mode) >= 4
7745 && CONST_INT_P (XEXP (x, 1))
7746 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7747 return 1;
7750 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7751 && GET_MODE_SIZE (mode) == 4
7752 && GET_CODE (x) == SYMBOL_REF
7753 && CONSTANT_POOL_ADDRESS_P (x)
7754 && ! (flag_pic
7755 && symbol_mentioned_p (get_pool_constant (x))
7756 && ! pcrel_constant_p (get_pool_constant (x))))
7757 return 1;
7759 return 0;
7762 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7763 instruction of mode MODE. */
7765 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7767 switch (GET_MODE_SIZE (mode))
7769 case 1:
7770 return val >= 0 && val < 32;
7772 case 2:
7773 return val >= 0 && val < 64 && (val & 1) == 0;
7775 default:
7776 return (val >= 0
7777 && (val + GET_MODE_SIZE (mode)) <= 128
7778 && (val & 3) == 0);
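/* Worked examples (added for illustration): with the limits above, QImode
   accepts offsets 0..31, HImode accepts the even offsets 0..62, and SImode
   (and larger) accepts multiples of 4 such that the whole access stays
   within 128 bytes, i.e. 0..124 for a 4-byte load.  */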
7782 bool
7783 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7785 if (TARGET_ARM)
7786 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7787 else if (TARGET_THUMB2)
7788 return thumb2_legitimate_address_p (mode, x, strict_p);
7789 else /* if (TARGET_THUMB1) */
7790 return thumb1_legitimate_address_p (mode, x, strict_p);
7793 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7795 Given an rtx X being reloaded into a reg required to be
7796 in class CLASS, return the class of reg to actually use.
7797 In general this is just CLASS, but for the Thumb core registers and
7798 immediate constants we prefer a LO_REGS class or a subset. */
7800 static reg_class_t
7801 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7803 if (TARGET_32BIT)
7804 return rclass;
7805 else
7807 if (rclass == GENERAL_REGS)
7808 return LO_REGS;
7809 else
7810 return rclass;
7814 /* Build the SYMBOL_REF for __tls_get_addr. */
7816 static GTY(()) rtx tls_get_addr_libfunc;
7818 static rtx
7819 get_tls_get_addr (void)
7821 if (!tls_get_addr_libfunc)
7822 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7823 return tls_get_addr_libfunc;
7827 arm_load_tp (rtx target)
7829 if (!target)
7830 target = gen_reg_rtx (SImode);
7832 if (TARGET_HARD_TP)
7834 /* Can return in any reg. */
7835 emit_insn (gen_load_tp_hard (target));
7837 else
7839 /* Always returned in r0. Immediately copy the result into a pseudo,
7840 otherwise other uses of r0 (e.g. setting up function arguments) may
7841 clobber the value. */
7843 rtx tmp;
7845 emit_insn (gen_load_tp_soft ());
7847 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7848 emit_move_insn (target, tmp);
7850 return target;
7853 static rtx
7854 load_tls_operand (rtx x, rtx reg)
7856 rtx tmp;
7858 if (reg == NULL_RTX)
7859 reg = gen_reg_rtx (SImode);
7861 tmp = gen_rtx_CONST (SImode, x);
7863 emit_move_insn (reg, tmp);
7865 return reg;
7868 static rtx_insn *
7869 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7871 rtx label, labelno, sum;
7873 gcc_assert (reloc != TLS_DESCSEQ);
7874 start_sequence ();
7876 labelno = GEN_INT (pic_labelno++);
7877 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7878 label = gen_rtx_CONST (VOIDmode, label);
7880 sum = gen_rtx_UNSPEC (Pmode,
7881 gen_rtvec (4, x, GEN_INT (reloc), label,
7882 GEN_INT (TARGET_ARM ? 8 : 4)),
7883 UNSPEC_TLS);
7884 reg = load_tls_operand (sum, reg);
7886 if (TARGET_ARM)
7887 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7888 else
7889 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7891 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7892 LCT_PURE, /* LCT_CONST? */
7893 Pmode, 1, reg, Pmode);
7895 rtx_insn *insns = get_insns ();
7896 end_sequence ();
7898 return insns;
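/* Note (added for clarification): the GEN_INT (TARGET_ARM ? 8 : 4) operand
   above accounts for the apparent PC value when the add-to-PC instruction
   executes: in ARM state the PC reads as the instruction address plus 8,
   in Thumb state plus 4, so the assembled offset must be biased by that
   amount.  */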
7901 static rtx
7902 arm_tls_descseq_addr (rtx x, rtx reg)
7904 rtx labelno = GEN_INT (pic_labelno++);
7905 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7906 rtx sum = gen_rtx_UNSPEC (Pmode,
7907 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7908 gen_rtx_CONST (VOIDmode, label),
7909 GEN_INT (!TARGET_ARM)),
7910 UNSPEC_TLS);
7911 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7913 emit_insn (gen_tlscall (x, labelno));
7914 if (!reg)
7915 reg = gen_reg_rtx (SImode);
7916 else
7917 gcc_assert (REGNO (reg) != R0_REGNUM);
7919 emit_move_insn (reg, reg0);
7921 return reg;
7925 legitimize_tls_address (rtx x, rtx reg)
7927 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
7928 rtx_insn *insns;
7929 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7931 switch (model)
7933 case TLS_MODEL_GLOBAL_DYNAMIC:
7934 if (TARGET_GNU2_TLS)
7936 reg = arm_tls_descseq_addr (x, reg);
7938 tp = arm_load_tp (NULL_RTX);
7940 dest = gen_rtx_PLUS (Pmode, tp, reg);
7942 else
7944 /* Original scheme */
7945 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7946 dest = gen_reg_rtx (Pmode);
7947 emit_libcall_block (insns, dest, ret, x);
7949 return dest;
7951 case TLS_MODEL_LOCAL_DYNAMIC:
7952 if (TARGET_GNU2_TLS)
7954 reg = arm_tls_descseq_addr (x, reg);
7956 tp = arm_load_tp (NULL_RTX);
7958 dest = gen_rtx_PLUS (Pmode, tp, reg);
7960 else
7962 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7964 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7965 share the LDM result with other LD model accesses. */
7966 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7967 UNSPEC_TLS);
7968 dest = gen_reg_rtx (Pmode);
7969 emit_libcall_block (insns, dest, ret, eqv);
7971 /* Load the addend. */
7972 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7973 GEN_INT (TLS_LDO32)),
7974 UNSPEC_TLS);
7975 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7976 dest = gen_rtx_PLUS (Pmode, dest, addend);
7978 return dest;
7980 case TLS_MODEL_INITIAL_EXEC:
7981 labelno = GEN_INT (pic_labelno++);
7982 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7983 label = gen_rtx_CONST (VOIDmode, label);
7984 sum = gen_rtx_UNSPEC (Pmode,
7985 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7986 GEN_INT (TARGET_ARM ? 8 : 4)),
7987 UNSPEC_TLS);
7988 reg = load_tls_operand (sum, reg);
7990 if (TARGET_ARM)
7991 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7992 else if (TARGET_THUMB2)
7993 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7994 else
7996 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7997 emit_move_insn (reg, gen_const_mem (SImode, reg));
8000 tp = arm_load_tp (NULL_RTX);
8002 return gen_rtx_PLUS (Pmode, tp, reg);
8004 case TLS_MODEL_LOCAL_EXEC:
8005 tp = arm_load_tp (NULL_RTX);
8007 reg = gen_rtx_UNSPEC (Pmode,
8008 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8009 UNSPEC_TLS);
8010 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8012 return gen_rtx_PLUS (Pmode, tp, reg);
8014 default:
8015 abort ();
8019 /* Try machine-dependent ways of modifying an illegitimate address
8020 to be legitimate. If we find one, return the new, valid address. */
8022 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8024 if (arm_tls_referenced_p (x))
8026 rtx addend = NULL;
8028 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8030 addend = XEXP (XEXP (x, 0), 1);
8031 x = XEXP (XEXP (x, 0), 0);
8034 if (GET_CODE (x) != SYMBOL_REF)
8035 return x;
8037 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8039 x = legitimize_tls_address (x, NULL_RTX);
8041 if (addend)
8043 x = gen_rtx_PLUS (SImode, x, addend);
8044 orig_x = x;
8046 else
8047 return x;
8050 if (!TARGET_ARM)
8052 /* TODO: legitimize_address for Thumb2. */
8053 if (TARGET_THUMB2)
8054 return x;
8055 return thumb_legitimize_address (x, orig_x, mode);
8058 if (GET_CODE (x) == PLUS)
8060 rtx xop0 = XEXP (x, 0);
8061 rtx xop1 = XEXP (x, 1);
8063 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8064 xop0 = force_reg (SImode, xop0);
8066 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8067 && !symbol_mentioned_p (xop1))
8068 xop1 = force_reg (SImode, xop1);
8070 if (ARM_BASE_REGISTER_RTX_P (xop0)
8071 && CONST_INT_P (xop1))
8073 HOST_WIDE_INT n, low_n;
8074 rtx base_reg, val;
8075 n = INTVAL (xop1);
8077 /* VFP addressing modes actually allow greater offsets, but for
8078 now we just stick with the lowest common denominator. */
8079 if (mode == DImode || mode == DFmode)
8081 low_n = n & 0x0f;
8082 n &= ~0x0f;
8083 if (low_n > 4)
8085 n += 16;
8086 low_n -= 16;
8089 else
8091 low_n = ((mode) == TImode ? 0
8092 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8093 n -= low_n;
8096 base_reg = gen_reg_rtx (SImode);
8097 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8098 emit_move_insn (base_reg, val);
8099 x = plus_constant (Pmode, base_reg, low_n);
8101 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8102 x = gen_rtx_PLUS (SImode, xop0, xop1);
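/* Worked example (added for illustration): for an SImode access at
   base + 0x12345 the code above forces base + 0x12000 into a new register
   and leaves the 12-bit residue 0x345 as the immediate offset; for DImode
   or DFmode the residue is kept within the much smaller range accepted by
   LDRD/VFP-style addressing.  */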
8105 /* XXX We don't allow MINUS any more -- see comment in
8106 arm_legitimate_address_outer_p (). */
8107 else if (GET_CODE (x) == MINUS)
8109 rtx xop0 = XEXP (x, 0);
8110 rtx xop1 = XEXP (x, 1);
8112 if (CONSTANT_P (xop0))
8113 xop0 = force_reg (SImode, xop0);
8115 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8116 xop1 = force_reg (SImode, xop1);
8118 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8119 x = gen_rtx_MINUS (SImode, xop0, xop1);
8122 /* Make sure to take full advantage of the pre-indexed addressing mode
8123 with absolute addresses, which often allows the base register to
8124 be factorized for multiple adjacent memory references, and might
8125 even allow the minipool to be avoided entirely. */
8126 else if (CONST_INT_P (x) && optimize > 0)
8128 unsigned int bits;
8129 HOST_WIDE_INT mask, base, index;
8130 rtx base_reg;
8132 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8133 use an 8-bit index. So let's use a 12-bit index for SImode only and
8134 hope that arm_gen_constant will enable ldrb to use more bits. */
8135 bits = (mode == SImode) ? 12 : 8;
8136 mask = (1 << bits) - 1;
8137 base = INTVAL (x) & ~mask;
8138 index = INTVAL (x) & mask;
8139 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8141 /* It'll most probably be more efficient to generate the base
8142 with more bits set and use a negative index instead. */
8143 base |= mask;
8144 index -= mask;
8146 base_reg = force_reg (SImode, GEN_INT (base));
8147 x = plus_constant (Pmode, base_reg, index);
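/* Worked example (added for illustration): with optimization enabled, a
   direct SImode access to the absolute address 0x3057 is split into
   base = 0x3000 (forced into a register) plus index 0x57, so neighbouring
   absolute addresses can share the same base register.  */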
8150 if (flag_pic)
8152 /* We need to find and carefully transform any SYMBOL and LABEL
8153 references; so go back to the original address expression. */
8154 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8156 if (new_x != orig_x)
8157 x = new_x;
8160 return x;
8164 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8165 to be legitimate. If we find one, return the new, valid address. */
8167 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8169 if (GET_CODE (x) == PLUS
8170 && CONST_INT_P (XEXP (x, 1))
8171 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8172 || INTVAL (XEXP (x, 1)) < 0))
8174 rtx xop0 = XEXP (x, 0);
8175 rtx xop1 = XEXP (x, 1);
8176 HOST_WIDE_INT offset = INTVAL (xop1);
8178 /* Try to fold the offset into a biasing of the base register and
8179 then offsetting that. Don't do this when optimizing for space
8180 since it can cause too many CSEs. */
8181 if (optimize_size && offset >= 0
8182 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8184 HOST_WIDE_INT delta;
8186 if (offset >= 256)
8187 delta = offset - (256 - GET_MODE_SIZE (mode));
8188 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8189 delta = 31 * GET_MODE_SIZE (mode);
8190 else
8191 delta = offset & (~31 * GET_MODE_SIZE (mode));
8193 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8194 NULL_RTX);
8195 x = plus_constant (Pmode, xop0, delta);
8197 else if (offset < 0 && offset > -256)
8198 /* Small negative offsets are best done with a subtract before the
8199 dereference; forcing these into a register normally takes two
8200 instructions. */
8201 x = force_operand (x, NULL_RTX);
8202 else
8204 /* For the remaining cases, force the constant into a register. */
8205 xop1 = force_reg (SImode, xop1);
8206 x = gen_rtx_PLUS (SImode, xop0, xop1);
8209 else if (GET_CODE (x) == PLUS
8210 && s_register_operand (XEXP (x, 1), SImode)
8211 && !s_register_operand (XEXP (x, 0), SImode))
8213 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8215 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
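/* Worked example (added for illustration): for an SImode access at
   base + 300, the offset folding above rewrites the address as
   (base + 252) + 48, so the residual offset again fits the immediate-offset
   form of the Thumb-1 load/store encoding.  */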
8218 if (flag_pic)
8220 /* We need to find and carefully transform any SYMBOL and LABEL
8221 references; so go back to the original address expression. */
8222 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8224 if (new_x != orig_x)
8225 x = new_x;
8228 return x;
8231 /* Return TRUE if X contains any TLS symbol references. */
8233 bool
8234 arm_tls_referenced_p (rtx x)
8236 if (! TARGET_HAVE_TLS)
8237 return false;
8239 subrtx_iterator::array_type array;
8240 FOR_EACH_SUBRTX (iter, array, x, ALL)
8242 const_rtx x = *iter;
8243 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8244 return true;
8246 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8247 TLS offsets, not real symbol references. */
8248 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8249 iter.skip_subrtxes ();
8251 return false;
8254 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8256 On the ARM, allow any integer (invalid ones are removed later by insn
8257 patterns), nice doubles and symbol_refs which refer to the function's
8258 constant pool XXX.
8260 When generating PIC, allow anything. */
8262 static bool
8263 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8265 return flag_pic || !label_mentioned_p (x);
8268 static bool
8269 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8271 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8272 RTXes. These RTXes must therefore be allowed for Thumb-1 so that, when run
8273 for ARMv8-M Baseline or later, the result is valid. */
8274 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8275 x = XEXP (x, 0);
8277 return (CONST_INT_P (x)
8278 || CONST_DOUBLE_P (x)
8279 || CONSTANT_ADDRESS_P (x)
8280 || flag_pic);
8283 static bool
8284 arm_legitimate_constant_p (machine_mode mode, rtx x)
8286 return (!arm_cannot_force_const_mem (mode, x)
8287 && (TARGET_32BIT
8288 ? arm_legitimate_constant_p_1 (mode, x)
8289 : thumb_legitimate_constant_p (mode, x)));
8292 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8294 static bool
8295 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8297 rtx base, offset;
8299 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8301 split_const (x, &base, &offset);
8302 if (GET_CODE (base) == SYMBOL_REF
8303 && !offset_within_block_p (base, INTVAL (offset)))
8304 return true;
8306 return arm_tls_referenced_p (x);
8309 #define REG_OR_SUBREG_REG(X) \
8310 (REG_P (X) \
8311 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8313 #define REG_OR_SUBREG_RTX(X) \
8314 (REG_P (X) ? (X) : SUBREG_REG (X))
8316 static inline int
8317 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8319 machine_mode mode = GET_MODE (x);
8320 int total, words;
8322 switch (code)
8324 case ASHIFT:
8325 case ASHIFTRT:
8326 case LSHIFTRT:
8327 case ROTATERT:
8328 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8330 case PLUS:
8331 case MINUS:
8332 case COMPARE:
8333 case NEG:
8334 case NOT:
8335 return COSTS_N_INSNS (1);
8337 case MULT:
8338 if (CONST_INT_P (XEXP (x, 1)))
8340 int cycles = 0;
8341 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8343 while (i)
8345 i >>= 2;
8346 cycles++;
8348 return COSTS_N_INSNS (2) + cycles;
8350 return COSTS_N_INSNS (1) + 16;
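/* Worked example (added for illustration): for a multiply by the constant
   0x55 the loop above shifts the value right two bits per iteration
   (0x55 -> 0x15 -> 0x5 -> 0x1 -> 0), so four extra cycles are added on top
   of COSTS_N_INSNS (2), modelling a multiplier whose latency grows with the
   number of significant bits in the constant (two bits per cycle).  */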
8352 case SET:
8353 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8354 the mode. */
8355 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8356 return (COSTS_N_INSNS (words)
8357 + 4 * ((MEM_P (SET_SRC (x)))
8358 + MEM_P (SET_DEST (x))));
8360 case CONST_INT:
8361 if (outer == SET)
8363 if (UINTVAL (x) < 256
8364 /* 16-bit constant. */
8365 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8366 return 0;
8367 if (thumb_shiftable_const (INTVAL (x)))
8368 return COSTS_N_INSNS (2);
8369 return COSTS_N_INSNS (3);
8371 else if ((outer == PLUS || outer == COMPARE)
8372 && INTVAL (x) < 256 && INTVAL (x) > -256)
8373 return 0;
8374 else if ((outer == IOR || outer == XOR || outer == AND)
8375 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8376 return COSTS_N_INSNS (1);
8377 else if (outer == AND)
8379 int i;
8380 /* This duplicates the tests in the andsi3 expander. */
8381 for (i = 9; i <= 31; i++)
8382 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8383 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8384 return COSTS_N_INSNS (2);
8386 else if (outer == ASHIFT || outer == ASHIFTRT
8387 || outer == LSHIFTRT)
8388 return 0;
8389 return COSTS_N_INSNS (2);
8391 case CONST:
8392 case CONST_DOUBLE:
8393 case LABEL_REF:
8394 case SYMBOL_REF:
8395 return COSTS_N_INSNS (3);
8397 case UDIV:
8398 case UMOD:
8399 case DIV:
8400 case MOD:
8401 return 100;
8403 case TRUNCATE:
8404 return 99;
8406 case AND:
8407 case XOR:
8408 case IOR:
8409 /* XXX guess. */
8410 return 8;
8412 case MEM:
8413 /* XXX another guess. */
8414 /* Memory costs quite a lot for the first word, but subsequent words
8415 load at the equivalent of a single insn each. */
8416 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8417 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8418 ? 4 : 0));
8420 case IF_THEN_ELSE:
8421 /* XXX a guess. */
8422 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8423 return 14;
8424 return 2;
8426 case SIGN_EXTEND:
8427 case ZERO_EXTEND:
8428 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8429 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8431 if (mode == SImode)
8432 return total;
8434 if (arm_arch6)
8435 return total + COSTS_N_INSNS (1);
8437 /* Assume a two-shift sequence. Increase the cost slightly so
8438 we prefer actual shifts over an extend operation. */
8439 return total + 1 + COSTS_N_INSNS (2);
8441 default:
8442 return 99;
8446 /* Estimates the size cost of thumb1 instructions.
8447 For now most of the code is copied from thumb1_rtx_costs. We need more
8448 fine-grained tuning when we have more related test cases. */
8449 static inline int
8450 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8452 machine_mode mode = GET_MODE (x);
8453 int words, cost;
8455 switch (code)
8457 case ASHIFT:
8458 case ASHIFTRT:
8459 case LSHIFTRT:
8460 case ROTATERT:
8461 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8463 case PLUS:
8464 case MINUS:
8465 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8466 patterns defined by RTL expansion, especially for the expansion of
8467 multiplication. */
8468 if ((GET_CODE (XEXP (x, 0)) == MULT
8469 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8470 || (GET_CODE (XEXP (x, 1)) == MULT
8471 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8472 return COSTS_N_INSNS (2);
8473 /* Fall through. */
8474 case COMPARE:
8475 case NEG:
8476 case NOT:
8477 return COSTS_N_INSNS (1);
8479 case MULT:
8480 if (CONST_INT_P (XEXP (x, 1)))
8482 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8483 into a register first. */
8484 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8485 /* For targets that have a very small, high-latency multiply
8486 unit, we prefer to synthesize the multiplication with up to 5 instructions,
8487 giving a good balance between size and performance. */
8488 if (arm_arch6m && arm_m_profile_small_mul)
8489 return COSTS_N_INSNS (5);
8490 else
8491 return COSTS_N_INSNS (1) + const_size;
8493 return COSTS_N_INSNS (1);
8495 case SET:
8496 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8497 the mode. */
8498 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8499 cost = COSTS_N_INSNS (words);
8500 if (satisfies_constraint_J (SET_SRC (x))
8501 || satisfies_constraint_K (SET_SRC (x))
8502 /* Too big an immediate for a 2-byte mov, using MOVT. */
8503 || (CONST_INT_P (SET_SRC (x))
8504 && UINTVAL (SET_SRC (x)) >= 256
8505 && TARGET_HAVE_MOVT
8506 && satisfies_constraint_j (SET_SRC (x)))
8507 /* thumb1_movdi_insn. */
8508 || ((words > 1) && MEM_P (SET_SRC (x))))
8509 cost += COSTS_N_INSNS (1);
8510 return cost;
8512 case CONST_INT:
8513 if (outer == SET)
8515 if (UINTVAL (x) < 256)
8516 return COSTS_N_INSNS (1);
8517 /* movw is 4 bytes long. */
8518 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
8519 return COSTS_N_INSNS (2);
8520 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8521 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8522 return COSTS_N_INSNS (2);
8523 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8524 if (thumb_shiftable_const (INTVAL (x)))
8525 return COSTS_N_INSNS (2);
8526 return COSTS_N_INSNS (3);
8528 else if ((outer == PLUS || outer == COMPARE)
8529 && INTVAL (x) < 256 && INTVAL (x) > -256)
8530 return 0;
8531 else if ((outer == IOR || outer == XOR || outer == AND)
8532 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8533 return COSTS_N_INSNS (1);
8534 else if (outer == AND)
8536 int i;
8537 /* This duplicates the tests in the andsi3 expander. */
8538 for (i = 9; i <= 31; i++)
8539 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8540 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8541 return COSTS_N_INSNS (2);
8543 else if (outer == ASHIFT || outer == ASHIFTRT
8544 || outer == LSHIFTRT)
8545 return 0;
8546 return COSTS_N_INSNS (2);
8548 case CONST:
8549 case CONST_DOUBLE:
8550 case LABEL_REF:
8551 case SYMBOL_REF:
8552 return COSTS_N_INSNS (3);
8554 case UDIV:
8555 case UMOD:
8556 case DIV:
8557 case MOD:
8558 return 100;
8560 case TRUNCATE:
8561 return 99;
8563 case AND:
8564 case XOR:
8565 case IOR:
8566 return COSTS_N_INSNS (1);
8568 case MEM:
8569 return (COSTS_N_INSNS (1)
8570 + COSTS_N_INSNS (1)
8571 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8572 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8573 ? COSTS_N_INSNS (1) : 0));
8575 case IF_THEN_ELSE:
8576 /* XXX a guess. */
8577 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8578 return 14;
8579 return 2;
8581 case ZERO_EXTEND:
8582 /* XXX still guessing. */
8583 switch (GET_MODE (XEXP (x, 0)))
8585 case QImode:
8586 return (1 + (mode == DImode ? 4 : 0)
8587 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8589 case HImode:
8590 return (4 + (mode == DImode ? 4 : 0)
8591 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8593 case SImode:
8594 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8596 default:
8597 return 99;
8600 default:
8601 return 99;
8605 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8606 operand, then return the operand that is being shifted. If the shift
8607 is not by a constant, then set SHIFT_REG to point to the shift-amount operand.
8608 Return NULL if OP is not a shifter operand. */
8609 static rtx
8610 shifter_op_p (rtx op, rtx *shift_reg)
8612 enum rtx_code code = GET_CODE (op);
8614 if (code == MULT && CONST_INT_P (XEXP (op, 1))
8615 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
8616 return XEXP (op, 0);
8617 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
8618 return XEXP (op, 0);
8619 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
8620 || code == ASHIFTRT)
8622 if (!CONST_INT_P (XEXP (op, 1)))
8623 *shift_reg = XEXP (op, 1);
8624 return XEXP (op, 0);
8627 return NULL;
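/* Illustrative examples (not part of the original source): for
   (mult (reg r1) (const_int 4)) this returns (reg r1), treating the
   multiply as a left shift by two; for (ashift (reg r1) (reg r2)) it
   returns (reg r1) and sets *SHIFT_REG to (reg r2).  */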
8630 static bool
8631 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
8633 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
8634 rtx_code code = GET_CODE (x);
8635 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
8637 switch (XINT (x, 1))
8639 case UNSPEC_UNALIGNED_LOAD:
8640 /* We can only do unaligned loads into the integer unit, and we can't
8641 use LDM or LDRD. */
8642 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
8643 if (speed_p)
8644 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
8645 + extra_cost->ldst.load_unaligned);
8647 #ifdef NOT_YET
8648 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
8649 ADDR_SPACE_GENERIC, speed_p);
8650 #endif
8651 return true;
8653 case UNSPEC_UNALIGNED_STORE:
8654 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
8655 if (speed_p)
8656 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
8657 + extra_cost->ldst.store_unaligned);
8659 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
8660 #ifdef NOT_YET
8661 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
8662 ADDR_SPACE_GENERIC, speed_p);
8663 #endif
8664 return true;
8666 case UNSPEC_VRINTZ:
8667 case UNSPEC_VRINTP:
8668 case UNSPEC_VRINTM:
8669 case UNSPEC_VRINTR:
8670 case UNSPEC_VRINTX:
8671 case UNSPEC_VRINTA:
8672 if (speed_p)
8673 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
8675 return true;
8676 default:
8677 *cost = COSTS_N_INSNS (2);
8678 break;
8680 return true;
8683 /* Cost of a libcall. We assume one insn per argument, an amount for the
8684 call (one insn for -Os) and then one for processing the result. */
8685 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
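/* Worked example (added for illustration): LIBCALL_COST (2) evaluates to
   COSTS_N_INSNS (20) when costing for speed (two argument insns plus the
   assumed 18 units of call-and-result overhead) and to COSTS_N_INSNS (4)
   when costing for size.  */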
8687 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
8688 do \
8690 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
8691 if (shift_op != NULL \
8692 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
8694 if (shift_reg) \
8696 if (speed_p) \
8697 *cost += extra_cost->alu.arith_shift_reg; \
8698 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
8699 ASHIFT, 1, speed_p); \
8701 else if (speed_p) \
8702 *cost += extra_cost->alu.arith_shift; \
8704 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
8705 ASHIFT, 0, speed_p) \
8706 + rtx_cost (XEXP (x, 1 - IDX), \
8707 GET_MODE (shift_op), \
8708 OP, 1, speed_p)); \
8709 return true; \
8712 while (0);
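/* Illustrative note (not part of the original source): the macro above is
   used for narrow (QImode/HImode) PLUS and MINUS RTXs; for instance an
   HImode (plus (ashift (reg) (const_int 2)) (reg)) would typically be
   costed as a single arith-with-shift operation instead of recursing into
   both operands separately.  */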
8714 /* RTX costs. Make an estimate of the cost of executing the operation
8715 X, which is contained within an operation with code OUTER_CODE.
8716 SPEED_P indicates whether the cost desired is the performance cost,
8717 or the size cost. The estimate is stored in COST and the return
8718 value is TRUE if the cost calculation is final, or FALSE if the
8719 caller should recurse through the operands of X to add additional
8720 costs.
8722 We currently make no attempt to model the size savings of Thumb-2
8723 16-bit instructions. At the normal points in compilation where
8724 this code is called we have no measure of whether the condition
8725 flags are live or not, and thus no realistic way to determine what
8726 the size will eventually be. */
8727 static bool
8728 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
8729 const struct cpu_cost_table *extra_cost,
8730 int *cost, bool speed_p)
8732 machine_mode mode = GET_MODE (x);
8734 *cost = COSTS_N_INSNS (1);
8736 if (TARGET_THUMB1)
8738 if (speed_p)
8739 *cost = thumb1_rtx_costs (x, code, outer_code);
8740 else
8741 *cost = thumb1_size_rtx_costs (x, code, outer_code);
8742 return true;
8745 switch (code)
8747 case SET:
8748 *cost = 0;
8749 /* SET RTXs don't have a mode so we get it from the destination. */
8750 mode = GET_MODE (SET_DEST (x));
8752 if (REG_P (SET_SRC (x))
8753 && REG_P (SET_DEST (x)))
8755 /* Assume that most copies can be done with a single insn,
8756 unless we don't have HW FP, in which case everything
8757 larger than word mode will require two insns. */
8758 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
8759 && GET_MODE_SIZE (mode) > 4)
8760 || mode == DImode)
8761 ? 2 : 1);
8762 /* Conditional register moves can be encoded
8763 in 16 bits in Thumb mode. */
8764 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
8765 *cost >>= 1;
8767 return true;
8770 if (CONST_INT_P (SET_SRC (x)))
8772 /* Handle CONST_INT here, since the value doesn't have a mode
8773 and we would otherwise be unable to work out the true cost. */
8774 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
8775 0, speed_p);
8776 outer_code = SET;
8777 /* Slightly lower the cost of setting a core reg to a constant.
8778 This helps break up chains and allows for better scheduling. */
8779 if (REG_P (SET_DEST (x))
8780 && REGNO (SET_DEST (x)) <= LR_REGNUM)
8781 *cost -= 1;
8782 x = SET_SRC (x);
8783 /* Immediate moves with an immediate in the range [0, 255] can be
8784 encoded in 16 bits in Thumb mode. */
8785 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
8786 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
8787 *cost >>= 1;
8788 goto const_int_cost;
8791 return false;
8793 case MEM:
8794 /* A memory access costs one insn if the mode is small or the address is
8795 a single register; otherwise it costs one insn per word. */
8796 if (REG_P (XEXP (x, 0)))
8797 *cost = COSTS_N_INSNS (1);
8798 else if (flag_pic
8799 && GET_CODE (XEXP (x, 0)) == PLUS
8800 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8801 /* This will be split into two instructions.
8802 See arm.md:calculate_pic_address. */
8803 *cost = COSTS_N_INSNS (2);
8804 else
8805 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8807 /* For speed optimizations, add the costs of the address and
8808 accessing memory. */
8809 if (speed_p)
8810 #ifdef NOT_YET
8811 *cost += (extra_cost->ldst.load
8812 + arm_address_cost (XEXP (x, 0), mode,
8813 ADDR_SPACE_GENERIC, speed_p));
8814 #else
8815 *cost += extra_cost->ldst.load;
8816 #endif
8817 return true;
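/* Illustrative example (not part of the original source): under this
   scheme a DImode load from a reg+offset address starts at
   COSTS_N_INSNS (ARM_NUM_REGS (DImode)), i.e. two insns, with the
   per-access load cost from the tuning table added on top when costing
   for speed.  */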
8819 case PARALLEL:
8821 /* Calculations of LDM costs are complex. We assume an initial cost
8822 (ldm_1st) which will load the number of registers mentioned in
8823 ldm_regs_per_insn_1st registers; then each additional
8824 ldm_regs_per_insn_subsequent registers cost one more insn. The
8825 formula for N regs is thus:
8827 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
8828 + ldm_regs_per_insn_subsequent - 1)
8829 / ldm_regs_per_insn_subsequent).
8831 Additional costs may also be added for addressing. A similar
8832 formula is used for STM. */
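/* Worked example (added for illustration, with made-up tuning values): if
   ldm_regs_per_insn_1st is 4 and ldm_regs_per_insn_subsequent is 2, a
   7-register LDM costs ldm_1st + COSTS_N_INSNS ((3 + 1) / 2), i.e. two
   additional insns beyond the initial LDM cost.  */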
8834 bool is_ldm = load_multiple_operation (x, SImode);
8835 bool is_stm = store_multiple_operation (x, SImode);
8837 if (is_ldm || is_stm)
8839 if (speed_p)
8841 HOST_WIDE_INT nregs = XVECLEN (x, 0);
8842 HOST_WIDE_INT regs_per_insn_1st = is_ldm
8843 ? extra_cost->ldst.ldm_regs_per_insn_1st
8844 : extra_cost->ldst.stm_regs_per_insn_1st;
8845 HOST_WIDE_INT regs_per_insn_sub = is_ldm
8846 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
8847 : extra_cost->ldst.stm_regs_per_insn_subsequent;
8849 *cost += regs_per_insn_1st
8850 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
8851 + regs_per_insn_sub - 1)
8852 / regs_per_insn_sub);
8853 return true;
8857 return false;
8859 case DIV:
8860 case UDIV:
8861 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8862 && (mode == SFmode || !TARGET_VFP_SINGLE))
8863 *cost += COSTS_N_INSNS (speed_p
8864 ? extra_cost->fp[mode != SFmode].div : 0);
8865 else if (mode == SImode && TARGET_IDIV)
8866 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
8867 else
8868 *cost = LIBCALL_COST (2);
8869 return false; /* All arguments must be in registers. */
8871 case MOD:
8872 /* MOD by a power of 2 can be expanded as:
8873 rsbs r1, r0, #0
8874 and r0, r0, #(n - 1)
8875 and r1, r1, #(n - 1)
8876 rsbpl r0, r1, #0. */
8877 if (CONST_INT_P (XEXP (x, 1))
8878 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
8879 && mode == SImode)
8881 *cost += COSTS_N_INSNS (3);
8883 if (speed_p)
8884 *cost += 2 * extra_cost->alu.logical
8885 + extra_cost->alu.arith;
8886 return true;
8889 /* Fall-through. */
8890 case UMOD:
8891 *cost = LIBCALL_COST (2);
8892 return false; /* All arguments must be in registers. */
8894 case ROTATE:
8895 if (mode == SImode && REG_P (XEXP (x, 1)))
8897 *cost += (COSTS_N_INSNS (1)
8898 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
8899 if (speed_p)
8900 *cost += extra_cost->alu.shift_reg;
8901 return true;
8903 /* Fall through */
8904 case ROTATERT:
8905 case ASHIFT:
8906 case LSHIFTRT:
8907 case ASHIFTRT:
8908 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8910 *cost += (COSTS_N_INSNS (2)
8911 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
8912 if (speed_p)
8913 *cost += 2 * extra_cost->alu.shift;
8914 return true;
8916 else if (mode == SImode)
8918 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
8919 /* Slightly disparage register shifts at -Os, but not by much. */
8920 if (!CONST_INT_P (XEXP (x, 1)))
8921 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
8922 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
8923 return true;
8925 else if (GET_MODE_CLASS (mode) == MODE_INT
8926 && GET_MODE_SIZE (mode) < 4)
8928 if (code == ASHIFT)
8930 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
8931 /* Slightly disparage register shifts at -Os, but not by
8932 much. */
8933 if (!CONST_INT_P (XEXP (x, 1)))
8934 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
8935 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
8937 else if (code == LSHIFTRT || code == ASHIFTRT)
8939 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
8941 /* Can use SBFX/UBFX. */
8942 if (speed_p)
8943 *cost += extra_cost->alu.bfx;
8944 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
8946 else
8948 *cost += COSTS_N_INSNS (1);
8949 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
8950 if (speed_p)
8952 if (CONST_INT_P (XEXP (x, 1)))
8953 *cost += 2 * extra_cost->alu.shift;
8954 else
8955 *cost += (extra_cost->alu.shift
8956 + extra_cost->alu.shift_reg);
8958 else
8959 /* Slightly disparage register shifts. */
8960 *cost += !CONST_INT_P (XEXP (x, 1));
8963 else /* Rotates. */
8965 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
8966 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
8967 if (speed_p)
8969 if (CONST_INT_P (XEXP (x, 1)))
8970 *cost += (2 * extra_cost->alu.shift
8971 + extra_cost->alu.log_shift);
8972 else
8973 *cost += (extra_cost->alu.shift
8974 + extra_cost->alu.shift_reg
8975 + extra_cost->alu.log_shift_reg);
8978 return true;
8981 *cost = LIBCALL_COST (2);
8982 return false;
8984 case BSWAP:
8985 if (arm_arch6)
8987 if (mode == SImode)
8989 if (speed_p)
8990 *cost += extra_cost->alu.rev;
8992 return false;
8995 else
8997 /* No rev instruction available. Look at arm_legacy_rev
8998 and thumb_legacy_rev for the form of RTL used then. */
8999 if (TARGET_THUMB)
9001 *cost += COSTS_N_INSNS (9);
9003 if (speed_p)
9005 *cost += 6 * extra_cost->alu.shift;
9006 *cost += 3 * extra_cost->alu.logical;
9009 else
9011 *cost += COSTS_N_INSNS (4);
9013 if (speed_p)
9015 *cost += 2 * extra_cost->alu.shift;
9016 *cost += extra_cost->alu.arith_shift;
9017 *cost += 2 * extra_cost->alu.logical;
9020 return true;
9022 return false;
9024 case MINUS:
9025 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9026 && (mode == SFmode || !TARGET_VFP_SINGLE))
9028 if (GET_CODE (XEXP (x, 0)) == MULT
9029 || GET_CODE (XEXP (x, 1)) == MULT)
9031 rtx mul_op0, mul_op1, sub_op;
9033 if (speed_p)
9034 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9036 if (GET_CODE (XEXP (x, 0)) == MULT)
9038 mul_op0 = XEXP (XEXP (x, 0), 0);
9039 mul_op1 = XEXP (XEXP (x, 0), 1);
9040 sub_op = XEXP (x, 1);
9042 else
9044 mul_op0 = XEXP (XEXP (x, 1), 0);
9045 mul_op1 = XEXP (XEXP (x, 1), 1);
9046 sub_op = XEXP (x, 0);
9049 /* The first operand of the multiply may be optionally
9050 negated. */
9051 if (GET_CODE (mul_op0) == NEG)
9052 mul_op0 = XEXP (mul_op0, 0);
9054 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9055 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9056 + rtx_cost (sub_op, mode, code, 0, speed_p));
9058 return true;
9061 if (speed_p)
9062 *cost += extra_cost->fp[mode != SFmode].addsub;
9063 return false;
9066 if (mode == SImode)
9068 rtx shift_by_reg = NULL;
9069 rtx shift_op;
9070 rtx non_shift_op;
9072 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9073 if (shift_op == NULL)
9075 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9076 non_shift_op = XEXP (x, 0);
9078 else
9079 non_shift_op = XEXP (x, 1);
9081 if (shift_op != NULL)
9083 if (shift_by_reg != NULL)
9085 if (speed_p)
9086 *cost += extra_cost->alu.arith_shift_reg;
9087 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9089 else if (speed_p)
9090 *cost += extra_cost->alu.arith_shift;
9092 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9093 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9094 return true;
9097 if (arm_arch_thumb2
9098 && GET_CODE (XEXP (x, 1)) == MULT)
9100 /* MLS. */
9101 if (speed_p)
9102 *cost += extra_cost->mult[0].add;
9103 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9104 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9105 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9106 return true;
9109 if (CONST_INT_P (XEXP (x, 0)))
9111 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9112 INTVAL (XEXP (x, 0)), NULL_RTX,
9113 NULL_RTX, 1, 0);
9114 *cost = COSTS_N_INSNS (insns);
9115 if (speed_p)
9116 *cost += insns * extra_cost->alu.arith;
9117 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9118 return true;
9120 else if (speed_p)
9121 *cost += extra_cost->alu.arith;
9123 return false;
9126 if (GET_MODE_CLASS (mode) == MODE_INT
9127 && GET_MODE_SIZE (mode) < 4)
9129 rtx shift_op, shift_reg;
9130 shift_reg = NULL;
9132 /* We check both sides of the MINUS for shifter operands since,
9133 unlike PLUS, it's not commutative. */
9135 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9136 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9138 /* Slightly disparage, as we might need to widen the result. */
9139 *cost += 1;
9140 if (speed_p)
9141 *cost += extra_cost->alu.arith;
9143 if (CONST_INT_P (XEXP (x, 0)))
9145 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9146 return true;
9149 return false;
9152 if (mode == DImode)
9154 *cost += COSTS_N_INSNS (1);
9156 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9158 rtx op1 = XEXP (x, 1);
9160 if (speed_p)
9161 *cost += 2 * extra_cost->alu.arith;
9163 if (GET_CODE (op1) == ZERO_EXTEND)
9164 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9165 0, speed_p);
9166 else
9167 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9168 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9169 0, speed_p);
9170 return true;
9172 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9174 if (speed_p)
9175 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9176 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9177 0, speed_p)
9178 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9179 return true;
9181 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9182 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9184 if (speed_p)
9185 *cost += (extra_cost->alu.arith
9186 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9187 ? extra_cost->alu.arith
9188 : extra_cost->alu.arith_shift));
9189 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9190 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9191 GET_CODE (XEXP (x, 1)), 0, speed_p));
9192 return true;
9195 if (speed_p)
9196 *cost += 2 * extra_cost->alu.arith;
9197 return false;
9200 /* Vector mode? */
9202 *cost = LIBCALL_COST (2);
9203 return false;
9205 case PLUS:
9206 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9207 && (mode == SFmode || !TARGET_VFP_SINGLE))
9209 if (GET_CODE (XEXP (x, 0)) == MULT)
9211 rtx mul_op0, mul_op1, add_op;
9213 if (speed_p)
9214 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9216 mul_op0 = XEXP (XEXP (x, 0), 0);
9217 mul_op1 = XEXP (XEXP (x, 0), 1);
9218 add_op = XEXP (x, 1);
9220 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9221 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9222 + rtx_cost (add_op, mode, code, 0, speed_p));
9224 return true;
9227 if (speed_p)
9228 *cost += extra_cost->fp[mode != SFmode].addsub;
9229 return false;
9231 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9233 *cost = LIBCALL_COST (2);
9234 return false;
9237 /* Narrow modes can be synthesized in SImode, but the range
9238 of useful sub-operations is limited. Check for shift operations
9239 on one of the operands. Only left shifts can be used in the
9240 narrow modes. */
9241 if (GET_MODE_CLASS (mode) == MODE_INT
9242 && GET_MODE_SIZE (mode) < 4)
9244 rtx shift_op, shift_reg;
9245 shift_reg = NULL;
9247 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9249 if (CONST_INT_P (XEXP (x, 1)))
9251 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9252 INTVAL (XEXP (x, 1)), NULL_RTX,
9253 NULL_RTX, 1, 0);
9254 *cost = COSTS_N_INSNS (insns);
9255 if (speed_p)
9256 *cost += insns * extra_cost->alu.arith;
9257 /* Slightly penalize a narrow operation as the result may
9258 need widening. */
9259 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9260 return true;
9263 /* Slightly penalize a narrow operation as the result may
9264 need widening. */
9265 *cost += 1;
9266 if (speed_p)
9267 *cost += extra_cost->alu.arith;
9269 return false;
9272 if (mode == SImode)
9274 rtx shift_op, shift_reg;
9276 if (TARGET_INT_SIMD
9277 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9278 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9280 /* UXTA[BH] or SXTA[BH]. */
9281 if (speed_p)
9282 *cost += extra_cost->alu.extend_arith;
9283 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9284 0, speed_p)
9285 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9286 return true;
9289 shift_reg = NULL;
9290 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9291 if (shift_op != NULL)
9293 if (shift_reg)
9295 if (speed_p)
9296 *cost += extra_cost->alu.arith_shift_reg;
9297 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9299 else if (speed_p)
9300 *cost += extra_cost->alu.arith_shift;
9302 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9303 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9304 return true;
9306 if (GET_CODE (XEXP (x, 0)) == MULT)
9308 rtx mul_op = XEXP (x, 0);
9310 if (TARGET_DSP_MULTIPLY
9311 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9312 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9313 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9314 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9315 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9316 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9317 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9318 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9319 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9320 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9321 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9322 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9323 == 16))))))
9325 /* SMLA[BT][BT]. */
9326 if (speed_p)
9327 *cost += extra_cost->mult[0].extend_add;
9328 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9329 SIGN_EXTEND, 0, speed_p)
9330 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9331 SIGN_EXTEND, 0, speed_p)
9332 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9333 return true;
9336 if (speed_p)
9337 *cost += extra_cost->mult[0].add;
9338 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9339 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9340 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9341 return true;
9343 if (CONST_INT_P (XEXP (x, 1)))
9345 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9346 INTVAL (XEXP (x, 1)), NULL_RTX,
9347 NULL_RTX, 1, 0);
9348 *cost = COSTS_N_INSNS (insns);
9349 if (speed_p)
9350 *cost += insns * extra_cost->alu.arith;
9351 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9352 return true;
9354 else if (speed_p)
9355 *cost += extra_cost->alu.arith;
9357 return false;
9360 if (mode == DImode)
9362 if (arm_arch3m
9363 && GET_CODE (XEXP (x, 0)) == MULT
9364 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9365 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9366 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9367 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9369 if (speed_p)
9370 *cost += extra_cost->mult[1].extend_add;
9371 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9372 ZERO_EXTEND, 0, speed_p)
9373 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9374 ZERO_EXTEND, 0, speed_p)
9375 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9376 return true;
9379 *cost += COSTS_N_INSNS (1);
9381 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9382 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9384 if (speed_p)
9385 *cost += (extra_cost->alu.arith
9386 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9387 ? extra_cost->alu.arith
9388 : extra_cost->alu.arith_shift));
9390 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9391 0, speed_p)
9392 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9393 return true;
9396 if (speed_p)
9397 *cost += 2 * extra_cost->alu.arith;
9398 return false;
9401 /* Vector mode? */
9402 *cost = LIBCALL_COST (2);
9403 return false;
9404 case IOR:
9405 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9407 if (speed_p)
9408 *cost += extra_cost->alu.rev;
9410 return true;
9412 /* Fall through. */
9413 case AND: case XOR:
9414 if (mode == SImode)
9416 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9417 rtx op0 = XEXP (x, 0);
9418 rtx shift_op, shift_reg;
9420 if (subcode == NOT
9421 && (code == AND
9422 || (code == IOR && TARGET_THUMB2)))
9423 op0 = XEXP (op0, 0);
9425 shift_reg = NULL;
9426 shift_op = shifter_op_p (op0, &shift_reg);
9427 if (shift_op != NULL)
9429 if (shift_reg)
9431 if (speed_p)
9432 *cost += extra_cost->alu.log_shift_reg;
9433 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9435 else if (speed_p)
9436 *cost += extra_cost->alu.log_shift;
9438 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9439 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9440 return true;
9443 if (CONST_INT_P (XEXP (x, 1)))
9445 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9446 INTVAL (XEXP (x, 1)), NULL_RTX,
9447 NULL_RTX, 1, 0);
9449 *cost = COSTS_N_INSNS (insns);
9450 if (speed_p)
9451 *cost += insns * extra_cost->alu.logical;
9452 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9453 return true;
9456 if (speed_p)
9457 *cost += extra_cost->alu.logical;
9458 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9459 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9460 return true;
9463 if (mode == DImode)
9465 rtx op0 = XEXP (x, 0);
9466 enum rtx_code subcode = GET_CODE (op0);
9468 *cost += COSTS_N_INSNS (1);
9470 if (subcode == NOT
9471 && (code == AND
9472 || (code == IOR && TARGET_THUMB2)))
9473 op0 = XEXP (op0, 0);
9475 if (GET_CODE (op0) == ZERO_EXTEND)
9477 if (speed_p)
9478 *cost += 2 * extra_cost->alu.logical;
9480 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9481 0, speed_p)
9482 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9483 return true;
9485 else if (GET_CODE (op0) == SIGN_EXTEND)
9487 if (speed_p)
9488 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9490 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9491 0, speed_p)
9492 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9493 return true;
9496 if (speed_p)
9497 *cost += 2 * extra_cost->alu.logical;
9499 return true;
9501 /* Vector mode? */
9503 *cost = LIBCALL_COST (2);
9504 return false;
9506 case MULT:
9507 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9508 && (mode == SFmode || !TARGET_VFP_SINGLE))
9510 rtx op0 = XEXP (x, 0);
9512 if (GET_CODE (op0) == NEG && !flag_rounding_math)
9513 op0 = XEXP (op0, 0);
9515 if (speed_p)
9516 *cost += extra_cost->fp[mode != SFmode].mult;
9518 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
9519 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
9520 return true;
9522 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9524 *cost = LIBCALL_COST (2);
9525 return false;
9528 if (mode == SImode)
9530 if (TARGET_DSP_MULTIPLY
9531 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9532 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9533 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9534 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9535 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9536 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9537 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9538 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9539 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9540 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9541 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9542 && (INTVAL (XEXP (XEXP (x, 1), 1))
9543 == 16))))))
9545 /* SMUL[TB][TB]. */
9546 if (speed_p)
9547 *cost += extra_cost->mult[0].extend;
9548 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
9549 SIGN_EXTEND, 0, speed_p);
9550 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
9551 SIGN_EXTEND, 1, speed_p);
9552 return true;
9554 if (speed_p)
9555 *cost += extra_cost->mult[0].simple;
9556 return false;
9559 if (mode == DImode)
9561 if (arm_arch3m
9562 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9563 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9564 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9565 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9567 if (speed_p)
9568 *cost += extra_cost->mult[1].extend;
9569 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
9570 ZERO_EXTEND, 0, speed_p)
9571 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9572 ZERO_EXTEND, 0, speed_p));
9573 return true;
9576 *cost = LIBCALL_COST (2);
9577 return false;
9580 /* Vector mode? */
9581 *cost = LIBCALL_COST (2);
9582 return false;
9584 case NEG:
9585 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9586 && (mode == SFmode || !TARGET_VFP_SINGLE))
9588 if (GET_CODE (XEXP (x, 0)) == MULT)
9590 /* VNMUL. */
9591 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
9592 return true;
9595 if (speed_p)
9596 *cost += extra_cost->fp[mode != SFmode].neg;
9598 return false;
9600 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9602 *cost = LIBCALL_COST (1);
9603 return false;
9606 if (mode == SImode)
9608 if (GET_CODE (XEXP (x, 0)) == ABS)
9610 *cost += COSTS_N_INSNS (1);
9611 /* Assume the non-flag-changing variant. */
9612 if (speed_p)
9613 *cost += (extra_cost->alu.log_shift
9614 + extra_cost->alu.arith_shift);
9615 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
9616 return true;
9619 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
9620 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
9622 *cost += COSTS_N_INSNS (1);
9623 /* No extra cost for MOV imm and MVN imm. */
9624 /* If the comparison op is using the flags, there's no further
9625 cost; otherwise we need to add the cost of the comparison. */
9626 if (!(REG_P (XEXP (XEXP (x, 0), 0))
9627 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
9628 && XEXP (XEXP (x, 0), 1) == const0_rtx))
9630 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
9631 *cost += (COSTS_N_INSNS (1)
9632 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
9633 0, speed_p)
9634 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
9635 1, speed_p));
9636 if (speed_p)
9637 *cost += extra_cost->alu.arith;
9639 return true;
9642 if (speed_p)
9643 *cost += extra_cost->alu.arith;
9644 return false;
9647 if (GET_MODE_CLASS (mode) == MODE_INT
9648 && GET_MODE_SIZE (mode) < 4)
9650 /* Slightly disparage, as we might need an extend operation. */
9651 *cost += 1;
9652 if (speed_p)
9653 *cost += extra_cost->alu.arith;
9654 return false;
9657 if (mode == DImode)
9659 *cost += COSTS_N_INSNS (1);
9660 if (speed_p)
9661 *cost += 2 * extra_cost->alu.arith;
9662 return false;
9665 /* Vector mode? */
9666 *cost = LIBCALL_COST (1);
9667 return false;
9669 case NOT:
9670 if (mode == SImode)
9672 rtx shift_op;
9673 rtx shift_reg = NULL;
9675 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9677 if (shift_op)
9679 if (shift_reg != NULL)
9681 if (speed_p)
9682 *cost += extra_cost->alu.log_shift_reg;
9683 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9685 else if (speed_p)
9686 *cost += extra_cost->alu.log_shift;
9687 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
9688 return true;
9691 if (speed_p)
9692 *cost += extra_cost->alu.logical;
9693 return false;
9695 if (mode == DImode)
9697 *cost += COSTS_N_INSNS (1);
9698 return false;
9701 /* Vector mode? */
9703 *cost += LIBCALL_COST (1);
9704 return false;
9706 case IF_THEN_ELSE:
9708 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9710 *cost += COSTS_N_INSNS (3);
9711 return true;
9713 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
9714 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
9716 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
9717 /* Assume that if one arm of the if_then_else is a register,
9718 it will be tied with the result and eliminate the
9719 conditional insn. */
9720 if (REG_P (XEXP (x, 1)))
9721 *cost += op2cost;
9722 else if (REG_P (XEXP (x, 2)))
9723 *cost += op1cost;
9724 else
9726 if (speed_p)
9728 if (extra_cost->alu.non_exec_costs_exec)
9729 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
9730 else
9731 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
9733 else
9734 *cost += op1cost + op2cost;
9737 return true;
9739 case COMPARE:
9740 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
9741 *cost = 0;
9742 else
9744 machine_mode op0mode;
9745 /* We'll mostly assume that the cost of a compare is the cost of the
9746 LHS. However, there are some notable exceptions. */
9748 /* Floating point compares are never done as side-effects. */
9749 op0mode = GET_MODE (XEXP (x, 0));
9750 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
9751 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
9753 if (speed_p)
9754 *cost += extra_cost->fp[op0mode != SFmode].compare;
9756 if (XEXP (x, 1) == CONST0_RTX (op0mode))
9758 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
9759 return true;
9762 return false;
9764 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
9766 *cost = LIBCALL_COST (2);
9767 return false;
9770 /* DImode compares normally take two insns. */
9771 if (op0mode == DImode)
9773 *cost += COSTS_N_INSNS (1);
9774 if (speed_p)
9775 *cost += 2 * extra_cost->alu.arith;
9776 return false;
9779 if (op0mode == SImode)
9781 rtx shift_op;
9782 rtx shift_reg;
9784 if (XEXP (x, 1) == const0_rtx
9785 && !(REG_P (XEXP (x, 0))
9786 || (GET_CODE (XEXP (x, 0)) == SUBREG
9787 && REG_P (SUBREG_REG (XEXP (x, 0))))))
9789 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
9791 /* Multiply operations that set the flags are often
9792 significantly more expensive. */
9793 if (speed_p
9794 && GET_CODE (XEXP (x, 0)) == MULT
9795 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
9796 *cost += extra_cost->mult[0].flag_setting;
9798 if (speed_p
9799 && GET_CODE (XEXP (x, 0)) == PLUS
9800 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9801 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
9802 0), 1), mode))
9803 *cost += extra_cost->mult[0].flag_setting;
9804 return true;
9807 shift_reg = NULL;
9808 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9809 if (shift_op != NULL)
9811 if (shift_reg != NULL)
9813 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
9814 1, speed_p);
9815 if (speed_p)
9816 *cost += extra_cost->alu.arith_shift_reg;
9818 else if (speed_p)
9819 *cost += extra_cost->alu.arith_shift;
9820 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
9821 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
9822 return true;
9825 if (speed_p)
9826 *cost += extra_cost->alu.arith;
9827 if (CONST_INT_P (XEXP (x, 1))
9828 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
9830 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
9831 return true;
9833 return false;
9836 /* Vector mode? */
9838 *cost = LIBCALL_COST (2);
9839 return false;
9841 return true;
9843 case EQ:
9844 case NE:
9845 case LT:
9846 case LE:
9847 case GT:
9848 case GE:
9849 case LTU:
9850 case LEU:
9851 case GEU:
9852 case GTU:
9853 case ORDERED:
9854 case UNORDERED:
9855 case UNEQ:
9856 case UNLE:
9857 case UNLT:
9858 case UNGE:
9859 case UNGT:
9860 case LTGT:
9861 if (outer_code == SET)
9863 /* Is it a store-flag operation? */
9864 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
9865 && XEXP (x, 1) == const0_rtx)
9867 /* Thumb also needs an IT insn. */
9868 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
9869 return true;
9871 if (XEXP (x, 1) == const0_rtx)
9873 switch (code)
9875 case LT:
9876 /* LSR Rd, Rn, #31. */
9877 if (speed_p)
9878 *cost += extra_cost->alu.shift;
9879 break;
9881 case EQ:
9882 /* RSBS T1, Rn, #0
9883 ADC Rd, Rn, T1. */
9885 case NE:
9886 /* SUBS T1, Rn, #1
9887 SBC Rd, Rn, T1. */
9888 *cost += COSTS_N_INSNS (1);
9889 break;
9891 case LE:
9892 /* RSBS T1, Rn, Rn, LSR #31
9893 ADC Rd, Rn, T1. */
9894 *cost += COSTS_N_INSNS (1);
9895 if (speed_p)
9896 *cost += extra_cost->alu.arith_shift;
9897 break;
9899 case GT:
9900 /* RSB Rd, Rn, Rn, ASR #1
9901 LSR Rd, Rd, #31. */
9902 *cost += COSTS_N_INSNS (1);
9903 if (speed_p)
9904 *cost += (extra_cost->alu.arith_shift
9905 + extra_cost->alu.shift);
9906 break;
9908 case GE:
9909 /* ASR Rd, Rn, #31
9910 ADD Rd, Rn, #1. */
9911 *cost += COSTS_N_INSNS (1);
9912 if (speed_p)
9913 *cost += extra_cost->alu.shift;
9914 break;
9916 default:
9917 /* Remaining cases are either meaningless or would take
9918 three insns anyway. */
9919 *cost = COSTS_N_INSNS (3);
9920 break;
9922 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9923 return true;
9925 else
9927 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
9928 if (CONST_INT_P (XEXP (x, 1))
9929 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
9931 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9932 return true;
9935 return false;
9938 /* Not directly inside a set. If it involves the condition code
9939 register it must be the condition for a branch, cond_exec or
9940 I_T_E operation. Since the comparison is performed elsewhere
9941 this is just the control part which has no additional
9942 cost. */
9943 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
9944 && XEXP (x, 1) == const0_rtx)
9946 *cost = 0;
9947 return true;
9949 return false;
9951 case ABS:
9952 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9953 && (mode == SFmode || !TARGET_VFP_SINGLE))
9955 if (speed_p)
9956 *cost += extra_cost->fp[mode != SFmode].neg;
9958 return false;
9960 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9962 *cost = LIBCALL_COST (1);
9963 return false;
9966 if (mode == SImode)
9968 if (speed_p)
9969 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
9970 return false;
9972 /* Vector mode? */
9973 *cost = LIBCALL_COST (1);
9974 return false;
9976 case SIGN_EXTEND:
9977 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
9978 && MEM_P (XEXP (x, 0)))
9980 if (mode == DImode)
9981 *cost += COSTS_N_INSNS (1);
9983 if (!speed_p)
9984 return true;
9986 if (GET_MODE (XEXP (x, 0)) == SImode)
9987 *cost += extra_cost->ldst.load;
9988 else
9989 *cost += extra_cost->ldst.load_sign_extend;
9991 if (mode == DImode)
9992 *cost += extra_cost->alu.shift;
9994 return true;
9997 /* Widening from less than 32-bits requires an extend operation. */
9998 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10000 /* We have SXTB/SXTH. */
10001 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10002 if (speed_p)
10003 *cost += extra_cost->alu.extend;
10005 else if (GET_MODE (XEXP (x, 0)) != SImode)
10007 /* Needs two shifts. */
10008 *cost += COSTS_N_INSNS (1);
10009 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10010 if (speed_p)
10011 *cost += 2 * extra_cost->alu.shift;
10014 /* Widening beyond 32-bits requires one more insn. */
10015 if (mode == DImode)
10017 *cost += COSTS_N_INSNS (1);
10018 if (speed_p)
10019 *cost += extra_cost->alu.shift;
10022 return true;
10024 case ZERO_EXTEND:
10025 if ((arm_arch4
10026 || GET_MODE (XEXP (x, 0)) == SImode
10027 || GET_MODE (XEXP (x, 0)) == QImode)
10028 && MEM_P (XEXP (x, 0)))
10030 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10032 if (mode == DImode)
10033 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10035 return true;
10038 /* Widening from less than 32-bits requires an extend operation. */
10039 if (GET_MODE (XEXP (x, 0)) == QImode)
10041 /* UXTB can be a shorter instruction in Thumb2, but it might
10042 be slower than the AND Rd, Rn, #255 alternative. When
10043 optimizing for speed it should never be slower to use
10044 AND, and we don't really model 16-bit vs 32-bit insns
10045 here. */
10046 if (speed_p)
10047 *cost += extra_cost->alu.logical;
10049 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10051 /* We have UXTB/UXTH. */
10052 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10053 if (speed_p)
10054 *cost += extra_cost->alu.extend;
10056 else if (GET_MODE (XEXP (x, 0)) != SImode)
10058 /* Needs two shifts. It's marginally preferable to use
10059 shifts rather than two BIC instructions as the second
10060 shift may merge with a subsequent insn as a shifter
10061 op. */
10062 *cost = COSTS_N_INSNS (2);
10063 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10064 if (speed_p)
10065 *cost += 2 * extra_cost->alu.shift;
10068 /* Widening beyond 32-bits requires one more insn. */
10069 if (mode == DImode)
10071 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10074 return true;
10076 case CONST_INT:
10077 *cost = 0;
10078 /* CONST_INT has no mode, so we cannot tell for sure how many
10079 insns are really going to be needed. The best we can do is
10080 look at the value passed. If it fits in SImode, then assume
10081 that's the mode it will be used for. Otherwise assume it
10082 will be used in DImode. */
10083 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10084 mode = SImode;
10085 else
10086 mode = DImode;
10088 /* Avoid blowing up in arm_gen_constant (). */
10089 if (!(outer_code == PLUS
10090 || outer_code == AND
10091 || outer_code == IOR
10092 || outer_code == XOR
10093 || outer_code == MINUS))
10094 outer_code = SET;
10096 const_int_cost:
10097 if (mode == SImode)
10099 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10100 INTVAL (x), NULL, NULL,
10101 0, 0));
10102 /* Extra costs? */
10104 else
10106 *cost += COSTS_N_INSNS (arm_gen_constant
10107 (outer_code, SImode, NULL,
10108 trunc_int_for_mode (INTVAL (x), SImode),
10109 NULL, NULL, 0, 0)
10110 + arm_gen_constant (outer_code, SImode, NULL,
10111 INTVAL (x) >> 32, NULL,
10112 NULL, 0, 0));
10113 /* Extra costs? */
10116 return true;
10118 case CONST:
10119 case LABEL_REF:
10120 case SYMBOL_REF:
10121 if (speed_p)
10123 if (arm_arch_thumb2 && !flag_pic)
10124 *cost += COSTS_N_INSNS (1);
10125 else
10126 *cost += extra_cost->ldst.load;
10128 else
10129 *cost += COSTS_N_INSNS (1);
10131 if (flag_pic)
10133 *cost += COSTS_N_INSNS (1);
10134 if (speed_p)
10135 *cost += extra_cost->alu.arith;
10138 return true;
10140 case CONST_FIXED:
10141 *cost = COSTS_N_INSNS (4);
10142 /* Fixme. */
10143 return true;
10145 case CONST_DOUBLE:
10146 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10147 && (mode == SFmode || !TARGET_VFP_SINGLE))
10149 if (vfp3_const_double_rtx (x))
10151 if (speed_p)
10152 *cost += extra_cost->fp[mode == DFmode].fpconst;
10153 return true;
10156 if (speed_p)
10158 if (mode == DFmode)
10159 *cost += extra_cost->ldst.loadd;
10160 else
10161 *cost += extra_cost->ldst.loadf;
10163 else
10164 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10166 return true;
10168 *cost = COSTS_N_INSNS (4);
10169 return true;
10171 case CONST_VECTOR:
10172 /* Fixme. */
10173 if (TARGET_NEON
10174 && TARGET_HARD_FLOAT
10175 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10176 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10177 *cost = COSTS_N_INSNS (1);
10178 else
10179 *cost = COSTS_N_INSNS (4);
10180 return true;
10182 case HIGH:
10183 case LO_SUM:
10184 /* When optimizing for size, we prefer constant pool entries to
10185 MOVW/MOVT pairs, so bump the cost of these slightly. */
10186 if (!speed_p)
10187 *cost += 1;
10188 return true;
10190 case CLZ:
10191 if (speed_p)
10192 *cost += extra_cost->alu.clz;
10193 return false;
10195 case SMIN:
10196 if (XEXP (x, 1) == const0_rtx)
10198 if (speed_p)
10199 *cost += extra_cost->alu.log_shift;
10200 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10201 return true;
10203 /* Fall through. */
10204 case SMAX:
10205 case UMIN:
10206 case UMAX:
10207 *cost += COSTS_N_INSNS (1);
10208 return false;
10210 case TRUNCATE:
10211 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10212 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10213 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10214 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10215 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10216 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10217 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10218 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10219 == ZERO_EXTEND))))
10221 if (speed_p)
10222 *cost += extra_cost->mult[1].extend;
10223 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10224 ZERO_EXTEND, 0, speed_p)
10225 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10226 ZERO_EXTEND, 0, speed_p));
10227 return true;
10229 *cost = LIBCALL_COST (1);
10230 return false;
10232 case UNSPEC_VOLATILE:
10233 case UNSPEC:
10234 return arm_unspec_cost (x, outer_code, speed_p, cost);
10236 case PC:
10237 /* Reading the PC is like reading any other register. Writing it
10238 is more expensive, but we take that into account elsewhere. */
10239 *cost = 0;
10240 return true;
10242 case ZERO_EXTRACT:
10243 /* TODO: Simple zero_extract of bottom bits using AND. */
10244 /* Fall through. */
10245 case SIGN_EXTRACT:
10246 if (arm_arch6
10247 && mode == SImode
10248 && CONST_INT_P (XEXP (x, 1))
10249 && CONST_INT_P (XEXP (x, 2)))
10251 if (speed_p)
10252 *cost += extra_cost->alu.bfx;
10253 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10254 return true;
10256 /* Without UBFX/SBFX, need to resort to shift operations. */
10257 *cost += COSTS_N_INSNS (1);
10258 if (speed_p)
10259 *cost += 2 * extra_cost->alu.shift;
10260 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10261 return true;
10263 case FLOAT_EXTEND:
10264 if (TARGET_HARD_FLOAT)
10266 if (speed_p)
10267 *cost += extra_cost->fp[mode == DFmode].widen;
10268 if (!TARGET_FPU_ARMV8
10269 && GET_MODE (XEXP (x, 0)) == HFmode)
10271 /* Pre v8, widening HF->DF is a two-step process, first
10272 widening to SFmode. */
10273 *cost += COSTS_N_INSNS (1);
10274 if (speed_p)
10275 *cost += extra_cost->fp[0].widen;
10277 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10278 return true;
10281 *cost = LIBCALL_COST (1);
10282 return false;
10284 case FLOAT_TRUNCATE:
10285 if (TARGET_HARD_FLOAT)
10287 if (speed_p)
10288 *cost += extra_cost->fp[mode == DFmode].narrow;
10289 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10290 return true;
10291 /* Vector modes? */
10293 *cost = LIBCALL_COST (1);
10294 return false;
10296 case FMA:
10297 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10299 rtx op0 = XEXP (x, 0);
10300 rtx op1 = XEXP (x, 1);
10301 rtx op2 = XEXP (x, 2);
10304 /* vfms or vfnma. */
10305 if (GET_CODE (op0) == NEG)
10306 op0 = XEXP (op0, 0);
10308 /* vfnms or vfnma. */
10309 if (GET_CODE (op2) == NEG)
10310 op2 = XEXP (op2, 0);
10312 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10313 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10314 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10316 if (speed_p)
10317 *cost += extra_cost->fp[mode == DFmode].fma;
10319 return true;
10322 *cost = LIBCALL_COST (3);
10323 return false;
10325 case FIX:
10326 case UNSIGNED_FIX:
10327 if (TARGET_HARD_FLOAT)
10329 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10330 a vcvt fixed-point conversion. */
10331 if (code == FIX && mode == SImode
10332 && GET_CODE (XEXP (x, 0)) == FIX
10333 && GET_MODE (XEXP (x, 0)) == SFmode
10334 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10335 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10336 > 0)
10338 if (speed_p)
10339 *cost += extra_cost->fp[0].toint;
10341 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10342 code, 0, speed_p);
10343 return true;
10346 if (GET_MODE_CLASS (mode) == MODE_INT)
10348 mode = GET_MODE (XEXP (x, 0));
10349 if (speed_p)
10350 *cost += extra_cost->fp[mode == DFmode].toint;
10351 /* Strip off the 'cost' of rounding towards zero. */
10352 if (GET_CODE (XEXP (x, 0)) == FIX)
10353 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10354 0, speed_p);
10355 else
10356 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10357 /* ??? Increase the cost to deal with transferring from
10358 FP -> CORE registers? */
10359 return true;
10361 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10362 && TARGET_FPU_ARMV8)
10364 if (speed_p)
10365 *cost += extra_cost->fp[mode == DFmode].roundint;
10366 return false;
10368 /* Vector costs? */
10370 *cost = LIBCALL_COST (1);
10371 return false;
10373 case FLOAT:
10374 case UNSIGNED_FLOAT:
10375 if (TARGET_HARD_FLOAT)
10377 /* ??? Increase the cost to deal with transferring from CORE
10378 -> FP registers? */
10379 if (speed_p)
10380 *cost += extra_cost->fp[mode == DFmode].fromint;
10381 return false;
10383 *cost = LIBCALL_COST (1);
10384 return false;
10386 case CALL:
10387 return true;
10389 case ASM_OPERANDS:
10391 /* Just a guess. Guess number of instructions in the asm
10392 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10393 though (see PR60663). */
10394 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10395 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10397 *cost = COSTS_N_INSNS (asm_length + num_operands);
10398 return true;
10400 default:
10401 if (mode != VOIDmode)
10402 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10403 else
10404 *cost = COSTS_N_INSNS (4); /* Who knows? */
10405 return false;
10409 #undef HANDLE_NARROW_SHIFT_ARITH
10411 /* RTX costs entry point. */
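/* This is the function installed as the TARGET_RTX_COSTS target hook in the
   hook definitions earlier in this file.  */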
10413 static bool
10414 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10415 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10417 bool result;
10418 int code = GET_CODE (x);
10419 gcc_assert (current_tune->insn_extra_cost);
10421 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10422 (enum rtx_code) outer_code,
10423 current_tune->insn_extra_cost,
10424 total, speed);
10426 if (dump_file && (dump_flags & TDF_DETAILS))
10428 print_rtl_single (dump_file, x);
10429 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10430 *total, result ? "final" : "partial");
10432 return result;
10435 /* All address computations that can be done are free, but rtx cost returns
10436 the same for practically all of them. So we weight the different types
10437 of address here in the order (most pref first):
10438 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10439 static inline int
10440 arm_arm_address_cost (rtx x)
10442 enum rtx_code c = GET_CODE (x);
10444 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10445 return 0;
10446 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10447 return 10;
10449 if (c == PLUS)
10451 if (CONST_INT_P (XEXP (x, 1)))
10452 return 2;
10454 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10455 return 3;
10457 return 4;
10460 return 6;
10463 static inline int
10464 arm_thumb_address_cost (rtx x)
10466 enum rtx_code c = GET_CODE (x);
10468 if (c == REG)
10469 return 1;
10470 if (c == PLUS
10471 && REG_P (XEXP (x, 0))
10472 && CONST_INT_P (XEXP (x, 1)))
10473 return 1;
10475 return 2;
10478 static int
10479 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10480 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10482 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10485 /* Adjust cost hook for XScale. */
10486 static bool
10487 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10488 int * cost)
10490 /* Some true dependencies can have a higher cost depending
10491 on precisely how certain input operands are used. */
10492 if (dep_type == 0
10493 && recog_memoized (insn) >= 0
10494 && recog_memoized (dep) >= 0)
10496 int shift_opnum = get_attr_shift (insn);
10497 enum attr_type attr_type = get_attr_type (dep);
10499 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10500 operand for INSN. If we have a shifted input operand and the
10501 instruction we depend on is another ALU instruction, then we may
10502 have to account for an additional stall. */
10503 if (shift_opnum != 0
10504 && (attr_type == TYPE_ALU_SHIFT_IMM
10505 || attr_type == TYPE_ALUS_SHIFT_IMM
10506 || attr_type == TYPE_LOGIC_SHIFT_IMM
10507 || attr_type == TYPE_LOGICS_SHIFT_IMM
10508 || attr_type == TYPE_ALU_SHIFT_REG
10509 || attr_type == TYPE_ALUS_SHIFT_REG
10510 || attr_type == TYPE_LOGIC_SHIFT_REG
10511 || attr_type == TYPE_LOGICS_SHIFT_REG
10512 || attr_type == TYPE_MOV_SHIFT
10513 || attr_type == TYPE_MVN_SHIFT
10514 || attr_type == TYPE_MOV_SHIFT_REG
10515 || attr_type == TYPE_MVN_SHIFT_REG))
10517 rtx shifted_operand;
10518 int opno;
10520 /* Get the shifted operand. */
10521 extract_insn (insn);
10522 shifted_operand = recog_data.operand[shift_opnum];
10524 /* Iterate over all the operands in DEP. If we write an operand
10525 that overlaps with SHIFTED_OPERAND, then we have to increase the
10526 cost of this dependency. */
10527 extract_insn (dep);
10528 preprocess_constraints (dep);
10529 for (opno = 0; opno < recog_data.n_operands; opno++)
10531 /* We can ignore strict inputs. */
10532 if (recog_data.operand_type[opno] == OP_IN)
10533 continue;
10535 if (reg_overlap_mentioned_p (recog_data.operand[opno],
10536 shifted_operand))
10538 *cost = 2;
10539 return false;
10544 return true;
10547 /* Adjust cost hook for Cortex A9. */
10548 static bool
10549 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10550 int * cost)
10552 switch (dep_type)
10554 case REG_DEP_ANTI:
10555 *cost = 0;
10556 return false;
10558 case REG_DEP_TRUE:
10559 case REG_DEP_OUTPUT:
10560 if (recog_memoized (insn) >= 0
10561 && recog_memoized (dep) >= 0)
10563 if (GET_CODE (PATTERN (insn)) == SET)
10565 if (GET_MODE_CLASS
10566 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
10567 || GET_MODE_CLASS
10568 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
10570 enum attr_type attr_type_insn = get_attr_type (insn);
10571 enum attr_type attr_type_dep = get_attr_type (dep);
10573 /* By default all dependencies of the form
10574 s0 = s0 <op> s1
10575 s0 = s0 <op> s2
10576 have an extra latency of 1 cycle because
10577 of the input and output dependency in this
10578 case. However this gets modeled as a true
10579 dependency and hence all these checks. */
10580 if (REG_P (SET_DEST (PATTERN (insn)))
10581 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
10583 /* FMACS is a special case where the dependent
10584 instruction can be issued 3 cycles before
10585 the normal latency in case of an output
10586 dependency. */
10587 if ((attr_type_insn == TYPE_FMACS
10588 || attr_type_insn == TYPE_FMACD)
10589 && (attr_type_dep == TYPE_FMACS
10590 || attr_type_dep == TYPE_FMACD))
10592 if (dep_type == REG_DEP_OUTPUT)
10593 *cost = insn_default_latency (dep) - 3;
10594 else
10595 *cost = insn_default_latency (dep);
10596 return false;
10598 else
10600 if (dep_type == REG_DEP_OUTPUT)
10601 *cost = insn_default_latency (dep) + 1;
10602 else
10603 *cost = insn_default_latency (dep);
10605 return false;
10610 break;
10612 default:
10613 gcc_unreachable ();
10616 return true;
10619 /* Adjust cost hook for FA726TE. */
10620 static bool
10621 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10622 int * cost)
10624 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
10625 has a penalty of 3. */
10626 if (dep_type == REG_DEP_TRUE
10627 && recog_memoized (insn) >= 0
10628 && recog_memoized (dep) >= 0
10629 && get_attr_conds (dep) == CONDS_SET)
10631 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
10632 if (get_attr_conds (insn) == CONDS_USE
10633 && get_attr_type (insn) != TYPE_BRANCH)
10635 *cost = 3;
10636 return false;
10639 if (GET_CODE (PATTERN (insn)) == COND_EXEC
10640 || get_attr_conds (insn) == CONDS_USE)
10642 *cost = 0;
10643 return false;
10647 return true;
10650 /* Implement TARGET_REGISTER_MOVE_COST.
10652 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
10653 typically more expensive than a single memory access. We set
10654 the cost to less than two memory accesses so that floating
10655 point to integer conversion does not go through memory. */
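/* Concretely: with arm_memory_move_cost below returning 10 for TARGET_32BIT,
   two memory accesses would cost 20, so the VFP<->GP figure is set to 15.  */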
10658 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
10659 reg_class_t from, reg_class_t to)
10661 if (TARGET_32BIT)
10663 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
10664 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
10665 return 15;
10666 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
10667 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
10668 return 4;
10669 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
10670 return 20;
10671 else
10672 return 2;
10674 else
10676 if (from == HI_REGS || to == HI_REGS)
10677 return 4;
10678 else
10679 return 2;
10683 /* Implement TARGET_MEMORY_MOVE_COST. */
10686 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
10687 bool in ATTRIBUTE_UNUSED)
10689 if (TARGET_32BIT)
10690 return 10;
10691 else
10693 if (GET_MODE_SIZE (mode) < 4)
10694 return 8;
10695 else
10696 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
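/* E.g. an SImode value costs 2 * 4 * 1 = 8 in LO_REGS and 2 * 4 * 2 = 16
   in any other class.  */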
10700 /* Vectorizer cost model implementation. */
10702 /* Implement targetm.vectorize.builtin_vectorization_cost. */
10703 static int
10704 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
10705 tree vectype,
10706 int misalign ATTRIBUTE_UNUSED)
10708 unsigned elements;
10710 switch (type_of_cost)
10712 case scalar_stmt:
10713 return current_tune->vec_costs->scalar_stmt_cost;
10715 case scalar_load:
10716 return current_tune->vec_costs->scalar_load_cost;
10718 case scalar_store:
10719 return current_tune->vec_costs->scalar_store_cost;
10721 case vector_stmt:
10722 return current_tune->vec_costs->vec_stmt_cost;
10724 case vector_load:
10725 return current_tune->vec_costs->vec_align_load_cost;
10727 case vector_store:
10728 return current_tune->vec_costs->vec_store_cost;
10730 case vec_to_scalar:
10731 return current_tune->vec_costs->vec_to_scalar_cost;
10733 case scalar_to_vec:
10734 return current_tune->vec_costs->scalar_to_vec_cost;
10736 case unaligned_load:
10737 return current_tune->vec_costs->vec_unalign_load_cost;
10739 case unaligned_store:
10740 return current_tune->vec_costs->vec_unalign_store_cost;
10742 case cond_branch_taken:
10743 return current_tune->vec_costs->cond_taken_branch_cost;
10745 case cond_branch_not_taken:
10746 return current_tune->vec_costs->cond_not_taken_branch_cost;
10748 case vec_perm:
10749 case vec_promote_demote:
10750 return current_tune->vec_costs->vec_stmt_cost;
10752 case vec_construct:
10753 elements = TYPE_VECTOR_SUBPARTS (vectype);
10754 return elements / 2 + 1;
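/* E.g. constructing a 4-element vector is costed as 4 / 2 + 1 = 3. */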
10756 default:
10757 gcc_unreachable ();
10761 /* Implement targetm.vectorize.add_stmt_cost. */
10763 static unsigned
10764 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
10765 struct _stmt_vec_info *stmt_info, int misalign,
10766 enum vect_cost_model_location where)
10768 unsigned *cost = (unsigned *) data;
10769 unsigned retval = 0;
10771 if (flag_vect_cost_model)
10773 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
10774 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
10776 /* Statements in an inner loop relative to the loop being
10777 vectorized are weighted more heavily. The value here is
10778 arbitrary and could potentially be improved with analysis. */
10779 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
10780 count *= 50; /* FIXME. */
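/* So, for example, a vector statement of cost 1 inside an inner loop
   contributes 50 to the body cost accumulated below.  */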
10782 retval = (unsigned) (count * stmt_cost);
10783 cost[where] += retval;
10786 return retval;
10789 /* Return true if and only if this insn can dual-issue only as older. */
10790 static bool
10791 cortexa7_older_only (rtx_insn *insn)
10793 if (recog_memoized (insn) < 0)
10794 return false;
10796 switch (get_attr_type (insn))
10798 case TYPE_ALU_DSP_REG:
10799 case TYPE_ALU_SREG:
10800 case TYPE_ALUS_SREG:
10801 case TYPE_LOGIC_REG:
10802 case TYPE_LOGICS_REG:
10803 case TYPE_ADC_REG:
10804 case TYPE_ADCS_REG:
10805 case TYPE_ADR:
10806 case TYPE_BFM:
10807 case TYPE_REV:
10808 case TYPE_MVN_REG:
10809 case TYPE_SHIFT_IMM:
10810 case TYPE_SHIFT_REG:
10811 case TYPE_LOAD_BYTE:
10812 case TYPE_LOAD1:
10813 case TYPE_STORE1:
10814 case TYPE_FFARITHS:
10815 case TYPE_FADDS:
10816 case TYPE_FFARITHD:
10817 case TYPE_FADDD:
10818 case TYPE_FMOV:
10819 case TYPE_F_CVT:
10820 case TYPE_FCMPS:
10821 case TYPE_FCMPD:
10822 case TYPE_FCONSTS:
10823 case TYPE_FCONSTD:
10824 case TYPE_FMULS:
10825 case TYPE_FMACS:
10826 case TYPE_FMULD:
10827 case TYPE_FMACD:
10828 case TYPE_FDIVS:
10829 case TYPE_FDIVD:
10830 case TYPE_F_MRC:
10831 case TYPE_F_MRRC:
10832 case TYPE_F_FLAG:
10833 case TYPE_F_LOADS:
10834 case TYPE_F_STORES:
10835 return true;
10836 default:
10837 return false;
10841 /* Return true if and only if this insn can dual-issue as younger. */
10842 static bool
10843 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
10845 if (recog_memoized (insn) < 0)
10847 if (verbose > 5)
10848 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
10849 return false;
10852 switch (get_attr_type (insn))
10854 case TYPE_ALU_IMM:
10855 case TYPE_ALUS_IMM:
10856 case TYPE_LOGIC_IMM:
10857 case TYPE_LOGICS_IMM:
10858 case TYPE_EXTEND:
10859 case TYPE_MVN_IMM:
10860 case TYPE_MOV_IMM:
10861 case TYPE_MOV_REG:
10862 case TYPE_MOV_SHIFT:
10863 case TYPE_MOV_SHIFT_REG:
10864 case TYPE_BRANCH:
10865 case TYPE_CALL:
10866 return true;
10867 default:
10868 return false;
10873 /* Look for an instruction that can dual issue only as an older
10874 instruction, and move it in front of any instructions that can
10875 dual-issue as younger, while preserving the relative order of all
10876 other instructions in the ready list. This is a heuristic to help
10877 dual-issue in later cycles, by postponing issue of more flexible
10878 instructions. This heuristic may affect dual issue opportunities
10879 in the current cycle. */
10880 static void
10881 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
10882 int *n_readyp, int clock)
10884 int i;
10885 int first_older_only = -1, first_younger = -1;
10887 if (verbose > 5)
10888 fprintf (file,
10889 ";; sched_reorder for cycle %d with %d insns in ready list\n",
10890 clock,
10891 *n_readyp);
10893 /* Traverse the ready list from the head (the instruction to issue
10894 first), looking for the first instruction that can issue as
10895 younger and the first instruction that can dual-issue only as
10896 older. */
10897 for (i = *n_readyp - 1; i >= 0; i--)
10899 rtx_insn *insn = ready[i];
10900 if (cortexa7_older_only (insn))
10902 first_older_only = i;
10903 if (verbose > 5)
10904 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
10905 break;
10907 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
10908 first_younger = i;
10911 /* Nothing to reorder because either no younger insn found or insn
10912 that can dual-issue only as older appears before any insn that
10913 can dual-issue as younger. */
10914 if (first_younger == -1)
10916 if (verbose > 5)
10917 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
10918 return;
10921 /* Nothing to reorder because no older-only insn in the ready list. */
10922 if (first_older_only == -1)
10924 if (verbose > 5)
10925 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
10926 return;
10929 /* Move first_older_only insn before first_younger. */
10930 if (verbose > 5)
10931 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
10932 INSN_UID(ready [first_older_only]),
10933 INSN_UID(ready [first_younger]));
10934 rtx_insn *first_older_only_insn = ready [first_older_only];
10935 for (i = first_older_only; i < first_younger; i++)
10937 ready[i] = ready[i+1];
10940 ready[i] = first_older_only_insn;
10941 return;
10944 /* Implement TARGET_SCHED_REORDER. */
10945 static int
10946 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
10947 int clock)
10949 switch (arm_tune)
10951 case cortexa7:
10952 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
10953 break;
10954 default:
10955 /* Do nothing for other cores. */
10956 break;
10959 return arm_issue_rate ();
10962 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
10963 It corrects the value of COST based on the relationship between
10964 INSN and DEP through the dependence LINK. It returns the new
10965 value. There is a per-core adjust_cost hook to adjust scheduler costs
10966 and the per-core hook can choose to completely override the generic
10967 adjust_cost function. Only put bits of code into arm_adjust_cost that
10968 are common across all cores. */
10969 static int
10970 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10971 unsigned int)
10973 rtx i_pat, d_pat;
10975 /* When generating Thumb-1 code, we want to place flag-setting operations
10976 close to a conditional branch which depends on them, so that we can
10977 omit the comparison. */
10978 if (TARGET_THUMB1
10979 && dep_type == 0
10980 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
10981 && recog_memoized (dep) >= 0
10982 && get_attr_conds (dep) == CONDS_SET)
10983 return 0;
10985 if (current_tune->sched_adjust_cost != NULL)
10987 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
10988 return cost;
10991 /* XXX Is this strictly true? */
10992 if (dep_type == REG_DEP_ANTI
10993 || dep_type == REG_DEP_OUTPUT)
10994 return 0;
10996 /* Call insns don't incur a stall, even if they follow a load. */
10997 if (dep_type == 0
10998 && CALL_P (insn))
10999 return 1;
11001 if ((i_pat = single_set (insn)) != NULL
11002 && MEM_P (SET_SRC (i_pat))
11003 && (d_pat = single_set (dep)) != NULL
11004 && MEM_P (SET_DEST (d_pat)))
11006 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11007 /* This is a load after a store; there is no conflict if the load reads
11008 from a cached area. Assume that loads from the stack and from the
11009 constant pool are cached, and that others will miss. This is a
11010 hack. */
11012 if ((GET_CODE (src_mem) == SYMBOL_REF
11013 && CONSTANT_POOL_ADDRESS_P (src_mem))
11014 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11015 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11016 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11017 return 1;
11020 return cost;
11024 arm_max_conditional_execute (void)
11026 return max_insns_skipped;
11029 static int
11030 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11032 if (TARGET_32BIT)
11033 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11034 else
11035 return (optimize > 0) ? 2 : 0;
11038 static int
11039 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11041 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11044 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11045 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11046 sequences of non-executed instructions in IT blocks probably take the same
11047 amount of time as executed instructions (and the IT instruction itself takes
11048 space in icache). This function was experimentally determined to give good
11049 results on a popular embedded benchmark. */
11051 static int
11052 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11054 return (TARGET_32BIT && speed_p) ? 1
11055 : arm_default_branch_cost (speed_p, predictable_p);
11058 static int
11059 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11061 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11064 static bool fp_consts_inited = false;
11066 static REAL_VALUE_TYPE value_fp0;
11068 static void
11069 init_fp_table (void)
11071 REAL_VALUE_TYPE r;
11073 r = REAL_VALUE_ATOF ("0", DFmode);
11074 value_fp0 = r;
11075 fp_consts_inited = true;
11078 /* Return TRUE if rtx X is a valid immediate FP constant. */
11080 arm_const_double_rtx (rtx x)
11082 const REAL_VALUE_TYPE *r;
11084 if (!fp_consts_inited)
11085 init_fp_table ();
11087 r = CONST_DOUBLE_REAL_VALUE (x);
11088 if (REAL_VALUE_MINUS_ZERO (*r))
11089 return 0;
11091 if (real_equal (r, &value_fp0))
11092 return 1;
11094 return 0;
11097 /* VFPv3 has a fairly wide range of representable immediates, formed from
11098 "quarter-precision" floating-point values. These can be evaluated using this
11099 formula (with ^ for exponentiation):
11101 -1^s * n * 2^-r
11103 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11104 16 <= n <= 31 and 0 <= r <= 7.
11106 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11108 - A (most-significant) is the sign bit.
11109 - BCD are the exponent (encoded as r XOR 3).
11110 - EFGH are the mantissa (encoded as n - 16).
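   For example (a worked instance of the formula above), 1.0 = 16 * 2^-4, so
   s = 0, n = 16 and r = 4, giving ABCDEFGH = 0 111 0000, i.e. index 0x70.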
11113 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11114 fconst[sd] instruction, or -1 if X isn't suitable. */
11115 static int
11116 vfp3_const_double_index (rtx x)
11118 REAL_VALUE_TYPE r, m;
11119 int sign, exponent;
11120 unsigned HOST_WIDE_INT mantissa, mant_hi;
11121 unsigned HOST_WIDE_INT mask;
11122 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11123 bool fail;
11125 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11126 return -1;
11128 r = *CONST_DOUBLE_REAL_VALUE (x);
11130 /* We can't represent these things, so detect them first. */
11131 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11132 return -1;
11134 /* Extract sign, exponent and mantissa. */
11135 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11136 r = real_value_abs (&r);
11137 exponent = REAL_EXP (&r);
11138 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11139 highest (sign) bit, with a fixed binary point at bit point_pos.
11140 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11141 bits for the mantissa, this may fail (low bits would be lost). */
11142 real_ldexp (&m, &r, point_pos - exponent);
11143 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11144 mantissa = w.elt (0);
11145 mant_hi = w.elt (1);
11147 /* If there are bits set in the low part of the mantissa, we can't
11148 represent this value. */
11149 if (mantissa != 0)
11150 return -1;
11152 /* Now make it so that mantissa contains the most-significant bits, and move
11153 the point_pos to indicate that the least-significant bits have been
11154 discarded. */
11155 point_pos -= HOST_BITS_PER_WIDE_INT;
11156 mantissa = mant_hi;
11158 /* We can permit four significant bits of mantissa only, plus a high bit
11159 which is always 1. */
11160 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11161 if ((mantissa & mask) != 0)
11162 return -1;
11164 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11165 mantissa >>= point_pos - 5;
11167 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11168 floating-point immediate zero with Neon using an integer-zero load, but
11169 that case is handled elsewhere.) */
11170 if (mantissa == 0)
11171 return -1;
11173 gcc_assert (mantissa >= 16 && mantissa <= 31);
11175 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11176 normalized significands are in the range [1, 2). (Our mantissa is shifted
11177 left 4 places at this point relative to normalized IEEE754 values). GCC
11178 internally uses [0.5, 1) (see real.c), so the exponent returned from
11179 REAL_EXP must be altered. */
11180 exponent = 5 - exponent;
11182 if (exponent < 0 || exponent > 7)
11183 return -1;
11185 /* Sign, mantissa and exponent are now in the correct form to plug into the
11186 formula described in the comment above. */
11187 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11190 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11192 vfp3_const_double_rtx (rtx x)
11194 if (!TARGET_VFP3)
11195 return 0;
11197 return vfp3_const_double_index (x) != -1;
11200 /* Recognize immediates which can be used in various Neon instructions. Legal
11201 immediates are described by the following table (for VMVN variants, the
11202 bitwise inverse of the constant shown is recognized. In either case, VMOV
11203 is output and the correct instruction to use for a given constant is chosen
11204 by the assembler). The constant shown is replicated across all elements of
11205 the destination vector.
11207 insn elems variant constant (binary)
11208 ---- ----- ------- -----------------
11209 vmov i32 0 00000000 00000000 00000000 abcdefgh
11210 vmov i32 1 00000000 00000000 abcdefgh 00000000
11211 vmov i32 2 00000000 abcdefgh 00000000 00000000
11212 vmov i32 3 abcdefgh 00000000 00000000 00000000
11213 vmov i16 4 00000000 abcdefgh
11214 vmov i16 5 abcdefgh 00000000
11215 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11216 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11217 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11218 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11219 vmvn i16 10 00000000 abcdefgh
11220 vmvn i16 11 abcdefgh 00000000
11221 vmov i32 12 00000000 00000000 abcdefgh 11111111
11222 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11223 vmov i32 14 00000000 abcdefgh 11111111 11111111
11224 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11225 vmov i8 16 abcdefgh
11226 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11227 eeeeeeee ffffffff gggggggg hhhhhhhh
11228 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11229 vmov f32 19 00000000 00000000 00000000 00000000
11231 For case 18, B = !b. Representable values are exactly those accepted by
11232 vfp3_const_double_index, but are output as floating-point numbers rather
11233 than indices.
11235 For case 19, we will change it to vmov.i32 when assembling.
11237 Variants 0-5 (inclusive) may also be used as immediates for the second
11238 operand of VORR/VBIC instructions.
11240 The INVERSE argument causes the bitwise inverse of the given operand to be
11241 recognized instead (used for recognizing legal immediates for the VAND/VORN
11242 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11243 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11244 output, rather than the real insns vbic/vorr).
11246 INVERSE makes no difference to the recognition of float vectors.
11248 The return value is the variant of immediate as shown in the above table, or
11249 -1 if the given value doesn't match any of the listed patterns.
11251 static int
11252 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11253 rtx *modconst, int *elementwidth)
11255 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11256 matches = 1; \
11257 for (i = 0; i < idx; i += (STRIDE)) \
11258 if (!(TEST)) \
11259 matches = 0; \
11260 if (matches) \
11262 immtype = (CLASS); \
11263 elsize = (ELSIZE); \
11264 break; \
11267 unsigned int i, elsize = 0, idx = 0, n_elts;
11268 unsigned int innersize;
11269 unsigned char bytes[16];
11270 int immtype = -1, matches;
11271 unsigned int invmask = inverse ? 0xff : 0;
11272 bool vector = GET_CODE (op) == CONST_VECTOR;
11274 if (vector)
11275 n_elts = CONST_VECTOR_NUNITS (op);
11276 else
11278 n_elts = 1;
11279 if (mode == VOIDmode)
11280 mode = DImode;
11283 innersize = GET_MODE_UNIT_SIZE (mode);
11285 /* Vectors of float constants. */
11286 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11288 rtx el0 = CONST_VECTOR_ELT (op, 0);
11290 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11291 return -1;
11293 /* FP16 vectors cannot be represented. */
11294 if (GET_MODE_INNER (mode) == HFmode)
11295 return -1;
11297 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11298 are distinct in this context. */
11299 if (!const_vec_duplicate_p (op))
11300 return -1;
11302 if (modconst)
11303 *modconst = CONST_VECTOR_ELT (op, 0);
11305 if (elementwidth)
11306 *elementwidth = 0;
11308 if (el0 == CONST0_RTX (GET_MODE (el0)))
11309 return 19;
11310 else
11311 return 18;
11314 /* Splat vector constant out into a byte vector. */
11315 for (i = 0; i < n_elts; i++)
11317 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11318 unsigned HOST_WIDE_INT elpart;
11320 gcc_assert (CONST_INT_P (el));
11321 elpart = INTVAL (el);
11323 for (unsigned int byte = 0; byte < innersize; byte++)
11325 bytes[idx++] = (elpart & 0xff) ^ invmask;
11326 elpart >>= BITS_PER_UNIT;
11330 /* Sanity check. */
11331 gcc_assert (idx == GET_MODE_SIZE (mode));
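/* Illustrative example: a V4SImode vector of (const_int 255) splats to
   the byte pattern ff 00 00 00 repeated, matching variant 0 below
   (vmov.i32 with abcdefgh = 11111111).  */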
11335 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11336 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11338 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11339 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11341 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11342 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11344 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11345 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11347 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11349 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11351 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11352 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11354 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11355 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11357 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11358 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11360 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11361 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11363 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11365 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11367 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11368 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11370 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11371 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11373 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11374 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11376 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11377 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11379 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11381 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11382 && bytes[i] == bytes[(i + 8) % idx]);
11384 while (0);
11386 if (immtype == -1)
11387 return -1;
11389 if (elementwidth)
11390 *elementwidth = elsize;
11392 if (modconst)
11394 unsigned HOST_WIDE_INT imm = 0;
11396 /* Un-invert bytes of recognized vector, if necessary. */
11397 if (invmask != 0)
11398 for (i = 0; i < idx; i++)
11399 bytes[i] ^= invmask;
11401 if (immtype == 17)
11403 /* FIXME: Broken on 32-bit H_W_I hosts. */
11404 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11406 for (i = 0; i < 8; i++)
11407 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11408 << (i * BITS_PER_UNIT);
11410 *modconst = GEN_INT (imm);
11412 else
11414 unsigned HOST_WIDE_INT imm = 0;
11416 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11417 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11419 *modconst = GEN_INT (imm);
11423 return immtype;
11424 #undef CHECK
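/* For illustration (not part of the recognizer above): a splatted 32-bit
   constant such as { 0xab, 0xab } in V2SImode decomposes into the byte
   vector { ab,00,00,00, ab,00,00,00 }, which matches immtype 0 and would be
   emitted as a VMOV.I32 with immediate 0xab.  The standalone sketch below
   mirrors just the immtype-0 test on a single element; the helper name is
   made up for the example and the code is not built as part of this file.  */
#if 0
#include <stdio.h>

/* Return nonzero if only the lowest byte of a 32-bit element is set,
   i.e. the element matches the immtype-0 pattern used above.  */
static int
matches_immtype0 (unsigned int elt)
{
  unsigned char bytes[4];
  for (int i = 0; i < 4; i++)
    {
      bytes[i] = elt & 0xff;
      elt >>= 8;
    }
  return bytes[1] == 0 && bytes[2] == 0 && bytes[3] == 0;
}

int
main (void)
{
  printf ("%d\n", matches_immtype0 (0x000000ab)); /* 1: vmov.i32 #0xab  */
  printf ("%d\n", matches_immtype0 (0x0000ab00)); /* 0: immtype 1 instead  */
  return 0;
}
#endif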
11427 /* Return TRUE if rtx OP is legal for use as either a Neon VMOV (or, implicitly,
11428 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11429 float elements), and a modified constant (whatever should be output for a
11430 VMOV) in *MODCONST. */
11433 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11434 rtx *modconst, int *elementwidth)
11436 rtx tmpconst;
11437 int tmpwidth;
11438 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11440 if (retval == -1)
11441 return 0;
11443 if (modconst)
11444 *modconst = tmpconst;
11446 if (elementwidth)
11447 *elementwidth = tmpwidth;
11449 return 1;
11452 /* Return TRUE if rtx OP is legal for use in a VORR or VBIC instruction. If
11453 the immediate is valid, write a constant suitable for using as an operand
11454 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11455 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11458 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11459 rtx *modconst, int *elementwidth)
11461 rtx tmpconst;
11462 int tmpwidth;
11463 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11465 if (retval < 0 || retval > 5)
11466 return 0;
11468 if (modconst)
11469 *modconst = tmpconst;
11471 if (elementwidth)
11472 *elementwidth = tmpwidth;
11474 return 1;
11477 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11478 the immediate is valid, write a constant suitable for using as an operand
11479 to VSHR/VSHL to *MODCONST and the corresponding element width to
11480 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether the shift is a left or a right
11481 shift, because the two have different immediate ranges. */
11484 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11485 rtx *modconst, int *elementwidth,
11486 bool isleftshift)
11488 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11489 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11490 unsigned HOST_WIDE_INT last_elt = 0;
11491 unsigned HOST_WIDE_INT maxshift;
11493 /* All elements of the vector must be the same integer constant. */
11494 for (i = 0; i < n_elts; i++)
11496 rtx el = CONST_VECTOR_ELT (op, i);
11497 unsigned HOST_WIDE_INT elpart;
11499 if (CONST_INT_P (el))
11500 elpart = INTVAL (el);
11501 else if (CONST_DOUBLE_P (el))
11502 return 0;
11503 else
11504 gcc_unreachable ();
11506 if (i != 0 && elpart != last_elt)
11507 return 0;
11509 last_elt = elpart;
11512 /* Shift less than element size. */
11513 maxshift = innersize * 8;
11515 if (isleftshift)
11517 /* Left shift immediate value can be from 0 to <size>-1. */
11518 if (last_elt >= maxshift)
11519 return 0;
11521 else
11523 /* Right shift immediate value can be from 1 to <size>. */
11524 if (last_elt == 0 || last_elt > maxshift)
11525 return 0;
11528 if (elementwidth)
11529 *elementwidth = innersize * 8;
11531 if (modconst)
11532 *modconst = CONST_VECTOR_ELT (op, 0);
11534 return 1;
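/* For example (illustrative only): with V8QImode (8-bit elements), a vector
   of identical CONST_INTs is accepted as a left-shift count when it lies in
   the range 0..7 and as a right-shift count when it lies in the range 1..8;
   { 3, 3, 3, 3, 3, 3, 3, 3 } is valid for both, while { 8, ... } is valid
   only as a right-shift count.  */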
11537 /* Return a string suitable for output of Neon immediate logic operation
11538 MNEM. */
11540 char *
11541 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
11542 int inverse, int quad)
11544 int width, is_valid;
11545 static char templ[40];
11547 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
11549 gcc_assert (is_valid != 0);
11551 if (quad)
11552 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
11553 else
11554 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
11556 return templ;
11559 /* Return a string suitable for output of Neon immediate shift operation
11560 (VSHR or VSHL) MNEM. */
11562 char *
11563 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
11564 machine_mode mode, int quad,
11565 bool isleftshift)
11567 int width, is_valid;
11568 static char templ[40];
11570 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
11571 gcc_assert (is_valid != 0);
11573 if (quad)
11574 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
11575 else
11576 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
11578 return templ;
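/* Illustrative outputs only (the exact operands depend on the pattern using
   these helpers): neon_output_logic_immediate might return a template such
   as "vorr.i32\t%q0, %2", printed as e.g. "vorr.i32 q0, #255", and
   neon_output_shift_immediate might return "vshr.s16\t%P0, %P1, %2",
   printed as e.g. "vshr.s16 d0, d1, #3".  */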
11581 /* Output a sequence of pairwise operations to implement a reduction.
11582 NOTE: We do "too much work" here, because pairwise operations work on two
11583 registers' worth of operands in one go. Unfortunately we don't think we can
11584 exploit those extra calculations to do the full operation in fewer steps.
11585 Although all vector elements of the result but the first are ignored, we
11586 actually calculate the same result in each of the elements. An alternative
11587 such as initially loading a vector with zero to use as each of the second
11588 operands would use up an additional register and take an extra instruction,
11589 for no particular gain. */
11591 void
11592 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
11593 rtx (*reduc) (rtx, rtx, rtx))
11595 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
11596 rtx tmpsum = op1;
11598 for (i = parts / 2; i >= 1; i /= 2)
11600 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
11601 emit_insn (reduc (dest, tmpsum, tmpsum));
11602 tmpsum = dest;
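/* Illustration (V4HImode with vpadd.i16 as REDUC; register names are
   placeholders): parts = 4, so the loop runs with i = 2 and then i = 1,
   emitting
       vpadd.i16  dT, dIN, dIN     @ [a+b, c+d, a+b, c+d]
       vpadd.i16  dOUT, dT, dT     @ [a+b+c+d, ...]
   after which every lane holds the full sum; only lane 0 is used.  */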
11606 /* If VALS is a vector constant that can be loaded into a register
11607 using VDUP, generate instructions to do so and return an RTX to
11608 assign to the register. Otherwise return NULL_RTX. */
11610 static rtx
11611 neon_vdup_constant (rtx vals)
11613 machine_mode mode = GET_MODE (vals);
11614 machine_mode inner_mode = GET_MODE_INNER (mode);
11615 rtx x;
11617 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
11618 return NULL_RTX;
11620 if (!const_vec_duplicate_p (vals, &x))
11621 /* The elements are not all the same. We could handle repeating
11622 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
11623 {0, C, 0, C, 0, C, 0, C} which can be loaded using
11624 vdup.i16). */
11625 return NULL_RTX;
11627 /* We can load this constant by using VDUP and a constant in a
11628 single ARM register. This will be cheaper than a vector
11629 load. */
11631 x = copy_to_mode_reg (inner_mode, x);
11632 return gen_rtx_VEC_DUPLICATE (mode, x);
11635 /* Generate code to load VALS, which is a PARALLEL containing only
11636 constants (for vec_init) or CONST_VECTOR, efficiently into a
11637 register. Returns an RTX to copy into the register, or NULL_RTX
11638 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
11641 neon_make_constant (rtx vals)
11643 machine_mode mode = GET_MODE (vals);
11644 rtx target;
11645 rtx const_vec = NULL_RTX;
11646 int n_elts = GET_MODE_NUNITS (mode);
11647 int n_const = 0;
11648 int i;
11650 if (GET_CODE (vals) == CONST_VECTOR)
11651 const_vec = vals;
11652 else if (GET_CODE (vals) == PARALLEL)
11654 /* A CONST_VECTOR must contain only CONST_INTs and
11655 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11656 Only store valid constants in a CONST_VECTOR. */
11657 for (i = 0; i < n_elts; ++i)
11659 rtx x = XVECEXP (vals, 0, i);
11660 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11661 n_const++;
11663 if (n_const == n_elts)
11664 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
11666 else
11667 gcc_unreachable ();
11669 if (const_vec != NULL
11670 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
11671 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
11672 return const_vec;
11673 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
11674 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
11675 pipeline cycle; creating the constant takes one or two ARM
11676 pipeline cycles. */
11677 return target;
11678 else if (const_vec != NULL_RTX)
11679 /* Load from constant pool. On Cortex-A8 this takes two cycles
11680 (for either double or quad vectors). We cannot take advantage
11681 of single-cycle VLD1 because we need a PC-relative addressing
11682 mode. */
11683 return const_vec;
11684 else
11685 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11686 We cannot construct an initializer. */
11687 return NULL_RTX;
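/* Examples of the three outcomes above (illustrative, not exhaustive), for
   V4SImode: { 0, 0, 0, 0 } or { 0xff, 0xff, 0xff, 0xff } are valid VMOV
   immediates and are returned as a CONST_VECTOR; { 0x12345678, ... } (four
   identical copies) is not a valid immediate but is a duplicate, so it is
   built in a core register and broadcast with VDUP; four distinct arbitrary
   constants fall back to a literal-pool load.  */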
11690 /* Initialize vector TARGET to VALS. */
11692 void
11693 neon_expand_vector_init (rtx target, rtx vals)
11695 machine_mode mode = GET_MODE (target);
11696 machine_mode inner_mode = GET_MODE_INNER (mode);
11697 int n_elts = GET_MODE_NUNITS (mode);
11698 int n_var = 0, one_var = -1;
11699 bool all_same = true;
11700 rtx x, mem;
11701 int i;
11703 for (i = 0; i < n_elts; ++i)
11705 x = XVECEXP (vals, 0, i);
11706 if (!CONSTANT_P (x))
11707 ++n_var, one_var = i;
11709 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
11710 all_same = false;
11713 if (n_var == 0)
11715 rtx constant = neon_make_constant (vals);
11716 if (constant != NULL_RTX)
11718 emit_move_insn (target, constant);
11719 return;
11723 /* Splat a single non-constant element if we can. */
11724 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
11726 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
11727 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
11728 return;
11731 /* One field is non-constant. Load constant then overwrite varying
11732 field. This is more efficient than using the stack. */
11733 if (n_var == 1)
11735 rtx copy = copy_rtx (vals);
11736 rtx index = GEN_INT (one_var);
11738 /* Load constant part of vector, substitute neighboring value for
11739 varying element. */
11740 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
11741 neon_expand_vector_init (target, copy);
11743 /* Insert variable. */
11744 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
11745 switch (mode)
11747 case V8QImode:
11748 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
11749 break;
11750 case V16QImode:
11751 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
11752 break;
11753 case V4HImode:
11754 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
11755 break;
11756 case V8HImode:
11757 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
11758 break;
11759 case V2SImode:
11760 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
11761 break;
11762 case V4SImode:
11763 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
11764 break;
11765 case V2SFmode:
11766 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
11767 break;
11768 case V4SFmode:
11769 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
11770 break;
11771 case V2DImode:
11772 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
11773 break;
11774 default:
11775 gcc_unreachable ();
11777 return;
11780 /* Construct the vector in memory one field at a time
11781 and load the whole vector. */
11782 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
11783 for (i = 0; i < n_elts; i++)
11784 emit_move_insn (adjust_address_nv (mem, inner_mode,
11785 i * GET_MODE_SIZE (inner_mode)),
11786 XVECEXP (vals, 0, i));
11787 emit_move_insn (target, mem);
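/* Summary of the strategies above, with illustrative initializers for an
   int32x4_t (V4SImode):
     { 1, 1, 1, 1 }   - all constant: handled by neon_make_constant.
     { x, x, x, x }   - all the same non-constant value: VDUP from a core reg.
     { 1, 2, x, 4 }   - one variable lane: load { 1, 2, 4, 4 } as a constant,
                        then overwrite lane 2 with a vset_lane.
     { x, y, z, w }   - general case: build the vector in a stack slot and
                        load it back in one go.  */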
11790 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise an
11791 error if it doesn't, using DESC to describe the operand. EXP indicates the
11792 source location, which includes the inlining history for intrinsics. */
11794 static void
11795 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
11796 const_tree exp, const char *desc)
11798 HOST_WIDE_INT lane;
11800 gcc_assert (CONST_INT_P (operand));
11802 lane = INTVAL (operand);
11804 if (lane < low || lane >= high)
11806 if (exp)
11807 error ("%K%s %wd out of range %wd - %wd",
11808 exp, desc, lane, low, high - 1);
11809 else
11810 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
11814 /* Bounds-check lanes. */
11816 void
11817 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
11818 const_tree exp)
11820 bounds_check (operand, low, high, exp, "lane");
11823 /* Bounds-check constants. */
11825 void
11826 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
11828 bounds_check (operand, low, high, NULL_TREE, "constant");
11831 HOST_WIDE_INT
11832 neon_element_bits (machine_mode mode)
11834 return GET_MODE_UNIT_BITSIZE (mode);
11838 /* Predicates for `match_operand' and `match_operator'. */
11840 /* Return TRUE if OP is a valid coprocessor memory address pattern.
11841 WB is true if full writeback address modes are allowed and is false
11842 if limited writeback address modes (POST_INC and PRE_DEC) are
11843 allowed. */
11846 arm_coproc_mem_operand (rtx op, bool wb)
11848 rtx ind;
11850 /* Reject eliminable registers. */
11851 if (! (reload_in_progress || reload_completed || lra_in_progress)
11852 && ( reg_mentioned_p (frame_pointer_rtx, op)
11853 || reg_mentioned_p (arg_pointer_rtx, op)
11854 || reg_mentioned_p (virtual_incoming_args_rtx, op)
11855 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
11856 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
11857 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
11858 return FALSE;
11860 /* Constants are converted into offsets from labels. */
11861 if (!MEM_P (op))
11862 return FALSE;
11864 ind = XEXP (op, 0);
11866 if (reload_completed
11867 && (GET_CODE (ind) == LABEL_REF
11868 || (GET_CODE (ind) == CONST
11869 && GET_CODE (XEXP (ind, 0)) == PLUS
11870 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
11871 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
11872 return TRUE;
11874 /* Match: (mem (reg)). */
11875 if (REG_P (ind))
11876 return arm_address_register_rtx_p (ind, 0);
11878 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
11879 acceptable in any case (subject to verification by
11880 arm_address_register_rtx_p). We need WB to be true to accept
11881 PRE_INC and POST_DEC. */
11882 if (GET_CODE (ind) == POST_INC
11883 || GET_CODE (ind) == PRE_DEC
11884 || (wb
11885 && (GET_CODE (ind) == PRE_INC
11886 || GET_CODE (ind) == POST_DEC)))
11887 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
11889 if (wb
11890 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
11891 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
11892 && GET_CODE (XEXP (ind, 1)) == PLUS
11893 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
11894 ind = XEXP (ind, 1);
11896 /* Match:
11897 (plus (reg)
11898 (const)). */
11899 if (GET_CODE (ind) == PLUS
11900 && REG_P (XEXP (ind, 0))
11901 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
11902 && CONST_INT_P (XEXP (ind, 1))
11903 && INTVAL (XEXP (ind, 1)) > -1024
11904 && INTVAL (XEXP (ind, 1)) < 1024
11905 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
11906 return TRUE;
11908 return FALSE;
11911 /* Return TRUE if OP is a memory operand which we can load or store a vector
11912 to/from. TYPE is one of the following values:
11913 0 - Vector load/store (vldr)
11914 1 - Core registers (ldm)
11915 2 - Element/structure loads (vld1)
11918 neon_vector_mem_operand (rtx op, int type, bool strict)
11920 rtx ind;
11922 /* Reject eliminable registers. */
11923 if (strict && ! (reload_in_progress || reload_completed)
11924 && (reg_mentioned_p (frame_pointer_rtx, op)
11925 || reg_mentioned_p (arg_pointer_rtx, op)
11926 || reg_mentioned_p (virtual_incoming_args_rtx, op)
11927 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
11928 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
11929 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
11930 return FALSE;
11932 /* Constants are converted into offsets from labels. */
11933 if (!MEM_P (op))
11934 return FALSE;
11936 ind = XEXP (op, 0);
11938 if (reload_completed
11939 && (GET_CODE (ind) == LABEL_REF
11940 || (GET_CODE (ind) == CONST
11941 && GET_CODE (XEXP (ind, 0)) == PLUS
11942 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
11943 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
11944 return TRUE;
11946 /* Match: (mem (reg)). */
11947 if (REG_P (ind))
11948 return arm_address_register_rtx_p (ind, 0);
11950 /* Allow post-increment with Neon registers. */
11951 if ((type != 1 && GET_CODE (ind) == POST_INC)
11952 || (type == 0 && GET_CODE (ind) == PRE_DEC))
11953 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
11955 /* Allow post-increment by register for VLDn. */
11956 if (type == 2 && GET_CODE (ind) == POST_MODIFY
11957 && GET_CODE (XEXP (ind, 1)) == PLUS
11958 && REG_P (XEXP (XEXP (ind, 1), 1)))
11959 return true;
11961 /* Match:
11962 (plus (reg)
11963 (const)). */
11964 if (type == 0
11965 && GET_CODE (ind) == PLUS
11966 && REG_P (XEXP (ind, 0))
11967 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
11968 && CONST_INT_P (XEXP (ind, 1))
11969 && INTVAL (XEXP (ind, 1)) > -1024
11970 /* For quad modes, we restrict the constant offset to be slightly less
11971 than what the instruction format permits. We have no such constraint
11972 on double mode offsets. (This must match arm_legitimate_index_p.) */
11973 && (INTVAL (XEXP (ind, 1))
11974 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
11975 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
11976 return TRUE;
11978 return FALSE;
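/* Illustrative addresses accepted above (REG stands for any valid base
   register; label-based literal addresses are also accepted after reload):
     TYPE 0 (vldr/vstr):  [REG], [REG, #imm] with imm a multiple of 4 in
                          roughly -1020..1020 (slightly tighter for quad
                          modes), plus post-increment and pre-decrement;
     TYPE 1 (core ldm):   [REG] only, with no auto-modification;
     TYPE 2 (vld1/vst1):  [REG], post-increment, and post-modify by a
                          register ([REG], REG2).  */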
11981 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
11982 type. */
11984 neon_struct_mem_operand (rtx op)
11986 rtx ind;
11988 /* Reject eliminable registers. */
11989 if (! (reload_in_progress || reload_completed)
11990 && ( reg_mentioned_p (frame_pointer_rtx, op)
11991 || reg_mentioned_p (arg_pointer_rtx, op)
11992 || reg_mentioned_p (virtual_incoming_args_rtx, op)
11993 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
11994 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
11995 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
11996 return FALSE;
11998 /* Constants are converted into offsets from labels. */
11999 if (!MEM_P (op))
12000 return FALSE;
12002 ind = XEXP (op, 0);
12004 if (reload_completed
12005 && (GET_CODE (ind) == LABEL_REF
12006 || (GET_CODE (ind) == CONST
12007 && GET_CODE (XEXP (ind, 0)) == PLUS
12008 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12009 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12010 return TRUE;
12012 /* Match: (mem (reg)). */
12013 if (REG_P (ind))
12014 return arm_address_register_rtx_p (ind, 0);
12016 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12017 if (GET_CODE (ind) == POST_INC
12018 || GET_CODE (ind) == PRE_DEC)
12019 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12021 return FALSE;
12024 /* Return true if X is a register that will be eliminated later on. */
12026 arm_eliminable_register (rtx x)
12028 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12029 || REGNO (x) == ARG_POINTER_REGNUM
12030 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12031 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12034 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12035 coprocessor registers. Otherwise return NO_REGS. */
12037 enum reg_class
12038 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12040 if (mode == HFmode)
12042 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12043 return GENERAL_REGS;
12044 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12045 return NO_REGS;
12046 return GENERAL_REGS;
12049 /* The neon move patterns handle all legitimate vector and struct
12050 addresses. */
12051 if (TARGET_NEON
12052 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12053 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12054 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12055 || VALID_NEON_STRUCT_MODE (mode)))
12056 return NO_REGS;
12058 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12059 return NO_REGS;
12061 return GENERAL_REGS;
12064 /* Values which must be returned in the most-significant end of the return
12065 register. */
12067 static bool
12068 arm_return_in_msb (const_tree valtype)
12070 return (TARGET_AAPCS_BASED
12071 && BYTES_BIG_ENDIAN
12072 && (AGGREGATE_TYPE_P (valtype)
12073 || TREE_CODE (valtype) == COMPLEX_TYPE
12074 || FIXED_POINT_TYPE_P (valtype)));
12077 /* Return TRUE if X references a SYMBOL_REF. */
12079 symbol_mentioned_p (rtx x)
12081 const char * fmt;
12082 int i;
12084 if (GET_CODE (x) == SYMBOL_REF)
12085 return 1;
12087 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12088 are constant offsets, not symbols. */
12089 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12090 return 0;
12092 fmt = GET_RTX_FORMAT (GET_CODE (x));
12094 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12096 if (fmt[i] == 'E')
12098 int j;
12100 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12101 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12102 return 1;
12104 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12105 return 1;
12108 return 0;
12111 /* Return TRUE if X references a LABEL_REF. */
12113 label_mentioned_p (rtx x)
12115 const char * fmt;
12116 int i;
12118 if (GET_CODE (x) == LABEL_REF)
12119 return 1;
12121 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12122 instruction, but they are constant offsets, not symbols. */
12123 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12124 return 0;
12126 fmt = GET_RTX_FORMAT (GET_CODE (x));
12127 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12129 if (fmt[i] == 'E')
12131 int j;
12133 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12134 if (label_mentioned_p (XVECEXP (x, i, j)))
12135 return 1;
12137 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12138 return 1;
12141 return 0;
12145 tls_mentioned_p (rtx x)
12147 switch (GET_CODE (x))
12149 case CONST:
12150 return tls_mentioned_p (XEXP (x, 0));
12152 case UNSPEC:
12153 if (XINT (x, 1) == UNSPEC_TLS)
12154 return 1;
12156 /* Fall through. */
12157 default:
12158 return 0;
12162 /* Must not copy any rtx that uses a pc-relative address.
12163 Also, disallow copying of load-exclusive instructions that
12164 may appear after splitting of compare-and-swap-style operations
12165 so as to prevent those loops from being transformed away from their
12166 canonical forms (see PR 69904). */
12168 static bool
12169 arm_cannot_copy_insn_p (rtx_insn *insn)
12171 /* The tls call insn cannot be copied, as it is paired with a data
12172 word. */
12173 if (recog_memoized (insn) == CODE_FOR_tlscall)
12174 return true;
12176 subrtx_iterator::array_type array;
12177 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12179 const_rtx x = *iter;
12180 if (GET_CODE (x) == UNSPEC
12181 && (XINT (x, 1) == UNSPEC_PIC_BASE
12182 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12183 return true;
12186 rtx set = single_set (insn);
12187 if (set)
12189 rtx src = SET_SRC (set);
12190 if (GET_CODE (src) == ZERO_EXTEND)
12191 src = XEXP (src, 0);
12193 /* Catch the load-exclusive and load-acquire operations. */
12194 if (GET_CODE (src) == UNSPEC_VOLATILE
12195 && (XINT (src, 1) == VUNSPEC_LL
12196 || XINT (src, 1) == VUNSPEC_LAX))
12197 return true;
12199 return false;
12202 enum rtx_code
12203 minmax_code (rtx x)
12205 enum rtx_code code = GET_CODE (x);
12207 switch (code)
12209 case SMAX:
12210 return GE;
12211 case SMIN:
12212 return LE;
12213 case UMIN:
12214 return LEU;
12215 case UMAX:
12216 return GEU;
12217 default:
12218 gcc_unreachable ();
12222 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12224 bool
12225 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12226 int *mask, bool *signed_sat)
12228 /* The high bound must be a power of two minus one. */
12229 int log = exact_log2 (INTVAL (hi_bound) + 1);
12230 if (log == -1)
12231 return false;
12233 /* The low bound is either zero (for usat) or one less than the
12234 negation of the high bound (for ssat). */
12235 if (INTVAL (lo_bound) == 0)
12237 if (mask)
12238 *mask = log;
12239 if (signed_sat)
12240 *signed_sat = false;
12242 return true;
12245 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12247 if (mask)
12248 *mask = log + 1;
12249 if (signed_sat)
12250 *signed_sat = true;
12252 return true;
12255 return false;
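/* Worked example (a standalone sketch mirroring the checks above, not part
   of the compiler; the helper name is made up): bounds 0..255 describe an
   unsigned saturation with *MASK == 8 (usat #8), while bounds -128..127
   describe a signed saturation with *MASK == 8 (ssat #8).  */
#if 0
#include <stdio.h>

/* Return the saturation width for LO..HI, setting *IS_SIGNED, or -1 if the
   pair does not describe a usat/ssat range.  */
static int
sat_width (long lo, long hi, int *is_signed)
{
  int log = 0;
  while ((1L << log) - 1 < hi)
    log++;
  if ((1L << log) - 1 != hi)
    return -1;                      /* HI must be 2^n - 1.  */
  if (lo == 0)
    {
      *is_signed = 0;
      return log;                   /* usat #log  */
    }
  if (lo == -hi - 1)
    {
      *is_signed = 1;
      return log + 1;               /* ssat #(log + 1)  */
    }
  return -1;
}

int
main (void)
{
  int s;
  printf ("%d\n", sat_width (0, 255, &s));    /* 8, unsigned  */
  printf ("%d\n", sat_width (-128, 127, &s)); /* 8, signed    */
  return 0;
}
#endif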
12258 /* Return 1 if memory locations are adjacent. */
12260 adjacent_mem_locations (rtx a, rtx b)
12262 /* We don't guarantee to preserve the order of these memory refs. */
12263 if (volatile_refs_p (a) || volatile_refs_p (b))
12264 return 0;
12266 if ((REG_P (XEXP (a, 0))
12267 || (GET_CODE (XEXP (a, 0)) == PLUS
12268 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12269 && (REG_P (XEXP (b, 0))
12270 || (GET_CODE (XEXP (b, 0)) == PLUS
12271 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12273 HOST_WIDE_INT val0 = 0, val1 = 0;
12274 rtx reg0, reg1;
12275 int val_diff;
12277 if (GET_CODE (XEXP (a, 0)) == PLUS)
12279 reg0 = XEXP (XEXP (a, 0), 0);
12280 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12282 else
12283 reg0 = XEXP (a, 0);
12285 if (GET_CODE (XEXP (b, 0)) == PLUS)
12287 reg1 = XEXP (XEXP (b, 0), 0);
12288 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12290 else
12291 reg1 = XEXP (b, 0);
12293 /* Don't accept any offset that will require multiple
12294 instructions to handle, since this would cause the
12295 arith_adjacentmem pattern to output an overlong sequence. */
12296 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12297 return 0;
12299 /* Don't allow an eliminable register: register elimination can make
12300 the offset too large. */
12301 if (arm_eliminable_register (reg0))
12302 return 0;
12304 val_diff = val1 - val0;
12306 if (arm_ld_sched)
12308 /* If the target has load delay slots, then there's no benefit
12309 to using an ldm instruction unless the offset is zero and
12310 we are optimizing for size. */
12311 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12312 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12313 && (val_diff == 4 || val_diff == -4));
12316 return ((REGNO (reg0) == REGNO (reg1))
12317 && (val_diff == 4 || val_diff == -4));
12320 return 0;
12323 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12324 for load operations, false for store operations. CONSECUTIVE is true
12325 if the register numbers in the operation must be consecutive in the register
12326 bank. RETURN_PC is true if the value is to be loaded into the PC.
12327 The pattern we are trying to match for load is:
12328 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12329 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12332 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12334 where
12335 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12336 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12337 3. If consecutive is TRUE, then for kth register being loaded,
12338 REGNO (R_dk) = REGNO (R_d0) + k.
12339 The pattern for store is similar. */
12340 bool
12341 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12342 bool consecutive, bool return_pc)
12344 HOST_WIDE_INT count = XVECLEN (op, 0);
12345 rtx reg, mem, addr;
12346 unsigned regno;
12347 unsigned first_regno;
12348 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12349 rtx elt;
12350 bool addr_reg_in_reglist = false;
12351 bool update = false;
12352 int reg_increment;
12353 int offset_adj;
12354 int regs_per_val;
12356 /* If not in SImode, then registers must be consecutive
12357 (e.g., VLDM instructions for DFmode). */
12358 gcc_assert ((mode == SImode) || consecutive);
12359 /* Setting return_pc for stores is illegal. */
12360 gcc_assert (!return_pc || load);
12362 /* Set up the increments and the regs per val based on the mode. */
12363 reg_increment = GET_MODE_SIZE (mode);
12364 regs_per_val = reg_increment / 4;
12365 offset_adj = return_pc ? 1 : 0;
12367 if (count <= 1
12368 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12369 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12370 return false;
12372 /* Check if this is a write-back. */
12373 elt = XVECEXP (op, 0, offset_adj);
12374 if (GET_CODE (SET_SRC (elt)) == PLUS)
12376 i++;
12377 base = 1;
12378 update = true;
12380 /* The offset adjustment must be the number of registers being
12381 popped times the size of a single register. */
12382 if (!REG_P (SET_DEST (elt))
12383 || !REG_P (XEXP (SET_SRC (elt), 0))
12384 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12385 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12386 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12387 ((count - 1 - offset_adj) * reg_increment))
12388 return false;
12391 i = i + offset_adj;
12392 base = base + offset_adj;
12393 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12394 success depends on the type: VLDM can do just one reg,
12395 LDM must do at least two. */
12396 if ((count <= i) && (mode == SImode))
12397 return false;
12399 elt = XVECEXP (op, 0, i - 1);
12400 if (GET_CODE (elt) != SET)
12401 return false;
12403 if (load)
12405 reg = SET_DEST (elt);
12406 mem = SET_SRC (elt);
12408 else
12410 reg = SET_SRC (elt);
12411 mem = SET_DEST (elt);
12414 if (!REG_P (reg) || !MEM_P (mem))
12415 return false;
12417 regno = REGNO (reg);
12418 first_regno = regno;
12419 addr = XEXP (mem, 0);
12420 if (GET_CODE (addr) == PLUS)
12422 if (!CONST_INT_P (XEXP (addr, 1)))
12423 return false;
12425 offset = INTVAL (XEXP (addr, 1));
12426 addr = XEXP (addr, 0);
12429 if (!REG_P (addr))
12430 return false;
12432 /* Don't allow SP to be loaded unless it is also the base register. It
12433 guarantees that SP is reset correctly when an LDM instruction
12434 is interrupted. Otherwise, we might end up with a corrupt stack. */
12435 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12436 return false;
12438 for (; i < count; i++)
12440 elt = XVECEXP (op, 0, i);
12441 if (GET_CODE (elt) != SET)
12442 return false;
12444 if (load)
12446 reg = SET_DEST (elt);
12447 mem = SET_SRC (elt);
12449 else
12451 reg = SET_SRC (elt);
12452 mem = SET_DEST (elt);
12455 if (!REG_P (reg)
12456 || GET_MODE (reg) != mode
12457 || REGNO (reg) <= regno
12458 || (consecutive
12459 && (REGNO (reg) !=
12460 (unsigned int) (first_regno + regs_per_val * (i - base))))
12461 /* Don't allow SP to be loaded unless it is also the base register. It
12462 guarantees that SP is reset correctly when an LDM instruction
12463 is interrupted. Otherwise, we might end up with a corrupt stack. */
12464 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12465 || !MEM_P (mem)
12466 || GET_MODE (mem) != mode
12467 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12468 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12469 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12470 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12471 offset + (i - base) * reg_increment))
12472 && (!REG_P (XEXP (mem, 0))
12473 || offset + (i - base) * reg_increment != 0)))
12474 return false;
12476 regno = REGNO (reg);
12477 if (regno == REGNO (addr))
12478 addr_reg_in_reglist = true;
12481 if (load)
12483 if (update && addr_reg_in_reglist)
12484 return false;
12486 /* For Thumb-1, the address register is always modified, either by write-back
12487 or by explicit load. If the pattern does not describe an update,
12488 then the address register must be in the list of loaded registers. */
12489 if (TARGET_THUMB1)
12490 return update || addr_reg_in_reglist;
12493 return true;
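/* Example of a pattern accepted above (a two-register LDMIA in SImode,
   shown schematically; register numbers are arbitrary):
     (parallel
       [(set (reg:SI 4) (mem:SI (reg:SI 0)))
        (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))])
   The write-back form adds a leading
     (set (reg:SI 0) (plus:SI (reg:SI 0) (const_int 8)))
   element, whose constant must equal the number of transferred registers
   times the register size.  */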
12496 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12497 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12498 instruction. ADD_OFFSET is nonzero if the base address register needs
12499 to be modified with an add instruction before we can use it. */
12501 static bool
12502 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
12503 int nops, HOST_WIDE_INT add_offset)
12505 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12506 if the offset isn't small enough. The reason 2 ldrs are faster
12507 is because these ARMs are able to do more than one cache access
12508 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12509 whilst the ARM8 has a double bandwidth cache. This means that
12510 these cores can do both an instruction fetch and a data fetch in
12511 a single cycle, so the trick of calculating the address into a
12512 scratch register (one of the result regs) and then doing a load
12513 multiple actually becomes slower (and no smaller in code size).
12514 That is the transformation
12516 ldr rd1, [rbase + offset]
12517 ldr rd2, [rbase + offset + 4]
12521 add rd1, rbase, offset
12522 ldmia rd1, {rd1, rd2}
12524 produces worse code -- '3 cycles + any stalls on rd2' instead of
12525 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12526 access per cycle, the first sequence could never complete in less
12527 than 6 cycles, whereas the ldm sequence would only take 5 and
12528 would make better use of sequential accesses if not hitting the
12529 cache.
12531 We cheat here and test 'arm_ld_sched' which we currently know to
12532 only be true for the ARM8, ARM9 and StrongARM. If this ever
12533 changes, then the test below needs to be reworked. */
12534 if (nops == 2 && arm_ld_sched && add_offset != 0)
12535 return false;
12537 /* XScale has load-store double instructions, but they have stricter
12538 alignment requirements than load-store multiple, so we cannot
12539 use them.
12541 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12542 the pipeline until completion.
12544 NREGS CYCLES
12545 1 3
12546 2 4
12547 3 5
12548 4 6
12550 An ldr instruction takes 1-3 cycles, but does not block the
12551 pipeline.
12553 NREGS CYCLES
12554 1 1-3
12555 2 2-6
12556 3 3-9
12557 4 4-12
12559 Best case ldr will always win. However, the more ldr instructions
12560 we issue, the less likely we are to be able to schedule them well.
12561 Using ldr instructions also increases code size.
12563 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
12564 for counts of 3 or 4 regs. */
12565 if (nops <= 2 && arm_tune_xscale && !optimize_size)
12566 return false;
12567 return true;
12570 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
12571 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
12572 an array ORDER which describes the sequence to use when accessing the
12573 offsets that produces an ascending order. In this sequence, each
12574 offset must be larger by exactly 4 than the previous one. ORDER[0]
12575 must have been filled in with the lowest offset by the caller.
12576 If UNSORTED_REGS is nonnull, it is an array of register numbers that
12577 we use to verify that ORDER produces an ascending order of registers.
12578 Return true if it was possible to construct such an order, false if
12579 not. */
12581 static bool
12582 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
12583 int *unsorted_regs)
12585 int i;
12586 for (i = 1; i < nops; i++)
12588 int j;
12590 order[i] = order[i - 1];
12591 for (j = 0; j < nops; j++)
12592 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
12594 /* We must find exactly one offset that is higher than the
12595 previous one by 4. */
12596 if (order[i] != order[i - 1])
12597 return false;
12598 order[i] = j;
12600 if (order[i] == order[i - 1])
12601 return false;
12602 /* The register numbers must be ascending. */
12603 if (unsorted_regs != NULL
12604 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
12605 return false;
12607 return true;
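/* A standalone sketch of the ordering walk above (not part of the compiler;
   it omits the duplicate-match and ascending-register checks): given the
   offsets { 8, 0, 4, 12 } and ORDER[0] pointing at the value 0, it produces
   the order 1, 2, 0, 3, i.e. offsets 0, 4, 8, 12.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long offsets[4] = { 8, 0, 4, 12 };
  int order[4] = { 1, 0, 0, 0 };    /* order[0] = index of the lowest offset.  */

  for (int i = 1; i < 4; i++)
    {
      order[i] = order[i - 1];
      for (int j = 0; j < 4; j++)
        if (offsets[j] == offsets[order[i - 1]] + 4)
          order[i] = j;
      if (order[i] == order[i - 1])
        return 1;                   /* No offset exactly 4 higher: give up.  */
    }

  for (int i = 0; i < 4; i++)
    printf ("%d ", order[i]);       /* Prints: 1 2 0 3  */
  printf ("\n");
  return 0;
}
#endif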
12610 /* Used to determine in a peephole whether a sequence of load
12611 instructions can be changed into a load-multiple instruction.
12612 NOPS is the number of separate load instructions we are examining. The
12613 first NOPS entries in OPERANDS are the destination registers, the
12614 next NOPS entries are memory operands. If this function is
12615 successful, *BASE is set to the common base register of the memory
12616 accesses; *LOAD_OFFSET is set to the first memory location's offset
12617 from that base register.
12618 REGS is an array filled in with the destination register numbers.
12619 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
12620 insn numbers to an ascending order of loads. If CHECK_REGS is true,
12621 the sequence of registers in REGS matches the loads from ascending memory
12622 locations, and the function verifies that the register numbers are
12623 themselves ascending. If CHECK_REGS is false, the register numbers
12624 are stored in the order they are found in the operands. */
12625 static int
12626 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
12627 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
12629 int unsorted_regs[MAX_LDM_STM_OPS];
12630 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
12631 int order[MAX_LDM_STM_OPS];
12632 rtx base_reg_rtx = NULL;
12633 int base_reg = -1;
12634 int i, ldm_case;
12636 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
12637 easily extended if required. */
12638 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
12640 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
12642 /* Loop over the operands and check that the memory references are
12643 suitable (i.e. immediate offsets from the same base register). At
12644 the same time, extract the target register, and the memory
12645 offsets. */
12646 for (i = 0; i < nops; i++)
12648 rtx reg;
12649 rtx offset;
12651 /* Convert a subreg of a mem into the mem itself. */
12652 if (GET_CODE (operands[nops + i]) == SUBREG)
12653 operands[nops + i] = alter_subreg (operands + (nops + i), true);
12655 gcc_assert (MEM_P (operands[nops + i]));
12657 /* Don't reorder volatile memory references; it doesn't seem worth
12658 looking for the case where the order is ok anyway. */
12659 if (MEM_VOLATILE_P (operands[nops + i]))
12660 return 0;
12662 offset = const0_rtx;
12664 if ((REG_P (reg = XEXP (operands[nops + i], 0))
12665 || (GET_CODE (reg) == SUBREG
12666 && REG_P (reg = SUBREG_REG (reg))))
12667 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
12668 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
12669 || (GET_CODE (reg) == SUBREG
12670 && REG_P (reg = SUBREG_REG (reg))))
12671 && (CONST_INT_P (offset
12672 = XEXP (XEXP (operands[nops + i], 0), 1)))))
12674 if (i == 0)
12676 base_reg = REGNO (reg);
12677 base_reg_rtx = reg;
12678 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
12679 return 0;
12681 else if (base_reg != (int) REGNO (reg))
12682 /* Not addressed from the same base register. */
12683 return 0;
12685 unsorted_regs[i] = (REG_P (operands[i])
12686 ? REGNO (operands[i])
12687 : REGNO (SUBREG_REG (operands[i])));
12689 /* If it isn't an integer register, or if it overwrites the
12690 base register but isn't the last insn in the list, then
12691 we can't do this. */
12692 if (unsorted_regs[i] < 0
12693 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
12694 || unsorted_regs[i] > 14
12695 || (i != nops - 1 && unsorted_regs[i] == base_reg))
12696 return 0;
12698 /* Don't allow SP to be loaded unless it is also the base
12699 register. It guarantees that SP is reset correctly when
12700 an LDM instruction is interrupted. Otherwise, we might
12701 end up with a corrupt stack. */
12702 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
12703 return 0;
12705 unsorted_offsets[i] = INTVAL (offset);
12706 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
12707 order[0] = i;
12709 else
12710 /* Not a suitable memory address. */
12711 return 0;
12714 /* All the useful information has now been extracted from the
12715 operands into unsorted_regs and unsorted_offsets; additionally,
12716 order[0] has been set to the lowest offset in the list. Sort
12717 the offsets into order, verifying that they are adjacent, and
12718 check that the register numbers are ascending. */
12719 if (!compute_offset_order (nops, unsorted_offsets, order,
12720 check_regs ? unsorted_regs : NULL))
12721 return 0;
12723 if (saved_order)
12724 memcpy (saved_order, order, sizeof order);
12726 if (base)
12728 *base = base_reg;
12730 for (i = 0; i < nops; i++)
12731 regs[i] = unsorted_regs[check_regs ? order[i] : i];
12733 *load_offset = unsorted_offsets[order[0]];
12736 if (TARGET_THUMB1
12737 && !peep2_reg_dead_p (nops, base_reg_rtx))
12738 return 0;
12740 if (unsorted_offsets[order[0]] == 0)
12741 ldm_case = 1; /* ldmia */
12742 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
12743 ldm_case = 2; /* ldmib */
12744 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
12745 ldm_case = 3; /* ldmda */
12746 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
12747 ldm_case = 4; /* ldmdb */
12748 else if (const_ok_for_arm (unsorted_offsets[order[0]])
12749 || const_ok_for_arm (-unsorted_offsets[order[0]]))
12750 ldm_case = 5;
12751 else
12752 return 0;
12754 if (!multiple_operation_profitable_p (false, nops,
12755 ldm_case == 5
12756 ? unsorted_offsets[order[0]] : 0))
12757 return 0;
12759 return ldm_case;
12762 /* Used to determine in a peephole whether a sequence of store instructions can
12763 be changed into a store-multiple instruction.
12764 NOPS is the number of separate store instructions we are examining.
12765 NOPS_TOTAL is the total number of instructions recognized by the peephole
12766 pattern.
12767 The first NOPS entries in OPERANDS are the source registers, the next
12768 NOPS entries are memory operands. If this function is successful, *BASE is
12769 set to the common base register of the memory accesses; *LOAD_OFFSET is set
12770 to the first memory location's offset from that base register. REGS is an
12771 array filled in with the source register numbers, REG_RTXS (if nonnull) is
12772 likewise filled with the corresponding rtx's.
12773 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
12774 numbers to an ascending order of stores.
12775 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
12776 from ascending memory locations, and the function verifies that the register
12777 numbers are themselves ascending. If CHECK_REGS is false, the register
12778 numbers are stored in the order they are found in the operands. */
12779 static int
12780 store_multiple_sequence (rtx *operands, int nops, int nops_total,
12781 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
12782 HOST_WIDE_INT *load_offset, bool check_regs)
12784 int unsorted_regs[MAX_LDM_STM_OPS];
12785 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
12786 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
12787 int order[MAX_LDM_STM_OPS];
12788 int base_reg = -1;
12789 rtx base_reg_rtx = NULL;
12790 int i, stm_case;
12792 /* Write back of base register is currently only supported for Thumb 1. */
12793 int base_writeback = TARGET_THUMB1;
12795 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
12796 easily extended if required. */
12797 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
12799 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
12801 /* Loop over the operands and check that the memory references are
12802 suitable (i.e. immediate offsets from the same base register). At
12803 the same time, extract the target register, and the memory
12804 offsets. */
12805 for (i = 0; i < nops; i++)
12807 rtx reg;
12808 rtx offset;
12810 /* Convert a subreg of a mem into the mem itself. */
12811 if (GET_CODE (operands[nops + i]) == SUBREG)
12812 operands[nops + i] = alter_subreg (operands + (nops + i), true);
12814 gcc_assert (MEM_P (operands[nops + i]));
12816 /* Don't reorder volatile memory references; it doesn't seem worth
12817 looking for the case where the order is ok anyway. */
12818 if (MEM_VOLATILE_P (operands[nops + i]))
12819 return 0;
12821 offset = const0_rtx;
12823 if ((REG_P (reg = XEXP (operands[nops + i], 0))
12824 || (GET_CODE (reg) == SUBREG
12825 && REG_P (reg = SUBREG_REG (reg))))
12826 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
12827 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
12828 || (GET_CODE (reg) == SUBREG
12829 && REG_P (reg = SUBREG_REG (reg))))
12830 && (CONST_INT_P (offset
12831 = XEXP (XEXP (operands[nops + i], 0), 1)))))
12833 unsorted_reg_rtxs[i] = (REG_P (operands[i])
12834 ? operands[i] : SUBREG_REG (operands[i]));
12835 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
12837 if (i == 0)
12839 base_reg = REGNO (reg);
12840 base_reg_rtx = reg;
12841 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
12842 return 0;
12844 else if (base_reg != (int) REGNO (reg))
12845 /* Not addressed from the same base register. */
12846 return 0;
12848 /* If it isn't an integer register, then we can't do this. */
12849 if (unsorted_regs[i] < 0
12850 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
12851 /* The effects are unpredictable if the base register is
12852 both updated and stored. */
12853 || (base_writeback && unsorted_regs[i] == base_reg)
12854 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
12855 || unsorted_regs[i] > 14)
12856 return 0;
12858 unsorted_offsets[i] = INTVAL (offset);
12859 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
12860 order[0] = i;
12862 else
12863 /* Not a suitable memory address. */
12864 return 0;
12867 /* All the useful information has now been extracted from the
12868 operands into unsorted_regs and unsorted_offsets; additionally,
12869 order[0] has been set to the lowest offset in the list. Sort
12870 the offsets into order, verifying that they are adjacent, and
12871 check that the register numbers are ascending. */
12872 if (!compute_offset_order (nops, unsorted_offsets, order,
12873 check_regs ? unsorted_regs : NULL))
12874 return 0;
12876 if (saved_order)
12877 memcpy (saved_order, order, sizeof order);
12879 if (base)
12881 *base = base_reg;
12883 for (i = 0; i < nops; i++)
12885 regs[i] = unsorted_regs[check_regs ? order[i] : i];
12886 if (reg_rtxs)
12887 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
12890 *load_offset = unsorted_offsets[order[0]];
12893 if (TARGET_THUMB1
12894 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
12895 return 0;
12897 if (unsorted_offsets[order[0]] == 0)
12898 stm_case = 1; /* stmia */
12899 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
12900 stm_case = 2; /* stmib */
12901 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
12902 stm_case = 3; /* stmda */
12903 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
12904 stm_case = 4; /* stmdb */
12905 else
12906 return 0;
12908 if (!multiple_operation_profitable_p (false, nops, 0))
12909 return 0;
12911 return stm_case;
12914 /* Routines for use in generating RTL. */
12916 /* Generate a load-multiple instruction. COUNT is the number of loads in
12917 the instruction; REGS and MEMS are arrays containing the operands.
12918 BASEREG is the base register to be used in addressing the memory operands.
12919 WBACK_OFFSET is nonzero if the instruction should update the base
12920 register. */
12922 static rtx
12923 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
12924 HOST_WIDE_INT wback_offset)
12926 int i = 0, j;
12927 rtx result;
12929 if (!multiple_operation_profitable_p (false, count, 0))
12931 rtx seq;
12933 start_sequence ();
12935 for (i = 0; i < count; i++)
12936 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
12938 if (wback_offset != 0)
12939 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
12941 seq = get_insns ();
12942 end_sequence ();
12944 return seq;
12947 result = gen_rtx_PARALLEL (VOIDmode,
12948 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
12949 if (wback_offset != 0)
12951 XVECEXP (result, 0, 0)
12952 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
12953 i = 1;
12954 count++;
12957 for (j = 0; i < count; i++, j++)
12958 XVECEXP (result, 0, i)
12959 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
12961 return result;
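/* Schematic example of the PARALLEL built above for COUNT == 2 with a
   write-back offset of 8, assuming the MEMs supplied by the caller address
   the base register directly:
     (parallel
       [(set (reg:SI base) (plus:SI (reg:SI base) (const_int 8)))
        (set (reg:SI r0) (mem:SI (reg:SI base)))
        (set (reg:SI r1) (mem:SI (plus:SI (reg:SI base) (const_int 4))))])
   When an ldm would not be profitable, the fallback above instead emits the
   equivalent sequence of individual loads plus an add for the write-back.  */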
12964 /* Generate a store-multiple instruction. COUNT is the number of stores in
12965 the instruction; REGS and MEMS are arrays containing the operands.
12966 BASEREG is the base register to be used in addressing the memory operands.
12967 WBACK_OFFSET is nonzero if the instruction should update the base
12968 register. */
12970 static rtx
12971 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
12972 HOST_WIDE_INT wback_offset)
12974 int i = 0, j;
12975 rtx result;
12977 if (GET_CODE (basereg) == PLUS)
12978 basereg = XEXP (basereg, 0);
12980 if (!multiple_operation_profitable_p (false, count, 0))
12982 rtx seq;
12984 start_sequence ();
12986 for (i = 0; i < count; i++)
12987 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
12989 if (wback_offset != 0)
12990 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
12992 seq = get_insns ();
12993 end_sequence ();
12995 return seq;
12998 result = gen_rtx_PARALLEL (VOIDmode,
12999 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13000 if (wback_offset != 0)
13002 XVECEXP (result, 0, 0)
13003 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13004 i = 1;
13005 count++;
13008 for (j = 0; i < count; i++, j++)
13009 XVECEXP (result, 0, i)
13010 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13012 return result;
13015 /* Generate either a load-multiple or a store-multiple instruction. This
13016 function can be used in situations where we can start with a single MEM
13017 rtx and adjust its address upwards.
13018 COUNT is the number of operations in the instruction, not counting a
13019 possible update of the base register. REGS is an array containing the
13020 register operands.
13021 BASEREG is the base register to be used in addressing the memory operands,
13022 which are constructed from BASEMEM.
13023 WRITE_BACK specifies whether the generated instruction should include an
13024 update of the base register.
13025 OFFSETP is used to pass an offset to and from this function; this offset
13026 is not used when constructing the address (instead BASEMEM should have an
13027 appropriate offset in its address), it is used only for setting
13028 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13030 static rtx
13031 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13032 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13034 rtx mems[MAX_LDM_STM_OPS];
13035 HOST_WIDE_INT offset = *offsetp;
13036 int i;
13038 gcc_assert (count <= MAX_LDM_STM_OPS);
13040 if (GET_CODE (basereg) == PLUS)
13041 basereg = XEXP (basereg, 0);
13043 for (i = 0; i < count; i++)
13045 rtx addr = plus_constant (Pmode, basereg, i * 4);
13046 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13047 offset += 4;
13050 if (write_back)
13051 *offsetp = offset;
13053 if (is_load)
13054 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13055 write_back ? 4 * count : 0);
13056 else
13057 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13058 write_back ? 4 * count : 0);
13062 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13063 rtx basemem, HOST_WIDE_INT *offsetp)
13065 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13066 offsetp);
13070 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13071 rtx basemem, HOST_WIDE_INT *offsetp)
13073 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13074 offsetp);
13077 /* Called from a peephole2 expander to turn a sequence of loads into an
13078 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13079 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13080 is true if we can reorder the registers because they are used commutatively
13081 subsequently.
13082 Returns true iff we could generate a new instruction. */
13084 bool
13085 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13087 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13088 rtx mems[MAX_LDM_STM_OPS];
13089 int i, j, base_reg;
13090 rtx base_reg_rtx;
13091 HOST_WIDE_INT offset;
13092 int write_back = FALSE;
13093 int ldm_case;
13094 rtx addr;
13096 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13097 &base_reg, &offset, !sort_regs);
13099 if (ldm_case == 0)
13100 return false;
13102 if (sort_regs)
13103 for (i = 0; i < nops - 1; i++)
13104 for (j = i + 1; j < nops; j++)
13105 if (regs[i] > regs[j])
13107 int t = regs[i];
13108 regs[i] = regs[j];
13109 regs[j] = t;
13111 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13113 if (TARGET_THUMB1)
13115 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13116 gcc_assert (ldm_case == 1 || ldm_case == 5);
13117 write_back = TRUE;
13120 if (ldm_case == 5)
13122 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13123 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13124 offset = 0;
13125 if (!TARGET_THUMB1)
13127 base_reg = regs[0];
13128 base_reg_rtx = newbase;
13132 for (i = 0; i < nops; i++)
13134 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13135 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13136 SImode, addr, 0);
13138 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13139 write_back ? offset + i * 4 : 0));
13140 return true;
13143 /* Called from a peephole2 expander to turn a sequence of stores into an
13144 STM instruction. OPERANDS are the operands found by the peephole matcher;
13145 NOPS indicates how many separate stores we are trying to combine.
13146 Returns true iff we could generate a new instruction. */
13148 bool
13149 gen_stm_seq (rtx *operands, int nops)
13151 int i;
13152 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13153 rtx mems[MAX_LDM_STM_OPS];
13154 int base_reg;
13155 rtx base_reg_rtx;
13156 HOST_WIDE_INT offset;
13157 int write_back = FALSE;
13158 int stm_case;
13159 rtx addr;
13160 bool base_reg_dies;
13162 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13163 mem_order, &base_reg, &offset, true);
13165 if (stm_case == 0)
13166 return false;
13168 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13170 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13171 if (TARGET_THUMB1)
13173 gcc_assert (base_reg_dies);
13174 write_back = TRUE;
13177 if (stm_case == 5)
13179 gcc_assert (base_reg_dies);
13180 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13181 offset = 0;
13184 addr = plus_constant (Pmode, base_reg_rtx, offset);
13186 for (i = 0; i < nops; i++)
13188 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13189 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13190 SImode, addr, 0);
13192 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13193 write_back ? offset + i * 4 : 0));
13194 return true;
13197 /* Called from a peephole2 expander to turn a sequence of stores that are
13198 preceded by constant loads into an STM instruction. OPERANDS are the
13199 operands found by the peephole matcher; NOPS indicates how many
13200 separate stores we are trying to combine; there are 2 * NOPS
13201 instructions in the peephole.
13202 Returns true iff we could generate a new instruction. */
13204 bool
13205 gen_const_stm_seq (rtx *operands, int nops)
13207 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13208 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13209 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13210 rtx mems[MAX_LDM_STM_OPS];
13211 int base_reg;
13212 rtx base_reg_rtx;
13213 HOST_WIDE_INT offset;
13214 int write_back = FALSE;
13215 int stm_case;
13216 rtx addr;
13217 bool base_reg_dies;
13218 int i, j;
13219 HARD_REG_SET allocated;
13221 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13222 mem_order, &base_reg, &offset, false);
13224 if (stm_case == 0)
13225 return false;
13227 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13229 /* If the same register is used more than once, try to find a free
13230 register. */
13231 CLEAR_HARD_REG_SET (allocated);
13232 for (i = 0; i < nops; i++)
13234 for (j = i + 1; j < nops; j++)
13235 if (regs[i] == regs[j])
13237 rtx t = peep2_find_free_register (0, nops * 2,
13238 TARGET_THUMB1 ? "l" : "r",
13239 SImode, &allocated);
13240 if (t == NULL_RTX)
13241 return false;
13242 reg_rtxs[i] = t;
13243 regs[i] = REGNO (t);
13247 /* Compute an ordering that maps the register numbers to an ascending
13248 sequence. */
13249 reg_order[0] = 0;
13250 for (i = 0; i < nops; i++)
13251 if (regs[i] < regs[reg_order[0]])
13252 reg_order[0] = i;
13254 for (i = 1; i < nops; i++)
13256 int this_order = reg_order[i - 1];
13257 for (j = 0; j < nops; j++)
13258 if (regs[j] > regs[reg_order[i - 1]]
13259 && (this_order == reg_order[i - 1]
13260 || regs[j] < regs[this_order]))
13261 this_order = j;
13262 reg_order[i] = this_order;
13265 /* Ensure that registers that must be live after the instruction end
13266 up with the correct value. */
13267 for (i = 0; i < nops; i++)
13269 int this_order = reg_order[i];
13270 if ((this_order != mem_order[i]
13271 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13272 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13273 return false;
13276 /* Load the constants. */
13277 for (i = 0; i < nops; i++)
13279 rtx op = operands[2 * nops + mem_order[i]];
13280 sorted_regs[i] = regs[reg_order[i]];
13281 emit_move_insn (reg_rtxs[reg_order[i]], op);
13284 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13286 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13287 if (TARGET_THUMB1)
13289 gcc_assert (base_reg_dies);
13290 write_back = TRUE;
13293 if (stm_case == 5)
13295 gcc_assert (base_reg_dies);
13296 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13297 offset = 0;
13300 addr = plus_constant (Pmode, base_reg_rtx, offset);
13302 for (i = 0; i < nops; i++)
13304 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13305 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13306 SImode, addr, 0);
13308 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13309 write_back ? offset + i * 4 : 0));
13310 return true;
13313 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13314 unaligned copies on processors which support unaligned semantics for those
13315 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13316 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13317 An interleave factor of 1 (the minimum) will perform no interleaving.
13318 Load/store multiple are used for aligned addresses where possible. */
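/* As a rough sketch (register names are illustrative), with
   INTERLEAVE_FACTOR == 2 and both ends unaligned, each 8-byte chunk is
   copied as
       ldr r4, [src]       @ unaligned load
       ldr r5, [src, #4]
       str r4, [dst]       @ unaligned store
       str r5, [dst, #4]
   whereas an aligned source or destination instead uses a single ldm or
   stm for the whole chunk.  */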
13320 static void
13321 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13322 HOST_WIDE_INT length,
13323 unsigned int interleave_factor)
13325 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13326 int *regnos = XALLOCAVEC (int, interleave_factor);
13327 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13328 HOST_WIDE_INT i, j;
13329 HOST_WIDE_INT remaining = length, words;
13330 rtx halfword_tmp = NULL, byte_tmp = NULL;
13331 rtx dst, src;
13332 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13333 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13334 HOST_WIDE_INT srcoffset, dstoffset;
13335 HOST_WIDE_INT src_autoinc, dst_autoinc;
13336 rtx mem, addr;
13338 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13340 /* Use hard registers if we have aligned source or destination so we can use
13341 load/store multiple with contiguous registers. */
13342 if (dst_aligned || src_aligned)
13343 for (i = 0; i < interleave_factor; i++)
13344 regs[i] = gen_rtx_REG (SImode, i);
13345 else
13346 for (i = 0; i < interleave_factor; i++)
13347 regs[i] = gen_reg_rtx (SImode);
13349 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13350 src = copy_addr_to_reg (XEXP (srcbase, 0));
13352 srcoffset = dstoffset = 0;
13354 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13355 For copying the last bytes we want to subtract this offset again. */
13356 src_autoinc = dst_autoinc = 0;
13358 for (i = 0; i < interleave_factor; i++)
13359 regnos[i] = i;
13361 /* Copy BLOCK_SIZE_BYTES chunks. */
13363 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13365 /* Load words. */
13366 if (src_aligned && interleave_factor > 1)
13368 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13369 TRUE, srcbase, &srcoffset));
13370 src_autoinc += UNITS_PER_WORD * interleave_factor;
13372 else
13374 for (j = 0; j < interleave_factor; j++)
13376 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13377 - src_autoinc));
13378 mem = adjust_automodify_address (srcbase, SImode, addr,
13379 srcoffset + j * UNITS_PER_WORD);
13380 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13382 srcoffset += block_size_bytes;
13385 /* Store words. */
13386 if (dst_aligned && interleave_factor > 1)
13388 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13389 TRUE, dstbase, &dstoffset));
13390 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13392 else
13394 for (j = 0; j < interleave_factor; j++)
13396 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13397 - dst_autoinc));
13398 mem = adjust_automodify_address (dstbase, SImode, addr,
13399 dstoffset + j * UNITS_PER_WORD);
13400 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13402 dstoffset += block_size_bytes;
13405 remaining -= block_size_bytes;
13408 /* Copy any whole words left (note these aren't interleaved with any
13409 subsequent halfword/byte load/stores in the interests of simplicity). */
13411 words = remaining / UNITS_PER_WORD;
13413 gcc_assert (words < interleave_factor);
13415 if (src_aligned && words > 1)
13417 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13418 &srcoffset));
13419 src_autoinc += UNITS_PER_WORD * words;
13421 else
13423 for (j = 0; j < words; j++)
13425 addr = plus_constant (Pmode, src,
13426 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13427 mem = adjust_automodify_address (srcbase, SImode, addr,
13428 srcoffset + j * UNITS_PER_WORD);
13429 if (src_aligned)
13430 emit_move_insn (regs[j], mem);
13431 else
13432 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13434 srcoffset += words * UNITS_PER_WORD;
13437 if (dst_aligned && words > 1)
13439 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13440 &dstoffset));
13441 dst_autoinc += words * UNITS_PER_WORD;
13443 else
13445 for (j = 0; j < words; j++)
13447 addr = plus_constant (Pmode, dst,
13448 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13449 mem = adjust_automodify_address (dstbase, SImode, addr,
13450 dstoffset + j * UNITS_PER_WORD);
13451 if (dst_aligned)
13452 emit_move_insn (mem, regs[j]);
13453 else
13454 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13456 dstoffset += words * UNITS_PER_WORD;
13459 remaining -= words * UNITS_PER_WORD;
13461 gcc_assert (remaining < 4);
13463 /* Copy a halfword if necessary. */
13465 if (remaining >= 2)
13467 halfword_tmp = gen_reg_rtx (SImode);
13469 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13470 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13471 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13473 /* Either write out immediately, or delay until we've loaded the last
13474 byte, depending on interleave factor. */
13475 if (interleave_factor == 1)
13477 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13478 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13479 emit_insn (gen_unaligned_storehi (mem,
13480 gen_lowpart (HImode, halfword_tmp)));
13481 halfword_tmp = NULL;
13482 dstoffset += 2;
13485 remaining -= 2;
13486 srcoffset += 2;
13489 gcc_assert (remaining < 2);
13491 /* Copy last byte. */
13493 if ((remaining & 1) != 0)
13495 byte_tmp = gen_reg_rtx (SImode);
13497 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13498 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13499 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
13501 if (interleave_factor == 1)
13503 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13504 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13505 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13506 byte_tmp = NULL;
13507 dstoffset++;
13510 remaining--;
13511 srcoffset++;
13514 /* Store last halfword if we haven't done so already. */
13516 if (halfword_tmp)
13518 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13519 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13520 emit_insn (gen_unaligned_storehi (mem,
13521 gen_lowpart (HImode, halfword_tmp)));
13522 dstoffset += 2;
13525 /* Likewise for last byte. */
13527 if (byte_tmp)
13529 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13530 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13531 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13532 dstoffset++;
13535 gcc_assert (remaining == 0 && srcoffset == dstoffset);
13538 /* From mips_adjust_block_mem:
13540 Helper function for doing a loop-based block operation on memory
13541 reference MEM. Each iteration of the loop will operate on LENGTH
13542 bytes of MEM.
13544 Create a new base register for use within the loop and point it to
13545 the start of MEM. Create a new memory reference that uses this
13546 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13548 static void
13549 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
13550 rtx *loop_mem)
13552 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
13554 /* Although the new mem does not refer to a known location,
13555 it does keep up to LENGTH bytes of alignment. */
13556 *loop_mem = change_address (mem, BLKmode, *loop_reg);
13557 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
13560 /* From mips_block_move_loop:
13562 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
13563 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
13564 the memory regions do not overlap. */
13566 static void
13567 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
13568 unsigned int interleave_factor,
13569 HOST_WIDE_INT bytes_per_iter)
13571 rtx src_reg, dest_reg, final_src, test;
13572 HOST_WIDE_INT leftover;
13574 leftover = length % bytes_per_iter;
13575 length -= leftover;
13577 /* Create registers and memory references for use within the loop. */
13578 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
13579 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
13581 /* Calculate the value that SRC_REG should have after the last iteration of
13582 the loop. */
13583 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
13584 0, 0, OPTAB_WIDEN);
13586 /* Emit the start of the loop. */
13587 rtx_code_label *label = gen_label_rtx ();
13588 emit_label (label);
13590 /* Emit the loop body. */
13591 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
13592 interleave_factor);
13594 /* Move on to the next block. */
13595 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
13596 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
13598 /* Emit the loop condition. */
13599 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
13600 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
13602 /* Mop up any left-over bytes. */
13603 if (leftover)
13604 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
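/* For example, with LENGTH == 37 and BYTES_PER_ITER == 16 the loop above
   copies 32 bytes in two iterations and the trailing straight-line call
   copies the remaining 5 bytes.  */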
13607 /* Emit a block move when either the source or destination is unaligned (not
13608 aligned to a four-byte boundary). This may need further tuning depending on
13609 core type, optimize_size setting, etc. */
13611 static int
13612 arm_movmemqi_unaligned (rtx *operands)
13614 HOST_WIDE_INT length = INTVAL (operands[2]);
13616 if (optimize_size)
13618 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
13619 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
13620 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
13621 size of code if optimizing for size. We'll use ldm/stm if src_aligned
13622 or dst_aligned though: allow more interleaving in those cases since the
13623 resulting code can be smaller. */
13624 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
13625 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
13627 if (length > 12)
13628 arm_block_move_unaligned_loop (operands[0], operands[1], length,
13629 interleave_factor, bytes_per_iter);
13630 else
13631 arm_block_move_unaligned_straight (operands[0], operands[1], length,
13632 interleave_factor);
13634 else
13636 /* Note that the loop created by arm_block_move_unaligned_loop may be
13637 subject to loop unrolling, which makes tuning this condition a little
13638 redundant. */
13639 if (length > 32)
13640 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
13641 else
13642 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
13645 return 1;
13649 arm_gen_movmemqi (rtx *operands)
13651 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
13652 HOST_WIDE_INT srcoffset, dstoffset;
13653 int i;
13654 rtx src, dst, srcbase, dstbase;
13655 rtx part_bytes_reg = NULL;
13656 rtx mem;
13658 if (!CONST_INT_P (operands[2])
13659 || !CONST_INT_P (operands[3])
13660 || INTVAL (operands[2]) > 64)
13661 return 0;
13663 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
13664 return arm_movmemqi_unaligned (operands);
13666 if (INTVAL (operands[3]) & 3)
13667 return 0;
13669 dstbase = operands[0];
13670 srcbase = operands[1];
13672 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
13673 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
13675 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
13676 out_words_to_go = INTVAL (operands[2]) / 4;
13677 last_bytes = INTVAL (operands[2]) & 3;
13678 dstoffset = srcoffset = 0;
13680 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
13681 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
13683 for (i = 0; in_words_to_go >= 2; i+=4)
13685 if (in_words_to_go > 4)
13686 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
13687 TRUE, srcbase, &srcoffset));
13688 else
13689 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
13690 src, FALSE, srcbase,
13691 &srcoffset));
13693 if (out_words_to_go)
13695 if (out_words_to_go > 4)
13696 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
13697 TRUE, dstbase, &dstoffset));
13698 else if (out_words_to_go != 1)
13699 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
13700 out_words_to_go, dst,
13701 (last_bytes == 0
13702 ? FALSE : TRUE),
13703 dstbase, &dstoffset));
13704 else
13706 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
13707 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
13708 if (last_bytes != 0)
13710 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
13711 dstoffset += 4;
13716 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
13717 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
13720 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
13721 if (out_words_to_go)
13723 rtx sreg;
13725 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
13726 sreg = copy_to_reg (mem);
13728 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
13729 emit_move_insn (mem, sreg);
13730 in_words_to_go--;
13732 gcc_assert (!in_words_to_go); /* Sanity check */
13735 if (in_words_to_go)
13737 gcc_assert (in_words_to_go > 0);
13739 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
13740 part_bytes_reg = copy_to_mode_reg (SImode, mem);
13743 gcc_assert (!last_bytes || part_bytes_reg);
13745 if (BYTES_BIG_ENDIAN && last_bytes)
13747 rtx tmp = gen_reg_rtx (SImode);
13749 /* The bytes we want are in the top end of the word. */
13750 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
13751 GEN_INT (8 * (4 - last_bytes))));
13752 part_bytes_reg = tmp;
13754 while (last_bytes)
13756 mem = adjust_automodify_address (dstbase, QImode,
13757 plus_constant (Pmode, dst,
13758 last_bytes - 1),
13759 dstoffset + last_bytes - 1);
13760 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
13762 if (--last_bytes)
13764 tmp = gen_reg_rtx (SImode);
13765 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
13766 part_bytes_reg = tmp;
13771 else
13773 if (last_bytes > 1)
13775 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
13776 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
13777 last_bytes -= 2;
13778 if (last_bytes)
13780 rtx tmp = gen_reg_rtx (SImode);
13781 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
13782 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
13783 part_bytes_reg = tmp;
13784 dstoffset += 2;
13788 if (last_bytes)
13790 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
13791 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
13795 return 1;
13798 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
13799 by mode size. */
13800 inline static rtx
13801 next_consecutive_mem (rtx mem)
13803 machine_mode mode = GET_MODE (mem);
13804 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
13805 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
13807 return adjust_automodify_address (mem, mode, addr, offset);
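/* For example, an SImode reference at some address X is turned into an
   SImode reference at X + 4.  */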
13810 /* Copy using LDRD/STRD instructions whenever possible.
13811 Returns true upon success. */
13812 bool
13813 gen_movmem_ldrd_strd (rtx *operands)
13815 unsigned HOST_WIDE_INT len;
13816 HOST_WIDE_INT align;
13817 rtx src, dst, base;
13818 rtx reg0;
13819 bool src_aligned, dst_aligned;
13820 bool src_volatile, dst_volatile;
13822 gcc_assert (CONST_INT_P (operands[2]));
13823 gcc_assert (CONST_INT_P (operands[3]));
13825 len = UINTVAL (operands[2]);
13826 if (len > 64)
13827 return false;
13829 /* Maximum alignment we can assume for both src and dst buffers. */
13830 align = INTVAL (operands[3]);
13832 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
13833 return false;
13835 /* Place src and dst addresses in registers
13836 and update the corresponding mem rtx. */
13837 dst = operands[0];
13838 dst_volatile = MEM_VOLATILE_P (dst);
13839 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
13840 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
13841 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
13843 src = operands[1];
13844 src_volatile = MEM_VOLATILE_P (src);
13845 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
13846 base = copy_to_mode_reg (SImode, XEXP (src, 0));
13847 src = adjust_automodify_address (src, VOIDmode, base, 0);
13849 if (!unaligned_access && !(src_aligned && dst_aligned))
13850 return false;
13852 if (src_volatile || dst_volatile)
13853 return false;
13855 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
13856 if (!(dst_aligned || src_aligned))
13857 return arm_gen_movmemqi (operands);
13859 /* If either src or dst is unaligned we'll be accessing it as pairs
13860 of unaligned SImode accesses. Otherwise we can generate DImode
13861 ldrd/strd instructions. */
13862 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
13863 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
13865 while (len >= 8)
13867 len -= 8;
13868 reg0 = gen_reg_rtx (DImode);
13869 rtx low_reg = NULL_RTX;
13870 rtx hi_reg = NULL_RTX;
13872 if (!src_aligned || !dst_aligned)
13874 low_reg = gen_lowpart (SImode, reg0);
13875 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
13877 if (src_aligned)
13878 emit_move_insn (reg0, src);
13879 else
13881 emit_insn (gen_unaligned_loadsi (low_reg, src));
13882 src = next_consecutive_mem (src);
13883 emit_insn (gen_unaligned_loadsi (hi_reg, src));
13886 if (dst_aligned)
13887 emit_move_insn (dst, reg0);
13888 else
13890 emit_insn (gen_unaligned_storesi (dst, low_reg));
13891 dst = next_consecutive_mem (dst);
13892 emit_insn (gen_unaligned_storesi (dst, hi_reg));
13895 src = next_consecutive_mem (src);
13896 dst = next_consecutive_mem (dst);
13899 gcc_assert (len < 8);
13900 if (len >= 4)
13902 /* More than a word but less than a double-word to copy. Copy a word. */
13903 reg0 = gen_reg_rtx (SImode);
13904 src = adjust_address (src, SImode, 0);
13905 dst = adjust_address (dst, SImode, 0);
13906 if (src_aligned)
13907 emit_move_insn (reg0, src);
13908 else
13909 emit_insn (gen_unaligned_loadsi (reg0, src));
13911 if (dst_aligned)
13912 emit_move_insn (dst, reg0);
13913 else
13914 emit_insn (gen_unaligned_storesi (dst, reg0));
13916 src = next_consecutive_mem (src);
13917 dst = next_consecutive_mem (dst);
13918 len -= 4;
13921 if (len == 0)
13922 return true;
13924 /* Copy the remaining bytes. */
13925 if (len >= 2)
13927 dst = adjust_address (dst, HImode, 0);
13928 src = adjust_address (src, HImode, 0);
13929 reg0 = gen_reg_rtx (SImode);
13930 if (src_aligned)
13931 emit_insn (gen_zero_extendhisi2 (reg0, src));
13932 else
13933 emit_insn (gen_unaligned_loadhiu (reg0, src));
13935 if (dst_aligned)
13936 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
13937 else
13938 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
13940 src = next_consecutive_mem (src);
13941 dst = next_consecutive_mem (dst);
13942 if (len == 2)
13943 return true;
13946 dst = adjust_address (dst, QImode, 0);
13947 src = adjust_address (src, QImode, 0);
13948 reg0 = gen_reg_rtx (QImode);
13949 emit_move_insn (reg0, src);
13950 emit_move_insn (dst, reg0);
13951 return true;
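/* As an illustration (assuming aligned, non-volatile operands), a 14-byte
   copy emitted by the function above decomposes as 8 + 4 + 2: typically one
   ldrd/strd pair for the first eight bytes, an ldr/str pair for the next
   four, and an ldrh/strh pair for the final two.  */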
13954 /* Select a dominance comparison mode if possible for a test of the general
13955 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
13956 COND_OR == DOM_CC_X_AND_Y => (X && Y)
13957 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
13958 COND_OR == DOM_CC_X_OR_Y => (X || Y)
13959 In all cases OP will be either EQ or NE, but we don't need to know which
13960 here. If we are unable to support a dominance comparison we return
13961 CC mode. This will then fail to match for the RTL expressions that
13962 generate this call. */
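/* For instance (register names are illustrative), a combined test such as
   (a == 0 && b == 0) reaches here with COND_OR == DOM_CC_X_AND_Y and
   EQ/EQ sub-comparisons, yielding CC_DEQmode; the md patterns can then
   emit something like
       cmp   r0, #0
       cmpeq r1, #0
   and use the combined flags in a single conditional.  */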
13963 machine_mode
13964 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
13966 enum rtx_code cond1, cond2;
13967 int swapped = 0;
13969 /* Currently we will probably get the wrong result if the individual
13970 comparisons are not simple. This also ensures that it is safe to
13971 reverse a comparison if necessary. */
13972 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
13973 != CCmode)
13974 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
13975 != CCmode))
13976 return CCmode;
13978 /* The if_then_else variant of this tests the second condition if the
13979 first passes, but is true if the first fails. Reverse the first
13980 condition to get a true "inclusive-or" expression. */
13981 if (cond_or == DOM_CC_NX_OR_Y)
13982 cond1 = reverse_condition (cond1);
13984 /* If the comparisons are not equal, and one doesn't dominate the other,
13985 then we can't do this. */
13986 if (cond1 != cond2
13987 && !comparison_dominates_p (cond1, cond2)
13988 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
13989 return CCmode;
13991 if (swapped)
13992 std::swap (cond1, cond2);
13994 switch (cond1)
13996 case EQ:
13997 if (cond_or == DOM_CC_X_AND_Y)
13998 return CC_DEQmode;
14000 switch (cond2)
14002 case EQ: return CC_DEQmode;
14003 case LE: return CC_DLEmode;
14004 case LEU: return CC_DLEUmode;
14005 case GE: return CC_DGEmode;
14006 case GEU: return CC_DGEUmode;
14007 default: gcc_unreachable ();
14010 case LT:
14011 if (cond_or == DOM_CC_X_AND_Y)
14012 return CC_DLTmode;
14014 switch (cond2)
14016 case LT:
14017 return CC_DLTmode;
14018 case LE:
14019 return CC_DLEmode;
14020 case NE:
14021 return CC_DNEmode;
14022 default:
14023 gcc_unreachable ();
14026 case GT:
14027 if (cond_or == DOM_CC_X_AND_Y)
14028 return CC_DGTmode;
14030 switch (cond2)
14032 case GT:
14033 return CC_DGTmode;
14034 case GE:
14035 return CC_DGEmode;
14036 case NE:
14037 return CC_DNEmode;
14038 default:
14039 gcc_unreachable ();
14042 case LTU:
14043 if (cond_or == DOM_CC_X_AND_Y)
14044 return CC_DLTUmode;
14046 switch (cond2)
14048 case LTU:
14049 return CC_DLTUmode;
14050 case LEU:
14051 return CC_DLEUmode;
14052 case NE:
14053 return CC_DNEmode;
14054 default:
14055 gcc_unreachable ();
14058 case GTU:
14059 if (cond_or == DOM_CC_X_AND_Y)
14060 return CC_DGTUmode;
14062 switch (cond2)
14064 case GTU:
14065 return CC_DGTUmode;
14066 case GEU:
14067 return CC_DGEUmode;
14068 case NE:
14069 return CC_DNEmode;
14070 default:
14071 gcc_unreachable ();
14074 /* The remaining cases only occur when both comparisons are the
14075 same. */
14076 case NE:
14077 gcc_assert (cond1 == cond2);
14078 return CC_DNEmode;
14080 case LE:
14081 gcc_assert (cond1 == cond2);
14082 return CC_DLEmode;
14084 case GE:
14085 gcc_assert (cond1 == cond2);
14086 return CC_DGEmode;
14088 case LEU:
14089 gcc_assert (cond1 == cond2);
14090 return CC_DLEUmode;
14092 case GEU:
14093 gcc_assert (cond1 == cond2);
14094 return CC_DGEUmode;
14096 default:
14097 gcc_unreachable ();
14101 machine_mode
14102 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14104 /* All floating point compares return CCFP if it is an equality
14105 comparison, and CCFPE otherwise. */
14106 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14108 switch (op)
14110 case EQ:
14111 case NE:
14112 case UNORDERED:
14113 case ORDERED:
14114 case UNLT:
14115 case UNLE:
14116 case UNGT:
14117 case UNGE:
14118 case UNEQ:
14119 case LTGT:
14120 return CCFPmode;
14122 case LT:
14123 case LE:
14124 case GT:
14125 case GE:
14126 return CCFPEmode;
14128 default:
14129 gcc_unreachable ();
14133 /* A compare with a shifted operand. Because of canonicalization, the
14134 comparison will have to be swapped when we emit the assembler. */
14135 if (GET_MODE (y) == SImode
14136 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14137 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14138 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14139 || GET_CODE (x) == ROTATERT))
14140 return CC_SWPmode;
14142 /* This operation is performed swapped, but since we only rely on the Z
14143 flag we don't need an additional mode. */
14144 if (GET_MODE (y) == SImode
14145 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14146 && GET_CODE (x) == NEG
14147 && (op == EQ || op == NE))
14148 return CC_Zmode;
14150 /* This is a special case that is used by combine to allow a
14151 comparison of a shifted byte load to be split into a zero-extend
14152 followed by a comparison of the shifted integer (only valid for
14153 equalities and unsigned inequalities). */
14154 if (GET_MODE (x) == SImode
14155 && GET_CODE (x) == ASHIFT
14156 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14157 && GET_CODE (XEXP (x, 0)) == SUBREG
14158 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14159 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14160 && (op == EQ || op == NE
14161 || op == GEU || op == GTU || op == LTU || op == LEU)
14162 && CONST_INT_P (y))
14163 return CC_Zmode;
14165 /* A construct for a conditional compare, if the false arm contains
14166 0, then both conditions must be true, otherwise either condition
14167 must be true. Not all conditions are possible, so CCmode is
14168 returned if it can't be done. */
14169 if (GET_CODE (x) == IF_THEN_ELSE
14170 && (XEXP (x, 2) == const0_rtx
14171 || XEXP (x, 2) == const1_rtx)
14172 && COMPARISON_P (XEXP (x, 0))
14173 && COMPARISON_P (XEXP (x, 1)))
14174 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14175 INTVAL (XEXP (x, 2)));
14177 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14178 if (GET_CODE (x) == AND
14179 && (op == EQ || op == NE)
14180 && COMPARISON_P (XEXP (x, 0))
14181 && COMPARISON_P (XEXP (x, 1)))
14182 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14183 DOM_CC_X_AND_Y);
14185 if (GET_CODE (x) == IOR
14186 && (op == EQ || op == NE)
14187 && COMPARISON_P (XEXP (x, 0))
14188 && COMPARISON_P (XEXP (x, 1)))
14189 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14190 DOM_CC_X_OR_Y);
14192 /* An operation (on Thumb) where we want to test for a single bit.
14193 This is done by shifting that bit up into the top bit of a
14194 scratch register; we can then branch on the sign bit. */
14195 if (TARGET_THUMB1
14196 && GET_MODE (x) == SImode
14197 && (op == EQ || op == NE)
14198 && GET_CODE (x) == ZERO_EXTRACT
14199 && XEXP (x, 1) == const1_rtx)
14200 return CC_Nmode;
14202 /* An operation that sets the condition codes as a side-effect, the
14203 V flag is not set correctly, so we can only use comparisons where
14204 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14205 instead.) */
14206 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14207 if (GET_MODE (x) == SImode
14208 && y == const0_rtx
14209 && (op == EQ || op == NE || op == LT || op == GE)
14210 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14211 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14212 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14213 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14214 || GET_CODE (x) == LSHIFTRT
14215 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14216 || GET_CODE (x) == ROTATERT
14217 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14218 return CC_NOOVmode;
14220 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14221 return CC_Zmode;
14223 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14224 && GET_CODE (x) == PLUS
14225 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14226 return CC_Cmode;
14228 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14230 switch (op)
14232 case EQ:
14233 case NE:
14234 /* A DImode comparison against zero can be implemented by
14235 or'ing the two halves together. */
14236 if (y == const0_rtx)
14237 return CC_Zmode;
14239 /* We can do an equality test in three Thumb instructions. */
14240 if (!TARGET_32BIT)
14241 return CC_Zmode;
14243 /* FALLTHROUGH */
14245 case LTU:
14246 case LEU:
14247 case GTU:
14248 case GEU:
14249 /* DImode unsigned comparisons can be implemented by cmp +
14250 cmpeq without a scratch register. Not worth doing in
14251 Thumb-2. */
14252 if (TARGET_32BIT)
14253 return CC_CZmode;
14255 /* FALLTHROUGH */
14257 case LT:
14258 case LE:
14259 case GT:
14260 case GE:
14261 /* DImode signed and unsigned comparisons can be implemented
14262 by cmp + sbcs with a scratch register, but that does not
14263 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14264 gcc_assert (op != EQ && op != NE);
14265 return CC_NCVmode;
14267 default:
14268 gcc_unreachable ();
14272 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14273 return GET_MODE (x);
14275 return CCmode;
14278 /* X and Y are two things to compare using CODE. Emit the compare insn and
14279 return the rtx for the CC register in the proper mode. SCRATCH is a
14280 scratch register that may be needed for DImode comparisons. */
14282 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14284 machine_mode mode;
14285 rtx cc_reg;
14286 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14288 /* We might have X as a constant, Y as a register because of the predicates
14289 used for cmpdi. If so, force X to a register here. */
14290 if (dimode_comparison && !REG_P (x))
14291 x = force_reg (DImode, x);
14293 mode = SELECT_CC_MODE (code, x, y);
14294 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14296 if (dimode_comparison
14297 && mode != CC_CZmode)
14299 rtx clobber, set;
14301 /* To compare two non-zero values for equality, XOR them and
14302 then compare against zero. Not used for ARM mode; there
14303 CC_CZmode is cheaper. */
14304 if (mode == CC_Zmode && y != const0_rtx)
14306 gcc_assert (!reload_completed);
14307 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14308 y = const0_rtx;
14311 /* A scratch register is required. */
14312 if (reload_completed)
14313 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14314 else
14315 scratch = gen_rtx_SCRATCH (SImode);
14317 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14318 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14319 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14321 else
14322 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14324 return cc_reg;
14327 /* Generate a sequence of insns that will generate the correct return
14328 address mask depending on the physical architecture that the program
14329 is running on. */
14331 arm_gen_return_addr_mask (void)
14333 rtx reg = gen_reg_rtx (Pmode);
14335 emit_insn (gen_return_addr_mask (reg));
14336 return reg;
14339 void
14340 arm_reload_in_hi (rtx *operands)
14342 rtx ref = operands[1];
14343 rtx base, scratch;
14344 HOST_WIDE_INT offset = 0;
14346 if (GET_CODE (ref) == SUBREG)
14348 offset = SUBREG_BYTE (ref);
14349 ref = SUBREG_REG (ref);
14352 if (REG_P (ref))
14354 /* We have a pseudo which has been spilt onto the stack; there
14355 are two cases here: the first where there is a simple
14356 stack-slot replacement and a second where the stack-slot is
14357 out of range, or is used as a subreg. */
14358 if (reg_equiv_mem (REGNO (ref)))
14360 ref = reg_equiv_mem (REGNO (ref));
14361 base = find_replacement (&XEXP (ref, 0));
14363 else
14364 /* The slot is out of range, or was dressed up in a SUBREG. */
14365 base = reg_equiv_address (REGNO (ref));
14367 /* PR 62554: If there is no equivalent memory location then just move
14368 the value as an SImode register move. This happens when the target
14369 architecture variant does not have an HImode register move. */
14370 if (base == NULL)
14372 gcc_assert (REG_P (operands[0]));
14373 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14374 gen_rtx_SUBREG (SImode, ref, 0)));
14375 return;
14378 else
14379 base = find_replacement (&XEXP (ref, 0));
14381 /* Handle the case where the address is too complex to be offset by 1. */
14382 if (GET_CODE (base) == MINUS
14383 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14385 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14387 emit_set_insn (base_plus, base);
14388 base = base_plus;
14390 else if (GET_CODE (base) == PLUS)
14392 /* The addend must be CONST_INT, or we would have dealt with it above. */
14393 HOST_WIDE_INT hi, lo;
14395 offset += INTVAL (XEXP (base, 1));
14396 base = XEXP (base, 0);
14398 /* Rework the address into a legal sequence of insns. */
14399 /* Valid range for lo is -4095 -> 4095 */
14400 lo = (offset >= 0
14401 ? (offset & 0xfff)
14402 : -((-offset) & 0xfff));
14404 /* Corner case, if lo is the max offset then we would be out of range
14405 once we have added the additional 1 below, so bump the msb into the
14406 pre-loading insn(s). */
14407 if (lo == 4095)
14408 lo &= 0x7ff;
14410 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14411 ^ (HOST_WIDE_INT) 0x80000000)
14412 - (HOST_WIDE_INT) 0x80000000);
14414 gcc_assert (hi + lo == offset);
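/* For example, OFFSET == 0x1234 splits into LO == 0x234 and HI == 0x1000,
   while OFFSET == -4200 splits into LO == -104 and HI == -4096; in both
   cases HI + LO reconstructs the original offset.  */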
14416 if (hi != 0)
14418 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14420 /* Get the base address; addsi3 knows how to handle constants
14421 that require more than one insn. */
14422 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14423 base = base_plus;
14424 offset = lo;
14428 /* Operands[2] may overlap operands[0] (though it won't overlap
14429 operands[1]), that's why we asked for a DImode reg -- so we can
14430 use the bit that does not overlap. */
14431 if (REGNO (operands[2]) == REGNO (operands[0]))
14432 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14433 else
14434 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14436 emit_insn (gen_zero_extendqisi2 (scratch,
14437 gen_rtx_MEM (QImode,
14438 plus_constant (Pmode, base,
14439 offset))));
14440 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14441 gen_rtx_MEM (QImode,
14442 plus_constant (Pmode, base,
14443 offset + 1))));
14444 if (!BYTES_BIG_ENDIAN)
14445 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14446 gen_rtx_IOR (SImode,
14447 gen_rtx_ASHIFT
14448 (SImode,
14449 gen_rtx_SUBREG (SImode, operands[0], 0),
14450 GEN_INT (8)),
14451 scratch));
14452 else
14453 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14454 gen_rtx_IOR (SImode,
14455 gen_rtx_ASHIFT (SImode, scratch,
14456 GEN_INT (8)),
14457 gen_rtx_SUBREG (SImode, operands[0], 0)));
14460 /* Handle storing a half-word to memory during reload by synthesizing as two
14461 byte stores. Take care not to clobber the input values until after we
14462 have moved them somewhere safe. This code assumes that if the DImode
14463 scratch in operands[2] overlaps either the input value or output address
14464 in some way, then that value must die in this insn (we absolutely need
14465 two scratch registers for some corner cases). */
14466 void
14467 arm_reload_out_hi (rtx *operands)
14469 rtx ref = operands[0];
14470 rtx outval = operands[1];
14471 rtx base, scratch;
14472 HOST_WIDE_INT offset = 0;
14474 if (GET_CODE (ref) == SUBREG)
14476 offset = SUBREG_BYTE (ref);
14477 ref = SUBREG_REG (ref);
14480 if (REG_P (ref))
14482 /* We have a pseudo which has been spilt onto the stack; there
14483 are two cases here: the first where there is a simple
14484 stack-slot replacement and a second where the stack-slot is
14485 out of range, or is used as a subreg. */
14486 if (reg_equiv_mem (REGNO (ref)))
14488 ref = reg_equiv_mem (REGNO (ref));
14489 base = find_replacement (&XEXP (ref, 0));
14491 else
14492 /* The slot is out of range, or was dressed up in a SUBREG. */
14493 base = reg_equiv_address (REGNO (ref));
14495 /* PR 62254: If there is no equivalent memory location then just move
14496 the value as an SImode register move. This happens when the target
14497 architecture variant does not have an HImode register move. */
14498 if (base == NULL)
14500 gcc_assert (REG_P (outval) || SUBREG_P (outval));
14502 if (REG_P (outval))
14504 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14505 gen_rtx_SUBREG (SImode, outval, 0)));
14507 else /* SUBREG_P (outval) */
14509 if (GET_MODE (SUBREG_REG (outval)) == SImode)
14510 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14511 SUBREG_REG (outval)));
14512 else
14513 /* FIXME: Handle other cases ? */
14514 gcc_unreachable ();
14516 return;
14519 else
14520 base = find_replacement (&XEXP (ref, 0));
14522 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14524 /* Handle the case where the address is too complex to be offset by 1. */
14525 if (GET_CODE (base) == MINUS
14526 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14528 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14530 /* Be careful not to destroy OUTVAL. */
14531 if (reg_overlap_mentioned_p (base_plus, outval))
14533 /* Updating base_plus might destroy outval, see if we can
14534 swap the scratch and base_plus. */
14535 if (!reg_overlap_mentioned_p (scratch, outval))
14536 std::swap (scratch, base_plus);
14537 else
14539 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14541 /* Be conservative and copy OUTVAL into the scratch now,
14542 this should only be necessary if outval is a subreg
14543 of something larger than a word. */
14544 /* XXX Might this clobber base? I can't see how it can,
14545 since scratch is known to overlap with OUTVAL, and
14546 must be wider than a word. */
14547 emit_insn (gen_movhi (scratch_hi, outval));
14548 outval = scratch_hi;
14552 emit_set_insn (base_plus, base);
14553 base = base_plus;
14555 else if (GET_CODE (base) == PLUS)
14557 /* The addend must be CONST_INT, or we would have dealt with it above. */
14558 HOST_WIDE_INT hi, lo;
14560 offset += INTVAL (XEXP (base, 1));
14561 base = XEXP (base, 0);
14563 /* Rework the address into a legal sequence of insns. */
14564 /* Valid range for lo is -4095 -> 4095 */
14565 lo = (offset >= 0
14566 ? (offset & 0xfff)
14567 : -((-offset) & 0xfff));
14569 /* Corner case, if lo is the max offset then we would be out of range
14570 once we have added the additional 1 below, so bump the msb into the
14571 pre-loading insn(s). */
14572 if (lo == 4095)
14573 lo &= 0x7ff;
14575 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14576 ^ (HOST_WIDE_INT) 0x80000000)
14577 - (HOST_WIDE_INT) 0x80000000);
14579 gcc_assert (hi + lo == offset);
14581 if (hi != 0)
14583 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14585 /* Be careful not to destroy OUTVAL. */
14586 if (reg_overlap_mentioned_p (base_plus, outval))
14588 /* Updating base_plus might destroy outval, see if we
14589 can swap the scratch and base_plus. */
14590 if (!reg_overlap_mentioned_p (scratch, outval))
14591 std::swap (scratch, base_plus);
14592 else
14594 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14596 /* Be conservative and copy outval into scratch now,
14597 this should only be necessary if outval is a
14598 subreg of something larger than a word. */
14599 /* XXX Might this clobber base? I can't see how it
14600 can, since scratch is known to overlap with
14601 outval. */
14602 emit_insn (gen_movhi (scratch_hi, outval));
14603 outval = scratch_hi;
14607 /* Get the base address; addsi3 knows how to handle constants
14608 that require more than one insn. */
14609 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14610 base = base_plus;
14611 offset = lo;
14615 if (BYTES_BIG_ENDIAN)
14617 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
14618 plus_constant (Pmode, base,
14619 offset + 1)),
14620 gen_lowpart (QImode, outval)));
14621 emit_insn (gen_lshrsi3 (scratch,
14622 gen_rtx_SUBREG (SImode, outval, 0),
14623 GEN_INT (8)));
14624 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
14625 offset)),
14626 gen_lowpart (QImode, scratch)));
14628 else
14630 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
14631 offset)),
14632 gen_lowpart (QImode, outval)));
14633 emit_insn (gen_lshrsi3 (scratch,
14634 gen_rtx_SUBREG (SImode, outval, 0),
14635 GEN_INT (8)));
14636 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
14637 plus_constant (Pmode, base,
14638 offset + 1)),
14639 gen_lowpart (QImode, scratch)));
14643 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
14644 (padded to the size of a word) should be passed in a register. */
14646 static bool
14647 arm_must_pass_in_stack (machine_mode mode, const_tree type)
14649 if (TARGET_AAPCS_BASED)
14650 return must_pass_in_stack_var_size (mode, type);
14651 else
14652 return must_pass_in_stack_var_size_or_pad (mode, type);
14656 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
14657 Return true if an argument passed on the stack should be padded upwards,
14658 i.e. if the least-significant byte has useful data.
14659 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
14660 aggregate types are placed in the lowest memory address. */
14662 bool
14663 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
14665 if (!TARGET_AAPCS_BASED)
14666 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
14668 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
14669 return false;
14671 return true;
14675 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
14676 Return !BYTES_BIG_ENDIAN if the least significant byte of the
14677 register has useful data, and return the opposite if the most
14678 significant byte does. */
14680 bool
14681 arm_pad_reg_upward (machine_mode mode,
14682 tree type, int first ATTRIBUTE_UNUSED)
14684 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
14686 /* For AAPCS, small aggregates, small fixed-point types,
14687 and small complex types are always padded upwards. */
14688 if (type)
14690 if ((AGGREGATE_TYPE_P (type)
14691 || TREE_CODE (type) == COMPLEX_TYPE
14692 || FIXED_POINT_TYPE_P (type))
14693 && int_size_in_bytes (type) <= 4)
14694 return true;
14696 else
14698 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
14699 && GET_MODE_SIZE (mode) <= 4)
14700 return true;
14704 /* Otherwise, use default padding. */
14705 return !BYTES_BIG_ENDIAN;
14708 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
14709 assuming that the address in the base register is word aligned. */
14710 bool
14711 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
14713 HOST_WIDE_INT max_offset;
14715 /* Offset must be a multiple of 4 in Thumb mode. */
14716 if (TARGET_THUMB2 && ((offset & 3) != 0))
14717 return false;
14719 if (TARGET_THUMB2)
14720 max_offset = 1020;
14721 else if (TARGET_ARM)
14722 max_offset = 255;
14723 else
14724 return false;
14726 return ((offset <= max_offset) && (offset >= -max_offset));
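/* For example, an offset of 1020 is accepted in Thumb-2 but rejected in ARM
   mode (limit 255), while an offset of 250 is accepted in ARM mode but
   rejected in Thumb-2 because it is not a multiple of 4.  */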
14729 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
14730 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
14731 Assumes that the address in the base register RN is word aligned. Pattern
14732 guarantees that both memory accesses use the same base register,
14733 the offsets are constants within the range, and the gap between the offsets is 4.
14734 If reload is complete then check that the registers are legal. WBACK indicates whether
14735 address is updated. LOAD indicates whether memory access is load or store. */
14736 bool
14737 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
14738 bool wback, bool load)
14740 unsigned int t, t2, n;
14742 if (!reload_completed)
14743 return true;
14745 if (!offset_ok_for_ldrd_strd (offset))
14746 return false;
14748 t = REGNO (rt);
14749 t2 = REGNO (rt2);
14750 n = REGNO (rn);
14752 if ((TARGET_THUMB2)
14753 && ((wback && (n == t || n == t2))
14754 || (t == SP_REGNUM)
14755 || (t == PC_REGNUM)
14756 || (t2 == SP_REGNUM)
14757 || (t2 == PC_REGNUM)
14758 || (!load && (n == PC_REGNUM))
14759 || (load && (t == t2))
14760 /* Triggers Cortex-M3 LDRD errata. */
14761 || (!wback && load && fix_cm3_ldrd && (n == t))))
14762 return false;
14764 if ((TARGET_ARM)
14765 && ((wback && (n == t || n == t2))
14766 || (t2 == PC_REGNUM)
14767 || (t % 2 != 0) /* First destination register is not even. */
14768 || (t2 != t + 1)
14769 /* PC can be used as base register (for offset addressing only),
14770 but it is deprecated. */
14771 || (n == PC_REGNUM)))
14772 return false;
14774 return true;
14777 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
14778 operand MEM's address contains an immediate offset from the base
14779 register and has no side effects, in which case it sets BASE and
14780 OFFSET accordingly. */
14781 static bool
14782 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
14784 rtx addr;
14786 gcc_assert (base != NULL && offset != NULL);
14788 /* TODO: Handle more general memory operand patterns, such as
14789 PRE_DEC and PRE_INC. */
14791 if (side_effects_p (mem))
14792 return false;
14794 /* Can't deal with subregs. */
14795 if (GET_CODE (mem) == SUBREG)
14796 return false;
14798 gcc_assert (MEM_P (mem));
14800 *offset = const0_rtx;
14802 addr = XEXP (mem, 0);
14804 /* If addr isn't valid for DImode, then we can't handle it. */
14805 if (!arm_legitimate_address_p (DImode, addr,
14806 reload_in_progress || reload_completed))
14807 return false;
14809 if (REG_P (addr))
14811 *base = addr;
14812 return true;
14814 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
14816 *base = XEXP (addr, 0);
14817 *offset = XEXP (addr, 1);
14818 return (REG_P (*base) && CONST_INT_P (*offset));
14821 return false;
14824 /* Called from a peephole2 to replace two word-size accesses with a
14825 single LDRD/STRD instruction. Returns true iff we can generate a
14826 new instruction sequence. That is, both accesses use the same base
14827 register and the gap between constant offsets is 4. This function
14828 may reorder its operands to match ldrd/strd RTL templates.
14829 OPERANDS are the operands found by the peephole matcher;
14830 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
14831 corresponding memory operands. LOAD indicates whether the access
14832 is load or store. CONST_STORE indicates a store of constant
14833 integer values held in OPERANDS[4,5] and assumes that the pattern
14834 is 4 insns long, for the purpose of checking dead registers.
14835 COMMUTE indicates that register operands may be reordered. */
14836 bool
14837 gen_operands_ldrd_strd (rtx *operands, bool load,
14838 bool const_store, bool commute)
14840 int nops = 2;
14841 HOST_WIDE_INT offsets[2], offset;
14842 rtx base = NULL_RTX;
14843 rtx cur_base, cur_offset, tmp;
14844 int i, gap;
14845 HARD_REG_SET regset;
14847 gcc_assert (!const_store || !load);
14848 /* Check that the memory references are immediate offsets from the
14849 same base register. Extract the base register, the destination
14850 registers, and the corresponding memory offsets. */
14851 for (i = 0; i < nops; i++)
14853 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
14854 return false;
14856 if (i == 0)
14857 base = cur_base;
14858 else if (REGNO (base) != REGNO (cur_base))
14859 return false;
14861 offsets[i] = INTVAL (cur_offset);
14862 if (GET_CODE (operands[i]) == SUBREG)
14864 tmp = SUBREG_REG (operands[i]);
14865 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
14866 operands[i] = tmp;
14870 /* Make sure there is no dependency between the individual loads. */
14871 if (load && REGNO (operands[0]) == REGNO (base))
14872 return false; /* RAW */
14874 if (load && REGNO (operands[0]) == REGNO (operands[1]))
14875 return false; /* WAW */
14877 /* If the same input register is used in both stores
14878 when storing different constants, try to find a free register.
14879 For example, the code
14880 mov r0, 0
14881 str r0, [r2]
14882 mov r0, 1
14883 str r0, [r2, #4]
14884 can be transformed into
14885 mov r1, 0
14886 mov r0, 1
14887 strd r1, r0, [r2]
14888 in Thumb mode assuming that r1 is free.
14889 For ARM mode do the same but only if the starting register
14890 can be made to be even. */
14891 if (const_store
14892 && REGNO (operands[0]) == REGNO (operands[1])
14893 && INTVAL (operands[4]) != INTVAL (operands[5]))
14895 if (TARGET_THUMB2)
14897 CLEAR_HARD_REG_SET (regset);
14898 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
14899 if (tmp == NULL_RTX)
14900 return false;
14902 /* Use the new register in the first load to ensure that
14903 if the original input register is not dead after peephole,
14904 then it will have the correct constant value. */
14905 operands[0] = tmp;
14907 else if (TARGET_ARM)
14909 int regno = REGNO (operands[0]);
14910 if (!peep2_reg_dead_p (4, operands[0]))
14912 /* When the input register is even and is not dead after the
14913 pattern, it has to hold the second constant but we cannot
14914 form a legal STRD in ARM mode with this register as the second
14915 register. */
14916 if (regno % 2 == 0)
14917 return false;
14919 /* Is regno-1 free? */
14920 SET_HARD_REG_SET (regset);
14921 CLEAR_HARD_REG_BIT(regset, regno - 1);
14922 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
14923 if (tmp == NULL_RTX)
14924 return false;
14926 operands[0] = tmp;
14928 else
14930 /* Find a DImode register. */
14931 CLEAR_HARD_REG_SET (regset);
14932 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
14933 if (tmp != NULL_RTX)
14935 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
14936 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
14938 else
14940 /* Can we use the input register to form a DI register? */
14941 SET_HARD_REG_SET (regset);
14942 CLEAR_HARD_REG_BIT(regset,
14943 regno % 2 == 0 ? regno + 1 : regno - 1);
14944 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
14945 if (tmp == NULL_RTX)
14946 return false;
14947 operands[regno % 2 == 1 ? 0 : 1] = tmp;
14951 gcc_assert (operands[0] != NULL_RTX);
14952 gcc_assert (operands[1] != NULL_RTX);
14953 gcc_assert (REGNO (operands[0]) % 2 == 0);
14954 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
14958 /* Make sure the instructions are ordered with lower memory access first. */
14959 if (offsets[0] > offsets[1])
14961 gap = offsets[0] - offsets[1];
14962 offset = offsets[1];
14964 /* Swap the instructions such that lower memory is accessed first. */
14965 std::swap (operands[0], operands[1]);
14966 std::swap (operands[2], operands[3]);
14967 if (const_store)
14968 std::swap (operands[4], operands[5]);
14970 else
14972 gap = offsets[1] - offsets[0];
14973 offset = offsets[0];
14976 /* Make sure accesses are to consecutive memory locations. */
14977 if (gap != 4)
14978 return false;
14980 /* Make sure we generate legal instructions. */
14981 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
14982 false, load))
14983 return true;
14985 /* In Thumb state, where registers are almost unconstrained, there
14986 is little hope to fix it. */
14987 if (TARGET_THUMB2)
14988 return false;
14990 if (load && commute)
14992 /* Try reordering registers. */
14993 std::swap (operands[0], operands[1]);
14994 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
14995 false, load))
14996 return true;
14999 if (const_store)
15001 /* If input registers are dead after this pattern, they can be
15002 reordered or replaced by other registers that are free in the
15003 current pattern. */
15004 if (!peep2_reg_dead_p (4, operands[0])
15005 || !peep2_reg_dead_p (4, operands[1]))
15006 return false;
15008 /* Try to reorder the input registers. */
15009 /* For example, the code
15010 mov r0, 0
15011 mov r1, 1
15012 str r1, [r2]
15013 str r0, [r2, #4]
15014 can be transformed into
15015 mov r1, 0
15016 mov r0, 1
15017 strd r0, [r2]
15019 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15020 false, false))
15022 std::swap (operands[0], operands[1]);
15023 return true;
15026 /* Try to find a free DI register. */
15027 CLEAR_HARD_REG_SET (regset);
15028 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15029 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15030 while (true)
15032 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15033 if (tmp == NULL_RTX)
15034 return false;
15036 /* DREG must be an even-numbered register in DImode.
15037 Split it into SI registers. */
15038 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15039 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15040 gcc_assert (operands[0] != NULL_RTX);
15041 gcc_assert (operands[1] != NULL_RTX);
15042 gcc_assert (REGNO (operands[0]) % 2 == 0);
15043 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15045 return (operands_ok_ldrd_strd (operands[0], operands[1],
15046 base, offset,
15047 false, load));
15051 return false;
15057 /* Print a symbolic form of X to the debug file, F. */
15058 static void
15059 arm_print_value (FILE *f, rtx x)
15061 switch (GET_CODE (x))
15063 case CONST_INT:
15064 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15065 return;
15067 case CONST_DOUBLE:
15068 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15069 return;
15071 case CONST_VECTOR:
15073 int i;
15075 fprintf (f, "<");
15076 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15078 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15079 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15080 fputc (',', f);
15082 fprintf (f, ">");
15084 return;
15086 case CONST_STRING:
15087 fprintf (f, "\"%s\"", XSTR (x, 0));
15088 return;
15090 case SYMBOL_REF:
15091 fprintf (f, "`%s'", XSTR (x, 0));
15092 return;
15094 case LABEL_REF:
15095 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15096 return;
15098 case CONST:
15099 arm_print_value (f, XEXP (x, 0));
15100 return;
15102 case PLUS:
15103 arm_print_value (f, XEXP (x, 0));
15104 fprintf (f, "+");
15105 arm_print_value (f, XEXP (x, 1));
15106 return;
15108 case PC:
15109 fprintf (f, "pc");
15110 return;
15112 default:
15113 fprintf (f, "????");
15114 return;
15118 /* Routines for manipulation of the constant pool. */
15120 /* Arm instructions cannot load a large constant directly into a
15121 register; they have to come from a pc relative load. The constant
15122 must therefore be placed in the addressable range of the pc
15123 relative load. Depending on the precise pc relative load
15124 instruction the range is somewhere between 256 bytes and 4k. This
15125 means that we often have to dump a constant inside a function, and
15126 generate code to branch around it.
15128 It is important to minimize this, since the branches will slow
15129 things down and make the code larger.
15131 Normally we can hide the table after an existing unconditional
15132 branch so that there is no interruption of the flow, but in the
15133 worst case the code looks like this:
15135 ldr rn, L1
15137 b L2
15138 align
15139 L1: .long value
15143 ldr rn, L3
15145 b L4
15146 align
15147 L3: .long value
15151 We fix this by performing a scan after scheduling, which notices
15152 which instructions need to have their operands fetched from the
15153 constant table and builds the table.
15155 The algorithm starts by building a table of all the constants that
15156 need fixing up and all the natural barriers in the function (places
15157 where a constant table can be dropped without breaking the flow).
15158 For each fixup we note how far the pc-relative replacement will be
15159 able to reach and the offset of the instruction into the function.
15161 Having built the table we then group the fixes together to form
15162 tables that are as large as possible (subject to addressing
15163 constraints) and emit each table of constants after the last
15164 barrier that is within range of all the instructions in the group.
15165 If a group does not contain a barrier, then we forcibly create one
15166 by inserting a jump instruction into the flow. Once the table has
15167 been inserted, the insns are then modified to reference the
15168 relevant entry in the pool.
15170 Possible enhancements to the algorithm (not implemented) are:
15172 1) For some processors and object formats, there may be benefit in
15173 aligning the pools to the start of cache lines; this alignment
15174 would need to be taken into account when calculating addressability
15175 of a pool. */
15177 /* These typedefs are located at the start of this file, so that
15178 they can be used in the prototypes there. This comment is to
15179 remind readers of that fact so that the following structures
15180 can be understood more easily.
15182 typedef struct minipool_node Mnode;
15183 typedef struct minipool_fixup Mfix; */
15185 struct minipool_node
15187 /* Doubly linked chain of entries. */
15188 Mnode * next;
15189 Mnode * prev;
15190 /* The maximum offset into the code at which this entry can be placed. While
15191 pushing fixes for forward references, all entries are sorted in order
15192 of increasing max_address. */
15193 HOST_WIDE_INT max_address;
15194 /* Similarly for an entry inserted for a backwards ref. */
15195 HOST_WIDE_INT min_address;
15196 /* The number of fixes referencing this entry. This can become zero
15197 if we "unpush" an entry. In this case we ignore the entry when we
15198 come to emit the code. */
15199 int refcount;
15200 /* The offset from the start of the minipool. */
15201 HOST_WIDE_INT offset;
15202 /* The value in the table. */
15203 rtx value;
15204 /* The mode of value. */
15205 machine_mode mode;
15206 /* The size of the value. With iWMMXt enabled
15207 sizes > 4 also imply an alignment of 8 bytes. */
15208 int fix_size;
15211 struct minipool_fixup
15213 Mfix * next;
15214 rtx_insn * insn;
15215 HOST_WIDE_INT address;
15216 rtx * loc;
15217 machine_mode mode;
15218 int fix_size;
15219 rtx value;
15220 Mnode * minipool;
15221 HOST_WIDE_INT forwards;
15222 HOST_WIDE_INT backwards;
15225 /* Fixes less than a word need padding out to a word boundary. */
15226 #define MINIPOOL_FIX_SIZE(mode) \
15227 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
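/* For example, an HImode (2-byte) fix is padded out to a 4-byte pool slot,
   while SImode and DImode fixes keep their natural sizes of 4 and 8 bytes.  */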
15229 static Mnode * minipool_vector_head;
15230 static Mnode * minipool_vector_tail;
15231 static rtx_code_label *minipool_vector_label;
15232 static int minipool_pad;
15234 /* The linked list of all minipool fixes required for this function. */
15235 Mfix * minipool_fix_head;
15236 Mfix * minipool_fix_tail;
15237 /* The fix entry for the current minipool, once it has been placed. */
15238 Mfix * minipool_barrier;
15240 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15241 #define JUMP_TABLES_IN_TEXT_SECTION 0
15242 #endif
15244 static HOST_WIDE_INT
15245 get_jump_table_size (rtx_jump_table_data *insn)
15247 /* ADDR_VECs only take room if read-only data goes into the text
15248 section. */
15249 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15251 rtx body = PATTERN (insn);
15252 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15253 HOST_WIDE_INT size;
15254 HOST_WIDE_INT modesize;
15256 modesize = GET_MODE_SIZE (GET_MODE (body));
15257 size = modesize * XVECLEN (body, elt);
15258 switch (modesize)
15260 case 1:
15261 /* Round up size of TBB table to a halfword boundary. */
15262 size = (size + 1) & ~HOST_WIDE_INT_1;
15263 break;
15264 case 2:
15265 /* No padding necessary for TBH. */
15266 break;
15267 case 4:
15268 /* Add two bytes for alignment on Thumb. */
15269 if (TARGET_THUMB)
15270 size += 2;
15271 break;
15272 default:
15273 gcc_unreachable ();
15275 return size;
15278 return 0;
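/* A rough worked example: an ADDR_DIFF_VEC with 7 QImode entries occupies
   7 bytes, rounded up to 8 so that the TBB table ends on a halfword boundary;
   the same 7 entries in SImode on Thumb would take 7 * 4 + 2 = 30 bytes.  */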
15281 /* Return the maximum amount of padding that will be inserted before
15282 label LABEL. */
15284 static HOST_WIDE_INT
15285 get_label_padding (rtx label)
15287 HOST_WIDE_INT align, min_insn_size;
15289 align = 1 << label_to_alignment (label);
15290 min_insn_size = TARGET_THUMB ? 2 : 4;
15291 return align > min_insn_size ? align - min_insn_size : 0;
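/* For instance, a label aligned to an 8-byte boundary (label_to_alignment
   returning 3) may be preceded by up to 8 - 2 = 6 bytes of padding on Thumb,
   or 8 - 4 = 4 bytes on ARM.  */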
15294 /* Move a minipool fix MP from its current location to before MAX_MP.
15295 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15296 constraints may need updating. */
15297 static Mnode *
15298 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15299 HOST_WIDE_INT max_address)
15301 /* The code below assumes these are different. */
15302 gcc_assert (mp != max_mp);
15304 if (max_mp == NULL)
15306 if (max_address < mp->max_address)
15307 mp->max_address = max_address;
15309 else
15311 if (max_address > max_mp->max_address - mp->fix_size)
15312 mp->max_address = max_mp->max_address - mp->fix_size;
15313 else
15314 mp->max_address = max_address;
15316 /* Unlink MP from its current position. Since max_mp is non-null,
15317 mp->prev must be non-null. */
15318 mp->prev->next = mp->next;
15319 if (mp->next != NULL)
15320 mp->next->prev = mp->prev;
15321 else
15322 minipool_vector_tail = mp->prev;
15324 /* Re-insert it before MAX_MP. */
15325 mp->next = max_mp;
15326 mp->prev = max_mp->prev;
15327 max_mp->prev = mp;
15329 if (mp->prev != NULL)
15330 mp->prev->next = mp;
15331 else
15332 minipool_vector_head = mp;
15335 /* Save the new entry. */
15336 max_mp = mp;
15338 /* Scan over the preceding entries and adjust their addresses as
15339 required. */
15340 while (mp->prev != NULL
15341 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15343 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15344 mp = mp->prev;
15347 return max_mp;
15350 /* Add a constant to the minipool for a forward reference. Returns the
15351 node added or NULL if the constant will not fit in this pool. */
15352 static Mnode *
15353 add_minipool_forward_ref (Mfix *fix)
15355 /* If set, max_mp is the first pool_entry that has a lower
15356 constraint than the one we are trying to add. */
15357 Mnode * max_mp = NULL;
15358 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15359 Mnode * mp;
15361 /* If the minipool starts before the end of FIX->INSN then this FIX
15362 cannot be placed into the current pool. Furthermore, adding the
15363 new constant pool entry may cause the pool to start FIX_SIZE bytes
15364 earlier. */
15365 if (minipool_vector_head &&
15366 (fix->address + get_attr_length (fix->insn)
15367 >= minipool_vector_head->max_address - fix->fix_size))
15368 return NULL;
15370 /* Scan the pool to see if a constant with the same value has
15371 already been added. While we are doing this, also note the
15372 location where we must insert the constant if it doesn't already
15373 exist. */
15374 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15376 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15377 && fix->mode == mp->mode
15378 && (!LABEL_P (fix->value)
15379 || (CODE_LABEL_NUMBER (fix->value)
15380 == CODE_LABEL_NUMBER (mp->value)))
15381 && rtx_equal_p (fix->value, mp->value))
15383 /* More than one fix references this entry. */
15384 mp->refcount++;
15385 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15388 /* Note the insertion point if necessary. */
15389 if (max_mp == NULL
15390 && mp->max_address > max_address)
15391 max_mp = mp;
15393 /* If we are inserting an 8-byte aligned quantity and
15394 we have not already found an insertion point, then
15395 make sure that all such 8-byte aligned quantities are
15396 placed at the start of the pool. */
15397 if (ARM_DOUBLEWORD_ALIGN
15398 && max_mp == NULL
15399 && fix->fix_size >= 8
15400 && mp->fix_size < 8)
15402 max_mp = mp;
15403 max_address = mp->max_address;
15407 /* The value is not currently in the minipool, so we need to create
15408 a new entry for it. If MAX_MP is NULL, the entry will be put on
15409 the end of the list since the placement is less constrained than
15410 any existing entry. Otherwise, we insert the new fix before
15411 MAX_MP and, if necessary, adjust the constraints on the other
15412 entries. */
15413 mp = XNEW (Mnode);
15414 mp->fix_size = fix->fix_size;
15415 mp->mode = fix->mode;
15416 mp->value = fix->value;
15417 mp->refcount = 1;
15418 /* Not yet required for a backwards ref. */
15419 mp->min_address = -65536;
15421 if (max_mp == NULL)
15423 mp->max_address = max_address;
15424 mp->next = NULL;
15425 mp->prev = minipool_vector_tail;
15427 if (mp->prev == NULL)
15429 minipool_vector_head = mp;
15430 minipool_vector_label = gen_label_rtx ();
15432 else
15433 mp->prev->next = mp;
15435 minipool_vector_tail = mp;
15437 else
15439 if (max_address > max_mp->max_address - mp->fix_size)
15440 mp->max_address = max_mp->max_address - mp->fix_size;
15441 else
15442 mp->max_address = max_address;
15444 mp->next = max_mp;
15445 mp->prev = max_mp->prev;
15446 max_mp->prev = mp;
15447 if (mp->prev != NULL)
15448 mp->prev->next = mp;
15449 else
15450 minipool_vector_head = mp;
15453 /* Save the new entry. */
15454 max_mp = mp;
15456 /* Scan over the preceding entries and adjust their addresses as
15457 required. */
15458 while (mp->prev != NULL
15459 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15461 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15462 mp = mp->prev;
15465 return max_mp;
15468 static Mnode *
15469 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15470 HOST_WIDE_INT min_address)
15472 HOST_WIDE_INT offset;
15474 /* The code below assumes these are different. */
15475 gcc_assert (mp != min_mp);
15477 if (min_mp == NULL)
15479 if (min_address > mp->min_address)
15480 mp->min_address = min_address;
15482 else
15484 /* We will adjust this below if it is too loose. */
15485 mp->min_address = min_address;
15487 /* Unlink MP from its current position. Since min_mp is non-null,
15488 mp->next must be non-null. */
15489 mp->next->prev = mp->prev;
15490 if (mp->prev != NULL)
15491 mp->prev->next = mp->next;
15492 else
15493 minipool_vector_head = mp->next;
15495 /* Reinsert it after MIN_MP. */
15496 mp->prev = min_mp;
15497 mp->next = min_mp->next;
15498 min_mp->next = mp;
15499 if (mp->next != NULL)
15500 mp->next->prev = mp;
15501 else
15502 minipool_vector_tail = mp;
15505 min_mp = mp;
15507 offset = 0;
15508 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15510 mp->offset = offset;
15511 if (mp->refcount > 0)
15512 offset += mp->fix_size;
15514 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
15515 mp->next->min_address = mp->min_address + mp->fix_size;
15518 return min_mp;
15521 /* Add a constant to the minipool for a backward reference. Returns the
15522 node added or NULL if the constant will not fit in this pool.
15524 Note that the code for insertion for a backwards reference can be
15525 somewhat confusing because the calculated offsets for each fix do
15526 not take into account the size of the pool (which is still under
15527 construction). */
15528 static Mnode *
15529 add_minipool_backward_ref (Mfix *fix)
15531 /* If set, min_mp is the last pool_entry that has a lower constraint
15532 than the one we are trying to add. */
15533 Mnode *min_mp = NULL;
15534 /* This can be negative, since it is only a constraint. */
15535 HOST_WIDE_INT min_address = fix->address - fix->backwards;
15536 Mnode *mp;
15538 /* If we can't reach the current pool from this insn, or if we can't
15539 insert this entry at the end of the pool without pushing other
15540 fixes out of range, then we don't try. This ensures that we
15541 can't fail later on. */
15542 if (min_address >= minipool_barrier->address
15543 || (minipool_vector_tail->min_address + fix->fix_size
15544 >= minipool_barrier->address))
15545 return NULL;
15547 /* Scan the pool to see if a constant with the same value has
15548 already been added. While we are doing this, also note the
15549 location where we must insert the constant if it doesn't already
15550 exist. */
15551 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
15553 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15554 && fix->mode == mp->mode
15555 && (!LABEL_P (fix->value)
15556 || (CODE_LABEL_NUMBER (fix->value)
15557 == CODE_LABEL_NUMBER (mp->value)))
15558 && rtx_equal_p (fix->value, mp->value)
15559 /* Check that there is enough slack to move this entry to the
15560 end of the table (this is conservative). */
15561 && (mp->max_address
15562 > (minipool_barrier->address
15563 + minipool_vector_tail->offset
15564 + minipool_vector_tail->fix_size)))
15566 mp->refcount++;
15567 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
15570 if (min_mp != NULL)
15571 mp->min_address += fix->fix_size;
15572 else
15574 /* Note the insertion point if necessary. */
15575 if (mp->min_address < min_address)
15577 /* For now, we do not allow the insertion of nodes requiring
15578 8-byte alignment anywhere but at the start of the pool. */
15579 if (ARM_DOUBLEWORD_ALIGN
15580 && fix->fix_size >= 8 && mp->fix_size < 8)
15581 return NULL;
15582 else
15583 min_mp = mp;
15585 else if (mp->max_address
15586 < minipool_barrier->address + mp->offset + fix->fix_size)
15588 /* Inserting before this entry would push the fix beyond
15589 its maximum address (which can happen if we have
15590 re-located a forwards fix); force the new fix to come
15591 after it. */
15592 if (ARM_DOUBLEWORD_ALIGN
15593 && fix->fix_size >= 8 && mp->fix_size < 8)
15594 return NULL;
15595 else
15597 min_mp = mp;
15598 min_address = mp->min_address + fix->fix_size;
15601 /* Do not insert a non-8-byte aligned quantity before 8-byte
15602 aligned quantities. */
15603 else if (ARM_DOUBLEWORD_ALIGN
15604 && fix->fix_size < 8
15605 && mp->fix_size >= 8)
15607 min_mp = mp;
15608 min_address = mp->min_address + fix->fix_size;
15613 /* We need to create a new entry. */
15614 mp = XNEW (Mnode);
15615 mp->fix_size = fix->fix_size;
15616 mp->mode = fix->mode;
15617 mp->value = fix->value;
15618 mp->refcount = 1;
15619 mp->max_address = minipool_barrier->address + 65536;
15621 mp->min_address = min_address;
15623 if (min_mp == NULL)
15625 mp->prev = NULL;
15626 mp->next = minipool_vector_head;
15628 if (mp->next == NULL)
15630 minipool_vector_tail = mp;
15631 minipool_vector_label = gen_label_rtx ();
15633 else
15634 mp->next->prev = mp;
15636 minipool_vector_head = mp;
15638 else
15640 mp->next = min_mp->next;
15641 mp->prev = min_mp;
15642 min_mp->next = mp;
15644 if (mp->next != NULL)
15645 mp->next->prev = mp;
15646 else
15647 minipool_vector_tail = mp;
15650 /* Save the new entry. */
15651 min_mp = mp;
15653 if (mp->prev)
15654 mp = mp->prev;
15655 else
15656 mp->offset = 0;
15658 /* Scan over the following entries and adjust their offsets. */
15659 while (mp->next != NULL)
15661 if (mp->next->min_address < mp->min_address + mp->fix_size)
15662 mp->next->min_address = mp->min_address + mp->fix_size;
15664 if (mp->refcount)
15665 mp->next->offset = mp->offset + mp->fix_size;
15666 else
15667 mp->next->offset = mp->offset;
15669 mp = mp->next;
15672 return min_mp;
15675 static void
15676 assign_minipool_offsets (Mfix *barrier)
15678 HOST_WIDE_INT offset = 0;
15679 Mnode *mp;
15681 minipool_barrier = barrier;
15683 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15685 mp->offset = offset;
15687 if (mp->refcount > 0)
15688 offset += mp->fix_size;
15692 /* Output the literal table. */
15693 static void
15694 dump_minipool (rtx_insn *scan)
15696 Mnode * mp;
15697 Mnode * nmp;
15698 int align64 = 0;
15700 if (ARM_DOUBLEWORD_ALIGN)
15701 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15702 if (mp->refcount > 0 && mp->fix_size >= 8)
15704 align64 = 1;
15705 break;
15708 if (dump_file)
15709 fprintf (dump_file,
15710 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
15711 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
15713 scan = emit_label_after (gen_label_rtx (), scan);
15714 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
15715 scan = emit_label_after (minipool_vector_label, scan);
15717 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
15719 if (mp->refcount > 0)
15721 if (dump_file)
15723 fprintf (dump_file,
15724 ";; Offset %u, min %ld, max %ld ",
15725 (unsigned) mp->offset, (unsigned long) mp->min_address,
15726 (unsigned long) mp->max_address);
15727 arm_print_value (dump_file, mp->value);
15728 fputc ('\n', dump_file);
15731 switch (GET_MODE_SIZE (mp->mode))
15733 #ifdef HAVE_consttable_1
15734 case 1:
15735 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
15736 break;
15738 #endif
15739 #ifdef HAVE_consttable_2
15740 case 2:
15741 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
15742 break;
15744 #endif
15745 #ifdef HAVE_consttable_4
15746 case 4:
15747 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
15748 break;
15750 #endif
15751 #ifdef HAVE_consttable_8
15752 case 8:
15753 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
15754 break;
15756 #endif
15757 #ifdef HAVE_consttable_16
15758 case 16:
15759 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
15760 break;
15762 #endif
15763 default:
15764 gcc_unreachable ();
15768 nmp = mp->next;
15769 free (mp);
15772 minipool_vector_head = minipool_vector_tail = NULL;
15773 scan = emit_insn_after (gen_consttable_end (), scan);
15774 scan = emit_barrier_after (scan);
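/* The emitted pool is roughly:
       <new label>
       align_4 (or align_8 when any 8-byte entry is present)
     minipool_vector_label:
       one consttable_<size> entry per node that is still referenced
       consttable_end
       <barrier>
   Nodes whose refcount has dropped to zero take no space but are still
   freed here.  */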
15777 /* Return the cost of forcibly inserting a barrier after INSN. */
15778 static int
15779 arm_barrier_cost (rtx_insn *insn)
15781 /* Basing the location of the pool on the loop depth is preferable,
15782 but at the moment, the basic block information seems to be
15783 corrupted by this stage of the compilation. */
15784 int base_cost = 50;
15785 rtx_insn *next = next_nonnote_insn (insn);
15787 if (next != NULL && LABEL_P (next))
15788 base_cost -= 20;
15790 switch (GET_CODE (insn))
15792 case CODE_LABEL:
15793 /* It will always be better to place the table before the label, rather
15794 than after it. */
15795 return 50;
15797 case INSN:
15798 case CALL_INSN:
15799 return base_cost;
15801 case JUMP_INSN:
15802 return base_cost - 10;
15804 default:
15805 return base_cost + 10;
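/* With the base cost of 50, an ordinary insn or call scores 50 (30 when the
   next insn is a label), a jump scores 40 (20 before a label) and a label
   itself always scores 50.  Since create_fix_barrier below prefers the
   lowest cost, barriers tend to be placed just after jumps.  */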
15809 /* Find the best place in the insn stream in the range
15810 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
15811 Create the barrier by inserting a jump and add a new fix entry for
15812 it. */
15813 static Mfix *
15814 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
15816 HOST_WIDE_INT count = 0;
15817 rtx_barrier *barrier;
15818 rtx_insn *from = fix->insn;
15819 /* The instruction after which we will insert the jump. */
15820 rtx_insn *selected = NULL;
15821 int selected_cost;
15822 /* The address at which the jump instruction will be placed. */
15823 HOST_WIDE_INT selected_address;
15824 Mfix * new_fix;
15825 HOST_WIDE_INT max_count = max_address - fix->address;
15826 rtx_code_label *label = gen_label_rtx ();
15828 selected_cost = arm_barrier_cost (from);
15829 selected_address = fix->address;
15831 while (from && count < max_count)
15833 rtx_jump_table_data *tmp;
15834 int new_cost;
15836 /* This code shouldn't have been called if there was a natural barrier
15837 within range. */
15838 gcc_assert (!BARRIER_P (from));
15840 /* Count the length of this insn. This must stay in sync with the
15841 code that pushes minipool fixes. */
15842 if (LABEL_P (from))
15843 count += get_label_padding (from);
15844 else
15845 count += get_attr_length (from);
15847 /* If there is a jump table, add its length. */
15848 if (tablejump_p (from, NULL, &tmp))
15850 count += get_jump_table_size (tmp);
15852 /* Jump tables aren't in a basic block, so base the cost on
15853 the dispatch insn. If we select this location, we will
15854 still put the pool after the table. */
15855 new_cost = arm_barrier_cost (from);
15857 if (count < max_count
15858 && (!selected || new_cost <= selected_cost))
15860 selected = tmp;
15861 selected_cost = new_cost;
15862 selected_address = fix->address + count;
15865 /* Continue after the dispatch table. */
15866 from = NEXT_INSN (tmp);
15867 continue;
15870 new_cost = arm_barrier_cost (from);
15872 if (count < max_count
15873 && (!selected || new_cost <= selected_cost))
15875 selected = from;
15876 selected_cost = new_cost;
15877 selected_address = fix->address + count;
15880 from = NEXT_INSN (from);
15883 /* Make sure that we found a place to insert the jump. */
15884 gcc_assert (selected);
15886 /* Make sure we do not split a call and its corresponding
15887 CALL_ARG_LOCATION note. */
15888 if (CALL_P (selected))
15890 rtx_insn *next = NEXT_INSN (selected);
15891 if (next && NOTE_P (next)
15892 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
15893 selected = next;
15896 /* Create a new JUMP_INSN that branches around a barrier. */
15897 from = emit_jump_insn_after (gen_jump (label), selected);
15898 JUMP_LABEL (from) = label;
15899 barrier = emit_barrier_after (from);
15900 emit_label_after (label, barrier);
15902 /* Create a minipool barrier entry for the new barrier. */
15903 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
15904 new_fix->insn = barrier;
15905 new_fix->address = selected_address;
15906 new_fix->next = fix->next;
15907 fix->next = new_fix;
15909 return new_fix;
15912 /* Record that there is a natural barrier in the insn stream at
15913 ADDRESS. */
15914 static void
15915 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
15917 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
15919 fix->insn = insn;
15920 fix->address = address;
15922 fix->next = NULL;
15923 if (minipool_fix_head != NULL)
15924 minipool_fix_tail->next = fix;
15925 else
15926 minipool_fix_head = fix;
15928 minipool_fix_tail = fix;
15931 /* Record INSN, which will need fixing up to load a value from the
15932 minipool. ADDRESS is the offset of the insn since the start of the
15933 function; LOC is a pointer to the part of the insn which requires
15934 fixing; VALUE is the constant that must be loaded, which is of type
15935 MODE. */
15936 static void
15937 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
15938 machine_mode mode, rtx value)
15940 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
15942 fix->insn = insn;
15943 fix->address = address;
15944 fix->loc = loc;
15945 fix->mode = mode;
15946 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
15947 fix->value = value;
15948 fix->forwards = get_attr_pool_range (insn);
15949 fix->backwards = get_attr_neg_pool_range (insn);
15950 fix->minipool = NULL;
15952 /* If an insn doesn't have a range defined for it, then it isn't
15953 expecting to be reworked by this code. Better to stop now than
15954 to generate duff assembly code. */
15955 gcc_assert (fix->forwards || fix->backwards);
15957 /* If an entry requires 8-byte alignment then assume all constant pools
15958 require 4 bytes of padding. Trying to do this later on a per-pool
15959 basis is awkward because existing pool entries have to be modified. */
15960 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
15961 minipool_pad = 4;
15963 if (dump_file)
15965 fprintf (dump_file,
15966 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
15967 GET_MODE_NAME (mode),
15968 INSN_UID (insn), (unsigned long) address,
15969 -1 * (long)fix->backwards, (long)fix->forwards);
15970 arm_print_value (dump_file, fix->value);
15971 fprintf (dump_file, "\n");
15974 /* Add it to the chain of fixes. */
15975 fix->next = NULL;
15977 if (minipool_fix_head != NULL)
15978 minipool_fix_tail->next = fix;
15979 else
15980 minipool_fix_head = fix;
15982 minipool_fix_tail = fix;
15985 /* Return the maximum allowed cost of synthesizing a 64-bit constant inline.
15986 Returns the number of insns needed, or 99 if we always want to synthesize
15987 the value. */
15988 int
15989 arm_max_const_double_inline_cost ()
15991 /* Let the value get synthesized to avoid the use of literal pools. */
15992 if (arm_disable_literal_pool)
15993 return 99;
15995 return ((optimize_size || arm_ld_sched) ? 3 : 4);
15998 /* Return the cost of synthesizing a 64-bit constant VAL inline.
15999 Returns the number of insns needed, or 99 if we don't know how to
16000 do it. */
16001 int
16002 arm_const_double_inline_cost (rtx val)
16004 rtx lowpart, highpart;
16005 machine_mode mode;
16007 mode = GET_MODE (val);
16009 if (mode == VOIDmode)
16010 mode = DImode;
16012 gcc_assert (GET_MODE_SIZE (mode) == 8);
16014 lowpart = gen_lowpart (SImode, val);
16015 highpart = gen_highpart_mode (SImode, mode, val);
16017 gcc_assert (CONST_INT_P (lowpart));
16018 gcc_assert (CONST_INT_P (highpart));
16020 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16021 NULL_RTX, NULL_RTX, 0, 0)
16022 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16023 NULL_RTX, NULL_RTX, 0, 0));
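/* A small worked example: for the DImode constant 0x0000004500000012 both
   32-bit halves (0x45 and 0x12) are single valid ARM immediates, so the cost
   is 1 + 1 = 2, which does not exceed arm_max_const_double_inline_cost, and
   callers will typically synthesize the constant inline rather than force it
   into a literal pool.  */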
16026 /* Cost of loading a SImode constant. */
16027 static inline int
16028 arm_const_inline_cost (enum rtx_code code, rtx val)
16030 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16031 NULL_RTX, NULL_RTX, 1, 0);
16034 /* Return true if it is worthwhile to split a 64-bit constant into two
16035 32-bit operations. This is the case if optimizing for size, or
16036 if we have load delay slots, or if one 32-bit part can be done with
16037 a single data operation. */
16038 bool
16039 arm_const_double_by_parts (rtx val)
16041 machine_mode mode = GET_MODE (val);
16042 rtx part;
16044 if (optimize_size || arm_ld_sched)
16045 return true;
16047 if (mode == VOIDmode)
16048 mode = DImode;
16050 part = gen_highpart_mode (SImode, mode, val);
16052 gcc_assert (CONST_INT_P (part));
16054 if (const_ok_for_arm (INTVAL (part))
16055 || const_ok_for_arm (~INTVAL (part)))
16056 return true;
16058 part = gen_lowpart (SImode, val);
16060 gcc_assert (CONST_INT_P (part));
16062 if (const_ok_for_arm (INTVAL (part))
16063 || const_ok_for_arm (~INTVAL (part)))
16064 return true;
16066 return false;
16069 /* Return true if it is possible to inline both the high and low parts
16070 of a 64-bit constant into 32-bit data processing instructions. */
16071 bool
16072 arm_const_double_by_immediates (rtx val)
16074 machine_mode mode = GET_MODE (val);
16075 rtx part;
16077 if (mode == VOIDmode)
16078 mode = DImode;
16080 part = gen_highpart_mode (SImode, mode, val);
16082 gcc_assert (CONST_INT_P (part));
16084 if (!const_ok_for_arm (INTVAL (part)))
16085 return false;
16087 part = gen_lowpart (SImode, val);
16089 gcc_assert (CONST_INT_P (part));
16091 if (!const_ok_for_arm (INTVAL (part)))
16092 return false;
16094 return true;
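/* For example, 0x000000ff000000ff qualifies (each half is the valid rotated
   immediate 0xff), whereas 0x0000012300000123 does not, since 0x123 spans
   nine significant bits and cannot be encoded as an 8-bit rotated
   immediate.  */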
16097 /* Scan INSN and note any of its operands that need fixing.
16098 If DO_PUSHES is false we do not actually push any of the fixups
16099 needed. */
16100 static void
16101 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16103 int opno;
16105 extract_constrain_insn (insn);
16107 if (recog_data.n_alternatives == 0)
16108 return;
16110 /* Fill in recog_op_alt with information about the constraints of
16111 this insn. */
16112 preprocess_constraints (insn);
16114 const operand_alternative *op_alt = which_op_alt ();
16115 for (opno = 0; opno < recog_data.n_operands; opno++)
16117 /* Things we need to fix can only occur in inputs. */
16118 if (recog_data.operand_type[opno] != OP_IN)
16119 continue;
16121 /* If this alternative is a memory reference, then any mention
16122 of constants in this alternative is really to fool reload
16123 into allowing us to accept one there. We need to fix them up
16124 now so that we output the right code. */
16125 if (op_alt[opno].memory_ok)
16127 rtx op = recog_data.operand[opno];
16129 if (CONSTANT_P (op))
16131 if (do_pushes)
16132 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16133 recog_data.operand_mode[opno], op);
16135 else if (MEM_P (op)
16136 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16137 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16139 if (do_pushes)
16141 rtx cop = avoid_constant_pool_reference (op);
16143 /* Casting the address of something to a mode narrower
16144 than a word can cause avoid_constant_pool_reference()
16145 to return the pool reference itself. That's no good to
16146 us here. Let's just hope that we can use the
16147 constant pool value directly. */
16148 if (op == cop)
16149 cop = get_pool_constant (XEXP (op, 0));
16151 push_minipool_fix (insn, address,
16152 recog_data.operand_loc[opno],
16153 recog_data.operand_mode[opno], cop);
16160 return;
16163 /* Rewrite move insn into subtract of 0 if the condition codes will
16164 be useful in next conditional jump insn. */
16166 static void
16167 thumb1_reorg (void)
16169 basic_block bb;
16171 FOR_EACH_BB_FN (bb, cfun)
16173 rtx dest, src;
16174 rtx cmp, op0, op1, set = NULL;
16175 rtx_insn *prev, *insn = BB_END (bb);
16176 bool insn_clobbered = false;
16178 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
16179 insn = PREV_INSN (insn);
16181 /* Find the last cbranchsi4_insn in basic block BB. */
16182 if (insn == BB_HEAD (bb)
16183 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16184 continue;
16186 /* Get the register with which we are comparing. */
16187 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
16188 op0 = XEXP (cmp, 0);
16189 op1 = XEXP (cmp, 1);
16191 /* Check that comparison is against ZERO. */
16192 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
16193 continue;
16195 /* Find the first flag setting insn before INSN in basic block BB. */
16196 gcc_assert (insn != BB_HEAD (bb));
16197 for (prev = PREV_INSN (insn);
16198 (!insn_clobbered
16199 && prev != BB_HEAD (bb)
16200 && (NOTE_P (prev)
16201 || DEBUG_INSN_P (prev)
16202 || ((set = single_set (prev)) != NULL
16203 && get_attr_conds (prev) == CONDS_NOCOND)));
16204 prev = PREV_INSN (prev))
16206 if (reg_set_p (op0, prev))
16207 insn_clobbered = true;
16210 /* Skip if op0 is clobbered by insn other than prev. */
16211 if (insn_clobbered)
16212 continue;
16214 if (!set)
16215 continue;
16217 dest = SET_DEST (set);
16218 src = SET_SRC (set);
16219 if (!low_register_operand (dest, SImode)
16220 || !low_register_operand (src, SImode))
16221 continue;
16223 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16224 in INSN. Both src and dest of the move insn are checked. */
16225 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16227 dest = copy_rtx (dest);
16228 src = copy_rtx (src);
16229 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16230 PATTERN (prev) = gen_rtx_SET (dest, src);
16231 INSN_CODE (prev) = -1;
16232 /* Set test register in INSN to dest. */
16233 XEXP (cmp, 0) = copy_rtx (dest);
16234 INSN_CODE (insn) = -1;
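/* Roughly: given a move from r2 into r1 followed later in the block by
   "cmp r2, #0; bne .L1", the move is rewritten as the flag-setting
   "subs r1, r2, #0" and the comparison is changed to test r1, so the
   condition codes produced by the subtract describe the value being
   tested and the explicit compare can later become redundant.  */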
16239 /* Convert instructions to their cc-clobbering variant if possible, since
16240 that allows us to use smaller encodings. */
16242 static void
16243 thumb2_reorg (void)
16245 basic_block bb;
16246 regset_head live;
16248 INIT_REG_SET (&live);
16250 /* We are freeing block_for_insn in the toplev to keep compatibility
16251 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16252 compute_bb_for_insn ();
16253 df_analyze ();
16255 enum Convert_Action {SKIP, CONV, SWAP_CONV};
16257 FOR_EACH_BB_FN (bb, cfun)
16259 if ((current_tune->disparage_flag_setting_t16_encodings
16260 == tune_params::DISPARAGE_FLAGS_ALL)
16261 && optimize_bb_for_speed_p (bb))
16262 continue;
16264 rtx_insn *insn;
16265 Convert_Action action = SKIP;
16266 Convert_Action action_for_partial_flag_setting
16267 = ((current_tune->disparage_flag_setting_t16_encodings
16268 != tune_params::DISPARAGE_FLAGS_NEITHER)
16269 && optimize_bb_for_speed_p (bb))
16270 ? SKIP : CONV;
16272 COPY_REG_SET (&live, DF_LR_OUT (bb));
16273 df_simulate_initialize_backwards (bb, &live);
16274 FOR_BB_INSNS_REVERSE (bb, insn)
16276 if (NONJUMP_INSN_P (insn)
16277 && !REGNO_REG_SET_P (&live, CC_REGNUM)
16278 && GET_CODE (PATTERN (insn)) == SET)
16280 action = SKIP;
16281 rtx pat = PATTERN (insn);
16282 rtx dst = XEXP (pat, 0);
16283 rtx src = XEXP (pat, 1);
16284 rtx op0 = NULL_RTX, op1 = NULL_RTX;
16286 if (UNARY_P (src) || BINARY_P (src))
16287 op0 = XEXP (src, 0);
16289 if (BINARY_P (src))
16290 op1 = XEXP (src, 1);
16292 if (low_register_operand (dst, SImode))
16294 switch (GET_CODE (src))
16296 case PLUS:
16297 /* Adding two registers and storing the result
16298 in the first source is already a 16-bit
16299 operation. */
16300 if (rtx_equal_p (dst, op0)
16301 && register_operand (op1, SImode))
16302 break;
16304 if (low_register_operand (op0, SImode))
16306 /* ADDS <Rd>,<Rn>,<Rm> */
16307 if (low_register_operand (op1, SImode))
16308 action = CONV;
16309 /* ADDS <Rdn>,#<imm8> */
16310 /* SUBS <Rdn>,#<imm8> */
16311 else if (rtx_equal_p (dst, op0)
16312 && CONST_INT_P (op1)
16313 && IN_RANGE (INTVAL (op1), -255, 255))
16314 action = CONV;
16315 /* ADDS <Rd>,<Rn>,#<imm3> */
16316 /* SUBS <Rd>,<Rn>,#<imm3> */
16317 else if (CONST_INT_P (op1)
16318 && IN_RANGE (INTVAL (op1), -7, 7))
16319 action = CONV;
16321 /* ADCS <Rd>, <Rn> */
16322 else if (GET_CODE (XEXP (src, 0)) == PLUS
16323 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
16324 && low_register_operand (XEXP (XEXP (src, 0), 1),
16325 SImode)
16326 && COMPARISON_P (op1)
16327 && cc_register (XEXP (op1, 0), VOIDmode)
16328 && maybe_get_arm_condition_code (op1) == ARM_CS
16329 && XEXP (op1, 1) == const0_rtx)
16330 action = CONV;
16331 break;
16333 case MINUS:
16334 /* RSBS <Rd>,<Rn>,#0
16335 Not handled here: see NEG below. */
16336 /* SUBS <Rd>,<Rn>,#<imm3>
16337 SUBS <Rdn>,#<imm8>
16338 Not handled here: see PLUS above. */
16339 /* SUBS <Rd>,<Rn>,<Rm> */
16340 if (low_register_operand (op0, SImode)
16341 && low_register_operand (op1, SImode))
16342 action = CONV;
16343 break;
16345 case MULT:
16346 /* MULS <Rdm>,<Rn>,<Rdm>
16347 As an exception to the rule, this is only used
16348 when optimizing for size since MULS is slow on all
16349 known implementations. We do not even want to use
16350 MULS in cold code, if optimizing for speed, so we
16351 test the global flag here. */
16352 if (!optimize_size)
16353 break;
16354 /* Fall through. */
16355 case AND:
16356 case IOR:
16357 case XOR:
16358 /* ANDS <Rdn>,<Rm> */
16359 if (rtx_equal_p (dst, op0)
16360 && low_register_operand (op1, SImode))
16361 action = action_for_partial_flag_setting;
16362 else if (rtx_equal_p (dst, op1)
16363 && low_register_operand (op0, SImode))
16364 action = action_for_partial_flag_setting == SKIP
16365 ? SKIP : SWAP_CONV;
16366 break;
16368 case ASHIFTRT:
16369 case ASHIFT:
16370 case LSHIFTRT:
16371 /* ASRS <Rdn>,<Rm> */
16372 /* LSRS <Rdn>,<Rm> */
16373 /* LSLS <Rdn>,<Rm> */
16374 if (rtx_equal_p (dst, op0)
16375 && low_register_operand (op1, SImode))
16376 action = action_for_partial_flag_setting;
16377 /* ASRS <Rd>,<Rm>,#<imm5> */
16378 /* LSRS <Rd>,<Rm>,#<imm5> */
16379 /* LSLS <Rd>,<Rm>,#<imm5> */
16380 else if (low_register_operand (op0, SImode)
16381 && CONST_INT_P (op1)
16382 && IN_RANGE (INTVAL (op1), 0, 31))
16383 action = action_for_partial_flag_setting;
16384 break;
16386 case ROTATERT:
16387 /* RORS <Rdn>,<Rm> */
16388 if (rtx_equal_p (dst, op0)
16389 && low_register_operand (op1, SImode))
16390 action = action_for_partial_flag_setting;
16391 break;
16393 case NOT:
16394 /* MVNS <Rd>,<Rm> */
16395 if (low_register_operand (op0, SImode))
16396 action = action_for_partial_flag_setting;
16397 break;
16399 case NEG:
16400 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16401 if (low_register_operand (op0, SImode))
16402 action = CONV;
16403 break;
16405 case CONST_INT:
16406 /* MOVS <Rd>,#<imm8> */
16407 if (CONST_INT_P (src)
16408 && IN_RANGE (INTVAL (src), 0, 255))
16409 action = action_for_partial_flag_setting;
16410 break;
16412 case REG:
16413 /* MOVS and MOV<c> with registers have different
16414 encodings, so are not relevant here. */
16415 break;
16417 default:
16418 break;
16422 if (action != SKIP)
16424 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
16425 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
16426 rtvec vec;
16428 if (action == SWAP_CONV)
16430 src = copy_rtx (src);
16431 XEXP (src, 0) = op1;
16432 XEXP (src, 1) = op0;
16433 pat = gen_rtx_SET (dst, src);
16434 vec = gen_rtvec (2, pat, clobber);
16436 else /* action == CONV */
16437 vec = gen_rtvec (2, pat, clobber);
16439 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
16440 INSN_CODE (insn) = -1;
16444 if (NONDEBUG_INSN_P (insn))
16445 df_simulate_one_insn_backwards (bb, insn, &live);
16449 CLEAR_REG_SET (&live);
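/* As a rough example: when the condition codes are dead, a plain
       (set (reg r0) (plus (reg r1) (reg r2)))
   gains a CC clobber so that it can be emitted as the 16-bit flag-setting
   "adds r0, r1, r2" instead of a 32-bit add.  SWAP_CONV additionally swaps
   the operands of commutative operations such as AND so that the
   destination matches the first source, as the 16-bit forms require.  */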
16452 /* GCC puts the pool in the wrong place for ARM, since we can only
16453 load addresses a limited distance around the pc. We do some
16454 special munging to move the constant pool values to the correct
16455 point in the code. */
16456 static void
16457 arm_reorg (void)
16459 rtx_insn *insn;
16460 HOST_WIDE_INT address = 0;
16461 Mfix * fix;
16463 if (TARGET_THUMB1)
16464 thumb1_reorg ();
16465 else if (TARGET_THUMB2)
16466 thumb2_reorg ();
16468 /* Ensure all insns that must be split have been split at this point.
16469 Otherwise, the pool placement code below may compute incorrect
16470 insn lengths. Note that when optimizing, all insns have already
16471 been split at this point. */
16472 if (!optimize)
16473 split_all_insns_noflow ();
16475 minipool_fix_head = minipool_fix_tail = NULL;
16477 /* The first insn must always be a note, or the code below won't
16478 scan it properly. */
16479 insn = get_insns ();
16480 gcc_assert (NOTE_P (insn));
16481 minipool_pad = 0;
16483 /* Scan all the insns and record the operands that will need fixing. */
16484 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
16486 if (BARRIER_P (insn))
16487 push_minipool_barrier (insn, address);
16488 else if (INSN_P (insn))
16490 rtx_jump_table_data *table;
16492 note_invalid_constants (insn, address, true);
16493 address += get_attr_length (insn);
16495 /* If the insn is a vector jump, add the size of the table
16496 and skip the table. */
16497 if (tablejump_p (insn, NULL, &table))
16499 address += get_jump_table_size (table);
16500 insn = table;
16503 else if (LABEL_P (insn))
16504 /* Add the worst-case padding due to alignment. We don't add
16505 the _current_ padding because the minipool insertions
16506 themselves might change it. */
16507 address += get_label_padding (insn);
16510 fix = minipool_fix_head;
16512 /* Now scan the fixups and perform the required changes. */
16513 while (fix)
16515 Mfix * ftmp;
16516 Mfix * fdel;
16517 Mfix * last_added_fix;
16518 Mfix * last_barrier = NULL;
16519 Mfix * this_fix;
16521 /* Skip any further barriers before the next fix. */
16522 while (fix && BARRIER_P (fix->insn))
16523 fix = fix->next;
16525 /* No more fixes. */
16526 if (fix == NULL)
16527 break;
16529 last_added_fix = NULL;
16531 for (ftmp = fix; ftmp; ftmp = ftmp->next)
16533 if (BARRIER_P (ftmp->insn))
16535 if (ftmp->address >= minipool_vector_head->max_address)
16536 break;
16538 last_barrier = ftmp;
16540 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
16541 break;
16543 last_added_fix = ftmp; /* Keep track of the last fix added. */
16546 /* If we found a barrier, drop back to that; any fixes that we
16547 could have reached but come after the barrier will now go in
16548 the next mini-pool. */
16549 if (last_barrier != NULL)
16551 /* Reduce the refcount for those fixes that won't go into this
16552 pool after all. */
16553 for (fdel = last_barrier->next;
16554 fdel && fdel != ftmp;
16555 fdel = fdel->next)
16557 fdel->minipool->refcount--;
16558 fdel->minipool = NULL;
16561 ftmp = last_barrier;
16563 else
16565 /* ftmp is the first fix that we can't fit into this pool and
16566 there are no natural barriers that we could use. Insert a
16567 new barrier in the code somewhere between the previous
16568 fix and this one, and arrange to jump around it. */
16569 HOST_WIDE_INT max_address;
16571 /* The last item on the list of fixes must be a barrier, so
16572 we can never run off the end of the list of fixes without
16573 last_barrier being set. */
16574 gcc_assert (ftmp);
16576 max_address = minipool_vector_head->max_address;
16577 /* Check that there isn't another fix that is in range that
16578 we couldn't fit into this pool because the pool was
16579 already too large: we need to put the pool before such an
16580 instruction. The pool itself may come just after the
16581 fix because create_fix_barrier also allows space for a
16582 jump instruction. */
16583 if (ftmp->address < max_address)
16584 max_address = ftmp->address + 1;
16586 last_barrier = create_fix_barrier (last_added_fix, max_address);
16589 assign_minipool_offsets (last_barrier);
16591 while (ftmp)
16593 if (!BARRIER_P (ftmp->insn)
16594 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
16595 == NULL))
16596 break;
16598 ftmp = ftmp->next;
16601 /* Scan over the fixes we have identified for this pool, fixing them
16602 up and adding the constants to the pool itself. */
16603 for (this_fix = fix; this_fix && ftmp != this_fix;
16604 this_fix = this_fix->next)
16605 if (!BARRIER_P (this_fix->insn))
16607 rtx addr
16608 = plus_constant (Pmode,
16609 gen_rtx_LABEL_REF (VOIDmode,
16610 minipool_vector_label),
16611 this_fix->minipool->offset);
16612 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
16615 dump_minipool (last_barrier->insn);
16616 fix = ftmp;
16619 /* From now on we must synthesize any constants that we can't handle
16620 directly. This can happen if the RTL gets split during final
16621 instruction generation. */
16622 cfun->machine->after_arm_reorg = 1;
16624 /* Free the minipool memory. */
16625 obstack_free (&minipool_obstack, minipool_startobj);
16628 /* Routines to output assembly language. */
16630 /* Return string representation of passed in real value. */
16631 static const char *
16632 fp_const_from_val (REAL_VALUE_TYPE *r)
16634 if (!fp_consts_inited)
16635 init_fp_table ();
16637 gcc_assert (real_equal (r, &value_fp0));
16638 return "0";
16641 /* OPERANDS[0] is the entire list of insns that constitute pop,
16642 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
16643 is in the list, UPDATE is true iff the list contains explicit
16644 update of base register. */
16645 void
16646 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
16647 bool update)
16649 int i;
16650 char pattern[100];
16651 int offset;
16652 const char *conditional;
16653 int num_saves = XVECLEN (operands[0], 0);
16654 unsigned int regno;
16655 unsigned int regno_base = REGNO (operands[1]);
16656 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
16658 offset = 0;
16659 offset += update ? 1 : 0;
16660 offset += return_pc ? 1 : 0;
16662 /* Is the base register in the list? */
16663 for (i = offset; i < num_saves; i++)
16665 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
16666 /* If SP is in the list, then the base register must be SP. */
16667 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
16668 /* If base register is in the list, there must be no explicit update. */
16669 if (regno == regno_base)
16670 gcc_assert (!update);
16673 conditional = reverse ? "%?%D0" : "%?%d0";
16674 /* Can't use POP if returning from an interrupt. */
16675 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
16676 sprintf (pattern, "pop%s\t{", conditional);
16677 else
16679 /* Output ldmfd when the base register is SP, otherwise output ldmia.
16680 It's just a convention, their semantics are identical. */
16681 if (regno_base == SP_REGNUM)
16682 sprintf (pattern, "ldmfd%s\t", conditional);
16683 else if (update)
16684 sprintf (pattern, "ldmia%s\t", conditional);
16685 else
16686 sprintf (pattern, "ldm%s\t", conditional);
16688 strcat (pattern, reg_names[regno_base]);
16689 if (update)
16690 strcat (pattern, "!, {");
16691 else
16692 strcat (pattern, ", {");
16695 /* Output the first destination register. */
16696 strcat (pattern,
16697 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
16699 /* Output the rest of the destination registers. */
16700 for (i = offset + 1; i < num_saves; i++)
16702 strcat (pattern, ", ");
16703 strcat (pattern,
16704 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
16707 strcat (pattern, "}");
16709 if (interrupt_p && return_pc)
16710 strcat (pattern, "^");
16712 output_asm_insn (pattern, &cond);
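/* For instance, popping {r4, r5, pc} with SP as the base register and
   writeback produces "pop {r4, r5, pc}", while the same list with a non-SP
   base falls back to an LDM form such as "ldmia rN!, {r4, r5, pc}"; when an
   interrupt handler returns via PC, a trailing "^" is appended for the
   exception return.  */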
16716 /* Output the assembly for a store multiple. */
16718 const char *
16719 vfp_output_vstmd (rtx * operands)
16721 char pattern[100];
16722 int p;
16723 int base;
16724 int i;
16725 rtx addr_reg = REG_P (XEXP (operands[0], 0))
16726 ? XEXP (operands[0], 0)
16727 : XEXP (XEXP (operands[0], 0), 0);
16728 bool push_p = REGNO (addr_reg) == SP_REGNUM;
16730 if (push_p)
16731 strcpy (pattern, "vpush%?.64\t{%P1");
16732 else
16733 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
16735 p = strlen (pattern);
16737 gcc_assert (REG_P (operands[1]));
16739 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
16740 for (i = 1; i < XVECLEN (operands[2], 0); i++)
16742 p += sprintf (&pattern[p], ", d%d", base + i);
16744 strcpy (&pattern[p], "}");
16746 output_asm_insn (pattern, operands);
16747 return "";
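/* For example, storing d8-d11 with the stack pointer as the address register
   produces "vpush.64 {d8, d9, d10, d11}", while any other base register uses
   the writeback form "vstmdb.64 rN!, {d8, ...}".  */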
16751 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
16752 number of bytes pushed. */
16754 static int
16755 vfp_emit_fstmd (int base_reg, int count)
16757 rtx par;
16758 rtx dwarf;
16759 rtx tmp, reg;
16760 int i;
16762 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
16763 register pairs are stored by a store multiple insn. We avoid this
16764 by pushing an extra pair. */
16765 if (count == 2 && !arm_arch6)
16767 if (base_reg == LAST_VFP_REGNUM - 3)
16768 base_reg -= 2;
16769 count++;
16772 /* FSTMD may not store more than 16 doubleword registers at once. Split
16773 larger stores into multiple parts (up to a maximum of two, in
16774 practice). */
16775 if (count > 16)
16777 int saved;
16778 /* NOTE: base_reg is an internal register number, so each D register
16779 counts as 2. */
16780 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
16781 saved += vfp_emit_fstmd (base_reg, 16);
16782 return saved;
16785 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
16786 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
16788 reg = gen_rtx_REG (DFmode, base_reg);
16789 base_reg += 2;
16791 XVECEXP (par, 0, 0)
16792 = gen_rtx_SET (gen_frame_mem
16793 (BLKmode,
16794 gen_rtx_PRE_MODIFY (Pmode,
16795 stack_pointer_rtx,
16796 plus_constant
16797 (Pmode, stack_pointer_rtx,
16798 - (count * 8)))
16800 gen_rtx_UNSPEC (BLKmode,
16801 gen_rtvec (1, reg),
16802 UNSPEC_PUSH_MULT));
16804 tmp = gen_rtx_SET (stack_pointer_rtx,
16805 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
16806 RTX_FRAME_RELATED_P (tmp) = 1;
16807 XVECEXP (dwarf, 0, 0) = tmp;
16809 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
16810 RTX_FRAME_RELATED_P (tmp) = 1;
16811 XVECEXP (dwarf, 0, 1) = tmp;
16813 for (i = 1; i < count; i++)
16815 reg = gen_rtx_REG (DFmode, base_reg);
16816 base_reg += 2;
16817 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
16819 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
16820 plus_constant (Pmode,
16821 stack_pointer_rtx,
16822 i * 8)),
16823 reg);
16824 RTX_FRAME_RELATED_P (tmp) = 1;
16825 XVECEXP (dwarf, 0, i + 1) = tmp;
16828 par = emit_insn (par);
16829 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16830 RTX_FRAME_RELATED_P (par) = 1;
16832 return count * 8;
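/* For example, pushing the call-saved registers d8-d15 (count == 8) emits a
   single store-multiple that allocates 64 bytes of stack and returns 64;
   when arm_arch6 is not set, a count of exactly 2 is widened to 3 pairs to
   avoid the ARM10 VFPr1 erratum mentioned above.  */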
16835 /* Emit a call instruction with pattern PAT. ADDR is the address of
16836 the call target. */
16838 void
16839 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
16841 rtx insn;
16843 insn = emit_call_insn (pat);
16845 /* The PIC register is live on entry to VxWorks PIC PLT entries.
16846 If the call might use such an entry, add a use of the PIC register
16847 to the instruction's CALL_INSN_FUNCTION_USAGE. */
16848 if (TARGET_VXWORKS_RTP
16849 && flag_pic
16850 && !sibcall
16851 && GET_CODE (addr) == SYMBOL_REF
16852 && (SYMBOL_REF_DECL (addr)
16853 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
16854 : !SYMBOL_REF_LOCAL_P (addr)))
16856 require_pic_register ();
16857 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
16860 if (TARGET_AAPCS_BASED)
16862 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
16863 linker. We need to add an IP clobber to allow setting
16864 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
16865 is not needed since it's a fixed register. */
16866 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
16867 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
16871 /* Output a 'call' insn. */
16872 const char *
16873 output_call (rtx *operands)
16875 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
16877 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
16878 if (REGNO (operands[0]) == LR_REGNUM)
16880 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
16881 output_asm_insn ("mov%?\t%0, %|lr", operands);
16884 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
16886 if (TARGET_INTERWORK || arm_arch4t)
16887 output_asm_insn ("bx%?\t%0", operands);
16888 else
16889 output_asm_insn ("mov%?\t%|pc, %0", operands);
16891 return "";
16894 /* Output a move of a long double from ARM registers to ARM registers.
16895 OPERANDS[0] is the destination.
16896 OPERANDS[1] is the source. */
16897 const char *
16898 output_mov_long_double_arm_from_arm (rtx *operands)
16900 /* We have to be careful here because the two might overlap. */
16901 int dest_start = REGNO (operands[0]);
16902 int src_start = REGNO (operands[1]);
16903 rtx ops[2];
16904 int i;
16906 if (dest_start < src_start)
16908 for (i = 0; i < 3; i++)
16910 ops[0] = gen_rtx_REG (SImode, dest_start + i);
16911 ops[1] = gen_rtx_REG (SImode, src_start + i);
16912 output_asm_insn ("mov%?\t%0, %1", ops);
16915 else
16917 for (i = 2; i >= 0; i--)
16919 ops[0] = gen_rtx_REG (SImode, dest_start + i);
16920 ops[1] = gen_rtx_REG (SImode, src_start + i);
16921 output_asm_insn ("mov%?\t%0, %1", ops);
16925 return "";
16928 void
16929 arm_emit_movpair (rtx dest, rtx src)
16931 rtx insn;
16933 /* If the src is an immediate, simplify it. */
16934 if (CONST_INT_P (src))
16936 HOST_WIDE_INT val = INTVAL (src);
16937 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
16938 if ((val >> 16) & 0x0000ffff)
16940 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
16941 GEN_INT (16)),
16942 GEN_INT ((val >> 16) & 0x0000ffff));
16943 insn = get_last_insn ();
16944 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
16946 return;
16948 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
16949 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
16950 insn = get_last_insn ();
16951 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
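/* For example, loading the constant 0x12345678 emits a set of the low half
   (0x5678, typically a MOVW) followed by a ZERO_EXTRACT set of the high half
   (0x1234, typically a MOVT), with a REG_EQUAL note recording the full
   value; a constant whose high half is zero needs only the first insn.  */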
16954 /* Output a move between double words. It must be REG<-MEM
16955 or MEM<-REG. */
16956 const char *
16957 output_move_double (rtx *operands, bool emit, int *count)
16959 enum rtx_code code0 = GET_CODE (operands[0]);
16960 enum rtx_code code1 = GET_CODE (operands[1]);
16961 rtx otherops[3];
16962 if (count)
16963 *count = 1;
16965 /* The only case when this might happen is when
16966 you are looking at the length of a DImode instruction
16967 that has an invalid constant in it. */
16968 if (code0 == REG && code1 != MEM)
16970 gcc_assert (!emit);
16971 *count = 2;
16972 return "";
16975 if (code0 == REG)
16977 unsigned int reg0 = REGNO (operands[0]);
16979 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
16981 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
16983 switch (GET_CODE (XEXP (operands[1], 0)))
16985 case REG:
16987 if (emit)
16989 if (TARGET_LDRD
16990 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
16991 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
16992 else
16993 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
16995 break;
16997 case PRE_INC:
16998 gcc_assert (TARGET_LDRD);
16999 if (emit)
17000 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17001 break;
17003 case PRE_DEC:
17004 if (emit)
17006 if (TARGET_LDRD)
17007 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17008 else
17009 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17011 break;
17013 case POST_INC:
17014 if (emit)
17016 if (TARGET_LDRD)
17017 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
17018 else
17019 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
17021 break;
17023 case POST_DEC:
17024 gcc_assert (TARGET_LDRD);
17025 if (emit)
17026 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
17027 break;
17029 case PRE_MODIFY:
17030 case POST_MODIFY:
17031 /* Autoincrement addressing modes should never have overlapping
17032 base and destination registers, and overlapping index registers
17033 are already prohibited, so this doesn't need to worry about
17034 fix_cm3_ldrd. */
17035 otherops[0] = operands[0];
17036 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17037 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17039 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17041 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17043 /* Registers overlap so split out the increment. */
17044 if (emit)
17046 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17047 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
17049 if (count)
17050 *count = 2;
17052 else
17054 /* Use a single insn if we can.
17055 FIXME: IWMMXT allows offsets larger than ldrd can
17056 handle, fix these up with a pair of ldr. */
17057 if (TARGET_THUMB2
17058 || !CONST_INT_P (otherops[2])
17059 || (INTVAL (otherops[2]) > -256
17060 && INTVAL (otherops[2]) < 256))
17062 if (emit)
17063 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
17065 else
17067 if (emit)
17069 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17070 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17072 if (count)
17073 *count = 2;
17078 else
17080 /* Use a single insn if we can.
17081 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17082 fix these up with a pair of ldr. */
17083 if (TARGET_THUMB2
17084 || !CONST_INT_P (otherops[2])
17085 || (INTVAL (otherops[2]) > -256
17086 && INTVAL (otherops[2]) < 256))
17088 if (emit)
17089 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
17091 else
17093 if (emit)
17095 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17096 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17098 if (count)
17099 *count = 2;
17102 break;
17104 case LABEL_REF:
17105 case CONST:
17106 /* We might be able to use ldrd %0, %1 here. However the range is
17107 different to ldr/adr, and it is broken on some ARMv7-M
17108 implementations. */
17109 /* Use the second register of the pair to avoid problematic
17110 overlap. */
17111 otherops[1] = operands[1];
17112 if (emit)
17113 output_asm_insn ("adr%?\t%0, %1", otherops);
17114 operands[1] = otherops[0];
17115 if (emit)
17117 if (TARGET_LDRD)
17118 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
17119 else
17120 output_asm_insn ("ldmia%?\t%1, %M0", operands);
17123 if (count)
17124 *count = 2;
17125 break;
17127 /* ??? This needs checking for thumb2. */
17128 default:
17129 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17130 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17132 otherops[0] = operands[0];
17133 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17134 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17136 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17138 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17140 switch ((int) INTVAL (otherops[2]))
17142 case -8:
17143 if (emit)
17144 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
17145 return "";
17146 case -4:
17147 if (TARGET_THUMB2)
17148 break;
17149 if (emit)
17150 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
17151 return "";
17152 case 4:
17153 if (TARGET_THUMB2)
17154 break;
17155 if (emit)
17156 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
17157 return "";
17160 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17161 operands[1] = otherops[0];
17162 if (TARGET_LDRD
17163 && (REG_P (otherops[2])
17164 || TARGET_THUMB2
17165 || (CONST_INT_P (otherops[2])
17166 && INTVAL (otherops[2]) > -256
17167 && INTVAL (otherops[2]) < 256)))
17169 if (reg_overlap_mentioned_p (operands[0],
17170 otherops[2]))
17172 /* Swap base and index registers over to
17173 avoid a conflict. */
17174 std::swap (otherops[1], otherops[2]);
17176 /* If both registers conflict, it will usually
17177 have been fixed by a splitter. */
17178 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17179 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17181 if (emit)
17183 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17184 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
17186 if (count)
17187 *count = 2;
17189 else
17191 otherops[0] = operands[0];
17192 if (emit)
17193 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
17195 return "";
17198 if (CONST_INT_P (otherops[2]))
17200 if (emit)
17202 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17203 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17204 else
17205 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17208 else
17210 if (emit)
17211 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17214 else
17216 if (emit)
17217 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
17220 if (count)
17221 *count = 2;
17223 if (TARGET_LDRD)
17224 return "ldrd%?\t%0, [%1]";
17226 return "ldmia%?\t%1, %M0";
17228 else
17230 otherops[1] = adjust_address (operands[1], SImode, 4);
17231 /* Take care of overlapping base/data reg. */
17232 if (reg_mentioned_p (operands[0], operands[1]))
17234 if (emit)
17236 output_asm_insn ("ldr%?\t%0, %1", otherops);
17237 output_asm_insn ("ldr%?\t%0, %1", operands);
17239 if (count)
17240 *count = 2;
17243 else
17245 if (emit)
17247 output_asm_insn ("ldr%?\t%0, %1", operands);
17248 output_asm_insn ("ldr%?\t%0, %1", otherops);
17250 if (count)
17251 *count = 2;
17256 else
17258 /* Constraints should ensure this. */
17259 gcc_assert (code0 == MEM && code1 == REG);
17260 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
17261 || (TARGET_ARM && TARGET_LDRD));
17263 switch (GET_CODE (XEXP (operands[0], 0)))
17265 case REG:
17266 if (emit)
17268 if (TARGET_LDRD)
17269 output_asm_insn ("strd%?\t%1, [%m0]", operands);
17270 else
17271 output_asm_insn ("stm%?\t%m0, %M1", operands);
17273 break;
17275 case PRE_INC:
17276 gcc_assert (TARGET_LDRD);
17277 if (emit)
17278 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
17279 break;
17281 case PRE_DEC:
17282 if (emit)
17284 if (TARGET_LDRD)
17285 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
17286 else
17287 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
17289 break;
17291 case POST_INC:
17292 if (emit)
17294 if (TARGET_LDRD)
17295 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
17296 else
17297 output_asm_insn ("stm%?\t%m0!, %M1", operands);
17299 break;
17301 case POST_DEC:
17302 gcc_assert (TARGET_LDRD);
17303 if (emit)
17304 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
17305 break;
17307 case PRE_MODIFY:
17308 case POST_MODIFY:
17309 otherops[0] = operands[1];
17310 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
17311 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
17313 /* IWMMXT allows offsets larger than ldrd can handle,
17314 fix these up with a pair of ldr. */
17315 if (!TARGET_THUMB2
17316 && CONST_INT_P (otherops[2])
17317 && (INTVAL(otherops[2]) <= -256
17318 || INTVAL(otherops[2]) >= 256))
17320 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17322 if (emit)
17324 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
17325 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17327 if (count)
17328 *count = 2;
17330 else
17332 if (emit)
17334 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17335 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
17337 if (count)
17338 *count = 2;
17341 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17343 if (emit)
17344 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
17346 else
17348 if (emit)
17349 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
17351 break;
17353 case PLUS:
17354 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
17355 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17357 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
17359 case -8:
17360 if (emit)
17361 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
17362 return "";
17364 case -4:
17365 if (TARGET_THUMB2)
17366 break;
17367 if (emit)
17368 output_asm_insn ("stmda%?\t%m0, %M1", operands);
17369 return "";
17371 case 4:
17372 if (TARGET_THUMB2)
17373 break;
17374 if (emit)
17375 output_asm_insn ("stmib%?\t%m0, %M1", operands);
17376 return "";
17379 if (TARGET_LDRD
17380 && (REG_P (otherops[2])
17381 || TARGET_THUMB2
17382 || (CONST_INT_P (otherops[2])
17383 && INTVAL (otherops[2]) > -256
17384 && INTVAL (otherops[2]) < 256)))
17386 otherops[0] = operands[1];
17387 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
17388 if (emit)
17389 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
17390 return "";
17392 /* Fall through */
17394 default:
17395 otherops[0] = adjust_address (operands[0], SImode, 4);
17396 otherops[1] = operands[1];
17397 if (emit)
17399 output_asm_insn ("str%?\t%1, %0", operands);
17400 output_asm_insn ("str%?\t%H1, %0", otherops);
17402 if (count)
17403 *count = 2;
17407 return "";
17410 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17411 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17413 const char *
17414 output_move_quad (rtx *operands)
17416 if (REG_P (operands[0]))
17418 /* Load, or reg->reg move. */
17420 if (MEM_P (operands[1]))
17422 switch (GET_CODE (XEXP (operands[1], 0)))
17424 case REG:
17425 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17426 break;
17428 case LABEL_REF:
17429 case CONST:
17430 output_asm_insn ("adr%?\t%0, %1", operands);
17431 output_asm_insn ("ldmia%?\t%0, %M0", operands);
17432 break;
17434 default:
17435 gcc_unreachable ();
17438 else
17440 rtx ops[2];
17441 int dest, src, i;
17443 gcc_assert (REG_P (operands[1]));
17445 dest = REGNO (operands[0]);
17446 src = REGNO (operands[1]);
17448 /* This seems pretty dumb, but hopefully GCC won't try to do it
17449 very often. */
17450 if (dest < src)
17451 for (i = 0; i < 4; i++)
17453 ops[0] = gen_rtx_REG (SImode, dest + i);
17454 ops[1] = gen_rtx_REG (SImode, src + i);
17455 output_asm_insn ("mov%?\t%0, %1", ops);
17457 else
17458 for (i = 3; i >= 0; i--)
17460 ops[0] = gen_rtx_REG (SImode, dest + i);
17461 ops[1] = gen_rtx_REG (SImode, src + i);
17462 output_asm_insn ("mov%?\t%0, %1", ops);
17466 else
17468 gcc_assert (MEM_P (operands[0]));
17469 gcc_assert (REG_P (operands[1]));
17470 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
17472 switch (GET_CODE (XEXP (operands[0], 0)))
17474 case REG:
17475 output_asm_insn ("stm%?\t%m0, %M1", operands);
17476 break;
17478 default:
17479 gcc_unreachable ();
17483 return "";
17486 /* Output a VFP load or store instruction. */
17488 const char *
17489 output_move_vfp (rtx *operands)
17491 rtx reg, mem, addr, ops[2];
17492 int load = REG_P (operands[0]);
17493 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
17494 int sp = (!TARGET_VFP_FP16INST
17495 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
17496 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
17497 const char *templ;
17498 char buff[50];
17499 machine_mode mode;
17501 reg = operands[!load];
17502 mem = operands[load];
17504 mode = GET_MODE (reg);
17506 gcc_assert (REG_P (reg));
17507 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
17508 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
17509 || mode == SFmode
17510 || mode == DFmode
17511 || mode == HImode
17512 || mode == SImode
17513 || mode == DImode
17514 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
17515 gcc_assert (MEM_P (mem));
17517 addr = XEXP (mem, 0);
17519 switch (GET_CODE (addr))
17521 case PRE_DEC:
17522 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
17523 ops[0] = XEXP (addr, 0);
17524 ops[1] = reg;
17525 break;
17527 case POST_INC:
17528 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
17529 ops[0] = XEXP (addr, 0);
17530 ops[1] = reg;
17531 break;
17533 default:
17534 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
17535 ops[0] = reg;
17536 ops[1] = mem;
17537 break;
17540 sprintf (buff, templ,
17541 load ? "ld" : "st",
17542 dp ? "64" : sp ? "32" : "16",
17543 dp ? "P" : "",
17544 integer_p ? "\t%@ int" : "");
17545 output_asm_insn (buff, ops);
17547 return "";
17550 /* Output a Neon double-word or quad-word load or store, or a load
17551 or store for larger structure modes.
17553 WARNING: The ordering of elements is weird in big-endian mode,
17554 because the EABI requires that vectors stored in memory appear
17555 as though they were stored by a VSTM.
17556 GCC RTL defines element ordering based on in-memory order.
17557 This can be different from the architectural ordering of elements
17558 within a NEON register. The intrinsics defined in arm_neon.h use the
17559 NEON register element ordering, not the GCC RTL element ordering.
17561 For example, the in-memory ordering of a big-endian quadword
17562 vector with 16-bit elements when stored from register pair {d0,d1}
17563 will be (lowest address first, d0[N] is NEON register element N):
17565 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
17567 When necessary, quadword registers (dN, dN+1) are moved to ARM
17568 registers from rN in the order:
17570 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
17572 So that STM/LDM can be used on vectors in ARM registers, and the
17573 same memory layout will result as if VSTM/VLDM were used.
17575 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
17576 possible, which allows use of appropriate alignment tags.
17577 Note that the choice of "64" is independent of the actual vector
17578 element size; this size simply ensures that the behavior is
17579 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
17581 Due to limitations of those instructions, use of VST1.64/VLD1.64
17582 is not possible if:
17583 - the address contains PRE_DEC, or
17584 - the mode refers to more than 4 double-word registers
17586 In those cases, it would be possible to replace VSTM/VLDM by a
17587 sequence of instructions; this is not currently implemented since
17588 this is not certain to actually improve performance. */
17590 const char *
17591 output_move_neon (rtx *operands)
17593 rtx reg, mem, addr, ops[2];
17594 int regno, nregs, load = REG_P (operands[0]);
17595 const char *templ;
17596 char buff[50];
17597 machine_mode mode;
17599 reg = operands[!load];
17600 mem = operands[load];
17602 mode = GET_MODE (reg);
17604 gcc_assert (REG_P (reg));
17605 regno = REGNO (reg);
17606 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
17607 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
17608 || NEON_REGNO_OK_FOR_QUAD (regno));
17609 gcc_assert (VALID_NEON_DREG_MODE (mode)
17610 || VALID_NEON_QREG_MODE (mode)
17611 || VALID_NEON_STRUCT_MODE (mode));
17612 gcc_assert (MEM_P (mem));
17614 addr = XEXP (mem, 0);
17616 /* Strip off const from addresses like (const (plus (...))). */
17617 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
17618 addr = XEXP (addr, 0);
17620 switch (GET_CODE (addr))
17622 case POST_INC:
17623 /* We have to use vldm / vstm for too-large modes. */
17624 if (nregs > 4)
17626 templ = "v%smia%%?\t%%0!, %%h1";
17627 ops[0] = XEXP (addr, 0);
17629 else
17631 templ = "v%s1.64\t%%h1, %%A0";
17632 ops[0] = mem;
17634 ops[1] = reg;
17635 break;
17637 case PRE_DEC:
17638 /* We have to use vldm / vstm in this case, since there is no
17639 pre-decrement form of the vld1 / vst1 instructions. */
17640 templ = "v%smdb%%?\t%%0!, %%h1";
17641 ops[0] = XEXP (addr, 0);
17642 ops[1] = reg;
17643 break;
17645 case POST_MODIFY:
17646 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
17647 gcc_unreachable ();
17649 case REG:
17650 /* We have to use vldm / vstm for too-large modes. */
17651 if (nregs > 1)
17653 if (nregs > 4)
17654 templ = "v%smia%%?\t%%m0, %%h1";
17655 else
17656 templ = "v%s1.64\t%%h1, %%A0";
17658 ops[0] = mem;
17659 ops[1] = reg;
17660 break;
17662 /* Fall through. */
17663 case LABEL_REF:
17664 case PLUS:
17666 int i;
17667 int overlap = -1;
17668 for (i = 0; i < nregs; i++)
17670 /* We're only using DImode here because it's a convenient size. */
17671 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
17672 ops[1] = adjust_address (mem, DImode, 8 * i);
17673 if (reg_overlap_mentioned_p (ops[0], mem))
17675 gcc_assert (overlap == -1);
17676 overlap = i;
17678 else
17680 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
17681 output_asm_insn (buff, ops);
17684 if (overlap != -1)
17686 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
17687 ops[1] = adjust_address (mem, SImode, 8 * overlap);
17688 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
17689 output_asm_insn (buff, ops);
17692 return "";
17695 default:
17696 gcc_unreachable ();
17699 sprintf (buff, templ, load ? "ld" : "st");
17700 output_asm_insn (buff, ops);
17702 return "";
17705 /* Compute and return the length of neon_mov<mode>, where <mode> is
17706 one of VSTRUCT modes: EI, OI, CI or XI. */
17708 arm_attr_length_move_neon (rtx_insn *insn)
17710 rtx reg, mem, addr;
17711 int load;
17712 machine_mode mode;
17714 extract_insn_cached (insn);
17716 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
17718 mode = GET_MODE (recog_data.operand[0]);
17719 switch (mode)
17721 case EImode:
17722 case OImode:
17723 return 8;
17724 case CImode:
17725 return 12;
17726 case XImode:
17727 return 16;
17728 default:
17729 gcc_unreachable ();
17733 load = REG_P (recog_data.operand[0]);
17734 reg = recog_data.operand[!load];
17735 mem = recog_data.operand[load];
17737 gcc_assert (MEM_P (mem));
17739 mode = GET_MODE (reg);
17740 addr = XEXP (mem, 0);
17742 /* Strip off const from addresses like (const (plus (...))). */
17743 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
17744 addr = XEXP (addr, 0);
17746 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
17748 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
17749 return insns * 4;
17751 else
17752 return 4;
17755 /* Return nonzero if the offset in the address is an immediate. Otherwise,
17756 return zero. */
17759 arm_address_offset_is_imm (rtx_insn *insn)
17761 rtx mem, addr;
17763 extract_insn_cached (insn);
17765 if (REG_P (recog_data.operand[0]))
17766 return 0;
17768 mem = recog_data.operand[0];
17770 gcc_assert (MEM_P (mem));
17772 addr = XEXP (mem, 0);
17774 if (REG_P (addr)
17775 || (GET_CODE (addr) == PLUS
17776 && REG_P (XEXP (addr, 0))
17777 && CONST_INT_P (XEXP (addr, 1))))
17778 return 1;
17779 else
17780 return 0;
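/* A minimal sketch of the predicate above (hypothetical addresses): a
   store to [r3] or to [r3, #8] makes it return 1, while a store using a
   register offset such as [r3, r4] -- or any insn whose first operand is
   a register, i.e. a load -- makes it return 0.  */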
17783 /* Output an ADD r, s, #n where n may be too big for one instruction.
17784 If adding zero to one register, output nothing. */
17785 const char *
17786 output_add_immediate (rtx *operands)
17788 HOST_WIDE_INT n = INTVAL (operands[2]);
17790 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
17792 if (n < 0)
17793 output_multi_immediate (operands,
17794 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
17795 -n);
17796 else
17797 output_multi_immediate (operands,
17798 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
17802 return "";
17805 /* Output a multiple immediate operation.
17806 OPERANDS is the vector of operands referred to in the output patterns.
17807 INSTR1 is the output pattern to use for the first constant.
17808 INSTR2 is the output pattern to use for subsequent constants.
17809 IMMED_OP is the index of the constant slot in OPERANDS.
17810 N is the constant value. */
17811 static const char *
17812 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
17813 int immed_op, HOST_WIDE_INT n)
17815 #if HOST_BITS_PER_WIDE_INT > 32
17816 n &= 0xffffffff;
17817 #endif
17819 if (n == 0)
17821 /* Quick and easy output. */
17822 operands[immed_op] = const0_rtx;
17823 output_asm_insn (instr1, operands);
17825 else
17827 int i;
17828 const char * instr = instr1;
17830 /* Note that n is never zero here (which would give no output). */
17831 for (i = 0; i < 32; i += 2)
17833 if (n & (3 << i))
17835 operands[immed_op] = GEN_INT (n & (255 << i));
17836 output_asm_insn (instr, operands);
17837 instr = instr2;
17838 i += 6;
17843 return "";
17846 /* Return the name of a shifter operation. */
17847 static const char *
17848 arm_shift_nmem(enum rtx_code code)
17850 switch (code)
17852 case ASHIFT:
17853 return ARM_LSL_NAME;
17855 case ASHIFTRT:
17856 return "asr";
17858 case LSHIFTRT:
17859 return "lsr";
17861 case ROTATERT:
17862 return "ror";
17864 default:
17865 abort();
17869 /* Return the appropriate ARM instruction for the operation code.
17870 The returned result should not be overwritten. OP is the rtx of the
17871 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
17872 was shifted. */
17873 const char *
17874 arithmetic_instr (rtx op, int shift_first_arg)
17876 switch (GET_CODE (op))
17878 case PLUS:
17879 return "add";
17881 case MINUS:
17882 return shift_first_arg ? "rsb" : "sub";
17884 case IOR:
17885 return "orr";
17887 case XOR:
17888 return "eor";
17890 case AND:
17891 return "and";
17893 case ASHIFT:
17894 case ASHIFTRT:
17895 case LSHIFTRT:
17896 case ROTATERT:
17897 return arm_shift_nmem(GET_CODE(op));
17899 default:
17900 gcc_unreachable ();
17904 /* Ensure valid constant shifts and return the appropriate shift mnemonic
17905 for the operation code. The returned result should not be overwritten.
17906 OP is the rtx of the shift.
17907 On exit, *AMOUNTP will be -1 if the shift is by a register, or the constant
17908 amount of the shift otherwise. */
17909 static const char *
17910 shift_op (rtx op, HOST_WIDE_INT *amountp)
17912 const char * mnem;
17913 enum rtx_code code = GET_CODE (op);
17915 switch (code)
17917 case ROTATE:
17918 if (!CONST_INT_P (XEXP (op, 1)))
17920 output_operand_lossage ("invalid shift operand");
17921 return NULL;
17924 code = ROTATERT;
17925 *amountp = 32 - INTVAL (XEXP (op, 1));
17926 mnem = "ror";
17927 break;
17929 case ASHIFT:
17930 case ASHIFTRT:
17931 case LSHIFTRT:
17932 case ROTATERT:
17933 mnem = arm_shift_nmem(code);
17934 if (CONST_INT_P (XEXP (op, 1)))
17936 *amountp = INTVAL (XEXP (op, 1));
17938 else if (REG_P (XEXP (op, 1)))
17940 *amountp = -1;
17941 return mnem;
17943 else
17945 output_operand_lossage ("invalid shift operand");
17946 return NULL;
17948 break;
17950 case MULT:
17951 /* We never have to worry about the amount being other than a
17952 power of 2, since this case can never be reloaded from a reg. */
17953 if (!CONST_INT_P (XEXP (op, 1)))
17955 output_operand_lossage ("invalid shift operand");
17956 return NULL;
17959 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
17961 /* Amount must be a power of two. */
17962 if (*amountp & (*amountp - 1))
17964 output_operand_lossage ("invalid shift operand");
17965 return NULL;
17968 *amountp = exact_log2 (*amountp);
17969 gcc_assert (IN_RANGE (*amountp, 0, 31));
17970 return ARM_LSL_NAME;
17972 default:
17973 output_operand_lossage ("invalid shift operand");
17974 return NULL;
17977 /* This is not 100% correct, but follows from the desire to merge
17978 multiplication by a power of 2 with the recognizer for a
17979 shift. >=32 is not a valid shift for "lsl", so we must try and
17980 output a shift that produces the correct arithmetical result.
17981 Using lsr #32 is identical except for the fact that the carry bit
17982 is not set correctly if we set the flags; but we never use the
17983 carry bit from such an operation, so we can ignore that. */
17984 if (code == ROTATERT)
17985 /* Rotate is just modulo 32. */
17986 *amountp &= 31;
17987 else if (*amountp != (*amountp & 31))
17989 if (code == ASHIFT)
17990 mnem = "lsr";
17991 *amountp = 32;
17994 /* Shifts of 0 are no-ops. */
17995 if (*amountp == 0)
17996 return NULL;
17998 return mnem;
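/* A small illustration (hypothetical rtx, not from the original
   sources): for (mult:SI (reg:SI r1) (const_int 8)) the MULT case above
   returns ARM_LSL_NAME with *AMOUNTP set to 3, so the caller can emit
   e.g. "mov r0, r1, lsl #3"; a (rotate ... (const_int 10)) is rewritten
   as "ror" with an amount of 22.  */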
18001 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18002 because /bin/as is horribly restrictive. The judgement about
18003 whether or not each character is 'printable' (and can be output as
18004 is) or not (and must be printed with an octal escape) must be made
18005 with reference to the *host* character set -- the situation is
18006 similar to that discussed in the comments above pp_c_char in
18007 c-pretty-print.c. */
18009 #define MAX_ASCII_LEN 51
18011 void
18012 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18014 int i;
18015 int len_so_far = 0;
18017 fputs ("\t.ascii\t\"", stream);
18019 for (i = 0; i < len; i++)
18021 int c = p[i];
18023 if (len_so_far >= MAX_ASCII_LEN)
18025 fputs ("\"\n\t.ascii\t\"", stream);
18026 len_so_far = 0;
18029 if (ISPRINT (c))
18031 if (c == '\\' || c == '\"')
18033 putc ('\\', stream);
18034 len_so_far++;
18036 putc (c, stream);
18037 len_so_far++;
18039 else
18041 fprintf (stream, "\\%03o", c);
18042 len_so_far += 4;
18046 fputs ("\"\n", stream);
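/* For illustration (a hypothetical input string): the bytes of
   "hi\n\"there\"" would be emitted as

	.ascii	"hi\012\"there\""

   since the newline is not printable and is output as an octal escape,
   while quotes and backslashes are backslash-escaped; output longer than
   MAX_ASCII_LEN characters is split across several .ascii directives.  */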
18049 /* Whether a register is callee saved or not. This is necessary because, when
18050 optimizing for size on Thumb-1 targets, high registers are marked as caller
18051 saved despite being callee saved, in order to avoid using them. */
18052 #define callee_saved_reg_p(reg) \
18053 (!call_used_regs[reg] \
18054 || (TARGET_THUMB1 && optimize_size \
18055 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18057 /* Compute the register save mask for registers 0 through 12
18058 inclusive. This code is used by arm_compute_save_reg_mask. */
18060 static unsigned long
18061 arm_compute_save_reg0_reg12_mask (void)
18063 unsigned long func_type = arm_current_func_type ();
18064 unsigned long save_reg_mask = 0;
18065 unsigned int reg;
18067 if (IS_INTERRUPT (func_type))
18069 unsigned int max_reg;
18070 /* Interrupt functions must not corrupt any registers,
18071 even call clobbered ones. If this is a leaf function
18072 we can just examine the registers used by the RTL, but
18073 otherwise we have to assume that whatever function is
18074 called might clobber anything, and so we have to save
18075 all the call-clobbered registers as well. */
18076 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18077 /* FIQ handlers have registers r8 - r12 banked, so
18078 we only need to check r0 - r7. Normal ISRs only
18079 bank r14 and r15, so we must check up to r12.
18080 r13 is the stack pointer which is always preserved,
18081 so we do not need to consider it here. */
18082 max_reg = 7;
18083 else
18084 max_reg = 12;
18086 for (reg = 0; reg <= max_reg; reg++)
18087 if (df_regs_ever_live_p (reg)
18088 || (! crtl->is_leaf && call_used_regs[reg]))
18089 save_reg_mask |= (1 << reg);
18091 /* Also save the pic base register if necessary. */
18092 if (flag_pic
18093 && !TARGET_SINGLE_PIC_BASE
18094 && arm_pic_register != INVALID_REGNUM
18095 && crtl->uses_pic_offset_table)
18096 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18098 else if (IS_VOLATILE(func_type))
18100 /* For noreturn functions we historically omitted register saves
18101 altogether. However this really messes up debugging. As a
18102 compromise save just the frame pointers. Combined with the link
18103 register saved elsewhere this should be sufficient to get
18104 a backtrace. */
18105 if (frame_pointer_needed)
18106 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18107 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18108 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18109 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18110 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18112 else
18114 /* In the normal case we only need to save those registers
18115 which are call saved and which are used by this function. */
18116 for (reg = 0; reg <= 11; reg++)
18117 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
18118 save_reg_mask |= (1 << reg);
18120 /* Handle the frame pointer as a special case. */
18121 if (frame_pointer_needed)
18122 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18124 /* If we aren't loading the PIC register,
18125 don't stack it even though it may be live. */
18126 if (flag_pic
18127 && !TARGET_SINGLE_PIC_BASE
18128 && arm_pic_register != INVALID_REGNUM
18129 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18130 || crtl->uses_pic_offset_table))
18131 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18133 /* The prologue will copy SP into R0, so save it. */
18134 if (IS_STACKALIGN (func_type))
18135 save_reg_mask |= 1;
18138 /* Save registers so the exception handler can modify them. */
18139 if (crtl->calls_eh_return)
18141 unsigned int i;
18143 for (i = 0; ; i++)
18145 reg = EH_RETURN_DATA_REGNO (i);
18146 if (reg == INVALID_REGNUM)
18147 break;
18148 save_reg_mask |= 1 << reg;
18152 return save_reg_mask;
18155 /* Return true if r3 is live at the start of the function. */
18157 static bool
18158 arm_r3_live_at_start_p (void)
18160 /* Just look at cfg info, which is still close enough to correct at this
18161 point. This gives false positives for broken functions that might use
18162 uninitialized data that happens to be allocated in r3, but who cares? */
18163 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
18166 /* Compute the number of bytes used to store the static chain register on the
18167 stack, above the stack frame. We need to know this accurately to get the
18168 alignment of the rest of the stack frame correct. */
18170 static int
18171 arm_compute_static_chain_stack_bytes (void)
18173 /* See the defining assertion in arm_expand_prologue. */
18174 if (IS_NESTED (arm_current_func_type ())
18175 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18176 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
18177 && !df_regs_ever_live_p (LR_REGNUM)))
18178 && arm_r3_live_at_start_p ()
18179 && crtl->args.pretend_args_size == 0)
18180 return 4;
18182 return 0;
18185 /* Compute a bit mask of which registers need to be
18186 saved on the stack for the current function.
18187 This is used by arm_get_frame_offsets, which may add extra registers. */
18189 static unsigned long
18190 arm_compute_save_reg_mask (void)
18192 unsigned int save_reg_mask = 0;
18193 unsigned long func_type = arm_current_func_type ();
18194 unsigned int reg;
18196 if (IS_NAKED (func_type))
18197 /* This should never really happen. */
18198 return 0;
18200 /* If we are creating a stack frame, then we must save the frame pointer,
18201 IP (which will hold the old stack pointer), LR and the PC. */
18202 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18203 save_reg_mask |=
18204 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18205 | (1 << IP_REGNUM)
18206 | (1 << LR_REGNUM)
18207 | (1 << PC_REGNUM);
18209 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18211 /* Decide if we need to save the link register.
18212 Interrupt routines have their own banked link register,
18213 so they never need to save it.
18214 Otherwise if we do not use the link register we do not need to save
18215 it. If we are pushing other registers onto the stack however, we
18216 can save an instruction in the epilogue by pushing the link register
18217 now and then popping it back into the PC. This incurs extra memory
18218 accesses though, so we only do it when optimizing for size, and only
18219 if we know that we will not need a fancy return sequence. */
18220 if (df_regs_ever_live_p (LR_REGNUM)
18221 || (save_reg_mask
18222 && optimize_size
18223 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
18224 && !crtl->tail_call_emit
18225 && !crtl->calls_eh_return))
18226 save_reg_mask |= 1 << LR_REGNUM;
18228 if (cfun->machine->lr_save_eliminated)
18229 save_reg_mask &= ~ (1 << LR_REGNUM);
18231 if (TARGET_REALLY_IWMMXT
18232 && ((bit_count (save_reg_mask)
18233 + ARM_NUM_INTS (crtl->args.pretend_args_size +
18234 arm_compute_static_chain_stack_bytes())
18235 ) % 2) != 0)
18237 /* The total number of registers that are going to be pushed
18238 onto the stack is odd. We need to ensure that the stack
18239 is 64-bit aligned before we start to save iWMMXt registers,
18240 and also before we start to create locals. (A local variable
18241 might be a double or long long which we will load/store using
18242 an iWMMXt instruction). Therefore we need to push another
18243 ARM register, so that the stack will be 64-bit aligned. We
18244 try to avoid using the arg registers (r0 -r3) as they might be
18245 used to pass values in a tail call. */
18246 for (reg = 4; reg <= 12; reg++)
18247 if ((save_reg_mask & (1 << reg)) == 0)
18248 break;
18250 if (reg <= 12)
18251 save_reg_mask |= (1 << reg);
18252 else
18254 cfun->machine->sibcall_blocked = 1;
18255 save_reg_mask |= (1 << 3);
18259 /* We may need to push an additional register for use initializing the
18260 PIC base register. */
18261 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
18262 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
18264 reg = thumb_find_work_register (1 << 4);
18265 if (!call_used_regs[reg])
18266 save_reg_mask |= (1 << reg);
18269 return save_reg_mask;
18272 /* Compute a bit mask of which registers need to be
18273 saved on the stack for the current function. */
18274 static unsigned long
18275 thumb1_compute_save_reg_mask (void)
18277 unsigned long mask;
18278 unsigned reg;
18280 mask = 0;
18281 for (reg = 0; reg < 12; reg ++)
18282 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
18283 mask |= 1 << reg;
18285 if (flag_pic
18286 && !TARGET_SINGLE_PIC_BASE
18287 && arm_pic_register != INVALID_REGNUM
18288 && crtl->uses_pic_offset_table)
18289 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18291 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18292 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
18293 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18295 /* LR will also be pushed if any lo regs are pushed. */
18296 if (mask & 0xff || thumb_force_lr_save ())
18297 mask |= (1 << LR_REGNUM);
18299 /* Make sure we have a low work register if we need one.
18300 We will need one if we are going to push a high register,
18301 but we are not currently intending to push a low register. */
18302 if ((mask & 0xff) == 0
18303 && ((mask & 0x0f00) || TARGET_BACKTRACE))
18305 /* Use thumb_find_work_register to choose which register
18306 we will use. If the register is live then we will
18307 have to push it. Use LAST_LO_REGNUM as our fallback
18308 choice for the register to select. */
18309 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
18310 /* Make sure the register returned by thumb_find_work_register is
18311 not part of the return value. */
18312 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
18313 reg = LAST_LO_REGNUM;
18315 if (callee_saved_reg_p (reg))
18316 mask |= 1 << reg;
18319 /* The 504 below is 8 bytes less than 512 because there are two possible
18320 alignment words. We can't tell here if they will be present or not so we
18321 have to play it safe and assume that they are. */
18322 if ((CALLER_INTERWORKING_SLOT_SIZE +
18323 ROUND_UP_WORD (get_frame_size ()) +
18324 crtl->outgoing_args_size) >= 504)
18326 /* This is the same as the code in thumb1_expand_prologue() which
18327 determines which register to use for stack decrement. */
18328 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
18329 if (mask & (1 << reg))
18330 break;
18332 if (reg > LAST_LO_REGNUM)
18334 /* Make sure we have a register available for stack decrement. */
18335 mask |= 1 << LAST_LO_REGNUM;
18339 return mask;
18343 /* Return the number of bytes required to save VFP registers. */
18344 static int
18345 arm_get_vfp_saved_size (void)
18347 unsigned int regno;
18348 int count;
18349 int saved;
18351 saved = 0;
18352 /* Space for saved VFP registers. */
18353 if (TARGET_HARD_FLOAT)
18355 count = 0;
18356 for (regno = FIRST_VFP_REGNUM;
18357 regno < LAST_VFP_REGNUM;
18358 regno += 2)
18360 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
18361 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
18363 if (count > 0)
18365 /* Workaround ARM10 VFPr1 bug. */
18366 if (count == 2 && !arm_arch6)
18367 count++;
18368 saved += count * 8;
18370 count = 0;
18372 else
18373 count++;
18375 if (count > 0)
18377 if (count == 2 && !arm_arch6)
18378 count++;
18379 saved += count * 8;
18382 return saved;
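/* A quick sanity check of the sizing above (hypothetical register
   usage): if d8-d10 are live and callee-saved and no other VFP registers
   need saving, the single block of three D registers costs 3 * 8 = 24
   bytes; on pre-ARMv6 cores the ARM10 VFPr1 workaround pads an
   exactly-two-register block, so d8-d9 alone would also cost 24 bytes
   there instead of 16.  */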
18386 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18387 everything bar the final return instruction. If simple_return is true,
18388 then do not output the epilogue, because it has already been emitted in RTL. */
18389 const char *
18390 output_return_instruction (rtx operand, bool really_return, bool reverse,
18391 bool simple_return)
18393 char conditional[10];
18394 char instr[100];
18395 unsigned reg;
18396 unsigned long live_regs_mask;
18397 unsigned long func_type;
18398 arm_stack_offsets *offsets;
18400 func_type = arm_current_func_type ();
18402 if (IS_NAKED (func_type))
18403 return "";
18405 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
18407 /* If this function was declared non-returning, and we have
18408 found a tail call, then we have to trust that the called
18409 function won't return. */
18410 if (really_return)
18412 rtx ops[2];
18414 /* Otherwise, trap an attempted return by aborting. */
18415 ops[0] = operand;
18416 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
18417 : "abort");
18418 assemble_external_libcall (ops[1]);
18419 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
18422 return "";
18425 gcc_assert (!cfun->calls_alloca || really_return);
18427 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
18429 cfun->machine->return_used_this_function = 1;
18431 offsets = arm_get_frame_offsets ();
18432 live_regs_mask = offsets->saved_regs_mask;
18434 if (!simple_return && live_regs_mask)
18436 const char * return_reg;
18438 /* If we do not have any special requirements for function exit
18439 (e.g. interworking) then we can load the return address
18440 directly into the PC. Otherwise we must load it into LR. */
18441 if (really_return
18442 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
18443 return_reg = reg_names[PC_REGNUM];
18444 else
18445 return_reg = reg_names[LR_REGNUM];
18447 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
18449 /* There are three possible reasons for the IP register
18450 being saved. 1) a stack frame was created, in which case
18451 IP contains the old stack pointer, or 2) an ISR routine
18452 corrupted it, or 3) it was saved to align the stack on
18453 iWMMXt. In case 1, restore IP into SP, otherwise just
18454 restore IP. */
18455 if (frame_pointer_needed)
18457 live_regs_mask &= ~ (1 << IP_REGNUM);
18458 live_regs_mask |= (1 << SP_REGNUM);
18460 else
18461 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
18464 /* On some ARM architectures it is faster to use LDR rather than
18465 LDM to load a single register. On other architectures, the
18466 cost is the same. In 26 bit mode, or for exception handlers,
18467 we have to use LDM to load the PC so that the CPSR is also
18468 restored. */
18469 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
18470 if (live_regs_mask == (1U << reg))
18471 break;
18473 if (reg <= LAST_ARM_REGNUM
18474 && (reg != LR_REGNUM
18475 || ! really_return
18476 || ! IS_INTERRUPT (func_type)))
18478 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
18479 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
18481 else
18483 char *p;
18484 int first = 1;
18486 /* Generate the load multiple instruction to restore the
18487 registers. Note we can get here, even if
18488 frame_pointer_needed is true, but only if sp already
18489 points to the base of the saved core registers. */
18490 if (live_regs_mask & (1 << SP_REGNUM))
18492 unsigned HOST_WIDE_INT stack_adjust;
18494 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
18495 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
18497 if (stack_adjust && arm_arch5 && TARGET_ARM)
18498 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
18499 else
18501 /* If we can't use ldmib (SA110 bug),
18502 then try to pop r3 instead. */
18503 if (stack_adjust)
18504 live_regs_mask |= 1 << 3;
18506 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
18509 /* For interrupt returns we have to use an LDM rather than
18510 a POP so that we can use the exception return variant. */
18511 else if (IS_INTERRUPT (func_type))
18512 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
18513 else
18514 sprintf (instr, "pop%s\t{", conditional);
18516 p = instr + strlen (instr);
18518 for (reg = 0; reg <= SP_REGNUM; reg++)
18519 if (live_regs_mask & (1 << reg))
18521 int l = strlen (reg_names[reg]);
18523 if (first)
18524 first = 0;
18525 else
18527 memcpy (p, ", ", 2);
18528 p += 2;
18531 memcpy (p, "%|", 2);
18532 memcpy (p + 2, reg_names[reg], l);
18533 p += l + 2;
18536 if (live_regs_mask & (1 << LR_REGNUM))
18538 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
18539 /* If returning from an interrupt, restore the CPSR. */
18540 if (IS_INTERRUPT (func_type))
18541 strcat (p, "^");
18543 else
18544 strcpy (p, "}");
18547 output_asm_insn (instr, & operand);
18549 /* See if we need to generate an extra instruction to
18550 perform the actual function return. */
18551 if (really_return
18552 && func_type != ARM_FT_INTERWORKED
18553 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
18555 /* The return has already been handled
18556 by loading the LR into the PC. */
18557 return "";
18561 if (really_return)
18563 switch ((int) ARM_FUNC_TYPE (func_type))
18565 case ARM_FT_ISR:
18566 case ARM_FT_FIQ:
18567 /* ??? This is wrong for unified assembly syntax. */
18568 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
18569 break;
18571 case ARM_FT_INTERWORKED:
18572 gcc_assert (arm_arch5 || arm_arch4t);
18573 sprintf (instr, "bx%s\t%%|lr", conditional);
18574 break;
18576 case ARM_FT_EXCEPTION:
18577 /* ??? This is wrong for unified assembly syntax. */
18578 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
18579 break;
18581 default:
18582 /* Use bx if it's available. */
18583 if (arm_arch5 || arm_arch4t)
18584 sprintf (instr, "bx%s\t%%|lr", conditional);
18585 else
18586 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
18587 break;
18590 output_asm_insn (instr, & operand);
18593 return "";
18596 /* Write the function name into the code section, directly preceding
18597 the function prologue.
18599 Code will be output similar to this:
18601 .ascii "arm_poke_function_name", 0
18602 .align
18604 .word 0xff000000 + (t1 - t0)
18605 arm_poke_function_name
18606 mov ip, sp
18607 stmfd sp!, {fp, ip, lr, pc}
18608 sub fp, ip, #4
18610 When performing a stack backtrace, code can inspect the value
18611 of 'pc' stored at 'fp' + 0. If the trace function then looks
18612 at location pc - 12 and the top 8 bits are set, then we know
18613 that there is a function name embedded immediately preceding this
18614 location, and that it has length ((pc[-3]) & 0x00ffffff).
18616 We assume that pc is declared as a pointer to an unsigned long.
18618 It is of no benefit to output the function name if we are assembling
18619 a leaf function. These function types will not contain a stack
18620 backtrace structure, therefore it is not possible to determine the
18621 function name. */
18622 void
18623 arm_poke_function_name (FILE *stream, const char *name)
18625 unsigned long alignlength;
18626 unsigned long length;
18627 rtx x;
18629 length = strlen (name) + 1;
18630 alignlength = ROUND_UP_WORD (length);
18632 ASM_OUTPUT_ASCII (stream, name, length);
18633 ASM_OUTPUT_ALIGN (stream, 2);
18634 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
18635 assemble_aligned_integer (UNITS_PER_WORD, x);
18638 /* Place some comments into the assembler stream
18639 describing the current function. */
18640 static void
18641 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
18643 unsigned long func_type;
18645 /* ??? Do we want to print some of the below anyway? */
18646 if (TARGET_THUMB1)
18647 return;
18649 /* Sanity check. */
18650 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
18652 func_type = arm_current_func_type ();
18654 switch ((int) ARM_FUNC_TYPE (func_type))
18656 default:
18657 case ARM_FT_NORMAL:
18658 break;
18659 case ARM_FT_INTERWORKED:
18660 asm_fprintf (f, "\t%@ Function supports interworking.\n");
18661 break;
18662 case ARM_FT_ISR:
18663 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
18664 break;
18665 case ARM_FT_FIQ:
18666 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
18667 break;
18668 case ARM_FT_EXCEPTION:
18669 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
18670 break;
18673 if (IS_NAKED (func_type))
18674 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
18676 if (IS_VOLATILE (func_type))
18677 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
18679 if (IS_NESTED (func_type))
18680 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
18681 if (IS_STACKALIGN (func_type))
18682 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
18684 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
18685 crtl->args.size,
18686 crtl->args.pretend_args_size, frame_size);
18688 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
18689 frame_pointer_needed,
18690 cfun->machine->uses_anonymous_args);
18692 if (cfun->machine->lr_save_eliminated)
18693 asm_fprintf (f, "\t%@ link register save eliminated.\n");
18695 if (crtl->calls_eh_return)
18696 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
18700 static void
18701 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
18702 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
18704 arm_stack_offsets *offsets;
18706 if (TARGET_THUMB1)
18708 int regno;
18710 /* Emit any call-via-reg trampolines that are needed for v4t support
18711 of call_reg and call_value_reg type insns. */
18712 for (regno = 0; regno < LR_REGNUM; regno++)
18714 rtx label = cfun->machine->call_via[regno];
18716 if (label != NULL)
18718 switch_to_section (function_section (current_function_decl));
18719 targetm.asm_out.internal_label (asm_out_file, "L",
18720 CODE_LABEL_NUMBER (label));
18721 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
18725 /* ??? Probably not safe to set this here, since it assumes that a
18726 function will be emitted as assembly immediately after we generate
18727 RTL for it. This does not happen for inline functions. */
18728 cfun->machine->return_used_this_function = 0;
18730 else /* TARGET_32BIT */
18732 /* We need to take into account any stack-frame rounding. */
18733 offsets = arm_get_frame_offsets ();
18735 gcc_assert (!use_return_insn (FALSE, NULL)
18736 || (cfun->machine->return_used_this_function != 0)
18737 || offsets->saved_regs == offsets->outgoing_args
18738 || frame_pointer_needed);
18742 /* Generate and emit a sequence of insns equivalent to PUSH, but using
18743 STR and STRD. If an even number of registers are being pushed, an
18744 STRD pattern is created for each register pair. If an
18745 odd number of registers are pushed, emit an initial STR followed by
18746 as many STRD instructions as are needed. This works best when the
18747 stack is initially 64-bit aligned (the normal case), since it
18748 ensures that each STRD is also 64-bit aligned. */
18749 static void
18750 thumb2_emit_strd_push (unsigned long saved_regs_mask)
18752 int num_regs = 0;
18753 int i;
18754 int regno;
18755 rtx par = NULL_RTX;
18756 rtx dwarf = NULL_RTX;
18757 rtx tmp;
18758 bool first = true;
18760 num_regs = bit_count (saved_regs_mask);
18762 /* Must be at least one register to save, and can't save SP or PC. */
18763 gcc_assert (num_regs > 0 && num_regs <= 14);
18764 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
18765 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
18767 /* Create sequence for DWARF info. All the frame-related data for
18768 debugging is held in this wrapper. */
18769 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
18771 /* Describe the stack adjustment. */
18772 tmp = gen_rtx_SET (stack_pointer_rtx,
18773 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
18774 RTX_FRAME_RELATED_P (tmp) = 1;
18775 XVECEXP (dwarf, 0, 0) = tmp;
18777 /* Find the first register. */
18778 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
18781 i = 0;
18783 /* If there's an odd number of registers to push, start off by
18784 pushing a single register. This ensures that subsequent strd
18785 operations are dword aligned (assuming that SP was originally
18786 64-bit aligned). */
18787 if ((num_regs & 1) != 0)
18789 rtx reg, mem, insn;
18791 reg = gen_rtx_REG (SImode, regno);
18792 if (num_regs == 1)
18793 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
18794 stack_pointer_rtx));
18795 else
18796 mem = gen_frame_mem (Pmode,
18797 gen_rtx_PRE_MODIFY
18798 (Pmode, stack_pointer_rtx,
18799 plus_constant (Pmode, stack_pointer_rtx,
18800 -4 * num_regs)));
18802 tmp = gen_rtx_SET (mem, reg);
18803 RTX_FRAME_RELATED_P (tmp) = 1;
18804 insn = emit_insn (tmp);
18805 RTX_FRAME_RELATED_P (insn) = 1;
18806 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18807 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
18808 RTX_FRAME_RELATED_P (tmp) = 1;
18809 i++;
18810 regno++;
18811 XVECEXP (dwarf, 0, i) = tmp;
18812 first = false;
18815 while (i < num_regs)
18816 if (saved_regs_mask & (1 << regno))
18818 rtx reg1, reg2, mem1, mem2;
18819 rtx tmp0, tmp1, tmp2;
18820 int regno2;
18822 /* Find the register to pair with this one. */
18823 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
18824 regno2++)
18827 reg1 = gen_rtx_REG (SImode, regno);
18828 reg2 = gen_rtx_REG (SImode, regno2);
18830 if (first)
18832 rtx insn;
18834 first = false;
18835 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
18836 stack_pointer_rtx,
18837 -4 * num_regs));
18838 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
18839 stack_pointer_rtx,
18840 -4 * (num_regs - 1)));
18841 tmp0 = gen_rtx_SET (stack_pointer_rtx,
18842 plus_constant (Pmode, stack_pointer_rtx,
18843 -4 * (num_regs)));
18844 tmp1 = gen_rtx_SET (mem1, reg1);
18845 tmp2 = gen_rtx_SET (mem2, reg2);
18846 RTX_FRAME_RELATED_P (tmp0) = 1;
18847 RTX_FRAME_RELATED_P (tmp1) = 1;
18848 RTX_FRAME_RELATED_P (tmp2) = 1;
18849 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
18850 XVECEXP (par, 0, 0) = tmp0;
18851 XVECEXP (par, 0, 1) = tmp1;
18852 XVECEXP (par, 0, 2) = tmp2;
18853 insn = emit_insn (par);
18854 RTX_FRAME_RELATED_P (insn) = 1;
18855 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18857 else
18859 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
18860 stack_pointer_rtx,
18861 4 * i));
18862 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
18863 stack_pointer_rtx,
18864 4 * (i + 1)));
18865 tmp1 = gen_rtx_SET (mem1, reg1);
18866 tmp2 = gen_rtx_SET (mem2, reg2);
18867 RTX_FRAME_RELATED_P (tmp1) = 1;
18868 RTX_FRAME_RELATED_P (tmp2) = 1;
18869 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
18870 XVECEXP (par, 0, 0) = tmp1;
18871 XVECEXP (par, 0, 1) = tmp2;
18872 emit_insn (par);
18875 /* Create unwind information. This is an approximation. */
18876 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
18877 plus_constant (Pmode,
18878 stack_pointer_rtx,
18879 4 * i)),
18880 reg1);
18881 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
18882 plus_constant (Pmode,
18883 stack_pointer_rtx,
18884 4 * (i + 1))),
18885 reg2);
18887 RTX_FRAME_RELATED_P (tmp1) = 1;
18888 RTX_FRAME_RELATED_P (tmp2) = 1;
18889 XVECEXP (dwarf, 0, i + 1) = tmp1;
18890 XVECEXP (dwarf, 0, i + 2) = tmp2;
18891 i += 2;
18892 regno = regno2 + 1;
18894 else
18895 regno++;
18897 return;
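/* A rough sketch of the sequence this generates (hypothetical register
   mask, not from the original sources): pushing {r4, r5, r6, r7, lr} is
   an odd count, so a single str with writeback allocates the whole
   20-byte block and the remaining pairs use strd:

	str	r4, [sp, #-20]!
	strd	r5, r6, [sp, #4]
	strd	r7, lr, [sp, #12]  */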
18900 /* STRD in ARM mode requires consecutive registers. This function emits STRD
18901 whenever possible, otherwise it emits single-word stores. The first store
18902 also allocates stack space for all saved registers, using writeback with
18903 post-addressing mode. All other stores use offset addressing. If no STRD
18904 can be emitted, this function emits a sequence of single-word stores,
18905 and not an STM as before, because single-word stores provide more freedom
18906 in scheduling and can be turned into an STM by peephole optimizations. */
18907 static void
18908 arm_emit_strd_push (unsigned long saved_regs_mask)
18910 int num_regs = 0;
18911 int i, j, dwarf_index = 0;
18912 int offset = 0;
18913 rtx dwarf = NULL_RTX;
18914 rtx insn = NULL_RTX;
18915 rtx tmp, mem;
18917 /* TODO: More efficient code can be emitted by changing the
18918 layout, e.g., first push all pairs that can use STRD to keep the
18919 stack aligned, and then push all other registers. */
18920 for (i = 0; i <= LAST_ARM_REGNUM; i++)
18921 if (saved_regs_mask & (1 << i))
18922 num_regs++;
18924 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
18925 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
18926 gcc_assert (num_regs > 0);
18928 /* Create sequence for DWARF info. */
18929 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
18931 /* For dwarf info, we generate explicit stack update. */
18932 tmp = gen_rtx_SET (stack_pointer_rtx,
18933 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
18934 RTX_FRAME_RELATED_P (tmp) = 1;
18935 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
18937 /* Save registers. */
18938 offset = - 4 * num_regs;
18939 j = 0;
18940 while (j <= LAST_ARM_REGNUM)
18941 if (saved_regs_mask & (1 << j))
18943 if ((j % 2 == 0)
18944 && (saved_regs_mask & (1 << (j + 1))))
18946 /* Current register and previous register form register pair for
18947 which STRD can be generated. */
18948 if (offset < 0)
18950 /* Allocate stack space for all saved registers. */
18951 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
18952 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
18953 mem = gen_frame_mem (DImode, tmp);
18954 offset = 0;
18956 else if (offset > 0)
18957 mem = gen_frame_mem (DImode,
18958 plus_constant (Pmode,
18959 stack_pointer_rtx,
18960 offset));
18961 else
18962 mem = gen_frame_mem (DImode, stack_pointer_rtx);
18964 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
18965 RTX_FRAME_RELATED_P (tmp) = 1;
18966 tmp = emit_insn (tmp);
18968 /* Record the first store insn. */
18969 if (dwarf_index == 1)
18970 insn = tmp;
18972 /* Generate dwarf info. */
18973 mem = gen_frame_mem (SImode,
18974 plus_constant (Pmode,
18975 stack_pointer_rtx,
18976 offset));
18977 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
18978 RTX_FRAME_RELATED_P (tmp) = 1;
18979 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
18981 mem = gen_frame_mem (SImode,
18982 plus_constant (Pmode,
18983 stack_pointer_rtx,
18984 offset + 4));
18985 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
18986 RTX_FRAME_RELATED_P (tmp) = 1;
18987 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
18989 offset += 8;
18990 j += 2;
18992 else
18994 /* Emit a single word store. */
18995 if (offset < 0)
18997 /* Allocate stack space for all saved registers. */
18998 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
18999 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19000 mem = gen_frame_mem (SImode, tmp);
19001 offset = 0;
19003 else if (offset > 0)
19004 mem = gen_frame_mem (SImode,
19005 plus_constant (Pmode,
19006 stack_pointer_rtx,
19007 offset));
19008 else
19009 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19011 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19012 RTX_FRAME_RELATED_P (tmp) = 1;
19013 tmp = emit_insn (tmp);
19015 /* Record the first store insn. */
19016 if (dwarf_index == 1)
19017 insn = tmp;
19019 /* Generate dwarf info. */
19020 mem = gen_frame_mem (SImode,
19021 plus_constant(Pmode,
19022 stack_pointer_rtx,
19023 offset));
19024 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19025 RTX_FRAME_RELATED_P (tmp) = 1;
19026 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19028 offset += 4;
19029 j += 1;
19032 else
19033 j++;
19035 /* Attach dwarf info to the first insn we generate. */
19036 gcc_assert (insn != NULL_RTX);
19037 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19038 RTX_FRAME_RELATED_P (insn) = 1;
19041 /* Generate and emit an insn that we will recognize as a push_multi.
19042 Unfortunately, since this insn does not reflect very well the actual
19043 semantics of the operation, we need to annotate the insn for the benefit
19044 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19045 MASK for registers that should be annotated for DWARF2 frame unwind
19046 information. */
19047 static rtx
19048 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19050 int num_regs = 0;
19051 int num_dwarf_regs = 0;
19052 int i, j;
19053 rtx par;
19054 rtx dwarf;
19055 int dwarf_par_index;
19056 rtx tmp, reg;
19058 /* We don't record the PC in the dwarf frame information. */
19059 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19061 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19063 if (mask & (1 << i))
19064 num_regs++;
19065 if (dwarf_regs_mask & (1 << i))
19066 num_dwarf_regs++;
19069 gcc_assert (num_regs && num_regs <= 16);
19070 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19072 /* For the body of the insn we are going to generate an UNSPEC in
19073 parallel with several USEs. This allows the insn to be recognized
19074 by the push_multi pattern in the arm.md file.
19076 The body of the insn looks something like this:
19078 (parallel [
19079 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19080 (const_int:SI <num>)))
19081 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19082 (use (reg:SI XX))
19083 (use (reg:SI YY))
19087 For the frame note however, we try to be more explicit and actually
19088 show each register being stored into the stack frame, plus a (single)
19089 decrement of the stack pointer. We do it this way in order to be
19090 friendly to the stack unwinding code, which only wants to see a single
19091 stack decrement per instruction. The RTL we generate for the note looks
19092 something like this:
19094 (sequence [
19095 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19096 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19097 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19098 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19102 FIXME: In an ideal world the PRE_MODIFY would not exist and
19103 instead we'd have a parallel expression detailing all
19104 the stores to the various memory addresses so that debug
19105 information is more up-to-date. Remember however while writing
19106 this to take care of the constraints with the push instruction.
19108 Note also that this has to be taken care of for the VFP registers.
19110 For more see PR43399. */
19112 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19113 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19114 dwarf_par_index = 1;
19116 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19118 if (mask & (1 << i))
19120 reg = gen_rtx_REG (SImode, i);
19122 XVECEXP (par, 0, 0)
19123 = gen_rtx_SET (gen_frame_mem
19124 (BLKmode,
19125 gen_rtx_PRE_MODIFY (Pmode,
19126 stack_pointer_rtx,
19127 plus_constant
19128 (Pmode, stack_pointer_rtx,
19129 -4 * num_regs))
19131 gen_rtx_UNSPEC (BLKmode,
19132 gen_rtvec (1, reg),
19133 UNSPEC_PUSH_MULT));
19135 if (dwarf_regs_mask & (1 << i))
19137 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
19138 reg);
19139 RTX_FRAME_RELATED_P (tmp) = 1;
19140 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19143 break;
19147 for (j = 1, i++; j < num_regs; i++)
19149 if (mask & (1 << i))
19151 reg = gen_rtx_REG (SImode, i);
19153 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19155 if (dwarf_regs_mask & (1 << i))
19158 = gen_rtx_SET (gen_frame_mem
19159 (SImode,
19160 plus_constant (Pmode, stack_pointer_rtx,
19161 4 * j)),
19162 reg);
19163 RTX_FRAME_RELATED_P (tmp) = 1;
19164 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19167 j++;
19171 par = emit_insn (par);
19173 tmp = gen_rtx_SET (stack_pointer_rtx,
19174 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19175 RTX_FRAME_RELATED_P (tmp) = 1;
19176 XVECEXP (dwarf, 0, 0) = tmp;
19178 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19180 return par;
19183 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19184 SIZE is the offset to be adjusted.
19185 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19186 static void
19187 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19189 rtx dwarf;
19191 RTX_FRAME_RELATED_P (insn) = 1;
19192 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
19193 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19196 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19197 SAVED_REGS_MASK shows which registers need to be restored.
19199 Unfortunately, since this insn does not reflect very well the actual
19200 semantics of the operation, we need to annotate the insn for the benefit
19201 of DWARF2 frame unwind information. */
19202 static void
19203 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
19205 int num_regs = 0;
19206 int i, j;
19207 rtx par;
19208 rtx dwarf = NULL_RTX;
19209 rtx tmp, reg;
19210 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
19211 int offset_adj;
19212 int emit_update;
19214 offset_adj = return_in_pc ? 1 : 0;
19215 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19216 if (saved_regs_mask & (1 << i))
19217 num_regs++;
19219 gcc_assert (num_regs && num_regs <= 16);
19221 /* If SP is in reglist, then we don't emit an SP update insn. */
19222 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
19224 /* The parallel needs to hold num_regs SETs
19225 and one SET for the stack update. */
19226 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
19228 if (return_in_pc)
19229 XVECEXP (par, 0, 0) = ret_rtx;
19231 if (emit_update)
19233 /* Increment the stack pointer, based on there being
19234 num_regs 4-byte registers to restore. */
19235 tmp = gen_rtx_SET (stack_pointer_rtx,
19236 plus_constant (Pmode,
19237 stack_pointer_rtx,
19238 4 * num_regs));
19239 RTX_FRAME_RELATED_P (tmp) = 1;
19240 XVECEXP (par, 0, offset_adj) = tmp;
19243 /* Now restore every reg, which may include PC. */
19244 for (j = 0, i = 0; j < num_regs; i++)
19245 if (saved_regs_mask & (1 << i))
19247 reg = gen_rtx_REG (SImode, i);
19248 if ((num_regs == 1) && emit_update && !return_in_pc)
19250 /* Emit single load with writeback. */
19251 tmp = gen_frame_mem (SImode,
19252 gen_rtx_POST_INC (Pmode,
19253 stack_pointer_rtx));
19254 tmp = emit_insn (gen_rtx_SET (reg, tmp));
19255 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19256 return;
19259 tmp = gen_rtx_SET (reg,
19260 gen_frame_mem
19261 (SImode,
19262 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
19263 RTX_FRAME_RELATED_P (tmp) = 1;
19264 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
19266 /* We need to maintain a sequence for DWARF info too. As dwarf info
19267 should not have PC, skip PC. */
19268 if (i != PC_REGNUM)
19269 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19271 j++;
19274 if (return_in_pc)
19275 par = emit_jump_insn (par);
19276 else
19277 par = emit_insn (par);
19279 REG_NOTES (par) = dwarf;
19280 if (!return_in_pc)
19281 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
19282 stack_pointer_rtx, stack_pointer_rtx);
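/* Illustrative sketch: for SAVED_REGS_MASK = {r4, r5, pc} the parallel built
   above contains, in order, a (return), the SP adjustment
   (set (reg sp) (plus (reg sp) (const_int 12))), and one load per register
   from [sp], [sp, #4] and [sp, #8]; REG_CFA_RESTORE notes are attached for
   r4 and r5 only, since the unwind info must not restore PC.  */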
19285 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19286 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19288 Unfortunately, since this insn does not reflect very well the actual
19289 semantics of the operation, we need to annotate the insn for the benefit
19290 of DWARF2 frame unwind information. */
19291 static void
19292 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
19294 int i, j;
19295 rtx par;
19296 rtx dwarf = NULL_RTX;
19297 rtx tmp, reg;
19299 gcc_assert (num_regs && num_regs <= 32);
19301 /* Workaround ARM10 VFPr1 bug. */
19302 if (num_regs == 2 && !arm_arch6)
19304 if (first_reg == 15)
19305 first_reg--;
19307 num_regs++;
19310 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19311 there could be up to 32 D-registers to restore.
19312 If there are more than 16 D-registers, make two recursive calls,
19313 each of which emits one pop_multi instruction. */
19314 if (num_regs > 16)
19316 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
19317 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
19318 return;
19321 /* The parallel needs to hold num_regs SETs
19322 and one SET for the stack update. */
19323 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
19325 /* Increment the stack pointer, based on there being
19326 num_regs 8-byte registers to restore. */
19327 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
19328 RTX_FRAME_RELATED_P (tmp) = 1;
19329 XVECEXP (par, 0, 0) = tmp;
19331 /* Now show every reg that will be restored, using a SET for each. */
19332 for (j = 0, i=first_reg; j < num_regs; i += 2)
19334 reg = gen_rtx_REG (DFmode, i);
19336 tmp = gen_rtx_SET (reg,
19337 gen_frame_mem
19338 (DFmode,
19339 plus_constant (Pmode, base_reg, 8 * j)));
19340 RTX_FRAME_RELATED_P (tmp) = 1;
19341 XVECEXP (par, 0, j + 1) = tmp;
19343 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19345 j++;
19348 par = emit_insn (par);
19349 REG_NOTES (par) = dwarf;
19351 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
19352 if (REGNO (base_reg) == IP_REGNUM)
19354 RTX_FRAME_RELATED_P (par) = 1;
19355 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
19357 else
19358 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
19359 base_reg, base_reg);
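/* Illustrative sketch: a pop of 18 D-registers is emitted as two pop_multi
   patterns (16 registers, then 2) via the recursion above.  In each pattern
   element 0 adjusts BASE_REG by 8 * NUM_REGS and the remaining elements load
   one D-register each, with a REG_CFA_RESTORE note per register.  On
   pre-ARMv6 cores the VFPr1 workaround widens a 2-register transfer to
   3 registers.  */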
19362 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
19363 number of registers are being popped, multiple LDRD patterns are created for
19364 all register pairs. If odd number of registers are popped, last register is
19365 loaded by using LDR pattern. */
19366 static void
19367 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
19369 int num_regs = 0;
19370 int i, j;
19371 rtx par = NULL_RTX;
19372 rtx dwarf = NULL_RTX;
19373 rtx tmp, reg, tmp1;
19374 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
19376 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19377 if (saved_regs_mask & (1 << i))
19378 num_regs++;
19380 gcc_assert (num_regs && num_regs <= 16);
19382 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
19383 to be popped. So, if num_regs is even, now it will become odd,
19384 and we can generate pop with PC. If num_regs is odd, it will be
19385 even now, and ldr with return can be generated for PC. */
19386 if (return_in_pc)
19387 num_regs--;
19389 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19391 /* Var j iterates over all the registers to gather all the registers in
19392 saved_regs_mask. Var i gives index of saved registers in stack frame.
19393 A PARALLEL RTX of register-pair is created here, so that pattern for
19394 LDRD can be matched. As PC is always last register to be popped, and
19395 we have already decremented num_regs if PC, we don't have to worry
19396 about PC in this loop. */
19397 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
19398 if (saved_regs_mask & (1 << j))
19400 /* Create RTX for memory load. */
19401 reg = gen_rtx_REG (SImode, j);
19402 tmp = gen_rtx_SET (reg,
19403 gen_frame_mem (SImode,
19404 plus_constant (Pmode,
19405 stack_pointer_rtx, 4 * i)));
19406 RTX_FRAME_RELATED_P (tmp) = 1;
19408 if (i % 2 == 0)
19410 /* When saved-register index (i) is even, the RTX to be emitted is
19411 yet to be created. Hence create it first. The LDRD pattern we
19412 are generating is :
19413 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19414 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19415 where target registers need not be consecutive. */
19416 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19417 dwarf = NULL_RTX;
19420 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
19421 added as 0th element and if i is odd, reg_i is added as 1st element
19422 of LDRD pattern shown above. */
19423 XVECEXP (par, 0, (i % 2)) = tmp;
19424 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19426 if ((i % 2) == 1)
19428 /* When saved-register index (i) is odd, RTXs for both the registers
19429 to be loaded are generated in above given LDRD pattern, and the
19430 pattern can be emitted now. */
19431 par = emit_insn (par);
19432 REG_NOTES (par) = dwarf;
19433 RTX_FRAME_RELATED_P (par) = 1;
19436 i++;
19439 /* If the number of registers pushed is odd and return_in_pc is false, or
19440 the number of registers is even and return_in_pc is true, the last register
19441 is popped using LDR. It can be PC as well. Hence, adjust the stack first
19442 and then issue an LDR with post-increment.
19444 /* Increment the stack pointer, based on there being
19445 num_regs 4-byte registers to restore. */
19446 tmp = gen_rtx_SET (stack_pointer_rtx,
19447 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
19448 RTX_FRAME_RELATED_P (tmp) = 1;
19449 tmp = emit_insn (tmp);
19450 if (!return_in_pc)
19452 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
19453 stack_pointer_rtx, stack_pointer_rtx);
19456 dwarf = NULL_RTX;
19458 if (((num_regs % 2) == 1 && !return_in_pc)
19459 || ((num_regs % 2) == 0 && return_in_pc))
19461 /* Scan for the single register to be popped. Skip until the saved
19462 register is found. */
19463 for (; (saved_regs_mask & (1 << j)) == 0; j++);
19465 /* Gen LDR with post increment here. */
19466 tmp1 = gen_rtx_MEM (SImode,
19467 gen_rtx_POST_INC (SImode,
19468 stack_pointer_rtx));
19469 set_mem_alias_set (tmp1, get_frame_alias_set ());
19471 reg = gen_rtx_REG (SImode, j);
19472 tmp = gen_rtx_SET (reg, tmp1);
19473 RTX_FRAME_RELATED_P (tmp) = 1;
19474 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19476 if (return_in_pc)
19478 /* If return_in_pc, j must be PC_REGNUM. */
19479 gcc_assert (j == PC_REGNUM);
19480 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19481 XVECEXP (par, 0, 0) = ret_rtx;
19482 XVECEXP (par, 0, 1) = tmp;
19483 par = emit_jump_insn (par);
19485 else
19487 par = emit_insn (tmp);
19488 REG_NOTES (par) = dwarf;
19489 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
19490 stack_pointer_rtx, stack_pointer_rtx);
19494 else if ((num_regs % 2) == 1 && return_in_pc)
19496 /* There are 2 registers to be popped. So, generate the pattern
19497 pop_multiple_with_stack_update_and_return to pop in PC. */
19498 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
19501 return;
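/* Illustrative sketch: for SAVED_REGS_MASK = {r4, r5, r6, pc} the code above
   emits one LDRD-style parallel loading r4 from [sp] and r5 from [sp, #4],
   adjusts SP by 8, and then hands the remaining {r6, pc} to
   arm_emit_multi_reg_pop so that the final load can return through PC.  */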
19504 /* LDRD in ARM mode needs consecutive registers as operands. This function
19505 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
19506 offset addressing and then generates one separate stack update. This provides
19507 more scheduling freedom, compared to writeback on every load. However,
19508 if the function returns using load into PC directly
19509 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
19510 before the last load. TODO: Add a peephole optimization to recognize
19511 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
19512 peephole optimization to merge the load at stack-offset zero
19513 with the stack update instruction using load with writeback
19514 in post-index addressing mode. */
19515 static void
19516 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
19518 int j = 0;
19519 int offset = 0;
19520 rtx par = NULL_RTX;
19521 rtx dwarf = NULL_RTX;
19522 rtx tmp, mem;
19524 /* Restore saved registers. */
19525 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
19526 j = 0;
19527 while (j <= LAST_ARM_REGNUM)
19528 if (saved_regs_mask & (1 << j))
19530 if ((j % 2) == 0
19531 && (saved_regs_mask & (1 << (j + 1)))
19532 && (j + 1) != PC_REGNUM)
19534 /* Current register and next register form a register pair for which
19535 LDRD can be generated. PC is always the last register popped, and
19536 we handle it separately. */
19537 if (offset > 0)
19538 mem = gen_frame_mem (DImode,
19539 plus_constant (Pmode,
19540 stack_pointer_rtx,
19541 offset));
19542 else
19543 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19545 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
19546 tmp = emit_insn (tmp);
19547 RTX_FRAME_RELATED_P (tmp) = 1;
19549 /* Generate dwarf info. */
19551 dwarf = alloc_reg_note (REG_CFA_RESTORE,
19552 gen_rtx_REG (SImode, j),
19553 NULL_RTX);
19554 dwarf = alloc_reg_note (REG_CFA_RESTORE,
19555 gen_rtx_REG (SImode, j + 1),
19556 dwarf);
19558 REG_NOTES (tmp) = dwarf;
19560 offset += 8;
19561 j += 2;
19563 else if (j != PC_REGNUM)
19565 /* Emit a single word load. */
19566 if (offset > 0)
19567 mem = gen_frame_mem (SImode,
19568 plus_constant (Pmode,
19569 stack_pointer_rtx,
19570 offset));
19571 else
19572 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19574 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
19575 tmp = emit_insn (tmp);
19576 RTX_FRAME_RELATED_P (tmp) = 1;
19578 /* Generate dwarf info. */
19579 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
19580 gen_rtx_REG (SImode, j),
19581 NULL_RTX);
19583 offset += 4;
19584 j += 1;
19586 else /* j == PC_REGNUM */
19587 j++;
19589 else
19590 j++;
19592 /* Update the stack. */
19593 if (offset > 0)
19595 tmp = gen_rtx_SET (stack_pointer_rtx,
19596 plus_constant (Pmode,
19597 stack_pointer_rtx,
19598 offset));
19599 tmp = emit_insn (tmp);
19600 arm_add_cfa_adjust_cfa_note (tmp, offset,
19601 stack_pointer_rtx, stack_pointer_rtx);
19602 offset = 0;
19605 if (saved_regs_mask & (1 << PC_REGNUM))
19607 /* Only PC is to be popped. */
19608 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19609 XVECEXP (par, 0, 0) = ret_rtx;
19610 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
19611 gen_frame_mem (SImode,
19612 gen_rtx_POST_INC (SImode,
19613 stack_pointer_rtx)));
19614 RTX_FRAME_RELATED_P (tmp) = 1;
19615 XVECEXP (par, 0, 1) = tmp;
19616 par = emit_jump_insn (par);
19618 /* Generate dwarf info. */
19619 dwarf = alloc_reg_note (REG_CFA_RESTORE,
19620 gen_rtx_REG (SImode, PC_REGNUM),
19621 NULL_RTX);
19622 REG_NOTES (par) = dwarf;
19623 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
19624 stack_pointer_rtx, stack_pointer_rtx);
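/* Illustrative sketch: for SAVED_REGS_MASK = {r4, r5, lr} the ARM-mode
   sequence above is an LDRD of the consecutive pair (r4, r5) from [sp], a
   single LDR of lr from [sp, #8], and one final SP adjustment of 12 bytes,
   rather than three loads with writeback.  */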
19628 /* Calculate the size of the return value that is passed in registers. */
19629 static unsigned
19630 arm_size_return_regs (void)
19632 machine_mode mode;
19634 if (crtl->return_rtx != 0)
19635 mode = GET_MODE (crtl->return_rtx);
19636 else
19637 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19639 return GET_MODE_SIZE (mode);
19642 /* Return true if the current function needs to save/restore LR. */
19643 static bool
19644 thumb_force_lr_save (void)
19646 return !cfun->machine->lr_save_eliminated
19647 && (!leaf_function_p ()
19648 || thumb_far_jump_used_p ()
19649 || df_regs_ever_live_p (LR_REGNUM));
19652 /* We do not know whether r3 will be available, because
19653 there is an indirect tailcall happening in this
19654 particular case. */
19655 static bool
19656 is_indirect_tailcall_p (rtx call)
19658 rtx pat = PATTERN (call);
19660 /* Indirect tail call. */
19661 pat = XVECEXP (pat, 0, 0);
19662 if (GET_CODE (pat) == SET)
19663 pat = SET_SRC (pat);
19665 pat = XEXP (XEXP (pat, 0), 0);
19666 return REG_P (pat);
19669 /* Return true if r3 is used by any of the tail call insns in the
19670 current function. */
19671 static bool
19672 any_sibcall_could_use_r3 (void)
19674 edge_iterator ei;
19675 edge e;
19677 if (!crtl->tail_call_emit)
19678 return false;
19679 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
19680 if (e->flags & EDGE_SIBCALL)
19682 rtx_insn *call = BB_END (e->src);
19683 if (!CALL_P (call))
19684 call = prev_nonnote_nondebug_insn (call);
19685 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
19686 if (find_regno_fusage (call, USE, 3)
19687 || is_indirect_tailcall_p (call))
19688 return true;
19690 return false;
19694 /* Compute the distance from register FROM to register TO.
19695 These can be the arg pointer (26), the soft frame pointer (25),
19696 the stack pointer (13) or the hard frame pointer (11).
19697 In thumb mode r7 is used as the soft frame pointer, if needed.
19698 Typical stack layout looks like this:
19700 old stack pointer -> | |
19701 ----
19702 | | \
19703 | | saved arguments for
19704 | | vararg functions
19705 | | /
19707 hard FP & arg pointer -> | | \
19708 | | stack
19709 | | frame
19710 | | /
19712 | | \
19713 | | call saved
19714 | | registers
19715 soft frame pointer -> | | /
19717 | | \
19718 | | local
19719 | | variables
19720 locals base pointer -> | | /
19722 | | \
19723 | | outgoing
19724 | | arguments
19725 current stack pointer -> | | /
19728 For a given function some or all of these stack components
19729 may not be needed, giving rise to the possibility of
19730 eliminating some of the registers.
19732 The values returned by this function must reflect the behavior
19733 of arm_expand_prologue() and arm_compute_save_reg_mask().
19735 The sign of the number returned reflects the direction of stack
19736 growth, so the values are positive for all eliminations except
19737 from the soft frame pointer to the hard frame pointer.
19739 SFP may point just inside the local variables block to ensure correct
19740 alignment. */
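/* Reading aid (informal): each field of arm_stack_offsets computed below is,
   roughly, the cumulative distance in bytes from the incoming stack pointer
   down to the end of the corresponding region in the picture above:
   SAVED_ARGS (vararg save area), FRAME (where the APCS hard frame pointer
   sits), SAVED_REGS (call-saved registers), SOFT_FRAME, LOCALS_BASE and
   OUTGOING_ARGS (the final stack pointer position).  */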
19743 /* Calculate stack offsets. These are used to calculate register elimination
19744 offsets and in prologue/epilogue code. Also calculates which registers
19745 should be saved. */
19747 static arm_stack_offsets *
19748 arm_get_frame_offsets (void)
19750 struct arm_stack_offsets *offsets;
19751 unsigned long func_type;
19752 int leaf;
19753 int saved;
19754 int core_saved;
19755 HOST_WIDE_INT frame_size;
19756 int i;
19758 offsets = &cfun->machine->stack_offsets;
19760 /* We need to know if we are a leaf function. Unfortunately, it
19761 is possible to be called after start_sequence has been called,
19762 which causes get_insns to return the insns for the sequence,
19763 not the function, which will cause leaf_function_p to return
19764 the incorrect result.
19766 However, we only need to know about leaf functions once reload has completed, and the
19767 frame size cannot be changed after that time, so we can safely
19768 use the cached value. */
19770 if (reload_completed)
19771 return offsets;
19773 /* Initially this is the size of the local variables. It will be translated
19774 into an offset once we have determined the size of preceding data. */
19775 frame_size = ROUND_UP_WORD (get_frame_size ());
19777 leaf = leaf_function_p ();
19779 /* Space for variadic functions. */
19780 offsets->saved_args = crtl->args.pretend_args_size;
19782 /* In Thumb mode this is incorrect, but never used. */
19783 offsets->frame
19784 = (offsets->saved_args
19785 + arm_compute_static_chain_stack_bytes ()
19786 + (frame_pointer_needed ? 4 : 0));
19788 if (TARGET_32BIT)
19790 unsigned int regno;
19792 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
19793 core_saved = bit_count (offsets->saved_regs_mask) * 4;
19794 saved = core_saved;
19796 /* We know that SP will be doubleword aligned on entry, and we must
19797 preserve that condition at any subroutine call. We also require the
19798 soft frame pointer to be doubleword aligned. */
19800 if (TARGET_REALLY_IWMMXT)
19802 /* Check for the call-saved iWMMXt registers. */
19803 for (regno = FIRST_IWMMXT_REGNUM;
19804 regno <= LAST_IWMMXT_REGNUM;
19805 regno++)
19806 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
19807 saved += 8;
19810 func_type = arm_current_func_type ();
19811 /* Space for saved VFP registers. */
19812 if (! IS_VOLATILE (func_type)
19813 && TARGET_HARD_FLOAT)
19814 saved += arm_get_vfp_saved_size ();
19816 else /* TARGET_THUMB1 */
19818 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
19819 core_saved = bit_count (offsets->saved_regs_mask) * 4;
19820 saved = core_saved;
19821 if (TARGET_BACKTRACE)
19822 saved += 16;
19825 /* Saved registers include the stack frame. */
19826 offsets->saved_regs
19827 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
19828 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
19830 /* A leaf function does not need any stack alignment if it has nothing
19831 on the stack. */
19832 if (leaf && frame_size == 0
19833 /* However if it calls alloca(), we have a dynamically allocated
19834 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
19835 && ! cfun->calls_alloca)
19837 offsets->outgoing_args = offsets->soft_frame;
19838 offsets->locals_base = offsets->soft_frame;
19839 return offsets;
19842 /* Ensure SFP has the correct alignment. */
19843 if (ARM_DOUBLEWORD_ALIGN
19844 && (offsets->soft_frame & 7))
19846 offsets->soft_frame += 4;
19847 /* Try to align stack by pushing an extra reg. Don't bother doing this
19848 when there is a stack frame as the alignment will be rolled into
19849 the normal stack adjustment. */
19850 if (frame_size + crtl->outgoing_args_size == 0)
19852 int reg = -1;
19854 /* Register r3 is caller-saved. Normally it does not need to be
19855 saved on entry by the prologue. However if we choose to save
19856 it for padding then we may confuse the compiler into thinking
19857 a prologue sequence is required when in fact it is not. This
19858 will occur when shrink-wrapping if r3 is used as a scratch
19859 register and there are no other callee-saved writes.
19861 This situation can be avoided when other callee-saved registers
19862 are available and r3 is not mandatory if we choose a callee-saved
19863 register for padding. */
19864 bool prefer_callee_reg_p = false;
19866 /* If it is safe to use r3, then do so. This sometimes
19867 generates better code on Thumb-2 by avoiding the need to
19868 use 32-bit push/pop instructions. */
19869 if (! any_sibcall_could_use_r3 ()
19870 && arm_size_return_regs () <= 12
19871 && (offsets->saved_regs_mask & (1 << 3)) == 0
19872 && (TARGET_THUMB2
19873 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
19875 reg = 3;
19876 if (!TARGET_THUMB2)
19877 prefer_callee_reg_p = true;
19879 if (reg == -1
19880 || prefer_callee_reg_p)
19882 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
19884 /* Avoid fixed registers; they may be changed at
19885 arbitrary times so it's unsafe to restore them
19886 during the epilogue. */
19887 if (!fixed_regs[i]
19888 && (offsets->saved_regs_mask & (1 << i)) == 0)
19890 reg = i;
19891 break;
19896 if (reg != -1)
19898 offsets->saved_regs += 4;
19899 offsets->saved_regs_mask |= (1 << reg);
19904 offsets->locals_base = offsets->soft_frame + frame_size;
19905 offsets->outgoing_args = (offsets->locals_base
19906 + crtl->outgoing_args_size);
19908 if (ARM_DOUBLEWORD_ALIGN)
19910 /* Ensure SP remains doubleword aligned. */
19911 if (offsets->outgoing_args & 7)
19912 offsets->outgoing_args += 4;
19913 gcc_assert (!(offsets->outgoing_args & 7));
19916 return offsets;
19920 /* Calculate the relative offsets for the different stack pointers. Positive
19921 offsets are in the direction of stack growth. */
19923 HOST_WIDE_INT
19924 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19926 arm_stack_offsets *offsets;
19928 offsets = arm_get_frame_offsets ();
19930 /* OK, now we have enough information to compute the distances.
19931 There must be an entry in these switch tables for each pair
19932 of registers in ELIMINABLE_REGS, even if some of the entries
19933 seem to be redundant or useless. */
19934 switch (from)
19936 case ARG_POINTER_REGNUM:
19937 switch (to)
19939 case THUMB_HARD_FRAME_POINTER_REGNUM:
19940 return 0;
19942 case FRAME_POINTER_REGNUM:
19943 /* This is the reverse of the soft frame pointer
19944 to hard frame pointer elimination below. */
19945 return offsets->soft_frame - offsets->saved_args;
19947 case ARM_HARD_FRAME_POINTER_REGNUM:
19948 /* This is only non-zero in the case where the static chain register
19949 is stored above the frame. */
19950 return offsets->frame - offsets->saved_args - 4;
19952 case STACK_POINTER_REGNUM:
19953 /* If nothing has been pushed on the stack at all
19954 then this will return -4. This *is* correct! */
19955 return offsets->outgoing_args - (offsets->saved_args + 4);
19957 default:
19958 gcc_unreachable ();
19960 gcc_unreachable ();
19962 case FRAME_POINTER_REGNUM:
19963 switch (to)
19965 case THUMB_HARD_FRAME_POINTER_REGNUM:
19966 return 0;
19968 case ARM_HARD_FRAME_POINTER_REGNUM:
19969 /* The hard frame pointer points to the top entry in the
19970 stack frame. The soft frame pointer to the bottom entry
19971 in the stack frame. If there is no stack frame at all,
19972 then they are identical. */
19974 return offsets->frame - offsets->soft_frame;
19976 case STACK_POINTER_REGNUM:
19977 return offsets->outgoing_args - offsets->soft_frame;
19979 default:
19980 gcc_unreachable ();
19982 gcc_unreachable ();
19984 default:
19985 /* You cannot eliminate from the stack pointer.
19986 In theory you could eliminate from the hard frame
19987 pointer to the stack pointer, but this will never
19988 happen, since if a stack frame is not needed the
19989 hard frame pointer will never be used. */
19990 gcc_unreachable ();
19994 /* Given FROM and TO register numbers, say whether this elimination is
19995 allowed. Frame pointer elimination is automatically handled.
19997 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
19998 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
19999 pointer, we must eliminate FRAME_POINTER_REGNUM into
20000 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20001 ARG_POINTER_REGNUM. */
20003 bool
20004 arm_can_eliminate (const int from, const int to)
20006 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20007 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20008 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20009 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20010 true);
20013 /* Emit RTL to save coprocessor registers on function entry. Returns the
20014 number of bytes pushed. */
20016 static int
20017 arm_save_coproc_regs(void)
20019 int saved_size = 0;
20020 unsigned reg;
20021 unsigned start_reg;
20022 rtx insn;
20024 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20025 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20027 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20028 insn = gen_rtx_MEM (V2SImode, insn);
20029 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20030 RTX_FRAME_RELATED_P (insn) = 1;
20031 saved_size += 8;
20034 if (TARGET_HARD_FLOAT)
20036 start_reg = FIRST_VFP_REGNUM;
20038 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20040 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20041 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20043 if (start_reg != reg)
20044 saved_size += vfp_emit_fstmd (start_reg,
20045 (reg - start_reg) / 2);
20046 start_reg = reg + 2;
20049 if (start_reg != reg)
20050 saved_size += vfp_emit_fstmd (start_reg,
20051 (reg - start_reg) / 2);
20053 return saved_size;
20057 /* Set the Thumb frame pointer from the stack pointer. */
20059 static void
20060 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20062 HOST_WIDE_INT amount;
20063 rtx insn, dwarf;
20065 amount = offsets->outgoing_args - offsets->locals_base;
20066 if (amount < 1024)
20067 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20068 stack_pointer_rtx, GEN_INT (amount)));
20069 else
20071 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20072 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20073 expects the first two operands to be the same. */
20074 if (TARGET_THUMB2)
20076 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20077 stack_pointer_rtx,
20078 hard_frame_pointer_rtx));
20080 else
20082 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20083 hard_frame_pointer_rtx,
20084 stack_pointer_rtx));
20086 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
20087 plus_constant (Pmode, stack_pointer_rtx, amount));
20088 RTX_FRAME_RELATED_P (dwarf) = 1;
20089 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20092 RTX_FRAME_RELATED_P (insn) = 1;
20095 struct scratch_reg {
20096 rtx reg;
20097 bool saved;
20100 /* Return a short-lived scratch register for use as a 2nd scratch register on
20101 function entry after the registers are saved in the prologue. This register
20102 must be released by means of release_scratch_register_on_entry. IP is not
20103 considered since it is always used as the 1st scratch register if available.
20105 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
20106 mask of live registers. */
20108 static void
20109 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
20110 unsigned long live_regs)
20112 int regno = -1;
20114 sr->saved = false;
20116 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
20117 regno = LR_REGNUM;
20118 else
20120 unsigned int i;
20122 for (i = 4; i < 11; i++)
20123 if (regno1 != i && (live_regs & (1 << i)) != 0)
20125 regno = i;
20126 break;
20129 if (regno < 0)
20131 /* If IP is used as the 1st scratch register for a nested function,
20132 then either r3 wasn't available or is used to preserve IP. */
20133 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
20134 regno1 = 3;
20135 regno = (regno1 == 3 ? 2 : 3);
20136 sr->saved
20137 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
20138 regno);
20142 sr->reg = gen_rtx_REG (SImode, regno);
20143 if (sr->saved)
20145 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20146 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
20147 rtx x = gen_rtx_SET (stack_pointer_rtx,
20148 plus_constant (Pmode, stack_pointer_rtx, -4));
20149 RTX_FRAME_RELATED_P (insn) = 1;
20150 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
20154 /* Release a scratch register obtained from the preceding function. */
20156 static void
20157 release_scratch_register_on_entry (struct scratch_reg *sr)
20159 if (sr->saved)
20161 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
20162 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
20163 rtx x = gen_rtx_SET (stack_pointer_rtx,
20164 plus_constant (Pmode, stack_pointer_rtx, 4));
20165 RTX_FRAME_RELATED_P (insn) = 1;
20166 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
20170 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
20172 #if PROBE_INTERVAL > 4096
20173 #error Cannot use indexed addressing mode for stack probing
20174 #endif
20176 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
20177 inclusive. These are offsets from the current stack pointer. REGNO1
20178 is the index number of the 1st scratch register and LIVE_REGS is the
20179 mask of live registers. */
20181 static void
20182 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
20183 unsigned int regno1, unsigned long live_regs)
20185 rtx reg1 = gen_rtx_REG (Pmode, regno1);
20187 /* See if we have a constant small number of probes to generate. If so,
20188 that's the easy case. */
20189 if (size <= PROBE_INTERVAL)
20191 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
20192 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
20193 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
20196 /* The run-time loop is made up of 10 insns in the generic case while the
20197 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
20198 else if (size <= 5 * PROBE_INTERVAL)
20200 HOST_WIDE_INT i, rem;
20202 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
20203 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
20204 emit_stack_probe (reg1);
20206 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
20207 it exceeds SIZE. If only two probes are needed, this will not
20208 generate any code. Then probe at FIRST + SIZE. */
20209 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
20211 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
20212 emit_stack_probe (reg1);
20215 rem = size - (i - PROBE_INTERVAL);
20216 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
20218 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
20219 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
20221 else
20222 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
20225 /* Otherwise, do the same as above, but in a loop. Note that we must be
20226 extra careful with variables wrapping around because we might be at
20227 the very top (or the very bottom) of the address space and we have
20228 to be able to handle this case properly; in particular, we use an
20229 equality test for the loop condition. */
20230 else
20232 HOST_WIDE_INT rounded_size;
20233 struct scratch_reg sr;
20235 get_scratch_register_on_entry (&sr, regno1, live_regs);
20237 emit_move_insn (reg1, GEN_INT (first));
20240 /* Step 1: round SIZE to the previous multiple of the interval. */
20242 rounded_size = size & -PROBE_INTERVAL;
20243 emit_move_insn (sr.reg, GEN_INT (rounded_size));
20246 /* Step 2: compute initial and final value of the loop counter. */
20248 /* TEST_ADDR = SP + FIRST. */
20249 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
20251 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
20252 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
20255 /* Step 3: the loop
20259 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
20260 probe at TEST_ADDR
20262 while (TEST_ADDR != LAST_ADDR)
20264 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
20265 until it is equal to ROUNDED_SIZE. */
20267 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
20270 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
20271 that SIZE is equal to ROUNDED_SIZE. */
20273 if (size != rounded_size)
20275 HOST_WIDE_INT rem = size - rounded_size;
20277 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
20279 emit_set_insn (sr.reg,
20280 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
20281 emit_stack_probe (plus_constant (Pmode, sr.reg,
20282 PROBE_INTERVAL - rem));
20284 else
20285 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
20288 release_scratch_register_on_entry (&sr);
20291 /* Make sure nothing is scheduled before we are done. */
20292 emit_insn (gen_blockage ());
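/* Worked example (assuming PROBE_INTERVAL == 4096): a call with FIRST == 4096
   and SIZE == 10000 takes the second case above and probes at SP - 8192,
   SP - 12288 and finally SP - 14096, i.e. at SP - FIRST - SIZE.  Anything
   larger than 5 * PROBE_INTERVAL uses the loop form, probing every
   PROBE_INTERVAL bytes between the rounded bounds and then handling the
   remainder separately.  */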
20295 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
20296 absolute addresses. */
20298 const char *
20299 output_probe_stack_range (rtx reg1, rtx reg2)
20301 static int labelno = 0;
20302 char loop_lab[32];
20303 rtx xops[2];
20305 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
20307 /* Loop. */
20308 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
20310 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
20311 xops[0] = reg1;
20312 xops[1] = GEN_INT (PROBE_INTERVAL);
20313 output_asm_insn ("sub\t%0, %0, %1", xops);
20315 /* Probe at TEST_ADDR. */
20316 output_asm_insn ("str\tr0, [%0, #0]", xops);
20318 /* Test if TEST_ADDR == LAST_ADDR. */
20319 xops[1] = reg2;
20320 output_asm_insn ("cmp\t%0, %1", xops);
20322 /* Branch. */
20323 fputs ("\tbne\t", asm_out_file);
20324 assemble_name_raw (asm_out_file, loop_lab);
20325 fputc ('\n', asm_out_file);
20327 return "";
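/* The emitted sequence therefore looks like this (sketch, with r4/r5 standing
   in for the two address registers and PROBE_INTERVAL == 4096):

     .LPSRL0: sub  r4, r4, #4096
              str  r0, [r4, #0]
              cmp  r4, r5
              bne  .LPSRL0
*/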
20330 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20331 function. */
20332 void
20333 arm_expand_prologue (void)
20335 rtx amount;
20336 rtx insn;
20337 rtx ip_rtx;
20338 unsigned long live_regs_mask;
20339 unsigned long func_type;
20340 int fp_offset = 0;
20341 int saved_pretend_args = 0;
20342 int saved_regs = 0;
20343 unsigned HOST_WIDE_INT args_to_push;
20344 HOST_WIDE_INT size;
20345 arm_stack_offsets *offsets;
20346 bool clobber_ip;
20348 func_type = arm_current_func_type ();
20350 /* Naked functions don't have prologues. */
20351 if (IS_NAKED (func_type))
20353 if (flag_stack_usage_info)
20354 current_function_static_stack_size = 0;
20355 return;
20358 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20359 args_to_push = crtl->args.pretend_args_size;
20361 /* Compute which register we will have to save onto the stack. */
20362 offsets = arm_get_frame_offsets ();
20363 live_regs_mask = offsets->saved_regs_mask;
20365 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20367 if (IS_STACKALIGN (func_type))
20369 rtx r0, r1;
20371 /* Handle a word-aligned stack pointer. We generate the following:
20373 mov r0, sp
20374 bic r1, r0, #7
20375 mov sp, r1
20376 <save and restore r0 in normal prologue/epilogue>
20377 mov sp, r0
20378 bx lr
20380 The unwinder doesn't need to know about the stack realignment.
20381 Just tell it we saved SP in r0. */
20382 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20384 r0 = gen_rtx_REG (SImode, R0_REGNUM);
20385 r1 = gen_rtx_REG (SImode, R1_REGNUM);
20387 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20388 RTX_FRAME_RELATED_P (insn) = 1;
20389 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20391 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20393 /* ??? The CFA changes here, which may cause GDB to conclude that it
20394 has entered a different function. That said, the unwind info is
20395 correct, individually, before and after this instruction because
20396 we've described the save of SP, which will override the default
20397 handling of SP as restoring from the CFA. */
20398 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20401 /* The static chain register is the same as the IP register. If it is
20402 clobbered when creating the frame, we need to save and restore it. */
20403 clobber_ip = IS_NESTED (func_type)
20404 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20405 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
20406 && !df_regs_ever_live_p (LR_REGNUM)
20407 && arm_r3_live_at_start_p ()));
20409 /* Find somewhere to store IP whilst the frame is being created.
20410 We try the following places in order:
20412 1. The last argument register r3 if it is available.
20413 2. A slot on the stack above the frame if there are no
20414 arguments to push onto the stack.
20415 3. Register r3 again, after pushing the argument registers
20416 onto the stack, if this is a varargs function.
20417 4. The last slot on the stack created for the arguments to
20418 push, if this isn't a varargs function.
20420 Note - we only need to tell the dwarf2 backend about the SP
20421 adjustment in the second variant; the static chain register
20422 doesn't need to be unwound, as it doesn't contain a value
20423 inherited from the caller. */
20424 if (clobber_ip)
20426 if (!arm_r3_live_at_start_p ())
20427 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20428 else if (args_to_push == 0)
20430 rtx addr, dwarf;
20432 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20433 saved_regs += 4;
20435 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20436 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
20437 fp_offset = 4;
20439 /* Just tell the dwarf backend that we adjusted SP. */
20440 dwarf = gen_rtx_SET (stack_pointer_rtx,
20441 plus_constant (Pmode, stack_pointer_rtx,
20442 -fp_offset));
20443 RTX_FRAME_RELATED_P (insn) = 1;
20444 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20446 else
20448 /* Store the args on the stack. */
20449 if (cfun->machine->uses_anonymous_args)
20451 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
20452 (0xf0 >> (args_to_push / 4)) & 0xf);
20453 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20454 saved_pretend_args = 1;
20456 else
20458 rtx addr, dwarf;
20460 if (args_to_push == 4)
20461 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20462 else
20463 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
20464 plus_constant (Pmode,
20465 stack_pointer_rtx,
20466 -args_to_push));
20468 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
20470 /* Just tell the dwarf backend that we adjusted SP. */
20471 dwarf = gen_rtx_SET (stack_pointer_rtx,
20472 plus_constant (Pmode, stack_pointer_rtx,
20473 -args_to_push));
20474 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20477 RTX_FRAME_RELATED_P (insn) = 1;
20478 fp_offset = args_to_push;
20479 args_to_push = 0;
20483 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20485 if (IS_INTERRUPT (func_type))
20487 /* Interrupt functions must not corrupt any registers.
20488 Creating a frame pointer however, corrupts the IP
20489 register, so we must push it first. */
20490 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
20492 /* Do not set RTX_FRAME_RELATED_P on this insn.
20493 The dwarf stack unwinding code only wants to see one
20494 stack decrement per function, and this is not it. If
20495 this instruction is labeled as being part of the frame
20496 creation sequence then dwarf2out_frame_debug_expr will
20497 die when it encounters the assignment of IP to FP
20498 later on, since the use of SP here establishes SP as
20499 the CFA register and not IP.
20501 Anyway this instruction is not really part of the stack
20502 frame creation although it is part of the prologue. */
20505 insn = emit_set_insn (ip_rtx,
20506 plus_constant (Pmode, stack_pointer_rtx,
20507 fp_offset));
20508 RTX_FRAME_RELATED_P (insn) = 1;
20511 if (args_to_push)
20513 /* Push the argument registers, or reserve space for them. */
20514 if (cfun->machine->uses_anonymous_args)
20515 insn = emit_multi_reg_push
20516 ((0xf0 >> (args_to_push / 4)) & 0xf,
20517 (0xf0 >> (args_to_push / 4)) & 0xf);
20518 else
20519 insn = emit_insn
20520 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20521 GEN_INT (- args_to_push)));
20522 RTX_FRAME_RELATED_P (insn) = 1;
20525 /* If this is an interrupt service routine, and the link register
20526 is going to be pushed, and we're not generating an extra
20527 push of IP (needed when a frame is needed and the frame layout is APCS),
20528 subtracting four from LR now will mean that the function return
20529 can be done with a single instruction. */
20530 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
20531 && (live_regs_mask & (1 << LR_REGNUM)) != 0
20532 && !(frame_pointer_needed && TARGET_APCS_FRAME)
20533 && TARGET_ARM)
20535 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
20537 emit_set_insn (lr, plus_constant (SImode, lr, -4));
20540 if (live_regs_mask)
20542 unsigned long dwarf_regs_mask = live_regs_mask;
20544 saved_regs += bit_count (live_regs_mask) * 4;
20545 if (optimize_size && !frame_pointer_needed
20546 && saved_regs == offsets->saved_regs - offsets->saved_args)
20548 /* If no coprocessor registers are being pushed and we don't have
20549 to worry about a frame pointer then push extra registers to
20550 create the stack frame. This is done in a way that does not
20551 alter the frame layout, so is independent of the epilogue. */
20552 int n;
20553 int frame;
20554 n = 0;
20555 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
20556 n++;
20557 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
20558 if (frame && n * 4 >= frame)
20560 n = frame / 4;
20561 live_regs_mask |= (1 << n) - 1;
20562 saved_regs += frame;
20566 if (TARGET_LDRD
20567 && current_tune->prefer_ldrd_strd
20568 && !optimize_function_for_size_p (cfun))
20570 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
20571 if (TARGET_THUMB2)
20572 thumb2_emit_strd_push (live_regs_mask);
20573 else if (TARGET_ARM
20574 && !TARGET_APCS_FRAME
20575 && !IS_INTERRUPT (func_type))
20576 arm_emit_strd_push (live_regs_mask);
20577 else
20579 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
20580 RTX_FRAME_RELATED_P (insn) = 1;
20583 else
20585 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
20586 RTX_FRAME_RELATED_P (insn) = 1;
20590 if (! IS_VOLATILE (func_type))
20591 saved_regs += arm_save_coproc_regs ();
20593 if (frame_pointer_needed && TARGET_ARM)
20595 /* Create the new frame pointer. */
20596 if (TARGET_APCS_FRAME)
20598 insn = GEN_INT (-(4 + args_to_push + fp_offset));
20599 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
20600 RTX_FRAME_RELATED_P (insn) = 1;
20602 else
20604 insn = GEN_INT (saved_regs - (4 + fp_offset));
20605 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20606 stack_pointer_rtx, insn));
20607 RTX_FRAME_RELATED_P (insn) = 1;
20611 size = offsets->outgoing_args - offsets->saved_args;
20612 if (flag_stack_usage_info)
20613 current_function_static_stack_size = size;
20615 /* If this isn't an interrupt service routine and we have a frame, then do
20616 stack checking. We use IP as the first scratch register, except for the
20617 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
20618 if (!IS_INTERRUPT (func_type)
20619 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
20621 unsigned int regno;
20623 if (!IS_NESTED (func_type) || clobber_ip)
20624 regno = IP_REGNUM;
20625 else if (df_regs_ever_live_p (LR_REGNUM))
20626 regno = LR_REGNUM;
20627 else
20628 regno = 3;
20630 if (crtl->is_leaf && !cfun->calls_alloca)
20632 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
20633 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
20634 size - STACK_CHECK_PROTECT,
20635 regno, live_regs_mask);
20637 else if (size > 0)
20638 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
20639 regno, live_regs_mask);
20642 /* Recover the static chain register. */
20643 if (clobber_ip)
20645 if (!arm_r3_live_at_start_p () || saved_pretend_args)
20646 insn = gen_rtx_REG (SImode, 3);
20647 else
20649 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
20650 insn = gen_frame_mem (SImode, insn);
20652 emit_set_insn (ip_rtx, insn);
20653 emit_insn (gen_force_register_use (ip_rtx));
20656 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
20658 /* This add can produce multiple insns for a large constant, so we
20659 need to get tricky. */
20660 rtx_insn *last = get_last_insn ();
20662 amount = GEN_INT (offsets->saved_args + saved_regs
20663 - offsets->outgoing_args);
20665 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20666 amount));
20669 last = last ? NEXT_INSN (last) : get_insns ();
20670 RTX_FRAME_RELATED_P (last) = 1;
20672 while (last != insn);
20674 /* If the frame pointer is needed, emit a special barrier that
20675 will prevent the scheduler from moving stores to the frame
20676 before the stack adjustment. */
20677 if (frame_pointer_needed)
20678 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
20679 hard_frame_pointer_rtx));
20683 if (frame_pointer_needed && TARGET_THUMB2)
20684 thumb_set_frame_pointer (offsets);
20686 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20688 unsigned long mask;
20690 mask = live_regs_mask;
20691 mask &= THUMB2_WORK_REGS;
20692 if (!IS_NESTED (func_type))
20693 mask |= (1 << IP_REGNUM);
20694 arm_load_pic_register (mask);
20697 /* If we are profiling, make sure no instructions are scheduled before
20698 the call to mcount. Similarly if the user has requested no
20699 scheduling in the prolog. Similarly if we want non-call exceptions
20700 using the EABI unwinder, to prevent faulting instructions from being
20701 swapped with a stack adjustment. */
20702 if (crtl->profile || !TARGET_SCHED_PROLOG
20703 || (arm_except_unwind_info (&global_options) == UI_TARGET
20704 && cfun->can_throw_non_call_exceptions))
20705 emit_insn (gen_blockage ());
20707 /* If the link register is being kept alive, with the return address in it,
20708 then make sure that it does not get reused by the ce2 pass. */
20709 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
20710 cfun->machine->lr_save_eliminated = 1;
20713 /* Print condition code to STREAM. Helper function for arm_print_operand. */
20714 static void
20715 arm_print_condition (FILE *stream)
20717 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
20719 /* Branch conversion is not implemented for Thumb-2. */
20720 if (TARGET_THUMB)
20722 output_operand_lossage ("predicated Thumb instruction");
20723 return;
20725 if (current_insn_predicate != NULL)
20727 output_operand_lossage
20728 ("predicated instruction in conditional sequence");
20729 return;
20732 fputs (arm_condition_codes[arm_current_cc], stream);
20734 else if (current_insn_predicate)
20736 enum arm_cond_code code;
20738 if (TARGET_THUMB1)
20740 output_operand_lossage ("predicated Thumb instruction");
20741 return;
20744 code = get_arm_condition_code (current_insn_predicate);
20745 fputs (arm_condition_codes[code], stream);
20750 /* Globally reserved letters: acln
20751 Punctuation letters currently used: @_|?().!#
20752 Lower case letters currently used: bcdefhimpqtvwxyz
20753 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
20754 Letters previously used, but now deprecated/obsolete: sVWXYZ.
20756 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
20758 If CODE is 'd', then the X is a condition operand and the instruction
20759 should only be executed if the condition is true.
20760 if CODE is 'D', then the X is a condition operand and the instruction
20761 should only be executed if the condition is false: however, if the mode
20762 of the comparison is CCFPEmode, then always execute the instruction -- we
20763 do this because in these circumstances !GE does not necessarily imply LT;
20764 in these cases the instruction pattern will take care to make sure that
20765 an instruction containing %d will follow, thereby undoing the effects of
20766 doing this instruction unconditionally.
20767 If CODE is 'N' then X is a floating point operand that must be negated
20768 before output.
20769 If CODE is 'B' then output a bitwise inverted value of X (a const int).
20770 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
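/* A few concrete cases (informal, derived from the handlers below): '%B'
   applied to (const_int 5) prints -6 (the sign-extended bitwise inverse),
   '%b' applied to (const_int 8) prints #3 (its log2), and '%L' applied to
   (const_int 0x12345678) prints 22136 (0x5678, the low 16 bits).  */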
20771 static void
20772 arm_print_operand (FILE *stream, rtx x, int code)
20774 switch (code)
20776 case '@':
20777 fputs (ASM_COMMENT_START, stream);
20778 return;
20780 case '_':
20781 fputs (user_label_prefix, stream);
20782 return;
20784 case '|':
20785 fputs (REGISTER_PREFIX, stream);
20786 return;
20788 case '?':
20789 arm_print_condition (stream);
20790 return;
20792 case '.':
20793 /* The current condition code for a condition code setting instruction.
20794 Preceded by 's' in unified syntax, otherwise followed by 's'. */
20795 fputc('s', stream);
20796 arm_print_condition (stream);
20797 return;
20799 case '!':
20800 /* If the instruction is conditionally executed then print
20801 the current condition code, otherwise print 's'. */
20802 gcc_assert (TARGET_THUMB2);
20803 if (current_insn_predicate)
20804 arm_print_condition (stream);
20805 else
20806 fputc('s', stream);
20807 break;
20809 /* %# is a "break" sequence. It doesn't output anything, but is used to
20810 separate e.g. operand numbers from following text, if that text consists
20811 of further digits which we don't want to be part of the operand
20812 number. */
20813 case '#':
20814 return;
20816 case 'N':
20818 REAL_VALUE_TYPE r;
20819 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
20820 fprintf (stream, "%s", fp_const_from_val (&r));
20822 return;
20824 /* An integer or symbol address without a preceding # sign. */
20825 case 'c':
20826 switch (GET_CODE (x))
20828 case CONST_INT:
20829 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
20830 break;
20832 case SYMBOL_REF:
20833 output_addr_const (stream, x);
20834 break;
20836 case CONST:
20837 if (GET_CODE (XEXP (x, 0)) == PLUS
20838 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
20840 output_addr_const (stream, x);
20841 break;
20843 /* Fall through. */
20845 default:
20846 output_operand_lossage ("Unsupported operand for code '%c'", code);
20848 return;
20850 /* An integer that we want to print in HEX. */
20851 case 'x':
20852 switch (GET_CODE (x))
20854 case CONST_INT:
20855 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
20856 break;
20858 default:
20859 output_operand_lossage ("Unsupported operand for code '%c'", code);
20861 return;
20863 case 'B':
20864 if (CONST_INT_P (x))
20866 HOST_WIDE_INT val;
20867 val = ARM_SIGN_EXTEND (~INTVAL (x));
20868 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
20870 else
20872 putc ('~', stream);
20873 output_addr_const (stream, x);
20875 return;
20877 case 'b':
20878 /* Print the log2 of a CONST_INT. */
20880 HOST_WIDE_INT val;
20882 if (!CONST_INT_P (x)
20883 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
20884 output_operand_lossage ("Unsupported operand for code '%c'", code);
20885 else
20886 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
20888 return;
20890 case 'L':
20891 /* The low 16 bits of an immediate constant. */
20892 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
20893 return;
20895 case 'i':
20896 fprintf (stream, "%s", arithmetic_instr (x, 1));
20897 return;
20899 case 'I':
20900 fprintf (stream, "%s", arithmetic_instr (x, 0));
20901 return;
20903 case 'S':
20905 HOST_WIDE_INT val;
20906 const char *shift;
20908 shift = shift_op (x, &val);
20910 if (shift)
20912 fprintf (stream, ", %s ", shift);
20913 if (val == -1)
20914 arm_print_operand (stream, XEXP (x, 1), 0);
20915 else
20916 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
20919 return;
20921 /* An explanation of the 'Q', 'R' and 'H' register operands:
20923 In a pair of registers containing a DI or DF value the 'Q'
20924 operand returns the register number of the register containing
20925 the least significant part of the value. The 'R' operand returns
20926 the register number of the register containing the most
20927 significant part of the value.
20929 The 'H' operand returns the higher of the two register numbers.
20930 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
20931 same as the 'Q' operand, since the most significant part of the
20932 value is held in the lower number register. The reverse is true
20933 on systems where WORDS_BIG_ENDIAN is false.
20935 The purpose of these operands is to distinguish between cases
20936 where the endian-ness of the values is important (for example
20937 when they are added together), and cases where the endian-ness
20938 is irrelevant, but the order of register operations is important.
20939 For example when loading a value from memory into a register
20940 pair, the endian-ness does not matter. Provided that the value
20941 from the lower memory address is put into the lower numbered
20942 register, and the value from the higher address is put into the
20943 higher numbered register, the load will work regardless of whether
20944 the value being loaded is big-wordian or little-wordian. The
20945 order of the two register loads can matter however, if the address
20946 of the memory location is actually held in one of the registers
20947 being overwritten by the load.
20949 The 'Q' and 'R' constraints are also available for 64-bit
20950 constants. */
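/* For instance, for a DImode value held in the register pair r2/r3 on a
   little-endian target, %Q prints r2, %R prints r3 and %H prints r3; when
   WORDS_BIG_ENDIAN is true, %Q and %R swap while %H still prints r3.  */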
20951 case 'Q':
20952 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
20954 rtx part = gen_lowpart (SImode, x);
20955 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
20956 return;
20959 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20961 output_operand_lossage ("invalid operand for code '%c'", code);
20962 return;
20965 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
20966 return;
20968 case 'R':
20969 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
20971 machine_mode mode = GET_MODE (x);
20972 rtx part;
20974 if (mode == VOIDmode)
20975 mode = DImode;
20976 part = gen_highpart_mode (SImode, mode, x);
20977 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
20978 return;
20981 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20983 output_operand_lossage ("invalid operand for code '%c'", code);
20984 return;
20987 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
20988 return;
20990 case 'H':
20991 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20993 output_operand_lossage ("invalid operand for code '%c'", code);
20994 return;
20997 asm_fprintf (stream, "%r", REGNO (x) + 1);
20998 return;
21000 case 'J':
21001 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21003 output_operand_lossage ("invalid operand for code '%c'", code);
21004 return;
21007 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21008 return;
21010 case 'K':
21011 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21013 output_operand_lossage ("invalid operand for code '%c'", code);
21014 return;
21017 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21018 return;
21020 case 'm':
21021 asm_fprintf (stream, "%r",
21022 REG_P (XEXP (x, 0))
21023 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21024 return;
21026 case 'M':
21027 asm_fprintf (stream, "{%r-%r}",
21028 REGNO (x),
21029 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21030 return;
21032 /* Like 'M', but writing doubleword vector registers, for use by Neon
21033 insns. */
21034 case 'h':
21036 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21037 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21038 if (numregs == 1)
21039 asm_fprintf (stream, "{d%d}", regno);
21040 else
21041 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21043 return;
21045 case 'd':
21046 /* CONST_TRUE_RTX means always -- that's the default. */
21047 if (x == const_true_rtx)
21048 return;
21050 if (!COMPARISON_P (x))
21052 output_operand_lossage ("invalid operand for code '%c'", code);
21053 return;
21056 fputs (arm_condition_codes[get_arm_condition_code (x)],
21057 stream);
21058 return;
21060 case 'D':
21061 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21062 want to do that. */
21063 if (x == const_true_rtx)
21065 output_operand_lossage ("instruction never executed");
21066 return;
21068 if (!COMPARISON_P (x))
21070 output_operand_lossage ("invalid operand for code '%c'", code);
21071 return;
21074 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21075 (get_arm_condition_code (x))],
21076 stream);
21077 return;
21079 case 's':
21080 case 'V':
21081 case 'W':
21082 case 'X':
21083 case 'Y':
21084 case 'Z':
21085 /* Former Maverick support, removed after GCC-4.7. */
21086 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21087 return;
21089 case 'U':
21090 if (!REG_P (x)
21091 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21092 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21093 /* Bad value for wCG register number. */
21095 output_operand_lossage ("invalid operand for code '%c'", code);
21096 return;
21099 else
21100 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21101 return;
21103 /* Print an iWMMXt control register name. */
21104 case 'w':
21105 if (!CONST_INT_P (x)
21106 || INTVAL (x) < 0
21107 || INTVAL (x) >= 16)
21108 /* Bad value for wC register number. */
21110 output_operand_lossage ("invalid operand for code '%c'", code);
21111 return;
21114 else
21116 static const char * wc_reg_names [16] =
21118 "wCID", "wCon", "wCSSF", "wCASF",
21119 "wC4", "wC5", "wC6", "wC7",
21120 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21121 "wC12", "wC13", "wC14", "wC15"
21124 fputs (wc_reg_names [INTVAL (x)], stream);
21126 return;
21128 /* Print the high single-precision register of a VFP double-precision
21129 register. */
21130 case 'p':
21132 machine_mode mode = GET_MODE (x);
21133 int regno;
21135 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21137 output_operand_lossage ("invalid operand for code '%c'", code);
21138 return;
21141 regno = REGNO (x);
21142 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21144 output_operand_lossage ("invalid operand for code '%c'", code);
21145 return;
21148 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21150 return;
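/* For instance, under the usual numbering where d1 overlays the pair
   {s2,s3}, '%p' applied to a d1 operand prints "s3", the high
   single-precision half of that pair.  */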
21152 /* Print a VFP/Neon double precision or quad precision register name. */
21153 case 'P':
21154 case 'q':
21156 machine_mode mode = GET_MODE (x);
21157 int is_quad = (code == 'q');
21158 int regno;
21160 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21162 output_operand_lossage ("invalid operand for code '%c'", code);
21163 return;
21166 if (!REG_P (x)
21167 || !IS_VFP_REGNUM (REGNO (x)))
21169 output_operand_lossage ("invalid operand for code '%c'", code);
21170 return;
21173 regno = REGNO (x);
21174 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21175 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21177 output_operand_lossage ("invalid operand for code '%c'", code);
21178 return;
21181 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21182 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21184 return;
21186 /* These two codes print the low/high doubleword register of a Neon quad
21187 register, respectively. For pair-structure types, they can also print
21188 low/high quadword registers. */
21189 case 'e':
21190 case 'f':
21192 machine_mode mode = GET_MODE (x);
21193 int regno;
21195 if ((GET_MODE_SIZE (mode) != 16
21196 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21198 output_operand_lossage ("invalid operand for code '%c'", code);
21199 return;
21202 regno = REGNO (x);
21203 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21205 output_operand_lossage ("invalid operand for code '%c'", code);
21206 return;
21209 if (GET_MODE_SIZE (mode) == 16)
21210 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21211 + (code == 'f' ? 1 : 0));
21212 else
21213 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21214 + (code == 'f' ? 1 : 0));
21216 return;
21218 /* Print a VFPv3 floating-point constant, represented as an integer
21219 index. */
21220 case 'G':
21222 int index = vfp3_const_double_index (x);
21223 gcc_assert (index != -1);
21224 fprintf (stream, "%d", index);
21226 return;
21228 /* Print bits representing opcode features for Neon.
21230 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21231 and polynomials as unsigned.
21233 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21235 Bit 2 is 1 for rounding functions, 0 otherwise. */
21237 /* Identify the type as 's', 'u', 'p' or 'f'. */
21238 case 'T':
21240 HOST_WIDE_INT bits = INTVAL (x);
21241 fputc ("uspf"[bits & 3], stream);
21243 return;
21245 /* Likewise, but signed and unsigned integers are both 'i'. */
21246 case 'F':
21248 HOST_WIDE_INT bits = INTVAL (x);
21249 fputc ("iipf"[bits & 3], stream);
21251 return;
21253 /* As for 'T', but emit 'u' instead of 'p'. */
21254 case 't':
21256 HOST_WIDE_INT bits = INTVAL (x);
21257 fputc ("usuf"[bits & 3], stream);
21259 return;
21261 /* Bit 2: rounding (vs none). */
21262 case 'O':
21264 HOST_WIDE_INT bits = INTVAL (x);
21265 fputs ((bits & 4) != 0 ? "r" : "", stream);
21267 return;
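/* As a rough illustration of the bit encoding above: an operand with
   INTVAL 5 (binary 101) has bit 0 set (signed) and bit 2 set (rounding),
   so '%T' and '%t' print "s", '%F' prints "i" and '%O' prints "r" --
   i.e. a rounding, signed-integer flavour of the instruction.  The value
   5 here is purely an example.  */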
21269 /* Memory operand for vld1/vst1 instruction. */
21270 case 'A':
21272 rtx addr;
21273 bool postinc = FALSE;
21274 rtx postinc_reg = NULL;
21275 unsigned align, memsize, align_bits;
21277 gcc_assert (MEM_P (x));
21278 addr = XEXP (x, 0);
21279 if (GET_CODE (addr) == POST_INC)
21281 postinc = 1;
21282 addr = XEXP (addr, 0);
21284 if (GET_CODE (addr) == POST_MODIFY)
21286 postinc_reg = XEXP( XEXP (addr, 1), 1);
21287 addr = XEXP (addr, 0);
21289 asm_fprintf (stream, "[%r", REGNO (addr));
21291 /* We know the alignment of this access, so we can emit a hint in the
21292 instruction (for some alignments) as an aid to the memory subsystem
21293 of the target. */
21294 align = MEM_ALIGN (x) >> 3;
21295 memsize = MEM_SIZE (x);
21297 /* Only certain alignment specifiers are supported by the hardware. */
21298 if (memsize == 32 && (align % 32) == 0)
21299 align_bits = 256;
21300 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21301 align_bits = 128;
21302 else if (memsize >= 8 && (align % 8) == 0)
21303 align_bits = 64;
21304 else
21305 align_bits = 0;
21307 if (align_bits != 0)
21308 asm_fprintf (stream, ":%d", align_bits);
21310 asm_fprintf (stream, "]");
21312 if (postinc)
21313 fputs("!", stream);
21314 if (postinc_reg)
21315 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
21317 return;
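/* For instance, a 16-byte vld1/vst1 access whose MEM_ALIGN is a multiple
   of 16 bytes takes the align_bits == 128 branch above, so the operand
   prints as "[r0:128]" (with "!" or a trailing register appended for the
   post-increment forms).  The register number is of course just an
   example.  */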
21319 case 'C':
21321 rtx addr;
21323 gcc_assert (MEM_P (x));
21324 addr = XEXP (x, 0);
21325 gcc_assert (REG_P (addr));
21326 asm_fprintf (stream, "[%r]", REGNO (addr));
21328 return;
21330 /* Translate an S register number into a D register number and element index. */
21331 case 'y':
21333 machine_mode mode = GET_MODE (x);
21334 int regno;
21336 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21338 output_operand_lossage ("invalid operand for code '%c'", code);
21339 return;
21342 regno = REGNO (x);
21343 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21345 output_operand_lossage ("invalid operand for code '%c'", code);
21346 return;
21349 regno = regno - FIRST_VFP_REGNUM;
21350 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21352 return;
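/* As an example of this mapping: assuming FIRST_VFP_REGNUM corresponds
   to s0, the single-precision register s5 becomes offset 5 and prints as
   "d2[1]" -- the odd lane of the d2 register that overlays {s4,s5}.  */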
21354 case 'v':
21355 gcc_assert (CONST_DOUBLE_P (x));
21356 int result;
21357 result = vfp3_const_double_for_fract_bits (x);
21358 if (result == 0)
21359 result = vfp3_const_double_for_bits (x);
21360 fprintf (stream, "#%d", result);
21361 return;
21363 /* Register specifier for vld1.16/vst1.16. Translate the S register
21364 number into a D register number and element index. */
21365 case 'z':
21367 machine_mode mode = GET_MODE (x);
21368 int regno;
21370 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21372 output_operand_lossage ("invalid operand for code '%c'", code);
21373 return;
21376 regno = REGNO (x);
21377 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21379 output_operand_lossage ("invalid operand for code '%c'", code);
21380 return;
21383 regno = regno - FIRST_VFP_REGNUM;
21384 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21386 return;
21388 default:
21389 if (x == 0)
21391 output_operand_lossage ("missing operand");
21392 return;
21395 switch (GET_CODE (x))
21397 case REG:
21398 asm_fprintf (stream, "%r", REGNO (x));
21399 break;
21401 case MEM:
21402 output_address (GET_MODE (x), XEXP (x, 0));
21403 break;
21405 case CONST_DOUBLE:
21407 char fpstr[20];
21408 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21409 sizeof (fpstr), 0, 1);
21410 fprintf (stream, "#%s", fpstr);
21412 break;
21414 default:
21415 gcc_assert (GET_CODE (x) != NEG);
21416 fputc ('#', stream);
21417 if (GET_CODE (x) == HIGH)
21419 fputs (":lower16:", stream);
21420 x = XEXP (x, 0);
21423 output_addr_const (stream, x);
21424 break;
21429 /* Target hook for printing a memory address. */
21430 static void
21431 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
21433 if (TARGET_32BIT)
21435 int is_minus = GET_CODE (x) == MINUS;
21437 if (REG_P (x))
21438 asm_fprintf (stream, "[%r]", REGNO (x));
21439 else if (GET_CODE (x) == PLUS || is_minus)
21441 rtx base = XEXP (x, 0);
21442 rtx index = XEXP (x, 1);
21443 HOST_WIDE_INT offset = 0;
21444 if (!REG_P (base)
21445 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21447 /* Ensure that BASE is a register. */
21448 /* (one of them must be). */
21449 /* Also ensure the SP is not used as an index register. */
21450 std::swap (base, index);
21452 switch (GET_CODE (index))
21454 case CONST_INT:
21455 offset = INTVAL (index);
21456 if (is_minus)
21457 offset = -offset;
21458 asm_fprintf (stream, "[%r, #%wd]",
21459 REGNO (base), offset);
21460 break;
21462 case REG:
21463 asm_fprintf (stream, "[%r, %s%r]",
21464 REGNO (base), is_minus ? "-" : "",
21465 REGNO (index));
21466 break;
21468 case MULT:
21469 case ASHIFTRT:
21470 case LSHIFTRT:
21471 case ASHIFT:
21472 case ROTATERT:
21474 asm_fprintf (stream, "[%r, %s%r",
21475 REGNO (base), is_minus ? "-" : "",
21476 REGNO (XEXP (index, 0)));
21477 arm_print_operand (stream, index, 'S');
21478 fputs ("]", stream);
21479 break;
21482 default:
21483 gcc_unreachable ();
21486 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
21487 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
21489 gcc_assert (REG_P (XEXP (x, 0)));
21491 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
21492 asm_fprintf (stream, "[%r, #%s%d]!",
21493 REGNO (XEXP (x, 0)),
21494 GET_CODE (x) == PRE_DEC ? "-" : "",
21495 GET_MODE_SIZE (mode));
21496 else
21497 asm_fprintf (stream, "[%r], #%s%d",
21498 REGNO (XEXP (x, 0)),
21499 GET_CODE (x) == POST_DEC ? "-" : "",
21500 GET_MODE_SIZE (mode));
21502 else if (GET_CODE (x) == PRE_MODIFY)
21504 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
21505 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21506 asm_fprintf (stream, "#%wd]!",
21507 INTVAL (XEXP (XEXP (x, 1), 1)));
21508 else
21509 asm_fprintf (stream, "%r]!",
21510 REGNO (XEXP (XEXP (x, 1), 1)));
21512 else if (GET_CODE (x) == POST_MODIFY)
21514 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
21515 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21516 asm_fprintf (stream, "#%wd",
21517 INTVAL (XEXP (XEXP (x, 1), 1)));
21518 else
21519 asm_fprintf (stream, "%r",
21520 REGNO (XEXP (XEXP (x, 1), 1)));
21522 else output_addr_const (stream, x);
21524 else
21526 if (REG_P (x))
21527 asm_fprintf (stream, "[%r]", REGNO (x));
21528 else if (GET_CODE (x) == POST_INC)
21529 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
21530 else if (GET_CODE (x) == PLUS)
21532 gcc_assert (REG_P (XEXP (x, 0)));
21533 if (CONST_INT_P (XEXP (x, 1)))
21534 asm_fprintf (stream, "[%r, #%wd]",
21535 REGNO (XEXP (x, 0)),
21536 INTVAL (XEXP (x, 1)));
21537 else
21538 asm_fprintf (stream, "[%r, %r]",
21539 REGNO (XEXP (x, 0)),
21540 REGNO (XEXP (x, 1)));
21542 else
21543 output_addr_const (stream, x);
21547 /* Target hook for indicating whether a punctuation character for
21548 TARGET_PRINT_OPERAND is valid. */
21549 static bool
21550 arm_print_operand_punct_valid_p (unsigned char code)
21552 return (code == '@' || code == '|' || code == '.'
21553 || code == '(' || code == ')' || code == '#'
21554 || (TARGET_32BIT && (code == '?'))
21555 || (TARGET_THUMB2 && (code == '!'))
21556 || (TARGET_THUMB && (code == '_')));
21559 /* Target hook for assembling integer objects. The ARM version needs to
21560 handle word-sized values specially. */
21561 static bool
21562 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
21564 machine_mode mode;
21566 if (size == UNITS_PER_WORD && aligned_p)
21568 fputs ("\t.word\t", asm_out_file);
21569 output_addr_const (asm_out_file, x);
21571 /* Mark symbols as position independent. We only do this in the
21572 .text segment, not in the .data segment. */
21573 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
21574 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
21576 /* See legitimize_pic_address for an explanation of the
21577 TARGET_VXWORKS_RTP check. */
21578 if (!arm_pic_data_is_text_relative
21579 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
21580 fputs ("(GOT)", asm_out_file);
21581 else
21582 fputs ("(GOTOFF)", asm_out_file);
21584 fputc ('\n', asm_out_file);
21585 return true;
21588 mode = GET_MODE (x);
21590 if (arm_vector_mode_supported_p (mode))
21592 int i, units;
21594 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21596 units = CONST_VECTOR_NUNITS (x);
21597 size = GET_MODE_UNIT_SIZE (mode);
21599 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21600 for (i = 0; i < units; i++)
21602 rtx elt = CONST_VECTOR_ELT (x, i);
21603 assemble_integer
21604 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
21606 else
21607 for (i = 0; i < units; i++)
21609 rtx elt = CONST_VECTOR_ELT (x, i);
21610 assemble_real
21611 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
21612 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
21615 return true;
21618 return default_assemble_integer (x, size, aligned_p);
21621 static void
21622 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
21624 section *s;
21626 if (!TARGET_AAPCS_BASED)
21628 (is_ctor ?
21629 default_named_section_asm_out_constructor
21630 : default_named_section_asm_out_destructor) (symbol, priority);
21631 return;
21634 /* Put these in the .init_array section, using a special relocation. */
21635 if (priority != DEFAULT_INIT_PRIORITY)
21637 char buf[18];
21638 sprintf (buf, "%s.%.5u",
21639 is_ctor ? ".init_array" : ".fini_array",
21640 priority);
21641 s = get_section (buf, SECTION_WRITE, NULL_TREE);
21643 else if (is_ctor)
21644 s = ctors_section;
21645 else
21646 s = dtors_section;
21648 switch_to_section (s);
21649 assemble_align (POINTER_SIZE);
21650 fputs ("\t.word\t", asm_out_file);
21651 output_addr_const (asm_out_file, symbol);
21652 fputs ("(target1)\n", asm_out_file);
21655 /* Add a function to the list of static constructors. */
21657 static void
21658 arm_elf_asm_constructor (rtx symbol, int priority)
21660 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
21663 /* Add a function to the list of static destructors. */
21665 static void
21666 arm_elf_asm_destructor (rtx symbol, int priority)
21668 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
21671 /* A finite state machine takes care of noticing whether or not instructions
21672 can be conditionally executed, and thus decreases execution time and code
21673 size by deleting branch instructions. The fsm is controlled by
21674 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21676 /* The state of the fsm controlling condition codes are:
21677 0: normal, do nothing special
21678 1: make ASM_OUTPUT_OPCODE not output this instruction
21679 2: make ASM_OUTPUT_OPCODE not output this instruction
21680 3: make instructions conditional
21681 4: make instructions conditional
21683 State transitions (state->state by whom under condition):
21684 0 -> 1 final_prescan_insn if the `target' is a label
21685 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21686 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21687 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21688 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21689 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21690 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21691 (the target insn is arm_target_insn).
21693 If the jump clobbers the conditions then we use states 2 and 4.
21695 A similar thing can be done with conditional return insns.
21697 XXX In case the `target' is an unconditional branch, this conditionalising
21698 of the instructions always reduces code size, but not always execution
21699 time. But then, I want to reduce the code size to somewhere near what
21700 /bin/cc produces. */
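/* As a sketch of the transformation this fsm enables, a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
	mov	r2, #0
   .L1:

   can instead be emitted as

	cmp	r0, #0
	addne	r1, r1, #1
	movne	r2, #0

   (the register names are purely illustrative): the branch disappears and
   the formerly skipped instructions simply fail their condition when r0
   is zero.  */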
21702 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21703 instructions. When a COND_EXEC instruction is seen the subsequent
21704 instructions are scanned so that multiple conditional instructions can be
21705 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21706 specify the length and true/false mask for the IT block. These will be
21707 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
21709 /* Returns the index of the ARM condition code string in
21710 `arm_condition_codes', or ARM_NV if the comparison is invalid.
21711 COMPARISON should be an rtx like `(eq (...) (...))'. */
21713 enum arm_cond_code
21714 maybe_get_arm_condition_code (rtx comparison)
21716 machine_mode mode = GET_MODE (XEXP (comparison, 0));
21717 enum arm_cond_code code;
21718 enum rtx_code comp_code = GET_CODE (comparison);
21720 if (GET_MODE_CLASS (mode) != MODE_CC)
21721 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
21722 XEXP (comparison, 1));
21724 switch (mode)
21726 case CC_DNEmode: code = ARM_NE; goto dominance;
21727 case CC_DEQmode: code = ARM_EQ; goto dominance;
21728 case CC_DGEmode: code = ARM_GE; goto dominance;
21729 case CC_DGTmode: code = ARM_GT; goto dominance;
21730 case CC_DLEmode: code = ARM_LE; goto dominance;
21731 case CC_DLTmode: code = ARM_LT; goto dominance;
21732 case CC_DGEUmode: code = ARM_CS; goto dominance;
21733 case CC_DGTUmode: code = ARM_HI; goto dominance;
21734 case CC_DLEUmode: code = ARM_LS; goto dominance;
21735 case CC_DLTUmode: code = ARM_CC;
21737 dominance:
21738 if (comp_code == EQ)
21739 return ARM_INVERSE_CONDITION_CODE (code);
21740 if (comp_code == NE)
21741 return code;
21742 return ARM_NV;
21744 case CC_NOOVmode:
21745 switch (comp_code)
21747 case NE: return ARM_NE;
21748 case EQ: return ARM_EQ;
21749 case GE: return ARM_PL;
21750 case LT: return ARM_MI;
21751 default: return ARM_NV;
21754 case CC_Zmode:
21755 switch (comp_code)
21757 case NE: return ARM_NE;
21758 case EQ: return ARM_EQ;
21759 default: return ARM_NV;
21762 case CC_Nmode:
21763 switch (comp_code)
21765 case NE: return ARM_MI;
21766 case EQ: return ARM_PL;
21767 default: return ARM_NV;
21770 case CCFPEmode:
21771 case CCFPmode:
21772 /* We can handle all cases except UNEQ and LTGT. */
21773 switch (comp_code)
21775 case GE: return ARM_GE;
21776 case GT: return ARM_GT;
21777 case LE: return ARM_LS;
21778 case LT: return ARM_MI;
21779 case NE: return ARM_NE;
21780 case EQ: return ARM_EQ;
21781 case ORDERED: return ARM_VC;
21782 case UNORDERED: return ARM_VS;
21783 case UNLT: return ARM_LT;
21784 case UNLE: return ARM_LE;
21785 case UNGT: return ARM_HI;
21786 case UNGE: return ARM_PL;
21787 /* UNEQ and LTGT do not have a representation. */
21788 case UNEQ: /* Fall through. */
21789 case LTGT: /* Fall through. */
21790 default: return ARM_NV;
21793 case CC_SWPmode:
21794 switch (comp_code)
21796 case NE: return ARM_NE;
21797 case EQ: return ARM_EQ;
21798 case GE: return ARM_LE;
21799 case GT: return ARM_LT;
21800 case LE: return ARM_GE;
21801 case LT: return ARM_GT;
21802 case GEU: return ARM_LS;
21803 case GTU: return ARM_CC;
21804 case LEU: return ARM_CS;
21805 case LTU: return ARM_HI;
21806 default: return ARM_NV;
21809 case CC_Cmode:
21810 switch (comp_code)
21812 case LTU: return ARM_CS;
21813 case GEU: return ARM_CC;
21814 case NE: return ARM_CS;
21815 case EQ: return ARM_CC;
21816 default: return ARM_NV;
21819 case CC_CZmode:
21820 switch (comp_code)
21822 case NE: return ARM_NE;
21823 case EQ: return ARM_EQ;
21824 case GEU: return ARM_CS;
21825 case GTU: return ARM_HI;
21826 case LEU: return ARM_LS;
21827 case LTU: return ARM_CC;
21828 default: return ARM_NV;
21831 case CC_NCVmode:
21832 switch (comp_code)
21834 case GE: return ARM_GE;
21835 case LT: return ARM_LT;
21836 case GEU: return ARM_CS;
21837 case LTU: return ARM_CC;
21838 default: return ARM_NV;
21841 case CC_Vmode:
21842 switch (comp_code)
21844 case NE: return ARM_VS;
21845 case EQ: return ARM_VC;
21846 default: return ARM_NV;
21849 case CCmode:
21850 switch (comp_code)
21852 case NE: return ARM_NE;
21853 case EQ: return ARM_EQ;
21854 case GE: return ARM_GE;
21855 case GT: return ARM_GT;
21856 case LE: return ARM_LE;
21857 case LT: return ARM_LT;
21858 case GEU: return ARM_CS;
21859 case GTU: return ARM_HI;
21860 case LEU: return ARM_LS;
21861 case LTU: return ARM_CC;
21862 default: return ARM_NV;
21865 default: gcc_unreachable ();
21869 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
21870 static enum arm_cond_code
21871 get_arm_condition_code (rtx comparison)
21873 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
21874 gcc_assert (code != ARM_NV);
21875 return code;
21878 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
21879 instructions. */
21880 void
21881 thumb2_final_prescan_insn (rtx_insn *insn)
21883 rtx_insn *first_insn = insn;
21884 rtx body = PATTERN (insn);
21885 rtx predicate;
21886 enum arm_cond_code code;
21887 int n;
21888 int mask;
21889 int max;
21891 /* max_insns_skipped in the tune was already taken into account in the
21892 cost model of the ifcvt pass when generating COND_EXEC insns. At this
21893 stage just emit IT blocks as large as we can; it does not make sense
21894 to split the IT blocks. */
21895 max = MAX_INSN_PER_IT_BLOCK;
21897 /* Remove the previous insn from the count of insns to be output. */
21898 if (arm_condexec_count)
21899 arm_condexec_count--;
21901 /* Nothing to do if we are already inside a conditional block. */
21902 if (arm_condexec_count)
21903 return;
21905 if (GET_CODE (body) != COND_EXEC)
21906 return;
21908 /* Conditional jumps are implemented directly. */
21909 if (JUMP_P (insn))
21910 return;
21912 predicate = COND_EXEC_TEST (body);
21913 arm_current_cc = get_arm_condition_code (predicate);
21915 n = get_attr_ce_count (insn);
21916 arm_condexec_count = 1;
21917 arm_condexec_mask = (1 << n) - 1;
21918 arm_condexec_masklen = n;
21919 /* See if subsequent instructions can be combined into the same block. */
21920 for (;;)
21922 insn = next_nonnote_insn (insn);
21924 /* Jumping into the middle of an IT block is illegal, so a label or
21925 barrier terminates the block. */
21926 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
21927 break;
21929 body = PATTERN (insn);
21930 /* USE and CLOBBER aren't really insns, so just skip them. */
21931 if (GET_CODE (body) == USE
21932 || GET_CODE (body) == CLOBBER)
21933 continue;
21935 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
21936 if (GET_CODE (body) != COND_EXEC)
21937 break;
21938 /* Maximum number of conditionally executed instructions in a block. */
21939 n = get_attr_ce_count (insn);
21940 if (arm_condexec_masklen + n > max)
21941 break;
21943 predicate = COND_EXEC_TEST (body);
21944 code = get_arm_condition_code (predicate);
21945 mask = (1 << n) - 1;
21946 if (arm_current_cc == code)
21947 arm_condexec_mask |= (mask << arm_condexec_masklen);
21948 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
21949 break;
21951 arm_condexec_count++;
21952 arm_condexec_masklen += n;
21954 /* A jump must be the last instruction in a conditional block. */
21955 if (JUMP_P (insn))
21956 break;
21958 /* Restore recog_data (getting the attributes of other insns can
21959 destroy this array, but final.c assumes that it remains intact
21960 across this call). */
21961 extract_constrain_insn_cached (first_insn);
21964 void
21965 arm_final_prescan_insn (rtx_insn *insn)
21967 /* BODY will hold the body of INSN. */
21968 rtx body = PATTERN (insn);
21970 /* This will be 1 if trying to repeat the trick, and things need to be
21971 reversed if it appears to fail. */
21972 int reverse = 0;
21974 /* If we start with a return insn, we only succeed if we find another one. */
21975 int seeking_return = 0;
21976 enum rtx_code return_code = UNKNOWN;
21978 /* START_INSN will hold the insn from where we start looking. This is the
21979 first insn after the following code_label if REVERSE is true. */
21980 rtx_insn *start_insn = insn;
21982 /* If in state 4, check if the target branch is reached, in order to
21983 change back to state 0. */
21984 if (arm_ccfsm_state == 4)
21986 if (insn == arm_target_insn)
21988 arm_target_insn = NULL;
21989 arm_ccfsm_state = 0;
21991 return;
21994 /* If in state 3, it is possible to repeat the trick, if this insn is an
21995 unconditional branch to a label, and immediately following this branch
21996 is the previous target label which is only used once, and the label this
21997 branch jumps to is not too far off. */
21998 if (arm_ccfsm_state == 3)
22000 if (simplejump_p (insn))
22002 start_insn = next_nonnote_insn (start_insn);
22003 if (BARRIER_P (start_insn))
22005 /* XXX Isn't this always a barrier? */
22006 start_insn = next_nonnote_insn (start_insn);
22008 if (LABEL_P (start_insn)
22009 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22010 && LABEL_NUSES (start_insn) == 1)
22011 reverse = TRUE;
22012 else
22013 return;
22015 else if (ANY_RETURN_P (body))
22017 start_insn = next_nonnote_insn (start_insn);
22018 if (BARRIER_P (start_insn))
22019 start_insn = next_nonnote_insn (start_insn);
22020 if (LABEL_P (start_insn)
22021 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22022 && LABEL_NUSES (start_insn) == 1)
22024 reverse = TRUE;
22025 seeking_return = 1;
22026 return_code = GET_CODE (body);
22028 else
22029 return;
22031 else
22032 return;
22035 gcc_assert (!arm_ccfsm_state || reverse);
22036 if (!JUMP_P (insn))
22037 return;
22039 /* This jump might be paralleled with a clobber of the condition codes;
22040 the jump should always come first. */
22041 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22042 body = XVECEXP (body, 0, 0);
22044 if (reverse
22045 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22046 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22048 int insns_skipped;
22049 int fail = FALSE, succeed = FALSE;
22050 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22051 int then_not_else = TRUE;
22052 rtx_insn *this_insn = start_insn;
22053 rtx label = 0;
22055 /* Register the insn jumped to. */
22056 if (reverse)
22058 if (!seeking_return)
22059 label = XEXP (SET_SRC (body), 0);
22061 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22062 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22063 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22065 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22066 then_not_else = FALSE;
22068 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22070 seeking_return = 1;
22071 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22073 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22075 seeking_return = 1;
22076 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22077 then_not_else = FALSE;
22079 else
22080 gcc_unreachable ();
22082 /* See how many insns this branch skips, and what kind of insns. If all
22083 insns are okay, and the label or unconditional branch to the same
22084 label is not too far away, succeed. */
22085 for (insns_skipped = 0;
22086 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22088 rtx scanbody;
22090 this_insn = next_nonnote_insn (this_insn);
22091 if (!this_insn)
22092 break;
22094 switch (GET_CODE (this_insn))
22096 case CODE_LABEL:
22097 /* Succeed if it is the target label, otherwise fail since
22098 control falls in from somewhere else. */
22099 if (this_insn == label)
22101 arm_ccfsm_state = 1;
22102 succeed = TRUE;
22104 else
22105 fail = TRUE;
22106 break;
22108 case BARRIER:
22109 /* Succeed if the following insn is the target label.
22110 Otherwise fail.
22111 If return insns are used then the last insn in a function
22112 will be a barrier. */
22113 this_insn = next_nonnote_insn (this_insn);
22114 if (this_insn && this_insn == label)
22116 arm_ccfsm_state = 1;
22117 succeed = TRUE;
22119 else
22120 fail = TRUE;
22121 break;
22123 case CALL_INSN:
22124 /* The AAPCS says that conditional calls should not be
22125 used since they make interworking inefficient (the
22126 linker can't transform BL<cond> into BLX). That's
22127 only a problem if the machine has BLX. */
22128 if (arm_arch5)
22130 fail = TRUE;
22131 break;
22134 /* Succeed if the following insn is the target label, or
22135 if the following two insns are a barrier and the
22136 target label. */
22137 this_insn = next_nonnote_insn (this_insn);
22138 if (this_insn && BARRIER_P (this_insn))
22139 this_insn = next_nonnote_insn (this_insn);
22141 if (this_insn && this_insn == label
22142 && insns_skipped < max_insns_skipped)
22144 arm_ccfsm_state = 1;
22145 succeed = TRUE;
22147 else
22148 fail = TRUE;
22149 break;
22151 case JUMP_INSN:
22152 /* If this is an unconditional branch to the same label, succeed.
22153 If it is to another label, do nothing. If it is conditional,
22154 fail. */
22155 /* XXX Probably, the tests for SET and the PC are
22156 unnecessary. */
22158 scanbody = PATTERN (this_insn);
22159 if (GET_CODE (scanbody) == SET
22160 && GET_CODE (SET_DEST (scanbody)) == PC)
22162 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22163 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22165 arm_ccfsm_state = 2;
22166 succeed = TRUE;
22168 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22169 fail = TRUE;
22171 /* Fail if a conditional return is undesirable (e.g. on a
22172 StrongARM), but still allow this if optimizing for size. */
22173 else if (GET_CODE (scanbody) == return_code
22174 && !use_return_insn (TRUE, NULL)
22175 && !optimize_size)
22176 fail = TRUE;
22177 else if (GET_CODE (scanbody) == return_code)
22179 arm_ccfsm_state = 2;
22180 succeed = TRUE;
22182 else if (GET_CODE (scanbody) == PARALLEL)
22184 switch (get_attr_conds (this_insn))
22186 case CONDS_NOCOND:
22187 break;
22188 default:
22189 fail = TRUE;
22190 break;
22193 else
22194 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22196 break;
22198 case INSN:
22199 /* Instructions using or affecting the condition codes make it
22200 fail. */
22201 scanbody = PATTERN (this_insn);
22202 if (!(GET_CODE (scanbody) == SET
22203 || GET_CODE (scanbody) == PARALLEL)
22204 || get_attr_conds (this_insn) != CONDS_NOCOND)
22205 fail = TRUE;
22206 break;
22208 default:
22209 break;
22212 if (succeed)
22214 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22215 arm_target_label = CODE_LABEL_NUMBER (label);
22216 else
22218 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22220 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22222 this_insn = next_nonnote_insn (this_insn);
22223 gcc_assert (!this_insn
22224 || (!BARRIER_P (this_insn)
22225 && !LABEL_P (this_insn)));
22227 if (!this_insn)
22229 /* Oh, dear! We ran off the end... give up. */
22230 extract_constrain_insn_cached (insn);
22231 arm_ccfsm_state = 0;
22232 arm_target_insn = NULL;
22233 return;
22235 arm_target_insn = this_insn;
22238 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22239 what it was. */
22240 if (!reverse)
22241 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22243 if (reverse || then_not_else)
22244 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22247 /* Restore recog_data (getting the attributes of other insns can
22248 destroy this array, but final.c assumes that it remains intact
22249 across this call). */
22250 extract_constrain_insn_cached (insn);
22254 /* Output IT instructions. */
22255 void
22256 thumb2_asm_output_opcode (FILE * stream)
22258 char buff[5];
22259 int n;
22261 if (arm_condexec_mask)
22263 for (n = 0; n < arm_condexec_masklen; n++)
22264 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22265 buff[n] = 0;
22266 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22267 arm_condition_codes[arm_current_cc]);
22268 arm_condexec_mask = 0;
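/* For example, with arm_current_cc == ARM_EQ, arm_condexec_masklen == 3
   and arm_condexec_mask == binary 011, the loop above builds "tte" and
   prints the prefix "itte eq": the first two instructions of the block
   execute when EQ holds and the third when it does not.  These values
   are only an illustration of the encoding, not taken from a real insn
   stream.  */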
22272 /* Returns true if REGNO is a valid register
22273 for holding a quantity of type MODE. */
22275 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22277 if (GET_MODE_CLASS (mode) == MODE_CC)
22278 return (regno == CC_REGNUM
22279 || (TARGET_HARD_FLOAT
22280 && regno == VFPCC_REGNUM));
22282 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
22283 return false;
22285 if (TARGET_THUMB1)
22286 /* For the Thumb we only allow values bigger than SImode in
22287 registers 0 - 6, so that there is always a second low
22288 register available to hold the upper part of the value.
22289 We probably ought to ensure that the register is the
22290 start of an even numbered register pair. */
22291 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22293 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
22295 if (mode == SFmode || mode == SImode)
22296 return VFP_REGNO_OK_FOR_SINGLE (regno);
22298 if (mode == DFmode)
22299 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22301 if (mode == HFmode)
22302 return VFP_REGNO_OK_FOR_SINGLE (regno);
22304 /* VFP registers can hold HImode values. */
22305 if (mode == HImode)
22306 return VFP_REGNO_OK_FOR_SINGLE (regno);
22308 if (TARGET_NEON)
22309 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22310 || (VALID_NEON_QREG_MODE (mode)
22311 && NEON_REGNO_OK_FOR_QUAD (regno))
22312 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22313 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22314 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22315 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22316 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22318 return FALSE;
22321 if (TARGET_REALLY_IWMMXT)
22323 if (IS_IWMMXT_GR_REGNUM (regno))
22324 return mode == SImode;
22326 if (IS_IWMMXT_REGNUM (regno))
22327 return VALID_IWMMXT_REG_MODE (mode);
22330 /* We allow almost any value to be stored in the general registers.
22331 Restrict doubleword quantities to even register pairs in ARM state
22332 so that we can use ldrd. Do not allow very large Neon structure
22333 opaque modes in general registers; they would use too many. */
22334 if (regno <= LAST_ARM_REGNUM)
22336 if (ARM_NUM_REGS (mode) > 4)
22337 return FALSE;
22339 if (TARGET_THUMB2)
22340 return TRUE;
22342 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
22345 if (regno == FRAME_POINTER_REGNUM
22346 || regno == ARG_POINTER_REGNUM)
22347 /* We only allow integers in the fake hard registers. */
22348 return GET_MODE_CLASS (mode) == MODE_INT;
22350 return FALSE;
22353 /* Implement MODES_TIEABLE_P. */
22355 bool
22356 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
22358 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22359 return true;
22361 /* We specifically want to allow elements of "structure" modes to
22362 be tieable to the structure. This more general condition allows
22363 other rarer situations too. */
22364 if (TARGET_NEON
22365 && (VALID_NEON_DREG_MODE (mode1)
22366 || VALID_NEON_QREG_MODE (mode1)
22367 || VALID_NEON_STRUCT_MODE (mode1))
22368 && (VALID_NEON_DREG_MODE (mode2)
22369 || VALID_NEON_QREG_MODE (mode2)
22370 || VALID_NEON_STRUCT_MODE (mode2)))
22371 return true;
22373 return false;
22376 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22377 not used in arm mode. */
22379 enum reg_class
22380 arm_regno_class (int regno)
22382 if (regno == PC_REGNUM)
22383 return NO_REGS;
22385 if (TARGET_THUMB1)
22387 if (regno == STACK_POINTER_REGNUM)
22388 return STACK_REG;
22389 if (regno == CC_REGNUM)
22390 return CC_REG;
22391 if (regno < 8)
22392 return LO_REGS;
22393 return HI_REGS;
22396 if (TARGET_THUMB2 && regno < 8)
22397 return LO_REGS;
22399 if ( regno <= LAST_ARM_REGNUM
22400 || regno == FRAME_POINTER_REGNUM
22401 || regno == ARG_POINTER_REGNUM)
22402 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22404 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22405 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22407 if (IS_VFP_REGNUM (regno))
22409 if (regno <= D7_VFP_REGNUM)
22410 return VFP_D0_D7_REGS;
22411 else if (regno <= LAST_LO_VFP_REGNUM)
22412 return VFP_LO_REGS;
22413 else
22414 return VFP_HI_REGS;
22417 if (IS_IWMMXT_REGNUM (regno))
22418 return IWMMXT_REGS;
22420 if (IS_IWMMXT_GR_REGNUM (regno))
22421 return IWMMXT_GR_REGS;
22423 return NO_REGS;
22426 /* Handle a special case when computing the offset
22427 of an argument from the frame pointer. */
22429 arm_debugger_arg_offset (int value, rtx addr)
22431 rtx_insn *insn;
22433 /* We are only interested if dbxout_parms() failed to compute the offset. */
22434 if (value != 0)
22435 return 0;
22437 /* We can only cope with the case where the address is held in a register. */
22438 if (!REG_P (addr))
22439 return 0;
22441 /* If we are using the frame pointer to point at the argument, then
22442 an offset of 0 is correct. */
22443 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22444 return 0;
22446 /* If we are using the stack pointer to point at the
22447 argument, then an offset of 0 is correct. */
22448 /* ??? Check this is consistent with thumb2 frame layout. */
22449 if ((TARGET_THUMB || !frame_pointer_needed)
22450 && REGNO (addr) == SP_REGNUM)
22451 return 0;
22453 /* Oh dear. The argument is pointed to by a register rather
22454 than being held in a register, or being stored at a known
22455 offset from the frame pointer. Since GDB only understands
22456 those two kinds of argument we must translate the address
22457 held in the register into an offset from the frame pointer.
22458 We do this by searching through the insns for the function
22459 looking to see where this register gets its value. If the
22460 register is initialized from the frame pointer plus an offset
22461 then we are in luck and we can continue, otherwise we give up.
22463 This code is exercised by producing debugging information
22464 for a function with arguments like this:
22466 double func (double a, double b, int c, double d) {return d;}
22468 Without this code the stab for parameter 'd' will be set to
22469 an offset of 0 from the frame pointer, rather than 8. */
22471 /* The if() statement says:
22473 If the insn is a normal instruction
22474 and if the insn is setting the value in a register
22475 and if the register being set is the register holding the address of the argument
22476 and if the address is computed by an addition
22477 that involves adding to a register
22478 which is the frame pointer
22479 a constant integer
22481 then... */
22483 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22485 if ( NONJUMP_INSN_P (insn)
22486 && GET_CODE (PATTERN (insn)) == SET
22487 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
22488 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
22489 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
22490 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22491 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
22494 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
22496 break;
22500 if (value == 0)
22502 debug_rtx (addr);
22503 warning (0, "unable to compute real location of stacked parameter");
22504 value = 8; /* XXX magic hack */
22507 return value;
22510 /* Implement TARGET_PROMOTED_TYPE. */
22512 static tree
22513 arm_promoted_type (const_tree t)
22515 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
22516 return float_type_node;
22517 return NULL_TREE;
22520 /* Implement TARGET_CONVERT_TO_TYPE.
22521 Specifically, this hook implements the peculiarity of the ARM
22522 half-precision floating-point C semantics that requires conversions between
22523 __fp16 to or from double to do an intermediate conversion to float. */
22525 static tree
22526 arm_convert_to_type (tree type, tree expr)
22528 tree fromtype = TREE_TYPE (expr);
22529 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
22530 return NULL_TREE;
22531 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
22532 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
22533 return convert (type, convert (float_type_node, expr));
22534 return NULL_TREE;
22537 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
22538 This simply adds HFmode as a supported mode; even though we don't
22539 implement arithmetic on this type directly, it's supported by
22540 optabs conversions, much the way the double-word arithmetic is
22541 special-cased in the default hook. */
22543 static bool
22544 arm_scalar_mode_supported_p (machine_mode mode)
22546 if (mode == HFmode)
22547 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
22548 else if (ALL_FIXED_POINT_MODE_P (mode))
22549 return true;
22550 else
22551 return default_scalar_mode_supported_p (mode);
22554 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
22555 not to early-clobber SRC registers in the process.
22557 We assume that the operands described by SRC and DEST represent a
22558 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
22559 number of components into which the copy has been decomposed. */
22560 void
22561 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
22563 unsigned int i;
22565 if (!reg_overlap_mentioned_p (operands[0], operands[1])
22566 || REGNO (operands[0]) < REGNO (operands[1]))
22568 for (i = 0; i < count; i++)
22570 operands[2 * i] = dest[i];
22571 operands[2 * i + 1] = src[i];
22574 else
22576 for (i = 0; i < count; i++)
22578 operands[2 * i] = dest[count - i - 1];
22579 operands[2 * i + 1] = src[count - i - 1];
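/* A concrete case of the ordering above: copying the overlapping pair
   {d0,d1} into {d1,d2} must emit the d2 <- d1 move before the d1 <- d0
   move, otherwise the first move would clobber a source that is still
   needed.  Because the destination starts at a higher register number
   than the source, the reversed loop above produces exactly that order
   (the particular registers are only an example).  */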
22584 /* Split operands into moves from op[1] + op[2] into op[0]. */
22586 void
22587 neon_split_vcombine (rtx operands[3])
22589 unsigned int dest = REGNO (operands[0]);
22590 unsigned int src1 = REGNO (operands[1]);
22591 unsigned int src2 = REGNO (operands[2]);
22592 machine_mode halfmode = GET_MODE (operands[1]);
22593 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
22594 rtx destlo, desthi;
22596 if (src1 == dest && src2 == dest + halfregs)
22598 /* No-op move. Can't split to nothing; emit something. */
22599 emit_note (NOTE_INSN_DELETED);
22600 return;
22603 /* Preserve register attributes for variable tracking. */
22604 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
22605 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
22606 GET_MODE_SIZE (halfmode));
22608 /* Special case of reversed high/low parts. Use VSWP. */
22609 if (src2 == dest && src1 == dest + halfregs)
22611 rtx x = gen_rtx_SET (destlo, operands[1]);
22612 rtx y = gen_rtx_SET (desthi, operands[2]);
22613 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
22614 return;
22617 if (!reg_overlap_mentioned_p (operands[2], destlo))
22619 /* Try to avoid unnecessary moves if part of the result
22620 is in the right place already. */
22621 if (src1 != dest)
22622 emit_move_insn (destlo, operands[1]);
22623 if (src2 != dest + halfregs)
22624 emit_move_insn (desthi, operands[2]);
22626 else
22628 if (src2 != dest + halfregs)
22629 emit_move_insn (desthi, operands[2]);
22630 if (src1 != dest)
22631 emit_move_insn (destlo, operands[1]);
22635 /* Return the number (counting from 0) of
22636 the least significant set bit in MASK. */
22638 inline static int
22639 number_of_first_bit_set (unsigned mask)
22641 return ctz_hwi (mask);
22644 /* Like emit_multi_reg_push, but allowing for a different set of
22645 registers to be described as saved. MASK is the set of registers
22646 to be saved; REAL_REGS is the set of registers to be described as
22647 saved. If REAL_REGS is 0, only describe the stack adjustment. */
22649 static rtx_insn *
22650 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
22652 unsigned long regno;
22653 rtx par[10], tmp, reg;
22654 rtx_insn *insn;
22655 int i, j;
22657 /* Build the parallel of the registers actually being stored. */
22658 for (i = 0; mask; ++i, mask &= mask - 1)
22660 regno = ctz_hwi (mask);
22661 reg = gen_rtx_REG (SImode, regno);
22663 if (i == 0)
22664 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
22665 else
22666 tmp = gen_rtx_USE (VOIDmode, reg);
22668 par[i] = tmp;
22671 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
22672 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22673 tmp = gen_frame_mem (BLKmode, tmp);
22674 tmp = gen_rtx_SET (tmp, par[0]);
22675 par[0] = tmp;
22677 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
22678 insn = emit_insn (tmp);
22680 /* Always build the stack adjustment note for unwind info. */
22681 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
22682 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
22683 par[0] = tmp;
22685 /* Build the parallel of the registers recorded as saved for unwind. */
22686 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
22688 regno = ctz_hwi (real_regs);
22689 reg = gen_rtx_REG (SImode, regno);
22691 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
22692 tmp = gen_frame_mem (SImode, tmp);
22693 tmp = gen_rtx_SET (tmp, reg);
22694 RTX_FRAME_RELATED_P (tmp) = 1;
22695 par[j + 1] = tmp;
22698 if (j == 0)
22699 tmp = par[0];
22700 else
22702 RTX_FRAME_RELATED_P (par[0]) = 1;
22703 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
22706 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
22708 return insn;
22711 /* Emit code to push or pop registers to or from the stack. F is the
22712 assembly file. MASK is the registers to pop. */
22713 static void
22714 thumb_pop (FILE *f, unsigned long mask)
22716 int regno;
22717 int lo_mask = mask & 0xFF;
22718 int pushed_words = 0;
22720 gcc_assert (mask);
22722 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
22724 /* Special case. Do not generate a POP PC statement here, do it in
22725 thumb_exit(). */
22726 thumb_exit (f, -1);
22727 return;
22730 fprintf (f, "\tpop\t{");
22732 /* Look at the low registers first. */
22733 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
22735 if (lo_mask & 1)
22737 asm_fprintf (f, "%r", regno);
22739 if ((lo_mask & ~1) != 0)
22740 fprintf (f, ", ");
22742 pushed_words++;
22746 if (mask & (1 << PC_REGNUM))
22748 /* Catch popping the PC. */
22749 if (TARGET_INTERWORK || TARGET_BACKTRACE
22750 || crtl->calls_eh_return)
22752 /* The PC is never popped directly; instead
22753 it is popped into r3 and then BX is used. */
22754 fprintf (f, "}\n");
22756 thumb_exit (f, -1);
22758 return;
22760 else
22762 if (mask & 0xFF)
22763 fprintf (f, ", ");
22765 asm_fprintf (f, "%r", PC_REGNUM);
22769 fprintf (f, "}\n");
22772 /* Generate code to return from a thumb function.
22773 If 'reg_containing_return_addr' is -1, then the return address is
22774 actually on the stack, at the stack pointer. */
22775 static void
22776 thumb_exit (FILE *f, int reg_containing_return_addr)
22778 unsigned regs_available_for_popping;
22779 unsigned regs_to_pop;
22780 int pops_needed;
22781 unsigned available;
22782 unsigned required;
22783 machine_mode mode;
22784 int size;
22785 int restore_a4 = FALSE;
22787 /* Compute the registers we need to pop. */
22788 regs_to_pop = 0;
22789 pops_needed = 0;
22791 if (reg_containing_return_addr == -1)
22793 regs_to_pop |= 1 << LR_REGNUM;
22794 ++pops_needed;
22797 if (TARGET_BACKTRACE)
22799 /* Restore the (ARM) frame pointer and stack pointer. */
22800 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
22801 pops_needed += 2;
22804 /* If there is nothing to pop then just emit the BX instruction and
22805 return. */
22806 if (pops_needed == 0)
22808 if (crtl->calls_eh_return)
22809 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
22811 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
22812 return;
22814 /* Otherwise if we are not supporting interworking and we have not created
22815 a backtrace structure and the function was not entered in ARM mode then
22816 just pop the return address straight into the PC. */
22817 else if (!TARGET_INTERWORK
22818 && !TARGET_BACKTRACE
22819 && !is_called_in_ARM_mode (current_function_decl)
22820 && !crtl->calls_eh_return)
22822 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
22823 return;
22826 /* Find out how many of the (return) argument registers we can corrupt. */
22827 regs_available_for_popping = 0;
22829 /* If returning via __builtin_eh_return, the bottom three registers
22830 all contain information needed for the return. */
22831 if (crtl->calls_eh_return)
22832 size = 12;
22833 else
22835 /* We can deduce the registers used from the function's
22836 return value. This is more reliable than examining
22837 df_regs_ever_live_p () because that will be set if the register is
22838 ever used in the function, not just if the register is used
22839 to hold a return value. */
22841 if (crtl->return_rtx != 0)
22842 mode = GET_MODE (crtl->return_rtx);
22843 else
22844 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22846 size = GET_MODE_SIZE (mode);
22848 if (size == 0)
22850 /* In a void function we can use any argument register.
22851 In a function that returns a structure on the stack
22852 we can use the second and third argument registers. */
22853 if (mode == VOIDmode)
22854 regs_available_for_popping =
22855 (1 << ARG_REGISTER (1))
22856 | (1 << ARG_REGISTER (2))
22857 | (1 << ARG_REGISTER (3));
22858 else
22859 regs_available_for_popping =
22860 (1 << ARG_REGISTER (2))
22861 | (1 << ARG_REGISTER (3));
22863 else if (size <= 4)
22864 regs_available_for_popping =
22865 (1 << ARG_REGISTER (2))
22866 | (1 << ARG_REGISTER (3));
22867 else if (size <= 8)
22868 regs_available_for_popping =
22869 (1 << ARG_REGISTER (3));
22872 /* Match registers to be popped with registers into which we pop them. */
22873 for (available = regs_available_for_popping,
22874 required = regs_to_pop;
22875 required != 0 && available != 0;
22876 available &= ~(available & - available),
22877 required &= ~(required & - required))
22878 -- pops_needed;
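/* (AVAILABLE & -AVAILABLE) isolates the lowest set bit of AVAILABLE, so
   each iteration of the loop above retires one required register against
   one available register; e.g. an available mask of binary 1100 becomes
   1000 after a single step.  */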
22880 /* If we have any popping registers left over, remove them. */
22881 if (available > 0)
22882 regs_available_for_popping &= ~available;
22884 /* Otherwise if we need another popping register we can use
22885 the fourth argument register. */
22886 else if (pops_needed)
22888 /* If we have not found any free argument registers and
22889 reg a4 contains the return address, we must move it. */
22890 if (regs_available_for_popping == 0
22891 && reg_containing_return_addr == LAST_ARG_REGNUM)
22893 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
22894 reg_containing_return_addr = LR_REGNUM;
22896 else if (size > 12)
22898 /* Register a4 is being used to hold part of the return value,
22899 but we have dire need of a free, low register. */
22900 restore_a4 = TRUE;
22902 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
22905 if (reg_containing_return_addr != LAST_ARG_REGNUM)
22907 /* The fourth argument register is available. */
22908 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
22910 --pops_needed;
22914 /* Pop as many registers as we can. */
22915 thumb_pop (f, regs_available_for_popping);
22917 /* Process the registers we popped. */
22918 if (reg_containing_return_addr == -1)
22920 /* The return address was popped into the lowest numbered register. */
22921 regs_to_pop &= ~(1 << LR_REGNUM);
22923 reg_containing_return_addr =
22924 number_of_first_bit_set (regs_available_for_popping);
22926 /* Remove this register from the mask of available registers, so that
22927 the return address will not be corrupted by further pops. */
22928 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
22931 /* If we popped other registers then handle them here. */
22932 if (regs_available_for_popping)
22934 int frame_pointer;
22936 /* Work out which register currently contains the frame pointer. */
22937 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
22939 /* Move it into the correct place. */
22940 asm_fprintf (f, "\tmov\t%r, %r\n",
22941 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
22943 /* (Temporarily) remove it from the mask of popped registers. */
22944 regs_available_for_popping &= ~(1 << frame_pointer);
22945 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
22947 if (regs_available_for_popping)
22949 int stack_pointer;
22951 /* We popped the stack pointer as well,
22952 find the register that contains it. */
22953 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
22955 /* Move it into the stack register. */
22956 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
22958 /* At this point we have popped all necessary registers, so
22959 do not worry about restoring regs_available_for_popping
22960 to its correct value:
22962 assert (pops_needed == 0)
22963 assert (regs_available_for_popping == (1 << frame_pointer))
22964 assert (regs_to_pop == (1 << STACK_POINTER)) */
22966 else
22968 /* Since we have just moved the popped value into the frame
22969 pointer, the popping register is available for reuse, and
22970 we know that we still have the stack pointer left to pop. */
22971 regs_available_for_popping |= (1 << frame_pointer);
22975 /* If we still have registers left on the stack, but we no longer have
22976 any registers into which we can pop them, then we must move the return
22977 address into the link register and make available the register that
22978 contained it. */
22979 if (regs_available_for_popping == 0 && pops_needed > 0)
22981 regs_available_for_popping |= 1 << reg_containing_return_addr;
22983 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
22984 reg_containing_return_addr);
22986 reg_containing_return_addr = LR_REGNUM;
22989 /* If we have registers left on the stack then pop some more.
22990 We know that at most we will want to pop FP and SP. */
22991 if (pops_needed > 0)
22993 int popped_into;
22994 int move_to;
22996 thumb_pop (f, regs_available_for_popping);
22998 /* We have popped either FP or SP.
22999 Move whichever one it is into the correct register. */
23000 popped_into = number_of_first_bit_set (regs_available_for_popping);
23001 move_to = number_of_first_bit_set (regs_to_pop);
23003 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23005 regs_to_pop &= ~(1 << move_to);
23007 --pops_needed;
23010 /* If we still have not popped everything then we must have only
23011 had one register available to us and we are now popping the SP. */
23012 if (pops_needed > 0)
23014 int popped_into;
23016 thumb_pop (f, regs_available_for_popping);
23018 popped_into = number_of_first_bit_set (regs_available_for_popping);
23020 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23022 assert (regs_to_pop == (1 << STACK_POINTER))
23023 assert (pops_needed == 1)
23027 /* If necessary restore the a4 register. */
23028 if (restore_a4)
23030 if (reg_containing_return_addr != LR_REGNUM)
23032 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23033 reg_containing_return_addr = LR_REGNUM;
23036 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23039 if (crtl->calls_eh_return)
23040 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23042 /* Return to caller. */
23043 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23046 /* Scan INSN just before assembler is output for it.
23047 For Thumb-1, we track the status of the condition codes; this
23048 information is used in the cbranchsi4_insn pattern. */
23049 void
23050 thumb1_final_prescan_insn (rtx_insn *insn)
23052 if (flag_print_asm_name)
23053 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23054 INSN_ADDRESSES (INSN_UID (insn)));
23055 /* Don't overwrite the previous setter when we get to a cbranch. */
23056 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23058 enum attr_conds conds;
23060 if (cfun->machine->thumb1_cc_insn)
23062 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23063 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23064 CC_STATUS_INIT;
23066 conds = get_attr_conds (insn);
23067 if (conds == CONDS_SET)
23069 rtx set = single_set (insn);
23070 cfun->machine->thumb1_cc_insn = insn;
23071 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23072 cfun->machine->thumb1_cc_op1 = const0_rtx;
23073 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23074 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23076 rtx src1 = XEXP (SET_SRC (set), 1);
23077 if (src1 == const0_rtx)
23078 cfun->machine->thumb1_cc_mode = CCmode;
23080 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23082 /* Record the src register operand instead of dest because
23083 cprop_hardreg pass propagates src. */
23084 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23087 else if (conds != CONDS_NOCOND)
23088 cfun->machine->thumb1_cc_insn = NULL_RTX;
23091 /* Check if unexpected far jump is used. */
23092 if (cfun->machine->lr_save_eliminated
23093 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23094 internal_error("Unexpected thumb1 far jump");
23098 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23100 unsigned HOST_WIDE_INT mask = 0xff;
23101 int i;
23103 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23104 if (val == 0) /* XXX */
23105 return 0;
23107 for (i = 0; i < 25; i++)
23108 if ((val & (mask << i)) == val)
23109 return 1;
23111 return 0;
23114 /* Returns nonzero if the current function contains,
23115 or might contain a far jump. */
23116 static int
23117 thumb_far_jump_used_p (void)
23119 rtx_insn *insn;
23120 bool far_jump = false;
23121 unsigned int func_size = 0;
23123 /* This test is only important for leaf functions. */
23124 /* assert (!leaf_function_p ()); */
23126 /* If we have already decided that far jumps may be used,
23127 do not bother checking again, and always return true even if
23128 it turns out that they are not being used. Once we have made
23129 the decision that far jumps are present (and that hence the link
23130 register will be pushed onto the stack) we cannot go back on it. */
23131 if (cfun->machine->far_jump_used)
23132 return 1;
23134 /* If this function is not being called from the prologue/epilogue
23135 generation code then it must be being called from the
23136 INITIAL_ELIMINATION_OFFSET macro. */
23137 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23139 /* In this case we know that we are being asked about the elimination
23140 of the arg pointer register. If that register is not being used,
23141 then there are no arguments on the stack, and we do not have to
23142 worry that a far jump might force the prologue to push the link
23143 register, changing the stack offsets. In this case we can just
23144 return false, since the presence of far jumps in the function will
23145 not affect stack offsets.
23147 If the arg pointer is live (or if it was live, but has now been
23148 eliminated and so set to dead) then we do have to test to see if
23149 the function might contain a far jump. This test can lead to some
23150 false negatives, since before reload is completed, the length of
23151 branch instructions is not known, so gcc defaults to returning their
23152 longest length, which in turn sets the far jump attribute to true.
23154 A false negative will not result in bad code being generated, but it
23155 will result in a needless push and pop of the link register. We
23156 hope that this does not occur too often.
23158 If we need doubleword stack alignment this could affect the other
23159 elimination offsets so we can't risk getting it wrong. */
23160 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23161 cfun->machine->arg_pointer_live = 1;
23162 else if (!cfun->machine->arg_pointer_live)
23163 return 0;
23166 /* We should not change far_jump_used during or after reload, as there is
23167 no chance to change stack frame layout. */
23168 if (reload_in_progress || reload_completed)
23169 return 0;
23171 /* Check to see if the function contains a branch
23172 insn with the far jump attribute set. */
23173 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23175 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23177 far_jump = true;
23179 func_size += get_attr_length (insn);
23182 /* Attribute far_jump will always be true for thumb1 before
23183 shorten_branch pass. So checking the far_jump attribute before
23184 shorten_branch isn't very useful.
23186 Following heuristic tries to estimate more accurately if a far jump
23187 may finally be used. The heuristic is very conservative as there is
23188 no chance to roll-back the decision of not to use far jump.
23190 Thumb1 long branch offset is -2048 to 2046. The worst case is each
23191 2-byte insn is associated with a 4 byte constant pool. Using
23192 function size 2048/3 as the threshold is conservative enough. */
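/* Worked example of the heuristic above (added for illustration): func_size
   sums the insn lengths, and the worst case assumed is that every 2-byte
   insn drags a 4-byte literal into the pool, i.e. a real span of roughly
   func_size * 3 bytes.  At func_size == 683 that is 2049 bytes, beyond the
   2046-byte forward reach of a Thumb-1 branch, so far_jump_used is latched;
   at 682 bytes (2046) short branches are still assumed to suffice.  */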
23193 if (far_jump)
23195 if ((func_size * 3) >= 2048)
23197 /* Record the fact that we have decided that
23198 the function does use far jumps. */
23199 cfun->machine->far_jump_used = 1;
23200 return 1;
23204 return 0;
23207 /* Return nonzero if FUNC must be entered in ARM mode. */
23208 static bool
23209 is_called_in_ARM_mode (tree func)
23211 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23213 /* Ignore the problem about functions whose address is taken. */
23214 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23215 return true;
23217 #ifdef ARM_PE
23218 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23219 #else
23220 return false;
23221 #endif
23224 /* Given the stack offsets and register mask in OFFSETS, decide how
23225 many additional registers to push instead of subtracting a constant
23226 from SP. For epilogues the principle is the same except we use pop.
23227 FOR_PROLOGUE indicates which we're generating. */
23228 static int
23229 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23231 HOST_WIDE_INT amount;
23232 unsigned long live_regs_mask = offsets->saved_regs_mask;
23233 /* Extract a mask of the ones we can give to the Thumb's push/pop
23234 instruction. */
23235 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23236 /* Then count how many other high registers will need to be pushed. */
23237 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23238 int n_free, reg_base, size;
23240 if (!for_prologue && frame_pointer_needed)
23241 amount = offsets->locals_base - offsets->saved_regs;
23242 else
23243 amount = offsets->outgoing_args - offsets->saved_regs;
23245 /* If the stack frame size is 512 exactly, we can save one load
23246 instruction, which should make this a win even when optimizing
23247 for speed. */
23248 if (!optimize_size && amount != 512)
23249 return 0;
23251 /* Can't do this if there are high registers to push. */
23252 if (high_regs_pushed != 0)
23253 return 0;
23255 /* Shouldn't do it in the prologue if no registers would normally
23256 be pushed at all. In the epilogue, also allow it if we'll have
23257 a pop insn for the PC. */
23258 if (l_mask == 0
23259 && (for_prologue
23260 || TARGET_BACKTRACE
23261 || (live_regs_mask & 1 << LR_REGNUM) == 0
23262 || TARGET_INTERWORK
23263 || crtl->args.pretend_args_size != 0))
23264 return 0;
23266 /* Don't do this if thumb_expand_prologue wants to emit instructions
23267 between the push and the stack frame allocation. */
23268 if (for_prologue
23269 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
23270 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
23271 return 0;
23273 reg_base = 0;
23274 n_free = 0;
23275 if (!for_prologue)
23277 size = arm_size_return_regs ();
23278 reg_base = ARM_NUM_INTS (size);
23279 live_regs_mask >>= reg_base;
23282 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
23283 && (for_prologue || call_used_regs[reg_base + n_free]))
23285 live_regs_mask >>= 1;
23286 n_free++;
23289 if (n_free == 0)
23290 return 0;
23291 gcc_assert (amount / 4 * 4 == amount);
23293 if (amount >= 512 && (amount - n_free * 4) < 512)
23294 return (amount - 508) / 4;
23295 if (amount <= n_free * 4)
23296 return amount / 4;
23297 return 0;
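/* Illustrative numbers for the return values above (not part of the original
   source): with a frame of amount == 512 and one free low register
   (n_free == 1), the first test fires and we return (512 - 508) / 4 == 1,
   i.e. push one extra register so the remaining adjustment of 508 fits a
   single Thumb-1 add/sub of SP.  With a small frame such as amount == 8 and
   n_free >= 2 (only reached when optimizing for size), the second test
   returns 8 / 4 == 2 and the whole adjustment folds into the push/pop.  */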
23300 /* The bits which aren't usefully expanded as rtl. */
23301 const char *
23302 thumb1_unexpanded_epilogue (void)
23304 arm_stack_offsets *offsets;
23305 int regno;
23306 unsigned long live_regs_mask = 0;
23307 int high_regs_pushed = 0;
23308 int extra_pop;
23309 int had_to_push_lr;
23310 int size;
23312 if (cfun->machine->return_used_this_function != 0)
23313 return "";
23315 if (IS_NAKED (arm_current_func_type ()))
23316 return "";
23318 offsets = arm_get_frame_offsets ();
23319 live_regs_mask = offsets->saved_regs_mask;
23320 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23322 /* We can deduce the registers used from the function's return value.
23323 This is more reliable than examining df_regs_ever_live_p () because that
23324 will be set if the register is ever used in the function, not just if
23325 the register is used to hold a return value. */
23326 size = arm_size_return_regs ();
23328 extra_pop = thumb1_extra_regs_pushed (offsets, false);
23329 if (extra_pop > 0)
23331 unsigned long extra_mask = (1 << extra_pop) - 1;
23332 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
23335 /* The prolog may have pushed some high registers to use as
23336 work registers. e.g. the testsuite file:
23337 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
23338 compiles to produce:
23339 push {r4, r5, r6, r7, lr}
23340 mov r7, r9
23341 mov r6, r8
23342 push {r6, r7}
23343 as part of the prolog. We have to undo that pushing here. */
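/* A matching undo sequence for the example above (illustrative, assuming the
   return value leaves r2 and r3 free) would be:
       pop  {r2, r3}
       mov  r8, r2
       mov  r9, r3
   i.e. pop the saved values into whatever low registers are available, then
   move them back up into the high registers.  */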
23345 if (high_regs_pushed)
23347 unsigned long mask = live_regs_mask & 0xff;
23348 int next_hi_reg;
23350 /* The available low registers depend on the size of the value we are
23351 returning. */
23352 if (size <= 12)
23353 mask |= 1 << 3;
23354 if (size <= 8)
23355 mask |= 1 << 2;
23357 if (mask == 0)
23358 /* Oh dear! We have no low registers into which we can pop
23359 high registers! */
23360 internal_error
23361 ("no low registers available for popping high registers");
23363 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
23364 if (live_regs_mask & (1 << next_hi_reg))
23365 break;
23367 while (high_regs_pushed)
23369 /* Find lo register(s) into which the high register(s) can
23370 be popped. */
23371 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
23373 if (mask & (1 << regno))
23374 high_regs_pushed--;
23375 if (high_regs_pushed == 0)
23376 break;
23379 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
23381 /* Pop the values into the low register(s). */
23382 thumb_pop (asm_out_file, mask);
23384 /* Move the value(s) into the high registers. */
23385 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
23387 if (mask & (1 << regno))
23389 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
23390 regno);
23392 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
23393 if (live_regs_mask & (1 << next_hi_reg))
23394 break;
23398 live_regs_mask &= ~0x0f00;
23401 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
23402 live_regs_mask &= 0xff;
23404 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
23406 /* Pop the return address into the PC. */
23407 if (had_to_push_lr)
23408 live_regs_mask |= 1 << PC_REGNUM;
23410 /* Either no argument registers were pushed or a backtrace
23411 structure was created which includes an adjusted stack
23412 pointer, so just pop everything. */
23413 if (live_regs_mask)
23414 thumb_pop (asm_out_file, live_regs_mask);
23416 /* We have either just popped the return address into the
23417 PC or it was kept in LR for the entire function.
23418 Note that thumb_pop has already called thumb_exit if the
23419 PC was in the list. */
23420 if (!had_to_push_lr)
23421 thumb_exit (asm_out_file, LR_REGNUM);
23423 else
23425 /* Pop everything but the return address. */
23426 if (live_regs_mask)
23427 thumb_pop (asm_out_file, live_regs_mask);
23429 if (had_to_push_lr)
23431 if (size > 12)
23433 /* We have no free low regs, so save one. */
23434 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
23435 LAST_ARG_REGNUM);
23438 /* Get the return address into a temporary register. */
23439 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
23441 if (size > 12)
23443 /* Move the return address to lr. */
23444 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
23445 LAST_ARG_REGNUM);
23446 /* Restore the low register. */
23447 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
23448 IP_REGNUM);
23449 regno = LR_REGNUM;
23451 else
23452 regno = LAST_ARG_REGNUM;
23454 else
23455 regno = LR_REGNUM;
23457 /* Remove the argument registers that were pushed onto the stack. */
23458 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
23459 SP_REGNUM, SP_REGNUM,
23460 crtl->args.pretend_args_size);
23462 thumb_exit (asm_out_file, regno);
23465 return "";
23468 /* Functions to save and restore machine-specific function data. */
23469 static struct machine_function *
23470 arm_init_machine_status (void)
23472 struct machine_function *machine;
23473 machine = ggc_cleared_alloc<machine_function> ();
23475 #if ARM_FT_UNKNOWN != 0
23476 machine->func_type = ARM_FT_UNKNOWN;
23477 #endif
23478 return machine;
23481 /* Return an RTX indicating where the return address to the
23482 calling function can be found. */
23483 rtx
23484 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
23486 if (count != 0)
23487 return NULL_RTX;
23489 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
23492 /* Do anything needed before RTL is emitted for each function. */
23493 void
23494 arm_init_expanders (void)
23496 /* Arrange to initialize and mark the machine per-function status. */
23497 init_machine_status = arm_init_machine_status;
23499 /* This is to stop the combine pass optimizing away the alignment
23500 adjustment of va_arg. */
23501 /* ??? It is claimed that this should not be necessary. */
23502 if (cfun)
23503 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
23506 /* Check that FUNC is called with a different mode. */
23508 bool
23509 arm_change_mode_p (tree func)
23511 if (TREE_CODE (func) != FUNCTION_DECL)
23512 return false;
23514 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
23516 if (!callee_tree)
23517 callee_tree = target_option_default_node;
23519 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
23520 int flags = callee_opts->x_target_flags;
23522 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
23525 /* Like arm_compute_initial_elimination_offset. Simpler because there
23526 isn't an ABI specified frame pointer for Thumb. Instead, we set it
23527 to point at the base of the local variables after static stack
23528 space for a function has been allocated. */
23530 HOST_WIDE_INT
23531 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23533 arm_stack_offsets *offsets;
23535 offsets = arm_get_frame_offsets ();
23537 switch (from)
23539 case ARG_POINTER_REGNUM:
23540 switch (to)
23542 case STACK_POINTER_REGNUM:
23543 return offsets->outgoing_args - offsets->saved_args;
23545 case FRAME_POINTER_REGNUM:
23546 return offsets->soft_frame - offsets->saved_args;
23548 case ARM_HARD_FRAME_POINTER_REGNUM:
23549 return offsets->saved_regs - offsets->saved_args;
23551 case THUMB_HARD_FRAME_POINTER_REGNUM:
23552 return offsets->locals_base - offsets->saved_args;
23554 default:
23555 gcc_unreachable ();
23557 break;
23559 case FRAME_POINTER_REGNUM:
23560 switch (to)
23562 case STACK_POINTER_REGNUM:
23563 return offsets->outgoing_args - offsets->soft_frame;
23565 case ARM_HARD_FRAME_POINTER_REGNUM:
23566 return offsets->saved_regs - offsets->soft_frame;
23568 case THUMB_HARD_FRAME_POINTER_REGNUM:
23569 return offsets->locals_base - offsets->soft_frame;
23571 default:
23572 gcc_unreachable ();
23574 break;
23576 default:
23577 gcc_unreachable ();
23581 /* Generate the function's prologue. */
23583 void
23584 thumb1_expand_prologue (void)
23586 rtx_insn *insn;
23588 HOST_WIDE_INT amount;
23589 HOST_WIDE_INT size;
23590 arm_stack_offsets *offsets;
23591 unsigned long func_type;
23592 int regno;
23593 unsigned long live_regs_mask;
23594 unsigned long l_mask;
23595 unsigned high_regs_pushed = 0;
23597 func_type = arm_current_func_type ();
23599 /* Naked functions don't have prologues. */
23600 if (IS_NAKED (func_type))
23602 if (flag_stack_usage_info)
23603 current_function_static_stack_size = 0;
23604 return;
23607 if (IS_INTERRUPT (func_type))
23609 error ("interrupt Service Routines cannot be coded in Thumb mode");
23610 return;
23613 if (is_called_in_ARM_mode (current_function_decl))
23614 emit_insn (gen_prologue_thumb1_interwork ());
23616 offsets = arm_get_frame_offsets ();
23617 live_regs_mask = offsets->saved_regs_mask;
23619 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
23620 l_mask = live_regs_mask & 0x40ff;
23621 /* Then count how many other high registers will need to be pushed. */
23622 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23624 if (crtl->args.pretend_args_size)
23626 rtx x = GEN_INT (-crtl->args.pretend_args_size);
23628 if (cfun->machine->uses_anonymous_args)
23630 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
23631 unsigned long mask;
23633 mask = 1ul << (LAST_ARG_REGNUM + 1);
23634 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
23636 insn = thumb1_emit_multi_reg_push (mask, 0);
23638 else
23640 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23641 stack_pointer_rtx, x));
23643 RTX_FRAME_RELATED_P (insn) = 1;
23646 if (TARGET_BACKTRACE)
23648 HOST_WIDE_INT offset = 0;
23649 unsigned work_register;
23650 rtx work_reg, x, arm_hfp_rtx;
23652 /* We have been asked to create a stack backtrace structure.
23653 The code looks like this:
23655 0 .align 2
23656 0 func:
23657 0 sub SP, #16 Reserve space for 4 registers.
23658 2 push {R7} Push low registers.
23659 4 add R7, SP, #20 Get the stack pointer before the push.
23660 6 str R7, [SP, #8] Store the stack pointer
23661 (before reserving the space).
23662 8 mov R7, PC Get hold of the start of this code + 12.
23663 10 str R7, [SP, #16] Store it.
23664 12 mov R7, FP Get hold of the current frame pointer.
23665 14 str R7, [SP, #4] Store it.
23666 16 mov R7, LR Get hold of the current return address.
23667 18 str R7, [SP, #12] Store it.
23668 20 add R7, SP, #16 Point at the start of the
23669 backtrace structure.
23670 22 mov FP, R7 Put this value into the frame pointer. */
23672 work_register = thumb_find_work_register (live_regs_mask);
23673 work_reg = gen_rtx_REG (SImode, work_register);
23674 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
23676 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23677 stack_pointer_rtx, GEN_INT (-16)));
23678 RTX_FRAME_RELATED_P (insn) = 1;
23680 if (l_mask)
23682 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
23683 RTX_FRAME_RELATED_P (insn) = 1;
23685 offset = bit_count (l_mask) * UNITS_PER_WORD;
23688 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
23689 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
23691 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
23692 x = gen_frame_mem (SImode, x);
23693 emit_move_insn (x, work_reg);
23695 /* Make sure that the instruction fetching the PC is in the right place
23696 to calculate "start of backtrace creation code + 12". */
23697 /* ??? The stores using the common WORK_REG ought to be enough to
23698 prevent the scheduler from doing anything weird. Failing that
23699 we could always move all of the following into an UNSPEC_VOLATILE. */
23700 if (l_mask)
23702 x = gen_rtx_REG (SImode, PC_REGNUM);
23703 emit_move_insn (work_reg, x);
23705 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
23706 x = gen_frame_mem (SImode, x);
23707 emit_move_insn (x, work_reg);
23709 emit_move_insn (work_reg, arm_hfp_rtx);
23711 x = plus_constant (Pmode, stack_pointer_rtx, offset);
23712 x = gen_frame_mem (SImode, x);
23713 emit_move_insn (x, work_reg);
23715 else
23717 emit_move_insn (work_reg, arm_hfp_rtx);
23719 x = plus_constant (Pmode, stack_pointer_rtx, offset);
23720 x = gen_frame_mem (SImode, x);
23721 emit_move_insn (x, work_reg);
23723 x = gen_rtx_REG (SImode, PC_REGNUM);
23724 emit_move_insn (work_reg, x);
23726 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
23727 x = gen_frame_mem (SImode, x);
23728 emit_move_insn (x, work_reg);
23731 x = gen_rtx_REG (SImode, LR_REGNUM);
23732 emit_move_insn (work_reg, x);
23734 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
23735 x = gen_frame_mem (SImode, x);
23736 emit_move_insn (x, work_reg);
23738 x = GEN_INT (offset + 12);
23739 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
23741 emit_move_insn (arm_hfp_rtx, work_reg);
23743 /* Optimization: If we are not pushing any low registers but we are going
23744 to push some high registers then delay our first push. This will just
23745 be a push of LR and we can combine it with the push of the first high
23746 register. */
23747 else if ((l_mask & 0xff) != 0
23748 || (high_regs_pushed == 0 && l_mask))
23750 unsigned long mask = l_mask;
23751 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
23752 insn = thumb1_emit_multi_reg_push (mask, mask);
23753 RTX_FRAME_RELATED_P (insn) = 1;
23756 if (high_regs_pushed)
23758 unsigned pushable_regs;
23759 unsigned next_hi_reg;
23760 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
23761 : crtl->args.info.nregs;
23762 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
23764 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
23765 if (live_regs_mask & (1 << next_hi_reg))
23766 break;
23768 /* Here we need to mask out registers used for passing arguments,
23769 even if they could otherwise be pushed. This avoids using them to stash
23770 the high registers, since that could clobber the argument values. */
23771 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
23773 if (pushable_regs == 0)
23774 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
23776 while (high_regs_pushed > 0)
23778 unsigned long real_regs_mask = 0;
23780 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
23782 if (pushable_regs & (1 << regno))
23784 emit_move_insn (gen_rtx_REG (SImode, regno),
23785 gen_rtx_REG (SImode, next_hi_reg));
23787 high_regs_pushed --;
23788 real_regs_mask |= (1 << next_hi_reg);
23790 if (high_regs_pushed)
23792 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
23793 next_hi_reg --)
23794 if (live_regs_mask & (1 << next_hi_reg))
23795 break;
23797 else
23799 pushable_regs &= ~((1 << regno) - 1);
23800 break;
23805 /* If we had to find a work register and we have not yet
23806 saved the LR then add it to the list of regs to push. */
23807 if (l_mask == (1 << LR_REGNUM))
23809 pushable_regs |= l_mask;
23810 real_regs_mask |= l_mask;
23811 l_mask = 0;
23814 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
23815 RTX_FRAME_RELATED_P (insn) = 1;
23819 /* Load the pic register before setting the frame pointer,
23820 so we can use r7 as a temporary work register. */
23821 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23822 arm_load_pic_register (live_regs_mask);
23824 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
23825 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
23826 stack_pointer_rtx);
23828 size = offsets->outgoing_args - offsets->saved_args;
23829 if (flag_stack_usage_info)
23830 current_function_static_stack_size = size;
23832 /* If we have a frame, then do stack checking. FIXME: not implemented. */
23833 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
23834 sorry ("-fstack-check=specific for Thumb-1");
23836 amount = offsets->outgoing_args - offsets->saved_regs;
23837 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
23838 if (amount)
23840 if (amount < 512)
23842 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23843 GEN_INT (- amount)));
23844 RTX_FRAME_RELATED_P (insn) = 1;
23846 else
23848 rtx reg, dwarf;
23850 /* The stack decrement is too big for an immediate value in a single
23851 insn. In theory we could issue multiple subtracts, but after
23852 three of them it becomes more space efficient to place the full
23853 value in the constant pool and load into a register. (Also the
23854 ARM debugger really likes to see only one stack decrement per
23855 function). So instead we look for a scratch register into which
23856 we can load the decrement, and then we subtract this from the
23857 stack pointer. Unfortunately on the thumb the only available
23858 scratch registers are the argument registers, and we cannot use
23859 these as they may hold arguments to the function. Instead we
23860 attempt to locate a call preserved register which is used by this
23861 function. If we can find one, then we know that it will have
23862 been pushed at the start of the prologue and so we can corrupt
23863 it now. */
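/* Sketch of the resulting sequence (illustrative only), assuming a 1024-byte
   frame and r4 as the first live call-saved low register found below:
       ldr   r4, <pool entry holding -1024>
       add   sp, sp, r4
   Clobbering r4 here is safe because it was already pushed by the prologue
   code above, exactly as the comment explains.  */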
23864 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
23865 if (live_regs_mask & (1 << regno))
23866 break;
23868 gcc_assert(regno <= LAST_LO_REGNUM);
23870 reg = gen_rtx_REG (SImode, regno);
23872 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
23874 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23875 stack_pointer_rtx, reg));
23877 dwarf = gen_rtx_SET (stack_pointer_rtx,
23878 plus_constant (Pmode, stack_pointer_rtx,
23879 -amount));
23880 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23881 RTX_FRAME_RELATED_P (insn) = 1;
23885 if (frame_pointer_needed)
23886 thumb_set_frame_pointer (offsets);
23888 /* If we are profiling, make sure no instructions are scheduled before
23889 the call to mcount. Similarly if the user has requested no
23890 scheduling in the prolog. Similarly if we want non-call exceptions
23891 using the EABI unwinder, to prevent faulting instructions from being
23892 swapped with a stack adjustment. */
23893 if (crtl->profile || !TARGET_SCHED_PROLOG
23894 || (arm_except_unwind_info (&global_options) == UI_TARGET
23895 && cfun->can_throw_non_call_exceptions))
23896 emit_insn (gen_blockage ());
23898 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
23899 if (live_regs_mask & 0xff)
23900 cfun->machine->lr_save_eliminated = 0;
23903 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
23904 POP instruction can be generated. LR should be replaced by PC. All
23905 the checks required are already done by USE_RETURN_INSN (). Hence,
23906 all we really need to check here is if single register is to be
23907 returned, or multiple register return. */
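/* Illustration (added): a function that saved {r4, lr} returns here with
   roughly "pop {r4, pc}" via arm_emit_multi_reg_pop, while a function that
   saved only lr takes the single-register path below, which pops the return
   address straight into the pc through a post-incremented SP load.  */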
23908 void
23909 thumb2_expand_return (bool simple_return)
23911 int i, num_regs;
23912 unsigned long saved_regs_mask;
23913 arm_stack_offsets *offsets;
23915 offsets = arm_get_frame_offsets ();
23916 saved_regs_mask = offsets->saved_regs_mask;
23918 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
23919 if (saved_regs_mask & (1 << i))
23920 num_regs++;
23922 if (!simple_return && saved_regs_mask)
23924 if (num_regs == 1)
23926 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
23927 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
23928 rtx addr = gen_rtx_MEM (SImode,
23929 gen_rtx_POST_INC (SImode,
23930 stack_pointer_rtx));
23931 set_mem_alias_set (addr, get_frame_alias_set ());
23932 XVECEXP (par, 0, 0) = ret_rtx;
23933 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
23934 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
23935 emit_jump_insn (par);
23937 else
23939 saved_regs_mask &= ~ (1 << LR_REGNUM);
23940 saved_regs_mask |= (1 << PC_REGNUM);
23941 arm_emit_multi_reg_pop (saved_regs_mask);
23944 else
23946 emit_jump_insn (simple_return_rtx);
23950 void
23951 thumb1_expand_epilogue (void)
23953 HOST_WIDE_INT amount;
23954 arm_stack_offsets *offsets;
23955 int regno;
23957 /* Naked functions don't have epilogues. */
23958 if (IS_NAKED (arm_current_func_type ()))
23959 return;
23961 offsets = arm_get_frame_offsets ();
23962 amount = offsets->outgoing_args - offsets->saved_regs;
23964 if (frame_pointer_needed)
23966 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
23967 amount = offsets->locals_base - offsets->saved_regs;
23969 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
23971 gcc_assert (amount >= 0);
23972 if (amount)
23974 emit_insn (gen_blockage ());
23976 if (amount < 512)
23977 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23978 GEN_INT (amount)));
23979 else
23981 /* r3 is always free in the epilogue. */
23982 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
23984 emit_insn (gen_movsi (reg, GEN_INT (amount)));
23985 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
23989 /* Emit a USE (stack_pointer_rtx), so that
23990 the stack adjustment will not be deleted. */
23991 emit_insn (gen_force_register_use (stack_pointer_rtx));
23993 if (crtl->profile || !TARGET_SCHED_PROLOG)
23994 emit_insn (gen_blockage ());
23996 /* Emit a clobber for each insn that will be restored in the epilogue,
23997 so that flow2 will get register lifetimes correct. */
23998 for (regno = 0; regno < 13; regno++)
23999 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24000 emit_clobber (gen_rtx_REG (SImode, regno));
24002 if (! df_regs_ever_live_p (LR_REGNUM))
24003 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24006 /* Epilogue code for APCS frame. */
24007 static void
24008 arm_expand_epilogue_apcs_frame (bool really_return)
24010 unsigned long func_type;
24011 unsigned long saved_regs_mask;
24012 int num_regs = 0;
24013 int i;
24014 int floats_from_frame = 0;
24015 arm_stack_offsets *offsets;
24017 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24018 func_type = arm_current_func_type ();
24020 /* Get frame offsets for ARM. */
24021 offsets = arm_get_frame_offsets ();
24022 saved_regs_mask = offsets->saved_regs_mask;
24024 /* Find the offset of the floating-point save area in the frame. */
24025 floats_from_frame
24026 = (offsets->saved_args
24027 + arm_compute_static_chain_stack_bytes ()
24028 - offsets->frame);
24030 /* Compute how many core registers saved and how far away the floats are. */
24031 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24032 if (saved_regs_mask & (1 << i))
24034 num_regs++;
24035 floats_from_frame += 4;
24038 if (TARGET_HARD_FLOAT)
24040 int start_reg;
24041 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24043 /* The offset is from IP_REGNUM. */
24044 int saved_size = arm_get_vfp_saved_size ();
24045 if (saved_size > 0)
24047 rtx_insn *insn;
24048 floats_from_frame += saved_size;
24049 insn = emit_insn (gen_addsi3 (ip_rtx,
24050 hard_frame_pointer_rtx,
24051 GEN_INT (-floats_from_frame)));
24052 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24053 ip_rtx, hard_frame_pointer_rtx);
24056 /* Generate VFP register multi-pop. */
24057 start_reg = FIRST_VFP_REGNUM;
24059 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24060 /* Look for a case where a reg does not need restoring. */
24061 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24062 && (!df_regs_ever_live_p (i + 1)
24063 || call_used_regs[i + 1]))
24065 if (start_reg != i)
24066 arm_emit_vfp_multi_reg_pop (start_reg,
24067 (i - start_reg) / 2,
24068 gen_rtx_REG (SImode,
24069 IP_REGNUM));
24070 start_reg = i + 2;
24073 /* Restore the remaining regs that we have discovered (or possibly
24074 even all of them, if the conditional in the for loop never
24075 fired). */
24076 if (start_reg != i)
24077 arm_emit_vfp_multi_reg_pop (start_reg,
24078 (i - start_reg) / 2,
24079 gen_rtx_REG (SImode, IP_REGNUM));
24082 if (TARGET_IWMMXT)
24084 /* The frame pointer is guaranteed to be non-double-word aligned, as
24085 it is set to double-word-aligned old_stack_pointer - 4. */
24086 rtx_insn *insn;
24087 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24089 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24090 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24092 rtx addr = gen_frame_mem (V2SImode,
24093 plus_constant (Pmode, hard_frame_pointer_rtx,
24094 - lrm_count * 4));
24095 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24096 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24097 gen_rtx_REG (V2SImode, i),
24098 NULL_RTX);
24099 lrm_count += 2;
24103 /* saved_regs_mask should contain IP, which holds the old stack pointer
24104 from the time the frame was created. Since SP and IP are adjacent registers,
24105 we can restore the value directly into SP. */
24106 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24107 saved_regs_mask &= ~(1 << IP_REGNUM);
24108 saved_regs_mask |= (1 << SP_REGNUM);
24110 /* There are two registers left in saved_regs_mask - LR and PC. We
24111 only need to restore LR (the return address), but to
24112 save time we can load it directly into PC, unless we need a
24113 special function exit sequence, or we are not really returning. */
24114 if (really_return
24115 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24116 && !crtl->calls_eh_return)
24117 /* Delete LR from the register mask, so that LR on
24118 the stack is loaded into the PC in the register mask. */
24119 saved_regs_mask &= ~(1 << LR_REGNUM);
24120 else
24121 saved_regs_mask &= ~(1 << PC_REGNUM);
24123 num_regs = bit_count (saved_regs_mask);
24124 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24126 rtx_insn *insn;
24127 emit_insn (gen_blockage ());
24128 /* Unwind the stack to just below the saved registers. */
24129 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24130 hard_frame_pointer_rtx,
24131 GEN_INT (- 4 * num_regs)));
24133 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24134 stack_pointer_rtx, hard_frame_pointer_rtx);
24137 arm_emit_multi_reg_pop (saved_regs_mask);
24139 if (IS_INTERRUPT (func_type))
24141 /* Interrupt handlers will have pushed the
24142 IP onto the stack, so restore it now. */
24143 rtx_insn *insn;
24144 rtx addr = gen_rtx_MEM (SImode,
24145 gen_rtx_POST_INC (SImode,
24146 stack_pointer_rtx));
24147 set_mem_alias_set (addr, get_frame_alias_set ());
24148 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24149 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24150 gen_rtx_REG (SImode, IP_REGNUM),
24151 NULL_RTX);
24154 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24155 return;
24157 if (crtl->calls_eh_return)
24158 emit_insn (gen_addsi3 (stack_pointer_rtx,
24159 stack_pointer_rtx,
24160 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24162 if (IS_STACKALIGN (func_type))
24163 /* Restore the original stack pointer. Before prologue, the stack was
24164 realigned and the original stack pointer saved in r0. For details,
24165 see comment in arm_expand_prologue. */
24166 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
24168 emit_jump_insn (simple_return_rtx);
24171 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24172 function is not a sibcall. */
24173 void
24174 arm_expand_epilogue (bool really_return)
24176 unsigned long func_type;
24177 unsigned long saved_regs_mask;
24178 int num_regs = 0;
24179 int i;
24180 int amount;
24181 arm_stack_offsets *offsets;
24183 func_type = arm_current_func_type ();
24185 /* Naked functions don't have epilogues. Hence, generate a return pattern and
24186 let output_return_instruction take care of instruction emission if any. */
24187 if (IS_NAKED (func_type)
24188 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24190 if (really_return)
24191 emit_jump_insn (simple_return_rtx);
24192 return;
24195 /* If we are throwing an exception, then we really must be doing a
24196 return, so we can't tail-call. */
24197 gcc_assert (!crtl->calls_eh_return || really_return);
24199 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24201 arm_expand_epilogue_apcs_frame (really_return);
24202 return;
24205 /* Get frame offsets for ARM. */
24206 offsets = arm_get_frame_offsets ();
24207 saved_regs_mask = offsets->saved_regs_mask;
24208 num_regs = bit_count (saved_regs_mask);
24210 if (frame_pointer_needed)
24212 rtx_insn *insn;
24213 /* Restore stack pointer if necessary. */
24214 if (TARGET_ARM)
24216 /* In ARM mode, frame pointer points to first saved register.
24217 Restore stack pointer to last saved register. */
24218 amount = offsets->frame - offsets->saved_regs;
24220 /* Force out any pending memory operations that reference stacked data
24221 before stack de-allocation occurs. */
24222 emit_insn (gen_blockage ());
24223 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24224 hard_frame_pointer_rtx,
24225 GEN_INT (amount)));
24226 arm_add_cfa_adjust_cfa_note (insn, amount,
24227 stack_pointer_rtx,
24228 hard_frame_pointer_rtx);
24230 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24231 deleted. */
24232 emit_insn (gen_force_register_use (stack_pointer_rtx));
24234 else
24236 /* In Thumb-2 mode, the frame pointer points to the last saved
24237 register. */
24238 amount = offsets->locals_base - offsets->saved_regs;
24239 if (amount)
24241 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24242 hard_frame_pointer_rtx,
24243 GEN_INT (amount)));
24244 arm_add_cfa_adjust_cfa_note (insn, amount,
24245 hard_frame_pointer_rtx,
24246 hard_frame_pointer_rtx);
24249 /* Force out any pending memory operations that reference stacked data
24250 before stack de-allocation occurs. */
24251 emit_insn (gen_blockage ());
24252 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24253 hard_frame_pointer_rtx));
24254 arm_add_cfa_adjust_cfa_note (insn, 0,
24255 stack_pointer_rtx,
24256 hard_frame_pointer_rtx);
24257 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24258 deleted. */
24259 emit_insn (gen_force_register_use (stack_pointer_rtx));
24262 else
24264 /* Pop off outgoing args and local frame to adjust stack pointer to
24265 last saved register. */
24266 amount = offsets->outgoing_args - offsets->saved_regs;
24267 if (amount)
24269 rtx_insn *tmp;
24270 /* Force out any pending memory operations that reference stacked data
24271 before stack de-allocation occurs. */
24272 emit_insn (gen_blockage ());
24273 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24274 stack_pointer_rtx,
24275 GEN_INT (amount)));
24276 arm_add_cfa_adjust_cfa_note (tmp, amount,
24277 stack_pointer_rtx, stack_pointer_rtx);
24278 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24279 not deleted. */
24280 emit_insn (gen_force_register_use (stack_pointer_rtx));
24284 if (TARGET_HARD_FLOAT)
24286 /* Generate VFP register multi-pop. */
24287 int end_reg = LAST_VFP_REGNUM + 1;
24289 /* Scan the registers in reverse order. We need to match
24290 any groupings made in the prologue and generate matching
24291 vldm operations. The need to match groups is because,
24292 unlike pop, vldm can only do consecutive regs. */
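/* Illustration (not from the original source) of the grouping this loop
   recreates: if the prologue saved the call-saved registers d8, d9 and d11
   but d10 is dead, the backwards scan finds the gap at d10 and splits the
   restore into two multi-register pops, one covering d11 and one covering
   d8-d9, mirroring the consecutive-register groups that vstm/vldm require.  */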
24293 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
24294 /* Look for a case where a reg does not need restoring. */
24295 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24296 && (!df_regs_ever_live_p (i + 1)
24297 || call_used_regs[i + 1]))
24299 /* Restore the regs discovered so far (from reg+2 to
24300 end_reg). */
24301 if (end_reg > i + 2)
24302 arm_emit_vfp_multi_reg_pop (i + 2,
24303 (end_reg - (i + 2)) / 2,
24304 stack_pointer_rtx);
24305 end_reg = i;
24308 /* Restore the remaining regs that we have discovered (or possibly
24309 even all of them, if the conditional in the for loop never
24310 fired). */
24311 if (end_reg > i + 2)
24312 arm_emit_vfp_multi_reg_pop (i + 2,
24313 (end_reg - (i + 2)) / 2,
24314 stack_pointer_rtx);
24317 if (TARGET_IWMMXT)
24318 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
24319 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24321 rtx_insn *insn;
24322 rtx addr = gen_rtx_MEM (V2SImode,
24323 gen_rtx_POST_INC (SImode,
24324 stack_pointer_rtx));
24325 set_mem_alias_set (addr, get_frame_alias_set ());
24326 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24327 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24328 gen_rtx_REG (V2SImode, i),
24329 NULL_RTX);
24330 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24331 stack_pointer_rtx, stack_pointer_rtx);
24334 if (saved_regs_mask)
24336 rtx insn;
24337 bool return_in_pc = false;
24339 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
24340 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
24341 && !IS_STACKALIGN (func_type)
24342 && really_return
24343 && crtl->args.pretend_args_size == 0
24344 && saved_regs_mask & (1 << LR_REGNUM)
24345 && !crtl->calls_eh_return)
24347 saved_regs_mask &= ~(1 << LR_REGNUM);
24348 saved_regs_mask |= (1 << PC_REGNUM);
24349 return_in_pc = true;
24352 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
24354 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24355 if (saved_regs_mask & (1 << i))
24357 rtx addr = gen_rtx_MEM (SImode,
24358 gen_rtx_POST_INC (SImode,
24359 stack_pointer_rtx));
24360 set_mem_alias_set (addr, get_frame_alias_set ());
24362 if (i == PC_REGNUM)
24364 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24365 XVECEXP (insn, 0, 0) = ret_rtx;
24366 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
24367 addr);
24368 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
24369 insn = emit_jump_insn (insn);
24371 else
24373 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
24374 addr));
24375 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24376 gen_rtx_REG (SImode, i),
24377 NULL_RTX);
24378 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24379 stack_pointer_rtx,
24380 stack_pointer_rtx);
24384 else
24386 if (TARGET_LDRD
24387 && current_tune->prefer_ldrd_strd
24388 && !optimize_function_for_size_p (cfun))
24390 if (TARGET_THUMB2)
24391 thumb2_emit_ldrd_pop (saved_regs_mask);
24392 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
24393 arm_emit_ldrd_pop (saved_regs_mask);
24394 else
24395 arm_emit_multi_reg_pop (saved_regs_mask);
24397 else
24398 arm_emit_multi_reg_pop (saved_regs_mask);
24401 if (return_in_pc)
24402 return;
24405 amount
24406 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
24407 if (amount)
24409 int i, j;
24410 rtx dwarf = NULL_RTX;
24411 rtx_insn *tmp =
24412 emit_insn (gen_addsi3 (stack_pointer_rtx,
24413 stack_pointer_rtx,
24414 GEN_INT (amount)));
24416 RTX_FRAME_RELATED_P (tmp) = 1;
24418 if (cfun->machine->uses_anonymous_args)
24420 /* Restore pretend args. Refer arm_expand_prologue on how to save
24421 pretend_args in stack. */
24422 int num_regs = crtl->args.pretend_args_size / 4;
24423 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
24424 for (j = 0, i = 0; j < num_regs; i++)
24425 if (saved_regs_mask & (1 << i))
24427 rtx reg = gen_rtx_REG (SImode, i);
24428 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
24429 j++;
24431 REG_NOTES (tmp) = dwarf;
24433 arm_add_cfa_adjust_cfa_note (tmp, amount,
24434 stack_pointer_rtx, stack_pointer_rtx);
24437 if (!really_return)
24438 return;
24440 if (crtl->calls_eh_return)
24441 emit_insn (gen_addsi3 (stack_pointer_rtx,
24442 stack_pointer_rtx,
24443 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24445 if (IS_STACKALIGN (func_type))
24446 /* Restore the original stack pointer. Before prologue, the stack was
24447 realigned and the original stack pointer saved in r0. For details,
24448 see comment in arm_expand_prologue. */
24449 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
24451 emit_jump_insn (simple_return_rtx);
24454 /* Implementation of insn prologue_thumb1_interwork. This is the first
24455 "instruction" of a function called in ARM mode. Swap to thumb mode. */
24457 const char *
24458 thumb1_output_interwork (void)
24460 const char * name;
24461 FILE *f = asm_out_file;
24463 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
24464 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
24465 == SYMBOL_REF);
24466 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
24468 /* Generate code sequence to switch us into Thumb mode. */
24469 /* The .code 32 directive has already been emitted by
24470 ASM_DECLARE_FUNCTION_NAME. */
24471 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
24472 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
24474 /* Generate a label, so that the debugger will notice the
24475 change in instruction sets. This label is also used by
24476 the assembler to bypass the ARM code when this function
24477 is called from a Thumb encoded function elsewhere in the
24478 same file. Hence the definition of STUB_NAME here must
24479 agree with the definition in gas/config/tc-arm.c. */
24481 #define STUB_NAME ".real_start_of"
24483 fprintf (f, "\t.code\t16\n");
24484 #ifdef ARM_PE
24485 if (arm_dllexport_name_p (name))
24486 name = arm_strip_name_encoding (name);
24487 #endif
24488 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
24489 fprintf (f, "\t.thumb_func\n");
24490 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
24492 return "";
24495 /* Handle the case of a double word load into a low register from
24496 a computed memory address. The computed address may involve a
24497 register which is overwritten by the load. */
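/* Illustrative hazard handled below (comment added for clarity): for a
   64-bit load whose destination low register is also the base register,
   loading the low word first would destroy the address before the high
   word is read.  The REG and PLUS cases therefore compare REGNOs and load
   the high word (%H0, at offset 4) first whenever the destination overlaps
   the base.  */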
24498 const char *
24499 thumb_load_double_from_address (rtx *operands)
24501 rtx addr;
24502 rtx base;
24503 rtx offset;
24504 rtx arg1;
24505 rtx arg2;
24507 gcc_assert (REG_P (operands[0]));
24508 gcc_assert (MEM_P (operands[1]));
24510 /* Get the memory address. */
24511 addr = XEXP (operands[1], 0);
24513 /* Work out how the memory address is computed. */
24514 switch (GET_CODE (addr))
24516 case REG:
24517 operands[2] = adjust_address (operands[1], SImode, 4);
24519 if (REGNO (operands[0]) == REGNO (addr))
24521 output_asm_insn ("ldr\t%H0, %2", operands);
24522 output_asm_insn ("ldr\t%0, %1", operands);
24524 else
24526 output_asm_insn ("ldr\t%0, %1", operands);
24527 output_asm_insn ("ldr\t%H0, %2", operands);
24529 break;
24531 case CONST:
24532 /* Compute <address> + 4 for the high order load. */
24533 operands[2] = adjust_address (operands[1], SImode, 4);
24535 output_asm_insn ("ldr\t%0, %1", operands);
24536 output_asm_insn ("ldr\t%H0, %2", operands);
24537 break;
24539 case PLUS:
24540 arg1 = XEXP (addr, 0);
24541 arg2 = XEXP (addr, 1);
24543 if (CONSTANT_P (arg1))
24544 base = arg2, offset = arg1;
24545 else
24546 base = arg1, offset = arg2;
24548 gcc_assert (REG_P (base));
24550 /* Catch the case of <address> = <reg> + <reg> */
24551 if (REG_P (offset))
24553 int reg_offset = REGNO (offset);
24554 int reg_base = REGNO (base);
24555 int reg_dest = REGNO (operands[0]);
24557 /* Add the base and offset registers together into the
24558 higher destination register. */
24559 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
24560 reg_dest + 1, reg_base, reg_offset);
24562 /* Load the lower destination register from the address in
24563 the higher destination register. */
24564 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
24565 reg_dest, reg_dest + 1);
24567 /* Load the higher destination register from its own address
24568 plus 4. */
24569 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
24570 reg_dest + 1, reg_dest + 1);
24572 else
24574 /* Compute <address> + 4 for the high order load. */
24575 operands[2] = adjust_address (operands[1], SImode, 4);
24577 /* If the computed address is held in the low order register
24578 then load the high order register first, otherwise always
24579 load the low order register first. */
24580 if (REGNO (operands[0]) == REGNO (base))
24582 output_asm_insn ("ldr\t%H0, %2", operands);
24583 output_asm_insn ("ldr\t%0, %1", operands);
24585 else
24587 output_asm_insn ("ldr\t%0, %1", operands);
24588 output_asm_insn ("ldr\t%H0, %2", operands);
24591 break;
24593 case LABEL_REF:
24594 /* With no registers to worry about we can just load the value
24595 directly. */
24596 operands[2] = adjust_address (operands[1], SImode, 4);
24598 output_asm_insn ("ldr\t%H0, %2", operands);
24599 output_asm_insn ("ldr\t%0, %1", operands);
24600 break;
24602 default:
24603 gcc_unreachable ();
24606 return "";
24609 const char *
24610 thumb_output_move_mem_multiple (int n, rtx *operands)
24612 switch (n)
24614 case 2:
24615 if (REGNO (operands[4]) > REGNO (operands[5]))
24616 std::swap (operands[4], operands[5]);
24618 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
24619 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
24620 break;
24622 case 3:
24623 if (REGNO (operands[4]) > REGNO (operands[5]))
24624 std::swap (operands[4], operands[5]);
24625 if (REGNO (operands[5]) > REGNO (operands[6]))
24626 std::swap (operands[5], operands[6]);
24627 if (REGNO (operands[4]) > REGNO (operands[5]))
24628 std::swap (operands[4], operands[5]);
24630 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
24631 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
24632 break;
24634 default:
24635 gcc_unreachable ();
24638 return "";
24641 /* Output a call-via instruction for thumb state. */
24642 const char *
24643 thumb_call_via_reg (rtx reg)
24645 int regno = REGNO (reg);
24646 rtx *labelp;
24648 gcc_assert (regno < LR_REGNUM);
24650 /* If we are in the normal text section we can use a single instance
24651 per compilation unit. If we are doing function sections, then we need
24652 an entry per section, since we can't rely on reachability. */
24653 if (in_section == text_section)
24655 thumb_call_reg_needed = 1;
24657 if (thumb_call_via_label[regno] == NULL)
24658 thumb_call_via_label[regno] = gen_label_rtx ();
24659 labelp = thumb_call_via_label + regno;
24661 else
24663 if (cfun->machine->call_via[regno] == NULL)
24664 cfun->machine->call_via[regno] = gen_label_rtx ();
24665 labelp = cfun->machine->call_via + regno;
24668 output_asm_insn ("bl\t%a0", labelp);
24669 return "";
24672 /* Routines for generating rtl. */
24673 void
24674 thumb_expand_movmemqi (rtx *operands)
24676 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
24677 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
24678 HOST_WIDE_INT len = INTVAL (operands[2]);
24679 HOST_WIDE_INT offset = 0;
24681 while (len >= 12)
24683 emit_insn (gen_movmem12b (out, in, out, in));
24684 len -= 12;
24687 if (len >= 8)
24689 emit_insn (gen_movmem8b (out, in, out, in));
24690 len -= 8;
24693 if (len >= 4)
24695 rtx reg = gen_reg_rtx (SImode);
24696 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
24697 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
24698 len -= 4;
24699 offset += 4;
24702 if (len >= 2)
24704 rtx reg = gen_reg_rtx (HImode);
24705 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
24706 plus_constant (Pmode, in,
24707 offset))));
24708 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
24709 offset)),
24710 reg));
24711 len -= 2;
24712 offset += 2;
24715 if (len)
24717 rtx reg = gen_reg_rtx (QImode);
24718 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
24719 plus_constant (Pmode, in,
24720 offset))));
24721 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
24722 offset)),
24723 reg));
24727 void
24728 thumb_reload_out_hi (rtx *operands)
24730 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
24733 /* Return the length of a function name prefix
24734 that starts with the character 'c'. */
24735 static int
24736 arm_get_strip_length (int c)
24738 switch (c)
24740 ARM_NAME_ENCODING_LENGTHS
24741 default: return 0;
24745 /* Return a pointer to a function's name with any
24746 and all prefix encodings stripped from it. */
24747 const char *
24748 arm_strip_name_encoding (const char *name)
24750 int skip;
24752 while ((skip = arm_get_strip_length (* name)))
24753 name += skip;
24755 return name;
24758 /* If there is a '*' anywhere in the name's prefix, then
24759 emit the stripped name verbatim, otherwise prepend an
24760 underscore if leading underscores are being used. */
24761 void
24762 arm_asm_output_labelref (FILE *stream, const char *name)
24764 int skip;
24765 int verbatim = 0;
24767 while ((skip = arm_get_strip_length (* name)))
24769 verbatim |= (*name == '*');
24770 name += skip;
24773 if (verbatim)
24774 fputs (name, stream);
24775 else
24776 asm_fprintf (stream, "%U%s", name);
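/* Example of the two paths above (illustrative, assuming '*' is among the
   prefixes covered by ARM_NAME_ENCODING_LENGTHS, as the comment implies):
   a name such as "*foo" is emitted verbatim as "foo" with no user-label
   prefix, while a plain "foo" goes through %U and picks up the target's
   leading underscore if it uses one.  */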
24779 /* This function is used to emit an EABI tag and its associated value.
24780 We emit the numerical value of the tag in case the assembler does not
24781 support textual tags (e.g. gas prior to 2.20). If requested we include
24782 the tag name in a comment so that anyone reading the assembler output
24783 will know which tag is being set.
24785 This function is not static because arm-c.c needs it too. */
24787 void
24788 arm_emit_eabi_attribute (const char *name, int num, int val)
24790 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
24791 if (flag_verbose_asm || flag_debug_asm)
24792 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
24793 asm_fprintf (asm_out_file, "\n");
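/* For instance (illustrative), the call arm_emit_eabi_attribute
   ("Tag_ABI_FP_rounding", 19, 1) made further down emits a line like
       .eabi_attribute 19, 1   @ Tag_ABI_FP_rounding
   where the trailing comment, introduced by the target's comment character,
   appears only under -fverbose-asm or -dA.  */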
24796 /* This function is used to print CPU tuning information as comment
24797 in assembler file. Pointers are not printed for now. */
24799 void
24800 arm_print_tune_info (void)
24802 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
24803 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
24804 current_tune->constant_limit);
24805 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
24806 current_tune->max_insns_skipped);
24807 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
24808 current_tune->prefetch.num_slots);
24809 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
24810 current_tune->prefetch.l1_cache_size);
24811 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
24812 current_tune->prefetch.l1_cache_line_size);
24813 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
24814 (int) current_tune->prefer_constant_pool);
24815 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
24816 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
24817 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
24818 current_tune->branch_cost (false, false));
24819 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
24820 current_tune->branch_cost (false, true));
24821 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
24822 current_tune->branch_cost (true, false));
24823 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
24824 current_tune->branch_cost (true, true));
24825 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
24826 (int) current_tune->prefer_ldrd_strd);
24827 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
24828 (int) current_tune->logical_op_non_short_circuit_thumb,
24829 (int) current_tune->logical_op_non_short_circuit_arm);
24830 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
24831 (int) current_tune->prefer_neon_for_64bits);
24832 asm_fprintf (asm_out_file,
24833 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
24834 (int) current_tune->disparage_flag_setting_t16_encodings);
24835 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
24836 (int) current_tune->string_ops_prefer_neon);
24837 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
24838 current_tune->max_insns_inline_memset);
24839 asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
24840 current_tune->fusible_ops);
24841 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
24842 (int) current_tune->sched_autopref);
24845 static void
24846 arm_file_start (void)
24848 int val;
24850 if (TARGET_BPABI)
24852 if (arm_selected_arch)
24854 /* armv7ve doesn't support any extensions. */
24855 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
24857 /* Keep backward compatibility for assemblers
24858 which don't support armv7ve. */
24859 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
24860 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
24861 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
24862 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
24863 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
24865 else
24867 const char* pos = strchr (arm_selected_arch->name, '+');
24868 if (pos)
24870 char buf[32];
24871 gcc_assert (strlen (arm_selected_arch->name)
24872 <= sizeof (buf) / sizeof (*pos));
24873 strncpy (buf, arm_selected_arch->name,
24874 (pos - arm_selected_arch->name) * sizeof (*pos));
24875 buf[pos - arm_selected_arch->name] = '\0';
24876 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
24877 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
24879 else
24880 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
24883 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
24884 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
24885 else
24887 const char* truncated_name
24888 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
24889 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
24892 if (print_tune_info)
24893 arm_print_tune_info ();
24895 if (! TARGET_SOFT_FLOAT)
24897 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
24898 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
24900 if (TARGET_HARD_FLOAT_ABI)
24901 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
24904 /* Some of these attributes only apply when the corresponding features
24905 are used. However we don't have any easy way of figuring this out.
24906 Conservatively record the setting that would have been used. */
24908 if (flag_rounding_math)
24909 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
24911 if (!flag_unsafe_math_optimizations)
24913 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
24914 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
24916 if (flag_signaling_nans)
24917 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
24919 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
24920 flag_finite_math_only ? 1 : 3);
24922 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
24923 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
24924 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
24925 flag_short_enums ? 1 : 2);
24927 /* Tag_ABI_optimization_goals. */
24928 if (optimize_size)
24929 val = 4;
24930 else if (optimize >= 2)
24931 val = 2;
24932 else if (optimize)
24933 val = 1;
24934 else
24935 val = 6;
24936 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
24938 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
24939 unaligned_access);
24941 if (arm_fp16_format)
24942 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
24943 (int) arm_fp16_format);
24945 if (arm_lang_output_object_attributes_hook)
24946 arm_lang_output_object_attributes_hook();
24949 default_file_start ();
24952 static void
24953 arm_file_end (void)
24955 int regno;
24957 if (NEED_INDICATE_EXEC_STACK)
24958 /* Add .note.GNU-stack. */
24959 file_end_indicate_exec_stack ();
24961 if (! thumb_call_reg_needed)
24962 return;
24964 switch_to_section (text_section);
24965 asm_fprintf (asm_out_file, "\t.code 16\n");
24966 ASM_OUTPUT_ALIGN (asm_out_file, 1);
24968 for (regno = 0; regno < LR_REGNUM; regno++)
24970 rtx label = thumb_call_via_label[regno];
24972 if (label != 0)
24974 targetm.asm_out.internal_label (asm_out_file, "L",
24975 CODE_LABEL_NUMBER (label));
24976 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
24981 #ifndef ARM_PE
24982 /* Symbols in the text segment can be accessed without indirecting via the
24983 constant pool; it may take an extra binary operation, but this is still
24984 faster than indirecting via memory. Don't do this when not optimizing,
24985 since we won't be calculating all of the offsets necessary to do this
24986 simplification. */
24988 static void
24989 arm_encode_section_info (tree decl, rtx rtl, int first)
24991 if (optimize > 0 && TREE_CONSTANT (decl))
24992 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
24994 default_encode_section_info (decl, rtl, first);
24996 #endif /* !ARM_PE */
24998 static void
24999 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25001 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25002 && !strcmp (prefix, "L"))
25004 arm_ccfsm_state = 0;
25005 arm_target_insn = NULL;
25007 default_internal_label (stream, prefix, labelno);
25010 /* Output code to add DELTA to the first argument, and then jump
25011 to FUNCTION. Used for C++ multiple inheritance. */
25013 static void
25014 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
25015 HOST_WIDE_INT, tree function)
25017 static int thunk_label = 0;
25018 char label[256];
25019 char labelpc[256];
25020 int mi_delta = delta;
25021 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25022 int shift = 0;
25023 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25024 ? 1 : 0);
25025 if (mi_delta < 0)
25026 mi_delta = - mi_delta;
25028 final_start_function (emit_barrier (), file, 1);
25030 if (TARGET_THUMB1)
25032 int labelno = thunk_label++;
25033 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25034 /* Thunks are entered in ARM mode when available. */
25035 if (TARGET_THUMB1_ONLY)
25037 /* push r3 so we can use it as a temporary. */
25038 /* TODO: Omit this save if r3 is not used. */
25039 fputs ("\tpush {r3}\n", file);
25040 fputs ("\tldr\tr3, ", file);
25042 else
25044 fputs ("\tldr\tr12, ", file);
25046 assemble_name (file, label);
25047 fputc ('\n', file);
25048 if (flag_pic)
25050 /* If we are generating PIC, the ldr instruction below loads
25051 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25052 the address of the add + 8, so we have:
25054 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25055 = target + 1.
25057 Note that we have "+ 1" because some versions of GNU ld
25058 don't set the low bit of the result for R_ARM_REL32
25059 relocations against thumb function symbols.
25060 On ARMv6M this is +4, not +8. */
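/* As a rough sketch (illustrative only, assuming the flag_pic and
   !TARGET_THUMB1_ONLY path), the emitted thunk body looks like:

	ldr	r12, .LTHUMBFUNCn
   .LTHUNKPCn:
	add	r12, pc, r12
	...			@ add DELTA to the this pointer
	bx	r12
   .LTHUMBFUNCn:
	.word	<target> - 7 - .LTHUNKPCn  */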
25061 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25062 assemble_name (file, labelpc);
25063 fputs (":\n", file);
25064 if (TARGET_THUMB1_ONLY)
25066 /* This is 2 insns after the start of the thunk, so we know it
25067 is 4-byte aligned. */
25068 fputs ("\tadd\tr3, pc, r3\n", file);
25069 fputs ("\tmov r12, r3\n", file);
25071 else
25072 fputs ("\tadd\tr12, pc, r12\n", file);
25074 else if (TARGET_THUMB1_ONLY)
25075 fputs ("\tmov r12, r3\n", file);
25077 if (TARGET_THUMB1_ONLY)
25079 if (mi_delta > 255)
25081 fputs ("\tldr\tr3, ", file);
25082 assemble_name (file, label);
25083 fputs ("+4\n", file);
25084 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25085 mi_op, this_regno, this_regno);
25087 else if (mi_delta != 0)
25089 /* Thumb1 unified syntax requires s suffix in instruction name when
25090 one of the operands is immediate. */
25091 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25092 mi_op, this_regno, this_regno,
25093 mi_delta);
25096 else
25098 /* TODO: Use movw/movt for large constants when available. */
25099 while (mi_delta != 0)
25101 if ((mi_delta & (3 << shift)) == 0)
25102 shift += 2;
25103 else
25105 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25106 mi_op, this_regno, this_regno,
25107 mi_delta & (0xff << shift));
25108 mi_delta &= ~(0xff << shift);
25109 shift += 8;
25113 if (TARGET_THUMB1)
25115 if (TARGET_THUMB1_ONLY)
25116 fputs ("\tpop\t{r3}\n", file);
25118 fprintf (file, "\tbx\tr12\n");
25119 ASM_OUTPUT_ALIGN (file, 2);
25120 assemble_name (file, label);
25121 fputs (":\n", file);
25122 if (flag_pic)
25124 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25125 rtx tem = XEXP (DECL_RTL (function), 0);
25126 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25127 pipeline offset is four rather than eight. Adjust the offset
25128 accordingly. */
25129 tem = plus_constant (GET_MODE (tem), tem,
25130 TARGET_THUMB1_ONLY ? -3 : -7);
25131 tem = gen_rtx_MINUS (GET_MODE (tem),
25132 tem,
25133 gen_rtx_SYMBOL_REF (Pmode,
25134 ggc_strdup (labelpc)));
25135 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25137 else
25138 /* Output ".word .LTHUNKn". */
25139 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25141 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25142 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25144 else
25146 fputs ("\tb\t", file);
25147 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25148 if (NEED_PLT_RELOC)
25149 fputs ("(PLT)", file);
25150 fputc ('\n', file);
25153 final_end_function ();
25156 /* MI thunk handling for TARGET_32BIT. */
25158 static void
25159 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
25160 HOST_WIDE_INT vcall_offset, tree function)
25162 /* On ARM, this_regno is R0 or R1 depending on
25163 whether the function returns an aggregate or not. */
25165 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
25166 function)
25167 ? R1_REGNUM : R0_REGNUM);
25169 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
25170 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
25171 reload_completed = 1;
25172 emit_note (NOTE_INSN_PROLOGUE_END);
25174 /* Add DELTA to THIS_RTX. */
25175 if (delta != 0)
25176 arm_split_constant (PLUS, Pmode, NULL_RTX,
25177 delta, this_rtx, this_rtx, false);
25179 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
25180 if (vcall_offset != 0)
25182 /* Load *THIS_RTX. */
25183 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
25184 /* Compute *THIS_RTX + VCALL_OFFSET. */
25185 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
25186 false);
25187 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
25188 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
25189 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
25192 /* Generate a tail call to the target function. */
25193 if (!TREE_USED (function))
25195 assemble_external (function);
25196 TREE_USED (function) = 1;
25198 rtx funexp = XEXP (DECL_RTL (function), 0);
25199 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
25200 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
25201 SIBLING_CALL_P (insn) = 1;
25203 insn = get_insns ();
25204 shorten_branches (insn);
25205 final_start_function (insn, file, 1);
25206 final (insn, file, 1);
25207 final_end_function ();
25209 /* Stop pretending this is a post-reload pass. */
25210 reload_completed = 0;
25213 /* Output code to add DELTA to the first argument, and then jump
25214 to FUNCTION. Used for C++ multiple inheritance. */
25216 static void
25217 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
25218 HOST_WIDE_INT vcall_offset, tree function)
25220 if (TARGET_32BIT)
25221 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
25222 else
25223 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
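/* Emit a 64-bit integer vector constant as a single hexadecimal literal,
   highest-numbered element first.  For example (illustrative), the V4HImode
   vector {1, 2, 3, 4} is emitted as 0x0004000300020001.  */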
25227 arm_emit_vector_const (FILE *file, rtx x)
25229 int i;
25230 const char * pattern;
25232 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25234 switch (GET_MODE (x))
25236 case V2SImode: pattern = "%08x"; break;
25237 case V4HImode: pattern = "%04x"; break;
25238 case V8QImode: pattern = "%02x"; break;
25239 default: gcc_unreachable ();
25242 fprintf (file, "0x");
25243 for (i = CONST_VECTOR_NUNITS (x); i--;)
25245 rtx element;
25247 element = CONST_VECTOR_ELT (x, i);
25248 fprintf (file, pattern, INTVAL (element));
25251 return 1;
25254 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25255 HFmode constant pool entries are actually loaded with ldr. */
25256 void
25257 arm_emit_fp16_const (rtx c)
25259 long bits;
25261 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
25262 if (WORDS_BIG_ENDIAN)
25263 assemble_zeros (2);
25264 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25265 if (!WORDS_BIG_ENDIAN)
25266 assemble_zeros (2);
25269 const char *
25270 arm_output_load_gr (rtx *operands)
25272 rtx reg;
25273 rtx offset;
25274 rtx wcgr;
25275 rtx sum;
25277 if (!MEM_P (operands [1])
25278 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25279 || !REG_P (reg = XEXP (sum, 0))
25280 || !CONST_INT_P (offset = XEXP (sum, 1))
25281 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25282 return "wldrw%?\t%0, %1";
25284 /* Fix up an out-of-range load of a GR register. */
25285 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25286 wcgr = operands[0];
25287 operands[0] = reg;
25288 output_asm_insn ("ldr%?\t%0, %1", operands);
25290 operands[0] = wcgr;
25291 operands[1] = reg;
25292 output_asm_insn ("tmcr%?\t%0, %1", operands);
25293 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25295 return "";
25298 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25300 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25301 named arg and all anonymous args onto the stack.
25302 XXX I know the prologue shouldn't be pushing registers, but it is faster
25303 that way. */
25305 static void
25306 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25307 machine_mode mode,
25308 tree type,
25309 int *pretend_size,
25310 int second_time ATTRIBUTE_UNUSED)
25312 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25313 int nregs;
25315 cfun->machine->uses_anonymous_args = 1;
25316 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25318 nregs = pcum->aapcs_ncrn;
25319 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25320 nregs++;
25322 else
25323 nregs = pcum->nregs;
25325 if (nregs < NUM_ARG_REGS)
25326 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25329 /* We can't rely on the caller doing the proper promotion when
25330 using APCS or ATPCS. */
25332 static bool
25333 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25335 return !TARGET_AAPCS_BASED;
25338 static machine_mode
25339 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25340 machine_mode mode,
25341 int *punsignedp ATTRIBUTE_UNUSED,
25342 const_tree fntype ATTRIBUTE_UNUSED,
25343 int for_return ATTRIBUTE_UNUSED)
25345 if (GET_MODE_CLASS (mode) == MODE_INT
25346 && GET_MODE_SIZE (mode) < 4)
25347 return SImode;
25349 return mode;
25352 /* AAPCS based ABIs use short enums by default. */
25354 static bool
25355 arm_default_short_enums (void)
25357 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25361 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25363 static bool
25364 arm_align_anon_bitfield (void)
25366 return TARGET_AAPCS_BASED;
25370 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25372 static tree
25373 arm_cxx_guard_type (void)
25375 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25379 /* The EABI says test the least significant bit of a guard variable. */
25381 static bool
25382 arm_cxx_guard_mask_bit (void)
25384 return TARGET_AAPCS_BASED;
25388 /* The EABI specifies that all array cookies are 8 bytes long. */
25390 static tree
25391 arm_get_cookie_size (tree type)
25393 tree size;
25395 if (!TARGET_AAPCS_BASED)
25396 return default_cxx_get_cookie_size (type);
25398 size = build_int_cst (sizetype, 8);
25399 return size;
25403 /* The EABI says that array cookies should also contain the element size. */
25405 static bool
25406 arm_cookie_has_size (void)
25408 return TARGET_AAPCS_BASED;
25412 /* The EABI says constructors and destructors should return a pointer to
25413 the object constructed/destroyed. */
25415 static bool
25416 arm_cxx_cdtor_returns_this (void)
25418 return TARGET_AAPCS_BASED;
25421 /* The EABI says that an inline function may never be the key
25422 method. */
25424 static bool
25425 arm_cxx_key_method_may_be_inline (void)
25427 return !TARGET_AAPCS_BASED;
25430 static void
25431 arm_cxx_determine_class_data_visibility (tree decl)
25433 if (!TARGET_AAPCS_BASED
25434 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
25435 return;
25437 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
25438 is exported. However, on systems without dynamic vague linkage,
25439 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
25440 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
25441 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
25442 else
25443 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
25444 DECL_VISIBILITY_SPECIFIED (decl) = 1;
25447 static bool
25448 arm_cxx_class_data_always_comdat (void)
25450 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
25451 vague linkage if the class has no key function. */
25452 return !TARGET_AAPCS_BASED;
25456 /* The EABI says __aeabi_atexit should be used to register static
25457 destructors. */
25459 static bool
25460 arm_cxx_use_aeabi_atexit (void)
25462 return TARGET_AAPCS_BASED;
25466 void
25467 arm_set_return_address (rtx source, rtx scratch)
25469 arm_stack_offsets *offsets;
25470 HOST_WIDE_INT delta;
25471 rtx addr;
25472 unsigned long saved_regs;
25474 offsets = arm_get_frame_offsets ();
25475 saved_regs = offsets->saved_regs_mask;
25477 if ((saved_regs & (1 << LR_REGNUM)) == 0)
25478 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
25479 else
25481 if (frame_pointer_needed)
25482 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
25483 else
25485 /* LR will be the first saved register. */
25486 delta = offsets->outgoing_args - (offsets->frame + 4);
25489 if (delta >= 4096)
25491 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
25492 GEN_INT (delta & ~4095)));
25493 addr = scratch;
25494 delta &= 4095;
25496 else
25497 addr = stack_pointer_rtx;
25499 addr = plus_constant (Pmode, addr, delta);
25501 /* The store needs to be marked as frame related in order to prevent
25502 DSE from deleting it as dead if it is based on fp. */
25503 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
25504 RTX_FRAME_RELATED_P (insn) = 1;
25505 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
25510 void
25511 thumb_set_return_address (rtx source, rtx scratch)
25513 arm_stack_offsets *offsets;
25514 HOST_WIDE_INT delta;
25515 HOST_WIDE_INT limit;
25516 int reg;
25517 rtx addr;
25518 unsigned long mask;
25520 emit_use (source);
25522 offsets = arm_get_frame_offsets ();
25523 mask = offsets->saved_regs_mask;
25524 if (mask & (1 << LR_REGNUM))
25526 limit = 1024;
25527 /* Find the saved regs. */
25528 if (frame_pointer_needed)
25530 delta = offsets->soft_frame - offsets->saved_args;
25531 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
25532 if (TARGET_THUMB1)
25533 limit = 128;
25535 else
25537 delta = offsets->outgoing_args - offsets->saved_args;
25538 reg = SP_REGNUM;
25540 /* Allow for the stack frame. */
25541 if (TARGET_THUMB1 && TARGET_BACKTRACE)
25542 delta -= 16;
25543 /* The link register is always the first saved register. */
25544 delta -= 4;
25546 /* Construct the address. */
25547 addr = gen_rtx_REG (SImode, reg);
25548 if (delta > limit)
25550 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
25551 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
25552 addr = scratch;
25554 else
25555 addr = plus_constant (Pmode, addr, delta);
25557 /* The store needs to be marked as frame related in order to prevent
25558 DSE from deleting it as dead if it is based on fp. */
25559 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
25560 RTX_FRAME_RELATED_P (insn) = 1;
25561 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
25563 else
25564 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
25567 /* Implements target hook vector_mode_supported_p. */
25568 bool
25569 arm_vector_mode_supported_p (machine_mode mode)
25571 /* Neon also supports V2SImode, etc. listed in the clause below. */
25572 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
25573 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
25574 || mode == V2DImode || mode == V8HFmode))
25575 return true;
25577 if ((TARGET_NEON || TARGET_IWMMXT)
25578 && ((mode == V2SImode)
25579 || (mode == V4HImode)
25580 || (mode == V8QImode)))
25581 return true;
25583 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
25584 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
25585 || mode == V2HAmode))
25586 return true;
25588 return false;
25591 /* Implements target hook array_mode_supported_p. */
25593 static bool
25594 arm_array_mode_supported_p (machine_mode mode,
25595 unsigned HOST_WIDE_INT nelems)
25597 if (TARGET_NEON
25598 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
25599 && (nelems >= 2 && nelems <= 4))
25600 return true;
25602 return false;
25605 /* Use the option -mvectorize-with-neon-double to override the use of quadword
25606 registers when autovectorizing for Neon, at least until multiple vector
25607 widths are supported properly by the middle-end. */
25609 static machine_mode
25610 arm_preferred_simd_mode (machine_mode mode)
25612 if (TARGET_NEON)
25613 switch (mode)
25615 case SFmode:
25616 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
25617 case SImode:
25618 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
25619 case HImode:
25620 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
25621 case QImode:
25622 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
25623 case DImode:
25624 if (!TARGET_NEON_VECTORIZE_DOUBLE)
25625 return V2DImode;
25626 break;
25628 default:;
25631 if (TARGET_REALLY_IWMMXT)
25632 switch (mode)
25634 case SImode:
25635 return V2SImode;
25636 case HImode:
25637 return V4HImode;
25638 case QImode:
25639 return V8QImode;
25641 default:;
25644 return word_mode;
25647 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
25649 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
25650 using r0-r4 for function arguments and r7 for the stack frame, and not have
25651 enough left over to do doubleword arithmetic. For Thumb-2 all the
25652 potentially problematic instructions accept high registers so this is not
25653 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
25654 that require many low registers. */
25655 static bool
25656 arm_class_likely_spilled_p (reg_class_t rclass)
25658 if ((TARGET_THUMB1 && rclass == LO_REGS)
25659 || rclass == CC_REG)
25660 return true;
25662 return false;
25665 /* Implements target hook small_register_classes_for_mode_p. */
25666 bool
25667 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
25669 return TARGET_THUMB1;
25672 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
25673 ARM insns and therefore guarantee that the shift count is modulo 256.
25674 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
25675 guarantee no particular behavior for out-of-range counts. */
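/* E.g. (illustrative) a variable SImode shift by 257 behaves like a shift
   by 1 (257 & 255), whereas nothing may be assumed for DImode shifts.  */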
25677 static unsigned HOST_WIDE_INT
25678 arm_shift_truncation_mask (machine_mode mode)
25680 return mode == SImode ? 255 : 0;
25684 /* Map internal gcc register numbers to DWARF2 register numbers. */
25686 unsigned int
25687 arm_dbx_register_number (unsigned int regno)
25689 if (regno < 16)
25690 return regno;
25692 if (IS_VFP_REGNUM (regno))
25694 /* See comment in arm_dwarf_register_span. */
25695 if (VFP_REGNO_OK_FOR_SINGLE (regno))
25696 return 64 + regno - FIRST_VFP_REGNUM;
25697 else
25698 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
25701 if (IS_IWMMXT_GR_REGNUM (regno))
25702 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
25704 if (IS_IWMMXT_REGNUM (regno))
25705 return 112 + regno - FIRST_IWMMXT_REGNUM;
25707 return DWARF_FRAME_REGISTERS;
25710 /* Dwarf models VFPv3 registers as 32 64-bit registers.
25711 GCC models them as 64 32-bit registers, so we need to describe this to
25712 the DWARF generation code. Other registers can use the default. */
25713 static rtx
25714 arm_dwarf_register_span (rtx rtl)
25716 machine_mode mode;
25717 unsigned regno;
25718 rtx parts[16];
25719 int nregs;
25720 int i;
25722 regno = REGNO (rtl);
25723 if (!IS_VFP_REGNUM (regno))
25724 return NULL_RTX;
25726 /* XXX FIXME: The EABI defines two VFP register ranges:
25727 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
25728 256-287: D0-D31
25729 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
25730 corresponding D register. Until GDB supports this, we shall use the
25731 legacy encodings. We also use these encodings for D0-D15 for
25732 compatibility with older debuggers. */
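/* For example (illustrative only): a DFmode value in d0 is described as a
   PARALLEL of its two SImode halves s0 and s1 (DWARF registers 64 and 65),
   whereas a DFmode value in d16, which has no single-precision aliases, is
   described as a single DImode register.  */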
25733 mode = GET_MODE (rtl);
25734 if (GET_MODE_SIZE (mode) < 8)
25735 return NULL_RTX;
25737 if (VFP_REGNO_OK_FOR_SINGLE (regno))
25739 nregs = GET_MODE_SIZE (mode) / 4;
25740 for (i = 0; i < nregs; i += 2)
25741 if (TARGET_BIG_END)
25743 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
25744 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
25746 else
25748 parts[i] = gen_rtx_REG (SImode, regno + i);
25749 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
25752 else
25754 nregs = GET_MODE_SIZE (mode) / 8;
25755 for (i = 0; i < nregs; i++)
25756 parts[i] = gen_rtx_REG (DImode, regno + i);
25759 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
25762 #if ARM_UNWIND_INFO
25763 /* Emit unwind directives for a store-multiple instruction or stack pointer
25764 push during alignment.
25765 These should only ever be generated by the function prologue code, so
25766 expect them to have a particular form.
25767 The store-multiple instruction sometimes pushes pc as the last register,
25768 although it should not be tracked in the unwind information; for -Os it
25769 sometimes pushes some dummy registers before the first register that needs
25770 to be tracked in the unwind information. Such dummy registers are there just
25771 to avoid a separate stack adjustment, and will not be restored in the
25772 epilogue. */
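/* As an assumed illustration (not taken from any particular function): a
   prologue store-multiple such as "push {r4, r5, lr}" is annotated with
   ".save {r4, r5, lr}", while a VFP store-multiple of d8-d9 produces
   ".vsave {d8, d9}".  */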
25774 static void
25775 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
25777 int i;
25778 HOST_WIDE_INT offset;
25779 HOST_WIDE_INT nregs;
25780 int reg_size;
25781 unsigned reg;
25782 unsigned lastreg;
25783 unsigned padfirst = 0, padlast = 0;
25784 rtx e;
25786 e = XVECEXP (p, 0, 0);
25787 gcc_assert (GET_CODE (e) == SET);
25789 /* First insn will adjust the stack pointer. */
25790 gcc_assert (GET_CODE (e) == SET
25791 && REG_P (SET_DEST (e))
25792 && REGNO (SET_DEST (e)) == SP_REGNUM
25793 && GET_CODE (SET_SRC (e)) == PLUS);
25795 offset = -INTVAL (XEXP (SET_SRC (e), 1));
25796 nregs = XVECLEN (p, 0) - 1;
25797 gcc_assert (nregs);
25799 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
25800 if (reg < 16)
25802 /* For -Os dummy registers can be pushed at the beginning to
25803 avoid separate stack pointer adjustment. */
25804 e = XVECEXP (p, 0, 1);
25805 e = XEXP (SET_DEST (e), 0);
25806 if (GET_CODE (e) == PLUS)
25807 padfirst = INTVAL (XEXP (e, 1));
25808 gcc_assert (padfirst == 0 || optimize_size);
25809 /* The function prologue may also push pc, but not annotate it as it is
25810 never restored. We turn this into a stack pointer adjustment. */
25811 e = XVECEXP (p, 0, nregs);
25812 e = XEXP (SET_DEST (e), 0);
25813 if (GET_CODE (e) == PLUS)
25814 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
25815 else
25816 padlast = offset - 4;
25817 gcc_assert (padlast == 0 || padlast == 4);
25818 if (padlast == 4)
25819 fprintf (asm_out_file, "\t.pad #4\n");
25820 reg_size = 4;
25821 fprintf (asm_out_file, "\t.save {");
25823 else if (IS_VFP_REGNUM (reg))
25825 reg_size = 8;
25826 fprintf (asm_out_file, "\t.vsave {");
25828 else
25829 /* Unknown register type. */
25830 gcc_unreachable ();
25832 /* If the stack increment doesn't match the size of the saved registers,
25833 something has gone horribly wrong. */
25834 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
25836 offset = padfirst;
25837 lastreg = 0;
25838 /* The remaining insns will describe the stores. */
25839 for (i = 1; i <= nregs; i++)
25841 /* Expect (set (mem <addr>) (reg)).
25842 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
25843 e = XVECEXP (p, 0, i);
25844 gcc_assert (GET_CODE (e) == SET
25845 && MEM_P (SET_DEST (e))
25846 && REG_P (SET_SRC (e)));
25848 reg = REGNO (SET_SRC (e));
25849 gcc_assert (reg >= lastreg);
25851 if (i != 1)
25852 fprintf (asm_out_file, ", ");
25853 /* We can't use %r for vfp because we need to use the
25854 double precision register names. */
25855 if (IS_VFP_REGNUM (reg))
25856 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
25857 else
25858 asm_fprintf (asm_out_file, "%r", reg);
25860 if (flag_checking)
25862 /* Check that the addresses are consecutive. */
25863 e = XEXP (SET_DEST (e), 0);
25864 if (GET_CODE (e) == PLUS)
25865 gcc_assert (REG_P (XEXP (e, 0))
25866 && REGNO (XEXP (e, 0)) == SP_REGNUM
25867 && CONST_INT_P (XEXP (e, 1))
25868 && offset == INTVAL (XEXP (e, 1)));
25869 else
25870 gcc_assert (i == 1
25871 && REG_P (e)
25872 && REGNO (e) == SP_REGNUM);
25873 offset += reg_size;
25876 fprintf (asm_out_file, "}\n");
25877 if (padfirst)
25878 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
25881 /* Emit unwind directives for a SET. */
25883 static void
25884 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
25886 rtx e0;
25887 rtx e1;
25888 unsigned reg;
25890 e0 = XEXP (p, 0);
25891 e1 = XEXP (p, 1);
25892 switch (GET_CODE (e0))
25894 case MEM:
25895 /* Pushing a single register. */
25896 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
25897 || !REG_P (XEXP (XEXP (e0, 0), 0))
25898 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
25899 abort ();
25901 asm_fprintf (asm_out_file, "\t.save ");
25902 if (IS_VFP_REGNUM (REGNO (e1)))
25903 asm_fprintf(asm_out_file, "{d%d}\n",
25904 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
25905 else
25906 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
25907 break;
25909 case REG:
25910 if (REGNO (e0) == SP_REGNUM)
25912 /* A stack increment. */
25913 if (GET_CODE (e1) != PLUS
25914 || !REG_P (XEXP (e1, 0))
25915 || REGNO (XEXP (e1, 0)) != SP_REGNUM
25916 || !CONST_INT_P (XEXP (e1, 1)))
25917 abort ();
25919 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
25920 -INTVAL (XEXP (e1, 1)));
25922 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
25924 HOST_WIDE_INT offset;
25926 if (GET_CODE (e1) == PLUS)
25928 if (!REG_P (XEXP (e1, 0))
25929 || !CONST_INT_P (XEXP (e1, 1)))
25930 abort ();
25931 reg = REGNO (XEXP (e1, 0));
25932 offset = INTVAL (XEXP (e1, 1));
25933 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
25934 HARD_FRAME_POINTER_REGNUM, reg,
25935 offset);
25937 else if (REG_P (e1))
25939 reg = REGNO (e1);
25940 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
25941 HARD_FRAME_POINTER_REGNUM, reg);
25943 else
25944 abort ();
25946 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
25948 /* Move from sp to reg. */
25949 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
25951 else if (GET_CODE (e1) == PLUS
25952 && REG_P (XEXP (e1, 0))
25953 && REGNO (XEXP (e1, 0)) == SP_REGNUM
25954 && CONST_INT_P (XEXP (e1, 1)))
25956 /* Set reg to offset from sp. */
25957 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
25958 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
25960 else
25961 abort ();
25962 break;
25964 default:
25965 abort ();
25970 /* Emit unwind directives for the given insn. */
25972 static void
25973 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
25975 rtx note, pat;
25976 bool handled_one = false;
25978 if (arm_except_unwind_info (&global_options) != UI_TARGET)
25979 return;
25981 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
25982 && (TREE_NOTHROW (current_function_decl)
25983 || crtl->all_throwers_are_sibcalls))
25984 return;
25986 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
25987 return;
25989 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
25991 switch (REG_NOTE_KIND (note))
25993 case REG_FRAME_RELATED_EXPR:
25994 pat = XEXP (note, 0);
25995 goto found;
25997 case REG_CFA_REGISTER:
25998 pat = XEXP (note, 0);
25999 if (pat == NULL)
26001 pat = PATTERN (insn);
26002 if (GET_CODE (pat) == PARALLEL)
26003 pat = XVECEXP (pat, 0, 0);
26006 /* Only emitted for IS_STACKALIGN re-alignment. */
26008 rtx dest, src;
26009 unsigned reg;
26011 src = SET_SRC (pat);
26012 dest = SET_DEST (pat);
26014 gcc_assert (src == stack_pointer_rtx);
26015 reg = REGNO (dest);
26016 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26017 reg + 0x90, reg);
26019 handled_one = true;
26020 break;
26022 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26023 to get correct DWARF information for shrink-wrapping. We should not
26024 emit unwind information for it because such insns are used either for
26025 pretend arguments or as notes to adjust sp and restore registers from the
26026 stack. */
26027 case REG_CFA_DEF_CFA:
26028 case REG_CFA_ADJUST_CFA:
26029 case REG_CFA_RESTORE:
26030 return;
26032 case REG_CFA_EXPRESSION:
26033 case REG_CFA_OFFSET:
26034 /* ??? Only handling here what we actually emit. */
26035 gcc_unreachable ();
26037 default:
26038 break;
26041 if (handled_one)
26042 return;
26043 pat = PATTERN (insn);
26044 found:
26046 switch (GET_CODE (pat))
26048 case SET:
26049 arm_unwind_emit_set (asm_out_file, pat);
26050 break;
26052 case SEQUENCE:
26053 /* Store multiple. */
26054 arm_unwind_emit_sequence (asm_out_file, pat);
26055 break;
26057 default:
26058 abort();
26063 /* Output a reference from a function exception table to the type_info
26064 object X. The EABI specifies that the symbol should be relocated by
26065 an R_ARM_TARGET2 relocation. */
26067 static bool
26068 arm_output_ttype (rtx x)
26070 fputs ("\t.word\t", asm_out_file);
26071 output_addr_const (asm_out_file, x);
26072 /* Use special relocations for symbol references. */
26073 if (!CONST_INT_P (x))
26074 fputs ("(TARGET2)", asm_out_file);
26075 fputc ('\n', asm_out_file);
26077 return TRUE;
26080 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26082 static void
26083 arm_asm_emit_except_personality (rtx personality)
26085 fputs ("\t.personality\t", asm_out_file);
26086 output_addr_const (asm_out_file, personality);
26087 fputc ('\n', asm_out_file);
26089 #endif /* ARM_UNWIND_INFO */
26091 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26093 static void
26094 arm_asm_init_sections (void)
26096 #if ARM_UNWIND_INFO
26097 exception_section = get_unnamed_section (0, output_section_asm_op,
26098 "\t.handlerdata");
26099 #endif /* ARM_UNWIND_INFO */
26101 #ifdef OBJECT_FORMAT_ELF
26102 if (target_pure_code)
26103 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
26104 #endif
26107 /* Output unwind directives for the start/end of a function. */
26109 void
26110 arm_output_fn_unwind (FILE * f, bool prologue)
26112 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26113 return;
26115 if (prologue)
26116 fputs ("\t.fnstart\n", f);
26117 else
26119 /* If this function will never be unwound, then mark it as such.
26120 The same condition is used in arm_unwind_emit to suppress
26121 the frame annotations. */
26122 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26123 && (TREE_NOTHROW (current_function_decl)
26124 || crtl->all_throwers_are_sibcalls))
26125 fputs("\t.cantunwind\n", f);
26127 fputs ("\t.fnend\n", f);
26131 static bool
26132 arm_emit_tls_decoration (FILE *fp, rtx x)
26134 enum tls_reloc reloc;
26135 rtx val;
26137 val = XVECEXP (x, 0, 0);
26138 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26140 output_addr_const (fp, val);
26142 switch (reloc)
26144 case TLS_GD32:
26145 fputs ("(tlsgd)", fp);
26146 break;
26147 case TLS_LDM32:
26148 fputs ("(tlsldm)", fp);
26149 break;
26150 case TLS_LDO32:
26151 fputs ("(tlsldo)", fp);
26152 break;
26153 case TLS_IE32:
26154 fputs ("(gottpoff)", fp);
26155 break;
26156 case TLS_LE32:
26157 fputs ("(tpoff)", fp);
26158 break;
26159 case TLS_DESCSEQ:
26160 fputs ("(tlsdesc)", fp);
26161 break;
26162 default:
26163 gcc_unreachable ();
26166 switch (reloc)
26168 case TLS_GD32:
26169 case TLS_LDM32:
26170 case TLS_IE32:
26171 case TLS_DESCSEQ:
26172 fputs (" + (. - ", fp);
26173 output_addr_const (fp, XVECEXP (x, 0, 2));
26174 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26175 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26176 output_addr_const (fp, XVECEXP (x, 0, 3));
26177 fputc (')', fp);
26178 break;
26179 default:
26180 break;
26183 return TRUE;
26186 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26188 static void
26189 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26191 gcc_assert (size == 4);
26192 fputs ("\t.word\t", file);
26193 output_addr_const (file, x);
26194 fputs ("(tlsldo)", file);
26197 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26199 static bool
26200 arm_output_addr_const_extra (FILE *fp, rtx x)
26202 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26203 return arm_emit_tls_decoration (fp, x);
26204 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26206 char label[256];
26207 int labelno = INTVAL (XVECEXP (x, 0, 0));
26209 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26210 assemble_name_raw (fp, label);
26212 return TRUE;
26214 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26216 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26217 if (GOT_PCREL)
26218 fputs ("+.", fp);
26219 fputs ("-(", fp);
26220 output_addr_const (fp, XVECEXP (x, 0, 0));
26221 fputc (')', fp);
26222 return TRUE;
26224 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26226 output_addr_const (fp, XVECEXP (x, 0, 0));
26227 if (GOT_PCREL)
26228 fputs ("+.", fp);
26229 fputs ("-(", fp);
26230 output_addr_const (fp, XVECEXP (x, 0, 1));
26231 fputc (')', fp);
26232 return TRUE;
26234 else if (GET_CODE (x) == CONST_VECTOR)
26235 return arm_emit_vector_const (fp, x);
26237 return FALSE;
26240 /* Output assembly for a shift instruction.
26241 SET_FLAGS determines how the instruction modifies the condition codes.
26242 0 - Do not set condition codes.
26243 1 - Set condition codes.
26244 2 - Use smallest instruction. */
26245 const char *
26246 arm_output_shift(rtx * operands, int set_flags)
26248 char pattern[100];
26249 static const char flag_chars[3] = {'?', '.', '!'};
26250 const char *shift;
26251 HOST_WIDE_INT val;
26252 char c;
26254 c = flag_chars[set_flags];
26255 shift = shift_op(operands[3], &val);
26256 if (shift)
26258 if (val != -1)
26259 operands[2] = GEN_INT(val);
26260 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26262 else
26263 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26265 output_asm_insn (pattern, operands);
26266 return "";
26269 /* Output assembly for a WMMX immediate shift instruction. */
26270 const char *
26271 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26273 int shift = INTVAL (operands[2]);
26274 char templ[50];
26275 machine_mode opmode = GET_MODE (operands[0]);
26277 gcc_assert (shift >= 0);
26279 /* Handle an out-of-range shift: i.e. a shift value > 63 (for the D
26280 qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
26281 if (((opmode == V4HImode) && (shift > 15))
26282 || ((opmode == V2SImode) && (shift > 31))
26283 || ((opmode == DImode) && (shift > 63)))
26285 if (wror_or_wsra)
26287 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26288 output_asm_insn (templ, operands);
26289 if (opmode == DImode)
26291 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26292 output_asm_insn (templ, operands);
26295 else
26297 /* The destination register will contain all zeros. */
26298 sprintf (templ, "wzero\t%%0");
26299 output_asm_insn (templ, operands);
26301 return "";
26304 if ((opmode == DImode) && (shift > 32))
26306 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26307 output_asm_insn (templ, operands);
26308 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26309 output_asm_insn (templ, operands);
26311 else
26313 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26314 output_asm_insn (templ, operands);
26316 return "";
26319 /* Output assembly for a WMMX tinsr instruction. */
26320 const char *
26321 arm_output_iwmmxt_tinsr (rtx *operands)
26323 int mask = INTVAL (operands[3]);
26324 int i;
26325 char templ[50];
26326 int units = mode_nunits[GET_MODE (operands[0])];
26327 gcc_assert ((mask & (mask - 1)) == 0);
26328 for (i = 0; i < units; ++i)
26330 if ((mask & 0x01) == 1)
26332 break;
26334 mask >>= 1;
26336 gcc_assert (i < units);
26338 switch (GET_MODE (operands[0]))
26340 case V8QImode:
26341 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26342 break;
26343 case V4HImode:
26344 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26345 break;
26346 case V2SImode:
26347 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26348 break;
26349 default:
26350 gcc_unreachable ();
26351 break;
26353 output_asm_insn (templ, operands);
26355 return "";
26358 /* Output a Thumb-1 casesi dispatch sequence. */
26359 const char *
26360 thumb1_output_casesi (rtx *operands)
26362 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
26364 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26366 switch (GET_MODE(diff_vec))
26368 case QImode:
26369 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26370 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26371 case HImode:
26372 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26373 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26374 case SImode:
26375 return "bl\t%___gnu_thumb1_case_si";
26376 default:
26377 gcc_unreachable ();
26381 /* Output a Thumb-2 casesi instruction. */
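/* Illustrative output for a QImode dispatch table (register, bound and
   label names assumed):
	cmp	r0, #max
	bhi	.Ldefault
	tbb	[pc, r0]  */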
26382 const char *
26383 thumb2_output_casesi (rtx *operands)
26385 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
26387 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26389 output_asm_insn ("cmp\t%0, %1", operands);
26390 output_asm_insn ("bhi\t%l3", operands);
26391 switch (GET_MODE(diff_vec))
26393 case QImode:
26394 return "tbb\t[%|pc, %0]";
26395 case HImode:
26396 return "tbh\t[%|pc, %0, lsl #1]";
26397 case SImode:
26398 if (flag_pic)
26400 output_asm_insn ("adr\t%4, %l2", operands);
26401 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
26402 output_asm_insn ("add\t%4, %4, %5", operands);
26403 return "bx\t%4";
26405 else
26407 output_asm_insn ("adr\t%4, %l2", operands);
26408 return "ldr\t%|pc, [%4, %0, lsl #2]";
26410 default:
26411 gcc_unreachable ();
26415 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
26416 per-core tuning structs. */
26417 static int
26418 arm_issue_rate (void)
26420 return current_tune->issue_rate;
26423 /* Return how many instructions the scheduler should look ahead to choose
26424 the best one. */
26425 static int
26426 arm_first_cycle_multipass_dfa_lookahead (void)
26428 int issue_rate = arm_issue_rate ();
26430 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
26433 /* Enable modeling of L2 auto-prefetcher. */
26434 static int
26435 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
26437 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
26440 const char *
26441 arm_mangle_type (const_tree type)
26443 /* The ARM ABI documents (10th October 2008) say that "__va_list"
26444 has to be mangled as if it is in the "std" namespace. */
26445 if (TARGET_AAPCS_BASED
26446 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
26447 return "St9__va_list";
26449 /* Half-precision float. */
26450 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
26451 return "Dh";
26453 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
26454 builtin type. */
26455 if (TYPE_NAME (type) != NULL)
26456 return arm_mangle_builtin_type (type);
26458 /* Use the default mangling. */
26459 return NULL;
26462 /* Order of allocation of core registers for Thumb: this allocation is
26463 written over the corresponding initial entries of the array
26464 initialized with REG_ALLOC_ORDER. We allocate all low registers
26465 first. Saving and restoring a low register is usually cheaper than
26466 using a call-clobbered high register. */
26468 static const int thumb_core_reg_alloc_order[] =
26470 3, 2, 1, 0, 4, 5, 6, 7,
26471 14, 12, 8, 9, 10, 11
26474 /* Adjust register allocation order when compiling for Thumb. */
26476 void
26477 arm_order_regs_for_local_alloc (void)
26479 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
26480 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
26481 if (TARGET_THUMB)
26482 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
26483 sizeof (thumb_core_reg_alloc_order));
26486 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
26488 bool
26489 arm_frame_pointer_required (void)
26491 if (SUBTARGET_FRAME_POINTER_REQUIRED)
26492 return true;
26494 /* If the function receives nonlocal gotos, it needs to save the frame
26495 pointer in the nonlocal_goto_save_area object. */
26496 if (cfun->has_nonlocal_label)
26497 return true;
26499 /* The frame pointer is required for non-leaf APCS frames. */
26500 if (TARGET_ARM && TARGET_APCS_FRAME && !leaf_function_p ())
26501 return true;
26503 /* If we are probing the stack in the prologue, we will have a faulting
26504 instruction prior to the stack adjustment and this requires a frame
26505 pointer if we want to catch the exception using the EABI unwinder. */
26506 if (!IS_INTERRUPT (arm_current_func_type ())
26507 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
26508 && arm_except_unwind_info (&global_options) == UI_TARGET
26509 && cfun->can_throw_non_call_exceptions)
26511 HOST_WIDE_INT size = get_frame_size ();
26513 /* That's irrelevant if there is no stack adjustment. */
26514 if (size <= 0)
26515 return false;
26517 /* That's relevant only if there is a stack probe. */
26518 if (crtl->is_leaf && !cfun->calls_alloca)
26520 /* We don't have the final size of the frame so adjust. */
26521 size += 32 * UNITS_PER_WORD;
26522 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
26523 return true;
26525 else
26526 return true;
26529 return false;
26532 /* Only thumb1 can't support conditional execution, so return true if
26533 the target is not thumb1. */
26534 static bool
26535 arm_have_conditional_execution (void)
26537 return !TARGET_THUMB1;
26540 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
26541 static HOST_WIDE_INT
26542 arm_vector_alignment (const_tree type)
26544 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
26546 if (TARGET_AAPCS_BASED)
26547 align = MIN (align, 64);
26549 return align;
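/* Return the sizes (in bytes) of the vector modes the auto-vectorizer should
   try, as a bit mask: both quadword (16) and doubleword (8) Neon vectors, or
   zero (only the preferred mode) when -mvectorize-with-neon-double is in
   effect.  */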
26552 static unsigned int
26553 arm_autovectorize_vector_sizes (void)
26555 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
26558 static bool
26559 arm_vector_alignment_reachable (const_tree type, bool is_packed)
26561 /* Vectors which aren't in packed structures will not be less aligned than
26562 the natural alignment of their element type, so this is safe. */
26563 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
26564 return !is_packed;
26566 return default_builtin_vector_alignment_reachable (type, is_packed);
26569 static bool
26570 arm_builtin_support_vector_misalignment (machine_mode mode,
26571 const_tree type, int misalignment,
26572 bool is_packed)
26574 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
26576 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
26578 if (is_packed)
26579 return align == 1;
26581 /* If the misalignment is unknown, we should be able to handle the access
26582 so long as it is not to a member of a packed data structure. */
26583 if (misalignment == -1)
26584 return true;
26586 /* Return true if the misalignment is a multiple of the natural alignment
26587 of the vector's element type. This is probably always going to be
26588 true in practice, since we've already established that this isn't a
26589 packed access. */
26590 return ((misalignment % align) == 0);
26593 return default_builtin_support_vector_misalignment (mode, type, misalignment,
26594 is_packed);
26597 static void
26598 arm_conditional_register_usage (void)
26600 int regno;
26602 if (TARGET_THUMB1 && optimize_size)
26604 /* When optimizing for size on Thumb-1, it's better not
26605 to use the HI regs, because of the overhead of
26606 stacking them. */
26607 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
26608 fixed_regs[regno] = call_used_regs[regno] = 1;
26611 /* The link register can be clobbered by any branch insn,
26612 but we have no way to track that at present, so mark
26613 it as unavailable. */
26614 if (TARGET_THUMB1)
26615 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
26617 if (TARGET_32BIT && TARGET_HARD_FLOAT)
26619 /* VFPv3 registers are disabled when earlier VFP
26620 versions are selected due to the definition of
26621 LAST_VFP_REGNUM. */
26622 for (regno = FIRST_VFP_REGNUM;
26623 regno <= LAST_VFP_REGNUM; ++ regno)
26625 fixed_regs[regno] = 0;
26626 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
26627 || regno >= FIRST_VFP_REGNUM + 32;
26631 if (TARGET_REALLY_IWMMXT)
26633 regno = FIRST_IWMMXT_GR_REGNUM;
26634 /* The 2002/10/09 revision of the XScale ABI has wCG0
26635 and wCG1 as call-preserved registers. The 2002/11/21
26636 revision changed this so that all wCG registers are
26637 scratch registers. */
26638 for (regno = FIRST_IWMMXT_GR_REGNUM;
26639 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
26640 fixed_regs[regno] = 0;
26641 /* The XScale ABI has wR0 - wR9 as scratch registers,
26642 the rest as call-preserved registers. */
26643 for (regno = FIRST_IWMMXT_REGNUM;
26644 regno <= LAST_IWMMXT_REGNUM; ++ regno)
26646 fixed_regs[regno] = 0;
26647 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
26651 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
26653 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
26654 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
26656 else if (TARGET_APCS_STACK)
26658 fixed_regs[10] = 1;
26659 call_used_regs[10] = 1;
26661 /* -mcaller-super-interworking reserves r11 for calls to
26662 _interwork_r11_call_via_rN(). Making the register global
26663 is an easy way of ensuring that it remains valid for all
26664 calls. */
26665 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
26666 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
26668 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26669 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26670 if (TARGET_CALLER_INTERWORKING)
26671 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26673 SUBTARGET_CONDITIONAL_REGISTER_USAGE
26676 static reg_class_t
26677 arm_preferred_rename_class (reg_class_t rclass)
26679 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
26680 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS
26681 so that code size can be reduced. */
26682 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
26683 return LO_REGS;
26684 else
26685 return NO_REGS;
26688 /* Compute the attribute "length" of insn "*push_multi".
26689 So this function MUST be kept in sync with that insn pattern. */
26691 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
26693 int i, regno, hi_reg;
26694 int num_saves = XVECLEN (parallel_op, 0);
26696 /* ARM mode. */
26697 if (TARGET_ARM)
26698 return 4;
26699 /* Thumb1 mode. */
26700 if (TARGET_THUMB1)
26701 return 2;
26703 /* Thumb2 mode. */
26704 regno = REGNO (first_op);
26705 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
26706 list fits in 8 bits. Normally this means all registers in the list must be
26707 LO_REGS, that is (R0-R7). If any HI_REGS register is used, then we must use
26708 a 32-bit encoding. The one exception is PUSH, where LR (a HI_REGS register)
26709 can still be used with the 16-bit encoding. */
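/* E.g. (illustrative): "push {r0-r7, lr}" can use the 16-bit encoding
   (length 2), whereas "push {r4, r8}" requires the 32-bit encoding
   (length 4).  */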
26710 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
26711 for (i = 1; i < num_saves && !hi_reg; i++)
26713 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
26714 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
26717 if (!hi_reg)
26718 return 2;
26719 return 4;
26722 /* Compute the attribute "length" of insn. Currently, this function is used
26723 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
26724 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
26725 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
26726 true if OPERANDS contains an insn which explicitly updates the base register. */
26729 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
26731 /* ARM mode. */
26732 if (TARGET_ARM)
26733 return 4;
26734 /* Thumb1 mode. */
26735 if (TARGET_THUMB1)
26736 return 2;
26738 rtx parallel_op = operands[0];
26739 /* Start at the index of the last element of the PARALLEL. */
26740 unsigned indx = XVECLEN (parallel_op, 0) - 1;
26741 /* Initialize to the base register's number. */
26742 unsigned regno = REGNO (operands[1]);
26743 /* Skip the return and write-back patterns;
26744 we only need the register pop patterns for later analysis. */
26745 unsigned first_indx = 0;
26746 first_indx += return_pc ? 1 : 0;
26747 first_indx += write_back_p ? 1 : 0;
26749 /* A pop operation can be done through LDM or POP. If the base register is SP
26750 and write back is enabled, then the LDM is an alias of POP. */
26751 bool pop_p = (regno == SP_REGNUM && write_back_p);
26752 bool ldm_p = !pop_p;
26754 /* Check base register for LDM. */
26755 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
26756 return 4;
26758 /* Check each register in the list. */
26759 for (; indx >= first_indx; indx--)
26761 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
26762 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
26763 comment in arm_attr_length_push_multi. */
26764 if (REGNO_REG_CLASS (regno) == HI_REGS
26765 && (regno != PC_REGNUM || ldm_p))
26766 return 4;
26769 return 2;
26772 /* Compute the number of instructions emitted by output_move_double. */
26774 arm_count_output_move_double_insns (rtx *operands)
26776 int count;
26777 rtx ops[2];
26778 /* output_move_double may modify the operands array, so call it
26779 here on a copy of the array. */
26780 ops[0] = operands[0];
26781 ops[1] = operands[1];
26782 output_move_double (ops, false, &count);
26783 return count;
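/* If OPERAND is a CONST_DOUBLE with the value 1 / 2**N for some N in the
   range [0, 31], return N; otherwise return 0.  */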
26787 vfp3_const_double_for_fract_bits (rtx operand)
26789 REAL_VALUE_TYPE r0;
26791 if (!CONST_DOUBLE_P (operand))
26792 return 0;
26794 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
26795 if (exact_real_inverse (DFmode, &r0)
26796 && !REAL_VALUE_NEGATIVE (r0))
26798 if (exact_real_truncate (DFmode, &r0))
26800 HOST_WIDE_INT value = real_to_integer (&r0);
26801 value = value & 0xffffffff;
26802 if ((value != 0) && ( (value & (value - 1)) == 0))
26804 int ret = exact_log2 (value);
26805 gcc_assert (IN_RANGE (ret, 0, 31));
26806 return ret;
26810 return 0;
26813 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
26814 log2 is in [1, 32], return that log2. Otherwise return -1.
26815 This is used in the patterns for vcvt.s32.f32 floating-point to
26816 fixed-point conversions. */
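/* For instance (illustrative): 256.0 yields 8; 1.0 yields -1 because its
   log2 (0) is outside [1, 32]; any non-power-of-2 or negative value also
   yields -1.  */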
26819 vfp3_const_double_for_bits (rtx x)
26821 const REAL_VALUE_TYPE *r;
26823 if (!CONST_DOUBLE_P (x))
26824 return -1;
26826 r = CONST_DOUBLE_REAL_VALUE (x);
26828 if (REAL_VALUE_NEGATIVE (*r)
26829 || REAL_VALUE_ISNAN (*r)
26830 || REAL_VALUE_ISINF (*r)
26831 || !real_isinteger (r, SFmode))
26832 return -1;
26834 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
26836 /* The exact_log2 above will have returned -1 if this is
26837 not an exact log2. */
26838 if (!IN_RANGE (hwint, 1, 32))
26839 return -1;
26841 return hwint;
26845 /* Emit a memory barrier around an atomic sequence according to MODEL. */
26847 static void
26848 arm_pre_atomic_barrier (enum memmodel model)
26850 if (need_atomic_barrier_p (model, true))
26851 emit_insn (gen_memory_barrier ());
26854 static void
26855 arm_post_atomic_barrier (enum memmodel model)
26857 if (need_atomic_barrier_p (model, false))
26858 emit_insn (gen_memory_barrier ());
26861 /* Emit the load-exclusive and store-exclusive instructions.
26862 Use acquire and release versions if necessary. */
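/* For example (illustrative), an SImode load-exclusive expands to an
   "ldrex" instruction, or to "ldaex" when acquire semantics are requested;
   the corresponding store-exclusive forms are "strex" and "stlex".  */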
26864 static void
26865 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
26867 rtx (*gen) (rtx, rtx);
26869 if (acq)
26871 switch (mode)
26873 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
26874 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
26875 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
26876 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
26877 default:
26878 gcc_unreachable ();
26881 else
26883 switch (mode)
26885 case QImode: gen = gen_arm_load_exclusiveqi; break;
26886 case HImode: gen = gen_arm_load_exclusivehi; break;
26887 case SImode: gen = gen_arm_load_exclusivesi; break;
26888 case DImode: gen = gen_arm_load_exclusivedi; break;
26889 default:
26890 gcc_unreachable ();
26894 emit_insn (gen (rval, mem));
26897 static void
26898 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
26899 rtx mem, bool rel)
26901 rtx (*gen) (rtx, rtx, rtx);
26903 if (rel)
26905 switch (mode)
26907 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
26908 case HImode: gen = gen_arm_store_release_exclusivehi; break;
26909 case SImode: gen = gen_arm_store_release_exclusivesi; break;
26910 case DImode: gen = gen_arm_store_release_exclusivedi; break;
26911 default:
26912 gcc_unreachable ();
26915 else
26917 switch (mode)
26919 case QImode: gen = gen_arm_store_exclusiveqi; break;
26920 case HImode: gen = gen_arm_store_exclusivehi; break;
26921 case SImode: gen = gen_arm_store_exclusivesi; break;
26922 case DImode: gen = gen_arm_store_exclusivedi; break;
26923 default:
26924 gcc_unreachable ();
26928 emit_insn (gen (bval, rval, mem));
26931 /* Mark the previous jump instruction as unlikely. */
26933 static void
26934 emit_unlikely_jump (rtx insn)
26936 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
26938 insn = emit_jump_insn (insn);
26939 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
26942 /* Expand a compare and swap pattern. */
26944 void
26945 arm_expand_compare_and_swap (rtx operands[])
26947 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
26948 machine_mode mode;
26949 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
26951 bval = operands[0];
26952 rval = operands[1];
26953 mem = operands[2];
26954 oldval = operands[3];
26955 newval = operands[4];
26956 is_weak = operands[5];
26957 mod_s = operands[6];
26958 mod_f = operands[7];
26959 mode = GET_MODE (mem);
26961 /* Normally the succ memory model must be stronger than fail, but in the
26962 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
26963 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
26965 if (TARGET_HAVE_LDACQ
26966 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
26967 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
26968 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
26970 switch (mode)
26972 case QImode:
26973 case HImode:
26974 /* For narrow modes, we're going to perform the comparison in SImode,
26975 so do the zero-extension now. */
26976 rval = gen_reg_rtx (SImode);
26977 oldval = convert_modes (SImode, mode, oldval, true);
26978 /* FALLTHRU */
26980 case SImode:
26981 /* Force the value into a register if needed. We waited until after
26982 the zero-extension above to do this properly. */
26983 if (!arm_add_operand (oldval, SImode))
26984 oldval = force_reg (SImode, oldval);
26985 break;
26987 case DImode:
26988 if (!cmpdi_operand (oldval, mode))
26989 oldval = force_reg (mode, oldval);
26990 break;
26992 default:
26993 gcc_unreachable ();
26996 switch (mode)
26998 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
26999 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27000 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27001 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27002 default:
27003 gcc_unreachable ();
27006 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CCmode, CC_REGNUM);
27007 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27009 if (mode == QImode || mode == HImode)
27010 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27012 /* In all cases, we arrange for success to be signaled by Z set.
27013 This arrangement allows for the boolean result to be used directly
27014 in a subsequent branch, post optimization. For Thumb-1 targets, the
27015 boolean negation of the result is also stored in bval, because the
27016 Thumb-1 backend lacks dependency tracking for the CC flag, flag-setting
27017 not being represented at the RTL level. */
27018 if (TARGET_THUMB1)
27019 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
27020 else
27022 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
27023 emit_insn (gen_rtx_SET (bval, x));
27027 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27028 another memory store between the load-exclusive and store-exclusive can
27029 reset the monitor from Exclusive to Open state. This means we must wait
27030 until after reload to split the pattern, lest we get a register spill in
27031 the middle of the atomic sequence. Success of the compare and swap is
27032 indicated by the Z flag being set for 32-bit targets and by neg_bval being
27033 zero for Thumb-1 targets (i.e. the negation of the boolean value returned by
27034 atomic_compare_and_swapmode standard pattern in operand 0). */
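/* Roughly, for a strong compare-and-swap the split below emits a loop of
   the form

     label1: rval = [mem]                        (load-exclusive)
             if (rval != oldval) goto label2
             neg_bval = store-exclusive ([mem] = newval)
             if (neg_bval != 0) goto label1
     label2:

   bracketed by barriers or acquire/release accesses as the memory model
   requires; a weak compare-and-swap simply omits the loop-back branch. */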
27036 void
27037 arm_split_compare_and_swap (rtx operands[])
27039 rtx rval, mem, oldval, newval, neg_bval;
27040 machine_mode mode;
27041 enum memmodel mod_s, mod_f;
27042 bool is_weak;
27043 rtx_code_label *label1, *label2;
27044 rtx x, cond;
27046 rval = operands[1];
27047 mem = operands[2];
27048 oldval = operands[3];
27049 newval = operands[4];
27050 is_weak = (operands[5] != const0_rtx);
27051 mod_s = memmodel_from_int (INTVAL (operands[6]));
27052 mod_f = memmodel_from_int (INTVAL (operands[7]));
27053 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
27054 mode = GET_MODE (mem);
27056 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
27058 bool use_acquire = TARGET_HAVE_LDACQ
27059 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27060 || is_mm_release (mod_s));
27062 bool use_release = TARGET_HAVE_LDACQ
27063 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27064 || is_mm_acquire (mod_s));
27066 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
27067 a full barrier is emitted after the store-release. */
27068 if (is_armv8_sync)
27069 use_acquire = false;
27071 /* Checks whether a barrier is needed and emits one accordingly. */
27072 if (!(use_acquire || use_release))
27073 arm_pre_atomic_barrier (mod_s);
27075 label1 = NULL;
27076 if (!is_weak)
27078 label1 = gen_label_rtx ();
27079 emit_label (label1);
27081 label2 = gen_label_rtx ();
27083 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27085 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
27086 as required to communicate with arm_expand_compare_and_swap. */
27087 if (TARGET_32BIT)
27089 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
27090 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27091 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27092 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27093 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27095 else
27097 emit_move_insn (neg_bval, const1_rtx);
27098 cond = gen_rtx_NE (VOIDmode, rval, oldval);
27099 if (thumb1_cmpneg_operand (oldval, SImode))
27100 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
27101 label2, cond));
27102 else
27103 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
27106 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
27108 /* Weak or strong, we want EQ to be true for success, so that we
27109 match the flags that we got from the compare above. */
27110 if (TARGET_32BIT)
27112 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27113 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
27114 emit_insn (gen_rtx_SET (cond, x));
27117 if (!is_weak)
27119 /* Z is set to boolean value of !neg_bval, as required to communicate
27120 with arm_expand_compare_and_swap. */
27121 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
27122 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
27125 if (!is_mm_relaxed (mod_f))
27126 emit_label (label2);
27128 /* Checks whether a barrier is needed and emits one accordingly. */
27129 if (is_armv8_sync
27130 || !(use_acquire || use_release))
27131 arm_post_atomic_barrier (mod_s);
27133 if (is_mm_relaxed (mod_f))
27134 emit_label (label2);
27137 /* Split an atomic operation pattern. Operation is given by CODE and is one
27138 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
27139 operation). Operation is performed on the content at MEM and on VALUE
27140 following the memory model MODEL_RTX. The content at MEM before and after
27141 the operation is returned in OLD_OUT and NEW_OUT respectively while the
27142 success of the operation is returned in COND. Using a scratch register or
27143 an operand register for these determines what result is returned for that
27144 pattern. */
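/* Roughly, the split below emits a load-exclusive/store-exclusive loop:

     label: old_out = [mem]                      (load-exclusive)
            new_out = old_out <code> value       (AND followed by NOT for nand)
            cond = store-exclusive ([mem] = new_out)
            if (cond != 0) goto label

   bracketed by barriers or acquire/release accesses as the memory model
   requires. */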
27146 void
27147 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27148 rtx value, rtx model_rtx, rtx cond)
27150 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
27151 machine_mode mode = GET_MODE (mem);
27152 machine_mode wmode = (mode == DImode ? DImode : SImode);
27153 rtx_code_label *label;
27154 bool all_low_regs, bind_old_new;
27155 rtx x;
27157 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
27159 bool use_acquire = TARGET_HAVE_LDACQ
27160 && !(is_mm_relaxed (model) || is_mm_consume (model)
27161 || is_mm_release (model));
27163 bool use_release = TARGET_HAVE_LDACQ
27164 && !(is_mm_relaxed (model) || is_mm_consume (model)
27165 || is_mm_acquire (model));
27167 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
27168 a full barrier is emitted after the store-release. */
27169 if (is_armv8_sync)
27170 use_acquire = false;
27172 /* Checks whether a barrier is needed and emits one accordingly. */
27173 if (!(use_acquire || use_release))
27174 arm_pre_atomic_barrier (model);
27176 label = gen_label_rtx ();
27177 emit_label (label);
27179 if (new_out)
27180 new_out = gen_lowpart (wmode, new_out);
27181 if (old_out)
27182 old_out = gen_lowpart (wmode, old_out);
27183 else
27184 old_out = new_out;
27185 value = simplify_gen_subreg (wmode, value, mode, 0);
27187 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27189 /* Does the operation require destination and first operand to use the same
27190 register? This is decided by register constraints of relevant insn
27191 patterns in thumb1.md. */
27192 gcc_assert (!new_out || REG_P (new_out));
27193 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
27194 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
27195 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
27196 bind_old_new =
27197 (TARGET_THUMB1
27198 && code != SET
27199 && code != MINUS
27200 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
27202 /* We want to return the old value while putting the result of the operation
27203 in the same register as the old value so copy the old value over to the
27204 destination register and use that register for the operation. */
27205 if (old_out && bind_old_new)
27207 emit_move_insn (new_out, old_out);
27208 old_out = new_out;
27211 switch (code)
27213 case SET:
27214 new_out = value;
27215 break;
27217 case NOT:
27218 x = gen_rtx_AND (wmode, old_out, value);
27219 emit_insn (gen_rtx_SET (new_out, x));
27220 x = gen_rtx_NOT (wmode, new_out);
27221 emit_insn (gen_rtx_SET (new_out, x));
27222 break;
27224 case MINUS:
27225 if (CONST_INT_P (value))
27227 value = GEN_INT (-INTVAL (value));
27228 code = PLUS;
27230 /* FALLTHRU */
27232 case PLUS:
27233 if (mode == DImode)
27235 /* DImode plus/minus need to clobber flags. */
27236 /* The adddi3 and subdi3 patterns are incorrectly written so that
27237 they require matching operands, even when we could easily support
27238 three operands. Thankfully, this can be fixed up post-splitting,
27239 as the individual add+adc patterns do accept three operands and
27240 post-reload cprop can make these moves go away. */
27241 emit_move_insn (new_out, old_out);
27242 if (code == PLUS)
27243 x = gen_adddi3 (new_out, new_out, value);
27244 else
27245 x = gen_subdi3 (new_out, new_out, value);
27246 emit_insn (x);
27247 break;
27249 /* FALLTHRU */
27251 default:
27252 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27253 emit_insn (gen_rtx_SET (new_out, x));
27254 break;
27257 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27258 use_release);
27260 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27261 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27263 /* Checks whether a barrier is needed and emits one accordingly. */
27264 if (is_armv8_sync
27265 || !(use_acquire || use_release))
27266 arm_post_atomic_barrier (model);
27269 #define MAX_VECT_LEN 16
27271 struct expand_vec_perm_d
27273 rtx target, op0, op1;
27274 unsigned char perm[MAX_VECT_LEN];
27275 machine_mode vmode;
27276 unsigned char nelt;
27277 bool one_vector_p;
27278 bool testing_p;
27281 /* Generate a variable permutation. */
27283 static void
27284 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27286 machine_mode vmode = GET_MODE (target);
27287 bool one_vector_p = rtx_equal_p (op0, op1);
27289 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27290 gcc_checking_assert (GET_MODE (op0) == vmode);
27291 gcc_checking_assert (GET_MODE (op1) == vmode);
27292 gcc_checking_assert (GET_MODE (sel) == vmode);
27293 gcc_checking_assert (TARGET_NEON);
27295 if (one_vector_p)
27297 if (vmode == V8QImode)
27298 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27299 else
27300 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27302 else
27304 rtx pair;
27306 if (vmode == V8QImode)
27308 pair = gen_reg_rtx (V16QImode);
27309 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27310 pair = gen_lowpart (TImode, pair);
27311 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27313 else
27315 pair = gen_reg_rtx (OImode);
27316 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27317 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27322 void
27323 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27325 machine_mode vmode = GET_MODE (target);
27326 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27327 bool one_vector_p = rtx_equal_p (op0, op1);
27328 rtx rmask[MAX_VECT_LEN], mask;
27330 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27331 numbering of elements for big-endian, we must reverse the order. */
27332 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27334 /* The VTBL instruction does not use a modulo index, so we must take care
27335 of that ourselves. */
27336 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27337 for (i = 0; i < nelt; ++i)
27338 rmask[i] = mask;
27339 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27340 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27342 arm_expand_vec_perm_1 (target, op0, op1, sel);
27345 /* Map lane ordering between architectural lane order, and GCC lane order,
27346 taking into account ABI. See comment above output_move_neon for details. */
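/* For example, on a big-endian target neon_endian_lane_map (V4SImode, 0)
   is 1: the lane order is reversed (0 -> 3) and, the mode being 16 bytes
   wide, the two D registers are swapped (3 ^ 2 -> 1). On little-endian
   targets lanes map to themselves. */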
27348 static int
27349 neon_endian_lane_map (machine_mode mode, int lane)
27351 if (BYTES_BIG_ENDIAN)
27353 int nelems = GET_MODE_NUNITS (mode);
27354 /* Reverse lane order. */
27355 lane = (nelems - 1 - lane);
27356 /* Reverse D register order, to match ABI. */
27357 if (GET_MODE_SIZE (mode) == 16)
27358 lane = lane ^ (nelems / 2);
27360 return lane;
27363 /* Some permutations index into pairs of vectors, this is a helper function
27364 to map indexes into those pairs of vectors. */
27366 static int
27367 neon_pair_endian_lane_map (machine_mode mode, int lane)
27369 int nelem = GET_MODE_NUNITS (mode);
27370 if (BYTES_BIG_ENDIAN)
27371 lane =
27372 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
27373 return lane;
27376 /* Generate or test for an insn that supports a constant permutation. */
27378 /* Recognize patterns for the VUZP insns. */
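/* For example, on a little-endian target a two-operand V4SImode selector
   of { 0, 2, 4, 6 } (or { 1, 3, 5, 7 }) picks the even (resp. odd) lanes
   of the concatenated inputs, which is what VUZP produces. */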
27380 static bool
27381 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27383 unsigned int i, odd, mask, nelt = d->nelt;
27384 rtx out0, out1, in0, in1;
27385 rtx (*gen)(rtx, rtx, rtx, rtx);
27386 int first_elem;
27387 int swap_nelt;
27389 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27390 return false;
27392 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
27393 big-endian pattern on 64-bit vectors, so we correct for that. */
27394 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
27395 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
27397 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
27399 if (first_elem == neon_endian_lane_map (d->vmode, 0))
27400 odd = 0;
27401 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
27402 odd = 1;
27403 else
27404 return false;
27405 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27407 for (i = 0; i < nelt; i++)
27409 unsigned elt =
27410 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
27411 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
27412 return false;
27415 /* Success! */
27416 if (d->testing_p)
27417 return true;
27419 switch (d->vmode)
27421 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27422 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27423 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27424 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27425 case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
27426 case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
27427 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27428 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27429 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27430 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27431 default:
27432 gcc_unreachable ();
27435 in0 = d->op0;
27436 in1 = d->op1;
27437 if (swap_nelt != 0)
27438 std::swap (in0, in1);
27440 out0 = d->target;
27441 out1 = gen_reg_rtx (d->vmode);
27442 if (odd)
27443 std::swap (out0, out1);
27445 emit_insn (gen (out0, in0, in1, out1));
27446 return true;
27449 /* Recognize patterns for the VZIP insns. */
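/* For example, on a little-endian target a two-operand V4SImode selector
   of { 0, 4, 1, 5 } (or { 2, 6, 3, 7 }) interleaves the low (resp. high)
   halves of the two inputs, which is what VZIP produces. */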
27451 static bool
27452 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27454 unsigned int i, high, mask, nelt = d->nelt;
27455 rtx out0, out1, in0, in1;
27456 rtx (*gen)(rtx, rtx, rtx, rtx);
27457 int first_elem;
27458 bool is_swapped;
27460 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27461 return false;
27463 is_swapped = BYTES_BIG_ENDIAN;
27465 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
27467 high = nelt / 2;
27468 if (first_elem == neon_endian_lane_map (d->vmode, high))
27470 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
27471 high = 0;
27472 else
27473 return false;
27474 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27476 for (i = 0; i < nelt / 2; i++)
27478 unsigned elt =
27479 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
27480 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
27481 != elt)
27482 return false;
27483 elt =
27484 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
27485 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
27486 != elt)
27487 return false;
27490 /* Success! */
27491 if (d->testing_p)
27492 return true;
27494 switch (d->vmode)
27496 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27497 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27498 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27499 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27500 case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
27501 case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
27502 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27503 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27504 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27505 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27506 default:
27507 gcc_unreachable ();
27510 in0 = d->op0;
27511 in1 = d->op1;
27512 if (is_swapped)
27513 std::swap (in0, in1);
27515 out0 = d->target;
27516 out1 = gen_reg_rtx (d->vmode);
27517 if (high)
27518 std::swap (out0, out1);
27520 emit_insn (gen (out0, in0, in1, out1));
27521 return true;
27524 /* Recognize patterns for the VREV insns. */
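/* For example, a one-operand V8QImode selector of { 1, 0, 3, 2, 5, 4, 7, 6 }
   has diff == 1 and reverses the bytes within each halfword, i.e. VREV16;
   for byte elements, diff values of 3 and 7 correspond to VREV32 and
   VREV64 respectively. */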
27526 static bool
27527 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27529 unsigned int i, j, diff, nelt = d->nelt;
27530 rtx (*gen)(rtx, rtx);
27532 if (!d->one_vector_p)
27533 return false;
27535 diff = d->perm[0];
27536 switch (diff)
27538 case 7:
27539 switch (d->vmode)
27541 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27542 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27543 default:
27544 return false;
27546 break;
27547 case 3:
27548 switch (d->vmode)
27550 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27551 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27552 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27553 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27554 case V8HFmode: gen = gen_neon_vrev64v8hf; break;
27555 case V4HFmode: gen = gen_neon_vrev64v4hf; break;
27556 default:
27557 return false;
27559 break;
27560 case 1:
27561 switch (d->vmode)
27563 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27564 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27565 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27566 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27567 case V4SImode: gen = gen_neon_vrev64v4si; break;
27568 case V2SImode: gen = gen_neon_vrev64v2si; break;
27569 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27570 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27571 default:
27572 return false;
27574 break;
27575 default:
27576 return false;
27579 for (i = 0; i < nelt ; i += diff + 1)
27580 for (j = 0; j <= diff; j += 1)
27582 /* This is guaranteed to hold, since the value of diff
27583 is 7, 3 or 1 and we have enough elements in the
27584 queue to generate this. Getting a vector mask with a
27585 diff of any other value implies that something has
27586 gone wrong by the time we get here. */
27587 gcc_assert (i + j < nelt);
27588 if (d->perm[i + j] != i + diff - j)
27589 return false;
27592 /* Success! */
27593 if (d->testing_p)
27594 return true;
27596 emit_insn (gen (d->target, d->op0));
27597 return true;
27600 /* Recognize patterns for the VTRN insns. */
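/* For example, on a little-endian target a two-operand V4SImode selector
   of { 0, 4, 2, 6 } (or { 1, 5, 3, 7 }) transposes pairs of elements
   drawn from the two inputs, which is what VTRN produces. */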
27602 static bool
27603 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27605 unsigned int i, odd, mask, nelt = d->nelt;
27606 rtx out0, out1, in0, in1;
27607 rtx (*gen)(rtx, rtx, rtx, rtx);
27609 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27610 return false;
27612 /* Note that these are little-endian tests. Adjust for big-endian later. */
27613 if (d->perm[0] == 0)
27614 odd = 0;
27615 else if (d->perm[0] == 1)
27616 odd = 1;
27617 else
27618 return false;
27619 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27621 for (i = 0; i < nelt; i += 2)
27623 if (d->perm[i] != i + odd)
27624 return false;
27625 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
27626 return false;
27629 /* Success! */
27630 if (d->testing_p)
27631 return true;
27633 switch (d->vmode)
27635 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
27636 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
27637 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
27638 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
27639 case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
27640 case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
27641 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
27642 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
27643 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
27644 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
27645 default:
27646 gcc_unreachable ();
27649 in0 = d->op0;
27650 in1 = d->op1;
27651 if (BYTES_BIG_ENDIAN)
27653 std::swap (in0, in1);
27654 odd = !odd;
27657 out0 = d->target;
27658 out1 = gen_reg_rtx (d->vmode);
27659 if (odd)
27660 std::swap (out0, out1);
27662 emit_insn (gen (out0, in0, in1, out1));
27663 return true;
27666 /* Recognize patterns for the VEXT insns. */
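/* For example, a two-operand V4SImode selector of { 1, 2, 3, 4 } selects
   four consecutive lanes of the concatenated inputs starting at index 1,
   i.e. VEXT with an offset of 1. */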
27668 static bool
27669 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
27671 unsigned int i, nelt = d->nelt;
27672 rtx (*gen) (rtx, rtx, rtx, rtx);
27673 rtx offset;
27675 unsigned int location;
27677 unsigned int next = d->perm[0] + 1;
27679 /* TODO: Handle GCC's numbering of elements for big-endian. */
27680 if (BYTES_BIG_ENDIAN)
27681 return false;
27683 /* Check if the extracted indexes are increasing by one. */
27684 for (i = 1; i < nelt; next++, i++)
27686 /* If we hit the most significant element of the 2nd vector in
27687 the previous iteration, no need to test further. */
27688 if (next == 2 * nelt)
27689 return false;
27691 /* If we are operating on only one vector: it could be a
27692 rotation. If there are only two elements of size < 64, let
27693 arm_evpc_neon_vrev catch it. */
27694 if (d->one_vector_p && (next == nelt))
27696 if ((nelt == 2) && (d->vmode != V2DImode))
27697 return false;
27698 else
27699 next = 0;
27702 if (d->perm[i] != next)
27703 return false;
27706 location = d->perm[0];
27708 switch (d->vmode)
27710 case V16QImode: gen = gen_neon_vextv16qi; break;
27711 case V8QImode: gen = gen_neon_vextv8qi; break;
27712 case V4HImode: gen = gen_neon_vextv4hi; break;
27713 case V8HImode: gen = gen_neon_vextv8hi; break;
27714 case V2SImode: gen = gen_neon_vextv2si; break;
27715 case V4SImode: gen = gen_neon_vextv4si; break;
27716 case V4HFmode: gen = gen_neon_vextv4hf; break;
27717 case V8HFmode: gen = gen_neon_vextv8hf; break;
27718 case V2SFmode: gen = gen_neon_vextv2sf; break;
27719 case V4SFmode: gen = gen_neon_vextv4sf; break;
27720 case V2DImode: gen = gen_neon_vextv2di; break;
27721 default:
27722 return false;
27725 /* Success! */
27726 if (d->testing_p)
27727 return true;
27729 offset = GEN_INT (location);
27730 emit_insn (gen (d->target, d->op0, d->op1, offset));
27731 return true;
27734 /* The NEON VTBL instruction is a fully variable permutation that's even
27735 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
27736 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
27737 can do slightly better by expanding this as a constant where we don't
27738 have to apply a mask. */
27740 static bool
27741 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
27743 rtx rperm[MAX_VECT_LEN], sel;
27744 machine_mode vmode = d->vmode;
27745 unsigned int i, nelt = d->nelt;
27747 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27748 numbering of elements for big-endian, we must reverse the order. */
27749 if (BYTES_BIG_ENDIAN)
27750 return false;
27752 if (d->testing_p)
27753 return true;
27755 /* Generic code will try constant permutation twice. Once with the
27756 original mode and again with the elements lowered to QImode.
27757 So wait and don't do the selector expansion ourselves. */
27758 if (vmode != V8QImode && vmode != V16QImode)
27759 return false;
27761 for (i = 0; i < nelt; ++i)
27762 rperm[i] = GEN_INT (d->perm[i]);
27763 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
27764 sel = force_reg (vmode, sel);
27766 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
27767 return true;
27770 static bool
27771 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
27773 /* Check if the input mask matches vext before reordering the
27774 operands. */
27775 if (TARGET_NEON)
27776 if (arm_evpc_neon_vext (d))
27777 return true;
27779 /* The pattern matching functions above are written to look for a small
27780 number to begin the sequence (0, 1, N/2). If we begin with an index
27781 from the second operand, we can swap the operands. */
27782 if (d->perm[0] >= d->nelt)
27784 unsigned i, nelt = d->nelt;
27786 for (i = 0; i < nelt; ++i)
27787 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
27789 std::swap (d->op0, d->op1);
27792 if (TARGET_NEON)
27794 if (arm_evpc_neon_vuzp (d))
27795 return true;
27796 if (arm_evpc_neon_vzip (d))
27797 return true;
27798 if (arm_evpc_neon_vrev (d))
27799 return true;
27800 if (arm_evpc_neon_vtrn (d))
27801 return true;
27802 return arm_evpc_neon_vtbl (d);
27804 return false;
27807 /* Expand a vec_perm_const pattern. */
27809 bool
27810 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
27812 struct expand_vec_perm_d d;
27813 int i, nelt, which;
27815 d.target = target;
27816 d.op0 = op0;
27817 d.op1 = op1;
27819 d.vmode = GET_MODE (target);
27820 gcc_assert (VECTOR_MODE_P (d.vmode));
27821 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
27822 d.testing_p = false;
27824 for (i = which = 0; i < nelt; ++i)
27826 rtx e = XVECEXP (sel, 0, i);
27827 int ei = INTVAL (e) & (2 * nelt - 1);
27828 which |= (ei < nelt ? 1 : 2);
27829 d.perm[i] = ei;
27832 switch (which)
27834 default:
27835 gcc_unreachable ();
27837 case 3:
27838 d.one_vector_p = false;
27839 if (!rtx_equal_p (op0, op1))
27840 break;
27842 /* The elements of PERM do not suggest that only the first operand
27843 is used, but both operands are identical. Allow easier matching
27844 of the permutation by folding the permutation into the single
27845 input vector. */
27846 /* FALLTHRU */
27847 case 2:
27848 for (i = 0; i < nelt; ++i)
27849 d.perm[i] &= nelt - 1;
27850 d.op0 = op1;
27851 d.one_vector_p = true;
27852 break;
27854 case 1:
27855 d.op1 = op0;
27856 d.one_vector_p = true;
27857 break;
27860 return arm_expand_vec_perm_const_1 (&d);
27863 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
27865 static bool
27866 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
27867 const unsigned char *sel)
27869 struct expand_vec_perm_d d;
27870 unsigned int i, nelt, which;
27871 bool ret;
27873 d.vmode = vmode;
27874 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
27875 d.testing_p = true;
27876 memcpy (d.perm, sel, nelt);
27878 /* Categorize the set of elements in the selector. */
27879 for (i = which = 0; i < nelt; ++i)
27881 unsigned char e = d.perm[i];
27882 gcc_assert (e < 2 * nelt);
27883 which |= (e < nelt ? 1 : 2);
27886 /* For all elements from second vector, fold the elements to first. */
27887 if (which == 2)
27888 for (i = 0; i < nelt; ++i)
27889 d.perm[i] -= nelt;
27891 /* Check whether the mask can be applied to the vector type. */
27892 d.one_vector_p = (which != 3);
27894 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
27895 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
27896 if (!d.one_vector_p)
27897 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
27899 start_sequence ();
27900 ret = arm_expand_vec_perm_const_1 (&d);
27901 end_sequence ();
27903 return ret;
27906 bool
27907 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
27909 /* If we are soft float and either have ldrd or the mode fits in a
27910 single word, then all auto increment forms are ok. */
27911 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
27912 return true;
27914 switch (code)
27916 /* Post-increment and pre-decrement are supported for all
27917 instruction forms except for vector forms. */
27918 case ARM_POST_INC:
27919 case ARM_PRE_DEC:
27920 if (VECTOR_MODE_P (mode))
27922 if (code != ARM_PRE_DEC)
27923 return true;
27924 else
27925 return false;
27928 return true;
27930 case ARM_POST_DEC:
27931 case ARM_PRE_INC:
27932 /* Without LDRD and mode size greater than
27933 word size, there is no point in auto-incrementing
27934 because ldm and stm will not have these forms. */
27935 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
27936 return false;
27938 /* Vector and floating point modes do not support
27939 these auto increment forms. */
27940 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
27941 return false;
27943 return true;
27945 default:
27946 return false;
27950 return false;
27953 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
27954 on ARM, since we know that shifts by negative amounts are no-ops.
27955 Additionally, the default expansion code is not available or suitable
27956 for post-reload insn splits (this can occur when the register allocator
27957 chooses not to do a shift in NEON).
27959 This function is used in both initial expand and post-reload splits, and
27960 handles all kinds of 64-bit shifts.
27962 Input requirements:
27963 - It is safe for the input and output to be the same register, but
27964 early-clobber rules apply for the shift amount and scratch registers.
27965 - Shift by register requires both scratch registers. In all other cases
27966 the scratch registers may be NULL.
27967 - Ashiftrt by a register also clobbers the CC register. */
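/* As a concrete example of the constant case handled below, a logical
   right shift of a DImode value by 40 becomes
     out_low = in_high >> 8; out_high = 0;
   while an arithmetic right shift by 40 becomes
     out_low = in_high >> 8; out_high = in_high >> 31;
   with the operations performed on the SImode halves. */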
27968 void
27969 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
27970 rtx amount, rtx scratch1, rtx scratch2)
27972 rtx out_high = gen_highpart (SImode, out);
27973 rtx out_low = gen_lowpart (SImode, out);
27974 rtx in_high = gen_highpart (SImode, in);
27975 rtx in_low = gen_lowpart (SImode, in);
27977 /* Terminology:
27978 in = the register pair containing the input value.
27979 out = the destination register pair.
27980 up = the high- or low-part of each pair.
27981 down = the opposite part to "up".
27982 In a shift, we can consider bits to shift from "up"-stream to
27983 "down"-stream, so in a left-shift "up" is the low-part and "down"
27984 is the high-part of each register pair. */
27986 rtx out_up = code == ASHIFT ? out_low : out_high;
27987 rtx out_down = code == ASHIFT ? out_high : out_low;
27988 rtx in_up = code == ASHIFT ? in_low : in_high;
27989 rtx in_down = code == ASHIFT ? in_high : in_low;
27991 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
27992 gcc_assert (out
27993 && (REG_P (out) || GET_CODE (out) == SUBREG)
27994 && GET_MODE (out) == DImode);
27995 gcc_assert (in
27996 && (REG_P (in) || GET_CODE (in) == SUBREG)
27997 && GET_MODE (in) == DImode);
27998 gcc_assert (amount
27999 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28000 && GET_MODE (amount) == SImode)
28001 || CONST_INT_P (amount)));
28002 gcc_assert (scratch1 == NULL
28003 || (GET_CODE (scratch1) == SCRATCH)
28004 || (GET_MODE (scratch1) == SImode
28005 && REG_P (scratch1)));
28006 gcc_assert (scratch2 == NULL
28007 || (GET_CODE (scratch2) == SCRATCH)
28008 || (GET_MODE (scratch2) == SImode
28009 && REG_P (scratch2)));
28010 gcc_assert (!REG_P (out) || !REG_P (amount)
28011 || !HARD_REGISTER_P (out)
28012 || (REGNO (out) != REGNO (amount)
28013 && REGNO (out) + 1 != REGNO (amount)));
28015 /* Macros to make following code more readable. */
28016 #define SUB_32(DEST,SRC) \
28017 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28018 #define RSB_32(DEST,SRC) \
28019 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28020 #define SUB_S_32(DEST,SRC) \
28021 gen_addsi3_compare0 ((DEST), (SRC), \
28022 GEN_INT (-32))
28023 #define SET(DEST,SRC) \
28024 gen_rtx_SET ((DEST), (SRC))
28025 #define SHIFT(CODE,SRC,AMOUNT) \
28026 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28027 #define LSHIFT(CODE,SRC,AMOUNT) \
28028 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28029 SImode, (SRC), (AMOUNT))
28030 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28031 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28032 SImode, (SRC), (AMOUNT))
28033 #define ORR(A,B) \
28034 gen_rtx_IOR (SImode, (A), (B))
28035 #define BRANCH(COND,LABEL) \
28036 gen_arm_cond_branch ((LABEL), \
28037 gen_rtx_ ## COND (CCmode, cc_reg, \
28038 const0_rtx), \
28039 cc_reg)
28041 /* Shifts by register and shifts by constant are handled separately. */
28042 if (CONST_INT_P (amount))
28044 /* We have a shift-by-constant. */
28046 /* First, handle out-of-range shift amounts.
28047 In both cases we try to match the result that an ARM instruction in a
28048 shift-by-register would give. This helps reduce execution
28049 differences between optimization levels, but it won't stop other
28050 parts of the compiler doing different things. This is "undefined
28051 behavior" in any case. */
28052 if (INTVAL (amount) <= 0)
28053 emit_insn (gen_movdi (out, in));
28054 else if (INTVAL (amount) >= 64)
28056 if (code == ASHIFTRT)
28058 rtx const31_rtx = GEN_INT (31);
28059 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28060 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28062 else
28063 emit_insn (gen_movdi (out, const0_rtx));
28066 /* Now handle valid shifts. */
28067 else if (INTVAL (amount) < 32)
28069 /* Shifts by a constant less than 32. */
28070 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28072 /* Clearing the out register in DImode first avoids lots
28073 of spilling and results in less stack usage.
28074 Later this redundant insn is completely removed.
28075 Do that only if "in" and "out" are different registers. */
28076 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
28077 emit_insn (SET (out, const0_rtx));
28078 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28079 emit_insn (SET (out_down,
28080 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28081 out_down)));
28082 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28084 else
28086 /* Shifts by a constant greater than 31. */
28087 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28089 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
28090 emit_insn (SET (out, const0_rtx));
28091 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28092 if (code == ASHIFTRT)
28093 emit_insn (gen_ashrsi3 (out_up, in_up,
28094 GEN_INT (31)));
28095 else
28096 emit_insn (SET (out_up, const0_rtx));
28099 else
28101 /* We have a shift-by-register. */
28102 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28104 /* This alternative requires the scratch registers. */
28105 gcc_assert (scratch1 && REG_P (scratch1));
28106 gcc_assert (scratch2 && REG_P (scratch2));
28108 /* We will need the values "amount-32" and "32-amount" later.
28109 Swapping them around now allows the later code to be more general. */
28110 switch (code)
28112 case ASHIFT:
28113 emit_insn (SUB_32 (scratch1, amount));
28114 emit_insn (RSB_32 (scratch2, amount));
28115 break;
28116 case ASHIFTRT:
28117 emit_insn (RSB_32 (scratch1, amount));
28118 /* Also set CC = amount > 32. */
28119 emit_insn (SUB_S_32 (scratch2, amount));
28120 break;
28121 case LSHIFTRT:
28122 emit_insn (RSB_32 (scratch1, amount));
28123 emit_insn (SUB_32 (scratch2, amount));
28124 break;
28125 default:
28126 gcc_unreachable ();
28129 /* Emit code like this:
28131 arithmetic-left:
28132 out_down = in_down << amount;
28133 out_down = (in_up << (amount - 32)) | out_down;
28134 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28135 out_up = in_up << amount;
28137 arithmetic-right:
28138 out_down = in_down >> amount;
28139 out_down = (in_up << (32 - amount)) | out_down;
28140 if (amount < 32)
28141 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28142 out_up = in_up << amount;
28144 logical-right:
28145 out_down = in_down >> amount;
28146 out_down = (in_up << (32 - amount)) | out_down;
28147 if (amount < 32)
28148 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28149 out_up = in_up << amount;
28151 The ARM and Thumb2 variants are the same but implemented slightly
28152 differently. If this were only called during expand we could just
28153 use the Thumb2 case and let combine do the right thing, but this
28154 can also be called from post-reload splitters. */
28156 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28158 if (!TARGET_THUMB2)
28160 /* Emit code for ARM mode. */
28161 emit_insn (SET (out_down,
28162 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28163 if (code == ASHIFTRT)
28165 rtx_code_label *done_label = gen_label_rtx ();
28166 emit_jump_insn (BRANCH (LT, done_label));
28167 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28168 out_down)));
28169 emit_label (done_label);
28171 else
28172 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28173 out_down)));
28175 else
28177 /* Emit code for Thumb2 mode.
28178 Thumb2 can't do shift and or in one insn. */
28179 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28180 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28182 if (code == ASHIFTRT)
28184 rtx_code_label *done_label = gen_label_rtx ();
28185 emit_jump_insn (BRANCH (LT, done_label));
28186 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28187 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28188 emit_label (done_label);
28190 else
28192 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28193 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28197 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28200 #undef SUB_32
28201 #undef RSB_32
28202 #undef SUB_S_32
28203 #undef SET
28204 #undef SHIFT
28205 #undef LSHIFT
28206 #undef REV_LSHIFT
28207 #undef ORR
28208 #undef BRANCH
28211 /* Returns true if the pattern is a valid symbolic address, which is either a
28212 symbol_ref or (symbol_ref + addend).
28214 According to the ARM ELF ABI, the initial addend of REL-type relocations
28215 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
28216 literal field of the instruction as a 16-bit signed value in the range
28217 -32768 <= A < 32768. */
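/* For example, (symbol_ref "x") and (const (plus (symbol_ref "x")
   (const_int 16))) are accepted, while an addend of 0x8000 or more
   (or below -0x8000) is rejected. */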
28219 bool
28220 arm_valid_symbolic_address_p (rtx addr)
28222 rtx xop0, xop1 = NULL_RTX;
28223 rtx tmp = addr;
28225 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
28226 return true;
28228 /* (const (plus: symbol_ref const_int)) */
28229 if (GET_CODE (addr) == CONST)
28230 tmp = XEXP (addr, 0);
28232 if (GET_CODE (tmp) == PLUS)
28234 xop0 = XEXP (tmp, 0);
28235 xop1 = XEXP (tmp, 1);
28237 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
28238 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
28241 return false;
28244 /* Returns true if COMPARISON is a valid comparison operation and puts
28245 the operands into a form that is valid. */
28246 bool
28247 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28249 enum rtx_code code = GET_CODE (*comparison);
28250 int code_int;
28251 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28252 ? GET_MODE (*op2) : GET_MODE (*op1);
28254 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28256 if (code == UNEQ || code == LTGT)
28257 return false;
28259 code_int = (int)code;
28260 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28261 PUT_CODE (*comparison, (enum rtx_code)code_int);
28263 switch (mode)
28265 case SImode:
28266 if (!arm_add_operand (*op1, mode))
28267 *op1 = force_reg (mode, *op1);
28268 if (!arm_add_operand (*op2, mode))
28269 *op2 = force_reg (mode, *op2);
28270 return true;
28272 case DImode:
28273 if (!cmpdi_operand (*op1, mode))
28274 *op1 = force_reg (mode, *op1);
28275 if (!cmpdi_operand (*op2, mode))
28276 *op2 = force_reg (mode, *op2);
28277 return true;
28279 case HFmode:
28280 if (!TARGET_VFP_FP16INST)
28281 break;
28282 /* FP16 comparisons are done in SF mode. */
28283 mode = SFmode;
28284 *op1 = convert_to_mode (mode, *op1, 1);
28285 *op2 = convert_to_mode (mode, *op2, 1);
28286 /* Fall through. */
28287 case SFmode:
28288 case DFmode:
28289 if (!vfp_compare_operand (*op1, mode))
28290 *op1 = force_reg (mode, *op1);
28291 if (!vfp_compare_operand (*op2, mode))
28292 *op2 = force_reg (mode, *op2);
28293 return true;
28294 default:
28295 break;
28298 return false;
28302 /* Maximum number of instructions to set block of memory. */
28303 static int
28304 arm_block_set_max_insns (void)
28306 if (optimize_function_for_size_p (cfun))
28307 return 4;
28308 else
28309 return current_tune->max_insns_inline_memset;
28312 /* Return TRUE if it's profitable to set block of memory for
28313 non-vectorized case. VAL is the value to set the memory
28314 with. LENGTH is the number of bytes to set. ALIGN is the
28315 alignment of the destination memory in bytes. UNALIGNED_P
28316 is TRUE if we can only set the memory with instructions
28317 meeting alignment requirements. USE_STRD_P is TRUE if we
28318 can use strd to set the memory. */
28319 static bool
28320 arm_block_set_non_vect_profit_p (rtx val,
28321 unsigned HOST_WIDE_INT length,
28322 unsigned HOST_WIDE_INT align,
28323 bool unaligned_p, bool use_strd_p)
28325 int num = 0;
28326 /* For leftovers in bytes of 0-7, we can set the memory block using
28327 strb/strh/str with minimum instruction number. */
28328 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
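  /* For example, in the strd case 7 leftover bytes cost leftover[7] == 3
     instructions (str + strh + strb); in the str case 3 leftover bytes
     cost leftover[3] == 2 (strh + strb). */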
28330 if (unaligned_p)
28332 num = arm_const_inline_cost (SET, val);
28333 num += length / align + length % align;
28335 else if (use_strd_p)
28337 num = arm_const_double_inline_cost (val);
28338 num += (length >> 3) + leftover[length & 7];
28340 else
28342 num = arm_const_inline_cost (SET, val);
28343 num += (length >> 2) + leftover[length & 3];
28346 /* We may be able to combine last pair STRH/STRB into a single STR
28347 by shifting one byte back. */
28348 if (unaligned_access && length > 3 && (length & 3) == 3)
28349 num--;
28351 return (num <= arm_block_set_max_insns ());
28354 /* Return TRUE if it's profitable to set block of memory for
28355 vectorized case. LENGTH is the number of bytes to set.
28356 ALIGN is the alignment of destination memory in bytes.
28357 MODE is the vector mode used to set the memory. */
28358 static bool
28359 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28360 unsigned HOST_WIDE_INT align,
28361 machine_mode mode)
28363 int num;
28364 bool unaligned_p = ((align & 3) != 0);
28365 unsigned int nelt = GET_MODE_NUNITS (mode);
28367 /* Instruction loading constant value. */
28368 num = 1;
28369 /* Instructions storing the memory. */
28370 num += (length + nelt - 1) / nelt;
28371 /* Instructions adjusting the address expression. We only need to
28372 adjust the address expression if it's 4-byte aligned and the leftover
28373 bytes can only be stored by a misaligned store instruction. */
28374 if (!unaligned_p && (length & 3) != 0)
28375 num++;
28377 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28378 if (!unaligned_p && mode == V16QImode)
28379 num--;
28381 return (num <= arm_block_set_max_insns ());
28384 /* Set a block of memory using vectorization instructions for the
28385 unaligned case. We fill the first LENGTH bytes of the memory
28386 area starting from DSTBASE with byte constant VALUE. ALIGN is
28387 the alignment requirement of memory. Return TRUE if succeeded. */
28388 static bool
28389 arm_block_set_unaligned_vect (rtx dstbase,
28390 unsigned HOST_WIDE_INT length,
28391 unsigned HOST_WIDE_INT value,
28392 unsigned HOST_WIDE_INT align)
28394 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28395 rtx dst, mem;
28396 rtx val_elt, val_vec, reg;
28397 rtx rval[MAX_VECT_LEN];
28398 rtx (*gen_func) (rtx, rtx);
28399 machine_mode mode;
28400 unsigned HOST_WIDE_INT v = value;
28401 unsigned int offset = 0;
28402 gcc_assert ((align & 0x3) != 0);
28403 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28404 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28405 if (length >= nelt_v16)
28407 mode = V16QImode;
28408 gen_func = gen_movmisalignv16qi;
28410 else
28412 mode = V8QImode;
28413 gen_func = gen_movmisalignv8qi;
28415 nelt_mode = GET_MODE_NUNITS (mode);
28416 gcc_assert (length >= nelt_mode);
28417 /* Skip if it isn't profitable. */
28418 if (!arm_block_set_vect_profit_p (length, align, mode))
28419 return false;
28421 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28422 mem = adjust_automodify_address (dstbase, mode, dst, offset);
28424 v = sext_hwi (v, BITS_PER_WORD);
28425 val_elt = GEN_INT (v);
28426 for (j = 0; j < nelt_mode; j++)
28427 rval[j] = val_elt;
28429 reg = gen_reg_rtx (mode);
28430 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28431 /* Emit instruction loading the constant value. */
28432 emit_move_insn (reg, val_vec);
28434 /* Handle nelt_mode bytes in a vector. */
28435 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28437 emit_insn ((*gen_func) (mem, reg));
28438 if (i + 2 * nelt_mode <= length)
28440 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28441 offset += nelt_mode;
28442 mem = adjust_automodify_address (dstbase, mode, dst, offset);
28446 /* If at least nelt_v8 bytes are left over, we must be in
28447 V16QImode. */
28448 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28450 /* Handle (8, 16) bytes leftover. */
28451 if (i + nelt_v8 < length)
28453 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28454 offset += length - i;
28455 mem = adjust_automodify_address (dstbase, mode, dst, offset);
28457 /* We are shifting bytes back, set the alignment accordingly. */
28458 if ((length & 1) != 0 && align >= 2)
28459 set_mem_align (mem, BITS_PER_UNIT);
28461 emit_insn (gen_movmisalignv16qi (mem, reg));
28463 /* Handle (0, 8] bytes leftover. */
28464 else if (i < length && i + nelt_v8 >= length)
28466 if (mode == V16QImode)
28467 reg = gen_lowpart (V8QImode, reg);
28469 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28470 + (nelt_mode - nelt_v8))));
28471 offset += (length - i) + (nelt_mode - nelt_v8);
28472 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
28474 /* We are shifting bytes back, set the alignment accordingly. */
28475 if ((length & 1) != 0 && align >= 2)
28476 set_mem_align (mem, BITS_PER_UNIT);
28478 emit_insn (gen_movmisalignv8qi (mem, reg));
28481 return true;
28484 /* Set a block of memory using vectorization instructions for the
28485 aligned case. We fill the first LENGTH bytes of the memory area
28486 starting from DSTBASE with byte constant VALUE. ALIGN is the
28487 alignment requirement of memory. Return TRUE if succeeded. */
28488 static bool
28489 arm_block_set_aligned_vect (rtx dstbase,
28490 unsigned HOST_WIDE_INT length,
28491 unsigned HOST_WIDE_INT value,
28492 unsigned HOST_WIDE_INT align)
28494 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28495 rtx dst, addr, mem;
28496 rtx val_elt, val_vec, reg;
28497 rtx rval[MAX_VECT_LEN];
28498 machine_mode mode;
28499 unsigned HOST_WIDE_INT v = value;
28500 unsigned int offset = 0;
28502 gcc_assert ((align & 0x3) == 0);
28503 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28504 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28505 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28506 mode = V16QImode;
28507 else
28508 mode = V8QImode;
28510 nelt_mode = GET_MODE_NUNITS (mode);
28511 gcc_assert (length >= nelt_mode);
28512 /* Skip if it isn't profitable. */
28513 if (!arm_block_set_vect_profit_p (length, align, mode))
28514 return false;
28516 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28518 v = sext_hwi (v, BITS_PER_WORD);
28519 val_elt = GEN_INT (v);
28520 for (j = 0; j < nelt_mode; j++)
28521 rval[j] = val_elt;
28523 reg = gen_reg_rtx (mode);
28524 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28525 /* Emit instruction loading the constant value. */
28526 emit_move_insn (reg, val_vec);
28528 i = 0;
28529 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28530 if (mode == V16QImode)
28532 mem = adjust_automodify_address (dstbase, mode, dst, offset);
28533 emit_insn (gen_movmisalignv16qi (mem, reg));
28534 i += nelt_mode;
28535 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28536 if (i + nelt_v8 < length && i + nelt_v16 > length)
28538 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28539 offset += length - nelt_mode;
28540 mem = adjust_automodify_address (dstbase, mode, dst, offset);
28541 /* We are shifting bytes back, set the alignment accordingly. */
28542 if ((length & 0x3) == 0)
28543 set_mem_align (mem, BITS_PER_UNIT * 4);
28544 else if ((length & 0x1) == 0)
28545 set_mem_align (mem, BITS_PER_UNIT * 2);
28546 else
28547 set_mem_align (mem, BITS_PER_UNIT);
28549 emit_insn (gen_movmisalignv16qi (mem, reg));
28550 return true;
28552 /* Fall through for bytes leftover. */
28553 mode = V8QImode;
28554 nelt_mode = GET_MODE_NUNITS (mode);
28555 reg = gen_lowpart (V8QImode, reg);
28558 /* Handle 8 bytes in a vector. */
28559 for (; (i + nelt_mode <= length); i += nelt_mode)
28561 addr = plus_constant (Pmode, dst, i);
28562 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
28563 emit_move_insn (mem, reg);
28566 /* Handle single word leftover by shifting 4 bytes back. We can
28567 use aligned access for this case. */
28568 if (i + UNITS_PER_WORD == length)
28570 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28571 offset += i - UNITS_PER_WORD;
28572 mem = adjust_automodify_address (dstbase, mode, addr, offset);
28573 /* We are shifting 4 bytes back, set the alignment accordingly. */
28574 if (align > UNITS_PER_WORD)
28575 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28577 emit_move_insn (mem, reg);
28579 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28580 We have to use unaligned access for this case. */
28581 else if (i < length)
28583 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28584 offset += length - nelt_mode;
28585 mem = adjust_automodify_address (dstbase, mode, dst, offset);
28586 /* We are shifting bytes back, set the alignment accordingly. */
28587 if ((length & 1) == 0)
28588 set_mem_align (mem, BITS_PER_UNIT * 2);
28589 else
28590 set_mem_align (mem, BITS_PER_UNIT);
28592 emit_insn (gen_movmisalignv8qi (mem, reg));
28595 return true;
28598 /* Set a block of memory using plain strh/strb instructions, using only
28599 instructions allowed by ALIGN on the processor. We fill the
28600 first LENGTH bytes of the memory area starting from DSTBASE
28601 with byte constant VALUE. ALIGN is the alignment requirement
28602 of memory. */
28603 static bool
28604 arm_block_set_unaligned_non_vect (rtx dstbase,
28605 unsigned HOST_WIDE_INT length,
28606 unsigned HOST_WIDE_INT value,
28607 unsigned HOST_WIDE_INT align)
28609 unsigned int i;
28610 rtx dst, addr, mem;
28611 rtx val_exp, val_reg, reg;
28612 machine_mode mode;
28613 HOST_WIDE_INT v = value;
28615 gcc_assert (align == 1 || align == 2);
28617 if (align == 2)
28618 v |= (value << BITS_PER_UNIT);
28620 v = sext_hwi (v, BITS_PER_WORD);
28621 val_exp = GEN_INT (v);
28622 /* Skip if it isn't profitable. */
28623 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28624 align, true, false))
28625 return false;
28627 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28628 mode = (align == 2 ? HImode : QImode);
28629 val_reg = force_reg (SImode, val_exp);
28630 reg = gen_lowpart (mode, val_reg);
28632 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
28634 addr = plus_constant (Pmode, dst, i);
28635 mem = adjust_automodify_address (dstbase, mode, addr, i);
28636 emit_move_insn (mem, reg);
28639 /* Handle single byte leftover. */
28640 if (i + 1 == length)
28642 reg = gen_lowpart (QImode, val_reg);
28643 addr = plus_constant (Pmode, dst, i);
28644 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28645 emit_move_insn (mem, reg);
28646 i++;
28649 gcc_assert (i == length);
28650 return true;
28653 /* Set a block of memory using plain strd/str/strh/strb instructions,
28654 to permit unaligned stores on processors which support unaligned
28655 semantics for those instructions. We fill the first LENGTH bytes
28656 of the memory area starting from DSTBASE with byte constant VALUE.
28657 ALIGN is the alignment requirement of memory. */
28658 static bool
28659 arm_block_set_aligned_non_vect (rtx dstbase,
28660 unsigned HOST_WIDE_INT length,
28661 unsigned HOST_WIDE_INT value,
28662 unsigned HOST_WIDE_INT align)
28664 unsigned int i;
28665 rtx dst, addr, mem;
28666 rtx val_exp, val_reg, reg;
28667 unsigned HOST_WIDE_INT v;
28668 bool use_strd_p;
28670 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
28671 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
28673 v = (value | (value << 8) | (value << 16) | (value << 24));
28674 if (length < UNITS_PER_WORD)
28675 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
28677 if (use_strd_p)
28678 v |= (v << BITS_PER_WORD);
28679 else
28680 v = sext_hwi (v, BITS_PER_WORD);
28682 val_exp = GEN_INT (v);
28683 /* Skip if it isn't profitable. */
28684 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28685 align, false, use_strd_p))
28687 if (!use_strd_p)
28688 return false;
28690 /* Try without strd. */
28691 v = (v >> BITS_PER_WORD);
28692 v = sext_hwi (v, BITS_PER_WORD);
28693 val_exp = GEN_INT (v);
28694 use_strd_p = false;
28695 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28696 align, false, use_strd_p))
28697 return false;
28700 i = 0;
28701 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28702 /* Handle double words using strd if possible. */
28703 if (use_strd_p)
28705 val_reg = force_reg (DImode, val_exp);
28706 reg = val_reg;
28707 for (; (i + 8 <= length); i += 8)
28709 addr = plus_constant (Pmode, dst, i);
28710 mem = adjust_automodify_address (dstbase, DImode, addr, i);
28711 emit_move_insn (mem, reg);
28714 else
28715 val_reg = force_reg (SImode, val_exp);
28717 /* Handle words. */
28718 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
28719 for (; (i + 4 <= length); i += 4)
28721 addr = plus_constant (Pmode, dst, i);
28722 mem = adjust_automodify_address (dstbase, SImode, addr, i);
28723 if ((align & 3) == 0)
28724 emit_move_insn (mem, reg);
28725 else
28726 emit_insn (gen_unaligned_storesi (mem, reg));
28729 /* Merge last pair of STRH and STRB into a STR if possible. */
28730 if (unaligned_access && i > 0 && (i + 3) == length)
28732 addr = plus_constant (Pmode, dst, i - 1);
28733 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
28734 /* We are shifting one byte back, set the alignment accordingly. */
28735 if ((align & 1) == 0)
28736 set_mem_align (mem, BITS_PER_UNIT);
28738 /* Most likely this is an unaligned access, and we can't tell at
28739 compilation time. */
28740 emit_insn (gen_unaligned_storesi (mem, reg));
28741 return true;
28744 /* Handle half word leftover. */
28745 if (i + 2 <= length)
28747 reg = gen_lowpart (HImode, val_reg);
28748 addr = plus_constant (Pmode, dst, i);
28749 mem = adjust_automodify_address (dstbase, HImode, addr, i);
28750 if ((align & 1) == 0)
28751 emit_move_insn (mem, reg);
28752 else
28753 emit_insn (gen_unaligned_storehi (mem, reg));
28755 i += 2;
28758 /* Handle single byte leftover. */
28759 if (i + 1 == length)
28761 reg = gen_lowpart (QImode, val_reg);
28762 addr = plus_constant (Pmode, dst, i);
28763 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28764 emit_move_insn (mem, reg);
28767 return true;
28770 /* Set a block of memory using vectorization instructions for both
28771 aligned and unaligned cases. We fill the first LENGTH bytes of
28772 the memory area starting from DSTBASE with byte constant VALUE.
28773 ALIGN is the alignment requirement of memory. */
28774 static bool
28775 arm_block_set_vect (rtx dstbase,
28776 unsigned HOST_WIDE_INT length,
28777 unsigned HOST_WIDE_INT value,
28778 unsigned HOST_WIDE_INT align)
28780 /* Check whether we need to use unaligned store instruction. */
28781 if (((align & 3) != 0 || (length & 3) != 0)
28782 /* Check whether unaligned store instruction is available. */
28783 && (!unaligned_access || BYTES_BIG_ENDIAN))
28784 return false;
28786 if ((align & 3) == 0)
28787 return arm_block_set_aligned_vect (dstbase, length, value, align);
28788 else
28789 return arm_block_set_unaligned_vect (dstbase, length, value, align);
28792 /* Expand string store operation. First we try to do that by using
28793 vectorization instructions, then try with ARM unaligned access and
28794 double-word store if profitable. OPERANDS[0] is the destination,
28795 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
28796 initialize the memory, OPERANDS[3] is the known alignment of the
28797 destination. */
28798 bool
28799 arm_gen_setmem (rtx *operands)
28801 rtx dstbase = operands[0];
28802 unsigned HOST_WIDE_INT length;
28803 unsigned HOST_WIDE_INT value;
28804 unsigned HOST_WIDE_INT align;
28806 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
28807 return false;
28809 length = UINTVAL (operands[1]);
28810 if (length > 64)
28811 return false;
28813 value = (UINTVAL (operands[2]) & 0xFF);
28814 align = UINTVAL (operands[3]);
28815 if (TARGET_NEON && length >= 8
28816 && current_tune->string_ops_prefer_neon
28817 && arm_block_set_vect (dstbase, length, value, align))
28818 return true;
28820 if (!unaligned_access && (align & 3) != 0)
28821 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
28823 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
28827 static bool
28828 arm_macro_fusion_p (void)
28830 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
28833 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
28834 for MOVW / MOVT macro fusion. */
28836 static bool
28837 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
28839 /* We are trying to fuse
28840 movw imm / movt imm
28841 instructions as a group that gets scheduled together. */
28843 rtx set_dest = SET_DEST (curr_set);
28845 if (GET_MODE (set_dest) != SImode)
28846 return false;
28848 /* We are trying to match:
28849 prev (movw) == (set (reg r0) (const_int imm16))
28850 curr (movt) == (set (zero_extract (reg r0)
28851 (const_int 16)
28852 (const_int 16))
28853 (const_int imm16_1))
28855 prev (movw) == (set (reg r1)
28856 (high (symbol_ref ("SYM"))))
28857 curr (movt) == (set (reg r0)
28858 (lo_sum (reg r1)
28859 (symbol_ref ("SYM")))) */
28861 if (GET_CODE (set_dest) == ZERO_EXTRACT)
28863 if (CONST_INT_P (SET_SRC (curr_set))
28864 && CONST_INT_P (SET_SRC (prev_set))
28865 && REG_P (XEXP (set_dest, 0))
28866 && REG_P (SET_DEST (prev_set))
28867 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
28868 return true;
28871 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
28872 && REG_P (SET_DEST (curr_set))
28873 && REG_P (SET_DEST (prev_set))
28874 && GET_CODE (SET_SRC (prev_set)) == HIGH
28875 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
28876 return true;
28878 return false;
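/* Return TRUE if the back-to-back instructions PREV and CURR should be kept
   together during scheduling so that they can be macro-fused. */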
28881 static bool
28882 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
28884 rtx prev_set = single_set (prev);
28885 rtx curr_set = single_set (curr);
28887 if (!prev_set
28888 || !curr_set)
28889 return false;
28891 if (any_condjump_p (curr))
28892 return false;
28894 if (!arm_macro_fusion_p ())
28895 return false;
28897 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
28898 && aarch_crypto_can_dual_issue (prev, curr))
28899 return true;
28901 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
28902 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
28903 return true;
28905 return false;
28908 /* Return true iff the instruction fusion described by OP is enabled. */
28909 bool
28910 arm_fusion_enabled_p (tune_params::fuse_ops op)
28912 return current_tune->fusible_ops & op;
28915 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
28917 static unsigned HOST_WIDE_INT
28918 arm_asan_shadow_offset (void)
28920 return HOST_WIDE_INT_1U << 29;
28924 /* This is a temporary fix for PR60655. Ideally we need
28925 to handle most of these cases in the generic part but
28926 currently we reject minus (..) (sym_ref). We try to
28927 ameliorate the case with minus (sym_ref1) (sym_ref2)
28928 where they are in the same section. */
28930 static bool
28931 arm_const_not_ok_for_debug_p (rtx p)
28933 tree decl_op0 = NULL;
28934 tree decl_op1 = NULL;
28936 if (GET_CODE (p) == MINUS)
28938 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
28940 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
28941 if (decl_op1
28942 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
28943 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
28945 if ((VAR_P (decl_op1)
28946 || TREE_CODE (decl_op1) == CONST_DECL)
28947 && (VAR_P (decl_op0)
28948 || TREE_CODE (decl_op0) == CONST_DECL))
28949 return (get_variable_section (decl_op1, false)
28950 != get_variable_section (decl_op0, false));
28952 if (TREE_CODE (decl_op1) == LABEL_DECL
28953 && TREE_CODE (decl_op0) == LABEL_DECL)
28954 return (DECL_CONTEXT (decl_op1)
28955 != DECL_CONTEXT (decl_op0));
28958 return true;
28962 return false;
28965 /* Return TRUE if X is a reference to a value in a constant pool. */
28966 extern bool
28967 arm_is_constant_pool_ref (rtx x)
28969 return (MEM_P (x)
28970 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
28971 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
28974 /* Remember the last target of arm_set_current_function. */
28975 static GTY(()) tree arm_previous_fndecl;
28977 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
28979 void
28980 save_restore_target_globals (tree new_tree)
28982 /* If we have a previous state, use it. */
28983 if (TREE_TARGET_GLOBALS (new_tree))
28984 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
28985 else if (new_tree == target_option_default_node)
28986 restore_target_globals (&default_target_globals);
28987 else
28989 /* Call target_reinit and save the state for TARGET_GLOBALS. */
28990 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
28993 arm_option_params_internal ();
28996 /* Invalidate arm_previous_fndecl. */
28998 void
28999 arm_reset_previous_fndecl (void)
29001 arm_previous_fndecl = NULL_TREE;
29004 /* Establish appropriate back-end context for processing the function
29005 FNDECL. The argument might be NULL to indicate processing at top
29006 level, outside of any function scope. */
29008 static void
29009 arm_set_current_function (tree fndecl)
29011 if (!fndecl || fndecl == arm_previous_fndecl)
29012 return;
29014 tree old_tree = (arm_previous_fndecl
29015 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
29016 : NULL_TREE);
29018 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
29020 /* If the current function has no attributes but the previous one did,
29021 use the default node. */
29022 if (! new_tree && old_tree)
29023 new_tree = target_option_default_node;
29025 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
29026 the default have already been handled by save_restore_target_globals from
29027 arm_pragma_target_parse. */
29028 if (old_tree == new_tree)
29029 return;
29031 arm_previous_fndecl = fndecl;
29033 /* First set the target options. */
29034 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
29036 save_restore_target_globals (new_tree);
29039 /* Implement TARGET_OPTION_PRINT. */
29041 static void
29042 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
29044 int flags = ptr->x_target_flags;
29045 const struct arm_fpu_desc *fpu_desc = &all_fpus[ptr->x_arm_fpu_index];
29047 fprintf (file, "%*sselected arch %s\n", indent, "",
29048 TARGET_THUMB2_P (flags) ? "thumb2" :
29049 TARGET_THUMB_P (flags) ? "thumb1" :
29050 "arm");
29052 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_desc->name);
29055 /* Hook to determine if one function can safely inline another. */
29057 static bool
29058 arm_can_inline_p (tree caller, tree callee)
29060 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
29061 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
29063 struct cl_target_option *caller_opts
29064 = TREE_TARGET_OPTION (caller_tree ? caller_tree
29065 : target_option_default_node);
29067 struct cl_target_option *callee_opts
29068 = TREE_TARGET_OPTION (callee_tree ? callee_tree
29069 : target_option_default_node);
29071 const struct arm_fpu_desc *caller_fpu
29072 = &all_fpus[caller_opts->x_arm_fpu_index];
29073 const struct arm_fpu_desc *callee_fpu
29074 = &all_fpus[callee_opts->x_arm_fpu_index];
29076 /* Callee's fpu features should be a subset of the caller's. */
29077 if ((caller_fpu->features & callee_fpu->features) != callee_fpu->features)
29078 return false;
29080 /* Need same FPU regs. */
29081 if (callee_fpu->regs != caller_fpu->regs)
29082 return false;
29084 /* OK to inline between different modes.
29085 Functions with mode-specific instructions, e.g. using asm,
29086 must be explicitly protected with noinline. */
29087 return true;
29090 /* Hook to fix a function's alignment when it is affected by the target attribute. */
29092 static void
29093 arm_relayout_function (tree fndecl)
29095 if (DECL_USER_ALIGN (fndecl))
29096 return;
29098 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
29100 if (!callee_tree)
29101 callee_tree = target_option_default_node;
29103 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
29104 SET_DECL_ALIGN (fndecl, FUNCTION_BOUNDARY_P (opts->x_target_flags));
29107 /* Inner function to process the attribute ((target (...))): take an argument
29108 and set the current options from that argument. If we have a list,
29109 recursively process each entry. */
29111 static bool
29112 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
29114 if (TREE_CODE (args) == TREE_LIST)
29116 bool ret = true;
29118 for (; args; args = TREE_CHAIN (args))
29119 if (TREE_VALUE (args)
29120 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
29121 ret = false;
29122 return ret;
29125 else if (TREE_CODE (args) != STRING_CST)
29127 error ("attribute %<target%> argument not a string");
29128 return false;
29131 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
29132 char *q;
29134 while ((q = strtok (argstr, ",")) != NULL)
29136 while (ISSPACE (*q)) ++q;
29138 argstr = NULL;
29139 if (!strncmp (q, "thumb", 5))
29140 opts->x_target_flags |= MASK_THUMB;
29142 else if (!strncmp (q, "arm", 3))
29143 opts->x_target_flags &= ~MASK_THUMB;
29145 else if (!strncmp (q, "fpu=", 4))
29147 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
29148 &opts->x_arm_fpu_index, CL_TARGET))
29150 error ("invalid fpu for attribute(target(\"%s\"))", q);
29151 return false;
29154 else
29156 error ("attribute(target(\"%s\")) is unknown", q);
29157 return false;
29160 arm_option_check_internal (opts);
29163 return true;
29166 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
29168 tree
29169 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
29170 struct gcc_options *opts_set)
29172 if (!arm_valid_target_attribute_rec (args, opts))
29173 return NULL_TREE;
29175 /* Do any overrides, such as global options arch=xxx. */
29176 arm_option_override_internal (opts, opts_set);
29178 return build_target_option_node (opts);
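/* Add a "target" attribute with value MODE to the attribute list *ATTRIBUTES. */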
29181 static void
29182 add_attribute (const char * mode, tree *attributes)
29184 size_t len = strlen (mode);
29185 tree value = build_string (len, mode);
29187 TREE_TYPE (value) = build_array_type (char_type_node,
29188 build_index_type (size_int (len)));
29190 *attributes = tree_cons (get_identifier ("target"),
29191 build_tree_list (NULL_TREE, value),
29192 *attributes);
29195 /* For testing. Insert thumb or arm modes alternately on successive functions. */
29197 static void
29198 arm_insert_attributes (tree fndecl, tree * attributes)
29200 const char *mode;
29202 if (! TARGET_FLIP_THUMB)
29203 return;
29205 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
29206 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
29207 return;
29209 /* Nested definitions must inherit mode. */
29210 if (current_function_decl)
29212 mode = TARGET_THUMB ? "thumb" : "arm";
29213 add_attribute (mode, attributes);
29214 return;
29217 /* If there is already a setting don't change it. */
29218 if (lookup_attribute ("target", *attributes) != NULL)
29219 return;
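/* Otherwise alternate the mode for each new top-level function definition. */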
29221 mode = thumb_flipper ? "thumb" : "arm";
29222 add_attribute (mode, attributes);
29224 thumb_flipper = !thumb_flipper;
29227 /* Hook to validate attribute((target("string"))). */
29229 static bool
29230 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
29231 tree args, int ARG_UNUSED (flags))
29233 bool ret = true;
29234 struct gcc_options func_options;
29235 tree cur_tree, new_optimize;
29236 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
29238 /* Get the optimization options of the current function. */
29239 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
29241 /* If the function changed the optimization levels as well as setting target
29242 options, start with the optimizations specified. */
29243 if (!func_optimize)
29244 func_optimize = optimization_default_node;
29246 /* Init func_options. */
29247 memset (&func_options, 0, sizeof (func_options));
29248 init_options_struct (&func_options, NULL);
29249 lang_hooks.init_options_struct (&func_options);
29251 /* Initialize func_options to the defaults. */
29252 cl_optimization_restore (&func_options,
29253 TREE_OPTIMIZATION (func_optimize));
29255 cl_target_option_restore (&func_options,
29256 TREE_TARGET_OPTION (target_option_default_node));
29258 /* Set func_options flags with new target mode. */
29259 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
29260 &global_options_set);
29262 if (cur_tree == NULL_TREE)
29263 ret = false;
29265 new_optimize = build_optimization_node (&func_options);
29267 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
29269 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
29271 finalize_options_struct (&func_options);
29273 return ret;
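/* Output the assembler directives that introduce the function NAME defined
   by DECL on STREAM: the unified syntax marker, the .arm/.thumb/.code
   directives appropriate for the function's mode, the .fpu in use and, when
   TARGET_POKE_FUNCTION_NAME is set, the poked function name. */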
29276 void
29277 arm_declare_function_name (FILE *stream, const char *name, tree decl)
29280 fprintf (stream, "\t.syntax unified\n");
29282 if (TARGET_THUMB)
29284 if (is_called_in_ARM_mode (decl)
29285 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
29286 && cfun->is_thunk))
29287 fprintf (stream, "\t.code 32\n");
29288 else if (TARGET_THUMB1)
29289 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
29290 else
29291 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
29293 else
29294 fprintf (stream, "\t.arm\n");
29296 asm_fprintf (asm_out_file, "\t.fpu %s\n",
29297 TARGET_SOFT_FLOAT ? "softvfp" : TARGET_FPU_NAME);
29299 if (TARGET_POKE_FUNCTION_NAME)
29300 arm_poke_function_name (stream, (const char *) name);
29303 /* If MEM is in the form [base+offset], extract the two parts of the
29304 address and store them in BASE and OFFSET; otherwise return false
29305 after clearing BASE and OFFSET. */
29307 static bool
29308 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29310 rtx addr;
29312 gcc_assert (MEM_P (mem));
29314 addr = XEXP (mem, 0);
29316 /* Strip off const from addresses like (const (addr)). */
29317 if (GET_CODE (addr) == CONST)
29318 addr = XEXP (addr, 0);
29320 if (GET_CODE (addr) == REG)
29322 *base = addr;
29323 *offset = const0_rtx;
29324 return true;
29327 if (GET_CODE (addr) == PLUS
29328 && GET_CODE (XEXP (addr, 0)) == REG
29329 && CONST_INT_P (XEXP (addr, 1)))
29331 *base = XEXP (addr, 0);
29332 *offset = XEXP (addr, 1);
29333 return true;
29336 *base = NULL_RTX;
29337 *offset = NULL_RTX;
29339 return false;
29342 /* If INSN is a load or store of an address in the form [base+offset],
29343 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
29344 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
29345 otherwise return FALSE. */
29347 static bool
29348 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29350 rtx x, dest, src;
29352 gcc_assert (INSN_P (insn));
29353 x = PATTERN (insn);
29354 if (GET_CODE (x) != SET)
29355 return false;
29357 src = SET_SRC (x);
29358 dest = SET_DEST (x);
29359 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29361 *is_load = false;
29362 extract_base_offset_in_addr (dest, base, offset);
29364 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29366 *is_load = true;
29367 extract_base_offset_in_addr (src, base, offset);
29369 else
29370 return false;
29372 return (*base != NULL_RTX && *offset != NULL_RTX);
29375 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29377 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29378 and PRI are only calculated for these instructions. For other instructions,
29379 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
29380 of instruction fusion can be supported by returning different priorities.
29382 It's important that irrelevant instructions get the largest FUSION_PRI. */
29384 static void
29385 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29386 int *fusion_pri, int *pri)
29388 int tmp, off_val;
29389 bool is_load;
29390 rtx base, offset;
29392 gcc_assert (INSN_P (insn));
29394 tmp = max_pri - 1;
29395 if (!fusion_load_store (insn, &base, &offset, &is_load))
29397 *pri = tmp;
29398 *fusion_pri = tmp;
29399 return;
29402 /* Load goes first. */
29403 if (is_load)
29404 *fusion_pri = tmp - 1;
29405 else
29406 *fusion_pri = tmp - 2;
29408 tmp /= 2;
29410 /* INSN with smaller base register goes first. */
29411 tmp -= ((REGNO (base) & 0xff) << 20);
29413 /* INSN with smaller offset goes first. */
29414 off_val = (int)(INTVAL (offset));
29415 if (off_val >= 0)
29416 tmp -= (off_val & 0xfffff);
29417 else
29418 tmp += ((- off_val) & 0xfffff);
29420 *pri = tmp;
29421 return;
29425 /* Construct and return a PARALLEL RTX vector with elements numbering the
29426 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
29427 the vector - from the perspective of the architecture. This does not
29428 line up with GCC's perspective on lane numbers, so we end up with
29429 different masks depending on our target endian-ness. The diagram
29430 below may help. We must draw the distinction when building masks
29431 which select one half of the vector. An instruction selecting
29432 architectural low-lanes for a big-endian target must be described using
29433 a mask selecting GCC high-lanes.
29435               Big-Endian              Little-Endian
29437 GCC            0   1   2   3           3   2   1   0
29438             | x | x | x | x |       | x | x | x | x |
29439 Architecture   3   2   1   0           3   2   1   0
29441 Low Mask:        { 2, 3 }                { 0, 1 }
29442 High Mask:       { 0, 1 }                { 2, 3 } */
rtx
29446 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
29448 int nunits = GET_MODE_NUNITS (mode);
29449 rtvec v = rtvec_alloc (nunits / 2);
29450 int high_base = nunits / 2;
29451 int low_base = 0;
29452 int base;
29453 rtx t1;
29454 int i;
29456 if (BYTES_BIG_ENDIAN)
29457 base = high ? low_base : high_base;
29458 else
29459 base = high ? high_base : low_base;
29461 for (i = 0; i < nunits / 2; i++)
29462 RTVEC_ELT (v, i) = GEN_INT (base + i);
29464 t1 = gen_rtx_PARALLEL (mode, v);
29465 return t1;
29468 /* Check OP for validity as a PARALLEL RTX vector with elements
29469 numbering either the high (HIGH == TRUE) or low (HIGH == FALSE) lanes,
29470 from the perspective of the architecture. See the diagram above
29471 arm_simd_vect_par_cnst_half for more details. */
29473 bool
29474 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
29475 bool high)
29477 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
29478 HOST_WIDE_INT count_op = XVECLEN (op, 0);
29479 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
29480 int i = 0;
29482 if (!VECTOR_MODE_P (mode))
29483 return false;
29485 if (count_op != count_ideal)
29486 return false;
29488 for (i = 0; i < count_ideal; i++)
29490 rtx elt_op = XVECEXP (op, 0, i);
29491 rtx elt_ideal = XVECEXP (ideal, 0, i);
29493 if (!CONST_INT_P (elt_op)
29494 || INTVAL (elt_ideal) != INTVAL (elt_op))
29495 return false;
29497 return true;
29500 /* Can output mi_thunk for all cases except for non-zero vcall_offset
29501 in Thumb1. */
29502 static bool
29503 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
29504 const_tree)
29506 /* For now, we punt and do not handle this for TARGET_THUMB1. */
29507 if (vcall_offset && TARGET_THUMB1)
29508 return false;
29510 /* Otherwise ok. */
29511 return true;
29514 /* Generate RTL for a conditional branch with rtx comparison CODE in
29515 mode CC_MODE. The destination of the unlikely conditional branch
29516 is LABEL_REF. */
29518 void
29519 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
29520 rtx label_ref)
29522 rtx x;
29523 x = gen_rtx_fmt_ee (code, VOIDmode,
29524 gen_rtx_REG (cc_mode, CC_REGNUM),
29525 const0_rtx);
29527 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29528 gen_rtx_LABEL_REF (VOIDmode, label_ref),
29529 pc_rtx);
29530 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
29533 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
29535 For pure-code sections there is no letter code for this attribute, so
29536 output all the section flags numerically when this is needed. */
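/* The numeric flag values used below are the ELF section flag bits:
   SHF_WRITE (0x1), SHF_ALLOC (0x2), SHF_EXECINSTR (0x4), SHF_MERGE (0x10),
   SHF_STRINGS (0x20), SHF_GROUP (0x200), SHF_TLS (0x400),
   SHF_ARM_PURECODE (0x20000000) and SHF_EXCLUDE (0x80000000). */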
29538 static bool
29539 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
29542 if (flags & SECTION_ARM_PURECODE)
29544 *num = 0x20000000;
29546 if (!(flags & SECTION_DEBUG))
29547 *num |= 0x2;
29548 if (flags & SECTION_EXCLUDE)
29549 *num |= 0x80000000;
29550 if (flags & SECTION_WRITE)
29551 *num |= 0x1;
29552 if (flags & SECTION_CODE)
29553 *num |= 0x4;
29554 if (flags & SECTION_MERGE)
29555 *num |= 0x10;
29556 if (flags & SECTION_STRINGS)
29557 *num |= 0x20;
29558 if (flags & SECTION_TLS)
29559 *num |= 0x400;
29560 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
29561 *num |= 0x200;
29563 return true;
29566 return false;
29569 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
29571 If pure-code is passed as an option, make sure all functions are in
29572 sections that have the SHF_ARM_PURECODE attribute. */
29574 static section *
29575 arm_function_section (tree decl, enum node_frequency freq,
29576 bool startup, bool exit)
29578 const char * section_name;
29579 section * sec;
29581 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
29582 return default_function_section (decl, freq, startup, exit);
29584 if (!target_pure_code)
29585 return default_function_section (decl, freq, startup, exit);
29588 section_name = DECL_SECTION_NAME (decl);
29590 /* If a function is not in a named section then it falls under the 'default'
29591 text section, also known as '.text'. We can preserve previous behavior as
29592 the default text section already has the SHF_ARM_PURECODE section
29593 attribute. */
29594 if (!section_name)
29596 section *default_sec = default_function_section (decl, freq, startup,
29597 exit);
29599 /* If default_sec is not null, then it must be a special section like for
29600 example .text.startup. We set the pure-code attribute and return the
29601 same section to preserve existing behavior. */
29602 if (default_sec)
29603 default_sec->common.flags |= SECTION_ARM_PURECODE;
29604 return default_sec;
29607 /* Otherwise look whether a section has already been created with
29608 'section_name'. */
29609 sec = get_named_section (decl, section_name, 0);
29610 if (!sec)
29611 /* If that is not the case, passing NULL as the section's name to
29612 'get_named_section' will create a section with the declaration's
29613 section name. */
29614 sec = get_named_section (decl, NULL, 0);
29616 /* Set the SHF_ARM_PURECODE attribute. */
29617 sec->common.flags |= SECTION_ARM_PURECODE;
29619 return sec;
29622 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
29624 If DECL is a function declaration and pure-code is passed as an option
29625 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
29626 section's name and RELOC indicates whether the declaration's initializer may
29627 contain runtime relocations. */
29629 static unsigned int
29630 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
29632 unsigned int flags = default_section_type_flags (decl, name, reloc);
29634 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
29635 flags |= SECTION_ARM_PURECODE;
29637 return flags;
29640 /* Generate a call to __aeabi_[mode]divmod (OP0, OP1), returning the quotient in *QUOT_P and the remainder in *REM_P. */
29642 static void
29643 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
29644 rtx op0, rtx op1,
29645 rtx *quot_p, rtx *rem_p)
29647 if (mode == SImode)
29648 gcc_assert (!TARGET_IDIV);
29650 machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode),
29651 MODE_INT);
29653 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
29654 libval_mode, 2,
29655 op0, GET_MODE (op0),
29656 op1, GET_MODE (op1));
29658 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
29659 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
29660 GET_MODE_SIZE (mode));
29662 gcc_assert (quotient);
29663 gcc_assert (remainder);
29665 *quot_p = quotient;
29666 *rem_p = remainder;
29669 #include "gt-arm.h"