Fix PR77933: stack corruption on ARM when using high registers and LR
[official-gcc.git] / gcc / config / arm / arm.c
blob d7ce87c27ee33b0f14c1a693a81e1de19b610d41
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "reload.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60 #include "dumpfile.h"
61 #include "target-globals.h"
62 #include "builtins.h"
63 #include "tm-constrs.h"
64 #include "rtl-iter.h"
65 #include "optabs-libfuncs.h"
67 /* This file should be included last. */
68 #include "target-def.h"
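/* target-def.h supplies the default definitions of the target hooks.
   TARGET_INITIALIZER is only expanded where targetm is defined (see
   line 741 below), so the #undef/#define pairs further down take
   effect before the hook table is built.  */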
70 /* Forward definitions of types. */
71 typedef struct minipool_node Mnode;
72 typedef struct minipool_fixup Mfix;
74 void (*arm_lang_output_object_attributes_hook)(void);
76 struct four_ints
77 {
78 int i[4];
79 };
81 /* Forward function declarations. */
82 static bool arm_const_not_ok_for_debug_p (rtx);
83 static bool arm_needs_doubleword_align (machine_mode, const_tree);
84 static int arm_compute_static_chain_stack_bytes (void);
85 static arm_stack_offsets *arm_get_frame_offsets (void);
86 static void arm_add_gc_roots (void);
87 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
88 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
89 static unsigned bit_count (unsigned long);
90 static unsigned feature_count (const arm_feature_set*);
91 static int arm_address_register_rtx_p (rtx, int);
92 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
93 static bool is_called_in_ARM_mode (tree);
94 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
95 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
96 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
97 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
98 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
99 inline static int thumb1_index_register_rtx_p (rtx, int);
100 static int thumb_far_jump_used_p (void);
101 static bool thumb_force_lr_save (void);
102 static unsigned arm_size_return_regs (void);
103 static bool arm_assemble_integer (rtx, unsigned int, int);
104 static void arm_print_operand (FILE *, rtx, int);
105 static void arm_print_operand_address (FILE *, machine_mode, rtx);
106 static bool arm_print_operand_punct_valid_p (unsigned char code);
107 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
108 static arm_cc get_arm_condition_code (rtx);
109 static const char *output_multi_immediate (rtx *, const char *, const char *,
110 int, HOST_WIDE_INT);
111 static const char *shift_op (rtx, HOST_WIDE_INT *);
112 static struct machine_function *arm_init_machine_status (void);
113 static void thumb_exit (FILE *, int);
114 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
115 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
116 static Mnode *add_minipool_forward_ref (Mfix *);
117 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
118 static Mnode *add_minipool_backward_ref (Mfix *);
119 static void assign_minipool_offsets (Mfix *);
120 static void arm_print_value (FILE *, rtx);
121 static void dump_minipool (rtx_insn *);
122 static int arm_barrier_cost (rtx_insn *);
123 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
124 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
125 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
126 machine_mode, rtx);
127 static void arm_reorg (void);
128 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
129 static unsigned long arm_compute_save_reg0_reg12_mask (void);
130 static unsigned long arm_compute_save_reg_mask (void);
131 static unsigned long arm_isr_value (tree);
132 static unsigned long arm_compute_func_type (void);
133 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
134 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
135 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
136 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
137 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
138 #endif
139 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
140 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
141 static int arm_comp_type_attributes (const_tree, const_tree);
142 static void arm_set_default_type_attributes (tree);
143 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
144 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
145 static int optimal_immediate_sequence (enum rtx_code code,
146 unsigned HOST_WIDE_INT val,
147 struct four_ints *return_sequence);
148 static int optimal_immediate_sequence_1 (enum rtx_code code,
149 unsigned HOST_WIDE_INT val,
150 struct four_ints *return_sequence,
151 int i);
152 static int arm_get_strip_length (int);
153 static bool arm_function_ok_for_sibcall (tree, tree);
154 static machine_mode arm_promote_function_mode (const_tree,
155 machine_mode, int *,
156 const_tree, int);
157 static bool arm_return_in_memory (const_tree, const_tree);
158 static rtx arm_function_value (const_tree, const_tree, bool);
159 static rtx arm_libcall_value_1 (machine_mode);
160 static rtx arm_libcall_value (machine_mode, const_rtx);
161 static bool arm_function_value_regno_p (const unsigned int);
162 static void arm_internal_label (FILE *, const char *, unsigned long);
163 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
164 tree);
165 static bool arm_have_conditional_execution (void);
166 static bool arm_cannot_force_const_mem (machine_mode, rtx);
167 static bool arm_legitimate_constant_p (machine_mode, rtx);
168 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
169 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
170 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
171 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
172 static void emit_constant_insn (rtx cond, rtx pattern);
173 static rtx_insn *emit_set_insn (rtx, rtx);
174 static rtx emit_multi_reg_push (unsigned long, unsigned long);
175 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
176 tree, bool);
177 static rtx arm_function_arg (cumulative_args_t, machine_mode,
178 const_tree, bool);
179 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
180 const_tree, bool);
181 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
182 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
183 const_tree);
184 static rtx aapcs_libcall_value (machine_mode);
185 static int aapcs_select_return_coproc (const_tree, const_tree);
187 #ifdef OBJECT_FORMAT_ELF
188 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
189 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
190 #endif
191 #ifndef ARM_PE
192 static void arm_encode_section_info (tree, rtx, int);
193 #endif
195 static void arm_file_end (void);
196 static void arm_file_start (void);
197 static void arm_insert_attributes (tree, tree *);
199 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
200 tree, int *, int);
201 static bool arm_pass_by_reference (cumulative_args_t,
202 machine_mode, const_tree, bool);
203 static bool arm_promote_prototypes (const_tree);
204 static bool arm_default_short_enums (void);
205 static bool arm_align_anon_bitfield (void);
206 static bool arm_return_in_msb (const_tree);
207 static bool arm_must_pass_in_stack (machine_mode, const_tree);
208 static bool arm_return_in_memory (const_tree, const_tree);
209 #if ARM_UNWIND_INFO
210 static void arm_unwind_emit (FILE *, rtx_insn *);
211 static bool arm_output_ttype (rtx);
212 static void arm_asm_emit_except_personality (rtx);
213 #endif
214 static void arm_asm_init_sections (void);
215 static rtx arm_dwarf_register_span (rtx);
217 static tree arm_cxx_guard_type (void);
218 static bool arm_cxx_guard_mask_bit (void);
219 static tree arm_get_cookie_size (tree);
220 static bool arm_cookie_has_size (void);
221 static bool arm_cxx_cdtor_returns_this (void);
222 static bool arm_cxx_key_method_may_be_inline (void);
223 static void arm_cxx_determine_class_data_visibility (tree);
224 static bool arm_cxx_class_data_always_comdat (void);
225 static bool arm_cxx_use_aeabi_atexit (void);
226 static void arm_init_libfuncs (void);
227 static tree arm_build_builtin_va_list (void);
228 static void arm_expand_builtin_va_start (tree, rtx);
229 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
230 static void arm_option_override (void);
231 static void arm_override_options_after_change (void);
232 static void arm_option_print (FILE *, int, struct cl_target_option *);
233 static void arm_set_current_function (tree);
234 static bool arm_can_inline_p (tree, tree);
235 static void arm_relayout_function (tree);
236 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
237 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
238 static bool arm_macro_fusion_p (void);
239 static bool arm_cannot_copy_insn_p (rtx_insn *);
240 static int arm_issue_rate (void);
241 static int arm_first_cycle_multipass_dfa_lookahead (void);
242 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
243 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
244 static bool arm_output_addr_const_extra (FILE *, rtx);
245 static bool arm_allocate_stack_slots_for_args (void);
246 static bool arm_warn_func_return (tree);
247 static tree arm_promoted_type (const_tree t);
248 static tree arm_convert_to_type (tree type, tree expr);
249 static bool arm_scalar_mode_supported_p (machine_mode);
250 static bool arm_frame_pointer_required (void);
251 static bool arm_can_eliminate (const int, const int);
252 static void arm_asm_trampoline_template (FILE *);
253 static void arm_trampoline_init (rtx, tree, rtx);
254 static rtx arm_trampoline_adjust_address (rtx);
255 static rtx arm_pic_static_addr (rtx orig, rtx reg);
256 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
257 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
258 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
259 static bool arm_array_mode_supported_p (machine_mode,
260 unsigned HOST_WIDE_INT);
261 static machine_mode arm_preferred_simd_mode (machine_mode);
262 static bool arm_class_likely_spilled_p (reg_class_t);
263 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
264 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
265 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
266 const_tree type,
267 int misalignment,
268 bool is_packed);
269 static void arm_conditional_register_usage (void);
270 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
271 static unsigned int arm_autovectorize_vector_sizes (void);
272 static int arm_default_branch_cost (bool, bool);
273 static int arm_cortex_a5_branch_cost (bool, bool);
274 static int arm_cortex_m_branch_cost (bool, bool);
275 static int arm_cortex_m7_branch_cost (bool, bool);
277 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
278 const unsigned char *sel);
280 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
282 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
283 tree vectype,
284 int misalign ATTRIBUTE_UNUSED);
285 static unsigned arm_add_stmt_cost (void *data, int count,
286 enum vect_cost_for_stmt kind,
287 struct _stmt_vec_info *stmt_info,
288 int misalign,
289 enum vect_cost_model_location where);
291 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
292 bool op0_preserve_value);
293 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
295 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
296 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
297 const_tree);
298 static section *arm_function_section (tree, enum node_frequency, bool, bool);
299 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
300 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
301 int reloc);
302 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
304 /* Table of machine attributes. */
305 static const struct attribute_spec arm_attribute_table[] =
306 {
307 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
308 affects_type_identity } */
309 /* Function calls made to this symbol must be done indirectly, because
310 it may lie outside of the 26 bit addressing range of a normal function
311 call. */
312 { "long_call", 0, 0, false, true, true, NULL, false },
313 /* Whereas these functions are always known to reside within the 26 bit
314 addressing range. */
315 { "short_call", 0, 0, false, true, true, NULL, false },
316 /* Specify the procedure call conventions for a function. */
317 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
318 false },
319 /* Interrupt Service Routines have special prologue and epilogue requirements. */
320 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
321 false },
322 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
323 false },
324 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
325 false },
326 #ifdef ARM_PE
327 /* ARM/PE has three new attributes:
328 interfacearm - ?
329 dllexport - for exporting a function/variable that will live in a dll
330 dllimport - for importing a function/variable from a dll
332 Microsoft allows multiple declspecs in one __declspec, separating
333 them with spaces. We do NOT support this. Instead, use __declspec
334 multiple times.
335 */
336 { "dllimport", 0, 0, true, false, false, NULL, false },
337 { "dllexport", 0, 0, true, false, false, NULL, false },
338 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
339 false },
340 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
342 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
343 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
344 false },
345 #endif
346 { NULL, 0, 0, false, false, false, NULL, false }
347 };
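/* For reference, these attributes are applied in user code roughly as
   follows (illustrative declarations, not part of this file):

     void far_away (void) __attribute__ ((long_call));
     void uart_isr (void) __attribute__ ((interrupt ("IRQ")));
     void start (void) __attribute__ ((naked));
*/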
349 /* Initialize the GCC target structure. */
350 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
351 #undef TARGET_MERGE_DECL_ATTRIBUTES
352 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
353 #endif
355 #undef TARGET_LEGITIMIZE_ADDRESS
356 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
358 #undef TARGET_ATTRIBUTE_TABLE
359 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
361 #undef TARGET_INSERT_ATTRIBUTES
362 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
364 #undef TARGET_ASM_FILE_START
365 #define TARGET_ASM_FILE_START arm_file_start
366 #undef TARGET_ASM_FILE_END
367 #define TARGET_ASM_FILE_END arm_file_end
369 #undef TARGET_ASM_ALIGNED_SI_OP
370 #define TARGET_ASM_ALIGNED_SI_OP NULL
371 #undef TARGET_ASM_INTEGER
372 #define TARGET_ASM_INTEGER arm_assemble_integer
374 #undef TARGET_PRINT_OPERAND
375 #define TARGET_PRINT_OPERAND arm_print_operand
376 #undef TARGET_PRINT_OPERAND_ADDRESS
377 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
378 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
379 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
381 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
382 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
384 #undef TARGET_ASM_FUNCTION_PROLOGUE
385 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
387 #undef TARGET_ASM_FUNCTION_EPILOGUE
388 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
390 #undef TARGET_CAN_INLINE_P
391 #define TARGET_CAN_INLINE_P arm_can_inline_p
393 #undef TARGET_RELAYOUT_FUNCTION
394 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
396 #undef TARGET_OPTION_OVERRIDE
397 #define TARGET_OPTION_OVERRIDE arm_option_override
399 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
400 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
402 #undef TARGET_OPTION_PRINT
403 #define TARGET_OPTION_PRINT arm_option_print
405 #undef TARGET_COMP_TYPE_ATTRIBUTES
406 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
408 #undef TARGET_SCHED_MACRO_FUSION_P
409 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
411 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
412 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
414 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
415 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
417 #undef TARGET_SCHED_ADJUST_COST
418 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
420 #undef TARGET_SET_CURRENT_FUNCTION
421 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
423 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
424 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
426 #undef TARGET_SCHED_REORDER
427 #define TARGET_SCHED_REORDER arm_sched_reorder
429 #undef TARGET_REGISTER_MOVE_COST
430 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
432 #undef TARGET_MEMORY_MOVE_COST
433 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
435 #undef TARGET_ENCODE_SECTION_INFO
436 #ifdef ARM_PE
437 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
438 #else
439 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
440 #endif
442 #undef TARGET_STRIP_NAME_ENCODING
443 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
445 #undef TARGET_ASM_INTERNAL_LABEL
446 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
448 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
449 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
451 #undef TARGET_FUNCTION_VALUE
452 #define TARGET_FUNCTION_VALUE arm_function_value
454 #undef TARGET_LIBCALL_VALUE
455 #define TARGET_LIBCALL_VALUE arm_libcall_value
457 #undef TARGET_FUNCTION_VALUE_REGNO_P
458 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
460 #undef TARGET_ASM_OUTPUT_MI_THUNK
461 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
462 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
463 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
465 #undef TARGET_RTX_COSTS
466 #define TARGET_RTX_COSTS arm_rtx_costs
467 #undef TARGET_ADDRESS_COST
468 #define TARGET_ADDRESS_COST arm_address_cost
470 #undef TARGET_SHIFT_TRUNCATION_MASK
471 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
472 #undef TARGET_VECTOR_MODE_SUPPORTED_P
473 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
474 #undef TARGET_ARRAY_MODE_SUPPORTED_P
475 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
476 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
477 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
478 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
479 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
480 arm_autovectorize_vector_sizes
482 #undef TARGET_MACHINE_DEPENDENT_REORG
483 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
485 #undef TARGET_INIT_BUILTINS
486 #define TARGET_INIT_BUILTINS arm_init_builtins
487 #undef TARGET_EXPAND_BUILTIN
488 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
489 #undef TARGET_BUILTIN_DECL
490 #define TARGET_BUILTIN_DECL arm_builtin_decl
492 #undef TARGET_INIT_LIBFUNCS
493 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
495 #undef TARGET_PROMOTE_FUNCTION_MODE
496 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
497 #undef TARGET_PROMOTE_PROTOTYPES
498 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
499 #undef TARGET_PASS_BY_REFERENCE
500 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
501 #undef TARGET_ARG_PARTIAL_BYTES
502 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
503 #undef TARGET_FUNCTION_ARG
504 #define TARGET_FUNCTION_ARG arm_function_arg
505 #undef TARGET_FUNCTION_ARG_ADVANCE
506 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
507 #undef TARGET_FUNCTION_ARG_BOUNDARY
508 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
510 #undef TARGET_SETUP_INCOMING_VARARGS
511 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
513 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
514 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
516 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
517 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
518 #undef TARGET_TRAMPOLINE_INIT
519 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
520 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
521 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
523 #undef TARGET_WARN_FUNC_RETURN
524 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
526 #undef TARGET_DEFAULT_SHORT_ENUMS
527 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
529 #undef TARGET_ALIGN_ANON_BITFIELD
530 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
532 #undef TARGET_NARROW_VOLATILE_BITFIELD
533 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
535 #undef TARGET_CXX_GUARD_TYPE
536 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
538 #undef TARGET_CXX_GUARD_MASK_BIT
539 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
541 #undef TARGET_CXX_GET_COOKIE_SIZE
542 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
544 #undef TARGET_CXX_COOKIE_HAS_SIZE
545 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
547 #undef TARGET_CXX_CDTOR_RETURNS_THIS
548 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
550 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
551 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
553 #undef TARGET_CXX_USE_AEABI_ATEXIT
554 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
556 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
557 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
558 arm_cxx_determine_class_data_visibility
560 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
561 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
563 #undef TARGET_RETURN_IN_MSB
564 #define TARGET_RETURN_IN_MSB arm_return_in_msb
566 #undef TARGET_RETURN_IN_MEMORY
567 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
569 #undef TARGET_MUST_PASS_IN_STACK
570 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
572 #if ARM_UNWIND_INFO
573 #undef TARGET_ASM_UNWIND_EMIT
574 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
576 /* EABI unwinding tables use a different format for the typeinfo tables. */
577 #undef TARGET_ASM_TTYPE
578 #define TARGET_ASM_TTYPE arm_output_ttype
580 #undef TARGET_ARM_EABI_UNWINDER
581 #define TARGET_ARM_EABI_UNWINDER true
583 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
584 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
586 #undef TARGET_ASM_INIT_SECTIONS
587 #endif /* ARM_UNWIND_INFO */
588 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
590 #undef TARGET_DWARF_REGISTER_SPAN
591 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
593 #undef TARGET_CANNOT_COPY_INSN_P
594 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
596 #ifdef HAVE_AS_TLS
597 #undef TARGET_HAVE_TLS
598 #define TARGET_HAVE_TLS true
599 #endif
601 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
602 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
604 #undef TARGET_LEGITIMATE_CONSTANT_P
605 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
607 #undef TARGET_CANNOT_FORCE_CONST_MEM
608 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
610 #undef TARGET_MAX_ANCHOR_OFFSET
611 #define TARGET_MAX_ANCHOR_OFFSET 4095
613 /* The minimum is set such that the total size of the block
614 for a particular anchor is -4088 + 1 + 4095 bytes, which is
615 divisible by eight, ensuring natural spacing of anchors. */
616 #undef TARGET_MIN_ANCHOR_OFFSET
617 #define TARGET_MIN_ANCHOR_OFFSET -4088
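/* In other words, a section anchor can reach offsets in the range
   [-4088, +4095], so each anchor covers 4088 + 1 + 4095 = 8184 bytes,
   and 8184 = 8 * 1023, which is the "divisible by eight" spacing the
   comment above refers to.  */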
619 #undef TARGET_SCHED_ISSUE_RATE
620 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
622 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
623 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
624 arm_first_cycle_multipass_dfa_lookahead
626 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
627 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
628 arm_first_cycle_multipass_dfa_lookahead_guard
630 #undef TARGET_MANGLE_TYPE
631 #define TARGET_MANGLE_TYPE arm_mangle_type
633 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
634 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
636 #undef TARGET_BUILD_BUILTIN_VA_LIST
637 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
638 #undef TARGET_EXPAND_BUILTIN_VA_START
639 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
640 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
641 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
643 #ifdef HAVE_AS_TLS
644 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
645 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
646 #endif
648 #undef TARGET_LEGITIMATE_ADDRESS_P
649 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
651 #undef TARGET_PREFERRED_RELOAD_CLASS
652 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
654 #undef TARGET_PROMOTED_TYPE
655 #define TARGET_PROMOTED_TYPE arm_promoted_type
657 #undef TARGET_CONVERT_TO_TYPE
658 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
660 #undef TARGET_SCALAR_MODE_SUPPORTED_P
661 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
663 #undef TARGET_FRAME_POINTER_REQUIRED
664 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
666 #undef TARGET_CAN_ELIMINATE
667 #define TARGET_CAN_ELIMINATE arm_can_eliminate
669 #undef TARGET_CONDITIONAL_REGISTER_USAGE
670 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
672 #undef TARGET_CLASS_LIKELY_SPILLED_P
673 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
675 #undef TARGET_VECTORIZE_BUILTINS
676 #define TARGET_VECTORIZE_BUILTINS
678 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
679 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
680 arm_builtin_vectorized_function
682 #undef TARGET_VECTOR_ALIGNMENT
683 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
685 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
686 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
687 arm_vector_alignment_reachable
689 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
690 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
691 arm_builtin_support_vector_misalignment
693 #undef TARGET_PREFERRED_RENAME_CLASS
694 #define TARGET_PREFERRED_RENAME_CLASS \
695 arm_preferred_rename_class
697 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
698 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
699 arm_vectorize_vec_perm_const_ok
701 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
702 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
703 arm_builtin_vectorization_cost
704 #undef TARGET_VECTORIZE_ADD_STMT_COST
705 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
707 #undef TARGET_CANONICALIZE_COMPARISON
708 #define TARGET_CANONICALIZE_COMPARISON \
709 arm_canonicalize_comparison
711 #undef TARGET_ASAN_SHADOW_OFFSET
712 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
714 #undef MAX_INSN_PER_IT_BLOCK
715 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
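/* An IT block can cover at most four conditional instructions.  When
   -mrestrict-it is in force (arm_restrict_it, the ARMv8 rules that
   deprecate multi-instruction IT blocks) it is limited to a single
   instruction.  */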
717 #undef TARGET_CAN_USE_DOLOOP_P
718 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
720 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
721 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
723 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
724 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
726 #undef TARGET_SCHED_FUSION_PRIORITY
727 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
729 #undef TARGET_ASM_FUNCTION_SECTION
730 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
732 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
733 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
735 #undef TARGET_SECTION_TYPE_FLAGS
736 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
738 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
739 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
741 struct gcc_target targetm = TARGET_INITIALIZER;
743 /* Obstack for minipool constant handling. */
744 static struct obstack minipool_obstack;
745 static char * minipool_startobj;
747 /* The maximum number of insns skipped which
748 will be conditionalised if possible. */
749 static int max_insns_skipped = 5;
751 extern FILE * asm_out_file;
753 /* True if we are currently building a constant table. */
754 int making_const_table;
756 /* The processor for which instructions should be scheduled. */
757 enum processor_type arm_tune = TARGET_CPU_arm_none;
759 /* The current tuning set. */
760 const struct tune_params *current_tune;
762 /* Which floating point hardware to schedule for. */
763 int arm_fpu_attr;
765 /* Used for Thumb call_via trampolines. */
766 rtx thumb_call_via_label[14];
767 static int thumb_call_reg_needed;
769 /* The bits in this mask specify which
770 instructions we are allowed to generate. */
771 arm_feature_set insn_flags = ARM_FSET_EMPTY;
773 /* The bits in this mask specify which instruction scheduling options should
774 be used. */
775 arm_feature_set tune_flags = ARM_FSET_EMPTY;
777 /* The highest ARM architecture version supported by the
778 target. */
779 enum base_architecture arm_base_arch = BASE_ARCH_0;
781 /* The following are used in the arm.md file as equivalents to bits
782 in the above two flag variables. */
784 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
785 int arm_arch3m = 0;
787 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
788 int arm_arch4 = 0;
790 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
791 int arm_arch4t = 0;
793 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
794 int arm_arch5 = 0;
796 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
797 int arm_arch5e = 0;
799 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
800 int arm_arch6 = 0;
802 /* Nonzero if this chip supports the ARM 6K extensions. */
803 int arm_arch6k = 0;
805 /* Nonzero if this chip supports the ARM 6KZ extensions. */
806 int arm_arch6kz = 0;
808 /* Nonzero if instructions present in ARMv6-M can be used. */
809 int arm_arch6m = 0;
811 /* Nonzero if this chip supports the ARM 7 extensions. */
812 int arm_arch7 = 0;
814 /* Nonzero if instructions not present in the 'M' profile can be used. */
815 int arm_arch_notm = 0;
817 /* Nonzero if instructions present in ARMv7E-M can be used. */
818 int arm_arch7em = 0;
820 /* Nonzero if instructions present in ARMv8 can be used. */
821 int arm_arch8 = 0;
823 /* Nonzero if this chip supports the ARMv8.1 extensions. */
824 int arm_arch8_1 = 0;
826 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
827 int arm_arch8_2 = 0;
829 /* Nonzero if this chip supports the FP16 instructions extension of ARM
830 Architecture 8.2. */
831 int arm_fp16_inst = 0;
833 /* Nonzero if this chip can benefit from load scheduling. */
834 int arm_ld_sched = 0;
836 /* Nonzero if this chip is a StrongARM. */
837 int arm_tune_strongarm = 0;
839 /* Nonzero if this chip supports Intel Wireless MMX technology. */
840 int arm_arch_iwmmxt = 0;
842 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
843 int arm_arch_iwmmxt2 = 0;
845 /* Nonzero if this chip is an XScale. */
846 int arm_arch_xscale = 0;
848 /* Nonzero if tuning for XScale */
849 int arm_tune_xscale = 0;
851 /* Nonzero if we want to tune for stores that access the write-buffer.
852 This typically means an ARM6 or ARM7 with MMU or MPU. */
853 int arm_tune_wbuf = 0;
855 /* Nonzero if tuning for Cortex-A9. */
856 int arm_tune_cortex_a9 = 0;
858 /* Nonzero if we should define __THUMB_INTERWORK__ in the
859 preprocessor.
860 XXX This is a bit of a hack, it's intended to help work around
861 problems in GLD which doesn't understand that armv5t code is
862 interworking clean. */
863 int arm_cpp_interwork = 0;
865 /* Nonzero if chip supports Thumb 1. */
866 int arm_arch_thumb1;
868 /* Nonzero if chip supports Thumb 2. */
869 int arm_arch_thumb2;
871 /* Nonzero if chip supports integer division instruction. */
872 int arm_arch_arm_hwdiv;
873 int arm_arch_thumb_hwdiv;
875 /* Nonzero if chip disallows volatile memory access in IT block. */
876 int arm_arch_no_volatile_ce;
878 /* Nonzero if we should use Neon to handle 64-bit operations rather
879 than core registers. */
880 int prefer_neon_for_64bits = 0;
882 /* Nonzero if we shouldn't use literal pools. */
883 bool arm_disable_literal_pool = false;
885 /* The register number to be used for the PIC offset register. */
886 unsigned arm_pic_register = INVALID_REGNUM;
888 enum arm_pcs arm_pcs_default;
890 /* For an explanation of these variables, see final_prescan_insn below. */
891 int arm_ccfsm_state;
892 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
893 enum arm_cond_code arm_current_cc;
895 rtx arm_target_insn;
896 int arm_target_label;
897 /* The number of conditionally executed insns, including the current insn. */
898 int arm_condexec_count = 0;
899 /* A bitmask specifying the patterns for the IT block.
900 Zero means do not output an IT block before this insn. */
901 int arm_condexec_mask = 0;
902 /* The number of bits used in arm_condexec_mask. */
903 int arm_condexec_masklen = 0;
905 /* Nonzero if chip supports the ARMv8 CRC instructions. */
906 int arm_arch_crc = 0;
908 /* Nonzero if the core has a very small, high-latency, multiply unit. */
909 int arm_m_profile_small_mul = 0;
911 /* The condition codes of the ARM, and the inverse function. */
912 static const char * const arm_condition_codes[] =
913 {
914 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
915 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
916 };
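/* The codes are laid out in complementary pairs, so the inverse of a
   condition is obtained by flipping the low bit of its index (this is
   what ARM_INVERSE_CONDITION_CODE in arm.h relies on).  */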
918 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
919 int arm_regs_in_sequence[] =
920 {
921 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
922 };
924 #define ARM_LSL_NAME "lsl"
925 #define streq(string1, string2) (strcmp (string1, string2) == 0)
927 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
928 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
929 | (1 << PIC_OFFSET_TABLE_REGNUM)))
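/* I.e. the low registers r0-r7 (the 0xff mask), with the Thumb frame
   pointer and the PIC register removed so they are never picked as
   work registers; the SP and PC bits lie outside the low byte anyway.  */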
931 /* Initialization code. */
933 struct processors
934 {
935 const char *const name;
936 enum processor_type core;
937 const char *arch;
938 enum base_architecture base_arch;
939 const arm_feature_set flags;
940 const struct tune_params *const tune;
941 };
944 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
945 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
946 { \
947 num_slots, \
948 l1_size, \
949 l1_line_size \
950 }
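/* A tuning that benefits from software prefetching would use this as,
   for example, ARM_PREFETCH_BENEFICIAL (4, 32768, 64): four prefetch
   slots, a 32kB L1 data cache and 64-byte cache lines (the numbers
   here are only illustrative).  */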
952 /* arm generic vectorizer costs. */
953 static const
954 struct cpu_vec_costs arm_default_vec_cost = {
955 1, /* scalar_stmt_cost. */
956 1, /* scalar load_cost. */
957 1, /* scalar_store_cost. */
958 1, /* vec_stmt_cost. */
959 1, /* vec_to_scalar_cost. */
960 1, /* scalar_to_vec_cost. */
961 1, /* vec_align_load_cost. */
962 1, /* vec_unalign_load_cost. */
963 1, /* vec_unalign_store_cost. */
964 1, /* vec_store_cost. */
965 3, /* cond_taken_branch_cost. */
966 1, /* cond_not_taken_branch_cost. */
967 };
969 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
970 #include "aarch-cost-tables.h"
974 const struct cpu_cost_table cortexa9_extra_costs =
976 /* ALU */
978 0, /* arith. */
979 0, /* logical. */
980 0, /* shift. */
981 COSTS_N_INSNS (1), /* shift_reg. */
982 COSTS_N_INSNS (1), /* arith_shift. */
983 COSTS_N_INSNS (2), /* arith_shift_reg. */
984 0, /* log_shift. */
985 COSTS_N_INSNS (1), /* log_shift_reg. */
986 COSTS_N_INSNS (1), /* extend. */
987 COSTS_N_INSNS (2), /* extend_arith. */
988 COSTS_N_INSNS (1), /* bfi. */
989 COSTS_N_INSNS (1), /* bfx. */
990 0, /* clz. */
991 0, /* rev. */
992 0, /* non_exec. */
993 true /* non_exec_costs_exec. */
996 /* MULT SImode */
998 COSTS_N_INSNS (3), /* simple. */
999 COSTS_N_INSNS (3), /* flag_setting. */
1000 COSTS_N_INSNS (2), /* extend. */
1001 COSTS_N_INSNS (3), /* add. */
1002 COSTS_N_INSNS (2), /* extend_add. */
1003 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1005 /* MULT DImode */
1007 0, /* simple (N/A). */
1008 0, /* flag_setting (N/A). */
1009 COSTS_N_INSNS (4), /* extend. */
1010 0, /* add (N/A). */
1011 COSTS_N_INSNS (4), /* extend_add. */
1012 0 /* idiv (N/A). */
1015 /* LD/ST */
1017 COSTS_N_INSNS (2), /* load. */
1018 COSTS_N_INSNS (2), /* load_sign_extend. */
1019 COSTS_N_INSNS (2), /* ldrd. */
1020 COSTS_N_INSNS (2), /* ldm_1st. */
1021 1, /* ldm_regs_per_insn_1st. */
1022 2, /* ldm_regs_per_insn_subsequent. */
1023 COSTS_N_INSNS (5), /* loadf. */
1024 COSTS_N_INSNS (5), /* loadd. */
1025 COSTS_N_INSNS (1), /* load_unaligned. */
1026 COSTS_N_INSNS (2), /* store. */
1027 COSTS_N_INSNS (2), /* strd. */
1028 COSTS_N_INSNS (2), /* stm_1st. */
1029 1, /* stm_regs_per_insn_1st. */
1030 2, /* stm_regs_per_insn_subsequent. */
1031 COSTS_N_INSNS (1), /* storef. */
1032 COSTS_N_INSNS (1), /* stored. */
1033 COSTS_N_INSNS (1), /* store_unaligned. */
1034 COSTS_N_INSNS (1), /* loadv. */
1035 COSTS_N_INSNS (1) /* storev. */
1038 /* FP SFmode */
1040 COSTS_N_INSNS (14), /* div. */
1041 COSTS_N_INSNS (4), /* mult. */
1042 COSTS_N_INSNS (7), /* mult_addsub. */
1043 COSTS_N_INSNS (30), /* fma. */
1044 COSTS_N_INSNS (3), /* addsub. */
1045 COSTS_N_INSNS (1), /* fpconst. */
1046 COSTS_N_INSNS (1), /* neg. */
1047 COSTS_N_INSNS (3), /* compare. */
1048 COSTS_N_INSNS (3), /* widen. */
1049 COSTS_N_INSNS (3), /* narrow. */
1050 COSTS_N_INSNS (3), /* toint. */
1051 COSTS_N_INSNS (3), /* fromint. */
1052 COSTS_N_INSNS (3) /* roundint. */
1054 /* FP DFmode */
1056 COSTS_N_INSNS (24), /* div. */
1057 COSTS_N_INSNS (5), /* mult. */
1058 COSTS_N_INSNS (8), /* mult_addsub. */
1059 COSTS_N_INSNS (30), /* fma. */
1060 COSTS_N_INSNS (3), /* addsub. */
1061 COSTS_N_INSNS (1), /* fpconst. */
1062 COSTS_N_INSNS (1), /* neg. */
1063 COSTS_N_INSNS (3), /* compare. */
1064 COSTS_N_INSNS (3), /* widen. */
1065 COSTS_N_INSNS (3), /* narrow. */
1066 COSTS_N_INSNS (3), /* toint. */
1067 COSTS_N_INSNS (3), /* fromint. */
1068 COSTS_N_INSNS (3) /* roundint. */
1071 /* Vector */
1073 COSTS_N_INSNS (1) /* alu. */
1077 const struct cpu_cost_table cortexa8_extra_costs =
1079 /* ALU */
1081 0, /* arith. */
1082 0, /* logical. */
1083 COSTS_N_INSNS (1), /* shift. */
1084 0, /* shift_reg. */
1085 COSTS_N_INSNS (1), /* arith_shift. */
1086 0, /* arith_shift_reg. */
1087 COSTS_N_INSNS (1), /* log_shift. */
1088 0, /* log_shift_reg. */
1089 0, /* extend. */
1090 0, /* extend_arith. */
1091 0, /* bfi. */
1092 0, /* bfx. */
1093 0, /* clz. */
1094 0, /* rev. */
1095 0, /* non_exec. */
1096 true /* non_exec_costs_exec. */
1099 /* MULT SImode */
1101 COSTS_N_INSNS (1), /* simple. */
1102 COSTS_N_INSNS (1), /* flag_setting. */
1103 COSTS_N_INSNS (1), /* extend. */
1104 COSTS_N_INSNS (1), /* add. */
1105 COSTS_N_INSNS (1), /* extend_add. */
1106 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1108 /* MULT DImode */
1110 0, /* simple (N/A). */
1111 0, /* flag_setting (N/A). */
1112 COSTS_N_INSNS (2), /* extend. */
1113 0, /* add (N/A). */
1114 COSTS_N_INSNS (2), /* extend_add. */
1115 0 /* idiv (N/A). */
1118 /* LD/ST */
1120 COSTS_N_INSNS (1), /* load. */
1121 COSTS_N_INSNS (1), /* load_sign_extend. */
1122 COSTS_N_INSNS (1), /* ldrd. */
1123 COSTS_N_INSNS (1), /* ldm_1st. */
1124 1, /* ldm_regs_per_insn_1st. */
1125 2, /* ldm_regs_per_insn_subsequent. */
1126 COSTS_N_INSNS (1), /* loadf. */
1127 COSTS_N_INSNS (1), /* loadd. */
1128 COSTS_N_INSNS (1), /* load_unaligned. */
1129 COSTS_N_INSNS (1), /* store. */
1130 COSTS_N_INSNS (1), /* strd. */
1131 COSTS_N_INSNS (1), /* stm_1st. */
1132 1, /* stm_regs_per_insn_1st. */
1133 2, /* stm_regs_per_insn_subsequent. */
1134 COSTS_N_INSNS (1), /* storef. */
1135 COSTS_N_INSNS (1), /* stored. */
1136 COSTS_N_INSNS (1), /* store_unaligned. */
1137 COSTS_N_INSNS (1), /* loadv. */
1138 COSTS_N_INSNS (1) /* storev. */
1141 /* FP SFmode */
1143 COSTS_N_INSNS (36), /* div. */
1144 COSTS_N_INSNS (11), /* mult. */
1145 COSTS_N_INSNS (20), /* mult_addsub. */
1146 COSTS_N_INSNS (30), /* fma. */
1147 COSTS_N_INSNS (9), /* addsub. */
1148 COSTS_N_INSNS (3), /* fpconst. */
1149 COSTS_N_INSNS (3), /* neg. */
1150 COSTS_N_INSNS (6), /* compare. */
1151 COSTS_N_INSNS (4), /* widen. */
1152 COSTS_N_INSNS (4), /* narrow. */
1153 COSTS_N_INSNS (8), /* toint. */
1154 COSTS_N_INSNS (8), /* fromint. */
1155 COSTS_N_INSNS (8) /* roundint. */
1157 /* FP DFmode */
1159 COSTS_N_INSNS (64), /* div. */
1160 COSTS_N_INSNS (16), /* mult. */
1161 COSTS_N_INSNS (25), /* mult_addsub. */
1162 COSTS_N_INSNS (30), /* fma. */
1163 COSTS_N_INSNS (9), /* addsub. */
1164 COSTS_N_INSNS (3), /* fpconst. */
1165 COSTS_N_INSNS (3), /* neg. */
1166 COSTS_N_INSNS (6), /* compare. */
1167 COSTS_N_INSNS (6), /* widen. */
1168 COSTS_N_INSNS (6), /* narrow. */
1169 COSTS_N_INSNS (8), /* toint. */
1170 COSTS_N_INSNS (8), /* fromint. */
1171 COSTS_N_INSNS (8) /* roundint. */
1174 /* Vector */
1176 COSTS_N_INSNS (1) /* alu. */
1180 const struct cpu_cost_table cortexa5_extra_costs =
1182 /* ALU */
1184 0, /* arith. */
1185 0, /* logical. */
1186 COSTS_N_INSNS (1), /* shift. */
1187 COSTS_N_INSNS (1), /* shift_reg. */
1188 COSTS_N_INSNS (1), /* arith_shift. */
1189 COSTS_N_INSNS (1), /* arith_shift_reg. */
1190 COSTS_N_INSNS (1), /* log_shift. */
1191 COSTS_N_INSNS (1), /* log_shift_reg. */
1192 COSTS_N_INSNS (1), /* extend. */
1193 COSTS_N_INSNS (1), /* extend_arith. */
1194 COSTS_N_INSNS (1), /* bfi. */
1195 COSTS_N_INSNS (1), /* bfx. */
1196 COSTS_N_INSNS (1), /* clz. */
1197 COSTS_N_INSNS (1), /* rev. */
1198 0, /* non_exec. */
1199 true /* non_exec_costs_exec. */
1203 /* MULT SImode */
1205 0, /* simple. */
1206 COSTS_N_INSNS (1), /* flag_setting. */
1207 COSTS_N_INSNS (1), /* extend. */
1208 COSTS_N_INSNS (1), /* add. */
1209 COSTS_N_INSNS (1), /* extend_add. */
1210 COSTS_N_INSNS (7) /* idiv. */
1212 /* MULT DImode */
1214 0, /* simple (N/A). */
1215 0, /* flag_setting (N/A). */
1216 COSTS_N_INSNS (1), /* extend. */
1217 0, /* add. */
1218 COSTS_N_INSNS (2), /* extend_add. */
1219 0 /* idiv (N/A). */
1222 /* LD/ST */
1224 COSTS_N_INSNS (1), /* load. */
1225 COSTS_N_INSNS (1), /* load_sign_extend. */
1226 COSTS_N_INSNS (6), /* ldrd. */
1227 COSTS_N_INSNS (1), /* ldm_1st. */
1228 1, /* ldm_regs_per_insn_1st. */
1229 2, /* ldm_regs_per_insn_subsequent. */
1230 COSTS_N_INSNS (2), /* loadf. */
1231 COSTS_N_INSNS (4), /* loadd. */
1232 COSTS_N_INSNS (1), /* load_unaligned. */
1233 COSTS_N_INSNS (1), /* store. */
1234 COSTS_N_INSNS (3), /* strd. */
1235 COSTS_N_INSNS (1), /* stm_1st. */
1236 1, /* stm_regs_per_insn_1st. */
1237 2, /* stm_regs_per_insn_subsequent. */
1238 COSTS_N_INSNS (2), /* storef. */
1239 COSTS_N_INSNS (2), /* stored. */
1240 COSTS_N_INSNS (1), /* store_unaligned. */
1241 COSTS_N_INSNS (1), /* loadv. */
1242 COSTS_N_INSNS (1) /* storev. */
1245 /* FP SFmode */
1247 COSTS_N_INSNS (15), /* div. */
1248 COSTS_N_INSNS (3), /* mult. */
1249 COSTS_N_INSNS (7), /* mult_addsub. */
1250 COSTS_N_INSNS (7), /* fma. */
1251 COSTS_N_INSNS (3), /* addsub. */
1252 COSTS_N_INSNS (3), /* fpconst. */
1253 COSTS_N_INSNS (3), /* neg. */
1254 COSTS_N_INSNS (3), /* compare. */
1255 COSTS_N_INSNS (3), /* widen. */
1256 COSTS_N_INSNS (3), /* narrow. */
1257 COSTS_N_INSNS (3), /* toint. */
1258 COSTS_N_INSNS (3), /* fromint. */
1259 COSTS_N_INSNS (3) /* roundint. */
1261 /* FP DFmode */
1263 COSTS_N_INSNS (30), /* div. */
1264 COSTS_N_INSNS (6), /* mult. */
1265 COSTS_N_INSNS (10), /* mult_addsub. */
1266 COSTS_N_INSNS (7), /* fma. */
1267 COSTS_N_INSNS (3), /* addsub. */
1268 COSTS_N_INSNS (3), /* fpconst. */
1269 COSTS_N_INSNS (3), /* neg. */
1270 COSTS_N_INSNS (3), /* compare. */
1271 COSTS_N_INSNS (3), /* widen. */
1272 COSTS_N_INSNS (3), /* narrow. */
1273 COSTS_N_INSNS (3), /* toint. */
1274 COSTS_N_INSNS (3), /* fromint. */
1275 COSTS_N_INSNS (3) /* roundint. */
1278 /* Vector */
1280 COSTS_N_INSNS (1) /* alu. */
1285 const struct cpu_cost_table cortexa7_extra_costs =
1287 /* ALU */
1289 0, /* arith. */
1290 0, /* logical. */
1291 COSTS_N_INSNS (1), /* shift. */
1292 COSTS_N_INSNS (1), /* shift_reg. */
1293 COSTS_N_INSNS (1), /* arith_shift. */
1294 COSTS_N_INSNS (1), /* arith_shift_reg. */
1295 COSTS_N_INSNS (1), /* log_shift. */
1296 COSTS_N_INSNS (1), /* log_shift_reg. */
1297 COSTS_N_INSNS (1), /* extend. */
1298 COSTS_N_INSNS (1), /* extend_arith. */
1299 COSTS_N_INSNS (1), /* bfi. */
1300 COSTS_N_INSNS (1), /* bfx. */
1301 COSTS_N_INSNS (1), /* clz. */
1302 COSTS_N_INSNS (1), /* rev. */
1303 0, /* non_exec. */
1304 true /* non_exec_costs_exec. */
1308 /* MULT SImode */
1310 0, /* simple. */
1311 COSTS_N_INSNS (1), /* flag_setting. */
1312 COSTS_N_INSNS (1), /* extend. */
1313 COSTS_N_INSNS (1), /* add. */
1314 COSTS_N_INSNS (1), /* extend_add. */
1315 COSTS_N_INSNS (7) /* idiv. */
1317 /* MULT DImode */
1319 0, /* simple (N/A). */
1320 0, /* flag_setting (N/A). */
1321 COSTS_N_INSNS (1), /* extend. */
1322 0, /* add. */
1323 COSTS_N_INSNS (2), /* extend_add. */
1324 0 /* idiv (N/A). */
1327 /* LD/ST */
1329 COSTS_N_INSNS (1), /* load. */
1330 COSTS_N_INSNS (1), /* load_sign_extend. */
1331 COSTS_N_INSNS (3), /* ldrd. */
1332 COSTS_N_INSNS (1), /* ldm_1st. */
1333 1, /* ldm_regs_per_insn_1st. */
1334 2, /* ldm_regs_per_insn_subsequent. */
1335 COSTS_N_INSNS (2), /* loadf. */
1336 COSTS_N_INSNS (2), /* loadd. */
1337 COSTS_N_INSNS (1), /* load_unaligned. */
1338 COSTS_N_INSNS (1), /* store. */
1339 COSTS_N_INSNS (3), /* strd. */
1340 COSTS_N_INSNS (1), /* stm_1st. */
1341 1, /* stm_regs_per_insn_1st. */
1342 2, /* stm_regs_per_insn_subsequent. */
1343 COSTS_N_INSNS (2), /* storef. */
1344 COSTS_N_INSNS (2), /* stored. */
1345 COSTS_N_INSNS (1), /* store_unaligned. */
1346 COSTS_N_INSNS (1), /* loadv. */
1347 COSTS_N_INSNS (1) /* storev. */
1350 /* FP SFmode */
1352 COSTS_N_INSNS (15), /* div. */
1353 COSTS_N_INSNS (3), /* mult. */
1354 COSTS_N_INSNS (7), /* mult_addsub. */
1355 COSTS_N_INSNS (7), /* fma. */
1356 COSTS_N_INSNS (3), /* addsub. */
1357 COSTS_N_INSNS (3), /* fpconst. */
1358 COSTS_N_INSNS (3), /* neg. */
1359 COSTS_N_INSNS (3), /* compare. */
1360 COSTS_N_INSNS (3), /* widen. */
1361 COSTS_N_INSNS (3), /* narrow. */
1362 COSTS_N_INSNS (3), /* toint. */
1363 COSTS_N_INSNS (3), /* fromint. */
1364 COSTS_N_INSNS (3) /* roundint. */
1366 /* FP DFmode */
1368 COSTS_N_INSNS (30), /* div. */
1369 COSTS_N_INSNS (6), /* mult. */
1370 COSTS_N_INSNS (10), /* mult_addsub. */
1371 COSTS_N_INSNS (7), /* fma. */
1372 COSTS_N_INSNS (3), /* addsub. */
1373 COSTS_N_INSNS (3), /* fpconst. */
1374 COSTS_N_INSNS (3), /* neg. */
1375 COSTS_N_INSNS (3), /* compare. */
1376 COSTS_N_INSNS (3), /* widen. */
1377 COSTS_N_INSNS (3), /* narrow. */
1378 COSTS_N_INSNS (3), /* toint. */
1379 COSTS_N_INSNS (3), /* fromint. */
1380 COSTS_N_INSNS (3) /* roundint. */
1383 /* Vector */
1385 COSTS_N_INSNS (1) /* alu. */
1389 const struct cpu_cost_table cortexa12_extra_costs =
1391 /* ALU */
1393 0, /* arith. */
1394 0, /* logical. */
1395 0, /* shift. */
1396 COSTS_N_INSNS (1), /* shift_reg. */
1397 COSTS_N_INSNS (1), /* arith_shift. */
1398 COSTS_N_INSNS (1), /* arith_shift_reg. */
1399 COSTS_N_INSNS (1), /* log_shift. */
1400 COSTS_N_INSNS (1), /* log_shift_reg. */
1401 0, /* extend. */
1402 COSTS_N_INSNS (1), /* extend_arith. */
1403 0, /* bfi. */
1404 COSTS_N_INSNS (1), /* bfx. */
1405 COSTS_N_INSNS (1), /* clz. */
1406 COSTS_N_INSNS (1), /* rev. */
1407 0, /* non_exec. */
1408 true /* non_exec_costs_exec. */
1410 /* MULT SImode */
1413 COSTS_N_INSNS (2), /* simple. */
1414 COSTS_N_INSNS (3), /* flag_setting. */
1415 COSTS_N_INSNS (2), /* extend. */
1416 COSTS_N_INSNS (3), /* add. */
1417 COSTS_N_INSNS (2), /* extend_add. */
1418 COSTS_N_INSNS (18) /* idiv. */
1420 /* MULT DImode */
1422 0, /* simple (N/A). */
1423 0, /* flag_setting (N/A). */
1424 COSTS_N_INSNS (3), /* extend. */
1425 0, /* add (N/A). */
1426 COSTS_N_INSNS (3), /* extend_add. */
1427 0 /* idiv (N/A). */
1430 /* LD/ST */
1432 COSTS_N_INSNS (3), /* load. */
1433 COSTS_N_INSNS (3), /* load_sign_extend. */
1434 COSTS_N_INSNS (3), /* ldrd. */
1435 COSTS_N_INSNS (3), /* ldm_1st. */
1436 1, /* ldm_regs_per_insn_1st. */
1437 2, /* ldm_regs_per_insn_subsequent. */
1438 COSTS_N_INSNS (3), /* loadf. */
1439 COSTS_N_INSNS (3), /* loadd. */
1440 0, /* load_unaligned. */
1441 0, /* store. */
1442 0, /* strd. */
1443 0, /* stm_1st. */
1444 1, /* stm_regs_per_insn_1st. */
1445 2, /* stm_regs_per_insn_subsequent. */
1446 COSTS_N_INSNS (2), /* storef. */
1447 COSTS_N_INSNS (2), /* stored. */
1448 0, /* store_unaligned. */
1449 COSTS_N_INSNS (1), /* loadv. */
1450 COSTS_N_INSNS (1) /* storev. */
1453 /* FP SFmode */
1455 COSTS_N_INSNS (17), /* div. */
1456 COSTS_N_INSNS (4), /* mult. */
1457 COSTS_N_INSNS (8), /* mult_addsub. */
1458 COSTS_N_INSNS (8), /* fma. */
1459 COSTS_N_INSNS (4), /* addsub. */
1460 COSTS_N_INSNS (2), /* fpconst. */
1461 COSTS_N_INSNS (2), /* neg. */
1462 COSTS_N_INSNS (2), /* compare. */
1463 COSTS_N_INSNS (4), /* widen. */
1464 COSTS_N_INSNS (4), /* narrow. */
1465 COSTS_N_INSNS (4), /* toint. */
1466 COSTS_N_INSNS (4), /* fromint. */
1467 COSTS_N_INSNS (4) /* roundint. */
1469 /* FP DFmode */
1471 COSTS_N_INSNS (31), /* div. */
1472 COSTS_N_INSNS (4), /* mult. */
1473 COSTS_N_INSNS (8), /* mult_addsub. */
1474 COSTS_N_INSNS (8), /* fma. */
1475 COSTS_N_INSNS (4), /* addsub. */
1476 COSTS_N_INSNS (2), /* fpconst. */
1477 COSTS_N_INSNS (2), /* neg. */
1478 COSTS_N_INSNS (2), /* compare. */
1479 COSTS_N_INSNS (4), /* widen. */
1480 COSTS_N_INSNS (4), /* narrow. */
1481 COSTS_N_INSNS (4), /* toint. */
1482 COSTS_N_INSNS (4), /* fromint. */
1483 COSTS_N_INSNS (4) /* roundint. */
1486 /* Vector */
1488 COSTS_N_INSNS (1) /* alu. */
1492 const struct cpu_cost_table cortexa15_extra_costs =
1494 /* ALU */
1496 0, /* arith. */
1497 0, /* logical. */
1498 0, /* shift. */
1499 0, /* shift_reg. */
1500 COSTS_N_INSNS (1), /* arith_shift. */
1501 COSTS_N_INSNS (1), /* arith_shift_reg. */
1502 COSTS_N_INSNS (1), /* log_shift. */
1503 COSTS_N_INSNS (1), /* log_shift_reg. */
1504 0, /* extend. */
1505 COSTS_N_INSNS (1), /* extend_arith. */
1506 COSTS_N_INSNS (1), /* bfi. */
1507 0, /* bfx. */
1508 0, /* clz. */
1509 0, /* rev. */
1510 0, /* non_exec. */
1511 true /* non_exec_costs_exec. */
1513 /* MULT SImode */
1516 COSTS_N_INSNS (2), /* simple. */
1517 COSTS_N_INSNS (3), /* flag_setting. */
1518 COSTS_N_INSNS (2), /* extend. */
1519 COSTS_N_INSNS (2), /* add. */
1520 COSTS_N_INSNS (2), /* extend_add. */
1521 COSTS_N_INSNS (18) /* idiv. */
1523 /* MULT DImode */
1525 0, /* simple (N/A). */
1526 0, /* flag_setting (N/A). */
1527 COSTS_N_INSNS (3), /* extend. */
1528 0, /* add (N/A). */
1529 COSTS_N_INSNS (3), /* extend_add. */
1530 0 /* idiv (N/A). */
1533 /* LD/ST */
1535 COSTS_N_INSNS (3), /* load. */
1536 COSTS_N_INSNS (3), /* load_sign_extend. */
1537 COSTS_N_INSNS (3), /* ldrd. */
1538 COSTS_N_INSNS (4), /* ldm_1st. */
1539 1, /* ldm_regs_per_insn_1st. */
1540 2, /* ldm_regs_per_insn_subsequent. */
1541 COSTS_N_INSNS (4), /* loadf. */
1542 COSTS_N_INSNS (4), /* loadd. */
1543 0, /* load_unaligned. */
1544 0, /* store. */
1545 0, /* strd. */
1546 COSTS_N_INSNS (1), /* stm_1st. */
1547 1, /* stm_regs_per_insn_1st. */
1548 2, /* stm_regs_per_insn_subsequent. */
1549 0, /* storef. */
1550 0, /* stored. */
1551 0, /* store_unaligned. */
1552 COSTS_N_INSNS (1), /* loadv. */
1553 COSTS_N_INSNS (1) /* storev. */
1556 /* FP SFmode */
1558 COSTS_N_INSNS (17), /* div. */
1559 COSTS_N_INSNS (4), /* mult. */
1560 COSTS_N_INSNS (8), /* mult_addsub. */
1561 COSTS_N_INSNS (8), /* fma. */
1562 COSTS_N_INSNS (4), /* addsub. */
1563 COSTS_N_INSNS (2), /* fpconst. */
1564 COSTS_N_INSNS (2), /* neg. */
1565 COSTS_N_INSNS (5), /* compare. */
1566 COSTS_N_INSNS (4), /* widen. */
1567 COSTS_N_INSNS (4), /* narrow. */
1568 COSTS_N_INSNS (4), /* toint. */
1569 COSTS_N_INSNS (4), /* fromint. */
1570 COSTS_N_INSNS (4) /* roundint. */
1572 /* FP DFmode */
1574 COSTS_N_INSNS (31), /* div. */
1575 COSTS_N_INSNS (4), /* mult. */
1576 COSTS_N_INSNS (8), /* mult_addsub. */
1577 COSTS_N_INSNS (8), /* fma. */
1578 COSTS_N_INSNS (4), /* addsub. */
1579 COSTS_N_INSNS (2), /* fpconst. */
1580 COSTS_N_INSNS (2), /* neg. */
1581 COSTS_N_INSNS (2), /* compare. */
1582 COSTS_N_INSNS (4), /* widen. */
1583 COSTS_N_INSNS (4), /* narrow. */
1584 COSTS_N_INSNS (4), /* toint. */
1585 COSTS_N_INSNS (4), /* fromint. */
1586 COSTS_N_INSNS (4) /* roundint. */
1589 /* Vector */
1591 COSTS_N_INSNS (1) /* alu. */
1595 const struct cpu_cost_table v7m_extra_costs =
1597 /* ALU */
1599 0, /* arith. */
1600 0, /* logical. */
1601 0, /* shift. */
1602 0, /* shift_reg. */
1603 0, /* arith_shift. */
1604 COSTS_N_INSNS (1), /* arith_shift_reg. */
1605 0, /* log_shift. */
1606 COSTS_N_INSNS (1), /* log_shift_reg. */
1607 0, /* extend. */
1608 COSTS_N_INSNS (1), /* extend_arith. */
1609 0, /* bfi. */
1610 0, /* bfx. */
1611 0, /* clz. */
1612 0, /* rev. */
1613 COSTS_N_INSNS (1), /* non_exec. */
1614 false /* non_exec_costs_exec. */
1617 /* MULT SImode */
1619 COSTS_N_INSNS (1), /* simple. */
1620 COSTS_N_INSNS (1), /* flag_setting. */
1621 COSTS_N_INSNS (2), /* extend. */
1622 COSTS_N_INSNS (1), /* add. */
1623 COSTS_N_INSNS (3), /* extend_add. */
1624 COSTS_N_INSNS (8) /* idiv. */
1626 /* MULT DImode */
1628 0, /* simple (N/A). */
1629 0, /* flag_setting (N/A). */
1630 COSTS_N_INSNS (2), /* extend. */
1631 0, /* add (N/A). */
1632 COSTS_N_INSNS (3), /* extend_add. */
1633 0 /* idiv (N/A). */
1636 /* LD/ST */
1638 COSTS_N_INSNS (2), /* load. */
1639 0, /* load_sign_extend. */
1640 COSTS_N_INSNS (3), /* ldrd. */
1641 COSTS_N_INSNS (2), /* ldm_1st. */
1642 1, /* ldm_regs_per_insn_1st. */
1643 1, /* ldm_regs_per_insn_subsequent. */
1644 COSTS_N_INSNS (2), /* loadf. */
1645 COSTS_N_INSNS (3), /* loadd. */
1646 COSTS_N_INSNS (1), /* load_unaligned. */
1647 COSTS_N_INSNS (2), /* store. */
1648 COSTS_N_INSNS (3), /* strd. */
1649 COSTS_N_INSNS (2), /* stm_1st. */
1650 1, /* stm_regs_per_insn_1st. */
1651 1, /* stm_regs_per_insn_subsequent. */
1652 COSTS_N_INSNS (2), /* storef. */
1653 COSTS_N_INSNS (3), /* stored. */
1654 COSTS_N_INSNS (1), /* store_unaligned. */
1655 COSTS_N_INSNS (1), /* loadv. */
1656 COSTS_N_INSNS (1) /* storev. */
1659 /* FP SFmode */
1661 COSTS_N_INSNS (7), /* div. */
1662 COSTS_N_INSNS (2), /* mult. */
1663 COSTS_N_INSNS (5), /* mult_addsub. */
1664 COSTS_N_INSNS (3), /* fma. */
1665 COSTS_N_INSNS (1), /* addsub. */
1666 0, /* fpconst. */
1667 0, /* neg. */
1668 0, /* compare. */
1669 0, /* widen. */
1670 0, /* narrow. */
1671 0, /* toint. */
1672 0, /* fromint. */
1673 0 /* roundint. */
1675 /* FP DFmode */
1677 COSTS_N_INSNS (15), /* div. */
1678 COSTS_N_INSNS (5), /* mult. */
1679 COSTS_N_INSNS (7), /* mult_addsub. */
1680 COSTS_N_INSNS (7), /* fma. */
1681 COSTS_N_INSNS (3), /* addsub. */
1682 0, /* fpconst. */
1683 0, /* neg. */
1684 0, /* compare. */
1685 0, /* widen. */
1686 0, /* narrow. */
1687 0, /* toint. */
1688 0, /* fromint. */
1689 0 /* roundint. */
1692 /* Vector */
1694 COSTS_N_INSNS (1) /* alu. */
1698 const struct tune_params arm_slowmul_tune =
1700 &generic_extra_costs, /* Insn extra costs. */
1701 NULL, /* Sched adj cost. */
1702 arm_default_branch_cost,
1703 &arm_default_vec_cost,
1704 3, /* Constant limit. */
1705 5, /* Max cond insns. */
1706 8, /* Memset max inline. */
1707 1, /* Issue rate. */
1708 ARM_PREFETCH_NOT_BENEFICIAL,
1709 tune_params::PREF_CONST_POOL_TRUE,
1710 tune_params::PREF_LDRD_FALSE,
1711 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1712 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1713 tune_params::DISPARAGE_FLAGS_NEITHER,
1714 tune_params::PREF_NEON_64_FALSE,
1715 tune_params::PREF_NEON_STRINGOPS_FALSE,
1716 tune_params::FUSE_NOTHING,
1717 tune_params::SCHED_AUTOPREF_OFF
1720 const struct tune_params arm_fastmul_tune =
1722 &generic_extra_costs, /* Insn extra costs. */
1723 NULL, /* Sched adj cost. */
1724 arm_default_branch_cost,
1725 &arm_default_vec_cost,
1726 1, /* Constant limit. */
1727 5, /* Max cond insns. */
1728 8, /* Memset max inline. */
1729 1, /* Issue rate. */
1730 ARM_PREFETCH_NOT_BENEFICIAL,
1731 tune_params::PREF_CONST_POOL_TRUE,
1732 tune_params::PREF_LDRD_FALSE,
1733 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1734 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1735 tune_params::DISPARAGE_FLAGS_NEITHER,
1736 tune_params::PREF_NEON_64_FALSE,
1737 tune_params::PREF_NEON_STRINGOPS_FALSE,
1738 tune_params::FUSE_NOTHING,
1739 tune_params::SCHED_AUTOPREF_OFF
1742 /* StrongARM has early execution of branches, so a sequence that is worth
1743 skipping is shorter. Set max_insns_skipped to a lower value. */
1745 const struct tune_params arm_strongarm_tune =
1747 &generic_extra_costs, /* Insn extra costs. */
1748 NULL, /* Sched adj cost. */
1749 arm_default_branch_cost,
1750 &arm_default_vec_cost,
1751 1, /* Constant limit. */
1752 3, /* Max cond insns. */
1753 8, /* Memset max inline. */
1754 1, /* Issue rate. */
1755 ARM_PREFETCH_NOT_BENEFICIAL,
1756 tune_params::PREF_CONST_POOL_TRUE,
1757 tune_params::PREF_LDRD_FALSE,
1758 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1759 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1760 tune_params::DISPARAGE_FLAGS_NEITHER,
1761 tune_params::PREF_NEON_64_FALSE,
1762 tune_params::PREF_NEON_STRINGOPS_FALSE,
1763 tune_params::FUSE_NOTHING,
1764 tune_params::SCHED_AUTOPREF_OFF
1767 const struct tune_params arm_xscale_tune =
1769 &generic_extra_costs, /* Insn extra costs. */
1770 xscale_sched_adjust_cost,
1771 arm_default_branch_cost,
1772 &arm_default_vec_cost,
1773 2, /* Constant limit. */
1774 3, /* Max cond insns. */
1775 8, /* Memset max inline. */
1776 1, /* Issue rate. */
1777 ARM_PREFETCH_NOT_BENEFICIAL,
1778 tune_params::PREF_CONST_POOL_TRUE,
1779 tune_params::PREF_LDRD_FALSE,
1780 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1781 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1782 tune_params::DISPARAGE_FLAGS_NEITHER,
1783 tune_params::PREF_NEON_64_FALSE,
1784 tune_params::PREF_NEON_STRINGOPS_FALSE,
1785 tune_params::FUSE_NOTHING,
1786 tune_params::SCHED_AUTOPREF_OFF
1789 const struct tune_params arm_9e_tune =
1791 &generic_extra_costs, /* Insn extra costs. */
1792 NULL, /* Sched adj cost. */
1793 arm_default_branch_cost,
1794 &arm_default_vec_cost,
1795 1, /* Constant limit. */
1796 5, /* Max cond insns. */
1797 8, /* Memset max inline. */
1798 1, /* Issue rate. */
1799 ARM_PREFETCH_NOT_BENEFICIAL,
1800 tune_params::PREF_CONST_POOL_TRUE,
1801 tune_params::PREF_LDRD_FALSE,
1802 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1803 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1804 tune_params::DISPARAGE_FLAGS_NEITHER,
1805 tune_params::PREF_NEON_64_FALSE,
1806 tune_params::PREF_NEON_STRINGOPS_FALSE,
1807 tune_params::FUSE_NOTHING,
1808 tune_params::SCHED_AUTOPREF_OFF
1811 const struct tune_params arm_marvell_pj4_tune =
1813 &generic_extra_costs, /* Insn extra costs. */
1814 NULL, /* Sched adj cost. */
1815 arm_default_branch_cost,
1816 &arm_default_vec_cost,
1817 1, /* Constant limit. */
1818 5, /* Max cond insns. */
1819 8, /* Memset max inline. */
1820 2, /* Issue rate. */
1821 ARM_PREFETCH_NOT_BENEFICIAL,
1822 tune_params::PREF_CONST_POOL_TRUE,
1823 tune_params::PREF_LDRD_FALSE,
1824 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1825 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1826 tune_params::DISPARAGE_FLAGS_NEITHER,
1827 tune_params::PREF_NEON_64_FALSE,
1828 tune_params::PREF_NEON_STRINGOPS_FALSE,
1829 tune_params::FUSE_NOTHING,
1830 tune_params::SCHED_AUTOPREF_OFF
1833 const struct tune_params arm_v6t2_tune =
1835 &generic_extra_costs, /* Insn extra costs. */
1836 NULL, /* Sched adj cost. */
1837 arm_default_branch_cost,
1838 &arm_default_vec_cost,
1839 1, /* Constant limit. */
1840 5, /* Max cond insns. */
1841 8, /* Memset max inline. */
1842 1, /* Issue rate. */
1843 ARM_PREFETCH_NOT_BENEFICIAL,
1844 tune_params::PREF_CONST_POOL_FALSE,
1845 tune_params::PREF_LDRD_FALSE,
1846 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1847 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1848 tune_params::DISPARAGE_FLAGS_NEITHER,
1849 tune_params::PREF_NEON_64_FALSE,
1850 tune_params::PREF_NEON_STRINGOPS_FALSE,
1851 tune_params::FUSE_NOTHING,
1852 tune_params::SCHED_AUTOPREF_OFF
1856 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1857 const struct tune_params arm_cortex_tune =
1859 &generic_extra_costs,
1860 NULL, /* Sched adj cost. */
1861 arm_default_branch_cost,
1862 &arm_default_vec_cost,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 2, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL,
1868 tune_params::PREF_CONST_POOL_FALSE,
1869 tune_params::PREF_LDRD_FALSE,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER,
1873 tune_params::PREF_NEON_64_FALSE,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE,
1875 tune_params::FUSE_NOTHING,
1876 tune_params::SCHED_AUTOPREF_OFF
1879 const struct tune_params arm_cortex_a8_tune =
1881 &cortexa8_extra_costs,
1882 NULL, /* Sched adj cost. */
1883 arm_default_branch_cost,
1884 &arm_default_vec_cost,
1885 1, /* Constant limit. */
1886 5, /* Max cond insns. */
1887 8, /* Memset max inline. */
1888 2, /* Issue rate. */
1889 ARM_PREFETCH_NOT_BENEFICIAL,
1890 tune_params::PREF_CONST_POOL_FALSE,
1891 tune_params::PREF_LDRD_FALSE,
1892 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1894 tune_params::DISPARAGE_FLAGS_NEITHER,
1895 tune_params::PREF_NEON_64_FALSE,
1896 tune_params::PREF_NEON_STRINGOPS_TRUE,
1897 tune_params::FUSE_NOTHING,
1898 tune_params::SCHED_AUTOPREF_OFF
1901 const struct tune_params arm_cortex_a7_tune =
1903 &cortexa7_extra_costs,
1904 NULL, /* Sched adj cost. */
1905 arm_default_branch_cost,
1906 &arm_default_vec_cost,
1907 1, /* Constant limit. */
1908 5, /* Max cond insns. */
1909 8, /* Memset max inline. */
1910 2, /* Issue rate. */
1911 ARM_PREFETCH_NOT_BENEFICIAL,
1912 tune_params::PREF_CONST_POOL_FALSE,
1913 tune_params::PREF_LDRD_FALSE,
1914 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1915 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1916 tune_params::DISPARAGE_FLAGS_NEITHER,
1917 tune_params::PREF_NEON_64_FALSE,
1918 tune_params::PREF_NEON_STRINGOPS_TRUE,
1919 tune_params::FUSE_NOTHING,
1920 tune_params::SCHED_AUTOPREF_OFF
1923 const struct tune_params arm_cortex_a15_tune =
1925 &cortexa15_extra_costs,
1926 NULL, /* Sched adj cost. */
1927 arm_default_branch_cost,
1928 &arm_default_vec_cost,
1929 1, /* Constant limit. */
1930 2, /* Max cond insns. */
1931 8, /* Memset max inline. */
1932 3, /* Issue rate. */
1933 ARM_PREFETCH_NOT_BENEFICIAL,
1934 tune_params::PREF_CONST_POOL_FALSE,
1935 tune_params::PREF_LDRD_TRUE,
1936 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1937 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1938 tune_params::DISPARAGE_FLAGS_ALL,
1939 tune_params::PREF_NEON_64_FALSE,
1940 tune_params::PREF_NEON_STRINGOPS_TRUE,
1941 tune_params::FUSE_NOTHING,
1942 tune_params::SCHED_AUTOPREF_FULL
1945 const struct tune_params arm_cortex_a35_tune =
1947 &cortexa53_extra_costs,
1948 NULL, /* Sched adj cost. */
1949 arm_default_branch_cost,
1950 &arm_default_vec_cost,
1951 1, /* Constant limit. */
1952 5, /* Max cond insns. */
1953 8, /* Memset max inline. */
1954 1, /* Issue rate. */
1955 ARM_PREFETCH_NOT_BENEFICIAL,
1956 tune_params::PREF_CONST_POOL_FALSE,
1957 tune_params::PREF_LDRD_FALSE,
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1959 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1960 tune_params::DISPARAGE_FLAGS_NEITHER,
1961 tune_params::PREF_NEON_64_FALSE,
1962 tune_params::PREF_NEON_STRINGOPS_TRUE,
1963 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1964 tune_params::SCHED_AUTOPREF_OFF
1967 const struct tune_params arm_cortex_a53_tune =
1969 &cortexa53_extra_costs,
1970 NULL, /* Sched adj cost. */
1971 arm_default_branch_cost,
1972 &arm_default_vec_cost,
1973 1, /* Constant limit. */
1974 5, /* Max cond insns. */
1975 8, /* Memset max inline. */
1976 2, /* Issue rate. */
1977 ARM_PREFETCH_NOT_BENEFICIAL,
1978 tune_params::PREF_CONST_POOL_FALSE,
1979 tune_params::PREF_LDRD_FALSE,
1980 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1981 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1982 tune_params::DISPARAGE_FLAGS_NEITHER,
1983 tune_params::PREF_NEON_64_FALSE,
1984 tune_params::PREF_NEON_STRINGOPS_TRUE,
1985 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
1986 tune_params::SCHED_AUTOPREF_OFF
1989 const struct tune_params arm_cortex_a57_tune =
1991 &cortexa57_extra_costs,
1992 NULL, /* Sched adj cost. */
1993 arm_default_branch_cost,
1994 &arm_default_vec_cost,
1995 1, /* Constant limit. */
1996 2, /* Max cond insns. */
1997 8, /* Memset max inline. */
1998 3, /* Issue rate. */
1999 ARM_PREFETCH_NOT_BENEFICIAL,
2000 tune_params::PREF_CONST_POOL_FALSE,
2001 tune_params::PREF_LDRD_TRUE,
2002 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2003 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2004 tune_params::DISPARAGE_FLAGS_ALL,
2005 tune_params::PREF_NEON_64_FALSE,
2006 tune_params::PREF_NEON_STRINGOPS_TRUE,
2007 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2008 tune_params::SCHED_AUTOPREF_FULL
2011 const struct tune_params arm_exynosm1_tune =
2013 &exynosm1_extra_costs,
2014 NULL, /* Sched adj cost. */
2015 arm_default_branch_cost,
2016 &arm_default_vec_cost,
2017 1, /* Constant limit. */
2018 2, /* Max cond insns. */
2019 8, /* Memset max inline. */
2020 3, /* Issue rate. */
2021 ARM_PREFETCH_NOT_BENEFICIAL,
2022 tune_params::PREF_CONST_POOL_FALSE,
2023 tune_params::PREF_LDRD_TRUE,
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2025 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2026 tune_params::DISPARAGE_FLAGS_ALL,
2027 tune_params::PREF_NEON_64_FALSE,
2028 tune_params::PREF_NEON_STRINGOPS_TRUE,
2029 tune_params::FUSE_NOTHING,
2030 tune_params::SCHED_AUTOPREF_OFF
2033 const struct tune_params arm_xgene1_tune =
2035 &xgene1_extra_costs,
2036 NULL, /* Sched adj cost. */
2037 arm_default_branch_cost,
2038 &arm_default_vec_cost,
2039 1, /* Constant limit. */
2040 2, /* Max cond insns. */
2041 32, /* Memset max inline. */
2042 4, /* Issue rate. */
2043 ARM_PREFETCH_NOT_BENEFICIAL,
2044 tune_params::PREF_CONST_POOL_FALSE,
2045 tune_params::PREF_LDRD_TRUE,
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2047 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2048 tune_params::DISPARAGE_FLAGS_ALL,
2049 tune_params::PREF_NEON_64_FALSE,
2050 tune_params::PREF_NEON_STRINGOPS_FALSE,
2051 tune_params::FUSE_NOTHING,
2052 tune_params::SCHED_AUTOPREF_OFF
2055 const struct tune_params arm_qdf24xx_tune =
2057 &qdf24xx_extra_costs,
2058 NULL, /* Scheduler cost adjustment. */
2059 arm_default_branch_cost,
2060 &arm_default_vec_cost, /* Vectorizer costs. */
2061 1, /* Constant limit. */
2062 2, /* Max cond insns. */
2063 8, /* Memset max inline. */
2064 4, /* Issue rate. */
2065 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2066 tune_params::PREF_CONST_POOL_FALSE,
2067 tune_params::PREF_LDRD_TRUE,
2068 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2069 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2070 tune_params::DISPARAGE_FLAGS_ALL,
2071 tune_params::PREF_NEON_64_FALSE,
2072 tune_params::PREF_NEON_STRINGOPS_TRUE,
2073 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2074 tune_params::SCHED_AUTOPREF_FULL
2077 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2078 less appealing. Set max_insns_skipped to a low value. */
2080 const struct tune_params arm_cortex_a5_tune =
2082 &cortexa5_extra_costs,
2083 NULL, /* Sched adj cost. */
2084 arm_cortex_a5_branch_cost,
2085 &arm_default_vec_cost,
2086 1, /* Constant limit. */
2087 1, /* Max cond insns. */
2088 8, /* Memset max inline. */
2089 2, /* Issue rate. */
2090 ARM_PREFETCH_NOT_BENEFICIAL,
2091 tune_params::PREF_CONST_POOL_FALSE,
2092 tune_params::PREF_LDRD_FALSE,
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2094 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2095 tune_params::DISPARAGE_FLAGS_NEITHER,
2096 tune_params::PREF_NEON_64_FALSE,
2097 tune_params::PREF_NEON_STRINGOPS_TRUE,
2098 tune_params::FUSE_NOTHING,
2099 tune_params::SCHED_AUTOPREF_OFF
2102 const struct tune_params arm_cortex_a9_tune =
2104 &cortexa9_extra_costs,
2105 cortex_a9_sched_adjust_cost,
2106 arm_default_branch_cost,
2107 &arm_default_vec_cost,
2108 1, /* Constant limit. */
2109 5, /* Max cond insns. */
2110 8, /* Memset max inline. */
2111 2, /* Issue rate. */
2112 ARM_PREFETCH_BENEFICIAL(4,32,32),
2113 tune_params::PREF_CONST_POOL_FALSE,
2114 tune_params::PREF_LDRD_FALSE,
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2116 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2117 tune_params::DISPARAGE_FLAGS_NEITHER,
2118 tune_params::PREF_NEON_64_FALSE,
2119 tune_params::PREF_NEON_STRINGOPS_FALSE,
2120 tune_params::FUSE_NOTHING,
2121 tune_params::SCHED_AUTOPREF_OFF
2124 const struct tune_params arm_cortex_a12_tune =
2126 &cortexa12_extra_costs,
2127 NULL, /* Sched adj cost. */
2128 arm_default_branch_cost,
2129 &arm_default_vec_cost, /* Vectorizer costs. */
2130 1, /* Constant limit. */
2131 2, /* Max cond insns. */
2132 8, /* Memset max inline. */
2133 2, /* Issue rate. */
2134 ARM_PREFETCH_NOT_BENEFICIAL,
2135 tune_params::PREF_CONST_POOL_FALSE,
2136 tune_params::PREF_LDRD_TRUE,
2137 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2138 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2139 tune_params::DISPARAGE_FLAGS_ALL,
2140 tune_params::PREF_NEON_64_FALSE,
2141 tune_params::PREF_NEON_STRINGOPS_TRUE,
2142 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2143 tune_params::SCHED_AUTOPREF_OFF
2146 const struct tune_params arm_cortex_a73_tune =
2148 &cortexa57_extra_costs,
2149 NULL, /* Sched adj cost. */
2150 arm_default_branch_cost,
2151 &arm_default_vec_cost, /* Vectorizer costs. */
2152 1, /* Constant limit. */
2153 2, /* Max cond insns. */
2154 8, /* Memset max inline. */
2155 2, /* Issue rate. */
2156 ARM_PREFETCH_NOT_BENEFICIAL,
2157 tune_params::PREF_CONST_POOL_FALSE,
2158 tune_params::PREF_LDRD_TRUE,
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2160 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2161 tune_params::DISPARAGE_FLAGS_ALL,
2162 tune_params::PREF_NEON_64_FALSE,
2163 tune_params::PREF_NEON_STRINGOPS_TRUE,
2164 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2165 tune_params::SCHED_AUTOPREF_FULL
2168 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT take a single
2169 cycle each, i.e. two cycles for the pair. An LDR from the constant pool also takes two cycles
2170 to execute, but mildly increases pipelining opportunity (consecutive
2171 loads/stores can be pipelined together, saving one cycle), and may also
2172 improve icache utilisation. Hence we prefer the constant pool for such
2173 processors. */
2175 const struct tune_params arm_v7m_tune =
2177 &v7m_extra_costs,
2178 NULL, /* Sched adj cost. */
2179 arm_cortex_m_branch_cost,
2180 &arm_default_vec_cost,
2181 1, /* Constant limit. */
2182 2, /* Max cond insns. */
2183 8, /* Memset max inline. */
2184 1, /* Issue rate. */
2185 ARM_PREFETCH_NOT_BENEFICIAL,
2186 tune_params::PREF_CONST_POOL_TRUE,
2187 tune_params::PREF_LDRD_FALSE,
2188 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2189 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2190 tune_params::DISPARAGE_FLAGS_NEITHER,
2191 tune_params::PREF_NEON_64_FALSE,
2192 tune_params::PREF_NEON_STRINGOPS_FALSE,
2193 tune_params::FUSE_NOTHING,
2194 tune_params::SCHED_AUTOPREF_OFF
2197 /* Cortex-M7 tuning. */
2199 const struct tune_params arm_cortex_m7_tune =
2201 &v7m_extra_costs,
2202 NULL, /* Sched adj cost. */
2203 arm_cortex_m7_branch_cost,
2204 &arm_default_vec_cost,
2205 0, /* Constant limit. */
2206 1, /* Max cond insns. */
2207 8, /* Memset max inline. */
2208 2, /* Issue rate. */
2209 ARM_PREFETCH_NOT_BENEFICIAL,
2210 tune_params::PREF_CONST_POOL_TRUE,
2211 tune_params::PREF_LDRD_FALSE,
2212 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2213 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2214 tune_params::DISPARAGE_FLAGS_NEITHER,
2215 tune_params::PREF_NEON_64_FALSE,
2216 tune_params::PREF_NEON_STRINGOPS_FALSE,
2217 tune_params::FUSE_NOTHING,
2218 tune_params::SCHED_AUTOPREF_OFF
2221 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2222 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2223 cortex-m23. */
2224 const struct tune_params arm_v6m_tune =
2226 &generic_extra_costs, /* Insn extra costs. */
2227 NULL, /* Sched adj cost. */
2228 arm_default_branch_cost,
2229 &arm_default_vec_cost, /* Vectorizer costs. */
2230 1, /* Constant limit. */
2231 5, /* Max cond insns. */
2232 8, /* Memset max inline. */
2233 1, /* Issue rate. */
2234 ARM_PREFETCH_NOT_BENEFICIAL,
2235 tune_params::PREF_CONST_POOL_FALSE,
2236 tune_params::PREF_LDRD_FALSE,
2237 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2238 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2239 tune_params::DISPARAGE_FLAGS_NEITHER,
2240 tune_params::PREF_NEON_64_FALSE,
2241 tune_params::PREF_NEON_STRINGOPS_FALSE,
2242 tune_params::FUSE_NOTHING,
2243 tune_params::SCHED_AUTOPREF_OFF
2246 const struct tune_params arm_fa726te_tune =
2248 &generic_extra_costs, /* Insn extra costs. */
2249 fa726te_sched_adjust_cost,
2250 arm_default_branch_cost,
2251 &arm_default_vec_cost,
2252 1, /* Constant limit. */
2253 5, /* Max cond insns. */
2254 8, /* Memset max inline. */
2255 2, /* Issue rate. */
2256 ARM_PREFETCH_NOT_BENEFICIAL,
2257 tune_params::PREF_CONST_POOL_TRUE,
2258 tune_params::PREF_LDRD_FALSE,
2259 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2260 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2261 tune_params::DISPARAGE_FLAGS_NEITHER,
2262 tune_params::PREF_NEON_64_FALSE,
2263 tune_params::PREF_NEON_STRINGOPS_FALSE,
2264 tune_params::FUSE_NOTHING,
2265 tune_params::SCHED_AUTOPREF_OFF
2269 /* Not all of these give usefully different compilation alternatives,
2270 but there is no simple way of generalizing them. */
2271 static const struct processors all_cores[] =
2273 /* ARM Cores */
2274 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2275 {NAME, TARGET_CPU_##IDENT, #ARCH, BASE_ARCH_##ARCH, \
2276 FLAGS, &arm_##COSTS##_tune},
2277 #include "arm-cores.def"
2278 #undef ARM_CORE
2279 {NULL, TARGET_CPU_arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
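/* For illustration only (the real entries live in arm-cores.def): an
   invocation such as

     ARM_CORE ("cortex-m3", cortexm3, cortexm3, 7M, flags, v7m)

   would expand, via the macro above, to the initializer

     {"cortex-m3", TARGET_CPU_cortexm3, "7M", BASE_ARCH_7M, flags, &arm_v7m_tune},

   binding the core name to its architecture, feature flags and tuning
   table (here arm_v7m_tune, defined earlier in this file).  */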
2282 static const struct processors all_architectures[] =
2284 /* ARM Architectures */
2285 /* We don't specify tuning costs here as they will be figured out
2286 from the core. */
2288 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2289 {NAME, TARGET_CPU_##CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2290 #include "arm-arches.def"
2291 #undef ARM_ARCH
2292 {NULL, TARGET_CPU_arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
2296 /* These are populated as command line arguments are processed, or NULL
2297 if not specified. */
2298 static const struct processors *arm_selected_arch;
2299 static const struct processors *arm_selected_cpu;
2300 static const struct processors *arm_selected_tune;
2302 /* The name of the preprocessor macro to define for this architecture. PROFILE
2303 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2304 is thus chosen to be big enough to hold the longest architecture name. */
2306 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
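/* For example, with -march=armv7-a (architecture name "7A") the sprintf in
   arm_option_override () below turns this buffer into "__ARM_ARCH_7A__";
   the PROFILE placeholder merely reserves enough space for the longest
   such name.  */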
2308 /* Available values for -mfpu=. */
2310 const struct arm_fpu_desc all_fpus[] =
2312 #define ARM_FPU(NAME, REV, VFP_REGS, FEATURES) \
2313 { NAME, REV, VFP_REGS, FEATURES },
2314 #include "arm-fpus.def"
2315 #undef ARM_FPU
2318 /* Supported TLS relocations. */
2320 enum tls_reloc {
2321 TLS_GD32,
2322 TLS_LDM32,
2323 TLS_LDO32,
2324 TLS_IE32,
2325 TLS_LE32,
2326 TLS_DESCSEQ /* GNU scheme */
2329 /* The maximum number of insns to be used when loading a constant. */
2330 inline static int
2331 arm_constant_limit (bool size_p)
2333 return size_p ? 1 : current_tune->constant_limit;
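/* For instance, when optimizing for size the limit is a single insn, while
   otherwise it follows the active tuning: 3 insns for arm_slowmul_tune
   above, 1 for most of the other tunings, and 0 for arm_cortex_m7_tune.  */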
2336 /* Emit an insn that's a simple single-set. Both the operands must be known
2337 to be valid. */
2338 inline static rtx_insn *
2339 emit_set_insn (rtx x, rtx y)
2341 return emit_insn (gen_rtx_SET (x, y));
2344 /* Return the number of bits set in VALUE. */
2345 static unsigned
2346 bit_count (unsigned long value)
2348 unsigned long count = 0;
2350 while (value)
2352 count++;
2353 value &= value - 1; /* Clear the least-significant set bit. */
2356 return count;
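/* A worked example of the loop above: for value == 0x29 (binary 101001)
   successive iterations leave 101000, then 100000, then 0, so bit_count
   returns 3 -- one iteration per set bit rather than one per bit position.  */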
2359 /* Return the number of features in feature-set SET. */
2360 static unsigned
2361 feature_count (const arm_feature_set * set)
2363 return (bit_count (ARM_FSET_CPU1 (*set))
2364 + bit_count (ARM_FSET_CPU2 (*set)));
2367 typedef struct
2369 machine_mode mode;
2370 const char *name;
2371 } arm_fixed_mode_set;
2373 /* A small helper for setting fixed-point library libfuncs. */
2375 static void
2376 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2377 const char *funcname, const char *modename,
2378 int num_suffix)
2380 char buffer[50];
2382 if (num_suffix == 0)
2383 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2384 else
2385 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2387 set_optab_libfunc (optable, mode, buffer);
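/* For example (following the sprintf pattern above), the call

     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3);

   made from arm_init_libfuncs () below registers the libcall name
   "__gnu_addqq3" for QQmode addition.  */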
2390 static void
2391 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2392 machine_mode from, const char *funcname,
2393 const char *toname, const char *fromname)
2395 char buffer[50];
2396 const char *maybe_suffix_2 = "";
2398 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2399 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2400 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2401 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2402 maybe_suffix_2 = "2";
2404 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2405 maybe_suffix_2);
2407 set_conv_libfunc (optable, to, from, buffer);
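/* Likewise for conversions: fract from SQmode ("sq") to DQmode ("dq"), both
   signed fractional modes, gets the "2" suffix and becomes
   "__gnu_fractsqdq2", whereas a conversion to a non-fixed-point mode such
   as SFmode ("sf") is registered without it, e.g. "__gnu_fractsqsf".  */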
2410 /* Set up library functions unique to ARM. */
2412 static void
2413 arm_init_libfuncs (void)
2415 /* For Linux, we have access to kernel support for atomic operations. */
2416 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2417 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2419 /* There are no special library functions unless we are using the
2420 ARM BPABI. */
2421 if (!TARGET_BPABI)
2422 return;
2424 /* The functions below are described in Section 4 of the "Run-Time
2425 ABI for the ARM architecture", Version 1.0. */
2427 /* Double-precision floating-point arithmetic. Table 2. */
2428 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2429 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2430 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2431 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2432 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2434 /* Double-precision comparisons. Table 3. */
2435 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2436 set_optab_libfunc (ne_optab, DFmode, NULL);
2437 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2438 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2439 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2440 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2441 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2443 /* Single-precision floating-point arithmetic. Table 4. */
2444 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2445 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2446 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2447 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2448 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2450 /* Single-precision comparisons. Table 5. */
2451 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2452 set_optab_libfunc (ne_optab, SFmode, NULL);
2453 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2454 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2455 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2456 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2457 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2459 /* Floating-point to integer conversions. Table 6. */
2460 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2461 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2462 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2463 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2464 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2465 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2466 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2467 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2469 /* Conversions between floating types. Table 7. */
2470 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2471 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2473 /* Integer to floating-point conversions. Table 8. */
2474 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2475 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2476 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2477 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2478 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2479 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2480 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2481 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2483 /* Long long. Table 9. */
2484 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2485 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2486 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2487 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2488 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2489 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2490 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2491 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2493 /* Integer (32/32->32) division. \S 4.3.1. */
2494 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2495 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2497 /* The divmod functions are designed so that they can be used for
2498 plain division, even though they return both the quotient and the
2499 remainder. The quotient is returned in the usual location (i.e.,
2500 r0 for SImode, {r0, r1} for DImode), just as would be expected
2501 for an ordinary division routine. Because the AAPCS calling
2502 conventions specify that all of { r0, r1, r2, r3 } are
2503 call-clobbered registers, there is no need to tell the compiler
2504 explicitly that those registers are clobbered by these
2505 routines. */
2506 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2507 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
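/* Illustration: with the mappings just above, a plain 64-bit division such as

     long long quotient (long long a, long long b) { return a / b; }

   can be lowered to a call to __aeabi_ldivmod; the quotient comes back in
   {r0, r1} as for any ordinary division helper, and the remainder the
   routine also returns (in {r2, r3}) is simply ignored.  */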
2509 /* For SImode division the ABI provides div-without-mod routines,
2510 which are faster. */
2511 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2512 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2514 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2515 divmod libcalls instead. */
2516 set_optab_libfunc (smod_optab, DImode, NULL);
2517 set_optab_libfunc (umod_optab, DImode, NULL);
2518 set_optab_libfunc (smod_optab, SImode, NULL);
2519 set_optab_libfunc (umod_optab, SImode, NULL);
2521 /* Half-precision float operations. The compiler handles all operations
2522 with NULL libfuncs by converting to SFmode.  */
2523 switch (arm_fp16_format)
2525 case ARM_FP16_FORMAT_IEEE:
2526 case ARM_FP16_FORMAT_ALTERNATIVE:
2528 /* Conversions. */
2529 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2530 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2531 ? "__gnu_f2h_ieee"
2532 : "__gnu_f2h_alternative"));
2533 set_conv_libfunc (sext_optab, SFmode, HFmode,
2534 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2535 ? "__gnu_h2f_ieee"
2536 : "__gnu_h2f_alternative"));
2538 /* Arithmetic. */
2539 set_optab_libfunc (add_optab, HFmode, NULL);
2540 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2541 set_optab_libfunc (smul_optab, HFmode, NULL);
2542 set_optab_libfunc (neg_optab, HFmode, NULL);
2543 set_optab_libfunc (sub_optab, HFmode, NULL);
2545 /* Comparisons. */
2546 set_optab_libfunc (eq_optab, HFmode, NULL);
2547 set_optab_libfunc (ne_optab, HFmode, NULL);
2548 set_optab_libfunc (lt_optab, HFmode, NULL);
2549 set_optab_libfunc (le_optab, HFmode, NULL);
2550 set_optab_libfunc (ge_optab, HFmode, NULL);
2551 set_optab_libfunc (gt_optab, HFmode, NULL);
2552 set_optab_libfunc (unord_optab, HFmode, NULL);
2553 break;
2555 default:
2556 break;
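/* With the arithmetic and comparison libfuncs left NULL as above, an HFmode
   operation such as a + b is, broadly speaking, performed by widening both
   operands with __gnu_h2f_ieee (or the _alternative variant), doing the work
   in SFmode, and narrowing the result back with __gnu_f2h_ieee.  */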
2559 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2561 const arm_fixed_mode_set fixed_arith_modes[] =
2563 { QQmode, "qq" },
2564 { UQQmode, "uqq" },
2565 { HQmode, "hq" },
2566 { UHQmode, "uhq" },
2567 { SQmode, "sq" },
2568 { USQmode, "usq" },
2569 { DQmode, "dq" },
2570 { UDQmode, "udq" },
2571 { TQmode, "tq" },
2572 { UTQmode, "utq" },
2573 { HAmode, "ha" },
2574 { UHAmode, "uha" },
2575 { SAmode, "sa" },
2576 { USAmode, "usa" },
2577 { DAmode, "da" },
2578 { UDAmode, "uda" },
2579 { TAmode, "ta" },
2580 { UTAmode, "uta" }
2582 const arm_fixed_mode_set fixed_conv_modes[] =
2584 { QQmode, "qq" },
2585 { UQQmode, "uqq" },
2586 { HQmode, "hq" },
2587 { UHQmode, "uhq" },
2588 { SQmode, "sq" },
2589 { USQmode, "usq" },
2590 { DQmode, "dq" },
2591 { UDQmode, "udq" },
2592 { TQmode, "tq" },
2593 { UTQmode, "utq" },
2594 { HAmode, "ha" },
2595 { UHAmode, "uha" },
2596 { SAmode, "sa" },
2597 { USAmode, "usa" },
2598 { DAmode, "da" },
2599 { UDAmode, "uda" },
2600 { TAmode, "ta" },
2601 { UTAmode, "uta" },
2602 { QImode, "qi" },
2603 { HImode, "hi" },
2604 { SImode, "si" },
2605 { DImode, "di" },
2606 { TImode, "ti" },
2607 { SFmode, "sf" },
2608 { DFmode, "df" }
2610 unsigned int i, j;
2612 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2614 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2615 "add", fixed_arith_modes[i].name, 3);
2616 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2617 "ssadd", fixed_arith_modes[i].name, 3);
2618 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2619 "usadd", fixed_arith_modes[i].name, 3);
2620 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2621 "sub", fixed_arith_modes[i].name, 3);
2622 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2623 "sssub", fixed_arith_modes[i].name, 3);
2624 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2625 "ussub", fixed_arith_modes[i].name, 3);
2626 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2627 "mul", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2629 "ssmul", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2631 "usmul", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2633 "div", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2635 "udiv", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2637 "ssdiv", fixed_arith_modes[i].name, 3);
2638 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2639 "usdiv", fixed_arith_modes[i].name, 3);
2640 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2641 "neg", fixed_arith_modes[i].name, 2);
2642 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2643 "ssneg", fixed_arith_modes[i].name, 2);
2644 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2645 "usneg", fixed_arith_modes[i].name, 2);
2646 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2647 "ashl", fixed_arith_modes[i].name, 3);
2648 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2649 "ashr", fixed_arith_modes[i].name, 3);
2650 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2651 "lshr", fixed_arith_modes[i].name, 3);
2652 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2653 "ssashl", fixed_arith_modes[i].name, 3);
2654 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2655 "usashl", fixed_arith_modes[i].name, 3);
2656 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2657 "cmp", fixed_arith_modes[i].name, 2);
2660 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2661 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2663 if (i == j
2664 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2665 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2666 continue;
2668 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2669 fixed_conv_modes[j].mode, "fract",
2670 fixed_conv_modes[i].name,
2671 fixed_conv_modes[j].name);
2672 arm_set_fixed_conv_libfunc (satfract_optab,
2673 fixed_conv_modes[i].mode,
2674 fixed_conv_modes[j].mode, "satfract",
2675 fixed_conv_modes[i].name,
2676 fixed_conv_modes[j].name);
2677 arm_set_fixed_conv_libfunc (fractuns_optab,
2678 fixed_conv_modes[i].mode,
2679 fixed_conv_modes[j].mode, "fractuns",
2680 fixed_conv_modes[i].name,
2681 fixed_conv_modes[j].name);
2682 arm_set_fixed_conv_libfunc (satfractuns_optab,
2683 fixed_conv_modes[i].mode,
2684 fixed_conv_modes[j].mode, "satfractuns",
2685 fixed_conv_modes[i].name,
2686 fixed_conv_modes[j].name);
2690 if (TARGET_AAPCS_BASED)
2691 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2694 /* On AAPCS systems, this is the "struct __va_list". */
2695 static GTY(()) tree va_list_type;
2697 /* Return the type to use as __builtin_va_list. */
2698 static tree
2699 arm_build_builtin_va_list (void)
2701 tree va_list_name;
2702 tree ap_field;
2704 if (!TARGET_AAPCS_BASED)
2705 return std_build_builtin_va_list ();
2707 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2708 defined as:
2710 struct __va_list
2712 void *__ap;
2715 The C Library ABI further reinforces this definition in \S
2716 4.1.
2718 We must follow this definition exactly. The structure tag
2719 name is visible in C++ mangled names, and thus forms a part
2720 of the ABI. The field name may be used by people who
2721 #include <stdarg.h>. */
2722 /* Create the type. */
2723 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2724 /* Give it the required name. */
2725 va_list_name = build_decl (BUILTINS_LOCATION,
2726 TYPE_DECL,
2727 get_identifier ("__va_list"),
2728 va_list_type);
2729 DECL_ARTIFICIAL (va_list_name) = 1;
2730 TYPE_NAME (va_list_type) = va_list_name;
2731 TYPE_STUB_DECL (va_list_type) = va_list_name;
2732 /* Create the __ap field. */
2733 ap_field = build_decl (BUILTINS_LOCATION,
2734 FIELD_DECL,
2735 get_identifier ("__ap"),
2736 ptr_type_node);
2737 DECL_ARTIFICIAL (ap_field) = 1;
2738 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2739 TYPE_FIELDS (va_list_type) = ap_field;
2740 /* Compute its layout. */
2741 layout_type (va_list_type);
2743 return va_list_type;
2746 /* Return an expression of type "void *" pointing to the next
2747 available argument in a variable-argument list. VALIST is the
2748 user-level va_list object, of type __builtin_va_list. */
2749 static tree
2750 arm_extract_valist_ptr (tree valist)
2752 if (TREE_TYPE (valist) == error_mark_node)
2753 return error_mark_node;
2755 /* On an AAPCS target, the pointer is stored within "struct
2756 va_list". */
2757 if (TARGET_AAPCS_BASED)
2759 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2760 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2761 valist, ap_field, NULL_TREE);
2764 return valist;
2767 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2768 static void
2769 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2771 valist = arm_extract_valist_ptr (valist);
2772 std_expand_builtin_va_start (valist, nextarg);
2775 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2776 static tree
2777 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2778 gimple_seq *post_p)
2780 valist = arm_extract_valist_ptr (valist);
2781 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2784 /* Check any incompatible options that the user has specified. */
2785 static void
2786 arm_option_check_internal (struct gcc_options *opts)
2788 int flags = opts->x_target_flags;
2789 const struct arm_fpu_desc *fpu_desc = &all_fpus[opts->x_arm_fpu_index];
2791 /* iWMMXt and NEON are incompatible. */
2792 if (TARGET_IWMMXT
2793 && ARM_FPU_FSET_HAS (fpu_desc->features, FPU_FL_NEON))
2794 error ("iWMMXt and NEON are incompatible");
2796 /* Make sure that the processor choice does not conflict with any of the
2797 other command line choices. */
2798 if (TARGET_ARM_P (flags) && !ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM))
2799 error ("target CPU does not support ARM mode");
2801 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2802 from here where no function is being compiled currently. */
2803 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2804 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2806 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2807 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2809 /* If this target is normally configured to use APCS frames, warn if they
2810 are turned off and debugging is turned on. */
2811 if (TARGET_ARM_P (flags)
2812 && write_symbols != NO_DEBUG
2813 && !TARGET_APCS_FRAME
2814 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2815 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2817 /* iWMMXt unsupported under Thumb mode. */
2818 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2819 error ("iWMMXt unsupported under Thumb mode");
2821 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2822 error ("can not use -mtp=cp15 with 16-bit Thumb");
2824 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2826 error ("RTP PIC is incompatible with Thumb");
2827 flag_pic = 0;
2830 /* We only support -mslow-flash-data on armv7-m targets. */
2831 if (target_slow_flash_data
2832 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2833 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2834 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2836 /* We only support pure-code on Thumb-2 M-profile targets. */
2837 if (target_pure_code
2838 && (!arm_arch_thumb2 || arm_arch_notm || flag_pic || TARGET_NEON))
2839 error ("-mpure-code only supports non-pic code on armv7-m targets");
2843 /* Recompute the global settings depending on target attribute options. */
2845 static void
2846 arm_option_params_internal (void)
2848 /* If we are not using the default (ARM mode) section anchor offset
2849 ranges, then set the correct ranges now. */
2850 if (TARGET_THUMB1)
2852 /* Thumb-1 LDR instructions cannot have negative offsets.
2853 Permissible positive offset ranges are 5-bit (for byte loads),
2854 6-bit (for halfword loads), or 7-bit (for word loads).
2855 Empirical results suggest a 7-bit anchor range gives the best
2856 overall code size. */
2857 targetm.min_anchor_offset = 0;
2858 targetm.max_anchor_offset = 127;
2860 else if (TARGET_THUMB2)
2862 /* The minimum is set such that the total size of the block
2863 for a particular anchor is 248 + 1 + 4095 = 4344 bytes, which is
2864 divisible by eight, ensuring natural spacing of anchors. */
2865 targetm.min_anchor_offset = -248;
2866 targetm.max_anchor_offset = 4095;
2868 else
2870 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2871 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2874 if (optimize_size)
2876 /* If optimizing for size, bump the number of instructions that we
2877 are prepared to conditionally execute (even on a StrongARM). */
2878 max_insns_skipped = 6;
2880 /* For THUMB2, we limit the conditional sequence to one IT block. */
2881 if (TARGET_THUMB2)
2882 max_insns_skipped = arm_restrict_it ? 1 : 4;
2884 else
2885 /* When -mrestrict-it is in use, tone down the if-conversion.  */
2886 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2887 ? 1 : current_tune->max_insns_skipped;
2890 /* True if -mflip-thumb should next add an attribute for the default
2891 mode, false if it should next add an attribute for the opposite mode. */
2892 static GTY(()) bool thumb_flipper;
2894 /* Options after initial target override. */
2895 static GTY(()) tree init_optimize;
2897 static void
2898 arm_override_options_after_change_1 (struct gcc_options *opts)
2900 if (opts->x_align_functions <= 0)
2901 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2902 && opts->x_optimize_size ? 2 : 4;
2905 /* Implement targetm.override_options_after_change. */
2907 static void
2908 arm_override_options_after_change (void)
2910 arm_override_options_after_change_1 (&global_options);
2913 /* Reset options between modes that the user has specified. */
2914 static void
2915 arm_option_override_internal (struct gcc_options *opts,
2916 struct gcc_options *opts_set)
2918 arm_override_options_after_change_1 (opts);
2920 if (TARGET_INTERWORK && !ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB))
2922 /* The default is to enable interworking, so this warning message would
2923 be confusing to users who have just compiled with, eg, -march=armv3. */
2924 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2925 opts->x_target_flags &= ~MASK_INTERWORK;
2928 if (TARGET_THUMB_P (opts->x_target_flags)
2929 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB)))
2931 warning (0, "target CPU does not support THUMB instructions");
2932 opts->x_target_flags &= ~MASK_THUMB;
2935 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2937 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2938 opts->x_target_flags &= ~MASK_APCS_FRAME;
2941 /* Callee super interworking implies thumb interworking. Adding
2942 this to the flags here simplifies the logic elsewhere. */
2943 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2944 opts->x_target_flags |= MASK_INTERWORK;
2946 /* Need to remember initial values so combinations of options like
2947 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2948 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2950 if (! opts_set->x_arm_restrict_it)
2951 opts->x_arm_restrict_it = arm_arch8;
2953 /* ARM execution state and M profile don't have [restrict] IT. */
2954 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2955 opts->x_arm_restrict_it = 0;
2957 /* Enable -munaligned-access by default for
2958 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2959 i.e. Thumb2 and ARM state only.
2960 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2961 - ARMv8 architecture-based processors.
2963 Disable -munaligned-access by default for
2964 - all pre-ARMv6 architecture-based processors
2965 - ARMv6-M architecture-based processors
2966 - ARMv8-M Baseline processors. */
2968 if (! opts_set->x_unaligned_access)
2970 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2971 && arm_arch6 && (arm_arch_notm || arm_arch7));
2973 else if (opts->x_unaligned_access == 1
2974 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2976 warning (0, "target CPU does not support unaligned accesses");
2977 opts->x_unaligned_access = 0;
2980 /* Don't warn since it's on by default in -O2. */
2981 if (TARGET_THUMB1_P (opts->x_target_flags))
2982 opts->x_flag_schedule_insns = 0;
2983 else
2984 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
2986 /* Disable shrink-wrap when optimizing function for size, since it tends to
2987 generate additional returns. */
2988 if (optimize_function_for_size_p (cfun)
2989 && TARGET_THUMB2_P (opts->x_target_flags))
2990 opts->x_flag_shrink_wrap = false;
2991 else
2992 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
2994 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2995 - epilogue_insns - does not accurately model the corresponding insns
2996 emitted in the asm file. In particular, see the comment in thumb_exit
2997 'Find out how many of the (return) argument registers we can corrupt'.
2998 As a consequence, the epilogue may clobber registers without fipa-ra
2999 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3000 TODO: Accurately model clobbers for epilogue_insns and reenable
3001 fipa-ra. */
3002 if (TARGET_THUMB1_P (opts->x_target_flags))
3003 opts->x_flag_ipa_ra = 0;
3004 else
3005 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3007 /* Thumb2 inline assembly code should always use unified syntax.
3008 This will apply to ARM and Thumb1 eventually. */
3009 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3011 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3012 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3013 #endif
3016 /* Fix up any incompatible options that the user has specified. */
3017 static void
3018 arm_option_override (void)
3020 arm_selected_arch = NULL;
3021 arm_selected_cpu = NULL;
3022 arm_selected_tune = NULL;
3024 if (global_options_set.x_arm_arch_option)
3025 arm_selected_arch = &all_architectures[arm_arch_option];
3027 if (global_options_set.x_arm_cpu_option)
3029 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
3030 arm_selected_tune = &all_cores[(int) arm_cpu_option];
3033 if (global_options_set.x_arm_tune_option)
3034 arm_selected_tune = &all_cores[(int) arm_tune_option];
3036 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3037 SUBTARGET_OVERRIDE_OPTIONS;
3038 #endif
3040 if (arm_selected_arch)
3042 if (arm_selected_cpu)
3044 const arm_feature_set tuning_flags = ARM_FSET_MAKE_CPU1 (FL_TUNE);
3045 arm_feature_set selected_flags;
3046 ARM_FSET_XOR (selected_flags, arm_selected_cpu->flags,
3047 arm_selected_arch->flags);
3048 ARM_FSET_EXCLUDE (selected_flags, selected_flags, tuning_flags);
3049 /* Check for conflict between mcpu and march. */
3050 if (!ARM_FSET_IS_EMPTY (selected_flags))
3052 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3053 arm_selected_cpu->name, arm_selected_arch->name);
3054 /* -march wins for code generation.
3055 -mcpu wins for default tuning. */
3056 if (!arm_selected_tune)
3057 arm_selected_tune = arm_selected_cpu;
3059 arm_selected_cpu = arm_selected_arch;
3061 else
3062 /* -mcpu wins. */
3063 arm_selected_arch = NULL;
3065 else
3066 /* Pick a CPU based on the architecture. */
3067 arm_selected_cpu = arm_selected_arch;
3070 /* If the user did not specify a processor, choose one for them. */
3071 if (!arm_selected_cpu)
3073 const struct processors * sel;
3074 arm_feature_set sought = ARM_FSET_EMPTY;
3076 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3077 gcc_assert (arm_selected_cpu->name);
3079 sel = arm_selected_cpu;
3080 insn_flags = sel->flags;
3082 /* Now check to see if the user has specified some command line
3083 switches that require certain abilities from the cpu.  */
3085 if (TARGET_INTERWORK || TARGET_THUMB)
3087 ARM_FSET_ADD_CPU1 (sought, FL_THUMB);
3088 ARM_FSET_ADD_CPU1 (sought, FL_MODE32);
3090 /* There are no ARM processors that support both APCS-26 and
3091 interworking. Therefore we force FL_MODE26 to be removed
3092 from insn_flags here (if it was set), so that the search
3093 below will always be able to find a compatible processor. */
3094 ARM_FSET_DEL_CPU1 (insn_flags, FL_MODE26);
3097 if (!ARM_FSET_IS_EMPTY (sought)
3098 && !(ARM_FSET_CPU_SUBSET (sought, insn_flags)))
3100 /* Try to locate a CPU type that supports all of the abilities
3101 of the default CPU, plus the extra abilities requested by
3102 the user. */
3103 for (sel = all_cores; sel->name != NULL; sel++)
3104 if (ARM_FSET_CPU_SUBSET (sought, sel->flags))
3105 break;
3107 if (sel->name == NULL)
3109 unsigned current_bit_count = 0;
3110 const struct processors * best_fit = NULL;
3112 /* Ideally we would like to issue an error message here
3113 saying that it was not possible to find a CPU compatible
3114 with the default CPU, but which also supports the command
3115 line options specified by the programmer, and so they
3116 ought to use the -mcpu=<name> command line option to
3117 override the default CPU type.
3119 If we cannot find a cpu that has both the
3120 characteristics of the default cpu and the given
3121 command line options, we scan the array again looking
3122 for a best match. */
3123 for (sel = all_cores; sel->name != NULL; sel++)
3125 arm_feature_set required = ARM_FSET_EMPTY;
3126 ARM_FSET_UNION (required, sought, insn_flags);
3127 if (ARM_FSET_CPU_SUBSET (required, sel->flags))
3129 unsigned count;
3130 arm_feature_set flags;
3131 ARM_FSET_INTER (flags, sel->flags, insn_flags);
3132 count = feature_count (&flags);
3134 if (count >= current_bit_count)
3136 best_fit = sel;
3137 current_bit_count = count;
3141 gcc_assert (best_fit);
3142 sel = best_fit;
3145 arm_selected_cpu = sel;
3149 gcc_assert (arm_selected_cpu);
3150 /* The selected cpu may be an architecture, so look up tuning by core ID.  */
3151 if (!arm_selected_tune)
3152 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3154 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
3155 insn_flags = arm_selected_cpu->flags;
3156 arm_base_arch = arm_selected_cpu->base_arch;
3158 arm_tune = arm_selected_tune->core;
3159 tune_flags = arm_selected_tune->flags;
3160 current_tune = arm_selected_tune->tune;
3162 /* TBD: Dwarf info for apcs frame is not handled yet. */
3163 if (TARGET_APCS_FRAME)
3164 flag_shrink_wrap = false;
3166 /* BPABI targets use linker tricks to allow interworking on cores
3167 without thumb support. */
3168 if (TARGET_INTERWORK
3169 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB) || TARGET_BPABI))
3171 warning (0, "target CPU does not support interworking" );
3172 target_flags &= ~MASK_INTERWORK;
3175 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3177 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3178 target_flags |= MASK_APCS_FRAME;
3181 if (TARGET_POKE_FUNCTION_NAME)
3182 target_flags |= MASK_APCS_FRAME;
3184 if (TARGET_APCS_REENT && flag_pic)
3185 error ("-fpic and -mapcs-reent are incompatible");
3187 if (TARGET_APCS_REENT)
3188 warning (0, "APCS reentrant code not supported. Ignored");
3190 if (TARGET_APCS_FLOAT)
3191 warning (0, "passing floating point arguments in fp regs not yet supported");
3193 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3194 arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M);
3195 arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4);
3196 arm_arch4t = arm_arch4 && (ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB));
3197 arm_arch5 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5);
3198 arm_arch5e = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5E);
3199 arm_arch6 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6);
3200 arm_arch6k = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6K);
3201 arm_arch6kz = arm_arch6k && ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6KZ);
3202 arm_arch_notm = ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM);
3203 arm_arch6m = arm_arch6 && !arm_arch_notm;
3204 arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7);
3205 arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM);
3206 arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8);
3207 arm_arch8_1 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_1);
3208 arm_arch8_2 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_2);
3209 arm_arch_thumb1 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB);
3210 arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2);
3211 arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE);
3213 arm_ld_sched = ARM_FSET_HAS_CPU1 (tune_flags, FL_LDSCHED);
3214 arm_tune_strongarm = ARM_FSET_HAS_CPU1 (tune_flags, FL_STRONG);
3215 arm_tune_wbuf = ARM_FSET_HAS_CPU1 (tune_flags, FL_WBUF);
3216 arm_tune_xscale = ARM_FSET_HAS_CPU1 (tune_flags, FL_XSCALE);
3217 arm_arch_iwmmxt = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT);
3218 arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2);
3219 arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV);
3220 arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV);
3221 arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
3222 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3223 arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
3224 arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL);
3225 arm_fp16_inst = ARM_FSET_HAS_CPU2 (insn_flags, FL2_FP16INST);
3226 if (arm_fp16_inst)
3228 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3229 error ("selected fp16 options are incompatible.");
3230 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3233 /* V5 code we generate is completely interworking capable, so we turn off
3234 TARGET_INTERWORK here to avoid many tests later on. */
3236 /* XXX However, we must pass the right pre-processor defines to CPP
3237 or GLD can get confused. This is a hack. */
3238 if (TARGET_INTERWORK)
3239 arm_cpp_interwork = 1;
3241 if (arm_arch5)
3242 target_flags &= ~MASK_INTERWORK;
3244 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3245 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3247 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3248 error ("iwmmxt abi requires an iwmmxt capable cpu");
3250 if (!global_options_set.x_arm_fpu_index)
3252 const char *target_fpu_name;
3253 bool ok;
3255 #ifdef FPUTYPE_DEFAULT
3256 target_fpu_name = FPUTYPE_DEFAULT;
3257 #else
3258 target_fpu_name = "vfp";
3259 #endif
3261 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
3262 CL_TARGET);
3263 gcc_assert (ok);
3266 /* If soft-float is specified then don't use FPU. */
3267 if (TARGET_SOFT_FLOAT)
3268 arm_fpu_attr = FPU_NONE;
3269 else
3270 arm_fpu_attr = FPU_VFP;
3272 if (TARGET_AAPCS_BASED)
3274 if (TARGET_CALLER_INTERWORKING)
3275 error ("AAPCS does not support -mcaller-super-interworking");
3276 else
3277 if (TARGET_CALLEE_INTERWORKING)
3278 error ("AAPCS does not support -mcallee-super-interworking");
3281 /* __fp16 support currently assumes the core has ldrh. */
3282 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3283 sorry ("__fp16 and no ldrh");
3285 if (TARGET_AAPCS_BASED)
3287 if (arm_abi == ARM_ABI_IWMMXT)
3288 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3289 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3290 && TARGET_HARD_FLOAT)
3291 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3292 else
3293 arm_pcs_default = ARM_PCS_AAPCS;
3295 else
3297 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3298 sorry ("-mfloat-abi=hard and VFP");
3300 if (arm_abi == ARM_ABI_APCS)
3301 arm_pcs_default = ARM_PCS_APCS;
3302 else
3303 arm_pcs_default = ARM_PCS_ATPCS;
3306 /* For arm2/3 there is no need to do any scheduling if we are doing
3307 software floating-point. */
3308 if (TARGET_SOFT_FLOAT && !ARM_FSET_HAS_CPU1 (tune_flags, FL_MODE32))
3309 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3311 /* Use the cp15 method if it is available. */
3312 if (target_thread_pointer == TP_AUTO)
3314 if (arm_arch6k && !TARGET_THUMB1)
3315 target_thread_pointer = TP_CP15;
3316 else
3317 target_thread_pointer = TP_SOFT;
3320 /* Override the default structure alignment for AAPCS ABI. */
3321 if (!global_options_set.x_arm_structure_size_boundary)
3323 if (TARGET_AAPCS_BASED)
3324 arm_structure_size_boundary = 8;
3326 else
3328 if (arm_structure_size_boundary != 8
3329 && arm_structure_size_boundary != 32
3330 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3332 if (ARM_DOUBLEWORD_ALIGN)
3333 warning (0,
3334 "structure size boundary can only be set to 8, 32 or 64");
3335 else
3336 warning (0, "structure size boundary can only be set to 8 or 32");
3337 arm_structure_size_boundary
3338 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3342 if (TARGET_VXWORKS_RTP)
3344 if (!global_options_set.x_arm_pic_data_is_text_relative)
3345 arm_pic_data_is_text_relative = 0;
3347 else if (flag_pic
3348 && !arm_pic_data_is_text_relative
3349 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3350 /* When text & data segments don't have a fixed displacement, the
3351 intended use is with a single, read only, pic base register.
3352 Unless the user explicitly requested not to do that, set
3353 it. */
3354 target_flags |= MASK_SINGLE_PIC_BASE;
3356 /* If stack checking is disabled, we can use r10 as the PIC register,
3357 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3358 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3360 if (TARGET_VXWORKS_RTP)
3361 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3362 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3365 if (flag_pic && TARGET_VXWORKS_RTP)
3366 arm_pic_register = 9;
3368 if (arm_pic_register_string != NULL)
3370 int pic_register = decode_reg_name (arm_pic_register_string);
3372 if (!flag_pic)
3373 warning (0, "-mpic-register= is useless without -fpic");
3375 /* Prevent the user from choosing an obviously stupid PIC register. */
3376 else if (pic_register < 0 || call_used_regs[pic_register]
3377 || pic_register == HARD_FRAME_POINTER_REGNUM
3378 || pic_register == STACK_POINTER_REGNUM
3379 || pic_register >= PC_REGNUM
3380 || (TARGET_VXWORKS_RTP
3381 && (unsigned int) pic_register != arm_pic_register))
3382 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3383 else
3384 arm_pic_register = pic_register;
3387 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3388 if (fix_cm3_ldrd == 2)
3390 if (arm_selected_cpu->core == TARGET_CPU_cortexm3)
3391 fix_cm3_ldrd = 1;
3392 else
3393 fix_cm3_ldrd = 0;
3396 /* Hot/Cold partitioning is not currently supported, since we can't
3397 handle literal pool placement in that case. */
3398 if (flag_reorder_blocks_and_partition)
3400 inform (input_location,
3401 "-freorder-blocks-and-partition not supported on this architecture");
3402 flag_reorder_blocks_and_partition = 0;
3403 flag_reorder_blocks = 1;
3406 if (flag_pic)
3407 /* Hoisting PIC address calculations more aggressively provides a small,
3408 but measurable, size reduction for PIC code. Therefore, we decrease
3409 the bar for unrestricted expression hoisting to the cost of PIC address
3410 calculation, which is 2 instructions. */
3411 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3412 global_options.x_param_values,
3413 global_options_set.x_param_values);
3415 /* ARM EABI defaults to strict volatile bitfields. */
3416 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3417 && abi_version_at_least(2))
3418 flag_strict_volatile_bitfields = 1;
3420 /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3421 for which we have deemed it beneficial (signified by setting
3422 prefetch.num_slots to 1 or more). */
3423 if (flag_prefetch_loop_arrays < 0
3424 && HAVE_prefetch
3425 && optimize >= 3
3426 && current_tune->prefetch.num_slots > 0)
3427 flag_prefetch_loop_arrays = 1;
3429 /* Set up parameters to be used in prefetching algorithm. Do not
3430 override the defaults unless we are tuning for a core we have
3431 researched values for. */
3432 if (current_tune->prefetch.num_slots > 0)
3433 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3434 current_tune->prefetch.num_slots,
3435 global_options.x_param_values,
3436 global_options_set.x_param_values);
3437 if (current_tune->prefetch.l1_cache_line_size >= 0)
3438 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3439 current_tune->prefetch.l1_cache_line_size,
3440 global_options.x_param_values,
3441 global_options_set.x_param_values);
3442 if (current_tune->prefetch.l1_cache_size >= 0)
3443 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3444 current_tune->prefetch.l1_cache_size,
3445 global_options.x_param_values,
3446 global_options_set.x_param_values);
3448 /* Use Neon to perform 64-bit operations rather than core
3449 registers. */
3450 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3451 if (use_neon_for_64bits == 1)
3452 prefer_neon_for_64bits = true;
3454 /* Use the alternative scheduling-pressure algorithm by default. */
3455 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3456 global_options.x_param_values,
3457 global_options_set.x_param_values);
3459 /* Look through ready list and all of queue for instructions
3460 relevant for L2 auto-prefetcher. */
3461 int param_sched_autopref_queue_depth;
3463 switch (current_tune->sched_autopref)
3465 case tune_params::SCHED_AUTOPREF_OFF:
3466 param_sched_autopref_queue_depth = -1;
3467 break;
3469 case tune_params::SCHED_AUTOPREF_RANK:
3470 param_sched_autopref_queue_depth = 0;
3471 break;
3473 case tune_params::SCHED_AUTOPREF_FULL:
3474 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3475 break;
3477 default:
3478 gcc_unreachable ();
3481 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3482 param_sched_autopref_queue_depth,
3483 global_options.x_param_values,
3484 global_options_set.x_param_values);
3486 /* Currently, for slow flash data, we just disable literal pools. We also
3487 disable them for pure-code. */
3488 if (target_slow_flash_data || target_pure_code)
3489 arm_disable_literal_pool = true;
3491 /* Disable scheduling fusion by default if the processor is not ARMv7
3492 or does not prefer ldrd/strd. */
3493 if (flag_schedule_fusion == 2
3494 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3495 flag_schedule_fusion = 0;
3497 /* Need to remember initial options before they are overridden. */
3498 init_optimize = build_optimization_node (&global_options);
3500 arm_option_override_internal (&global_options, &global_options_set);
3501 arm_option_check_internal (&global_options);
3502 arm_option_params_internal ();
3504 /* Register global variables with the garbage collector. */
3505 arm_add_gc_roots ();
3507 /* Save the initial options in case the user does function specific
3508 options or #pragma target. */
3509 target_option_default_node = target_option_current_node
3510 = build_target_option_node (&global_options);
3512 /* Init initial mode for testing. */
3513 thumb_flipper = TARGET_THUMB;
3516 static void
3517 arm_add_gc_roots (void)
3519 gcc_obstack_init(&minipool_obstack);
3520 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3523 /* A table of known ARM exception types.
3524 For use with the interrupt function attribute. */
3526 typedef struct
3528 const char *const arg;
3529 const unsigned long return_value;
3531 isr_attribute_arg;
3533 static const isr_attribute_arg isr_attribute_args [] =
3535 { "IRQ", ARM_FT_ISR },
3536 { "irq", ARM_FT_ISR },
3537 { "FIQ", ARM_FT_FIQ },
3538 { "fiq", ARM_FT_FIQ },
3539 { "ABORT", ARM_FT_ISR },
3540 { "abort", ARM_FT_ISR },
3541 { "ABORT", ARM_FT_ISR },
3542 { "abort", ARM_FT_ISR },
3543 { "UNDEF", ARM_FT_EXCEPTION },
3544 { "undef", ARM_FT_EXCEPTION },
3545 { "SWI", ARM_FT_EXCEPTION },
3546 { "swi", ARM_FT_EXCEPTION },
3547 { NULL, ARM_FT_NORMAL }
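/* As an illustration of how this table is used: a handler is typically
   requested with something like

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   and the string argument is compared against the ARG field of each
   entry below (see arm_isr_value).  */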
3550 /* Returns the (interrupt) function type of the current
3551 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3553 static unsigned long
3554 arm_isr_value (tree argument)
3556 const isr_attribute_arg * ptr;
3557 const char * arg;
3559 if (!arm_arch_notm)
3560 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3562 /* No argument - default to IRQ. */
3563 if (argument == NULL_TREE)
3564 return ARM_FT_ISR;
3566 /* Get the value of the argument. */
3567 if (TREE_VALUE (argument) == NULL_TREE
3568 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3569 return ARM_FT_UNKNOWN;
3571 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3573 /* Check it against the list of known arguments. */
3574 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3575 if (streq (arg, ptr->arg))
3576 return ptr->return_value;
3578 /* An unrecognized interrupt type. */
3579 return ARM_FT_UNKNOWN;
3582 /* Computes the type of the current function. */
3584 static unsigned long
3585 arm_compute_func_type (void)
3587 unsigned long type = ARM_FT_UNKNOWN;
3588 tree a;
3589 tree attr;
3591 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3593 /* Decide if the current function is volatile. Such functions
3594 never return, and many memory cycles can be saved by not storing
3595 register values that will never be needed again. This optimization
3596 was added to speed up context switching in a kernel application. */
3597 if (optimize > 0
3598 && (TREE_NOTHROW (current_function_decl)
3599 || !(flag_unwind_tables
3600 || (flag_exceptions
3601 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3602 && TREE_THIS_VOLATILE (current_function_decl))
3603 type |= ARM_FT_VOLATILE;
3605 if (cfun->static_chain_decl != NULL)
3606 type |= ARM_FT_NESTED;
3608 attr = DECL_ATTRIBUTES (current_function_decl);
3610 a = lookup_attribute ("naked", attr);
3611 if (a != NULL_TREE)
3612 type |= ARM_FT_NAKED;
3614 a = lookup_attribute ("isr", attr);
3615 if (a == NULL_TREE)
3616 a = lookup_attribute ("interrupt", attr);
3618 if (a == NULL_TREE)
3619 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3620 else
3621 type |= arm_isr_value (TREE_VALUE (a));
3623 return type;
3626 /* Returns the type of the current function. */
3628 unsigned long
3629 arm_current_func_type (void)
3631 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3632 cfun->machine->func_type = arm_compute_func_type ();
3634 return cfun->machine->func_type;
3637 bool
3638 arm_allocate_stack_slots_for_args (void)
3640 /* Naked functions should not allocate stack slots for arguments. */
3641 return !IS_NAKED (arm_current_func_type ());
3644 static bool
3645 arm_warn_func_return (tree decl)
3647 /* Naked functions are implemented entirely in assembly, including the
3648 return sequence, so suppress warnings about this. */
3649 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
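/* For example, a function declared as

     void start (void) __attribute__ ((naked));

   supplies its own prologue, epilogue and return sequence in inline
   assembly, so warning that control reaches the end of its body would
   only be noise; hence the check above.  */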
3653 /* Output assembler code for a block containing the constant parts
3654 of a trampoline, leaving space for the variable parts.
3656 On the ARM, (if r8 is the static chain regnum, and remembering that
3657 referencing pc adds an offset of 8) the trampoline looks like:
3658 ldr r8, [pc, #0]
3659 ldr pc, [pc]
3660 .word static chain value
3661 .word function's address
3662 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3664 static void
3665 arm_asm_trampoline_template (FILE *f)
3667 fprintf (f, "\t.syntax unified\n");
3669 if (TARGET_ARM)
3671 fprintf (f, "\t.arm\n");
3672 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3673 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3675 else if (TARGET_THUMB2)
3677 fprintf (f, "\t.thumb\n");
3678 /* The Thumb-2 trampoline is similar to the ARM implementation.
3679 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3680 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3681 STATIC_CHAIN_REGNUM, PC_REGNUM);
3682 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3684 else
3686 ASM_OUTPUT_ALIGN (f, 2);
3687 fprintf (f, "\t.code\t16\n");
3688 fprintf (f, ".Ltrampoline_start:\n");
3689 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3690 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3691 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3692 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3693 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3694 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3696 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3697 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3700 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3702 static void
3703 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3705 rtx fnaddr, mem, a_tramp;
3707 emit_block_move (m_tramp, assemble_trampoline_template (),
3708 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3710 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3711 emit_move_insn (mem, chain_value);
3713 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3714 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3715 emit_move_insn (mem, fnaddr);
3717 a_tramp = XEXP (m_tramp, 0);
3718 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3719 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3720 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3723 /* Thumb trampolines should be entered in thumb mode, so set
3724 the bottom bit of the address. */
3726 static rtx
3727 arm_trampoline_adjust_address (rtx addr)
3729 if (TARGET_THUMB)
3730 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3731 NULL, 0, OPTAB_LIB_WIDEN);
3732 return addr;
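/* For instance, if the trampoline code itself starts at address 0x8000
   and the target is Thumb, the address handed back to callers is 0x8001:
   the low bit makes a subsequent BX/BLX enter (or stay in) Thumb state.  */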
3735 /* Return 1 if it is possible to return using a single instruction.
3736 If SIBLING is non-null, this is a test for a return before a sibling
3737 call. SIBLING is the call insn, so we can examine its register usage. */
3740 use_return_insn (int iscond, rtx sibling)
3742 int regno;
3743 unsigned int func_type;
3744 unsigned long saved_int_regs;
3745 unsigned HOST_WIDE_INT stack_adjust;
3746 arm_stack_offsets *offsets;
3748 /* Never use a return instruction before reload has run. */
3749 if (!reload_completed)
3750 return 0;
3752 func_type = arm_current_func_type ();
3754 /* Naked, volatile and stack alignment functions need special
3755 consideration. */
3756 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3757 return 0;
3759 /* So do interrupt functions that use the frame pointer and Thumb
3760 interrupt functions. */
3761 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3762 return 0;
3764 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3765 && !optimize_function_for_size_p (cfun))
3766 return 0;
3768 offsets = arm_get_frame_offsets ();
3769 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3771 /* As do variadic functions. */
3772 if (crtl->args.pretend_args_size
3773 || cfun->machine->uses_anonymous_args
3774 /* Or if the function calls __builtin_eh_return () */
3775 || crtl->calls_eh_return
3776 /* Or if the function calls alloca */
3777 || cfun->calls_alloca
3778 /* Or if there is a stack adjustment. However, if the stack pointer
3779 is saved on the stack, we can use a pre-incrementing stack load. */
3780 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3781 && stack_adjust == 4))
3782 /* Or if the static chain register was saved above the frame, under the
3783 assumption that the stack pointer isn't saved on the stack. */
3784 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3785 && arm_compute_static_chain_stack_bytes() != 0))
3786 return 0;
3788 saved_int_regs = offsets->saved_regs_mask;
3790 /* Unfortunately, the insn
3792 ldmib sp, {..., sp, ...}
3794 triggers a bug on most SA-110 based devices, such that the stack
3795 pointer won't be correctly restored if the instruction takes a
3796 page fault. We work around this problem by popping r3 along with
3797 the other registers, since that is never slower than executing
3798 another instruction.
3800 We test for !arm_arch5 here, because code for any architecture
3801 less than this could potentially be run on one of the buggy
3802 chips. */
3803 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3805 /* Validate that r3 is a call-clobbered register (always true in
3806 the default abi) ... */
3807 if (!call_used_regs[3])
3808 return 0;
3810 /* ... that it isn't being used for a return value ... */
3811 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3812 return 0;
3814 /* ... or for a tail-call argument ... */
3815 if (sibling)
3817 gcc_assert (CALL_P (sibling));
3819 if (find_regno_fusage (sibling, USE, 3))
3820 return 0;
3823 /* ... and that there are no call-saved registers in r0-r2
3824 (always true in the default ABI). */
3825 if (saved_int_regs & 0x7)
3826 return 0;
3829 /* Can't be done if interworking with Thumb, and any registers have been
3830 stacked. */
3831 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3832 return 0;
3834 /* On StrongARM, conditional returns are expensive if they aren't
3835 taken and multiple registers have been stacked. */
3836 if (iscond && arm_tune_strongarm)
3838 /* Conditional return when just the LR is stored is a simple
3839 conditional-load instruction, that's not expensive. */
3840 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3841 return 0;
3843 if (flag_pic
3844 && arm_pic_register != INVALID_REGNUM
3845 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3846 return 0;
3849 /* If there are saved registers but the LR isn't saved, then we need
3850 two instructions for the return. */
3851 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3852 return 0;
3854 /* Can't be done if any of the VFP regs are pushed,
3855 since this also requires an insn. */
3856 if (TARGET_HARD_FLOAT)
3857 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3858 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3859 return 0;
3861 if (TARGET_REALLY_IWMMXT)
3862 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3863 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3864 return 0;
3866 return 1;
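/* As a concrete illustration of the tests above: a function whose
   prologue was simply
       push  {r4, lr}
   can return with the single instruction
       pop   {r4, pc}
   whereas a function that saved r4 but not lr must reload r4 and then
   branch back separately, so we return 0 for it.  */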
3869 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3870 shrink-wrapping if possible. This is the case if we need to emit a
3871 prologue, which we can test by looking at the offsets. */
3872 bool
3873 use_simple_return_p (void)
3875 arm_stack_offsets *offsets;
3877 offsets = arm_get_frame_offsets ();
3878 return offsets->outgoing_args != 0;
3881 /* Return TRUE if int I is a valid immediate ARM constant. */
3884 const_ok_for_arm (HOST_WIDE_INT i)
3886 int lowbit;
3888 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3889 be all zero, or all one. */
3890 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3891 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3892 != ((~(unsigned HOST_WIDE_INT) 0)
3893 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3894 return FALSE;
3896 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3898 /* Fast return for 0 and small values. We must do this for zero, since
3899 the code below can't handle that one case. */
3900 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3901 return TRUE;
3903 /* Get the number of trailing zeros. */
3904 lowbit = ffs((int) i) - 1;
3906 /* Only even shifts are allowed in ARM mode so round down to the
3907 nearest even number. */
3908 if (TARGET_ARM)
3909 lowbit &= ~1;
3911 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3912 return TRUE;
3914 if (TARGET_ARM)
3916 /* Allow rotated constants in ARM mode. */
3917 if (lowbit <= 4
3918 && ((i & ~0xc000003f) == 0
3919 || (i & ~0xf000000f) == 0
3920 || (i & ~0xfc000003) == 0))
3921 return TRUE;
3923 else
3925 HOST_WIDE_INT v;
3927 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3928 v = i & 0xff;
3929 v |= v << 16;
3930 if (i == v || i == (v | (v << 8)))
3931 return TRUE;
3933 /* Allow repeated pattern 0xXY00XY00. */
3934 v = i & 0xff00;
3935 v |= v << 16;
3936 if (i == v)
3937 return TRUE;
3940 return FALSE;
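/* A few worked examples of the rules above:
     0x000000ff   valid everywhere (fits in 8 bits directly).
     0x0003fc00   valid for ARM: 0xff shifted to an even bit position.
     0xf000000f   valid for ARM: 0xff rotated across the word boundary.
     0x00120012, 0x34343434   valid for Thumb-2 as replicated patterns.
     0x00000101   valid for neither, so it must be synthesized with more
                  than one instruction.  */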
3943 /* Return true if I is a valid constant for the operation CODE. */
3945 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3947 if (const_ok_for_arm (i))
3948 return 1;
3950 switch (code)
3952 case SET:
3953 /* See if we can use movw. */
3954 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
3955 return 1;
3956 else
3957 /* Otherwise, try mvn. */
3958 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3960 case PLUS:
3961 /* See if we can use addw or subw. */
3962 if (TARGET_THUMB2
3963 && ((i & 0xfffff000) == 0
3964 || ((-i) & 0xfffff000) == 0))
3965 return 1;
3966 /* Fall through. */
3967 case COMPARE:
3968 case EQ:
3969 case NE:
3970 case GT:
3971 case LE:
3972 case LT:
3973 case GE:
3974 case GEU:
3975 case LTU:
3976 case GTU:
3977 case LEU:
3978 case UNORDERED:
3979 case ORDERED:
3980 case UNEQ:
3981 case UNGE:
3982 case UNLT:
3983 case UNGT:
3984 case UNLE:
3985 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3987 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3988 case XOR:
3989 return 0;
3991 case IOR:
3992 if (TARGET_THUMB2)
3993 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3994 return 0;
3996 case AND:
3997 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3999 default:
4000 gcc_unreachable ();
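/* For instance, with CODE == SET and I == 0xffffff00 neither a plain mov
   nor movw can encode the value, but ~I == 0xff is a valid immediate, so
   the constant can still be loaded with a single mvn.  */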
4004 /* Return true if I is a valid DImode constant for the operation CODE. */
4006 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4008 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4009 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4010 rtx hi = GEN_INT (hi_val);
4011 rtx lo = GEN_INT (lo_val);
4013 if (TARGET_THUMB1)
4014 return 0;
4016 switch (code)
4018 case AND:
4019 case IOR:
4020 case XOR:
4021 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4022 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4023 case PLUS:
4024 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4026 default:
4027 return 0;
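/* For example, a DImode AND with 0xffffffff000000ff is acceptable: the
   high word is all ones (that half needs no instruction at all) and the
   low word 0xff is itself a valid AND immediate.  */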
4031 /* Emit a sequence of insns to handle a large constant.
4032 CODE is the code of the operation required; it can be any of SET, PLUS,
4033 IOR, AND, XOR, MINUS;
4034 MODE is the mode in which the operation is being performed;
4035 VAL is the integer to operate on;
4036 SOURCE is the other operand (a register, or a null-pointer for SET);
4037 SUBTARGETS means it is safe to create scratch registers if that will
4038 either produce a simpler sequence, or we will want to cse the values.
4039 Return value is the number of insns emitted. */
4041 /* ??? Tweak this for thumb2. */
4043 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4044 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4046 rtx cond;
4048 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4049 cond = COND_EXEC_TEST (PATTERN (insn));
4050 else
4051 cond = NULL_RTX;
4053 if (subtargets || code == SET
4054 || (REG_P (target) && REG_P (source)
4055 && REGNO (target) != REGNO (source)))
4057 /* After arm_reorg has been called, we can't fix up expensive
4058 constants by pushing them into memory so we must synthesize
4059 them in-line, regardless of the cost. This is only likely to
4060 be more costly on chips that have load delay slots and we are
4061 compiling without running the scheduler (so no splitting
4062 occurred before the final instruction emission).
4064 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4066 if (!cfun->machine->after_arm_reorg
4067 && !cond
4068 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4069 1, 0)
4070 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4071 + (code != SET))))
4073 if (code == SET)
4075 /* Currently SET is the only monadic value for CODE, all
4076 the rest are dyadic. */
4077 if (TARGET_USE_MOVT)
4078 arm_emit_movpair (target, GEN_INT (val));
4079 else
4080 emit_set_insn (target, GEN_INT (val));
4082 return 1;
4084 else
4086 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4088 if (TARGET_USE_MOVT)
4089 arm_emit_movpair (temp, GEN_INT (val));
4090 else
4091 emit_set_insn (temp, GEN_INT (val));
4093 /* For MINUS, the value is subtracted from, since we never
4094 have subtraction of a constant. */
4095 if (code == MINUS)
4096 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4097 else
4098 emit_set_insn (target,
4099 gen_rtx_fmt_ee (code, mode, source, temp));
4100 return 2;
4105 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4109 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4110 ARM/Thumb-2 immediates and add up to VAL.
4111 The function's return value gives the number of insns required. */
4112 static int
4113 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4114 struct four_ints *return_sequence)
4116 int best_consecutive_zeros = 0;
4117 int i;
4118 int best_start = 0;
4119 int insns1, insns2;
4120 struct four_ints tmp_sequence;
4122 /* If we aren't targeting ARM, the best place to start is always at
4123 the bottom, otherwise look more closely. */
4124 if (TARGET_ARM)
4126 for (i = 0; i < 32; i += 2)
4128 int consecutive_zeros = 0;
4130 if (!(val & (3 << i)))
4132 while ((i < 32) && !(val & (3 << i)))
4134 consecutive_zeros += 2;
4135 i += 2;
4137 if (consecutive_zeros > best_consecutive_zeros)
4139 best_consecutive_zeros = consecutive_zeros;
4140 best_start = i - consecutive_zeros;
4142 i -= 2;
4147 /* So long as it won't require any more insns to do so, it's
4148 desirable to emit a small constant (in bits 0...9) in the last
4149 insn. This way there is more chance that it can be combined with
4150 a later addressing insn to form a pre-indexed load or store
4151 operation. Consider:
4153 *((volatile int *)0xe0000100) = 1;
4154 *((volatile int *)0xe0000110) = 2;
4156 We want this to wind up as:
4158 mov rA, #0xe0000000
4159 mov rB, #1
4160 str rB, [rA, #0x100]
4161 mov rB, #2
4162 str rB, [rA, #0x110]
4164 rather than having to synthesize both large constants from scratch.
4166 Therefore, we calculate how many insns would be required to emit
4167 the constant starting from `best_start', and also starting from
4168 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4169 yield a shorter sequence, we may as well use zero. */
4170 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4171 if (best_start != 0
4172 && ((HOST_WIDE_INT_1U << best_start) < val))
4174 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4175 if (insns2 <= insns1)
4177 *return_sequence = tmp_sequence;
4178 insns1 = insns2;
4182 return insns1;
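/* A small worked example: for CODE == SET and VAL == 0x12340000 the best
   sequence found has two elements, 0x12000000 and 0x00340000, each a
   valid 8-bit rotated immediate, so arm_gen_constant can later emit
       mov   rD, #0x12000000
       add   rD, rD, #0x340000
   (assuming ARM state and no movw/movt).  */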
4185 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4186 static int
4187 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4188 struct four_ints *return_sequence, int i)
4190 int remainder = val & 0xffffffff;
4191 int insns = 0;
4193 /* Try and find a way of doing the job in either two or three
4194 instructions.
4196 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4197 location. We start at position I. This may be the MSB, or
4198 optimal_immediate_sequence may have positioned it at the largest block
4199 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4200 wrapping around to the top of the word when we drop off the bottom.
4201 In the worst case this code should produce no more than four insns.
4203 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4204 constants, shifted to any arbitrary location. We should always start
4205 at the MSB. */
4208 int end;
4209 unsigned int b1, b2, b3, b4;
4210 unsigned HOST_WIDE_INT result;
4211 int loc;
4213 gcc_assert (insns < 4);
4215 if (i <= 0)
4216 i += 32;
4218 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4219 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4221 loc = i;
4222 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4223 /* We can use addw/subw for the last 12 bits. */
4224 result = remainder;
4225 else
4227 /* Use an 8-bit shifted/rotated immediate. */
4228 end = i - 8;
4229 if (end < 0)
4230 end += 32;
4231 result = remainder & ((0x0ff << end)
4232 | ((i < end) ? (0xff >> (32 - end))
4233 : 0));
4234 i -= 8;
4237 else
4239 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4240 arbitrary shifts. */
4241 i -= TARGET_ARM ? 2 : 1;
4242 continue;
4245 /* Next, see if we can do a better job with a thumb2 replicated
4246 constant.
4248 We do it this way around to catch the cases like 0x01F001E0 where
4249 two 8-bit immediates would work, but a replicated constant would
4250 make it worse.
4252 TODO: 16-bit constants that don't clear all the bits, but still win.
4253 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4254 if (TARGET_THUMB2)
4256 b1 = (remainder & 0xff000000) >> 24;
4257 b2 = (remainder & 0x00ff0000) >> 16;
4258 b3 = (remainder & 0x0000ff00) >> 8;
4259 b4 = remainder & 0xff;
4261 if (loc > 24)
4263 /* The 8-bit immediate already found clears b1 (and maybe b2),
4264 but must leave b3 and b4 alone. */
4266 /* First try to find a 32-bit replicated constant that clears
4267 almost everything. We can assume that we can't do it in one,
4268 or else we wouldn't be here. */
4269 unsigned int tmp = b1 & b2 & b3 & b4;
4270 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4271 + (tmp << 24);
4272 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4273 + (tmp == b3) + (tmp == b4);
4274 if (tmp
4275 && (matching_bytes >= 3
4276 || (matching_bytes == 2
4277 && const_ok_for_op (remainder & ~tmp2, code))))
4279 /* At least 3 of the bytes match, and the fourth has at
4280 least as many bits set, or two of the bytes match
4281 and it will only require one more insn to finish. */
4282 result = tmp2;
4283 i = tmp != b1 ? 32
4284 : tmp != b2 ? 24
4285 : tmp != b3 ? 16
4286 : 8;
4289 /* Second, try to find a 16-bit replicated constant that can
4290 leave three of the bytes clear. If b2 or b4 is already
4291 zero, then we can. If the 8-bit from above would not
4292 clear b2 anyway, then we still win. */
4293 else if (b1 == b3 && (!b2 || !b4
4294 || (remainder & 0x00ff0000 & ~result)))
4296 result = remainder & 0xff00ff00;
4297 i = 24;
4300 else if (loc > 16)
4302 /* The 8-bit immediate already found clears b2 (and maybe b3)
4303 and we don't get here unless b1 is already clear, but it will
4304 leave b4 unchanged. */
4306 /* If we can clear b2 and b4 at once, then we win, since the
4307 8-bits couldn't possibly reach that far. */
4308 if (b2 == b4)
4310 result = remainder & 0x00ff00ff;
4311 i = 16;
4316 return_sequence->i[insns++] = result;
4317 remainder &= ~result;
4319 if (code == SET || code == MINUS)
4320 code = PLUS;
4322 while (remainder);
4324 return insns;
4327 /* Emit an instruction with the indicated PATTERN. If COND is
4328 non-NULL, conditionalize the execution of the instruction on COND
4329 being true. */
4331 static void
4332 emit_constant_insn (rtx cond, rtx pattern)
4334 if (cond)
4335 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4336 emit_insn (pattern);
4339 /* As above, but extra parameter GENERATE which, if clear, suppresses
4340 RTL generation. */
4342 static int
4343 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4344 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4345 int subtargets, int generate)
4347 int can_invert = 0;
4348 int can_negate = 0;
4349 int final_invert = 0;
4350 int i;
4351 int set_sign_bit_copies = 0;
4352 int clear_sign_bit_copies = 0;
4353 int clear_zero_bit_copies = 0;
4354 int set_zero_bit_copies = 0;
4355 int insns = 0, neg_insns, inv_insns;
4356 unsigned HOST_WIDE_INT temp1, temp2;
4357 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4358 struct four_ints *immediates;
4359 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4361 /* Find out which operations are safe for a given CODE. Also do a quick
4362 check for degenerate cases; these can occur when DImode operations
4363 are split. */
4364 switch (code)
4366 case SET:
4367 can_invert = 1;
4368 break;
4370 case PLUS:
4371 can_negate = 1;
4372 break;
4374 case IOR:
4375 if (remainder == 0xffffffff)
4377 if (generate)
4378 emit_constant_insn (cond,
4379 gen_rtx_SET (target,
4380 GEN_INT (ARM_SIGN_EXTEND (val))));
4381 return 1;
4384 if (remainder == 0)
4386 if (reload_completed && rtx_equal_p (target, source))
4387 return 0;
4389 if (generate)
4390 emit_constant_insn (cond, gen_rtx_SET (target, source));
4391 return 1;
4393 break;
4395 case AND:
4396 if (remainder == 0)
4398 if (generate)
4399 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4400 return 1;
4402 if (remainder == 0xffffffff)
4404 if (reload_completed && rtx_equal_p (target, source))
4405 return 0;
4406 if (generate)
4407 emit_constant_insn (cond, gen_rtx_SET (target, source));
4408 return 1;
4410 can_invert = 1;
4411 break;
4413 case XOR:
4414 if (remainder == 0)
4416 if (reload_completed && rtx_equal_p (target, source))
4417 return 0;
4418 if (generate)
4419 emit_constant_insn (cond, gen_rtx_SET (target, source));
4420 return 1;
4423 if (remainder == 0xffffffff)
4425 if (generate)
4426 emit_constant_insn (cond,
4427 gen_rtx_SET (target,
4428 gen_rtx_NOT (mode, source)));
4429 return 1;
4431 final_invert = 1;
4432 break;
4434 case MINUS:
4435 /* We treat MINUS as (val - source), since (source - val) is always
4436 passed as (source + (-val)). */
4437 if (remainder == 0)
4439 if (generate)
4440 emit_constant_insn (cond,
4441 gen_rtx_SET (target,
4442 gen_rtx_NEG (mode, source)));
4443 return 1;
4445 if (const_ok_for_arm (val))
4447 if (generate)
4448 emit_constant_insn (cond,
4449 gen_rtx_SET (target,
4450 gen_rtx_MINUS (mode, GEN_INT (val),
4451 source)));
4452 return 1;
4455 break;
4457 default:
4458 gcc_unreachable ();
4461 /* If we can do it in one insn get out quickly. */
4462 if (const_ok_for_op (val, code))
4464 if (generate)
4465 emit_constant_insn (cond,
4466 gen_rtx_SET (target,
4467 (source
4468 ? gen_rtx_fmt_ee (code, mode, source,
4469 GEN_INT (val))
4470 : GEN_INT (val))));
4471 return 1;
4474 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4475 insn. */
4476 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4477 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4479 if (generate)
4481 if (mode == SImode && i == 16)
4482 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4483 smaller insn. */
4484 emit_constant_insn (cond,
4485 gen_zero_extendhisi2
4486 (target, gen_lowpart (HImode, source)));
4487 else
4488 /* Extz only supports SImode, but we can coerce the operands
4489 into that mode. */
4490 emit_constant_insn (cond,
4491 gen_extzv_t2 (gen_lowpart (SImode, target),
4492 gen_lowpart (SImode, source),
4493 GEN_INT (i), const0_rtx));
4496 return 1;
4499 /* Calculate a few attributes that may be useful for specific
4500 optimizations. */
4501 /* Count number of leading zeros. */
4502 for (i = 31; i >= 0; i--)
4504 if ((remainder & (1 << i)) == 0)
4505 clear_sign_bit_copies++;
4506 else
4507 break;
4510 /* Count number of leading 1's. */
4511 for (i = 31; i >= 0; i--)
4513 if ((remainder & (1 << i)) != 0)
4514 set_sign_bit_copies++;
4515 else
4516 break;
4519 /* Count number of trailing zeros. */
4520 for (i = 0; i <= 31; i++)
4522 if ((remainder & (1 << i)) == 0)
4523 clear_zero_bit_copies++;
4524 else
4525 break;
4528 /* Count number of trailing 1's. */
4529 for (i = 0; i <= 31; i++)
4531 if ((remainder & (1 << i)) != 0)
4532 set_zero_bit_copies++;
4533 else
4534 break;
4537 switch (code)
4539 case SET:
4540 /* See if we can do this by sign_extending a constant that is known
4541 to be negative. This is a good way of doing it, since the shift
4542 may well merge into a subsequent insn. */
4543 if (set_sign_bit_copies > 1)
4545 if (const_ok_for_arm
4546 (temp1 = ARM_SIGN_EXTEND (remainder
4547 << (set_sign_bit_copies - 1))))
4549 if (generate)
4551 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4552 emit_constant_insn (cond,
4553 gen_rtx_SET (new_src, GEN_INT (temp1)));
4554 emit_constant_insn (cond,
4555 gen_ashrsi3 (target, new_src,
4556 GEN_INT (set_sign_bit_copies - 1)));
4558 return 2;
4560 /* For an inverted constant, we will need to set the low bits;
4561 these will be shifted out of harm's way. */
4562 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4563 if (const_ok_for_arm (~temp1))
4565 if (generate)
4567 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4568 emit_constant_insn (cond,
4569 gen_rtx_SET (new_src, GEN_INT (temp1)));
4570 emit_constant_insn (cond,
4571 gen_ashrsi3 (target, new_src,
4572 GEN_INT (set_sign_bit_copies - 1)));
4574 return 2;
4578 /* See if we can calculate the value as the difference between two
4579 valid immediates. */
4580 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4582 int topshift = clear_sign_bit_copies & ~1;
4584 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4585 & (0xff000000 >> topshift));
4587 /* If temp1 is zero, then that means the 9 most significant
4588 bits of remainder were 1 and we've caused it to overflow.
4589 When topshift is 0 we don't need to do anything since we
4590 can borrow from 'bit 32'. */
4591 if (temp1 == 0 && topshift != 0)
4592 temp1 = 0x80000000 >> (topshift - 1);
4594 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4596 if (const_ok_for_arm (temp2))
4598 if (generate)
4600 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4601 emit_constant_insn (cond,
4602 gen_rtx_SET (new_src, GEN_INT (temp1)));
4603 emit_constant_insn (cond,
4604 gen_addsi3 (target, new_src,
4605 GEN_INT (-temp2)));
4608 return 2;
4612 /* See if we can generate this by setting the bottom (or the top)
4613 16 bits, and then shifting these into the other half of the
4614 word. We only look for the simplest cases, to do more would cost
4615 too much. Be careful, however, not to generate this when the
4616 alternative would take fewer insns. */
4617 if (val & 0xffff0000)
4619 temp1 = remainder & 0xffff0000;
4620 temp2 = remainder & 0x0000ffff;
4622 /* Overlaps outside this range are best done using other methods. */
4623 for (i = 9; i < 24; i++)
4625 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4626 && !const_ok_for_arm (temp2))
4628 rtx new_src = (subtargets
4629 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4630 : target);
4631 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4632 source, subtargets, generate);
4633 source = new_src;
4634 if (generate)
4635 emit_constant_insn
4636 (cond,
4637 gen_rtx_SET
4638 (target,
4639 gen_rtx_IOR (mode,
4640 gen_rtx_ASHIFT (mode, source,
4641 GEN_INT (i)),
4642 source)));
4643 return insns + 1;
4647 /* Don't duplicate cases already considered. */
4648 for (i = 17; i < 24; i++)
4650 if (((temp1 | (temp1 >> i)) == remainder)
4651 && !const_ok_for_arm (temp1))
4653 rtx new_src = (subtargets
4654 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4655 : target);
4656 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4657 source, subtargets, generate);
4658 source = new_src;
4659 if (generate)
4660 emit_constant_insn
4661 (cond,
4662 gen_rtx_SET (target,
4663 gen_rtx_IOR
4664 (mode,
4665 gen_rtx_LSHIFTRT (mode, source,
4666 GEN_INT (i)),
4667 source)));
4668 return insns + 1;
4672 break;
4674 case IOR:
4675 case XOR:
4676 /* If we have IOR or XOR, and the constant can be loaded in a
4677 single instruction, and we can find a temporary to put it in,
4678 then this can be done in two instructions instead of 3-4. */
4679 if (subtargets
4680 /* TARGET can't be NULL if SUBTARGETS is 0 */
4681 || (reload_completed && !reg_mentioned_p (target, source)))
4683 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4685 if (generate)
4687 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4689 emit_constant_insn (cond,
4690 gen_rtx_SET (sub, GEN_INT (val)));
4691 emit_constant_insn (cond,
4692 gen_rtx_SET (target,
4693 gen_rtx_fmt_ee (code, mode,
4694 source, sub)));
4696 return 2;
4700 if (code == XOR)
4701 break;
4703 /* Convert
4704 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4705 and the remainder 0s, e.g. 0xfff00000) into
4706 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4708 This can be done in 2 instructions by using shifts with mov or mvn.
4709 e.g. for
4710 x = x | 0xfff00000;
4711 we generate:
4712 mvn r0, r0, asl #12
4713 mvn r0, r0, lsr #12 */
4714 if (set_sign_bit_copies > 8
4715 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4717 if (generate)
4719 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4720 rtx shift = GEN_INT (set_sign_bit_copies);
4722 emit_constant_insn
4723 (cond,
4724 gen_rtx_SET (sub,
4725 gen_rtx_NOT (mode,
4726 gen_rtx_ASHIFT (mode,
4727 source,
4728 shift))));
4729 emit_constant_insn
4730 (cond,
4731 gen_rtx_SET (target,
4732 gen_rtx_NOT (mode,
4733 gen_rtx_LSHIFTRT (mode, sub,
4734 shift))));
4736 return 2;
4739 /* Convert
4740 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4742 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4744 E.g. for r0 = r0 | 0xfff we generate:
4745 mvn r0, r0, lsr #12
4746 mvn r0, r0, asl #12
4749 if (set_zero_bit_copies > 8
4750 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4752 if (generate)
4754 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4755 rtx shift = GEN_INT (set_zero_bit_copies);
4757 emit_constant_insn
4758 (cond,
4759 gen_rtx_SET (sub,
4760 gen_rtx_NOT (mode,
4761 gen_rtx_LSHIFTRT (mode,
4762 source,
4763 shift))));
4764 emit_constant_insn
4765 (cond,
4766 gen_rtx_SET (target,
4767 gen_rtx_NOT (mode,
4768 gen_rtx_ASHIFT (mode, sub,
4769 shift))));
4771 return 2;
4774 /* This will never be reached for Thumb2 because orn is a valid
4775 instruction. This is for Thumb1 and the ARM 32 bit cases.
4777 x = y | constant (such that ~constant is a valid constant)
4778 Transform this to
4779 x = ~(~y & ~constant).
4781 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4783 if (generate)
4785 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4786 emit_constant_insn (cond,
4787 gen_rtx_SET (sub,
4788 gen_rtx_NOT (mode, source)));
4789 source = sub;
4790 if (subtargets)
4791 sub = gen_reg_rtx (mode);
4792 emit_constant_insn (cond,
4793 gen_rtx_SET (sub,
4794 gen_rtx_AND (mode, source,
4795 GEN_INT (temp1))));
4796 emit_constant_insn (cond,
4797 gen_rtx_SET (target,
4798 gen_rtx_NOT (mode, sub)));
4800 return 3;
4802 break;
4804 case AND:
4805 /* See if two shifts will do 2 or more insns' worth of work. */
4806 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4808 HOST_WIDE_INT shift_mask = ((0xffffffff
4809 << (32 - clear_sign_bit_copies))
4810 & 0xffffffff);
4812 if ((remainder | shift_mask) != 0xffffffff)
4814 HOST_WIDE_INT new_val
4815 = ARM_SIGN_EXTEND (remainder | shift_mask);
4817 if (generate)
4819 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4820 insns = arm_gen_constant (AND, SImode, cond, new_val,
4821 new_src, source, subtargets, 1);
4822 source = new_src;
4824 else
4826 rtx targ = subtargets ? NULL_RTX : target;
4827 insns = arm_gen_constant (AND, mode, cond, new_val,
4828 targ, source, subtargets, 0);
4832 if (generate)
4834 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4835 rtx shift = GEN_INT (clear_sign_bit_copies);
4837 emit_insn (gen_ashlsi3 (new_src, source, shift));
4838 emit_insn (gen_lshrsi3 (target, new_src, shift));
4841 return insns + 2;
4844 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4846 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4848 if ((remainder | shift_mask) != 0xffffffff)
4850 HOST_WIDE_INT new_val
4851 = ARM_SIGN_EXTEND (remainder | shift_mask);
4852 if (generate)
4854 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4856 insns = arm_gen_constant (AND, mode, cond, new_val,
4857 new_src, source, subtargets, 1);
4858 source = new_src;
4860 else
4862 rtx targ = subtargets ? NULL_RTX : target;
4864 insns = arm_gen_constant (AND, mode, cond, new_val,
4865 targ, source, subtargets, 0);
4869 if (generate)
4871 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4872 rtx shift = GEN_INT (clear_zero_bit_copies);
4874 emit_insn (gen_lshrsi3 (new_src, source, shift));
4875 emit_insn (gen_ashlsi3 (target, new_src, shift));
4878 return insns + 2;
4881 break;
4883 default:
4884 break;
4887 /* Calculate what the instruction sequences would be if we generated it
4888 normally, negated, or inverted. */
4889 if (code == AND)
4890 /* AND cannot be split into multiple insns, so invert and use BIC. */
4891 insns = 99;
4892 else
4893 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4895 if (can_negate)
4896 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4897 &neg_immediates);
4898 else
4899 neg_insns = 99;
4901 if (can_invert || final_invert)
4902 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4903 &inv_immediates);
4904 else
4905 inv_insns = 99;
4907 immediates = &pos_immediates;
4909 /* Is the negated immediate sequence more efficient? */
4910 if (neg_insns < insns && neg_insns <= inv_insns)
4912 insns = neg_insns;
4913 immediates = &neg_immediates;
4915 else
4916 can_negate = 0;
4918 /* Is the inverted immediate sequence more efficient?
4919 We must allow for an extra NOT instruction for XOR operations, although
4920 there is some chance that the final 'mvn' will get optimized later. */
4921 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4923 insns = inv_insns;
4924 immediates = &inv_immediates;
4926 else
4928 can_invert = 0;
4929 final_invert = 0;
4932 /* Now output the chosen sequence as instructions. */
4933 if (generate)
4935 for (i = 0; i < insns; i++)
4937 rtx new_src, temp1_rtx;
4939 temp1 = immediates->i[i];
4941 if (code == SET || code == MINUS)
4942 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4943 else if ((final_invert || i < (insns - 1)) && subtargets)
4944 new_src = gen_reg_rtx (mode);
4945 else
4946 new_src = target;
4948 if (can_invert)
4949 temp1 = ~temp1;
4950 else if (can_negate)
4951 temp1 = -temp1;
4953 temp1 = trunc_int_for_mode (temp1, mode);
4954 temp1_rtx = GEN_INT (temp1);
4956 if (code == SET)
4958 else if (code == MINUS)
4959 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4960 else
4961 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4963 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4964 source = new_src;
4966 if (code == SET)
4968 can_negate = can_invert;
4969 can_invert = 0;
4970 code = PLUS;
4972 else if (code == MINUS)
4973 code = PLUS;
4977 if (final_invert)
4979 if (generate)
4980 emit_constant_insn (cond, gen_rtx_SET (target,
4981 gen_rtx_NOT (mode, source)));
4982 insns++;
4985 return insns;
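/* As an example of the invert-and-use-BIC route: in ARM state an AND
   with 0xfffefffe has no single-instruction form, but the inverted value
   0x00010001 splits into the two immediates 0x00010000 and 0x1, so the
   mask is applied with two bic instructions, one clearing bit 16 and one
   clearing bit 0.  */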
4988 /* Canonicalize a comparison so that we are more likely to recognize it.
4989 This can be done for a few constant compares, where we can make the
4990 immediate value easier to load. */
4992 static void
4993 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4994 bool op0_preserve_value)
4996 machine_mode mode;
4997 unsigned HOST_WIDE_INT i, maxval;
4999 mode = GET_MODE (*op0);
5000 if (mode == VOIDmode)
5001 mode = GET_MODE (*op1);
5003 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5005 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5006 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5007 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5008 for GTU/LEU in Thumb mode. */
5009 if (mode == DImode)
5012 if (*code == GT || *code == LE
5013 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5015 /* Missing comparison. First try to use an available
5016 comparison. */
5017 if (CONST_INT_P (*op1))
5019 i = INTVAL (*op1);
5020 switch (*code)
5022 case GT:
5023 case LE:
5024 if (i != maxval
5025 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5027 *op1 = GEN_INT (i + 1);
5028 *code = *code == GT ? GE : LT;
5029 return;
5031 break;
5032 case GTU:
5033 case LEU:
5034 if (i != ~((unsigned HOST_WIDE_INT) 0)
5035 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5037 *op1 = GEN_INT (i + 1);
5038 *code = *code == GTU ? GEU : LTU;
5039 return;
5041 break;
5042 default:
5043 gcc_unreachable ();
5047 /* If that did not work, reverse the condition. */
5048 if (!op0_preserve_value)
5050 std::swap (*op0, *op1);
5051 *code = (int)swap_condition ((enum rtx_code)*code);
5054 return;
5057 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5058 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5059 to facilitate possible combining with a cmp into 'ands'. */
5060 if (mode == SImode
5061 && GET_CODE (*op0) == ZERO_EXTEND
5062 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5063 && GET_MODE (XEXP (*op0, 0)) == QImode
5064 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5065 && subreg_lowpart_p (XEXP (*op0, 0))
5066 && *op1 == const0_rtx)
5067 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5068 GEN_INT (255));
5070 /* Comparisons smaller than DImode. Only adjust comparisons against
5071 an out-of-range constant. */
5072 if (!CONST_INT_P (*op1)
5073 || const_ok_for_arm (INTVAL (*op1))
5074 || const_ok_for_arm (- INTVAL (*op1)))
5075 return;
5077 i = INTVAL (*op1);
5079 switch (*code)
5081 case EQ:
5082 case NE:
5083 return;
5085 case GT:
5086 case LE:
5087 if (i != maxval
5088 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5090 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5091 *code = *code == GT ? GE : LT;
5092 return;
5094 break;
5096 case GE:
5097 case LT:
5098 if (i != ~maxval
5099 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5101 *op1 = GEN_INT (i - 1);
5102 *code = *code == GE ? GT : LE;
5103 return;
5105 break;
5107 case GTU:
5108 case LEU:
5109 if (i != ~((unsigned HOST_WIDE_INT) 0)
5110 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5112 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5113 *code = *code == GTU ? GEU : LTU;
5114 return;
5116 break;
5118 case GEU:
5119 case LTU:
5120 if (i != 0
5121 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5123 *op1 = GEN_INT (i - 1);
5124 *code = *code == GEU ? GTU : LEU;
5125 return;
5127 break;
5129 default:
5130 gcc_unreachable ();
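/* For instance, the SImode comparison (x > 0xfff) cannot use 0xfff
   directly (neither 0xfff nor -0xfff is a valid immediate), so it is
   rewritten above as (x >= 0x1000), which needs only cmp x, #0x1000.  */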
5135 /* Define how to find the value returned by a function. */
5137 static rtx
5138 arm_function_value(const_tree type, const_tree func,
5139 bool outgoing ATTRIBUTE_UNUSED)
5141 machine_mode mode;
5142 int unsignedp ATTRIBUTE_UNUSED;
5143 rtx r ATTRIBUTE_UNUSED;
5145 mode = TYPE_MODE (type);
5147 if (TARGET_AAPCS_BASED)
5148 return aapcs_allocate_return_reg (mode, type, func);
5150 /* Promote integer types. */
5151 if (INTEGRAL_TYPE_P (type))
5152 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5154 /* Promote small structs returned in a register to full-word size
5155 for big-endian AAPCS. */
5156 if (arm_return_in_msb (type))
5158 HOST_WIDE_INT size = int_size_in_bytes (type);
5159 if (size % UNITS_PER_WORD != 0)
5161 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5162 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5166 return arm_libcall_value_1 (mode);
5169 /* libcall hashtable helpers. */
5171 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5173 static inline hashval_t hash (const rtx_def *);
5174 static inline bool equal (const rtx_def *, const rtx_def *);
5175 static inline void remove (rtx_def *);
5178 inline bool
5179 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5181 return rtx_equal_p (p1, p2);
5184 inline hashval_t
5185 libcall_hasher::hash (const rtx_def *p1)
5187 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5190 typedef hash_table<libcall_hasher> libcall_table_type;
5192 static void
5193 add_libcall (libcall_table_type *htab, rtx libcall)
5195 *htab->find_slot (libcall, INSERT) = libcall;
5198 static bool
5199 arm_libcall_uses_aapcs_base (const_rtx libcall)
5201 static bool init_done = false;
5202 static libcall_table_type *libcall_htab = NULL;
5204 if (!init_done)
5206 init_done = true;
5208 libcall_htab = new libcall_table_type (31);
5209 add_libcall (libcall_htab,
5210 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5211 add_libcall (libcall_htab,
5212 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5213 add_libcall (libcall_htab,
5214 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5215 add_libcall (libcall_htab,
5216 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5218 add_libcall (libcall_htab,
5219 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5220 add_libcall (libcall_htab,
5221 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5222 add_libcall (libcall_htab,
5223 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5224 add_libcall (libcall_htab,
5225 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5227 add_libcall (libcall_htab,
5228 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5229 add_libcall (libcall_htab,
5230 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5231 add_libcall (libcall_htab,
5232 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5233 add_libcall (libcall_htab,
5234 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5235 add_libcall (libcall_htab,
5236 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5237 add_libcall (libcall_htab,
5238 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5239 add_libcall (libcall_htab,
5240 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5241 add_libcall (libcall_htab,
5242 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5244 /* Values from double-precision helper functions are returned in core
5245 registers if the selected core only supports single-precision
5246 arithmetic, even if we are using the hard-float ABI. The same is
5247 true for single-precision helpers, but we will never be using the
5248 hard-float ABI on a CPU which doesn't support single-precision
5249 operations in hardware. */
5250 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5251 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5252 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5253 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5254 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5255 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5256 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5257 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5258 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5259 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5260 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5261 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5262 SFmode));
5263 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5264 DFmode));
5267 return libcall && libcall_htab->find (libcall) != NULL;
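/* A concrete case of the comment above: on a hard-float AAPCS target
   whose FPU only does single precision, the double-precision divide
   helper __aeabi_ddiv is one of the entries in this table, so
   arm_libcall_value below reports its DFmode result in r0/r1 rather
   than in a VFP register.  */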
5270 static rtx
5271 arm_libcall_value_1 (machine_mode mode)
5273 if (TARGET_AAPCS_BASED)
5274 return aapcs_libcall_value (mode);
5275 else if (TARGET_IWMMXT_ABI
5276 && arm_vector_mode_supported_p (mode))
5277 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5278 else
5279 return gen_rtx_REG (mode, ARG_REGISTER (1));
5282 /* Define how to find the value returned by a library function
5283 assuming the value has mode MODE. */
5285 static rtx
5286 arm_libcall_value (machine_mode mode, const_rtx libcall)
5288 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5289 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5291 /* The following libcalls return their result in integer registers,
5292 even though they return a floating point value. */
5293 if (arm_libcall_uses_aapcs_base (libcall))
5294 return gen_rtx_REG (mode, ARG_REGISTER(1));
5298 return arm_libcall_value_1 (mode);
5301 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5303 static bool
5304 arm_function_value_regno_p (const unsigned int regno)
5306 if (regno == ARG_REGISTER (1)
5307 || (TARGET_32BIT
5308 && TARGET_AAPCS_BASED
5309 && TARGET_HARD_FLOAT
5310 && regno == FIRST_VFP_REGNUM)
5311 || (TARGET_IWMMXT_ABI
5312 && regno == FIRST_IWMMXT_REGNUM))
5313 return true;
5315 return false;
5318 /* Determine the amount of memory needed to store the possible return
5319 registers of an untyped call. */
5321 arm_apply_result_size (void)
5323 int size = 16;
5325 if (TARGET_32BIT)
5327 if (TARGET_HARD_FLOAT_ABI)
5328 size += 32;
5329 if (TARGET_IWMMXT_ABI)
5330 size += 8;
5333 return size;
5336 /* Decide whether TYPE should be returned in memory (true)
5337 or in a register (false). FNTYPE is the type of the function making
5338 the call. */
5339 static bool
5340 arm_return_in_memory (const_tree type, const_tree fntype)
5342 HOST_WIDE_INT size;
5344 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5346 if (TARGET_AAPCS_BASED)
5348 /* Simple, non-aggregate types (i.e. not including vectors and
5349 complex) are always returned in a register (or registers).
5350 We don't care about which register here, so we can short-cut
5351 some of the detail. */
5352 if (!AGGREGATE_TYPE_P (type)
5353 && TREE_CODE (type) != VECTOR_TYPE
5354 && TREE_CODE (type) != COMPLEX_TYPE)
5355 return false;
5357 /* Any return value that is no larger than one word can be
5358 returned in r0. */
5359 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5360 return false;
5362 /* Check any available co-processors to see if they accept the
5363 type as a register candidate (VFP, for example, can return
5364 some aggregates in consecutive registers). These aren't
5365 available if the call is variadic. */
5366 if (aapcs_select_return_coproc (type, fntype) >= 0)
5367 return false;
5369 /* Vector values should be returned using ARM registers, not
5370 memory (unless they're over 16 bytes, which will break since
5371 we only have four call-clobbered registers to play with). */
5372 if (TREE_CODE (type) == VECTOR_TYPE)
5373 return (size < 0 || size > (4 * UNITS_PER_WORD));
5375 /* The rest go in memory. */
5376 return true;
5379 if (TREE_CODE (type) == VECTOR_TYPE)
5380 return (size < 0 || size > (4 * UNITS_PER_WORD));
5382 if (!AGGREGATE_TYPE_P (type) &&
5383 (TREE_CODE (type) != VECTOR_TYPE))
5384 /* All simple types are returned in registers. */
5385 return false;
5387 if (arm_abi != ARM_ABI_APCS)
5389 /* ATPCS and later return aggregate types in memory only if they are
5390 larger than a word (or are variable size). */
5391 return (size < 0 || size > UNITS_PER_WORD);
5394 /* For the arm-wince targets we choose to be compatible with Microsoft's
5395 ARM and Thumb compilers, which always return aggregates in memory. */
5396 #ifndef ARM_WINCE
5397 /* All structures/unions bigger than one word are returned in memory.
5398 Also catch the case where int_size_in_bytes returns -1. In this case
5399 the aggregate is either huge or of variable size, and in either case
5400 we will want to return it via memory and not in a register. */
5401 if (size < 0 || size > UNITS_PER_WORD)
5402 return true;
5404 if (TREE_CODE (type) == RECORD_TYPE)
5406 tree field;
5408 /* For a struct the APCS says that we only return in a register
5409 if the type is 'integer like' and every addressable element
5410 has an offset of zero. For practical purposes this means
5411 that the structure can have at most one non bit-field element
5412 and that this element must be the first one in the structure. */
5414 /* Find the first field, ignoring non FIELD_DECL things which will
5415 have been created by C++. */
5416 for (field = TYPE_FIELDS (type);
5417 field && TREE_CODE (field) != FIELD_DECL;
5418 field = DECL_CHAIN (field))
5419 continue;
5421 if (field == NULL)
5422 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5424 /* Check that the first field is valid for returning in a register. */
5426 /* ... Floats are not allowed */
5427 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5428 return true;
5430 /* ... Aggregates that are not themselves valid for returning in
5431 a register are not allowed. */
5432 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5433 return true;
5435 /* Now check the remaining fields, if any. Only bitfields are allowed,
5436 since they are not addressable. */
5437 for (field = DECL_CHAIN (field);
5438 field;
5439 field = DECL_CHAIN (field))
5441 if (TREE_CODE (field) != FIELD_DECL)
5442 continue;
5444 if (!DECL_BIT_FIELD_TYPE (field))
5445 return true;
5448 return false;
5451 if (TREE_CODE (type) == UNION_TYPE)
5453 tree field;
5455 /* Unions can be returned in registers if every element is
5456 integral, or can be returned in an integer register. */
5457 for (field = TYPE_FIELDS (type);
5458 field;
5459 field = DECL_CHAIN (field))
5461 if (TREE_CODE (field) != FIELD_DECL)
5462 continue;
5464 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5465 return true;
5467 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5468 return true;
5471 return false;
5473 #endif /* not ARM_WINCE */
5475 /* Return all other types in memory. */
5476 return true;
5479 const struct pcs_attribute_arg
5481 const char *arg;
5482 enum arm_pcs value;
5483 } pcs_attribute_args[] =
5485 {"aapcs", ARM_PCS_AAPCS},
5486 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5487 #if 0
5488 /* We could recognize these, but changes would be needed elsewhere
5489 * to implement them. */
5490 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5491 {"atpcs", ARM_PCS_ATPCS},
5492 {"apcs", ARM_PCS_APCS},
5493 #endif
5494 {NULL, ARM_PCS_UNKNOWN}
5497 static enum arm_pcs
5498 arm_pcs_from_attribute (tree attr)
5500 const struct pcs_attribute_arg *ptr;
5501 const char *arg;
5503 /* Get the value of the argument. */
5504 if (TREE_VALUE (attr) == NULL_TREE
5505 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5506 return ARM_PCS_UNKNOWN;
5508 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5510 /* Check it against the list of known arguments. */
5511 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5512 if (streq (arg, ptr->arg))
5513 return ptr->value;
5515 /* An unrecognized PCS variant. */
5516 return ARM_PCS_UNKNOWN;
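/* For illustration, how the "pcs" attribute strings above are spelled in
   user code (hypothetical declarations):

     double fn1 (double) __attribute__ ((pcs ("aapcs")));      base variant,
                                                                result in r0/r1
     double fn2 (double) __attribute__ ((pcs ("aapcs-vfp")));  VFP variant,
                                                                result in d0 on
                                                                a VFP target

   Any other string falls through to ARM_PCS_UNKNOWN and the attribute is
   ignored with a warning (see arm_handle_pcs_attribute below).  */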
5519 /* Get the PCS variant to use for this call. TYPE is the function's type
5520 specification, DECL is the specific declaration. DECL may be null if
5521 the call could be indirect or if this is a library call. */
5522 static enum arm_pcs
5523 arm_get_pcs_model (const_tree type, const_tree decl)
5525 bool user_convention = false;
5526 enum arm_pcs user_pcs = arm_pcs_default;
5527 tree attr;
5529 gcc_assert (type);
5531 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5532 if (attr)
5534 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5535 user_convention = true;
5538 if (TARGET_AAPCS_BASED)
5540 /* Detect varargs functions. These always use the base rules
5541 (no argument is ever a candidate for a co-processor
5542 register). */
5543 bool base_rules = stdarg_p (type);
5545 if (user_convention)
5547 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5548 sorry ("non-AAPCS derived PCS variant");
5549 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5550 error ("variadic functions must use the base AAPCS variant");
5553 if (base_rules)
5554 return ARM_PCS_AAPCS;
5555 else if (user_convention)
5556 return user_pcs;
5557 else if (decl && flag_unit_at_a_time)
5559 /* Local functions never leak outside this compilation unit,
5560 so we are free to use whatever conventions are
5561 appropriate. */
5562 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5563 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5564 if (i && i->local)
5565 return ARM_PCS_AAPCS_LOCAL;
5568 else if (user_convention && user_pcs != arm_pcs_default)
5569 sorry ("PCS variant");
5571 /* For everything else we use the target's default. */
5572 return arm_pcs_default;
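/* A minimal sketch of the selection logic above, with hypothetical
   declarations:

     void v (int, ...) __attribute__ ((pcs ("aapcs-vfp")));
         variadic, so the base rules win and an error is emitted;

     static double local_fn (double a) { return a; }
         carries no "pcs" attribute and never escapes the compilation unit,
         so it may be compiled as ARM_PCS_AAPCS_LOCAL and keep its argument
         and result in VFP registers when the target allows it.  */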
5576 static void
5577 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5578 const_tree fntype ATTRIBUTE_UNUSED,
5579 rtx libcall ATTRIBUTE_UNUSED,
5580 const_tree fndecl ATTRIBUTE_UNUSED)
5582 /* Record the unallocated VFP registers. */
5583 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5584 pcum->aapcs_vfp_reg_alloc = 0;
5587 /* Walk down the type tree of TYPE counting consecutive base elements.
5588 If *MODEP is VOIDmode, then set it to the first valid floating point
5589 type. If a non-floating point type is found, or if a floating point
5590 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5591 otherwise return the count in the sub-tree. */
5592 static int
5593 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5595 machine_mode mode;
5596 HOST_WIDE_INT size;
5598 switch (TREE_CODE (type))
5600 case REAL_TYPE:
5601 mode = TYPE_MODE (type);
5602 if (mode != DFmode && mode != SFmode && mode != HFmode)
5603 return -1;
5605 if (*modep == VOIDmode)
5606 *modep = mode;
5608 if (*modep == mode)
5609 return 1;
5611 break;
5613 case COMPLEX_TYPE:
5614 mode = TYPE_MODE (TREE_TYPE (type));
5615 if (mode != DFmode && mode != SFmode)
5616 return -1;
5618 if (*modep == VOIDmode)
5619 *modep = mode;
5621 if (*modep == mode)
5622 return 2;
5624 break;
5626 case VECTOR_TYPE:
5627 /* Use V2SImode and V4SImode as representatives of all 64-bit
5628 and 128-bit vector types, whether or not those modes are
5629 supported with the present options. */
5630 size = int_size_in_bytes (type);
5631 switch (size)
5633 case 8:
5634 mode = V2SImode;
5635 break;
5636 case 16:
5637 mode = V4SImode;
5638 break;
5639 default:
5640 return -1;
5643 if (*modep == VOIDmode)
5644 *modep = mode;
5646 /* Vector modes are considered to be opaque: two vectors are
5647 equivalent for the purposes of being homogeneous aggregates
5648 if they are the same size. */
5649 if (*modep == mode)
5650 return 1;
5652 break;
5654 case ARRAY_TYPE:
5656 int count;
5657 tree index = TYPE_DOMAIN (type);
5659 /* Can't handle incomplete types nor sizes that are not
5660 fixed. */
5661 if (!COMPLETE_TYPE_P (type)
5662 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5663 return -1;
5665 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5666 if (count == -1
5667 || !index
5668 || !TYPE_MAX_VALUE (index)
5669 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5670 || !TYPE_MIN_VALUE (index)
5671 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5672 || count < 0)
5673 return -1;
5675 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5676 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5678 /* There must be no padding. */
5679 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5680 return -1;
5682 return count;
5685 case RECORD_TYPE:
5687 int count = 0;
5688 int sub_count;
5689 tree field;
5691 /* Can't handle incomplete types nor sizes that are not
5692 fixed. */
5693 if (!COMPLETE_TYPE_P (type)
5694 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5695 return -1;
5697 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5699 if (TREE_CODE (field) != FIELD_DECL)
5700 continue;
5702 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5703 if (sub_count < 0)
5704 return -1;
5705 count += sub_count;
5708 /* There must be no padding. */
5709 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5710 return -1;
5712 return count;
5715 case UNION_TYPE:
5716 case QUAL_UNION_TYPE:
5718 /* These aren't very interesting except in a degenerate case. */
5719 int count = 0;
5720 int sub_count;
5721 tree field;
5723 /* Can't handle incomplete types nor sizes that are not
5724 fixed. */
5725 if (!COMPLETE_TYPE_P (type)
5726 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5727 return -1;
5729 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5731 if (TREE_CODE (field) != FIELD_DECL)
5732 continue;
5734 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5735 if (sub_count < 0)
5736 return -1;
5737 count = count > sub_count ? count : sub_count;
5740 /* There must be no padding. */
5741 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5742 return -1;
5744 return count;
5747 default:
5748 break;
5751 return -1;
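/* Worked examples for the walk above (hypothetical types, hard-float AAPCS):

     struct hfa2 { double a, b; };            *modep = DFmode, count = 2
     struct hfa4 { float  f[4]; };            *modep = SFmode, count = 4
     _Complex double                          *modep = DFmode, count = 2
     struct mix  { float f; double d; };      -1, element modes differ
     struct five { float f[5]; };             count 5; rejected later because
                                              the caller only accepts 1..4  */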
5754 /* Return true if PCS_VARIANT should use VFP registers. */
5755 static bool
5756 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5758 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5760 static bool seen_thumb1_vfp = false;
5762 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5764 sorry ("Thumb-1 hard-float VFP ABI");
5765 /* sorry() is not immediately fatal, so only display this once. */
5766 seen_thumb1_vfp = true;
5769 return true;
5772 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5773 return false;
5775 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5776 (TARGET_VFP_DOUBLE || !is_double));
5779 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5780 suitable for passing or returning in VFP registers for the PCS
5781 variant selected. If it is, then *BASE_MODE is updated to contain
5782 a machine mode describing each element of the argument's type and
5783 *COUNT to hold the number of such elements. */
5784 static bool
5785 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5786 machine_mode mode, const_tree type,
5787 machine_mode *base_mode, int *count)
5789 machine_mode new_mode = VOIDmode;
5791 /* If we have the type information, prefer that to working things
5792 out from the mode. */
5793 if (type)
5795 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5797 if (ag_count > 0 && ag_count <= 4)
5798 *count = ag_count;
5799 else
5800 return false;
5802 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5803 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5804 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5806 *count = 1;
5807 new_mode = mode;
5809 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5811 *count = 2;
5812 new_mode = (mode == DCmode ? DFmode : SFmode);
5814 else
5815 return false;
5818 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5819 return false;
5821 *base_mode = new_mode;
5822 return true;
5825 static bool
5826 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5827 machine_mode mode, const_tree type)
5829 int count ATTRIBUTE_UNUSED;
5830 machine_mode ag_mode ATTRIBUTE_UNUSED;
5832 if (!use_vfp_abi (pcs_variant, false))
5833 return false;
5834 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5835 &ag_mode, &count);
5838 static bool
5839 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5840 const_tree type)
5842 if (!use_vfp_abi (pcum->pcs_variant, false))
5843 return false;
5845 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5846 &pcum->aapcs_vfp_rmode,
5847 &pcum->aapcs_vfp_rcount);
5850 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
5851 for the behaviour of this function. */
5853 static bool
5854 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5855 const_tree type ATTRIBUTE_UNUSED)
5857 int rmode_size
5858 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
5859 int shift = rmode_size / GET_MODE_SIZE (SFmode);
5860 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5861 int regno;
5863 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5864 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5866 pcum->aapcs_vfp_reg_alloc = mask << regno;
5867 if (mode == BLKmode
5868 || (mode == TImode && ! TARGET_NEON)
5869 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5871 int i;
5872 int rcount = pcum->aapcs_vfp_rcount;
5873 int rshift = shift;
5874 machine_mode rmode = pcum->aapcs_vfp_rmode;
5875 rtx par;
5876 if (!TARGET_NEON)
5878 /* Avoid using unsupported vector modes. */
5879 if (rmode == V2SImode)
5880 rmode = DImode;
5881 else if (rmode == V4SImode)
5883 rmode = DImode;
5884 rcount *= 2;
5885 rshift /= 2;
5888 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5889 for (i = 0; i < rcount; i++)
5891 rtx tmp = gen_rtx_REG (rmode,
5892 FIRST_VFP_REGNUM + regno + i * rshift);
5893 tmp = gen_rtx_EXPR_LIST
5894 (VOIDmode, tmp,
5895 GEN_INT (i * GET_MODE_SIZE (rmode)));
5896 XVECEXP (par, 0, i) = tmp;
5899 pcum->aapcs_reg = par;
5901 else
5902 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5903 return true;
5905 return false;
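/* A worked example of the mask arithmetic above, assuming a candidate of
   two doubles (aapcs_vfp_rmode == DFmode, aapcs_vfp_rcount == 2):

     rmode_size = 8, shift = 2          each double covers two S registers
     mask = (1 << 4) - 1 = 0xf          need four consecutive free S regs,
                                        starting at an even S register

   With s0-s1 already taken (aapcs_vfp_regs_free == 0xfffc) the scan first
   matches at regno == 2, so the argument lands in s2-s5, i.e. d1-d2.  */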
5908 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
5909 comment there for the behaviour of this function. */
5911 static rtx
5912 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5913 machine_mode mode,
5914 const_tree type ATTRIBUTE_UNUSED)
5916 if (!use_vfp_abi (pcs_variant, false))
5917 return NULL;
5919 if (mode == BLKmode
5920 || (GET_MODE_CLASS (mode) == MODE_INT
5921 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
5922 && !TARGET_NEON))
5924 int count;
5925 machine_mode ag_mode;
5926 int i;
5927 rtx par;
5928 int shift;
5930 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5931 &ag_mode, &count);
5933 if (!TARGET_NEON)
5935 if (ag_mode == V2SImode)
5936 ag_mode = DImode;
5937 else if (ag_mode == V4SImode)
5939 ag_mode = DImode;
5940 count *= 2;
5943 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5944 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5945 for (i = 0; i < count; i++)
5947 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5948 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5949 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5950 XVECEXP (par, 0, i) = tmp;
5953 return par;
5956 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5959 static void
5960 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5961 machine_mode mode ATTRIBUTE_UNUSED,
5962 const_tree type ATTRIBUTE_UNUSED)
5964 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5965 pcum->aapcs_vfp_reg_alloc = 0;
5966 return;
5969 #define AAPCS_CP(X) \
5971 aapcs_ ## X ## _cum_init, \
5972 aapcs_ ## X ## _is_call_candidate, \
5973 aapcs_ ## X ## _allocate, \
5974 aapcs_ ## X ## _is_return_candidate, \
5975 aapcs_ ## X ## _allocate_return_reg, \
5976 aapcs_ ## X ## _advance \
5979 /* Table of co-processors that can be used to pass arguments in
5980 registers. Ideally no argument should be a candidate for more than
5981 one co-processor table entry, but the table is processed in order
5982 and stops after the first match. If that entry then fails to put
5983 the argument into a co-processor register, the argument will go on
5984 the stack. */
5985 static struct
5987 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5988 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5990 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5991 BLKmode) is a candidate for this co-processor's registers; this
5992 function should ignore any position-dependent state in
5993 CUMULATIVE_ARGS and only use call-type dependent information. */
5994 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5996 /* Return true if the argument does get a co-processor register; it
5997 should set aapcs_reg to an RTX of the register allocated as is
5998 required for a return from FUNCTION_ARG. */
5999 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6001 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6002 be returned in this co-processor's registers. */
6003 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6005 /* Allocate and return an RTX element to hold the return type of a call. This
6006 routine must not fail and will only be called if is_return_candidate
6007 returned true with the same parameters. */
6008 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6010 /* Finish processing this argument and prepare to start processing
6011 the next one. */
6012 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6013 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6015 AAPCS_CP(vfp)
6018 #undef AAPCS_CP
6020 static int
6021 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6022 const_tree type)
6024 int i;
6026 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6027 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6028 return i;
6030 return -1;
6033 static int
6034 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6036 /* We aren't passed a decl, so we can't check that a call is local.
6037 However, it isn't clear that that would be a win anyway, since it
6038 might limit some tail-calling opportunities. */
6039 enum arm_pcs pcs_variant;
6041 if (fntype)
6043 const_tree fndecl = NULL_TREE;
6045 if (TREE_CODE (fntype) == FUNCTION_DECL)
6047 fndecl = fntype;
6048 fntype = TREE_TYPE (fntype);
6051 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6053 else
6054 pcs_variant = arm_pcs_default;
6056 if (pcs_variant != ARM_PCS_AAPCS)
6058 int i;
6060 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6061 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6062 TYPE_MODE (type),
6063 type))
6064 return i;
6066 return -1;
6069 static rtx
6070 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6071 const_tree fntype)
6073 /* We aren't passed a decl, so we can't check that a call is local.
6074 However, it isn't clear that that would be a win anyway, since it
6075 might limit some tail-calling opportunities. */
6076 enum arm_pcs pcs_variant;
6077 int unsignedp ATTRIBUTE_UNUSED;
6079 if (fntype)
6081 const_tree fndecl = NULL_TREE;
6083 if (TREE_CODE (fntype) == FUNCTION_DECL)
6085 fndecl = fntype;
6086 fntype = TREE_TYPE (fntype);
6089 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6091 else
6092 pcs_variant = arm_pcs_default;
6094 /* Promote integer types. */
6095 if (type && INTEGRAL_TYPE_P (type))
6096 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6098 if (pcs_variant != ARM_PCS_AAPCS)
6100 int i;
6102 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6103 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6104 type))
6105 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6106 mode, type);
6109 /* Promotes small structs returned in a register to full-word size
6110 for big-endian AAPCS. */
6111 if (type && arm_return_in_msb (type))
6113 HOST_WIDE_INT size = int_size_in_bytes (type);
6114 if (size % UNITS_PER_WORD != 0)
6116 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6117 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6121 return gen_rtx_REG (mode, R0_REGNUM);
6124 static rtx
6125 aapcs_libcall_value (machine_mode mode)
6127 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6128 && GET_MODE_SIZE (mode) <= 4)
6129 mode = SImode;
6131 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6134 /* Lay out a function argument using the AAPCS rules. The rule
6135 numbers referred to here are those in the AAPCS. */
6136 static void
6137 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6138 const_tree type, bool named)
6140 int nregs, nregs2;
6141 int ncrn;
6143 /* We only need to do this once per argument. */
6144 if (pcum->aapcs_arg_processed)
6145 return;
6147 pcum->aapcs_arg_processed = true;
6149 /* Special case: if named is false then we are handling an incoming
6150 anonymous argument which is on the stack. */
6151 if (!named)
6152 return;
6154 /* Is this a potential co-processor register candidate? */
6155 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6157 int slot = aapcs_select_call_coproc (pcum, mode, type);
6158 pcum->aapcs_cprc_slot = slot;
6160 /* We don't have to apply any of the rules from part B of the
6161 preparation phase, these are handled elsewhere in the
6162 compiler. */
6164 if (slot >= 0)
6166 /* A Co-processor register candidate goes either in its own
6167 class of registers or on the stack. */
6168 if (!pcum->aapcs_cprc_failed[slot])
6170 /* C1.cp - Try to allocate the argument to co-processor
6171 registers. */
6172 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6173 return;
6175 /* C2.cp - Put the argument on the stack and note that we
6176 can't assign any more candidates in this slot. We also
6177 need to note that we have allocated stack space, so that
6178 we won't later try to split a non-cprc candidate between
6179 core registers and the stack. */
6180 pcum->aapcs_cprc_failed[slot] = true;
6181 pcum->can_split = false;
6184 /* We didn't get a register, so this argument goes on the
6185 stack. */
6186 gcc_assert (pcum->can_split == false);
6187 return;
6191 /* C3 - For double-word aligned arguments, round the NCRN up to the
6192 next even number. */
6193 ncrn = pcum->aapcs_ncrn;
6194 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6195 ncrn++;
6197 nregs = ARM_NUM_REGS2(mode, type);
6199 /* Sigh, this test should really assert that nregs > 0, but a GCC
6200 extension allows empty structs and then gives them empty size; it
6201 then allows such a structure to be passed by value. For some of
6202 the code below we have to pretend that such an argument has
6203 non-zero size so that we 'locate' it correctly either in
6204 registers or on the stack. */
6205 gcc_assert (nregs >= 0);
6207 nregs2 = nregs ? nregs : 1;
6209 /* C4 - Argument fits entirely in core registers. */
6210 if (ncrn + nregs2 <= NUM_ARG_REGS)
6212 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6213 pcum->aapcs_next_ncrn = ncrn + nregs;
6214 return;
6217 /* C5 - Some core registers left and there are no arguments already
6218 on the stack: split this argument between the remaining core
6219 registers and the stack. */
6220 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6222 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6223 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6224 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6225 return;
6228 /* C6 - NCRN is set to 4. */
6229 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6231 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6232 return;
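/* A worked example of rules C3-C7 above for a base-AAPCS call such as the
   hypothetical  f (int a, double d, int b):

     a: ncrn 0, one register   -> r0, ncrn becomes 1          (C4)
     d: needs doubleword alignment, ncrn rounds 1 -> 2,
        two registers          -> r2-r3, ncrn becomes 4       (C3, C4)
     b: no core registers left                                 (C6)
        -> passed on the stack                                 (C7/C8)  */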
6235 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6236 for a call to a function whose data type is FNTYPE.
6237 For a library call, FNTYPE is NULL. */
6238 void
6239 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6240 rtx libname,
6241 tree fndecl ATTRIBUTE_UNUSED)
6243 /* Long call handling. */
6244 if (fntype)
6245 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6246 else
6247 pcum->pcs_variant = arm_pcs_default;
6249 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6251 if (arm_libcall_uses_aapcs_base (libname))
6252 pcum->pcs_variant = ARM_PCS_AAPCS;
6254 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6255 pcum->aapcs_reg = NULL_RTX;
6256 pcum->aapcs_partial = 0;
6257 pcum->aapcs_arg_processed = false;
6258 pcum->aapcs_cprc_slot = -1;
6259 pcum->can_split = true;
6261 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6263 int i;
6265 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6267 pcum->aapcs_cprc_failed[i] = false;
6268 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6271 return;
6274 /* Legacy ABIs */
6276 /* On the ARM, the offset starts at 0. */
6277 pcum->nregs = 0;
6278 pcum->iwmmxt_nregs = 0;
6279 pcum->can_split = true;
6281 /* Varargs vectors are treated the same as long long.
6282 named_count avoids having to change the way arm handles 'named' */
6283 pcum->named_count = 0;
6284 pcum->nargs = 0;
6286 if (TARGET_REALLY_IWMMXT && fntype)
6288 tree fn_arg;
6290 for (fn_arg = TYPE_ARG_TYPES (fntype);
6291 fn_arg;
6292 fn_arg = TREE_CHAIN (fn_arg))
6293 pcum->named_count += 1;
6295 if (! pcum->named_count)
6296 pcum->named_count = INT_MAX;
6300 /* Return true if mode/type need doubleword alignment. */
6301 static bool
6302 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6304 if (!type)
6305 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6307 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6308 if (!AGGREGATE_TYPE_P (type))
6309 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6311 /* Array types: Use member alignment of element type. */
6312 if (TREE_CODE (type) == ARRAY_TYPE)
6313 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6315 /* Record/aggregate types: Use greatest member alignment of any member. */
6316 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6317 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6318 return true;
6320 return false;
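/* Examples of the rule above, assuming the usual 32-bit PARM_BOUNDARY and
   hypothetical types:

     int, float, pointers                  no   (natural alignment 32)
     long long, double                     yes  (natural alignment 64)
     struct s { long long x; int y; };     yes  (member with 64-bit alignment)
     char a[16]                            no   (element alignment is 8)  */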
6324 /* Determine where to put an argument to a function.
6325 Value is zero to push the argument on the stack,
6326 or a hard register in which to store the argument.
6328 MODE is the argument's machine mode.
6329 TYPE is the data type of the argument (as a tree).
6330 This is null for libcalls where that information may
6331 not be available.
6332 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6333 the preceding args and about the function being called.
6334 NAMED is nonzero if this argument is a named parameter
6335 (otherwise it is an extra parameter matching an ellipsis).
6337 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6338 other arguments are passed on the stack. If (NAMED == 0) (which happens
6339 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6340 defined), say it is passed in the stack (function_prologue will
6341 indeed make it pass in the stack if necessary). */
6343 static rtx
6344 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6345 const_tree type, bool named)
6347 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6348 int nregs;
6350 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6351 a call insn (op3 of a call_value insn). */
6352 if (mode == VOIDmode)
6353 return const0_rtx;
6355 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6357 aapcs_layout_arg (pcum, mode, type, named);
6358 return pcum->aapcs_reg;
6361 /* Varargs vectors are treated the same as long long.
6362 named_count avoids having to change the way arm handles 'named' */
6363 if (TARGET_IWMMXT_ABI
6364 && arm_vector_mode_supported_p (mode)
6365 && pcum->named_count > pcum->nargs + 1)
6367 if (pcum->iwmmxt_nregs <= 9)
6368 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6369 else
6371 pcum->can_split = false;
6372 return NULL_RTX;
6376 /* Put doubleword aligned quantities in even register pairs. */
6377 if (pcum->nregs & 1
6378 && ARM_DOUBLEWORD_ALIGN
6379 && arm_needs_doubleword_align (mode, type))
6380 pcum->nregs++;
6382 /* Only allow splitting an arg between regs and memory if all preceding
6383 args were allocated to regs. For args passed by reference we only count
6384 the reference pointer. */
6385 if (pcum->can_split)
6386 nregs = 1;
6387 else
6388 nregs = ARM_NUM_REGS2 (mode, type);
6390 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6391 return NULL_RTX;
6393 return gen_rtx_REG (mode, pcum->nregs);
6396 static unsigned int
6397 arm_function_arg_boundary (machine_mode mode, const_tree type)
6399 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6400 ? DOUBLEWORD_ALIGNMENT
6401 : PARM_BOUNDARY);
6404 static int
6405 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6406 tree type, bool named)
6408 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6409 int nregs = pcum->nregs;
6411 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6413 aapcs_layout_arg (pcum, mode, type, named);
6414 return pcum->aapcs_partial;
6417 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6418 return 0;
6420 if (NUM_ARG_REGS > nregs
6421 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6422 && pcum->can_split)
6423 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6425 return 0;
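/* Sketch of the splitting case in the legacy-ABI path above: with three
   argument words already used (nregs == 3) and an 8-byte argument
   (ARM_NUM_REGS2 == 2) that is allowed to split, NUM_ARG_REGS (4) is greater
   than 3 but smaller than 3 + 2, so (4 - 3) * UNITS_PER_WORD = 4 bytes go in
   r3 and the remainder goes on the stack.  */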
6428 /* Update the data in PCUM to advance over an argument
6429 of mode MODE and data type TYPE.
6430 (TYPE is null for libcalls where that information may not be available.) */
6432 static void
6433 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6434 const_tree type, bool named)
6436 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6438 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6440 aapcs_layout_arg (pcum, mode, type, named);
6442 if (pcum->aapcs_cprc_slot >= 0)
6444 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6445 type);
6446 pcum->aapcs_cprc_slot = -1;
6449 /* Generic stuff. */
6450 pcum->aapcs_arg_processed = false;
6451 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6452 pcum->aapcs_reg = NULL_RTX;
6453 pcum->aapcs_partial = 0;
6455 else
6457 pcum->nargs += 1;
6458 if (arm_vector_mode_supported_p (mode)
6459 && pcum->named_count > pcum->nargs
6460 && TARGET_IWMMXT_ABI)
6461 pcum->iwmmxt_nregs += 1;
6462 else
6463 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6467 /* Variable sized types are passed by reference. This is a GCC
6468 extension to the ARM ABI. */
6470 static bool
6471 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6472 machine_mode mode ATTRIBUTE_UNUSED,
6473 const_tree type, bool named ATTRIBUTE_UNUSED)
6475 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6478 /* Encode the current state of the #pragma [no_]long_calls. */
6479 typedef enum
6481 OFF, /* No #pragma [no_]long_calls is in effect. */
6482 LONG, /* #pragma long_calls is in effect. */
6483 SHORT /* #pragma no_long_calls is in effect. */
6484 } arm_pragma_enum;
6486 static arm_pragma_enum arm_pragma_long_calls = OFF;
6488 void
6489 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6491 arm_pragma_long_calls = LONG;
6494 void
6495 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6497 arm_pragma_long_calls = SHORT;
6500 void
6501 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6503 arm_pragma_long_calls = OFF;
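/* For illustration, how the pragma handlers above are driven from source
   (hypothetical declarations):

     #pragma long_calls
     void far_fn (void);        gets an implicit "long_call" attribute
     #pragma no_long_calls
     void near_fn (void);       gets an implicit "short_call" attribute
     #pragma long_calls_off
     void plain_fn (void);      back to the command-line default

   The implicit attributes are attached in arm_set_default_type_attributes
   below.  */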
6506 /* Handle an attribute requiring a FUNCTION_DECL;
6507 arguments as in struct attribute_spec.handler. */
6508 static tree
6509 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6510 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6512 if (TREE_CODE (*node) != FUNCTION_DECL)
6514 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6515 name);
6516 *no_add_attrs = true;
6519 return NULL_TREE;
6522 /* Handle an "interrupt" or "isr" attribute;
6523 arguments as in struct attribute_spec.handler. */
6524 static tree
6525 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6526 bool *no_add_attrs)
6528 if (DECL_P (*node))
6530 if (TREE_CODE (*node) != FUNCTION_DECL)
6532 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6533 name);
6534 *no_add_attrs = true;
6536 /* FIXME: the argument if any is checked for type attributes;
6537 should it be checked for decl ones? */
6539 else
6541 if (TREE_CODE (*node) == FUNCTION_TYPE
6542 || TREE_CODE (*node) == METHOD_TYPE)
6544 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6546 warning (OPT_Wattributes, "%qE attribute ignored",
6547 name);
6548 *no_add_attrs = true;
6551 else if (TREE_CODE (*node) == POINTER_TYPE
6552 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6553 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6554 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6556 *node = build_variant_type_copy (*node);
6557 TREE_TYPE (*node) = build_type_attribute_variant
6558 (TREE_TYPE (*node),
6559 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6560 *no_add_attrs = true;
6562 else
6564 /* Possibly pass this attribute on from the type to a decl. */
6565 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6566 | (int) ATTR_FLAG_FUNCTION_NEXT
6567 | (int) ATTR_FLAG_ARRAY_NEXT))
6569 *no_add_attrs = true;
6570 return tree_cons (name, args, NULL_TREE);
6572 else
6574 warning (OPT_Wattributes, "%qE attribute ignored",
6575 name);
6580 return NULL_TREE;
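/* For illustration, typical uses of the attribute handled above
   (hypothetical handler names):

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fiq_handler (void) __attribute__ ((isr ("FIQ")));

   An argument string that arm_isr_value does not recognize causes the
   attribute to be ignored with a warning, as coded above.  */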
6583 /* Handle a "pcs" attribute; arguments as in struct
6584 attribute_spec.handler. */
6585 static tree
6586 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6587 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6589 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6591 warning (OPT_Wattributes, "%qE attribute ignored", name);
6592 *no_add_attrs = true;
6594 return NULL_TREE;
6597 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6598 /* Handle the "notshared" attribute. This attribute is another way of
6599 requesting hidden visibility. ARM's compiler supports
6600 "__declspec(notshared)"; we support the same thing via an
6601 attribute. */
6603 static tree
6604 arm_handle_notshared_attribute (tree *node,
6605 tree name ATTRIBUTE_UNUSED,
6606 tree args ATTRIBUTE_UNUSED,
6607 int flags ATTRIBUTE_UNUSED,
6608 bool *no_add_attrs)
6610 tree decl = TYPE_NAME (*node);
6612 if (decl)
6614 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6615 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6616 *no_add_attrs = false;
6618 return NULL_TREE;
6620 #endif
6622 /* Return 0 if the attributes for two types are incompatible, 1 if they
6623 are compatible, and 2 if they are nearly compatible (which causes a
6624 warning to be generated). */
6625 static int
6626 arm_comp_type_attributes (const_tree type1, const_tree type2)
6628 int l1, l2, s1, s2;
6630 /* Check for mismatch of non-default calling convention. */
6631 if (TREE_CODE (type1) != FUNCTION_TYPE)
6632 return 1;
6634 /* Check for mismatched call attributes. */
6635 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6636 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6637 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6638 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6640 /* Only bother to check if an attribute is defined. */
6641 if (l1 | l2 | s1 | s2)
6643 /* If one type has an attribute, the other must have the same attribute. */
6644 if ((l1 != l2) || (s1 != s2))
6645 return 0;
6647 /* Disallow mixed attributes. */
6648 if ((l1 & s2) || (l2 & s1))
6649 return 0;
6652 /* Check for mismatched ISR attribute. */
6653 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6654 if (! l1)
6655 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6656 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6657 if (! l2)
6658 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6659 if (l1 != l2)
6660 return 0;
6662 return 1;
6665 /* Assigns default attributes to newly defined type. This is used to
6666 set short_call/long_call attributes for function types of
6667 functions defined inside corresponding #pragma scopes. */
6668 static void
6669 arm_set_default_type_attributes (tree type)
6671 /* Add __attribute__ ((long_call)) to all functions, when
6672 inside #pragma long_calls or __attribute__ ((short_call)),
6673 when inside #pragma no_long_calls. */
6674 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6676 tree type_attr_list, attr_name;
6677 type_attr_list = TYPE_ATTRIBUTES (type);
6679 if (arm_pragma_long_calls == LONG)
6680 attr_name = get_identifier ("long_call");
6681 else if (arm_pragma_long_calls == SHORT)
6682 attr_name = get_identifier ("short_call");
6683 else
6684 return;
6686 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6687 TYPE_ATTRIBUTES (type) = type_attr_list;
6691 /* Return true if DECL is known to be linked into section SECTION. */
6693 static bool
6694 arm_function_in_section_p (tree decl, section *section)
6696 /* We can only be certain about the prevailing symbol definition. */
6697 if (!decl_binds_to_current_def_p (decl))
6698 return false;
6700 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6701 if (!DECL_SECTION_NAME (decl))
6703 /* Make sure that we will not create a unique section for DECL. */
6704 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6705 return false;
6708 return function_section (decl) == section;
6711 /* Return nonzero if a 32-bit "long_call" should be generated for
6712 a call from the current function to DECL. We generate a long_call
6713 if the function:
6715 a. has an __attribute__ ((long_call))
6716 or b. is within the scope of a #pragma long_calls
6717 or c. the -mlong-calls command line switch has been specified
6719 However we do not generate a long call if the function:
6721 d. has an __attribute__ ((short_call))
6722 or e. is inside the scope of a #pragma no_long_calls
6723 or f. is defined in the same section as the current function. */
6725 bool
6726 arm_is_long_call_p (tree decl)
6728 tree attrs;
6730 if (!decl)
6731 return TARGET_LONG_CALLS;
6733 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6734 if (lookup_attribute ("short_call", attrs))
6735 return false;
6737 /* For "f", be conservative, and only cater for cases in which the
6738 whole of the current function is placed in the same section. */
6739 if (!flag_reorder_blocks_and_partition
6740 && TREE_CODE (decl) == FUNCTION_DECL
6741 && arm_function_in_section_p (decl, current_function_section ()))
6742 return false;
6744 if (lookup_attribute ("long_call", attrs))
6745 return true;
6747 return TARGET_LONG_CALLS;
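/* Sketch of how rules a-f above combine, using hypothetical declarations
   compiled with -mlong-calls:

     void ext_fn (void);                                 long call (rule c)
     void near_fn (void) __attribute__ ((short_call));   direct call (rule d)
     static void local_fn (void) { }                     direct call when it
                                                         ends up in the same
                                                         section as the
                                                         caller (rule f)  */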
6750 /* Return nonzero if it is ok to make a tail-call to DECL. */
6751 static bool
6752 arm_function_ok_for_sibcall (tree decl, tree exp)
6754 unsigned long func_type;
6756 if (cfun->machine->sibcall_blocked)
6757 return false;
6759 /* Never tailcall something if we are generating code for Thumb-1. */
6760 if (TARGET_THUMB1)
6761 return false;
6763 /* The PIC register is live on entry to VxWorks PLT entries, so we
6764 must make the call before restoring the PIC register. */
6765 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
6766 return false;
6768 /* If we are interworking and the function is not declared static
6769 then we can't tail-call it unless we know that it exists in this
6770 compilation unit (since it might be a Thumb routine). */
6771 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6772 && !TREE_ASM_WRITTEN (decl))
6773 return false;
6775 func_type = arm_current_func_type ();
6776 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6777 if (IS_INTERRUPT (func_type))
6778 return false;
6780 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6782 /* Check that the return value locations are the same. For
6783 example that we aren't returning a value from the sibling in
6784 a VFP register but then need to transfer it to a core
6785 register. */
6786 rtx a, b;
6787 tree decl_or_type = decl;
6789 /* If it is an indirect function pointer, get the function type. */
6790 if (!decl)
6791 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
6793 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
6794 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6795 cfun->decl, false);
6796 if (!rtx_equal_p (a, b))
6797 return false;
6800 /* Never tailcall if function may be called with a misaligned SP. */
6801 if (IS_STACKALIGN (func_type))
6802 return false;
6804 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6805 references should become a NOP. Don't convert such calls into
6806 sibling calls. */
6807 if (TARGET_AAPCS_BASED
6808 && arm_abi == ARM_ABI_AAPCS
6809 && decl
6810 && DECL_WEAK (decl))
6811 return false;
6813 /* Everything else is ok. */
6814 return true;
6818 /* Addressing mode support functions. */
6820 /* Return nonzero if X is a legitimate immediate operand when compiling
6821 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6823 legitimate_pic_operand_p (rtx x)
6825 if (GET_CODE (x) == SYMBOL_REF
6826 || (GET_CODE (x) == CONST
6827 && GET_CODE (XEXP (x, 0)) == PLUS
6828 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6829 return 0;
6831 return 1;
6834 /* Record that the current function needs a PIC register. Initialize
6835 cfun->machine->pic_reg if we have not already done so. */
6837 static void
6838 require_pic_register (void)
6840 /* A lot of the logic here is made obscure by the fact that this
6841 routine gets called as part of the rtx cost estimation process.
6842 We don't want those calls to affect any assumptions about the real
6843 function; and further, we can't call entry_of_function() until we
6844 start the real expansion process. */
6845 if (!crtl->uses_pic_offset_table)
6847 gcc_assert (can_create_pseudo_p ());
6848 if (arm_pic_register != INVALID_REGNUM
6849 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6851 if (!cfun->machine->pic_reg)
6852 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6854 /* Play games to avoid marking the function as needing pic
6855 if we are being called as part of the cost-estimation
6856 process. */
6857 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6858 crtl->uses_pic_offset_table = 1;
6860 else
6862 rtx_insn *seq, *insn;
6864 if (!cfun->machine->pic_reg)
6865 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6867 /* Play games to avoid marking the function as needing pic
6868 if we are being called as part of the cost-estimation
6869 process. */
6870 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6872 crtl->uses_pic_offset_table = 1;
6873 start_sequence ();
6875 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6876 && arm_pic_register > LAST_LO_REGNUM)
6877 emit_move_insn (cfun->machine->pic_reg,
6878 gen_rtx_REG (Pmode, arm_pic_register));
6879 else
6880 arm_load_pic_register (0UL);
6882 seq = get_insns ();
6883 end_sequence ();
6885 for (insn = seq; insn; insn = NEXT_INSN (insn))
6886 if (INSN_P (insn))
6887 INSN_LOCATION (insn) = prologue_location;
6889 /* We can be called during expansion of PHI nodes, where
6890 we can't yet emit instructions directly in the final
6891 insn stream. Queue the insns on the entry edge, they will
6892 be committed after everything else is expanded. */
6893 insert_insn_on_edge (seq,
6894 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6901 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6903 if (GET_CODE (orig) == SYMBOL_REF
6904 || GET_CODE (orig) == LABEL_REF)
6906 rtx insn;
6908 if (reg == 0)
6910 gcc_assert (can_create_pseudo_p ());
6911 reg = gen_reg_rtx (Pmode);
6914 /* VxWorks does not impose a fixed gap between segments; the run-time
6915 gap can be different from the object-file gap. We therefore can't
6916 use GOTOFF unless we are absolutely sure that the symbol is in the
6917 same segment as the GOT. Unfortunately, the flexibility of linker
6918 scripts means that we can't be sure of that in general, so assume
6919 that GOTOFF is never valid on VxWorks. */
6920 if ((GET_CODE (orig) == LABEL_REF
6921 || (GET_CODE (orig) == SYMBOL_REF &&
6922 SYMBOL_REF_LOCAL_P (orig)))
6923 && NEED_GOT_RELOC
6924 && arm_pic_data_is_text_relative)
6925 insn = arm_pic_static_addr (orig, reg);
6926 else
6928 rtx pat;
6929 rtx mem;
6931 /* If this function doesn't have a pic register, create one now. */
6932 require_pic_register ();
6934 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6936 /* Make the MEM as close to a constant as possible. */
6937 mem = SET_SRC (pat);
6938 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6939 MEM_READONLY_P (mem) = 1;
6940 MEM_NOTRAP_P (mem) = 1;
6942 insn = emit_insn (pat);
6945 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6946 by loop. */
6947 set_unique_reg_note (insn, REG_EQUAL, orig);
6949 return reg;
6951 else if (GET_CODE (orig) == CONST)
6953 rtx base, offset;
6955 if (GET_CODE (XEXP (orig, 0)) == PLUS
6956 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6957 return orig;
6959 /* Handle the case where we have: const (UNSPEC_TLS). */
6960 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6961 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6962 return orig;
6964 /* Handle the case where we have:
6965 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6966 CONST_INT. */
6967 if (GET_CODE (XEXP (orig, 0)) == PLUS
6968 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6969 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6971 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6972 return orig;
6975 if (reg == 0)
6977 gcc_assert (can_create_pseudo_p ());
6978 reg = gen_reg_rtx (Pmode);
6981 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6983 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6984 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6985 base == reg ? 0 : reg);
6987 if (CONST_INT_P (offset))
6989 /* The base register doesn't really matter, we only want to
6990 test the index for the appropriate mode. */
6991 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6993 gcc_assert (can_create_pseudo_p ());
6994 offset = force_reg (Pmode, offset);
6997 if (CONST_INT_P (offset))
6998 return plus_constant (Pmode, base, INTVAL (offset));
7001 if (GET_MODE_SIZE (mode) > 4
7002 && (GET_MODE_CLASS (mode) == MODE_INT
7003 || TARGET_SOFT_FLOAT))
7005 emit_insn (gen_addsi3 (reg, base, offset));
7006 return reg;
7009 return gen_rtx_PLUS (Pmode, base, offset);
7012 return orig;
7016 /* Find a spare register to use during the prolog of a function. */
7018 static int
7019 thumb_find_work_register (unsigned long pushed_regs_mask)
7021 int reg;
7023 /* Check the argument registers first as these are call-used. The
7024 register allocation order means that sometimes r3 might be used
7025 but earlier argument registers might not, so check them all. */
7026 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7027 if (!df_regs_ever_live_p (reg))
7028 return reg;
7030 /* Before going on to check the call-saved registers we can try a couple
7031 more ways of deducing that r3 is available. The first is when we are
7032 pushing anonymous arguments onto the stack and we have less than 4
7033 registers worth of fixed arguments(*). In this case r3 will be part of
7034 the variable argument list and so we can be sure that it will be
7035 pushed right at the start of the function. Hence it will be available
7036 for the rest of the prologue.
7037 (*): ie crtl->args.pretend_args_size is greater than 0. */
7038 if (cfun->machine->uses_anonymous_args
7039 && crtl->args.pretend_args_size > 0)
7040 return LAST_ARG_REGNUM;
7042 /* The other case is when we have fixed arguments but less than 4 registers
7043 worth. In this case r3 might be used in the body of the function, but
7044 it is not being used to convey an argument into the function. In theory
7045 we could just check crtl->args.size to see how many bytes are
7046 being passed in argument registers, but it seems that it is unreliable.
7047 Sometimes it will have the value 0 when in fact arguments are being
7048 passed. (See testcase execute/20021111-1.c for an example). So we also
7049 check the args_info.nregs field as well. The problem with this field is
7050 that it makes no allowances for arguments that are passed to the
7051 function but which are not used. Hence we could miss an opportunity
7052 when a function has an unused argument in r3. But it is better to be
7053 safe than to be sorry. */
7054 if (! cfun->machine->uses_anonymous_args
7055 && crtl->args.size >= 0
7056 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7057 && (TARGET_AAPCS_BASED
7058 ? crtl->args.info.aapcs_ncrn < 4
7059 : crtl->args.info.nregs < 4))
7060 return LAST_ARG_REGNUM;
7062 /* Otherwise look for a call-saved register that is going to be pushed. */
7063 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7064 if (pushed_regs_mask & (1 << reg))
7065 return reg;
7067 if (TARGET_THUMB2)
7069 /* Thumb-2 can use high regs. */
7070 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7071 if (pushed_regs_mask & (1 << reg))
7072 return reg;
7074 /* Something went wrong - thumb_compute_save_reg_mask()
7075 should have arranged for a suitable register to be pushed. */
7076 gcc_unreachable ();
7079 static GTY(()) int pic_labelno;
7081 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7082 low register. */
7084 void
7085 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7087 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7089 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7090 return;
7092 gcc_assert (flag_pic);
7094 pic_reg = cfun->machine->pic_reg;
7095 if (TARGET_VXWORKS_RTP)
7097 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7098 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7099 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7101 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7103 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7104 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7106 else
7108 /* We use an UNSPEC rather than a LABEL_REF because this label
7109 never appears in the code stream. */
7111 labelno = GEN_INT (pic_labelno++);
7112 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7113 l1 = gen_rtx_CONST (VOIDmode, l1);
7115 /* On the ARM the PC register contains 'dot + 8' at the time of the
7116 addition, on the Thumb it is 'dot + 4'. */
7117 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7118 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7119 UNSPEC_GOTSYM_OFF);
7120 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7122 if (TARGET_32BIT)
7124 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7126 else /* TARGET_THUMB1 */
7128 if (arm_pic_register != INVALID_REGNUM
7129 && REGNO (pic_reg) > LAST_LO_REGNUM)
7131 /* We will have pushed the pic register, so we should always be
7132 able to find a work register. */
7133 pic_tmp = gen_rtx_REG (SImode,
7134 thumb_find_work_register (saved_regs));
7135 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7136 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7137 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7139 else if (arm_pic_register != INVALID_REGNUM
7140 && arm_pic_register > LAST_LO_REGNUM
7141 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7143 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7144 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7145 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7147 else
7148 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7152 /* Need to emit this whether or not we obey regdecls,
7153 since setjmp/longjmp can cause life info to screw up. */
7154 emit_use (pic_reg);
7157 /* Generate code to load the address of a static var when flag_pic is set. */
7158 static rtx
7159 arm_pic_static_addr (rtx orig, rtx reg)
7161 rtx l1, labelno, offset_rtx, insn;
7163 gcc_assert (flag_pic);
7165 /* We use an UNSPEC rather than a LABEL_REF because this label
7166 never appears in the code stream. */
7167 labelno = GEN_INT (pic_labelno++);
7168 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7169 l1 = gen_rtx_CONST (VOIDmode, l1);
7171 /* On the ARM the PC register contains 'dot + 8' at the time of the
7172 addition, on the Thumb it is 'dot + 4'. */
7173 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7174 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7175 UNSPEC_SYMBOL_OFFSET);
7176 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7178 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7179 return insn;
7182 /* Return nonzero if X is valid as an ARM state addressing register. */
7183 static int
7184 arm_address_register_rtx_p (rtx x, int strict_p)
7186 int regno;
7188 if (!REG_P (x))
7189 return 0;
7191 regno = REGNO (x);
7193 if (strict_p)
7194 return ARM_REGNO_OK_FOR_BASE_P (regno);
7196 return (regno <= LAST_ARM_REGNUM
7197 || regno >= FIRST_PSEUDO_REGISTER
7198 || regno == FRAME_POINTER_REGNUM
7199 || regno == ARG_POINTER_REGNUM);
7202 /* Return TRUE if this rtx is the difference of a symbol and a label,
7203 and will reduce to a PC-relative relocation in the object file.
7204 Expressions like this can be left alone when generating PIC, rather
7205 than forced through the GOT. */
7206 static int
7207 pcrel_constant_p (rtx x)
7209 if (GET_CODE (x) == MINUS)
7210 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7212 return FALSE;
7215 /* Return true if X will surely end up in an index register after next
7216 splitting pass. */
7217 static bool
7218 will_be_in_index_register (const_rtx x)
7220 /* arm.md: calculate_pic_address will split this into a register. */
7221 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7224 /* Return nonzero if X is a valid ARM state address operand. */
7226 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7227 int strict_p)
7229 bool use_ldrd;
7230 enum rtx_code code = GET_CODE (x);
7232 if (arm_address_register_rtx_p (x, strict_p))
7233 return 1;
7235 use_ldrd = (TARGET_LDRD
7236 && (mode == DImode || mode == DFmode));
7238 if (code == POST_INC || code == PRE_DEC
7239 || ((code == PRE_INC || code == POST_DEC)
7240 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7241 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7243 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7244 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7245 && GET_CODE (XEXP (x, 1)) == PLUS
7246 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7248 rtx addend = XEXP (XEXP (x, 1), 1);
7250 /* Don't allow ldrd post increment by register because it's hard
7251 to fixup invalid register choices. */
7252 if (use_ldrd
7253 && GET_CODE (x) == POST_MODIFY
7254 && REG_P (addend))
7255 return 0;
7257 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7258 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7261 /* After reload constants split into minipools will have addresses
7262 from a LABEL_REF. */
7263 else if (reload_completed
7264 && (code == LABEL_REF
7265 || (code == CONST
7266 && GET_CODE (XEXP (x, 0)) == PLUS
7267 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7268 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7269 return 1;
7271 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7272 return 0;
7274 else if (code == PLUS)
7276 rtx xop0 = XEXP (x, 0);
7277 rtx xop1 = XEXP (x, 1);
7279 return ((arm_address_register_rtx_p (xop0, strict_p)
7280 && ((CONST_INT_P (xop1)
7281 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7282 || (!strict_p && will_be_in_index_register (xop1))))
7283 || (arm_address_register_rtx_p (xop1, strict_p)
7284 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7287 #if 0
7288 /* Reload currently can't handle MINUS, so disable this for now */
7289 else if (GET_CODE (x) == MINUS)
7291 rtx xop0 = XEXP (x, 0);
7292 rtx xop1 = XEXP (x, 1);
7294 return (arm_address_register_rtx_p (xop0, strict_p)
7295 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7297 #endif
7299 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7300 && code == SYMBOL_REF
7301 && CONSTANT_POOL_ADDRESS_P (x)
7302 && ! (flag_pic
7303 && symbol_mentioned_p (get_pool_constant (x))
7304 && ! pcrel_constant_p (get_pool_constant (x))))
7305 return 1;
7307 return 0;
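/* As an informal illustration of the checks above: for an SImode access,
   (plus (reg r1) (const_int 4092)) and (plus (reg r1) (mult (reg r2) (const_int 4)))
   are both accepted, while (plus (reg r1) (const_int 4096)) is not and has to
   be rewritten first (see arm_legitimize_address below).  The register
   numbers are only examples.  */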
7310 /* Return nonzero if X is a valid Thumb-2 address operand. */
7311 static int
7312 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7314 bool use_ldrd;
7315 enum rtx_code code = GET_CODE (x);
7317 if (arm_address_register_rtx_p (x, strict_p))
7318 return 1;
7320 use_ldrd = (TARGET_LDRD
7321 && (mode == DImode || mode == DFmode));
7323 if (code == POST_INC || code == PRE_DEC
7324 || ((code == PRE_INC || code == POST_DEC)
7325 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7326 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7328 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7329 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7330 && GET_CODE (XEXP (x, 1)) == PLUS
7331 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7333 /* Thumb-2 only has autoincrement by constant. */
7334 rtx addend = XEXP (XEXP (x, 1), 1);
7335 HOST_WIDE_INT offset;
7337 if (!CONST_INT_P (addend))
7338 return 0;
7340 offset = INTVAL(addend);
7341 if (GET_MODE_SIZE (mode) <= 4)
7342 return (offset > -256 && offset < 256);
7344 return (use_ldrd && offset > -1024 && offset < 1024
7345 && (offset & 3) == 0);
7348 /* After reload constants split into minipools will have addresses
7349 from a LABEL_REF. */
7350 else if (reload_completed
7351 && (code == LABEL_REF
7352 || (code == CONST
7353 && GET_CODE (XEXP (x, 0)) == PLUS
7354 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7355 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7356 return 1;
7358 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7359 return 0;
7361 else if (code == PLUS)
7363 rtx xop0 = XEXP (x, 0);
7364 rtx xop1 = XEXP (x, 1);
7366 return ((arm_address_register_rtx_p (xop0, strict_p)
7367 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7368 || (!strict_p && will_be_in_index_register (xop1))))
7369 || (arm_address_register_rtx_p (xop1, strict_p)
7370 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7373 /* Normally we can assign constant values to target registers without
7374 the help of the constant pool.  But there are cases where we have to use
7375 the constant pool, for example:
7376 1) assigning a label to a register;
7377 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7379 A constant pool access of the form:
7380 (set (reg r0) (mem (symbol_ref (".LC0"))))
7381 will cause the literal pool to be used (later, in function arm_reorg).
7382 So here we mark such a form as invalid; the compiler will then
7383 adjust it into:
7384 (set (reg r0) (symbol_ref (".LC0")))
7385 (set (reg r0) (mem (reg r0))).
7386 No extra register is required, and (mem (reg r0)) won't cause the use
7387 of literal pools. */
7388 else if (arm_disable_literal_pool && code == SYMBOL_REF
7389 && CONSTANT_POOL_ADDRESS_P (x))
7390 return 0;
7392 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7393 && code == SYMBOL_REF
7394 && CONSTANT_POOL_ADDRESS_P (x)
7395 && ! (flag_pic
7396 && symbol_mentioned_p (get_pool_constant (x))
7397 && ! pcrel_constant_p (get_pool_constant (x))))
7398 return 1;
7400 return 0;
7403 /* Return nonzero if INDEX is valid for an address index operand in
7404 ARM state. */
7405 static int
7406 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7407 int strict_p)
7409 HOST_WIDE_INT range;
7410 enum rtx_code code = GET_CODE (index);
7412 /* Standard coprocessor addressing modes. */
7413 if (TARGET_HARD_FLOAT
7414 && (mode == SFmode || mode == DFmode))
7415 return (code == CONST_INT && INTVAL (index) < 1024
7416 && INTVAL (index) > -1024
7417 && (INTVAL (index) & 3) == 0);
7419 /* For quad modes, we restrict the constant offset to be slightly less
7420 than what the instruction format permits. We do this because for
7421 quad mode moves, we will actually decompose them into two separate
7422 double-mode reads or writes. INDEX must therefore be a valid
7423 (double-mode) offset and so should INDEX+8. */
7424 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7425 return (code == CONST_INT
7426 && INTVAL (index) < 1016
7427 && INTVAL (index) > -1024
7428 && (INTVAL (index) & 3) == 0);
7430 /* We have no such constraint on double mode offsets, so we permit the
7431 full range of the instruction format. */
7432 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7433 return (code == CONST_INT
7434 && INTVAL (index) < 1024
7435 && INTVAL (index) > -1024
7436 && (INTVAL (index) & 3) == 0);
7438 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7439 return (code == CONST_INT
7440 && INTVAL (index) < 1024
7441 && INTVAL (index) > -1024
7442 && (INTVAL (index) & 3) == 0);
7444 if (arm_address_register_rtx_p (index, strict_p)
7445 && (GET_MODE_SIZE (mode) <= 4))
7446 return 1;
7448 if (mode == DImode || mode == DFmode)
7450 if (code == CONST_INT)
7452 HOST_WIDE_INT val = INTVAL (index);
7454 if (TARGET_LDRD)
7455 return val > -256 && val < 256;
7456 else
7457 return val > -4096 && val < 4092;
7460 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7463 if (GET_MODE_SIZE (mode) <= 4
7464 && ! (arm_arch4
7465 && (mode == HImode
7466 || mode == HFmode
7467 || (mode == QImode && outer == SIGN_EXTEND))))
7469 if (code == MULT)
7471 rtx xiop0 = XEXP (index, 0);
7472 rtx xiop1 = XEXP (index, 1);
7474 return ((arm_address_register_rtx_p (xiop0, strict_p)
7475 && power_of_two_operand (xiop1, SImode))
7476 || (arm_address_register_rtx_p (xiop1, strict_p)
7477 && power_of_two_operand (xiop0, SImode)));
7479 else if (code == LSHIFTRT || code == ASHIFTRT
7480 || code == ASHIFT || code == ROTATERT)
7482 rtx op = XEXP (index, 1);
7484 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7485 && CONST_INT_P (op)
7486 && INTVAL (op) > 0
7487 && INTVAL (op) <= 31);
7491 /* For ARM v4 we may be doing a sign-extend operation during the
7492 load. */
7493 if (arm_arch4)
7495 if (mode == HImode
7496 || mode == HFmode
7497 || (outer == SIGN_EXTEND && mode == QImode))
7498 range = 256;
7499 else
7500 range = 4096;
7502 else
7503 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7505 return (code == CONST_INT
7506 && INTVAL (index) < range
7507 && INTVAL (index) > -range);
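/* Worked examples of the fall-through range check above: SImode and plain
   QImode accesses (LDR/LDRB) get range == 4096, i.e. constant offsets
   -4095..+4095; on ARMv4 and later, HImode, HFmode and sign-extended QImode
   accesses (LDRH/LDRSH/LDRSB) get range == 256, i.e. offsets -255..+255;
   before ARMv4, HImode and HFmode get range == 4095, i.e. -4094..+4094.  */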
7510 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7511 index operand, i.e. 1, 2, 4 or 8. */
7512 static bool
7513 thumb2_index_mul_operand (rtx op)
7515 HOST_WIDE_INT val;
7517 if (!CONST_INT_P (op))
7518 return false;
7520 val = INTVAL(op);
7521 return (val == 1 || val == 2 || val == 4 || val == 8);
7524 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7525 static int
7526 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7528 enum rtx_code code = GET_CODE (index);
7530 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7531 /* Standard coprocessor addressing modes. */
7532 if (TARGET_HARD_FLOAT
7533 && (mode == SFmode || mode == DFmode))
7534 return (code == CONST_INT && INTVAL (index) < 1024
7535 /* Thumb-2 allows only a > -256 index range for its core register
7536 load/stores. Since we allow SF/DF in core registers, we have
7537 to use the intersection between -256~4096 (core) and -1024~1024
7538 (coprocessor). */
7539 && INTVAL (index) > -256
7540 && (INTVAL (index) & 3) == 0);
7542 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7544 /* For DImode assume values will usually live in core regs
7545 and only allow LDRD addressing modes. */
7546 if (!TARGET_LDRD || mode != DImode)
7547 return (code == CONST_INT
7548 && INTVAL (index) < 1024
7549 && INTVAL (index) > -1024
7550 && (INTVAL (index) & 3) == 0);
7553 /* For quad modes, we restrict the constant offset to be slightly less
7554 than what the instruction format permits. We do this because for
7555 quad mode moves, we will actually decompose them into two separate
7556 double-mode reads or writes. INDEX must therefore be a valid
7557 (double-mode) offset and so should INDEX+8. */
7558 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7559 return (code == CONST_INT
7560 && INTVAL (index) < 1016
7561 && INTVAL (index) > -1024
7562 && (INTVAL (index) & 3) == 0);
7564 /* We have no such constraint on double mode offsets, so we permit the
7565 full range of the instruction format. */
7566 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7567 return (code == CONST_INT
7568 && INTVAL (index) < 1024
7569 && INTVAL (index) > -1024
7570 && (INTVAL (index) & 3) == 0);
7572 if (arm_address_register_rtx_p (index, strict_p)
7573 && (GET_MODE_SIZE (mode) <= 4))
7574 return 1;
7576 if (mode == DImode || mode == DFmode)
7578 if (code == CONST_INT)
7580 HOST_WIDE_INT val = INTVAL (index);
7581 /* ??? Can we assume ldrd for thumb2? */
7582 /* Thumb-2 ldrd only has reg+const addressing modes. */
7583 /* ldrd supports offsets of +-1020.
7584 However the ldr fallback does not. */
7585 return val > -256 && val < 256 && (val & 3) == 0;
7587 else
7588 return 0;
7591 if (code == MULT)
7593 rtx xiop0 = XEXP (index, 0);
7594 rtx xiop1 = XEXP (index, 1);
7596 return ((arm_address_register_rtx_p (xiop0, strict_p)
7597 && thumb2_index_mul_operand (xiop1))
7598 || (arm_address_register_rtx_p (xiop1, strict_p)
7599 && thumb2_index_mul_operand (xiop0)));
7601 else if (code == ASHIFT)
7603 rtx op = XEXP (index, 1);
7605 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7606 && CONST_INT_P (op)
7607 && INTVAL (op) > 0
7608 && INTVAL (op) <= 3);
7611 return (code == CONST_INT
7612 && INTVAL (index) < 4096
7613 && INTVAL (index) > -256);
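/* The final check above corresponds to the Thumb-2 core load/store
   immediates: positive offsets up to 4095 (the 12-bit encoding) and
   negative offsets down to -255 (the 8-bit negative-offset form).  */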
7616 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7617 static int
7618 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7620 int regno;
7622 if (!REG_P (x))
7623 return 0;
7625 regno = REGNO (x);
7627 if (strict_p)
7628 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7630 return (regno <= LAST_LO_REGNUM
7631 || regno > LAST_VIRTUAL_REGISTER
7632 || regno == FRAME_POINTER_REGNUM
7633 || (GET_MODE_SIZE (mode) >= 4
7634 && (regno == STACK_POINTER_REGNUM
7635 || regno >= FIRST_PSEUDO_REGISTER
7636 || x == hard_frame_pointer_rtx
7637 || x == arg_pointer_rtx)));
7640 /* Return nonzero if x is a legitimate index register. This is the case
7641 for any base register that can access a QImode object. */
7642 inline static int
7643 thumb1_index_register_rtx_p (rtx x, int strict_p)
7645 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7648 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7650 The AP may be eliminated to either the SP or the FP, so we use the
7651 least common denominator, e.g. SImode, and offsets from 0 to 64.
7653 ??? Verify whether the above is the right approach.
7655 ??? Also, the FP may be eliminated to the SP, so perhaps that
7656 needs special handling also.
7658 ??? Look at how the mips16 port solves this problem. It probably uses
7659 better ways to solve some of these problems.
7661 Although it is not incorrect, we don't accept QImode and HImode
7662 addresses based on the frame pointer or arg pointer until the
7663 reload pass starts. This is so that eliminating such addresses
7664 into stack based ones won't produce impossible code. */
7666 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7668 /* ??? Not clear if this is right. Experiment. */
7669 if (GET_MODE_SIZE (mode) < 4
7670 && !(reload_in_progress || reload_completed)
7671 && (reg_mentioned_p (frame_pointer_rtx, x)
7672 || reg_mentioned_p (arg_pointer_rtx, x)
7673 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7674 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7675 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7676 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7677 return 0;
7679 /* Accept any base register. SP only in SImode or larger. */
7680 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7681 return 1;
7683 /* This is PC relative data before arm_reorg runs. */
7684 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7685 && GET_CODE (x) == SYMBOL_REF
7686 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7687 return 1;
7689 /* This is PC relative data after arm_reorg runs. */
7690 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7691 && reload_completed
7692 && (GET_CODE (x) == LABEL_REF
7693 || (GET_CODE (x) == CONST
7694 && GET_CODE (XEXP (x, 0)) == PLUS
7695 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7696 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7697 return 1;
7699 /* Post-inc indexing only supported for SImode and larger. */
7700 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7701 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7702 return 1;
7704 else if (GET_CODE (x) == PLUS)
7706 /* REG+REG address can be any two index registers. */
7707 /* We disallow FRAME+REG addressing since we know that FRAME
7708 will be replaced with STACK, and SP relative addressing only
7709 permits SP+OFFSET. */
7710 if (GET_MODE_SIZE (mode) <= 4
7711 && XEXP (x, 0) != frame_pointer_rtx
7712 && XEXP (x, 1) != frame_pointer_rtx
7713 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7714 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7715 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7716 return 1;
7718 /* REG+const has 5-7 bit offset for non-SP registers. */
7719 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7720 || XEXP (x, 0) == arg_pointer_rtx)
7721 && CONST_INT_P (XEXP (x, 1))
7722 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7723 return 1;
7725 /* REG+const has 10-bit offset for SP, but only SImode and
7726 larger are supported. */
7727 /* ??? Should probably check for DI/DFmode overflow here
7728 just like GO_IF_LEGITIMATE_OFFSET does. */
7729 else if (REG_P (XEXP (x, 0))
7730 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7731 && GET_MODE_SIZE (mode) >= 4
7732 && CONST_INT_P (XEXP (x, 1))
7733 && INTVAL (XEXP (x, 1)) >= 0
7734 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7735 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7736 return 1;
7738 else if (REG_P (XEXP (x, 0))
7739 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7740 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7741 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7742 && REGNO (XEXP (x, 0))
7743 <= LAST_VIRTUAL_POINTER_REGISTER))
7744 && GET_MODE_SIZE (mode) >= 4
7745 && CONST_INT_P (XEXP (x, 1))
7746 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7747 return 1;
7750 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7751 && GET_MODE_SIZE (mode) == 4
7752 && GET_CODE (x) == SYMBOL_REF
7753 && CONSTANT_POOL_ADDRESS_P (x)
7754 && ! (flag_pic
7755 && symbol_mentioned_p (get_pool_constant (x))
7756 && ! pcrel_constant_p (get_pool_constant (x))))
7757 return 1;
7759 return 0;
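/* Informal examples for an SImode access under the rules above:
   (plus (reg sp) (const_int 1020))  -- SP-relative, 10-bit scaled offset
   (plus (reg r3) (const_int 124))   -- reg+imm, see thumb_legitimate_offset_p
   (plus (reg r3) (reg r4))          -- reg+reg
   are accepted, whereas (plus (reg r3) (const_int 128)) is not and must be
   legitimized (see thumb_legitimize_address further below).  The register
   numbers are only examples.  */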
7762 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7763 instruction of mode MODE. */
7765 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7767 switch (GET_MODE_SIZE (mode))
7769 case 1:
7770 return val >= 0 && val < 32;
7772 case 2:
7773 return val >= 0 && val < 64 && (val & 1) == 0;
7775 default:
7776 return (val >= 0
7777 && (val + GET_MODE_SIZE (mode)) <= 128
7778 && (val & 3) == 0);
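/* So, for example, a byte access allows offsets 0..31, a halfword access
   allows even offsets 0..62, and a word access allows word-aligned offsets
   0..124 (since 124 + 4 == 128).  */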
7782 bool
7783 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7785 if (TARGET_ARM)
7786 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7787 else if (TARGET_THUMB2)
7788 return thumb2_legitimate_address_p (mode, x, strict_p);
7789 else /* if (TARGET_THUMB1) */
7790 return thumb1_legitimate_address_p (mode, x, strict_p);
7793 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7795 Given an rtx X being reloaded into a reg required to be
7796 in class CLASS, return the class of reg to actually use.
7797 In general this is just CLASS, but for the Thumb core registers and
7798 immediate constants we prefer a LO_REGS class or a subset. */
7800 static reg_class_t
7801 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7803 if (TARGET_32BIT)
7804 return rclass;
7805 else
7807 if (rclass == GENERAL_REGS)
7808 return LO_REGS;
7809 else
7810 return rclass;
7814 /* Build the SYMBOL_REF for __tls_get_addr. */
7816 static GTY(()) rtx tls_get_addr_libfunc;
7818 static rtx
7819 get_tls_get_addr (void)
7821 if (!tls_get_addr_libfunc)
7822 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7823 return tls_get_addr_libfunc;
7827 arm_load_tp (rtx target)
7829 if (!target)
7830 target = gen_reg_rtx (SImode);
7832 if (TARGET_HARD_TP)
7834 /* Can return in any reg. */
7835 emit_insn (gen_load_tp_hard (target));
7837 else
7839 /* Always returned in r0. Immediately copy the result into a pseudo,
7840 otherwise other uses of r0 (e.g. setting up function arguments) may
7841 clobber the value. */
7843 rtx tmp;
7845 emit_insn (gen_load_tp_soft ());
7847 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7848 emit_move_insn (target, tmp);
7850 return target;
7853 static rtx
7854 load_tls_operand (rtx x, rtx reg)
7856 rtx tmp;
7858 if (reg == NULL_RTX)
7859 reg = gen_reg_rtx (SImode);
7861 tmp = gen_rtx_CONST (SImode, x);
7863 emit_move_insn (reg, tmp);
7865 return reg;
7868 static rtx_insn *
7869 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7871 rtx label, labelno, sum;
7873 gcc_assert (reloc != TLS_DESCSEQ);
7874 start_sequence ();
7876 labelno = GEN_INT (pic_labelno++);
7877 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7878 label = gen_rtx_CONST (VOIDmode, label);
7880 sum = gen_rtx_UNSPEC (Pmode,
7881 gen_rtvec (4, x, GEN_INT (reloc), label,
7882 GEN_INT (TARGET_ARM ? 8 : 4)),
7883 UNSPEC_TLS);
7884 reg = load_tls_operand (sum, reg);
7886 if (TARGET_ARM)
7887 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7888 else
7889 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7891 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7892 LCT_PURE, /* LCT_CONST? */
7893 Pmode, 1, reg, Pmode);
7895 rtx_insn *insns = get_insns ();
7896 end_sequence ();
7898 return insns;
7901 static rtx
7902 arm_tls_descseq_addr (rtx x, rtx reg)
7904 rtx labelno = GEN_INT (pic_labelno++);
7905 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7906 rtx sum = gen_rtx_UNSPEC (Pmode,
7907 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7908 gen_rtx_CONST (VOIDmode, label),
7909 GEN_INT (!TARGET_ARM)),
7910 UNSPEC_TLS);
7911 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7913 emit_insn (gen_tlscall (x, labelno));
7914 if (!reg)
7915 reg = gen_reg_rtx (SImode);
7916 else
7917 gcc_assert (REGNO (reg) != R0_REGNUM);
7919 emit_move_insn (reg, reg0);
7921 return reg;
7925 legitimize_tls_address (rtx x, rtx reg)
7927 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
7928 rtx_insn *insns;
7929 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7931 switch (model)
7933 case TLS_MODEL_GLOBAL_DYNAMIC:
7934 if (TARGET_GNU2_TLS)
7936 reg = arm_tls_descseq_addr (x, reg);
7938 tp = arm_load_tp (NULL_RTX);
7940 dest = gen_rtx_PLUS (Pmode, tp, reg);
7942 else
7944 /* Original scheme */
7945 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7946 dest = gen_reg_rtx (Pmode);
7947 emit_libcall_block (insns, dest, ret, x);
7949 return dest;
7951 case TLS_MODEL_LOCAL_DYNAMIC:
7952 if (TARGET_GNU2_TLS)
7954 reg = arm_tls_descseq_addr (x, reg);
7956 tp = arm_load_tp (NULL_RTX);
7958 dest = gen_rtx_PLUS (Pmode, tp, reg);
7960 else
7962 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7964 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7965 share the LDM result with other LD model accesses. */
7966 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7967 UNSPEC_TLS);
7968 dest = gen_reg_rtx (Pmode);
7969 emit_libcall_block (insns, dest, ret, eqv);
7971 /* Load the addend. */
7972 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7973 GEN_INT (TLS_LDO32)),
7974 UNSPEC_TLS);
7975 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7976 dest = gen_rtx_PLUS (Pmode, dest, addend);
7978 return dest;
7980 case TLS_MODEL_INITIAL_EXEC:
7981 labelno = GEN_INT (pic_labelno++);
7982 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7983 label = gen_rtx_CONST (VOIDmode, label);
7984 sum = gen_rtx_UNSPEC (Pmode,
7985 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7986 GEN_INT (TARGET_ARM ? 8 : 4)),
7987 UNSPEC_TLS);
7988 reg = load_tls_operand (sum, reg);
7990 if (TARGET_ARM)
7991 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7992 else if (TARGET_THUMB2)
7993 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7994 else
7996 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7997 emit_move_insn (reg, gen_const_mem (SImode, reg));
8000 tp = arm_load_tp (NULL_RTX);
8002 return gen_rtx_PLUS (Pmode, tp, reg);
8004 case TLS_MODEL_LOCAL_EXEC:
8005 tp = arm_load_tp (NULL_RTX);
8007 reg = gen_rtx_UNSPEC (Pmode,
8008 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8009 UNSPEC_TLS);
8010 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8012 return gen_rtx_PLUS (Pmode, tp, reg);
8014 default:
8015 abort ();
8019 /* Try machine-dependent ways of modifying an illegitimate address
8020 to be legitimate. If we find one, return the new, valid address. */
8022 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8024 if (arm_tls_referenced_p (x))
8026 rtx addend = NULL;
8028 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8030 addend = XEXP (XEXP (x, 0), 1);
8031 x = XEXP (XEXP (x, 0), 0);
8034 if (GET_CODE (x) != SYMBOL_REF)
8035 return x;
8037 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8039 x = legitimize_tls_address (x, NULL_RTX);
8041 if (addend)
8043 x = gen_rtx_PLUS (SImode, x, addend);
8044 orig_x = x;
8046 else
8047 return x;
8050 if (!TARGET_ARM)
8052 /* TODO: legitimize_address for Thumb2. */
8053 if (TARGET_THUMB2)
8054 return x;
8055 return thumb_legitimize_address (x, orig_x, mode);
8058 if (GET_CODE (x) == PLUS)
8060 rtx xop0 = XEXP (x, 0);
8061 rtx xop1 = XEXP (x, 1);
8063 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8064 xop0 = force_reg (SImode, xop0);
8066 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8067 && !symbol_mentioned_p (xop1))
8068 xop1 = force_reg (SImode, xop1);
8070 if (ARM_BASE_REGISTER_RTX_P (xop0)
8071 && CONST_INT_P (xop1))
8073 HOST_WIDE_INT n, low_n;
8074 rtx base_reg, val;
8075 n = INTVAL (xop1);
8077 /* VFP addressing modes actually allow greater offsets, but for
8078 now we just stick with the lowest common denominator. */
8079 if (mode == DImode || mode == DFmode)
8081 low_n = n & 0x0f;
8082 n &= ~0x0f;
8083 if (low_n > 4)
8085 n += 16;
8086 low_n -= 16;
8089 else
8091 low_n = ((mode) == TImode ? 0
8092 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8093 n -= low_n;
8096 base_reg = gen_reg_rtx (SImode);
8097 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8098 emit_move_insn (base_reg, val);
8099 x = plus_constant (Pmode, base_reg, low_n);
8101 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8102 x = gen_rtx_PLUS (SImode, xop0, xop1);
8105 /* XXX We don't allow MINUS any more -- see comment in
8106 arm_legitimate_address_outer_p (). */
8107 else if (GET_CODE (x) == MINUS)
8109 rtx xop0 = XEXP (x, 0);
8110 rtx xop1 = XEXP (x, 1);
8112 if (CONSTANT_P (xop0))
8113 xop0 = force_reg (SImode, xop0);
8115 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8116 xop1 = force_reg (SImode, xop1);
8118 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8119 x = gen_rtx_MINUS (SImode, xop0, xop1);
8122 /* Make sure to take full advantage of the pre-indexed addressing mode
8123 with absolute addresses, which often allows the base register to
8124 be factored out across multiple adjacent memory references, and might
8125 even allow the minipool to be avoided entirely. */
8126 else if (CONST_INT_P (x) && optimize > 0)
8128 unsigned int bits;
8129 HOST_WIDE_INT mask, base, index;
8130 rtx base_reg;
8132 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8133 use an 8-bit index. So let's use a 12-bit index for SImode only and
8134 hope that arm_gen_constant will enable ldrb to use more bits. */
8135 bits = (mode == SImode) ? 12 : 8;
8136 mask = (1 << bits) - 1;
8137 base = INTVAL (x) & ~mask;
8138 index = INTVAL (x) & mask;
8139 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8141 /* It'll most probably be more efficient to generate the base
8142 with more bits set and use a negative index instead. */
8143 base |= mask;
8144 index -= mask;
8146 base_reg = force_reg (SImode, GEN_INT (base));
8147 x = plus_constant (Pmode, base_reg, index);
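      /* For instance, a plain SImode reference to the absolute address
	 0x12345678 becomes a base of 0x12345000 forced into a register
	 plus a residual offset of 0x678, so that nearby absolute
	 addresses can share the same base register.  */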
8150 if (flag_pic)
8152 /* We need to find and carefully transform any SYMBOL and LABEL
8153 references; so go back to the original address expression. */
8154 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8156 if (new_x != orig_x)
8157 x = new_x;
8160 return x;
8164 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8165 to be legitimate. If we find one, return the new, valid address. */
8167 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8169 if (GET_CODE (x) == PLUS
8170 && CONST_INT_P (XEXP (x, 1))
8171 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8172 || INTVAL (XEXP (x, 1)) < 0))
8174 rtx xop0 = XEXP (x, 0);
8175 rtx xop1 = XEXP (x, 1);
8176 HOST_WIDE_INT offset = INTVAL (xop1);
8178 /* Try and fold the offset into a biasing of the base register and
8179 then offsetting that. Don't do this when optimizing for space
8180 since it can cause too many CSEs. */
8181 if (optimize_size && offset >= 0
8182 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8184 HOST_WIDE_INT delta;
8186 if (offset >= 256)
8187 delta = offset - (256 - GET_MODE_SIZE (mode));
8188 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8189 delta = 31 * GET_MODE_SIZE (mode);
8190 else
8191 delta = offset & (~31 * GET_MODE_SIZE (mode));
8193 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8194 NULL_RTX);
8195 x = plus_constant (Pmode, xop0, delta);
8197 else if (offset < 0 && offset > -256)
8198 /* Small negative offsets are best done with a subtract before the
8199 dereference; forcing these into a register normally takes two
8200 instructions. */
8201 x = force_operand (x, NULL_RTX);
8202 else
8204 /* For the remaining cases, force the constant into a register. */
8205 xop1 = force_reg (SImode, xop1);
8206 x = gen_rtx_PLUS (SImode, xop0, xop1);
8209 else if (GET_CODE (x) == PLUS
8210 && s_register_operand (XEXP (x, 1), SImode)
8211 && !s_register_operand (XEXP (x, 0), SImode))
8213 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8215 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8218 if (flag_pic)
8220 /* We need to find and carefully transform any SYMBOL and LABEL
8221 references; so go back to the original address expression. */
8222 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8224 if (new_x != orig_x)
8225 x = new_x;
8228 return x;
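/* For example, for an SImode access at (reg + 300), the folding path above
   biases the base by 252 (a single ADD with an 8-bit immediate) and keeps a
   residual offset of 48, which fits the 0..124 word-offset field checked by
   thumb_legitimate_offset_p.  The value 300 is only an example.  */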
8231 /* Return TRUE if X contains any TLS symbol references. */
8233 bool
8234 arm_tls_referenced_p (rtx x)
8236 if (! TARGET_HAVE_TLS)
8237 return false;
8239 subrtx_iterator::array_type array;
8240 FOR_EACH_SUBRTX (iter, array, x, ALL)
8242 const_rtx x = *iter;
8243 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8244 return true;
8246 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8247 TLS offsets, not real symbol references. */
8248 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8249 iter.skip_subrtxes ();
8251 return false;
8254 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8256 On the ARM, allow any integer (invalid ones are removed later by insn
8257 patterns), nice doubles and symbol_refs which refer to the function's
8258 constant pool XXX.
8260 When generating pic allow anything. */
8262 static bool
8263 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8265 return flag_pic || !label_mentioned_p (x);
8268 static bool
8269 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8271 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8272 RTXs.  These RTXs must therefore be allowed for Thumb-1 so that, when run
8273 for ARMv8-M Baseline or later, the result is valid.
8274 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8275 x = XEXP (x, 0);
8277 return (CONST_INT_P (x)
8278 || CONST_DOUBLE_P (x)
8279 || CONSTANT_ADDRESS_P (x)
8280 || flag_pic);
8283 static bool
8284 arm_legitimate_constant_p (machine_mode mode, rtx x)
8286 return (!arm_cannot_force_const_mem (mode, x)
8287 && (TARGET_32BIT
8288 ? arm_legitimate_constant_p_1 (mode, x)
8289 : thumb_legitimate_constant_p (mode, x)));
8292 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8294 static bool
8295 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8297 rtx base, offset;
8299 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8301 split_const (x, &base, &offset);
8302 if (GET_CODE (base) == SYMBOL_REF
8303 && !offset_within_block_p (base, INTVAL (offset)))
8304 return true;
8306 return arm_tls_referenced_p (x);
8309 #define REG_OR_SUBREG_REG(X) \
8310 (REG_P (X) \
8311 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8313 #define REG_OR_SUBREG_RTX(X) \
8314 (REG_P (X) ? (X) : SUBREG_REG (X))
8316 static inline int
8317 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8319 machine_mode mode = GET_MODE (x);
8320 int total, words;
8322 switch (code)
8324 case ASHIFT:
8325 case ASHIFTRT:
8326 case LSHIFTRT:
8327 case ROTATERT:
8328 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8330 case PLUS:
8331 case MINUS:
8332 case COMPARE:
8333 case NEG:
8334 case NOT:
8335 return COSTS_N_INSNS (1);
8337 case MULT:
8338 if (CONST_INT_P (XEXP (x, 1)))
8340 int cycles = 0;
8341 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8343 while (i)
8345 i >>= 2;
8346 cycles++;
8348 return COSTS_N_INSNS (2) + cycles;
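      /* E.g. a constant multiplier of 100 makes the loop above iterate
	 four times (100 -> 25 -> 6 -> 1 -> 0), modelling a multiply that
	 retires roughly two bits of the multiplier per cycle.  */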
8350 return COSTS_N_INSNS (1) + 16;
8352 case SET:
8353 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8354 the mode. */
8355 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8356 return (COSTS_N_INSNS (words)
8357 + 4 * ((MEM_P (SET_SRC (x)))
8358 + MEM_P (SET_DEST (x))));
8360 case CONST_INT:
8361 if (outer == SET)
8363 if (UINTVAL (x) < 256
8364 /* 16-bit constant. */
8365 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8366 return 0;
8367 if (thumb_shiftable_const (INTVAL (x)))
8368 return COSTS_N_INSNS (2);
8369 return COSTS_N_INSNS (3);
8371 else if ((outer == PLUS || outer == COMPARE)
8372 && INTVAL (x) < 256 && INTVAL (x) > -256)
8373 return 0;
8374 else if ((outer == IOR || outer == XOR || outer == AND)
8375 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8376 return COSTS_N_INSNS (1);
8377 else if (outer == AND)
8379 int i;
8380 /* This duplicates the tests in the andsi3 expander. */
8381 for (i = 9; i <= 31; i++)
8382 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8383 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8384 return COSTS_N_INSNS (2);
8386 else if (outer == ASHIFT || outer == ASHIFTRT
8387 || outer == LSHIFTRT)
8388 return 0;
8389 return COSTS_N_INSNS (2);
8391 case CONST:
8392 case CONST_DOUBLE:
8393 case LABEL_REF:
8394 case SYMBOL_REF:
8395 return COSTS_N_INSNS (3);
8397 case UDIV:
8398 case UMOD:
8399 case DIV:
8400 case MOD:
8401 return 100;
8403 case TRUNCATE:
8404 return 99;
8406 case AND:
8407 case XOR:
8408 case IOR:
8409 /* XXX guess. */
8410 return 8;
8412 case MEM:
8413 /* XXX another guess. */
8414 /* Memory costs quite a lot for the first word, but subsequent words
8415 load at the equivalent of a single insn each. */
8416 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8417 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8418 ? 4 : 0));
8420 case IF_THEN_ELSE:
8421 /* XXX a guess. */
8422 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8423 return 14;
8424 return 2;
8426 case SIGN_EXTEND:
8427 case ZERO_EXTEND:
8428 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8429 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8431 if (mode == SImode)
8432 return total;
8434 if (arm_arch6)
8435 return total + COSTS_N_INSNS (1);
8437 /* Assume a two-shift sequence. Increase the cost slightly so
8438 we prefer actual shifts over an extend operation. */
8439 return total + 1 + COSTS_N_INSNS (2);
8441 default:
8442 return 99;
8446 /* Estimates the size cost of thumb1 instructions.
8447 For now most of the code is copied from thumb1_rtx_costs. We need
8448 finer-grained tuning when we have more related test cases. */
8449 static inline int
8450 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8452 machine_mode mode = GET_MODE (x);
8453 int words, cost;
8455 switch (code)
8457 case ASHIFT:
8458 case ASHIFTRT:
8459 case LSHIFTRT:
8460 case ROTATERT:
8461 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8463 case PLUS:
8464 case MINUS:
8465 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8466 defined by RTL expansion, especially for the expansion of
8467 multiplication. */
8468 if ((GET_CODE (XEXP (x, 0)) == MULT
8469 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8470 || (GET_CODE (XEXP (x, 1)) == MULT
8471 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8472 return COSTS_N_INSNS (2);
8473 /* Fall through. */
8474 case COMPARE:
8475 case NEG:
8476 case NOT:
8477 return COSTS_N_INSNS (1);
8479 case MULT:
8480 if (CONST_INT_P (XEXP (x, 1)))
8482 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8483 into a register first. */
8484 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8485 /* For the targets which have a very small and high-latency multiply
8486 unit, we prefer to synthesize the mult with up to 5 instructions,
8487 giving a good balance between size and performance. */
8488 if (arm_arch6m && arm_m_profile_small_mul)
8489 return COSTS_N_INSNS (5);
8490 else
8491 return COSTS_N_INSNS (1) + const_size;
8493 return COSTS_N_INSNS (1);
8495 case SET:
8496 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8497 the mode. */
8498 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8499 cost = COSTS_N_INSNS (words);
8500 if (satisfies_constraint_J (SET_SRC (x))
8501 || satisfies_constraint_K (SET_SRC (x))
8502 /* Too big an immediate for a 2-byte mov; MOVT is used instead. */
8503 || (CONST_INT_P (SET_SRC (x))
8504 && UINTVAL (SET_SRC (x)) >= 256
8505 && TARGET_HAVE_MOVT
8506 && satisfies_constraint_j (SET_SRC (x)))
8507 /* thumb1_movdi_insn. */
8508 || ((words > 1) && MEM_P (SET_SRC (x))))
8509 cost += COSTS_N_INSNS (1);
8510 return cost;
8512 case CONST_INT:
8513 if (outer == SET)
8515 if (UINTVAL (x) < 256)
8516 return COSTS_N_INSNS (1);
8517 /* movw is 4byte long. */
8518 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
8519 return COSTS_N_INSNS (2);
8520 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8521 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8522 return COSTS_N_INSNS (2);
8523 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8524 if (thumb_shiftable_const (INTVAL (x)))
8525 return COSTS_N_INSNS (2);
8526 return COSTS_N_INSNS (3);
8528 else if ((outer == PLUS || outer == COMPARE)
8529 && INTVAL (x) < 256 && INTVAL (x) > -256)
8530 return 0;
8531 else if ((outer == IOR || outer == XOR || outer == AND)
8532 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8533 return COSTS_N_INSNS (1);
8534 else if (outer == AND)
8536 int i;
8537 /* This duplicates the tests in the andsi3 expander. */
8538 for (i = 9; i <= 31; i++)
8539 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8540 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8541 return COSTS_N_INSNS (2);
8543 else if (outer == ASHIFT || outer == ASHIFTRT
8544 || outer == LSHIFTRT)
8545 return 0;
8546 return COSTS_N_INSNS (2);
8548 case CONST:
8549 case CONST_DOUBLE:
8550 case LABEL_REF:
8551 case SYMBOL_REF:
8552 return COSTS_N_INSNS (3);
8554 case UDIV:
8555 case UMOD:
8556 case DIV:
8557 case MOD:
8558 return 100;
8560 case TRUNCATE:
8561 return 99;
8563 case AND:
8564 case XOR:
8565 case IOR:
8566 return COSTS_N_INSNS (1);
8568 case MEM:
8569 return (COSTS_N_INSNS (1)
8570 + COSTS_N_INSNS (1)
8571 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8572 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8573 ? COSTS_N_INSNS (1) : 0));
8575 case IF_THEN_ELSE:
8576 /* XXX a guess. */
8577 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8578 return 14;
8579 return 2;
8581 case ZERO_EXTEND:
8582 /* XXX still guessing. */
8583 switch (GET_MODE (XEXP (x, 0)))
8585 case QImode:
8586 return (1 + (mode == DImode ? 4 : 0)
8587 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8589 case HImode:
8590 return (4 + (mode == DImode ? 4 : 0)
8591 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8593 case SImode:
8594 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8596 default:
8597 return 99;
8600 default:
8601 return 99;
8605 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8606 operand, then return the operand that is being shifted. If the shift
8607 is not by a constant, then set SHIFT_REG to point to the operand.
8608 Return NULL if OP is not a shifter operand. */
8609 static rtx
8610 shifter_op_p (rtx op, rtx *shift_reg)
8612 enum rtx_code code = GET_CODE (op);
8614 if (code == MULT && CONST_INT_P (XEXP (op, 1))
8615 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
8616 return XEXP (op, 0);
8617 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
8618 return XEXP (op, 0);
8619 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
8620 || code == ASHIFTRT)
8622 if (!CONST_INT_P (XEXP (op, 1)))
8623 *shift_reg = XEXP (op, 1);
8624 return XEXP (op, 0);
8627 return NULL;
8630 static bool
8631 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
8633 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
8634 rtx_code code = GET_CODE (x);
8635 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
8637 switch (XINT (x, 1))
8639 case UNSPEC_UNALIGNED_LOAD:
8640 /* We can only do unaligned loads into the integer unit, and we can't
8641 use LDM or LDRD. */
8642 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
8643 if (speed_p)
8644 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
8645 + extra_cost->ldst.load_unaligned);
8647 #ifdef NOT_YET
8648 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
8649 ADDR_SPACE_GENERIC, speed_p);
8650 #endif
8651 return true;
8653 case UNSPEC_UNALIGNED_STORE:
8654 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
8655 if (speed_p)
8656 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
8657 + extra_cost->ldst.store_unaligned);
8659 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
8660 #ifdef NOT_YET
8661 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
8662 ADDR_SPACE_GENERIC, speed_p);
8663 #endif
8664 return true;
8666 case UNSPEC_VRINTZ:
8667 case UNSPEC_VRINTP:
8668 case UNSPEC_VRINTM:
8669 case UNSPEC_VRINTR:
8670 case UNSPEC_VRINTX:
8671 case UNSPEC_VRINTA:
8672 if (speed_p)
8673 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
8675 return true;
8676 default:
8677 *cost = COSTS_N_INSNS (2);
8678 break;
8680 return true;
8683 /* Cost of a libcall. We assume one insn per argument, an amount for the
8684 call (one insn for -Os) and then one for processing the result. */
8685 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
8687 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
8688 do \
8690 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
8691 if (shift_op != NULL \
8692 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
8694 if (shift_reg) \
8696 if (speed_p) \
8697 *cost += extra_cost->alu.arith_shift_reg; \
8698 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
8699 ASHIFT, 1, speed_p); \
8701 else if (speed_p) \
8702 *cost += extra_cost->alu.arith_shift; \
8704 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
8705 ASHIFT, 0, speed_p) \
8706 + rtx_cost (XEXP (x, 1 - IDX), \
8707 GET_MODE (shift_op), \
8708 OP, 1, speed_p)); \
8709 return true; \
8712 while (0);
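/* As an illustration, HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) below treats an
   HImode (plus (mult (reg) (const_int 4)) (reg)) as an add with a built-in
   left shift: it charges alu.arith_shift (or alu.arith_shift_reg when the
   shift amount is in a register) plus the costs of the two operands, rather
   than the cost of a real multiply.  */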
8714 /* RTX costs. Make an estimate of the cost of executing the operation
8715 X, which is contained within an operation with code OUTER_CODE.
8716 SPEED_P indicates whether the cost desired is the performance cost,
8717 or the size cost. The estimate is stored in COST and the return
8718 value is TRUE if the cost calculation is final, or FALSE if the
8719 caller should recurse through the operands of X to add additional
8720 costs.
8722 We currently make no attempt to model the size savings of Thumb-2
8723 16-bit instructions. At the normal points in compilation where
8724 this code is called we have no measure of whether the condition
8725 flags are live or not, and thus no realistic way to determine what
8726 the size will eventually be. */
8727 static bool
8728 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
8729 const struct cpu_cost_table *extra_cost,
8730 int *cost, bool speed_p)
8732 machine_mode mode = GET_MODE (x);
8734 *cost = COSTS_N_INSNS (1);
8736 if (TARGET_THUMB1)
8738 if (speed_p)
8739 *cost = thumb1_rtx_costs (x, code, outer_code);
8740 else
8741 *cost = thumb1_size_rtx_costs (x, code, outer_code);
8742 return true;
8745 switch (code)
8747 case SET:
8748 *cost = 0;
8749 /* SET RTXs don't have a mode so we get it from the destination. */
8750 mode = GET_MODE (SET_DEST (x));
8752 if (REG_P (SET_SRC (x))
8753 && REG_P (SET_DEST (x)))
8755 /* Assume that most copies can be done with a single insn,
8756 unless we don't have HW FP, in which case everything
8757 larger than word mode will require two insns. */
8758 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
8759 && GET_MODE_SIZE (mode) > 4)
8760 || mode == DImode)
8761 ? 2 : 1);
8762 /* Conditional register moves can be encoded
8763 in 16 bits in Thumb mode. */
8764 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
8765 *cost >>= 1;
8767 return true;
8770 if (CONST_INT_P (SET_SRC (x)))
8772 /* Handle CONST_INT here, since the value doesn't have a mode
8773 and we would otherwise be unable to work out the true cost. */
8774 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
8775 0, speed_p);
8776 outer_code = SET;
8777 /* Slightly lower the cost of setting a core reg to a constant.
8778 This helps break up chains and allows for better scheduling. */
8779 if (REG_P (SET_DEST (x))
8780 && REGNO (SET_DEST (x)) <= LR_REGNUM)
8781 *cost -= 1;
8782 x = SET_SRC (x);
8783 /* Immediate moves with an immediate in the range [0, 255] can be
8784 encoded in 16 bits in Thumb mode. */
8785 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
8786 && INTVAL (x) >= 0 && INTVAL (x) <=255)
8787 *cost >>= 1;
8788 goto const_int_cost;
8791 return false;
8793 case MEM:
8794 /* A memory access costs 1 insn if the mode is small, or the address is
8795 a single register; otherwise it costs one insn per word. */
8796 if (REG_P (XEXP (x, 0)))
8797 *cost = COSTS_N_INSNS (1);
8798 else if (flag_pic
8799 && GET_CODE (XEXP (x, 0)) == PLUS
8800 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8801 /* This will be split into two instructions.
8802 See arm.md:calculate_pic_address. */
8803 *cost = COSTS_N_INSNS (2);
8804 else
8805 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8807 /* For speed optimizations, add the costs of the address and
8808 accessing memory. */
8809 if (speed_p)
8810 #ifdef NOT_YET
8811 *cost += (extra_cost->ldst.load
8812 + arm_address_cost (XEXP (x, 0), mode,
8813 ADDR_SPACE_GENERIC, speed_p));
8814 #else
8815 *cost += extra_cost->ldst.load;
8816 #endif
8817 return true;
8819 case PARALLEL:
8821 /* Calculations of LDM costs are complex. We assume an initial cost
8822 (ldm_1st) which will load the number of registers mentioned in
8823 ldm_regs_per_insn_1st registers; then each additional
8824 ldm_regs_per_insn_subsequent registers cost one more insn. The
8825 formula for N regs is thus:
8827 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
8828 + ldm_regs_per_insn_subsequent - 1)
8829 / ldm_regs_per_insn_subsequent).
8831 Additional costs may also be added for addressing. A similar
8832 formula is used for STM. */
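      /* For example, with ldm_regs_per_insn_1st == 2 and
	 ldm_regs_per_insn_subsequent == 1, a 5-register load multiple
	 costs ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 1 - 1) / 1)
	 == ldm_1st + COSTS_N_INSNS (3): the base LDM cost for the first
	 two registers plus one insn for each remaining register.  */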
8834 bool is_ldm = load_multiple_operation (x, SImode);
8835 bool is_stm = store_multiple_operation (x, SImode);
8837 if (is_ldm || is_stm)
8839 if (speed_p)
8841 HOST_WIDE_INT nregs = XVECLEN (x, 0);
8842 HOST_WIDE_INT regs_per_insn_1st = is_ldm
8843 ? extra_cost->ldst.ldm_regs_per_insn_1st
8844 : extra_cost->ldst.stm_regs_per_insn_1st;
8845 HOST_WIDE_INT regs_per_insn_sub = is_ldm
8846 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
8847 : extra_cost->ldst.stm_regs_per_insn_subsequent;
8849 *cost += regs_per_insn_1st
8850 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
8851 + regs_per_insn_sub - 1)
8852 / regs_per_insn_sub);
8853 return true;
8857 return false;
8859 case DIV:
8860 case UDIV:
8861 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8862 && (mode == SFmode || !TARGET_VFP_SINGLE))
8863 *cost += COSTS_N_INSNS (speed_p
8864 ? extra_cost->fp[mode != SFmode].div : 0);
8865 else if (mode == SImode && TARGET_IDIV)
8866 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
8867 else
8868 *cost = LIBCALL_COST (2);
8869 return false; /* All arguments must be in registers. */
8871 case MOD:
8872 /* MOD by a power of 2 can be expanded as:
8873 rsbs r1, r0, #0
8874 and r0, r0, #(n - 1)
8875 and r1, r1, #(n - 1)
8876 rsbpl r0, r1, #0. */
8877 if (CONST_INT_P (XEXP (x, 1))
8878 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
8879 && mode == SImode)
8881 *cost += COSTS_N_INSNS (3);
8883 if (speed_p)
8884 *cost += 2 * extra_cost->alu.logical
8885 + extra_cost->alu.arith;
8886 return true;
8889 /* Fall-through. */
8890 case UMOD:
8891 *cost = LIBCALL_COST (2);
8892 return false; /* All arguments must be in registers. */
8894 case ROTATE:
8895 if (mode == SImode && REG_P (XEXP (x, 1)))
8897 *cost += (COSTS_N_INSNS (1)
8898 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
8899 if (speed_p)
8900 *cost += extra_cost->alu.shift_reg;
8901 return true;
8903 /* Fall through */
8904 case ROTATERT:
8905 case ASHIFT:
8906 case LSHIFTRT:
8907 case ASHIFTRT:
8908 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8910 *cost += (COSTS_N_INSNS (2)
8911 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
8912 if (speed_p)
8913 *cost += 2 * extra_cost->alu.shift;
8914 return true;
8916 else if (mode == SImode)
8918 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
8919 /* Slightly disparage register shifts at -Os, but not by much. */
8920 if (!CONST_INT_P (XEXP (x, 1)))
8921 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
8922 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
8923 return true;
8925 else if (GET_MODE_CLASS (mode) == MODE_INT
8926 && GET_MODE_SIZE (mode) < 4)
8928 if (code == ASHIFT)
8930 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
8931 /* Slightly disparage register shifts at -Os, but not by
8932 much. */
8933 if (!CONST_INT_P (XEXP (x, 1)))
8934 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
8935 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
8937 else if (code == LSHIFTRT || code == ASHIFTRT)
8939 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
8941 /* Can use SBFX/UBFX. */
8942 if (speed_p)
8943 *cost += extra_cost->alu.bfx;
8944 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
8946 else
8948 *cost += COSTS_N_INSNS (1);
8949 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
8950 if (speed_p)
8952 if (CONST_INT_P (XEXP (x, 1)))
8953 *cost += 2 * extra_cost->alu.shift;
8954 else
8955 *cost += (extra_cost->alu.shift
8956 + extra_cost->alu.shift_reg);
8958 else
8959 /* Slightly disparage register shifts. */
8960 *cost += !CONST_INT_P (XEXP (x, 1));
8963 else /* Rotates. */
8965 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
8966 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
8967 if (speed_p)
8969 if (CONST_INT_P (XEXP (x, 1)))
8970 *cost += (2 * extra_cost->alu.shift
8971 + extra_cost->alu.log_shift);
8972 else
8973 *cost += (extra_cost->alu.shift
8974 + extra_cost->alu.shift_reg
8975 + extra_cost->alu.log_shift_reg);
8978 return true;
8981 *cost = LIBCALL_COST (2);
8982 return false;
8984 case BSWAP:
8985 if (arm_arch6)
8987 if (mode == SImode)
8989 if (speed_p)
8990 *cost += extra_cost->alu.rev;
8992 return false;
8995 else
8997 /* No rev instruction available. Look at arm_legacy_rev
8998 and thumb_legacy_rev for the form of RTL used then. */
8999 if (TARGET_THUMB)
9001 *cost += COSTS_N_INSNS (9);
9003 if (speed_p)
9005 *cost += 6 * extra_cost->alu.shift;
9006 *cost += 3 * extra_cost->alu.logical;
9009 else
9011 *cost += COSTS_N_INSNS (4);
9013 if (speed_p)
9015 *cost += 2 * extra_cost->alu.shift;
9016 *cost += extra_cost->alu.arith_shift;
9017 *cost += 2 * extra_cost->alu.logical;
9020 return true;
9022 return false;
9024 case MINUS:
9025 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9026 && (mode == SFmode || !TARGET_VFP_SINGLE))
9028 if (GET_CODE (XEXP (x, 0)) == MULT
9029 || GET_CODE (XEXP (x, 1)) == MULT)
9031 rtx mul_op0, mul_op1, sub_op;
9033 if (speed_p)
9034 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9036 if (GET_CODE (XEXP (x, 0)) == MULT)
9038 mul_op0 = XEXP (XEXP (x, 0), 0);
9039 mul_op1 = XEXP (XEXP (x, 0), 1);
9040 sub_op = XEXP (x, 1);
9042 else
9044 mul_op0 = XEXP (XEXP (x, 1), 0);
9045 mul_op1 = XEXP (XEXP (x, 1), 1);
9046 sub_op = XEXP (x, 0);
9049 /* The first operand of the multiply may be optionally
9050 negated. */
9051 if (GET_CODE (mul_op0) == NEG)
9052 mul_op0 = XEXP (mul_op0, 0);
9054 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9055 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9056 + rtx_cost (sub_op, mode, code, 0, speed_p));
9058 return true;
9061 if (speed_p)
9062 *cost += extra_cost->fp[mode != SFmode].addsub;
9063 return false;
9066 if (mode == SImode)
9068 rtx shift_by_reg = NULL;
9069 rtx shift_op;
9070 rtx non_shift_op;
9072 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9073 if (shift_op == NULL)
9075 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9076 non_shift_op = XEXP (x, 0);
9078 else
9079 non_shift_op = XEXP (x, 1);
9081 if (shift_op != NULL)
9083 if (shift_by_reg != NULL)
9085 if (speed_p)
9086 *cost += extra_cost->alu.arith_shift_reg;
9087 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9089 else if (speed_p)
9090 *cost += extra_cost->alu.arith_shift;
9092 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9093 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9094 return true;
9097 if (arm_arch_thumb2
9098 && GET_CODE (XEXP (x, 1)) == MULT)
9100 /* MLS. */
9101 if (speed_p)
9102 *cost += extra_cost->mult[0].add;
9103 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9104 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9105 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9106 return true;
9109 if (CONST_INT_P (XEXP (x, 0)))
9111 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9112 INTVAL (XEXP (x, 0)), NULL_RTX,
9113 NULL_RTX, 1, 0);
9114 *cost = COSTS_N_INSNS (insns);
9115 if (speed_p)
9116 *cost += insns * extra_cost->alu.arith;
9117 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9118 return true;
9120 else if (speed_p)
9121 *cost += extra_cost->alu.arith;
9123 return false;
9126 if (GET_MODE_CLASS (mode) == MODE_INT
9127 && GET_MODE_SIZE (mode) < 4)
9129 rtx shift_op, shift_reg;
9130 shift_reg = NULL;
9132 /* We check both sides of the MINUS for shifter operands since,
9133 unlike PLUS, it's not commutative. */
9135 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9136 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9138 /* Slightly disparage, as we might need to widen the result. */
9139 *cost += 1;
9140 if (speed_p)
9141 *cost += extra_cost->alu.arith;
9143 if (CONST_INT_P (XEXP (x, 0)))
9145 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9146 return true;
9149 return false;
9152 if (mode == DImode)
9154 *cost += COSTS_N_INSNS (1);
9156 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9158 rtx op1 = XEXP (x, 1);
9160 if (speed_p)
9161 *cost += 2 * extra_cost->alu.arith;
9163 if (GET_CODE (op1) == ZERO_EXTEND)
9164 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9165 0, speed_p);
9166 else
9167 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9168 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9169 0, speed_p);
9170 return true;
9172 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9174 if (speed_p)
9175 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9176 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9177 0, speed_p)
9178 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9179 return true;
9181 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9182 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9184 if (speed_p)
9185 *cost += (extra_cost->alu.arith
9186 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9187 ? extra_cost->alu.arith
9188 : extra_cost->alu.arith_shift));
9189 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9190 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9191 GET_CODE (XEXP (x, 1)), 0, speed_p));
9192 return true;
9195 if (speed_p)
9196 *cost += 2 * extra_cost->alu.arith;
9197 return false;
9200 /* Vector mode? */
9202 *cost = LIBCALL_COST (2);
9203 return false;
9205 case PLUS:
9206 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9207 && (mode == SFmode || !TARGET_VFP_SINGLE))
9209 if (GET_CODE (XEXP (x, 0)) == MULT)
9211 rtx mul_op0, mul_op1, add_op;
9213 if (speed_p)
9214 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9216 mul_op0 = XEXP (XEXP (x, 0), 0);
9217 mul_op1 = XEXP (XEXP (x, 0), 1);
9218 add_op = XEXP (x, 1);
9220 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9221 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9222 + rtx_cost (add_op, mode, code, 0, speed_p));
9224 return true;
9227 if (speed_p)
9228 *cost += extra_cost->fp[mode != SFmode].addsub;
9229 return false;
9231 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9233 *cost = LIBCALL_COST (2);
9234 return false;
9237 /* Narrow modes can be synthesized in SImode, but the range
9238 of useful sub-operations is limited. Check for shift operations
9239 on one of the operands. Only left shifts can be used in the
9240 narrow modes. */
9241 if (GET_MODE_CLASS (mode) == MODE_INT
9242 && GET_MODE_SIZE (mode) < 4)
9244 rtx shift_op, shift_reg;
9245 shift_reg = NULL;
9247 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9249 if (CONST_INT_P (XEXP (x, 1)))
9251 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9252 INTVAL (XEXP (x, 1)), NULL_RTX,
9253 NULL_RTX, 1, 0);
9254 *cost = COSTS_N_INSNS (insns);
9255 if (speed_p)
9256 *cost += insns * extra_cost->alu.arith;
9257 /* Slightly penalize a narrow operation as the result may
9258 need widening. */
9259 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9260 return true;
9263 /* Slightly penalize a narrow operation as the result may
9264 need widening. */
9265 *cost += 1;
9266 if (speed_p)
9267 *cost += extra_cost->alu.arith;
9269 return false;
9272 if (mode == SImode)
9274 rtx shift_op, shift_reg;
9276 if (TARGET_INT_SIMD
9277 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9278 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9280 /* UXTA[BH] or SXTA[BH]. */
9281 if (speed_p)
9282 *cost += extra_cost->alu.extend_arith;
9283 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9284 0, speed_p)
9285 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9286 return true;
9289 shift_reg = NULL;
9290 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9291 if (shift_op != NULL)
9293 if (shift_reg)
9295 if (speed_p)
9296 *cost += extra_cost->alu.arith_shift_reg;
9297 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9299 else if (speed_p)
9300 *cost += extra_cost->alu.arith_shift;
9302 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9303 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9304 return true;
9306 if (GET_CODE (XEXP (x, 0)) == MULT)
9308 rtx mul_op = XEXP (x, 0);
9310 if (TARGET_DSP_MULTIPLY
9311 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9312 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9313 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9314 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9315 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9316 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9317 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9318 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9319 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9320 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9321 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9322 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9323 == 16))))))
9325 /* SMLA[BT][BT]. */
9326 if (speed_p)
9327 *cost += extra_cost->mult[0].extend_add;
9328 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9329 SIGN_EXTEND, 0, speed_p)
9330 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9331 SIGN_EXTEND, 0, speed_p)
9332 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9333 return true;
9336 if (speed_p)
9337 *cost += extra_cost->mult[0].add;
9338 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9339 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9340 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9341 return true;
9343 if (CONST_INT_P (XEXP (x, 1)))
9345 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9346 INTVAL (XEXP (x, 1)), NULL_RTX,
9347 NULL_RTX, 1, 0);
9348 *cost = COSTS_N_INSNS (insns);
9349 if (speed_p)
9350 *cost += insns * extra_cost->alu.arith;
9351 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9352 return true;
9354 else if (speed_p)
9355 *cost += extra_cost->alu.arith;
9357 return false;
9360 if (mode == DImode)
9362 if (arm_arch3m
9363 && GET_CODE (XEXP (x, 0)) == MULT
9364 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9365 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9366 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9367 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9369 if (speed_p)
9370 *cost += extra_cost->mult[1].extend_add;
9371 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9372 ZERO_EXTEND, 0, speed_p)
9373 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9374 ZERO_EXTEND, 0, speed_p)
9375 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9376 return true;
9379 *cost += COSTS_N_INSNS (1);
9381 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9382 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9384 if (speed_p)
9385 *cost += (extra_cost->alu.arith
9386 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9387 ? extra_cost->alu.arith
9388 : extra_cost->alu.arith_shift));
9390 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9391 0, speed_p)
9392 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9393 return true;
9396 if (speed_p)
9397 *cost += 2 * extra_cost->alu.arith;
9398 return false;
9401 /* Vector mode? */
9402 *cost = LIBCALL_COST (2);
9403 return false;
9404 case IOR:
9405 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9407 if (speed_p)
9408 *cost += extra_cost->alu.rev;
9410 return true;
9412 /* Fall through. */
9413 case AND: case XOR:
9414 if (mode == SImode)
9416 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9417 rtx op0 = XEXP (x, 0);
9418 rtx shift_op, shift_reg;
9420 if (subcode == NOT
9421 && (code == AND
9422 || (code == IOR && TARGET_THUMB2)))
9423 op0 = XEXP (op0, 0);
9425 shift_reg = NULL;
9426 shift_op = shifter_op_p (op0, &shift_reg);
9427 if (shift_op != NULL)
9429 if (shift_reg)
9431 if (speed_p)
9432 *cost += extra_cost->alu.log_shift_reg;
9433 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9435 else if (speed_p)
9436 *cost += extra_cost->alu.log_shift;
9438 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9439 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9440 return true;
9443 if (CONST_INT_P (XEXP (x, 1)))
9445 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9446 INTVAL (XEXP (x, 1)), NULL_RTX,
9447 NULL_RTX, 1, 0);
9449 *cost = COSTS_N_INSNS (insns);
9450 if (speed_p)
9451 *cost += insns * extra_cost->alu.logical;
9452 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9453 return true;
9456 if (speed_p)
9457 *cost += extra_cost->alu.logical;
9458 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9459 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9460 return true;
9463 if (mode == DImode)
9465 rtx op0 = XEXP (x, 0);
9466 enum rtx_code subcode = GET_CODE (op0);
9468 *cost += COSTS_N_INSNS (1);
9470 if (subcode == NOT
9471 && (code == AND
9472 || (code == IOR && TARGET_THUMB2)))
9473 op0 = XEXP (op0, 0);
9475 if (GET_CODE (op0) == ZERO_EXTEND)
9477 if (speed_p)
9478 *cost += 2 * extra_cost->alu.logical;
9480 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9481 0, speed_p)
9482 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9483 return true;
9485 else if (GET_CODE (op0) == SIGN_EXTEND)
9487 if (speed_p)
9488 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9490 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9491 0, speed_p)
9492 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9493 return true;
9496 if (speed_p)
9497 *cost += 2 * extra_cost->alu.logical;
9499 return true;
9501 /* Vector mode? */
9503 *cost = LIBCALL_COST (2);
9504 return false;
9506 case MULT:
9507 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9508 && (mode == SFmode || !TARGET_VFP_SINGLE))
9510 rtx op0 = XEXP (x, 0);
9512 if (GET_CODE (op0) == NEG && !flag_rounding_math)
9513 op0 = XEXP (op0, 0);
9515 if (speed_p)
9516 *cost += extra_cost->fp[mode != SFmode].mult;
9518 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
9519 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
9520 return true;
9522 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9524 *cost = LIBCALL_COST (2);
9525 return false;
9528 if (mode == SImode)
9530 if (TARGET_DSP_MULTIPLY
9531 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9532 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9533 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9534 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9535 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9536 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9537 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9538 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9539 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9540 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9541 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9542 && (INTVAL (XEXP (XEXP (x, 1), 1))
9543 == 16))))))
9545 /* SMUL[TB][TB]. */
9546 if (speed_p)
9547 *cost += extra_cost->mult[0].extend;
9548 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
9549 SIGN_EXTEND, 0, speed_p);
9550 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
9551 SIGN_EXTEND, 1, speed_p);
9552 return true;
9554 if (speed_p)
9555 *cost += extra_cost->mult[0].simple;
9556 return false;
9559 if (mode == DImode)
9561 if (arm_arch3m
9562 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9563 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9564 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9565 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9567 if (speed_p)
9568 *cost += extra_cost->mult[1].extend;
9569 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
9570 ZERO_EXTEND, 0, speed_p)
9571 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9572 ZERO_EXTEND, 0, speed_p));
9573 return true;
9576 *cost = LIBCALL_COST (2);
9577 return false;
9580 /* Vector mode? */
9581 *cost = LIBCALL_COST (2);
9582 return false;
9584 case NEG:
9585 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9586 && (mode == SFmode || !TARGET_VFP_SINGLE))
9588 if (GET_CODE (XEXP (x, 0)) == MULT)
9590 /* VNMUL. */
9591 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
9592 return true;
9595 if (speed_p)
9596 *cost += extra_cost->fp[mode != SFmode].neg;
9598 return false;
9600 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9602 *cost = LIBCALL_COST (1);
9603 return false;
9606 if (mode == SImode)
9608 if (GET_CODE (XEXP (x, 0)) == ABS)
9610 *cost += COSTS_N_INSNS (1);
9611 /* Assume the non-flag-changing variant. */
9612 if (speed_p)
9613 *cost += (extra_cost->alu.log_shift
9614 + extra_cost->alu.arith_shift);
9615 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
9616 return true;
9619 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
9620 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
9622 *cost += COSTS_N_INSNS (1);
9623 /* No extra cost for MOV imm and MVN imm. */
9624 /* If the comparison op is using the flags, there's no further
9625 cost, otherwise we need to add the cost of the comparison. */
9626 if (!(REG_P (XEXP (XEXP (x, 0), 0))
9627 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
9628 && XEXP (XEXP (x, 0), 1) == const0_rtx))
9630 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
9631 *cost += (COSTS_N_INSNS (1)
9632 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
9633 0, speed_p)
9634 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
9635 1, speed_p));
9636 if (speed_p)
9637 *cost += extra_cost->alu.arith;
9639 return true;
9642 if (speed_p)
9643 *cost += extra_cost->alu.arith;
9644 return false;
9647 if (GET_MODE_CLASS (mode) == MODE_INT
9648 && GET_MODE_SIZE (mode) < 4)
9650 /* Slightly disparage, as we might need an extend operation. */
9651 *cost += 1;
9652 if (speed_p)
9653 *cost += extra_cost->alu.arith;
9654 return false;
9657 if (mode == DImode)
9659 *cost += COSTS_N_INSNS (1);
9660 if (speed_p)
9661 *cost += 2 * extra_cost->alu.arith;
9662 return false;
9665 /* Vector mode? */
9666 *cost = LIBCALL_COST (1);
9667 return false;
9669 case NOT:
9670 if (mode == SImode)
9672 rtx shift_op;
9673 rtx shift_reg = NULL;
9675 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9677 if (shift_op)
9679 if (shift_reg != NULL)
9681 if (speed_p)
9682 *cost += extra_cost->alu.log_shift_reg;
9683 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9685 else if (speed_p)
9686 *cost += extra_cost->alu.log_shift;
9687 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
9688 return true;
9691 if (speed_p)
9692 *cost += extra_cost->alu.logical;
9693 return false;
9695 if (mode == DImode)
9697 *cost += COSTS_N_INSNS (1);
9698 return false;
9701 /* Vector mode? */
9703 *cost += LIBCALL_COST (1);
9704 return false;
9706 case IF_THEN_ELSE:
9708 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9710 *cost += COSTS_N_INSNS (3);
9711 return true;
9713 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
9714 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
9716 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
9717 /* Assume that if one arm of the if_then_else is a register,
9718 it will be tied with the result and eliminate the
9719 conditional insn. */
9720 if (REG_P (XEXP (x, 1)))
9721 *cost += op2cost;
9722 else if (REG_P (XEXP (x, 2)))
9723 *cost += op1cost;
9724 else
9726 if (speed_p)
9728 if (extra_cost->alu.non_exec_costs_exec)
9729 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
9730 else
9731 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
9733 else
9734 *cost += op1cost + op2cost;
9737 return true;
9739 case COMPARE:
9740 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
9741 *cost = 0;
9742 else
9744 machine_mode op0mode;
9745 /* We'll mostly assume that the cost of a compare is the cost of the
9746 LHS. However, there are some notable exceptions. */
9748 /* Floating point compares are never done as side-effects. */
9749 op0mode = GET_MODE (XEXP (x, 0));
9750 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
9751 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
9753 if (speed_p)
9754 *cost += extra_cost->fp[op0mode != SFmode].compare;
9756 if (XEXP (x, 1) == CONST0_RTX (op0mode))
9758 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
9759 return true;
9762 return false;
9764 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
9766 *cost = LIBCALL_COST (2);
9767 return false;
9770 /* DImode compares normally take two insns. */
9771 if (op0mode == DImode)
9773 *cost += COSTS_N_INSNS (1);
9774 if (speed_p)
9775 *cost += 2 * extra_cost->alu.arith;
9776 return false;
9779 if (op0mode == SImode)
9781 rtx shift_op;
9782 rtx shift_reg;
9784 if (XEXP (x, 1) == const0_rtx
9785 && !(REG_P (XEXP (x, 0))
9786 || (GET_CODE (XEXP (x, 0)) == SUBREG
9787 && REG_P (SUBREG_REG (XEXP (x, 0))))))
9789 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
9791 /* Multiply operations that set the flags are often
9792 significantly more expensive. */
9793 if (speed_p
9794 && GET_CODE (XEXP (x, 0)) == MULT
9795 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
9796 *cost += extra_cost->mult[0].flag_setting;
9798 if (speed_p
9799 && GET_CODE (XEXP (x, 0)) == PLUS
9800 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9801 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
9802 0), 1), mode))
9803 *cost += extra_cost->mult[0].flag_setting;
9804 return true;
9807 shift_reg = NULL;
9808 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9809 if (shift_op != NULL)
9811 if (shift_reg != NULL)
9813 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
9814 1, speed_p);
9815 if (speed_p)
9816 *cost += extra_cost->alu.arith_shift_reg;
9818 else if (speed_p)
9819 *cost += extra_cost->alu.arith_shift;
9820 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
9821 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
9822 return true;
9825 if (speed_p)
9826 *cost += extra_cost->alu.arith;
9827 if (CONST_INT_P (XEXP (x, 1))
9828 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
9830 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
9831 return true;
9833 return false;
9836 /* Vector mode? */
9838 *cost = LIBCALL_COST (2);
9839 return false;
9841 return true;
9843 case EQ:
9844 case NE:
9845 case LT:
9846 case LE:
9847 case GT:
9848 case GE:
9849 case LTU:
9850 case LEU:
9851 case GEU:
9852 case GTU:
9853 case ORDERED:
9854 case UNORDERED:
9855 case UNEQ:
9856 case UNLE:
9857 case UNLT:
9858 case UNGE:
9859 case UNGT:
9860 case LTGT:
9861 if (outer_code == SET)
9863 /* Is it a store-flag operation? */
9864 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
9865 && XEXP (x, 1) == const0_rtx)
9867 /* Thumb also needs an IT insn. */
9868 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
9869 return true;
9871 if (XEXP (x, 1) == const0_rtx)
9873 switch (code)
9875 case LT:
9876 /* LSR Rd, Rn, #31. */
9877 if (speed_p)
9878 *cost += extra_cost->alu.shift;
9879 break;
9881 case EQ:
9882 /* RSBS T1, Rn, #0
9883 ADC Rd, Rn, T1. */
9885 case NE:
9886 /* SUBS T1, Rn, #1
9887 SBC Rd, Rn, T1. */
9888 *cost += COSTS_N_INSNS (1);
9889 break;
9891 case LE:
9892 /* RSBS T1, Rn, Rn, LSR #31
9893 ADC Rd, Rn, T1. */
9894 *cost += COSTS_N_INSNS (1);
9895 if (speed_p)
9896 *cost += extra_cost->alu.arith_shift;
9897 break;
9899 case GT:
9900 /* RSB Rd, Rn, Rn, ASR #1
9901 LSR Rd, Rd, #31. */
9902 *cost += COSTS_N_INSNS (1);
9903 if (speed_p)
9904 *cost += (extra_cost->alu.arith_shift
9905 + extra_cost->alu.shift);
9906 break;
9908 case GE:
9909 /* ASR Rd, Rn, #31
9910 ADD Rd, Rn, #1. */
9911 *cost += COSTS_N_INSNS (1);
9912 if (speed_p)
9913 *cost += extra_cost->alu.shift;
9914 break;
9916 default:
9917 /* Remaining cases are either meaningless or would take
9918 three insns anyway. */
9919 *cost = COSTS_N_INSNS (3);
9920 break;
9922 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9923 return true;
9925 else
9927 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
9928 if (CONST_INT_P (XEXP (x, 1))
9929 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
9931 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9932 return true;
9935 return false;
9938 /* Not directly inside a set. If it involves the condition code
9939 register it must be the condition for a branch, cond_exec or
9940 I_T_E operation. Since the comparison is performed elsewhere
9941 this is just the control part which has no additional
9942 cost. */
9943 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
9944 && XEXP (x, 1) == const0_rtx)
9946 *cost = 0;
9947 return true;
9949 return false;
9951 case ABS:
9952 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9953 && (mode == SFmode || !TARGET_VFP_SINGLE))
9955 if (speed_p)
9956 *cost += extra_cost->fp[mode != SFmode].neg;
9958 return false;
9960 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9962 *cost = LIBCALL_COST (1);
9963 return false;
9966 if (mode == SImode)
9968 if (speed_p)
9969 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
9970 return false;
9972 /* Vector mode? */
9973 *cost = LIBCALL_COST (1);
9974 return false;
9976 case SIGN_EXTEND:
9977 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
9978 && MEM_P (XEXP (x, 0)))
9980 if (mode == DImode)
9981 *cost += COSTS_N_INSNS (1);
9983 if (!speed_p)
9984 return true;
9986 if (GET_MODE (XEXP (x, 0)) == SImode)
9987 *cost += extra_cost->ldst.load;
9988 else
9989 *cost += extra_cost->ldst.load_sign_extend;
9991 if (mode == DImode)
9992 *cost += extra_cost->alu.shift;
9994 return true;
9997 /* Widening from less than 32-bits requires an extend operation. */
9998 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10000 /* We have SXTB/SXTH. */
10001 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10002 if (speed_p)
10003 *cost += extra_cost->alu.extend;
10005 else if (GET_MODE (XEXP (x, 0)) != SImode)
10007 /* Needs two shifts. */
10008 *cost += COSTS_N_INSNS (1);
10009 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10010 if (speed_p)
10011 *cost += 2 * extra_cost->alu.shift;
10014 /* Widening beyond 32-bits requires one more insn. */
10015 if (mode == DImode)
10017 *cost += COSTS_N_INSNS (1);
10018 if (speed_p)
10019 *cost += extra_cost->alu.shift;
10022 return true;
10024 case ZERO_EXTEND:
10025 if ((arm_arch4
10026 || GET_MODE (XEXP (x, 0)) == SImode
10027 || GET_MODE (XEXP (x, 0)) == QImode)
10028 && MEM_P (XEXP (x, 0)))
10030 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10032 if (mode == DImode)
10033 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10035 return true;
10038 /* Widening from less than 32-bits requires an extend operation. */
10039 if (GET_MODE (XEXP (x, 0)) == QImode)
10041 /* UXTB can be a shorter instruction in Thumb2, but it might
10042 be slower than the AND Rd, Rn, #255 alternative. When
10043 optimizing for speed it should never be slower to use
10044 AND, and we don't really model 16-bit vs 32-bit insns
10045 here. */
10046 if (speed_p)
10047 *cost += extra_cost->alu.logical;
10049 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10051 /* We have UXTB/UXTH. */
10052 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10053 if (speed_p)
10054 *cost += extra_cost->alu.extend;
10056 else if (GET_MODE (XEXP (x, 0)) != SImode)
10058 /* Needs two shifts. It's marginally preferable to use
10059 shifts rather than two BIC instructions as the second
10060 shift may merge with a subsequent insn as a shifter
10061 op. */
10062 *cost = COSTS_N_INSNS (2);
10063 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10064 if (speed_p)
10065 *cost += 2 * extra_cost->alu.shift;
10068 /* Widening beyond 32-bits requires one more insn. */
10069 if (mode == DImode)
10071 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10074 return true;
10076 case CONST_INT:
10077 *cost = 0;
10078 /* CONST_INT has no mode, so we cannot tell for sure how many
10079 insns are really going to be needed. The best we can do is
10080 look at the value passed. If it fits in SImode, then assume
10081 that's the mode it will be used for. Otherwise assume it
10082 will be used in DImode. */
10083 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10084 mode = SImode;
10085 else
10086 mode = DImode;
10088 /* Avoid blowing up in arm_gen_constant (). */
10089 if (!(outer_code == PLUS
10090 || outer_code == AND
10091 || outer_code == IOR
10092 || outer_code == XOR
10093 || outer_code == MINUS))
10094 outer_code = SET;
10096 const_int_cost:
10097 if (mode == SImode)
10099 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10100 INTVAL (x), NULL, NULL,
10101 0, 0));
10102 /* Extra costs? */
10104 else
10106 *cost += COSTS_N_INSNS (arm_gen_constant
10107 (outer_code, SImode, NULL,
10108 trunc_int_for_mode (INTVAL (x), SImode),
10109 NULL, NULL, 0, 0)
10110 + arm_gen_constant (outer_code, SImode, NULL,
10111 INTVAL (x) >> 32, NULL,
10112 NULL, 0, 0));
10113 /* Extra costs? */
10116 return true;
10118 case CONST:
10119 case LABEL_REF:
10120 case SYMBOL_REF:
10121 if (speed_p)
10123 if (arm_arch_thumb2 && !flag_pic)
10124 *cost += COSTS_N_INSNS (1);
10125 else
10126 *cost += extra_cost->ldst.load;
10128 else
10129 *cost += COSTS_N_INSNS (1);
10131 if (flag_pic)
10133 *cost += COSTS_N_INSNS (1);
10134 if (speed_p)
10135 *cost += extra_cost->alu.arith;
10138 return true;
10140 case CONST_FIXED:
10141 *cost = COSTS_N_INSNS (4);
10142 /* Fixme. */
10143 return true;
10145 case CONST_DOUBLE:
10146 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10147 && (mode == SFmode || !TARGET_VFP_SINGLE))
10149 if (vfp3_const_double_rtx (x))
10151 if (speed_p)
10152 *cost += extra_cost->fp[mode == DFmode].fpconst;
10153 return true;
10156 if (speed_p)
10158 if (mode == DFmode)
10159 *cost += extra_cost->ldst.loadd;
10160 else
10161 *cost += extra_cost->ldst.loadf;
10163 else
10164 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10166 return true;
10168 *cost = COSTS_N_INSNS (4);
10169 return true;
10171 case CONST_VECTOR:
10172 /* Fixme. */
10173 if (TARGET_NEON
10174 && TARGET_HARD_FLOAT
10175 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10176 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10177 *cost = COSTS_N_INSNS (1);
10178 else
10179 *cost = COSTS_N_INSNS (4);
10180 return true;
10182 case HIGH:
10183 case LO_SUM:
10184 /* When optimizing for size, we prefer constant pool entries to
10185 MOVW/MOVT pairs, so bump the cost of these slightly. */
10186 if (!speed_p)
10187 *cost += 1;
10188 return true;
10190 case CLZ:
10191 if (speed_p)
10192 *cost += extra_cost->alu.clz;
10193 return false;
10195 case SMIN:
10196 if (XEXP (x, 1) == const0_rtx)
10198 if (speed_p)
10199 *cost += extra_cost->alu.log_shift;
10200 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10201 return true;
10203 /* Fall through. */
10204 case SMAX:
10205 case UMIN:
10206 case UMAX:
10207 *cost += COSTS_N_INSNS (1);
10208 return false;
10210 case TRUNCATE:
10211 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10212 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10213 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10214 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10215 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10216 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10217 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10218 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10219 == ZERO_EXTEND))))
10221 if (speed_p)
10222 *cost += extra_cost->mult[1].extend;
10223 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10224 ZERO_EXTEND, 0, speed_p)
10225 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10226 ZERO_EXTEND, 0, speed_p));
10227 return true;
10229 *cost = LIBCALL_COST (1);
10230 return false;
10232 case UNSPEC_VOLATILE:
10233 case UNSPEC:
10234 return arm_unspec_cost (x, outer_code, speed_p, cost);
10236 case PC:
10237 /* Reading the PC is like reading any other register. Writing it
10238 is more expensive, but we take that into account elsewhere. */
10239 *cost = 0;
10240 return true;
10242 case ZERO_EXTRACT:
10243 /* TODO: Simple zero_extract of bottom bits using AND. */
10244 /* Fall through. */
10245 case SIGN_EXTRACT:
10246 if (arm_arch6
10247 && mode == SImode
10248 && CONST_INT_P (XEXP (x, 1))
10249 && CONST_INT_P (XEXP (x, 2)))
10251 if (speed_p)
10252 *cost += extra_cost->alu.bfx;
10253 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10254 return true;
10256 /* Without UBFX/SBFX, need to resort to shift operations. */
10257 *cost += COSTS_N_INSNS (1);
10258 if (speed_p)
10259 *cost += 2 * extra_cost->alu.shift;
10260 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10261 return true;
10263 case FLOAT_EXTEND:
10264 if (TARGET_HARD_FLOAT)
10266 if (speed_p)
10267 *cost += extra_cost->fp[mode == DFmode].widen;
10268 if (!TARGET_FPU_ARMV8
10269 && GET_MODE (XEXP (x, 0)) == HFmode)
10271 /* Pre v8, widening HF->DF is a two-step process, first
10272 widening to SFmode. */
10273 *cost += COSTS_N_INSNS (1);
10274 if (speed_p)
10275 *cost += extra_cost->fp[0].widen;
10277 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10278 return true;
10281 *cost = LIBCALL_COST (1);
10282 return false;
10284 case FLOAT_TRUNCATE:
10285 if (TARGET_HARD_FLOAT)
10287 if (speed_p)
10288 *cost += extra_cost->fp[mode == DFmode].narrow;
10289 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10290 return true;
10291 /* Vector modes? */
10293 *cost = LIBCALL_COST (1);
10294 return false;
10296 case FMA:
10297 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10299 rtx op0 = XEXP (x, 0);
10300 rtx op1 = XEXP (x, 1);
10301 rtx op2 = XEXP (x, 2);
10304 /* vfms or vfnma. */
10305 if (GET_CODE (op0) == NEG)
10306 op0 = XEXP (op0, 0);
10308 /* vfnms or vfnma. */
10309 if (GET_CODE (op2) == NEG)
10310 op2 = XEXP (op2, 0);
10312 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10313 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10314 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10316 if (speed_p)
10317 *cost += extra_cost->fp[mode == DFmode].fma;
10319 return true;
10322 *cost = LIBCALL_COST (3);
10323 return false;
10325 case FIX:
10326 case UNSIGNED_FIX:
10327 if (TARGET_HARD_FLOAT)
10329 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10330 a vcvt fixed-point conversion. */
10331 if (code == FIX && mode == SImode
10332 && GET_CODE (XEXP (x, 0)) == FIX
10333 && GET_MODE (XEXP (x, 0)) == SFmode
10334 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10335 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10336 > 0)
10338 if (speed_p)
10339 *cost += extra_cost->fp[0].toint;
10341 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10342 code, 0, speed_p);
10343 return true;
10346 if (GET_MODE_CLASS (mode) == MODE_INT)
10348 mode = GET_MODE (XEXP (x, 0));
10349 if (speed_p)
10350 *cost += extra_cost->fp[mode == DFmode].toint;
10351 /* Strip off the 'cost' of rounding towards zero. */
10352 if (GET_CODE (XEXP (x, 0)) == FIX)
10353 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10354 0, speed_p);
10355 else
10356 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10357 /* ??? Increase the cost to deal with transferring from
10358 FP -> CORE registers? */
10359 return true;
10361 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10362 && TARGET_FPU_ARMV8)
10364 if (speed_p)
10365 *cost += extra_cost->fp[mode == DFmode].roundint;
10366 return false;
10368 /* Vector costs? */
10370 *cost = LIBCALL_COST (1);
10371 return false;
10373 case FLOAT:
10374 case UNSIGNED_FLOAT:
10375 if (TARGET_HARD_FLOAT)
10377 /* ??? Increase the cost to deal with transferring from CORE
10378 -> FP registers? */
10379 if (speed_p)
10380 *cost += extra_cost->fp[mode == DFmode].fromint;
10381 return false;
10383 *cost = LIBCALL_COST (1);
10384 return false;
10386 case CALL:
10387 return true;
10389 case ASM_OPERANDS:
10391 /* Just a guess. Guess number of instructions in the asm
10392 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10393 though (see PR60663). */
10394 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10395 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10397 *cost = COSTS_N_INSNS (asm_length + num_operands);
10398 return true;
10400 default:
10401 if (mode != VOIDmode)
10402 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10403 else
10404 *cost = COSTS_N_INSNS (4); /* Who knows? */
10405 return false;
10409 #undef HANDLE_NARROW_SHIFT_ARITH
10411 /* RTX costs entry point. */
10413 static bool
10414 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10415 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10417 bool result;
10418 int code = GET_CODE (x);
10419 gcc_assert (current_tune->insn_extra_cost);
10421 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10422 (enum rtx_code) outer_code,
10423 current_tune->insn_extra_cost,
10424 total, speed);
10426 if (dump_file && (dump_flags & TDF_DETAILS))
10428 print_rtl_single (dump_file, x);
10429 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10430 *total, result ? "final" : "partial");
10432 return result;
10435 /* All address computations that can be done are free, but rtx cost returns
10436 the same for practically all of them. So we weight the different types
10437 of address here in the order (most pref first):
10438 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10439 static inline int
10440 arm_arm_address_cost (rtx x)
10442 enum rtx_code c = GET_CODE (x);
10444 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10445 return 0;
10446 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10447 return 10;
10449 if (c == PLUS)
10451 if (CONST_INT_P (XEXP (x, 1)))
10452 return 2;
10454 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10455 return 3;
10457 return 4;
10460 return 6;
10463 static inline int
10464 arm_thumb_address_cost (rtx x)
10466 enum rtx_code c = GET_CODE (x);
10468 if (c == REG)
10469 return 1;
10470 if (c == PLUS
10471 && REG_P (XEXP (x, 0))
10472 && CONST_INT_P (XEXP (x, 1)))
10473 return 1;
10475 return 2;
10478 static int
10479 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10480 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10482 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10485 /* Adjust cost hook for XScale. */
10486 static bool
10487 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10488 int * cost)
10490 /* Some true dependencies can have a higher cost depending
10491 on precisely how certain input operands are used. */
10492 if (dep_type == 0
10493 && recog_memoized (insn) >= 0
10494 && recog_memoized (dep) >= 0)
10496 int shift_opnum = get_attr_shift (insn);
10497 enum attr_type attr_type = get_attr_type (dep);
10499 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10500 operand for INSN. If we have a shifted input operand and the
10501 instruction we depend on is another ALU instruction, then we may
10502 have to account for an additional stall. */
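/* Illustrative example (editorial addition, not part of the original
   source): if DEP is a shift-type ALU insn such as "mov r2, r3, lsl #2"
   and INSN uses r2 as its shifted operand, e.g. "add r0, r1, r2, lsl #3",
   the overlap check below fires and the cost of the dependency is set
   to 2 to account for the extra XScale stall.  */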
10503 if (shift_opnum != 0
10504 && (attr_type == TYPE_ALU_SHIFT_IMM
10505 || attr_type == TYPE_ALUS_SHIFT_IMM
10506 || attr_type == TYPE_LOGIC_SHIFT_IMM
10507 || attr_type == TYPE_LOGICS_SHIFT_IMM
10508 || attr_type == TYPE_ALU_SHIFT_REG
10509 || attr_type == TYPE_ALUS_SHIFT_REG
10510 || attr_type == TYPE_LOGIC_SHIFT_REG
10511 || attr_type == TYPE_LOGICS_SHIFT_REG
10512 || attr_type == TYPE_MOV_SHIFT
10513 || attr_type == TYPE_MVN_SHIFT
10514 || attr_type == TYPE_MOV_SHIFT_REG
10515 || attr_type == TYPE_MVN_SHIFT_REG))
10517 rtx shifted_operand;
10518 int opno;
10520 /* Get the shifted operand. */
10521 extract_insn (insn);
10522 shifted_operand = recog_data.operand[shift_opnum];
10524 /* Iterate over all the operands in DEP. If we write an operand
10525 that overlaps with SHIFTED_OPERAND, then we have to increase the
10526 cost of this dependency. */
10527 extract_insn (dep);
10528 preprocess_constraints (dep);
10529 for (opno = 0; opno < recog_data.n_operands; opno++)
10531 /* We can ignore strict inputs. */
10532 if (recog_data.operand_type[opno] == OP_IN)
10533 continue;
10535 if (reg_overlap_mentioned_p (recog_data.operand[opno],
10536 shifted_operand))
10538 *cost = 2;
10539 return false;
10544 return true;
10547 /* Adjust cost hook for Cortex A9. */
10548 static bool
10549 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10550 int * cost)
10552 switch (dep_type)
10554 case REG_DEP_ANTI:
10555 *cost = 0;
10556 return false;
10558 case REG_DEP_TRUE:
10559 case REG_DEP_OUTPUT:
10560 if (recog_memoized (insn) >= 0
10561 && recog_memoized (dep) >= 0)
10563 if (GET_CODE (PATTERN (insn)) == SET)
10565 if (GET_MODE_CLASS
10566 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
10567 || GET_MODE_CLASS
10568 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
10570 enum attr_type attr_type_insn = get_attr_type (insn);
10571 enum attr_type attr_type_dep = get_attr_type (dep);
10573 /* By default all dependencies of the form
10574 s0 = s0 <op> s1
10575 s0 = s0 <op> s2
10576 have an extra latency of 1 cycle because
10577 of the input and output dependency in this
10578 case. However, this gets modeled as a true
10579 dependency and hence all these checks. */
10580 if (REG_P (SET_DEST (PATTERN (insn)))
10581 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
10583 /* FMACS is a special case where the dependent
10584 instruction can be issued 3 cycles before
10585 the normal latency in case of an output
10586 dependency. */
10587 if ((attr_type_insn == TYPE_FMACS
10588 || attr_type_insn == TYPE_FMACD)
10589 && (attr_type_dep == TYPE_FMACS
10590 || attr_type_dep == TYPE_FMACD))
10592 if (dep_type == REG_DEP_OUTPUT)
10593 *cost = insn_default_latency (dep) - 3;
10594 else
10595 *cost = insn_default_latency (dep);
10596 return false;
10598 else
10600 if (dep_type == REG_DEP_OUTPUT)
10601 *cost = insn_default_latency (dep) + 1;
10602 else
10603 *cost = insn_default_latency (dep);
10605 return false;
10610 break;
10612 default:
10613 gcc_unreachable ();
10616 return true;
10619 /* Adjust cost hook for FA726TE. */
10620 static bool
10621 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10622 int * cost)
10624 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
10625 has a penalty of 3. */
10626 if (dep_type == REG_DEP_TRUE
10627 && recog_memoized (insn) >= 0
10628 && recog_memoized (dep) >= 0
10629 && get_attr_conds (dep) == CONDS_SET)
10631 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
10632 if (get_attr_conds (insn) == CONDS_USE
10633 && get_attr_type (insn) != TYPE_BRANCH)
10635 *cost = 3;
10636 return false;
10639 if (GET_CODE (PATTERN (insn)) == COND_EXEC
10640 || get_attr_conds (insn) == CONDS_USE)
10642 *cost = 0;
10643 return false;
10647 return true;
10650 /* Implement TARGET_REGISTER_MOVE_COST.
10652 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
10653 it is typically more expensive than a single memory access. We set
10654 the cost to less than two memory accesses so that floating
10655 point to integer conversion does not go through memory. */
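/* Editorial note (not in the original source): arm_memory_move_cost below
   returns 10 for TARGET_32BIT, so the VFP<->general-register cost of 15
   used here lies between one memory access (10) and two (20); the register
   allocator therefore prefers a direct register move over a store/load
   round trip through memory.  */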
10658 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
10659 reg_class_t from, reg_class_t to)
10661 if (TARGET_32BIT)
10663 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
10664 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
10665 return 15;
10666 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
10667 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
10668 return 4;
10669 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
10670 return 20;
10671 else
10672 return 2;
10674 else
10676 if (from == HI_REGS || to == HI_REGS)
10677 return 4;
10678 else
10679 return 2;
10683 /* Implement TARGET_MEMORY_MOVE_COST. */
10686 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
10687 bool in ATTRIBUTE_UNUSED)
10689 if (TARGET_32BIT)
10690 return 10;
10691 else
10693 if (GET_MODE_SIZE (mode) < 4)
10694 return 8;
10695 else
10696 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
10700 /* Vectorizer cost model implementation. */
10702 /* Implement targetm.vectorize.builtin_vectorization_cost. */
10703 static int
10704 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
10705 tree vectype,
10706 int misalign ATTRIBUTE_UNUSED)
10708 unsigned elements;
10710 switch (type_of_cost)
10712 case scalar_stmt:
10713 return current_tune->vec_costs->scalar_stmt_cost;
10715 case scalar_load:
10716 return current_tune->vec_costs->scalar_load_cost;
10718 case scalar_store:
10719 return current_tune->vec_costs->scalar_store_cost;
10721 case vector_stmt:
10722 return current_tune->vec_costs->vec_stmt_cost;
10724 case vector_load:
10725 return current_tune->vec_costs->vec_align_load_cost;
10727 case vector_store:
10728 return current_tune->vec_costs->vec_store_cost;
10730 case vec_to_scalar:
10731 return current_tune->vec_costs->vec_to_scalar_cost;
10733 case scalar_to_vec:
10734 return current_tune->vec_costs->scalar_to_vec_cost;
10736 case unaligned_load:
10737 return current_tune->vec_costs->vec_unalign_load_cost;
10739 case unaligned_store:
10740 return current_tune->vec_costs->vec_unalign_store_cost;
10742 case cond_branch_taken:
10743 return current_tune->vec_costs->cond_taken_branch_cost;
10745 case cond_branch_not_taken:
10746 return current_tune->vec_costs->cond_not_taken_branch_cost;
10748 case vec_perm:
10749 case vec_promote_demote:
10750 return current_tune->vec_costs->vec_stmt_cost;
10752 case vec_construct:
10753 elements = TYPE_VECTOR_SUBPARTS (vectype);
10754 return elements / 2 + 1;
10756 default:
10757 gcc_unreachable ();
10761 /* Implement targetm.vectorize.add_stmt_cost. */
10763 static unsigned
10764 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
10765 struct _stmt_vec_info *stmt_info, int misalign,
10766 enum vect_cost_model_location where)
10768 unsigned *cost = (unsigned *) data;
10769 unsigned retval = 0;
10771 if (flag_vect_cost_model)
10773 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
10774 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
10776 /* Statements in an inner loop relative to the loop being
10777 vectorized are weighted more heavily. The value here is
10778 arbitrary and could potentially be improved with analysis. */
10779 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
10780 count *= 50; /* FIXME. */
10782 retval = (unsigned) (count * stmt_cost);
10783 cost[where] += retval;
10786 return retval;
10789 /* Return true if and only if this insn can dual-issue only as older. */
10790 static bool
10791 cortexa7_older_only (rtx_insn *insn)
10793 if (recog_memoized (insn) < 0)
10794 return false;
10796 switch (get_attr_type (insn))
10798 case TYPE_ALU_DSP_REG:
10799 case TYPE_ALU_SREG:
10800 case TYPE_ALUS_SREG:
10801 case TYPE_LOGIC_REG:
10802 case TYPE_LOGICS_REG:
10803 case TYPE_ADC_REG:
10804 case TYPE_ADCS_REG:
10805 case TYPE_ADR:
10806 case TYPE_BFM:
10807 case TYPE_REV:
10808 case TYPE_MVN_REG:
10809 case TYPE_SHIFT_IMM:
10810 case TYPE_SHIFT_REG:
10811 case TYPE_LOAD_BYTE:
10812 case TYPE_LOAD1:
10813 case TYPE_STORE1:
10814 case TYPE_FFARITHS:
10815 case TYPE_FADDS:
10816 case TYPE_FFARITHD:
10817 case TYPE_FADDD:
10818 case TYPE_FMOV:
10819 case TYPE_F_CVT:
10820 case TYPE_FCMPS:
10821 case TYPE_FCMPD:
10822 case TYPE_FCONSTS:
10823 case TYPE_FCONSTD:
10824 case TYPE_FMULS:
10825 case TYPE_FMACS:
10826 case TYPE_FMULD:
10827 case TYPE_FMACD:
10828 case TYPE_FDIVS:
10829 case TYPE_FDIVD:
10830 case TYPE_F_MRC:
10831 case TYPE_F_MRRC:
10832 case TYPE_F_FLAG:
10833 case TYPE_F_LOADS:
10834 case TYPE_F_STORES:
10835 return true;
10836 default:
10837 return false;
10841 /* Return true if and only if this insn can dual-issue as younger. */
10842 static bool
10843 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
10845 if (recog_memoized (insn) < 0)
10847 if (verbose > 5)
10848 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
10849 return false;
10852 switch (get_attr_type (insn))
10854 case TYPE_ALU_IMM:
10855 case TYPE_ALUS_IMM:
10856 case TYPE_LOGIC_IMM:
10857 case TYPE_LOGICS_IMM:
10858 case TYPE_EXTEND:
10859 case TYPE_MVN_IMM:
10860 case TYPE_MOV_IMM:
10861 case TYPE_MOV_REG:
10862 case TYPE_MOV_SHIFT:
10863 case TYPE_MOV_SHIFT_REG:
10864 case TYPE_BRANCH:
10865 case TYPE_CALL:
10866 return true;
10867 default:
10868 return false;
10873 /* Look for an instruction that can dual issue only as an older
10874 instruction, and move it in front of any instructions that can
10875 dual-issue as younger, while preserving the relative order of all
10876 other instructions in the ready list. This is a heuristic to help
10877 dual-issue in later cycles, by postponing issue of more flexible
10878 instructions. This heuristic may affect dual issue opportunities
10879 in the current cycle. */
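/* Illustrative sketch (editorial addition, not part of the original
   source): if the ready list, starting from the head (issued first),
   contains { MOV_IMM, ALU_SREG, ... }, then MOV_IMM can dual-issue as
   younger while ALU_SREG can only issue as older, so the code below hoists
   the ALU_SREG insn in front of MOV_IMM, keeping the more flexible insn
   available to pair with whatever issues next.  */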
10880 static void
10881 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
10882 int *n_readyp, int clock)
10884 int i;
10885 int first_older_only = -1, first_younger = -1;
10887 if (verbose > 5)
10888 fprintf (file,
10889 ";; sched_reorder for cycle %d with %d insns in ready list\n",
10890 clock,
10891 *n_readyp);
10893 /* Traverse the ready list from the head (the instruction to issue
10894 first), looking for the first instruction that can issue as
10895 younger and the first instruction that can dual-issue only as
10896 older. */
10897 for (i = *n_readyp - 1; i >= 0; i--)
10899 rtx_insn *insn = ready[i];
10900 if (cortexa7_older_only (insn))
10902 first_older_only = i;
10903 if (verbose > 5)
10904 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
10905 break;
10907 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
10908 first_younger = i;
10911 /* Nothing to reorder because either no younger insn found or insn
10912 that can dual-issue only as older appears before any insn that
10913 can dual-issue as younger. */
10914 if (first_younger == -1)
10916 if (verbose > 5)
10917 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
10918 return;
10921 /* Nothing to reorder because no older-only insn in the ready list. */
10922 if (first_older_only == -1)
10924 if (verbose > 5)
10925 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
10926 return;
10929 /* Move first_older_only insn before first_younger. */
10930 if (verbose > 5)
10931 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
10932 INSN_UID(ready [first_older_only]),
10933 INSN_UID(ready [first_younger]));
10934 rtx_insn *first_older_only_insn = ready [first_older_only];
10935 for (i = first_older_only; i < first_younger; i++)
10937 ready[i] = ready[i+1];
10940 ready[i] = first_older_only_insn;
10941 return;
10944 /* Implement TARGET_SCHED_REORDER. */
10945 static int
10946 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
10947 int clock)
10949 switch (arm_tune)
10951 case TARGET_CPU_cortexa7:
10952 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
10953 break;
10954 default:
10955 /* Do nothing for other cores. */
10956 break;
10959 return arm_issue_rate ();
10962 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
10963 It corrects the value of COST based on the relationship between
10964 INSN and DEP through the dependence of type DEP_TYPE. It returns the new
10965 value. There is a per-core adjust_cost hook to adjust scheduler costs
10966 and the per-core hook can choose to completely override the generic
10967 adjust_cost function. Only put bits of code into arm_adjust_cost that
10968 are common across all cores. */
10969 static int
10970 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10971 unsigned int)
10973 rtx i_pat, d_pat;
10975 /* When generating Thumb-1 code, we want to place flag-setting operations
10976 close to a conditional branch which depends on them, so that we can
10977 omit the comparison. */
10978 if (TARGET_THUMB1
10979 && dep_type == 0
10980 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
10981 && recog_memoized (dep) >= 0
10982 && get_attr_conds (dep) == CONDS_SET)
10983 return 0;
10985 if (current_tune->sched_adjust_cost != NULL)
10987 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
10988 return cost;
10991 /* XXX Is this strictly true? */
10992 if (dep_type == REG_DEP_ANTI
10993 || dep_type == REG_DEP_OUTPUT)
10994 return 0;
10996 /* Call insns don't incur a stall, even if they follow a load. */
10997 if (dep_type == 0
10998 && CALL_P (insn))
10999 return 1;
11001 if ((i_pat = single_set (insn)) != NULL
11002 && MEM_P (SET_SRC (i_pat))
11003 && (d_pat = single_set (dep)) != NULL
11004 && MEM_P (SET_DEST (d_pat)))
11006 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11007 /* This is a load after a store, there is no conflict if the load reads
11008 from a cached area. Assume that loads from the stack, and from the
11009 constant pool are cached, and that others will miss. This is a
11010 hack. */
11012 if ((GET_CODE (src_mem) == SYMBOL_REF
11013 && CONSTANT_POOL_ADDRESS_P (src_mem))
11014 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11015 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11016 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11017 return 1;
11020 return cost;
11024 arm_max_conditional_execute (void)
11026 return max_insns_skipped;
11029 static int
11030 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11032 if (TARGET_32BIT)
11033 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11034 else
11035 return (optimize > 0) ? 2 : 0;
11038 static int
11039 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11041 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11044 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11045 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11046 sequences of non-executed instructions in IT blocks probably take the same
11047 amount of time as executed instructions (and the IT instruction itself takes
11048 space in icache). This function was experimentally determined to give good
11049 results on a popular embedded benchmark. */
11051 static int
11052 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11054 return (TARGET_32BIT && speed_p) ? 1
11055 : arm_default_branch_cost (speed_p, predictable_p);
11058 static int
11059 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11061 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11064 static bool fp_consts_inited = false;
11066 static REAL_VALUE_TYPE value_fp0;
11068 static void
11069 init_fp_table (void)
11071 REAL_VALUE_TYPE r;
11073 r = REAL_VALUE_ATOF ("0", DFmode);
11074 value_fp0 = r;
11075 fp_consts_inited = true;
11078 /* Return TRUE if rtx X is a valid immediate FP constant. */
11080 arm_const_double_rtx (rtx x)
11082 const REAL_VALUE_TYPE *r;
11084 if (!fp_consts_inited)
11085 init_fp_table ();
11087 r = CONST_DOUBLE_REAL_VALUE (x);
11088 if (REAL_VALUE_MINUS_ZERO (*r))
11089 return 0;
11091 if (real_equal (r, &value_fp0))
11092 return 1;
11094 return 0;
11097 /* VFPv3 has a fairly wide range of representable immediates, formed from
11098 "quarter-precision" floating-point values. These can be evaluated using this
11099 formula (with ^ for exponentiation):
11101 -1^s * n * 2^-r
11103 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11104 16 <= n <= 31 and 0 <= r <= 7.
11106 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11108 - A (most-significant) is the sign bit.
11109 - BCD are the exponent (encoded as r XOR 3).
11110 - EFGH are the mantissa (encoded as n - 16).
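/* Worked example (editorial addition, not part of the original source):
   1.0 = -1^0 * 16 * 2^-4, i.e. s = 0, n = 16, r = 4, which encodes as
   (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70; likewise 2.0 = 16 * 2^-3
   encodes as 0x00.  These match the imm8 values used by fconsts/vmov.f32
   for those constants.  */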
11113 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11114 fconst[sd] instruction, or -1 if X isn't suitable. */
11115 static int
11116 vfp3_const_double_index (rtx x)
11118 REAL_VALUE_TYPE r, m;
11119 int sign, exponent;
11120 unsigned HOST_WIDE_INT mantissa, mant_hi;
11121 unsigned HOST_WIDE_INT mask;
11122 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11123 bool fail;
11125 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11126 return -1;
11128 r = *CONST_DOUBLE_REAL_VALUE (x);
11130 /* We can't represent these things, so detect them first. */
11131 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11132 return -1;
11134 /* Extract sign, exponent and mantissa. */
11135 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11136 r = real_value_abs (&r);
11137 exponent = REAL_EXP (&r);
11138 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11139 highest (sign) bit, with a fixed binary point at bit point_pos.
11140 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11141 bits for the mantissa, this may fail (low bits would be lost). */
11142 real_ldexp (&m, &r, point_pos - exponent);
11143 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11144 mantissa = w.elt (0);
11145 mant_hi = w.elt (1);
11147 /* If there are bits set in the low part of the mantissa, we can't
11148 represent this value. */
11149 if (mantissa != 0)
11150 return -1;
11152 /* Now make it so that mantissa contains the most-significant bits, and move
11153 the point_pos to indicate that the least-significant bits have been
11154 discarded. */
11155 point_pos -= HOST_BITS_PER_WIDE_INT;
11156 mantissa = mant_hi;
11158 /* We can permit four significant bits of mantissa only, plus a high bit
11159 which is always 1. */
11160 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11161 if ((mantissa & mask) != 0)
11162 return -1;
11164 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11165 mantissa >>= point_pos - 5;
11167 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11168 floating-point immediate zero with Neon using an integer-zero load, but
11169 that case is handled elsewhere.) */
11170 if (mantissa == 0)
11171 return -1;
11173 gcc_assert (mantissa >= 16 && mantissa <= 31);
11175 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11176 normalized significands are in the range [1, 2). (Our mantissa is shifted
11177 left 4 places at this point relative to normalized IEEE754 values). GCC
11178 internally uses [0.5, 1) (see real.c), so the exponent returned from
11179 REAL_EXP must be altered. */
11180 exponent = 5 - exponent;
11182 if (exponent < 0 || exponent > 7)
11183 return -1;
11185 /* Sign, mantissa and exponent are now in the correct form to plug into the
11186 formula described in the comment above. */
11187 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11190 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11192 vfp3_const_double_rtx (rtx x)
11194 if (!TARGET_VFP3)
11195 return 0;
11197 return vfp3_const_double_index (x) != -1;
11200 /* Recognize immediates which can be used in various Neon instructions. Legal
11201 immediates are described by the following table (for VMVN variants, the
11202 bitwise inverse of the constant shown is recognized. In either case, VMOV
11203 is output and the correct instruction to use for a given constant is chosen
11204 by the assembler). The constant shown is replicated across all elements of
11205 the destination vector.
11207 insn elems variant constant (binary)
11208 ---- ----- ------- -----------------
11209 vmov i32 0 00000000 00000000 00000000 abcdefgh
11210 vmov i32 1 00000000 00000000 abcdefgh 00000000
11211 vmov i32 2 00000000 abcdefgh 00000000 00000000
11212 vmov i32 3 abcdefgh 00000000 00000000 00000000
11213 vmov i16 4 00000000 abcdefgh
11214 vmov i16 5 abcdefgh 00000000
11215 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11216 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11217 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11218 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11219 vmvn i16 10 00000000 abcdefgh
11220 vmvn i16 11 abcdefgh 00000000
11221 vmov i32 12 00000000 00000000 abcdefgh 11111111
11222 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11223 vmov i32 14 00000000 abcdefgh 11111111 11111111
11224 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11225 vmov i8 16 abcdefgh
11226 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11227 eeeeeeee ffffffff gggggggg hhhhhhhh
11228 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11229 vmov f32 19 00000000 00000000 00000000 00000000
11231 For case 18, B = !b. Representable values are exactly those accepted by
11232 vfp3_const_double_index, but are output as floating-point numbers rather
11233 than indices.
11235 For case 19, we will change it to vmov.i32 when assembling.
11237 Variants 0-5 (inclusive) may also be used as immediates for the second
11238 operand of VORR/VBIC instructions.
11240 The INVERSE argument causes the bitwise inverse of the given operand to be
11241 recognized instead (used for recognizing legal immediates for the VAND/VORN
11242 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11243 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11244 output, rather than the real insns vbic/vorr).
11246 INVERSE makes no difference to the recognition of float vectors.
11248 The return value is the variant of immediate as shown in the above table, or
11249 -1 if the given value doesn't match any of the listed patterns.
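/* Illustrative example (editorial addition, not part of the original
   source): a V4SI constant vector whose elements are all 0x000000ab splats
   to the byte pattern ab 00 00 00, matching variant 0 above; the function
   then returns 0 with *ELEMENTWIDTH set to 32 and *MODCONST set to 0xab.  */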
11251 static int
11252 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11253 rtx *modconst, int *elementwidth)
11255 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11256 matches = 1; \
11257 for (i = 0; i < idx; i += (STRIDE)) \
11258 if (!(TEST)) \
11259 matches = 0; \
11260 if (matches) \
11262 immtype = (CLASS); \
11263 elsize = (ELSIZE); \
11264 break; \
11267 unsigned int i, elsize = 0, idx = 0, n_elts;
11268 unsigned int innersize;
11269 unsigned char bytes[16];
11270 int immtype = -1, matches;
11271 unsigned int invmask = inverse ? 0xff : 0;
11272 bool vector = GET_CODE (op) == CONST_VECTOR;
11274 if (vector)
11275 n_elts = CONST_VECTOR_NUNITS (op);
11276 else
11278 n_elts = 1;
11279 if (mode == VOIDmode)
11280 mode = DImode;
11283 innersize = GET_MODE_UNIT_SIZE (mode);
11285 /* Vectors of float constants. */
11286 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11288 rtx el0 = CONST_VECTOR_ELT (op, 0);
11290 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11291 return -1;
11293 /* FP16 vectors cannot be represented. */
11294 if (GET_MODE_INNER (mode) == HFmode)
11295 return -1;
11297 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11298 are distinct in this context. */
11299 if (!const_vec_duplicate_p (op))
11300 return -1;
11302 if (modconst)
11303 *modconst = CONST_VECTOR_ELT (op, 0);
11305 if (elementwidth)
11306 *elementwidth = 0;
11308 if (el0 == CONST0_RTX (GET_MODE (el0)))
11309 return 19;
11310 else
11311 return 18;
11314 /* Splat vector constant out into a byte vector. */
11315 for (i = 0; i < n_elts; i++)
11317 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11318 unsigned HOST_WIDE_INT elpart;
11320 gcc_assert (CONST_INT_P (el));
11321 elpart = INTVAL (el);
11323 for (unsigned int byte = 0; byte < innersize; byte++)
11325 bytes[idx++] = (elpart & 0xff) ^ invmask;
11326 elpart >>= BITS_PER_UNIT;
11330 /* Sanity check. */
11331 gcc_assert (idx == GET_MODE_SIZE (mode));
11335 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11336 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11338 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11339 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11341 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11342 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11344 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11345 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11347 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11349 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11351 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11352 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11354 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11355 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11357 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11358 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11360 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11361 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11363 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11365 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11367 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11368 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11370 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11371 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11373 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11374 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11376 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11377 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11379 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11381 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11382 && bytes[i] == bytes[(i + 8) % idx]);
11384 while (0);
11386 if (immtype == -1)
11387 return -1;
11389 if (elementwidth)
11390 *elementwidth = elsize;
11392 if (modconst)
11394 unsigned HOST_WIDE_INT imm = 0;
11396 /* Un-invert bytes of recognized vector, if necessary. */
11397 if (invmask != 0)
11398 for (i = 0; i < idx; i++)
11399 bytes[i] ^= invmask;
11401 if (immtype == 17)
11403 /* FIXME: Broken on 32-bit H_W_I hosts. */
11404 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11406 for (i = 0; i < 8; i++)
11407 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11408 << (i * BITS_PER_UNIT);
11410 *modconst = GEN_INT (imm);
11412 else
11414 unsigned HOST_WIDE_INT imm = 0;
11416 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11417 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11419 *modconst = GEN_INT (imm);
11423 return immtype;
11424 #undef CHECK
11427 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11428 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11429 float elements), and a modified constant (whatever should be output for a
11430 VMOV) in *MODCONST. */
11433 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11434 rtx *modconst, int *elementwidth)
11436 rtx tmpconst;
11437 int tmpwidth;
11438 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11440 if (retval == -1)
11441 return 0;
11443 if (modconst)
11444 *modconst = tmpconst;
11446 if (elementwidth)
11447 *elementwidth = tmpwidth;
11449 return 1;
11452 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11453 the immediate is valid, write a constant suitable for using as an operand
11454 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11455 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11458 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11459 rtx *modconst, int *elementwidth)
11461 rtx tmpconst;
11462 int tmpwidth;
11463 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11465 if (retval < 0 || retval > 5)
11466 return 0;
11468 if (modconst)
11469 *modconst = tmpconst;
11471 if (elementwidth)
11472 *elementwidth = tmpwidth;
11474 return 1;
11477 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11478 the immediate is valid, write a constant suitable for using as an operand
11479 to VSHR/VSHL to *MODCONST and the corresponding element width to
 11480    *ELEMENTWIDTH.  ISLEFTSHIFT indicates whether this is a left or a right
 11481    shift, because they have different limitations. */
11484 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11485 rtx *modconst, int *elementwidth,
11486 bool isleftshift)
11488 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11489 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11490 unsigned HOST_WIDE_INT last_elt = 0;
11491 unsigned HOST_WIDE_INT maxshift;
11493 /* Split vector constant out into a byte vector. */
11494 for (i = 0; i < n_elts; i++)
11496 rtx el = CONST_VECTOR_ELT (op, i);
11497 unsigned HOST_WIDE_INT elpart;
11499 if (CONST_INT_P (el))
11500 elpart = INTVAL (el);
11501 else if (CONST_DOUBLE_P (el))
11502 return 0;
11503 else
11504 gcc_unreachable ();
11506 if (i != 0 && elpart != last_elt)
11507 return 0;
11509 last_elt = elpart;
11512 /* Shift less than element size. */
11513 maxshift = innersize * 8;
11515 if (isleftshift)
11517 /* Left shift immediate value can be from 0 to <size>-1. */
11518 if (last_elt >= maxshift)
11519 return 0;
11521 else
11523 /* Right shift immediate value can be from 1 to <size>. */
11524 if (last_elt == 0 || last_elt > maxshift)
11525 return 0;
11528 if (elementwidth)
11529 *elementwidth = innersize * 8;
11531 if (modconst)
11532 *modconst = CONST_VECTOR_ELT (op, 0);
11534 return 1;
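/* For example, a V8HI vector with every element equal to 3 has an inner size
   of 2 bytes, so MAXSHIFT is 16: it is accepted both as a left-shift count
   (0 <= 3 < 16, e.g. for vshl.i16) and as a right-shift count (1 <= 3 <= 16,
   e.g. for vshr).  A count of 16 would only be valid for the right shift,
   and a count of 0 only for the left shift.  */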
11537 /* Return a string suitable for output of Neon immediate logic operation
11538 MNEM. */
11540 char *
11541 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
11542 int inverse, int quad)
11544 int width, is_valid;
11545 static char templ[40];
11547 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
11549 gcc_assert (is_valid != 0);
11551 if (quad)
11552 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
11553 else
11554 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
11556 return templ;
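/* For illustration: called with MNEM "vorr" for a V4SI operand whose
   immediate validates with an element width of 32 and QUAD set, this returns
   the template "vorr.i32\t%q0, %2", which the output machinery then prints
   as something like "vorr.i32 q0, #255".  */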
11559 /* Return a string suitable for output of Neon immediate shift operation
11560 (VSHR or VSHL) MNEM. */
11562 char *
11563 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
11564 machine_mode mode, int quad,
11565 bool isleftshift)
11567 int width, is_valid;
11568 static char templ[40];
11570 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
11571 gcc_assert (is_valid != 0);
11573 if (quad)
11574 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
11575 else
11576 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
11578 return templ;
11581 /* Output a sequence of pairwise operations to implement a reduction.
11582 NOTE: We do "too much work" here, because pairwise operations work on two
11583 registers-worth of operands in one go. Unfortunately we can't exploit those
11584 extra calculations to do the full operation in fewer steps, I don't think.
11585 Although all vector elements of the result but the first are ignored, we
11586 actually calculate the same result in each of the elements. An alternative
11587 such as initially loading a vector with zero to use as each of the second
11588 operands would use up an additional register and take an extra instruction,
11589 for no particular gain. */
11591 void
11592 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
11593 rtx (*reduc) (rtx, rtx, rtx))
11595 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
11596 rtx tmpsum = op1;
11598 for (i = parts / 2; i >= 1; i /= 2)
11600 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
11601 emit_insn (reduc (dest, tmpsum, tmpsum));
11602 tmpsum = dest;
11606 /* If VALS is a vector constant that can be loaded into a register
11607 using VDUP, generate instructions to do so and return an RTX to
11608 assign to the register. Otherwise return NULL_RTX. */
11610 static rtx
11611 neon_vdup_constant (rtx vals)
11613 machine_mode mode = GET_MODE (vals);
11614 machine_mode inner_mode = GET_MODE_INNER (mode);
11615 rtx x;
11617 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
11618 return NULL_RTX;
11620 if (!const_vec_duplicate_p (vals, &x))
11621 /* The elements are not all the same. We could handle repeating
11622 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
11623 {0, C, 0, C, 0, C, 0, C} which can be loaded using
11624 vdup.i16). */
11625 return NULL_RTX;
11627 /* We can load this constant by using VDUP and a constant in a
11628 single ARM register. This will be cheaper than a vector
11629 load. */
11631 x = copy_to_mode_reg (inner_mode, x);
11632 return gen_rtx_VEC_DUPLICATE (mode, x);
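/* For example, the V4SI constant {5, 5, 5, 5} is handled here by moving 5
   into a core register and returning (vec_duplicate:V4SI (reg:SI rN)), which
   is ultimately emitted as something like "vdup.32 q0, rN".  */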
11635 /* Generate code to load VALS, which is a PARALLEL containing only
11636 constants (for vec_init) or CONST_VECTOR, efficiently into a
11637 register. Returns an RTX to copy into the register, or NULL_RTX
11638 for a PARALLEL that can not be converted into a CONST_VECTOR. */
11641 neon_make_constant (rtx vals)
11643 machine_mode mode = GET_MODE (vals);
11644 rtx target;
11645 rtx const_vec = NULL_RTX;
11646 int n_elts = GET_MODE_NUNITS (mode);
11647 int n_const = 0;
11648 int i;
11650 if (GET_CODE (vals) == CONST_VECTOR)
11651 const_vec = vals;
11652 else if (GET_CODE (vals) == PARALLEL)
11654 /* A CONST_VECTOR must contain only CONST_INTs and
11655 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11656 Only store valid constants in a CONST_VECTOR. */
11657 for (i = 0; i < n_elts; ++i)
11659 rtx x = XVECEXP (vals, 0, i);
11660 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11661 n_const++;
11663 if (n_const == n_elts)
11664 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
11666 else
11667 gcc_unreachable ();
11669 if (const_vec != NULL
11670 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
11671 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
11672 return const_vec;
11673 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
11674 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
11675 pipeline cycle; creating the constant takes one or two ARM
11676 pipeline cycles. */
11677 return target;
11678 else if (const_vec != NULL_RTX)
11679 /* Load from constant pool. On Cortex-A8 this takes two cycles
11680 (for either double or quad vectors). We can not take advantage
11681 of single-cycle VLD1 because we need a PC-relative addressing
11682 mode. */
11683 return const_vec;
11684 else
11685 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11686 We can not construct an initializer. */
11687 return NULL_RTX;
11690 /* Initialize vector TARGET to VALS. */
11692 void
11693 neon_expand_vector_init (rtx target, rtx vals)
11695 machine_mode mode = GET_MODE (target);
11696 machine_mode inner_mode = GET_MODE_INNER (mode);
11697 int n_elts = GET_MODE_NUNITS (mode);
11698 int n_var = 0, one_var = -1;
11699 bool all_same = true;
11700 rtx x, mem;
11701 int i;
11703 for (i = 0; i < n_elts; ++i)
11705 x = XVECEXP (vals, 0, i);
11706 if (!CONSTANT_P (x))
11707 ++n_var, one_var = i;
11709 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
11710 all_same = false;
11713 if (n_var == 0)
11715 rtx constant = neon_make_constant (vals);
11716 if (constant != NULL_RTX)
11718 emit_move_insn (target, constant);
11719 return;
11723 /* Splat a single non-constant element if we can. */
11724 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
11726 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
11727 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
11728 return;
11731 /* One field is non-constant. Load constant then overwrite varying
11732 field. This is more efficient than using the stack. */
11733 if (n_var == 1)
11735 rtx copy = copy_rtx (vals);
11736 rtx index = GEN_INT (one_var);
11738 /* Load constant part of vector, substitute neighboring value for
11739 varying element. */
11740 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
11741 neon_expand_vector_init (target, copy);
11743 /* Insert variable. */
11744 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
11745 switch (mode)
11747 case V8QImode:
11748 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
11749 break;
11750 case V16QImode:
11751 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
11752 break;
11753 case V4HImode:
11754 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
11755 break;
11756 case V8HImode:
11757 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
11758 break;
11759 case V2SImode:
11760 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
11761 break;
11762 case V4SImode:
11763 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
11764 break;
11765 case V2SFmode:
11766 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
11767 break;
11768 case V4SFmode:
11769 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
11770 break;
11771 case V2DImode:
11772 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
11773 break;
11774 default:
11775 gcc_unreachable ();
11777 return;
11780 /* Construct the vector in memory one field at a time
11781 and load the whole vector. */
11782 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
11783 for (i = 0; i < n_elts; i++)
11784 emit_move_insn (adjust_address_nv (mem, inner_mode,
11785 i * GET_MODE_SIZE (inner_mode)),
11786 XVECEXP (vals, 0, i));
11787 emit_move_insn (target, mem);
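/* A rough example of the single-variable case above: initialising a V4SI
   vector to {x, 1, 2, 3} where only x lives in a register first loads the
   constant {1, 1, 2, 3} (the varying element replaced by its neighbour) and
   then inserts x into lane 0 with a vset_lane pattern, i.e. something like
   "vmov.32 d0[0], rX".  */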
 11790 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
 11791    an error if it doesn't.  EXP indicates the source location, which includes the
11792 inlining history for intrinsics. */
11794 static void
11795 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
11796 const_tree exp, const char *desc)
11798 HOST_WIDE_INT lane;
11800 gcc_assert (CONST_INT_P (operand));
11802 lane = INTVAL (operand);
11804 if (lane < low || lane >= high)
11806 if (exp)
11807 error ("%K%s %wd out of range %wd - %wd",
11808 exp, desc, lane, low, high - 1);
11809 else
11810 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
11814 /* Bounds-check lanes. */
11816 void
11817 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
11818 const_tree exp)
11820 bounds_check (operand, low, high, exp, "lane");
11823 /* Bounds-check constants. */
11825 void
11826 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
11828 bounds_check (operand, low, high, NULL_TREE, "constant");
11831 HOST_WIDE_INT
11832 neon_element_bits (machine_mode mode)
11834 return GET_MODE_UNIT_BITSIZE (mode);
11838 /* Predicates for `match_operand' and `match_operator'. */
11840 /* Return TRUE if OP is a valid coprocessor memory address pattern.
11841 WB is true if full writeback address modes are allowed and is false
11842 if limited writeback address modes (POST_INC and PRE_DEC) are
11843 allowed. */
11846 arm_coproc_mem_operand (rtx op, bool wb)
11848 rtx ind;
11850 /* Reject eliminable registers. */
11851 if (! (reload_in_progress || reload_completed || lra_in_progress)
11852 && ( reg_mentioned_p (frame_pointer_rtx, op)
11853 || reg_mentioned_p (arg_pointer_rtx, op)
11854 || reg_mentioned_p (virtual_incoming_args_rtx, op)
11855 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
11856 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
11857 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
11858 return FALSE;
11860 /* Constants are converted into offsets from labels. */
11861 if (!MEM_P (op))
11862 return FALSE;
11864 ind = XEXP (op, 0);
11866 if (reload_completed
11867 && (GET_CODE (ind) == LABEL_REF
11868 || (GET_CODE (ind) == CONST
11869 && GET_CODE (XEXP (ind, 0)) == PLUS
11870 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
11871 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
11872 return TRUE;
11874 /* Match: (mem (reg)). */
11875 if (REG_P (ind))
11876 return arm_address_register_rtx_p (ind, 0);
 11878   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
11879 acceptable in any case (subject to verification by
11880 arm_address_register_rtx_p). We need WB to be true to accept
11881 PRE_INC and POST_DEC. */
11882 if (GET_CODE (ind) == POST_INC
11883 || GET_CODE (ind) == PRE_DEC
11884 || (wb
11885 && (GET_CODE (ind) == PRE_INC
11886 || GET_CODE (ind) == POST_DEC)))
11887 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
11889 if (wb
11890 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
11891 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
11892 && GET_CODE (XEXP (ind, 1)) == PLUS
11893 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
11894 ind = XEXP (ind, 1);
11896 /* Match:
11897 (plus (reg)
11898 (const)). */
11899 if (GET_CODE (ind) == PLUS
11900 && REG_P (XEXP (ind, 0))
11901 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
11902 && CONST_INT_P (XEXP (ind, 1))
11903 && INTVAL (XEXP (ind, 1)) > -1024
11904 && INTVAL (XEXP (ind, 1)) < 1024
11905 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
11906 return TRUE;
11908 return FALSE;
11911 /* Return TRUE if OP is a memory operand which we can load or store a vector
11912 to/from. TYPE is one of the following values:
 11913    0 - Vector load/store (vldr)
11914 1 - Core registers (ldm)
11915 2 - Element/structure loads (vld1)
11918 neon_vector_mem_operand (rtx op, int type, bool strict)
11920 rtx ind;
11922 /* Reject eliminable registers. */
11923 if (strict && ! (reload_in_progress || reload_completed)
11924 && (reg_mentioned_p (frame_pointer_rtx, op)
11925 || reg_mentioned_p (arg_pointer_rtx, op)
11926 || reg_mentioned_p (virtual_incoming_args_rtx, op)
11927 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
11928 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
11929 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
11930 return FALSE;
11932 /* Constants are converted into offsets from labels. */
11933 if (!MEM_P (op))
11934 return FALSE;
11936 ind = XEXP (op, 0);
11938 if (reload_completed
11939 && (GET_CODE (ind) == LABEL_REF
11940 || (GET_CODE (ind) == CONST
11941 && GET_CODE (XEXP (ind, 0)) == PLUS
11942 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
11943 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
11944 return TRUE;
11946 /* Match: (mem (reg)). */
11947 if (REG_P (ind))
11948 return arm_address_register_rtx_p (ind, 0);
11950 /* Allow post-increment with Neon registers. */
11951 if ((type != 1 && GET_CODE (ind) == POST_INC)
11952 || (type == 0 && GET_CODE (ind) == PRE_DEC))
11953 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
 11955   /* Allow post-increment by register for VLDn.  */
11956 if (type == 2 && GET_CODE (ind) == POST_MODIFY
11957 && GET_CODE (XEXP (ind, 1)) == PLUS
11958 && REG_P (XEXP (XEXP (ind, 1), 1)))
11959 return true;
11961 /* Match:
11962 (plus (reg)
11963 (const)). */
11964 if (type == 0
11965 && GET_CODE (ind) == PLUS
11966 && REG_P (XEXP (ind, 0))
11967 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
11968 && CONST_INT_P (XEXP (ind, 1))
11969 && INTVAL (XEXP (ind, 1)) > -1024
11970 /* For quad modes, we restrict the constant offset to be slightly less
11971 than what the instruction format permits. We have no such constraint
11972 on double mode offsets. (This must match arm_legitimate_index_p.) */
11973 && (INTVAL (XEXP (ind, 1))
11974 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
11975 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
11976 return TRUE;
11978 return FALSE;
11981 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
11982 type. */
11984 neon_struct_mem_operand (rtx op)
11986 rtx ind;
11988 /* Reject eliminable registers. */
11989 if (! (reload_in_progress || reload_completed)
11990 && ( reg_mentioned_p (frame_pointer_rtx, op)
11991 || reg_mentioned_p (arg_pointer_rtx, op)
11992 || reg_mentioned_p (virtual_incoming_args_rtx, op)
11993 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
11994 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
11995 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
11996 return FALSE;
11998 /* Constants are converted into offsets from labels. */
11999 if (!MEM_P (op))
12000 return FALSE;
12002 ind = XEXP (op, 0);
12004 if (reload_completed
12005 && (GET_CODE (ind) == LABEL_REF
12006 || (GET_CODE (ind) == CONST
12007 && GET_CODE (XEXP (ind, 0)) == PLUS
12008 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12009 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12010 return TRUE;
12012 /* Match: (mem (reg)). */
12013 if (REG_P (ind))
12014 return arm_address_register_rtx_p (ind, 0);
12016 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12017 if (GET_CODE (ind) == POST_INC
12018 || GET_CODE (ind) == PRE_DEC)
12019 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12021 return FALSE;
12024 /* Return true if X is a register that will be eliminated later on. */
12026 arm_eliminable_register (rtx x)
12028 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12029 || REGNO (x) == ARG_POINTER_REGNUM
12030 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12031 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
 12034 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
 12035    coprocessor registers.  Otherwise return NO_REGS.  */
12037 enum reg_class
12038 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12040 if (mode == HFmode)
12042 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12043 return GENERAL_REGS;
12044 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12045 return NO_REGS;
12046 return GENERAL_REGS;
12049 /* The neon move patterns handle all legitimate vector and struct
12050 addresses. */
12051 if (TARGET_NEON
12052 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12053 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12054 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12055 || VALID_NEON_STRUCT_MODE (mode)))
12056 return NO_REGS;
12058 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12059 return NO_REGS;
12061 return GENERAL_REGS;
12064 /* Values which must be returned in the most-significant end of the return
12065 register. */
12067 static bool
12068 arm_return_in_msb (const_tree valtype)
12070 return (TARGET_AAPCS_BASED
12071 && BYTES_BIG_ENDIAN
12072 && (AGGREGATE_TYPE_P (valtype)
12073 || TREE_CODE (valtype) == COMPLEX_TYPE
12074 || FIXED_POINT_TYPE_P (valtype)));
12077 /* Return TRUE if X references a SYMBOL_REF. */
12079 symbol_mentioned_p (rtx x)
12081 const char * fmt;
12082 int i;
12084 if (GET_CODE (x) == SYMBOL_REF)
12085 return 1;
12087 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12088 are constant offsets, not symbols. */
12089 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12090 return 0;
12092 fmt = GET_RTX_FORMAT (GET_CODE (x));
12094 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12096 if (fmt[i] == 'E')
12098 int j;
12100 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12101 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12102 return 1;
12104 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12105 return 1;
12108 return 0;
12111 /* Return TRUE if X references a LABEL_REF. */
12113 label_mentioned_p (rtx x)
12115 const char * fmt;
12116 int i;
12118 if (GET_CODE (x) == LABEL_REF)
12119 return 1;
12121 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12122 instruction, but they are constant offsets, not symbols. */
12123 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12124 return 0;
12126 fmt = GET_RTX_FORMAT (GET_CODE (x));
12127 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12129 if (fmt[i] == 'E')
12131 int j;
12133 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12134 if (label_mentioned_p (XVECEXP (x, i, j)))
12135 return 1;
12137 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12138 return 1;
12141 return 0;
12145 tls_mentioned_p (rtx x)
12147 switch (GET_CODE (x))
12149 case CONST:
12150 return tls_mentioned_p (XEXP (x, 0));
12152 case UNSPEC:
12153 if (XINT (x, 1) == UNSPEC_TLS)
12154 return 1;
12156 /* Fall through. */
12157 default:
12158 return 0;
12162 /* Must not copy any rtx that uses a pc-relative address.
12163 Also, disallow copying of load-exclusive instructions that
12164 may appear after splitting of compare-and-swap-style operations
12165 so as to prevent those loops from being transformed away from their
12166 canonical forms (see PR 69904). */
12168 static bool
12169 arm_cannot_copy_insn_p (rtx_insn *insn)
12171 /* The tls call insn cannot be copied, as it is paired with a data
12172 word. */
12173 if (recog_memoized (insn) == CODE_FOR_tlscall)
12174 return true;
12176 subrtx_iterator::array_type array;
12177 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12179 const_rtx x = *iter;
12180 if (GET_CODE (x) == UNSPEC
12181 && (XINT (x, 1) == UNSPEC_PIC_BASE
12182 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12183 return true;
12186 rtx set = single_set (insn);
12187 if (set)
12189 rtx src = SET_SRC (set);
12190 if (GET_CODE (src) == ZERO_EXTEND)
12191 src = XEXP (src, 0);
12193 /* Catch the load-exclusive and load-acquire operations. */
12194 if (GET_CODE (src) == UNSPEC_VOLATILE
12195 && (XINT (src, 1) == VUNSPEC_LL
12196 || XINT (src, 1) == VUNSPEC_LAX))
12197 return true;
12199 return false;
12202 enum rtx_code
12203 minmax_code (rtx x)
12205 enum rtx_code code = GET_CODE (x);
12207 switch (code)
12209 case SMAX:
12210 return GE;
12211 case SMIN:
12212 return LE;
12213 case UMIN:
12214 return LEU;
12215 case UMAX:
12216 return GEU;
12217 default:
12218 gcc_unreachable ();
12222 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12224 bool
12225 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12226 int *mask, bool *signed_sat)
12228 /* The high bound must be a power of two minus one. */
12229 int log = exact_log2 (INTVAL (hi_bound) + 1);
12230 if (log == -1)
12231 return false;
12233 /* The low bound is either zero (for usat) or one less than the
12234 negation of the high bound (for ssat). */
12235 if (INTVAL (lo_bound) == 0)
12237 if (mask)
12238 *mask = log;
12239 if (signed_sat)
12240 *signed_sat = false;
12242 return true;
12245 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12247 if (mask)
12248 *mask = log + 1;
12249 if (signed_sat)
12250 *signed_sat = true;
12252 return true;
12255 return false;
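/* Two examples of the matching above: the bounds (0, 255) give
   exact_log2 (256) == 8, so *MASK is 8 and *SIGNED_SAT is false (a usat #8
   pattern); the bounds (-128, 127) give exact_log2 (128) == 7, the low bound
   equals -127 - 1, so *MASK is 8 and *SIGNED_SAT is true (an ssat #8
   pattern).  */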
12258 /* Return 1 if memory locations are adjacent. */
12260 adjacent_mem_locations (rtx a, rtx b)
12262 /* We don't guarantee to preserve the order of these memory refs. */
12263 if (volatile_refs_p (a) || volatile_refs_p (b))
12264 return 0;
12266 if ((REG_P (XEXP (a, 0))
12267 || (GET_CODE (XEXP (a, 0)) == PLUS
12268 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12269 && (REG_P (XEXP (b, 0))
12270 || (GET_CODE (XEXP (b, 0)) == PLUS
12271 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12273 HOST_WIDE_INT val0 = 0, val1 = 0;
12274 rtx reg0, reg1;
12275 int val_diff;
12277 if (GET_CODE (XEXP (a, 0)) == PLUS)
12279 reg0 = XEXP (XEXP (a, 0), 0);
12280 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12282 else
12283 reg0 = XEXP (a, 0);
12285 if (GET_CODE (XEXP (b, 0)) == PLUS)
12287 reg1 = XEXP (XEXP (b, 0), 0);
12288 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12290 else
12291 reg1 = XEXP (b, 0);
12293 /* Don't accept any offset that will require multiple
12294 instructions to handle, since this would cause the
12295 arith_adjacentmem pattern to output an overlong sequence. */
12296 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12297 return 0;
12299 /* Don't allow an eliminable register: register elimination can make
12300 the offset too large. */
12301 if (arm_eliminable_register (reg0))
12302 return 0;
12304 val_diff = val1 - val0;
12306 if (arm_ld_sched)
12308 /* If the target has load delay slots, then there's no benefit
12309 to using an ldm instruction unless the offset is zero and
12310 we are optimizing for size. */
12311 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12312 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12313 && (val_diff == 4 || val_diff == -4));
12316 return ((REGNO (reg0) == REGNO (reg1))
12317 && (val_diff == 4 || val_diff == -4));
12320 return 0;
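/* For instance, the references [r4] and [r4, #4] are adjacent: same base
   register, offsets differing by exactly 4.  On cores with load delay slots
   (arm_ld_sched) the pair is only reported as adjacent when optimizing for
   size and one of the offsets is 0 or 4, per the check above.  */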
12323 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12324 for load operations, false for store operations. CONSECUTIVE is true
12325 if the register numbers in the operation must be consecutive in the register
 12326    bank.  RETURN_PC is true if the value is to be loaded into the PC.
12327 The pattern we are trying to match for load is:
12328 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12329 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12332 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12334 where
12335 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12336 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12337 3. If consecutive is TRUE, then for kth register being loaded,
12338 REGNO (R_dk) = REGNO (R_d0) + k.
12339 The pattern for store is similar. */
12340 bool
12341 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12342 bool consecutive, bool return_pc)
12344 HOST_WIDE_INT count = XVECLEN (op, 0);
12345 rtx reg, mem, addr;
12346 unsigned regno;
12347 unsigned first_regno;
12348 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12349 rtx elt;
12350 bool addr_reg_in_reglist = false;
12351 bool update = false;
12352 int reg_increment;
12353 int offset_adj;
12354 int regs_per_val;
12356 /* If not in SImode, then registers must be consecutive
12357 (e.g., VLDM instructions for DFmode). */
12358 gcc_assert ((mode == SImode) || consecutive);
12359 /* Setting return_pc for stores is illegal. */
12360 gcc_assert (!return_pc || load);
12362 /* Set up the increments and the regs per val based on the mode. */
12363 reg_increment = GET_MODE_SIZE (mode);
12364 regs_per_val = reg_increment / 4;
12365 offset_adj = return_pc ? 1 : 0;
12367 if (count <= 1
12368 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12369 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12370 return false;
12372 /* Check if this is a write-back. */
12373 elt = XVECEXP (op, 0, offset_adj);
12374 if (GET_CODE (SET_SRC (elt)) == PLUS)
12376 i++;
12377 base = 1;
12378 update = true;
12380 /* The offset adjustment must be the number of registers being
12381 popped times the size of a single register. */
12382 if (!REG_P (SET_DEST (elt))
12383 || !REG_P (XEXP (SET_SRC (elt), 0))
12384 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12385 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12386 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12387 ((count - 1 - offset_adj) * reg_increment))
12388 return false;
12391 i = i + offset_adj;
12392 base = base + offset_adj;
12393 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12394 success depends on the type: VLDM can do just one reg,
12395 LDM must do at least two. */
12396 if ((count <= i) && (mode == SImode))
12397 return false;
12399 elt = XVECEXP (op, 0, i - 1);
12400 if (GET_CODE (elt) != SET)
12401 return false;
12403 if (load)
12405 reg = SET_DEST (elt);
12406 mem = SET_SRC (elt);
12408 else
12410 reg = SET_SRC (elt);
12411 mem = SET_DEST (elt);
12414 if (!REG_P (reg) || !MEM_P (mem))
12415 return false;
12417 regno = REGNO (reg);
12418 first_regno = regno;
12419 addr = XEXP (mem, 0);
12420 if (GET_CODE (addr) == PLUS)
12422 if (!CONST_INT_P (XEXP (addr, 1)))
12423 return false;
12425 offset = INTVAL (XEXP (addr, 1));
12426 addr = XEXP (addr, 0);
12429 if (!REG_P (addr))
12430 return false;
12432 /* Don't allow SP to be loaded unless it is also the base register. It
12433 guarantees that SP is reset correctly when an LDM instruction
12434 is interrupted. Otherwise, we might end up with a corrupt stack. */
12435 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12436 return false;
12438 for (; i < count; i++)
12440 elt = XVECEXP (op, 0, i);
12441 if (GET_CODE (elt) != SET)
12442 return false;
12444 if (load)
12446 reg = SET_DEST (elt);
12447 mem = SET_SRC (elt);
12449 else
12451 reg = SET_SRC (elt);
12452 mem = SET_DEST (elt);
12455 if (!REG_P (reg)
12456 || GET_MODE (reg) != mode
12457 || REGNO (reg) <= regno
12458 || (consecutive
12459 && (REGNO (reg) !=
12460 (unsigned int) (first_regno + regs_per_val * (i - base))))
12461 /* Don't allow SP to be loaded unless it is also the base register. It
12462 guarantees that SP is reset correctly when an LDM instruction
12463 is interrupted. Otherwise, we might end up with a corrupt stack. */
12464 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12465 || !MEM_P (mem)
12466 || GET_MODE (mem) != mode
12467 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12468 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12469 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12470 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12471 offset + (i - base) * reg_increment))
12472 && (!REG_P (XEXP (mem, 0))
12473 || offset + (i - base) * reg_increment != 0)))
12474 return false;
12476 regno = REGNO (reg);
12477 if (regno == REGNO (addr))
12478 addr_reg_in_reglist = true;
12481 if (load)
12483 if (update && addr_reg_in_reglist)
12484 return false;
 12486   /* For Thumb-1, the address register is always modified, either by write-back
 12487      or by an explicit load.  If the pattern does not describe an update,
12488 then the address register must be in the list of loaded registers. */
12489 if (TARGET_THUMB1)
12490 return update || addr_reg_in_reglist;
12493 return true;
12496 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12497 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12498 instruction. ADD_OFFSET is nonzero if the base address register needs
12499 to be modified with an add instruction before we can use it. */
12501 static bool
12502 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
12503 int nops, HOST_WIDE_INT add_offset)
12505 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12506 if the offset isn't small enough. The reason 2 ldrs are faster
12507 is because these ARMs are able to do more than one cache access
12508 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12509 whilst the ARM8 has a double bandwidth cache. This means that
12510 these cores can do both an instruction fetch and a data fetch in
12511 a single cycle, so the trick of calculating the address into a
12512 scratch register (one of the result regs) and then doing a load
12513 multiple actually becomes slower (and no smaller in code size).
12514 That is the transformation
12516 ldr rd1, [rbase + offset]
12517 ldr rd2, [rbase + offset + 4]
12521 add rd1, rbase, offset
12522 ldmia rd1, {rd1, rd2}
12524 produces worse code -- '3 cycles + any stalls on rd2' instead of
12525 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12526 access per cycle, the first sequence could never complete in less
12527 than 6 cycles, whereas the ldm sequence would only take 5 and
12528 would make better use of sequential accesses if not hitting the
12529 cache.
12531 We cheat here and test 'arm_ld_sched' which we currently know to
12532 only be true for the ARM8, ARM9 and StrongARM. If this ever
12533 changes, then the test below needs to be reworked. */
12534 if (nops == 2 && arm_ld_sched && add_offset != 0)
12535 return false;
12537 /* XScale has load-store double instructions, but they have stricter
12538 alignment requirements than load-store multiple, so we cannot
12539 use them.
12541 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12542 the pipeline until completion.
 12544        NREGS           CYCLES
                1               3
                2               4
                3               5
                4               6
12550 An ldr instruction takes 1-3 cycles, but does not block the
12551 pipeline.
12553 NREGS CYCLES
12554 1 1-3
12555 2 2-6
12556 3 3-9
12557 4 4-12
12559 Best case ldr will always win. However, the more ldr instructions
12560 we issue, the less likely we are to be able to schedule them well.
12561 Using ldr instructions also increases code size.
12563 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
12564 for counts of 3 or 4 regs. */
12565 if (nops <= 2 && arm_tune_xscale && !optimize_size)
12566 return false;
12567 return true;
12570 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
12571 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
12572 an array ORDER which describes the sequence to use when accessing the
12573 offsets that produces an ascending order. In this sequence, each
12574 offset must be larger by exactly 4 than the previous one. ORDER[0]
12575 must have been filled in with the lowest offset by the caller.
12576 If UNSORTED_REGS is nonnull, it is an array of register numbers that
12577 we use to verify that ORDER produces an ascending order of registers.
12578 Return true if it was possible to construct such an order, false if
12579 not. */
12581 static bool
12582 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
12583 int *unsorted_regs)
12585 int i;
12586 for (i = 1; i < nops; i++)
12588 int j;
12590 order[i] = order[i - 1];
12591 for (j = 0; j < nops; j++)
12592 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
12594 /* We must find exactly one offset that is higher than the
12595 previous one by 4. */
12596 if (order[i] != order[i - 1])
12597 return false;
12598 order[i] = j;
12600 if (order[i] == order[i - 1])
12601 return false;
12602 /* The register numbers must be ascending. */
12603 if (unsorted_regs != NULL
12604 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
12605 return false;
12607 return true;
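/* A short example: with UNSORTED_OFFSETS = {8, 0, 12, 4} and ORDER[0]
   preset to 1 (the index of offset 0), the loop picks the indices of
   offsets 4, 8 and 12 in turn, giving ORDER = {1, 3, 0, 2}.  Offsets such
   as {0, 4, 12, 16}, where one step is not exactly 4, make it return
   false.  */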
12610 /* Used to determine in a peephole whether a sequence of load
12611 instructions can be changed into a load-multiple instruction.
12612 NOPS is the number of separate load instructions we are examining. The
12613 first NOPS entries in OPERANDS are the destination registers, the
12614 next NOPS entries are memory operands. If this function is
12615 successful, *BASE is set to the common base register of the memory
12616 accesses; *LOAD_OFFSET is set to the first memory location's offset
12617 from that base register.
12618 REGS is an array filled in with the destination register numbers.
 12619    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
 12620    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
12621 the sequence of registers in REGS matches the loads from ascending memory
12622 locations, and the function verifies that the register numbers are
12623 themselves ascending. If CHECK_REGS is false, the register numbers
12624 are stored in the order they are found in the operands. */
12625 static int
12626 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
12627 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
12629 int unsorted_regs[MAX_LDM_STM_OPS];
12630 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
12631 int order[MAX_LDM_STM_OPS];
12632 rtx base_reg_rtx = NULL;
12633 int base_reg = -1;
12634 int i, ldm_case;
12636 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
12637 easily extended if required. */
12638 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
12640 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
12642 /* Loop over the operands and check that the memory references are
12643 suitable (i.e. immediate offsets from the same base register). At
12644 the same time, extract the target register, and the memory
12645 offsets. */
12646 for (i = 0; i < nops; i++)
12648 rtx reg;
12649 rtx offset;
12651 /* Convert a subreg of a mem into the mem itself. */
12652 if (GET_CODE (operands[nops + i]) == SUBREG)
12653 operands[nops + i] = alter_subreg (operands + (nops + i), true);
12655 gcc_assert (MEM_P (operands[nops + i]));
12657 /* Don't reorder volatile memory references; it doesn't seem worth
12658 looking for the case where the order is ok anyway. */
12659 if (MEM_VOLATILE_P (operands[nops + i]))
12660 return 0;
12662 offset = const0_rtx;
12664 if ((REG_P (reg = XEXP (operands[nops + i], 0))
12665 || (GET_CODE (reg) == SUBREG
12666 && REG_P (reg = SUBREG_REG (reg))))
12667 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
12668 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
12669 || (GET_CODE (reg) == SUBREG
12670 && REG_P (reg = SUBREG_REG (reg))))
12671 && (CONST_INT_P (offset
12672 = XEXP (XEXP (operands[nops + i], 0), 1)))))
12674 if (i == 0)
12676 base_reg = REGNO (reg);
12677 base_reg_rtx = reg;
12678 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
12679 return 0;
12681 else if (base_reg != (int) REGNO (reg))
12682 /* Not addressed from the same base register. */
12683 return 0;
12685 unsorted_regs[i] = (REG_P (operands[i])
12686 ? REGNO (operands[i])
12687 : REGNO (SUBREG_REG (operands[i])));
12689 /* If it isn't an integer register, or if it overwrites the
12690 base register but isn't the last insn in the list, then
12691 we can't do this. */
12692 if (unsorted_regs[i] < 0
12693 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
12694 || unsorted_regs[i] > 14
12695 || (i != nops - 1 && unsorted_regs[i] == base_reg))
12696 return 0;
12698 /* Don't allow SP to be loaded unless it is also the base
12699 register. It guarantees that SP is reset correctly when
12700 an LDM instruction is interrupted. Otherwise, we might
12701 end up with a corrupt stack. */
12702 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
12703 return 0;
12705 unsorted_offsets[i] = INTVAL (offset);
12706 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
12707 order[0] = i;
12709 else
12710 /* Not a suitable memory address. */
12711 return 0;
12714 /* All the useful information has now been extracted from the
12715 operands into unsorted_regs and unsorted_offsets; additionally,
12716 order[0] has been set to the lowest offset in the list. Sort
12717 the offsets into order, verifying that they are adjacent, and
12718 check that the register numbers are ascending. */
12719 if (!compute_offset_order (nops, unsorted_offsets, order,
12720 check_regs ? unsorted_regs : NULL))
12721 return 0;
12723 if (saved_order)
12724 memcpy (saved_order, order, sizeof order);
12726 if (base)
12728 *base = base_reg;
12730 for (i = 0; i < nops; i++)
12731 regs[i] = unsorted_regs[check_regs ? order[i] : i];
12733 *load_offset = unsorted_offsets[order[0]];
12736 if (TARGET_THUMB1
12737 && !peep2_reg_dead_p (nops, base_reg_rtx))
12738 return 0;
12740 if (unsorted_offsets[order[0]] == 0)
12741 ldm_case = 1; /* ldmia */
12742 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
12743 ldm_case = 2; /* ldmib */
12744 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
12745 ldm_case = 3; /* ldmda */
12746 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
12747 ldm_case = 4; /* ldmdb */
12748 else if (const_ok_for_arm (unsorted_offsets[order[0]])
12749 || const_ok_for_arm (-unsorted_offsets[order[0]]))
12750 ldm_case = 5;
12751 else
12752 return 0;
12754 if (!multiple_operation_profitable_p (false, nops,
12755 ldm_case == 5
12756 ? unsorted_offsets[order[0]] : 0))
12757 return 0;
12759 return ldm_case;
12762 /* Used to determine in a peephole whether a sequence of store instructions can
12763 be changed into a store-multiple instruction.
12764 NOPS is the number of separate store instructions we are examining.
12765 NOPS_TOTAL is the total number of instructions recognized by the peephole
12766 pattern.
12767 The first NOPS entries in OPERANDS are the source registers, the next
12768 NOPS entries are memory operands. If this function is successful, *BASE is
12769 set to the common base register of the memory accesses; *LOAD_OFFSET is set
12770 to the first memory location's offset from that base register. REGS is an
12771 array filled in with the source register numbers, REG_RTXS (if nonnull) is
12772 likewise filled with the corresponding rtx's.
 12773    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
12774 numbers to an ascending order of stores.
12775 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
12776 from ascending memory locations, and the function verifies that the register
12777 numbers are themselves ascending. If CHECK_REGS is false, the register
12778 numbers are stored in the order they are found in the operands. */
12779 static int
12780 store_multiple_sequence (rtx *operands, int nops, int nops_total,
12781 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
12782 HOST_WIDE_INT *load_offset, bool check_regs)
12784 int unsorted_regs[MAX_LDM_STM_OPS];
12785 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
12786 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
12787 int order[MAX_LDM_STM_OPS];
12788 int base_reg = -1;
12789 rtx base_reg_rtx = NULL;
12790 int i, stm_case;
 12792   /* Write-back of the base register is currently only supported for Thumb-1.  */
12793 int base_writeback = TARGET_THUMB1;
12795 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
12796 easily extended if required. */
12797 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
12799 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
12801 /* Loop over the operands and check that the memory references are
12802 suitable (i.e. immediate offsets from the same base register). At
12803 the same time, extract the target register, and the memory
12804 offsets. */
12805 for (i = 0; i < nops; i++)
12807 rtx reg;
12808 rtx offset;
12810 /* Convert a subreg of a mem into the mem itself. */
12811 if (GET_CODE (operands[nops + i]) == SUBREG)
12812 operands[nops + i] = alter_subreg (operands + (nops + i), true);
12814 gcc_assert (MEM_P (operands[nops + i]));
12816 /* Don't reorder volatile memory references; it doesn't seem worth
12817 looking for the case where the order is ok anyway. */
12818 if (MEM_VOLATILE_P (operands[nops + i]))
12819 return 0;
12821 offset = const0_rtx;
12823 if ((REG_P (reg = XEXP (operands[nops + i], 0))
12824 || (GET_CODE (reg) == SUBREG
12825 && REG_P (reg = SUBREG_REG (reg))))
12826 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
12827 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
12828 || (GET_CODE (reg) == SUBREG
12829 && REG_P (reg = SUBREG_REG (reg))))
12830 && (CONST_INT_P (offset
12831 = XEXP (XEXP (operands[nops + i], 0), 1)))))
12833 unsorted_reg_rtxs[i] = (REG_P (operands[i])
12834 ? operands[i] : SUBREG_REG (operands[i]));
12835 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
12837 if (i == 0)
12839 base_reg = REGNO (reg);
12840 base_reg_rtx = reg;
12841 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
12842 return 0;
12844 else if (base_reg != (int) REGNO (reg))
12845 /* Not addressed from the same base register. */
12846 return 0;
12848 /* If it isn't an integer register, then we can't do this. */
12849 if (unsorted_regs[i] < 0
12850 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
12851 /* The effects are unpredictable if the base register is
12852 both updated and stored. */
12853 || (base_writeback && unsorted_regs[i] == base_reg)
12854 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
12855 || unsorted_regs[i] > 14)
12856 return 0;
12858 unsorted_offsets[i] = INTVAL (offset);
12859 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
12860 order[0] = i;
12862 else
12863 /* Not a suitable memory address. */
12864 return 0;
12867 /* All the useful information has now been extracted from the
12868 operands into unsorted_regs and unsorted_offsets; additionally,
12869 order[0] has been set to the lowest offset in the list. Sort
12870 the offsets into order, verifying that they are adjacent, and
12871 check that the register numbers are ascending. */
12872 if (!compute_offset_order (nops, unsorted_offsets, order,
12873 check_regs ? unsorted_regs : NULL))
12874 return 0;
12876 if (saved_order)
12877 memcpy (saved_order, order, sizeof order);
12879 if (base)
12881 *base = base_reg;
12883 for (i = 0; i < nops; i++)
12885 regs[i] = unsorted_regs[check_regs ? order[i] : i];
12886 if (reg_rtxs)
12887 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
12890 *load_offset = unsorted_offsets[order[0]];
12893 if (TARGET_THUMB1
12894 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
12895 return 0;
12897 if (unsorted_offsets[order[0]] == 0)
12898 stm_case = 1; /* stmia */
12899 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
12900 stm_case = 2; /* stmib */
12901 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
12902 stm_case = 3; /* stmda */
12903 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
12904 stm_case = 4; /* stmdb */
12905 else
12906 return 0;
12908 if (!multiple_operation_profitable_p (false, nops, 0))
12909 return 0;
12911 return stm_case;
12914 /* Routines for use in generating RTL. */
12916 /* Generate a load-multiple instruction. COUNT is the number of loads in
12917 the instruction; REGS and MEMS are arrays containing the operands.
12918 BASEREG is the base register to be used in addressing the memory operands.
12919 WBACK_OFFSET is nonzero if the instruction should update the base
12920 register. */
12922 static rtx
12923 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
12924 HOST_WIDE_INT wback_offset)
12926 int i = 0, j;
12927 rtx result;
12929 if (!multiple_operation_profitable_p (false, count, 0))
12931 rtx seq;
12933 start_sequence ();
12935 for (i = 0; i < count; i++)
12936 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
12938 if (wback_offset != 0)
12939 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
12941 seq = get_insns ();
12942 end_sequence ();
12944 return seq;
12947 result = gen_rtx_PARALLEL (VOIDmode,
12948 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
12949 if (wback_offset != 0)
12951 XVECEXP (result, 0, 0)
12952 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
12953 i = 1;
12954 count++;
12957 for (j = 0; i < count; i++, j++)
12958 XVECEXP (result, 0, i)
12959 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
12961 return result;
12964 /* Generate a store-multiple instruction. COUNT is the number of stores in
12965 the instruction; REGS and MEMS are arrays containing the operands.
12966 BASEREG is the base register to be used in addressing the memory operands.
12967 WBACK_OFFSET is nonzero if the instruction should update the base
12968 register. */
12970 static rtx
12971 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
12972 HOST_WIDE_INT wback_offset)
12974 int i = 0, j;
12975 rtx result;
12977 if (GET_CODE (basereg) == PLUS)
12978 basereg = XEXP (basereg, 0);
12980 if (!multiple_operation_profitable_p (false, count, 0))
12982 rtx seq;
12984 start_sequence ();
12986 for (i = 0; i < count; i++)
12987 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
12989 if (wback_offset != 0)
12990 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
12992 seq = get_insns ();
12993 end_sequence ();
12995 return seq;
12998 result = gen_rtx_PARALLEL (VOIDmode,
12999 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13000 if (wback_offset != 0)
13002 XVECEXP (result, 0, 0)
13003 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13004 i = 1;
13005 count++;
13008 for (j = 0; i < count; i++, j++)
13009 XVECEXP (result, 0, i)
13010 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13012 return result;
13015 /* Generate either a load-multiple or a store-multiple instruction. This
13016 function can be used in situations where we can start with a single MEM
13017 rtx and adjust its address upwards.
13018 COUNT is the number of operations in the instruction, not counting a
13019 possible update of the base register. REGS is an array containing the
13020 register operands.
13021 BASEREG is the base register to be used in addressing the memory operands,
13022 which are constructed from BASEMEM.
13023 WRITE_BACK specifies whether the generated instruction should include an
13024 update of the base register.
13025 OFFSETP is used to pass an offset to and from this function; this offset
13026 is not used when constructing the address (instead BASEMEM should have an
13027 appropriate offset in its address), it is used only for setting
 13028    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13030 static rtx
13031 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13032 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13034 rtx mems[MAX_LDM_STM_OPS];
13035 HOST_WIDE_INT offset = *offsetp;
13036 int i;
13038 gcc_assert (count <= MAX_LDM_STM_OPS);
13040 if (GET_CODE (basereg) == PLUS)
13041 basereg = XEXP (basereg, 0);
13043 for (i = 0; i < count; i++)
13045 rtx addr = plus_constant (Pmode, basereg, i * 4);
13046 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13047 offset += 4;
13050 if (write_back)
13051 *offsetp = offset;
13053 if (is_load)
13054 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13055 write_back ? 4 * count : 0);
13056 else
13057 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13058 write_back ? 4 * count : 0);
13062 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13063 rtx basemem, HOST_WIDE_INT *offsetp)
13065 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13066 offsetp);
13070 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13071 rtx basemem, HOST_WIDE_INT *offsetp)
13073 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13074 offsetp);
13077 /* Called from a peephole2 expander to turn a sequence of loads into an
13078 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13079 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13080 is true if we can reorder the registers because they are used commutatively
13081 subsequently.
13082 Returns true iff we could generate a new instruction. */
13084 bool
13085 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13087 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13088 rtx mems[MAX_LDM_STM_OPS];
13089 int i, j, base_reg;
13090 rtx base_reg_rtx;
13091 HOST_WIDE_INT offset;
13092 int write_back = FALSE;
13093 int ldm_case;
13094 rtx addr;
13096 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13097 &base_reg, &offset, !sort_regs);
13099 if (ldm_case == 0)
13100 return false;
13102 if (sort_regs)
13103 for (i = 0; i < nops - 1; i++)
13104 for (j = i + 1; j < nops; j++)
13105 if (regs[i] > regs[j])
13107 int t = regs[i];
13108 regs[i] = regs[j];
13109 regs[j] = t;
13111 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13113 if (TARGET_THUMB1)
13115 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13116 gcc_assert (ldm_case == 1 || ldm_case == 5);
13117 write_back = TRUE;
13120 if (ldm_case == 5)
13122 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13123 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13124 offset = 0;
13125 if (!TARGET_THUMB1)
13127 base_reg = regs[0];
13128 base_reg_rtx = newbase;
13132 for (i = 0; i < nops; i++)
13134 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13135 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13136 SImode, addr, 0);
13138 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13139 write_back ? offset + i * 4 : 0));
13140 return true;
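/* As an example of the transformation above, the two-insn sequence
     ldr r0, [r3]
     ldr r1, [r3, #4]
   is collapsed into "ldmia r3, {r0, r1}" (ldm_case 1).  When the lowest
   offset needs a separate add (ldm_case 5), the offset is first added into
   the first destination register (or, for Thumb-1, into the dead base
   register, with write-back), and the LDM is based off that.  */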
13143 /* Called from a peephole2 expander to turn a sequence of stores into an
13144 STM instruction. OPERANDS are the operands found by the peephole matcher;
13145 NOPS indicates how many separate stores we are trying to combine.
13146 Returns true iff we could generate a new instruction. */
13148 bool
13149 gen_stm_seq (rtx *operands, int nops)
13151 int i;
13152 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13153 rtx mems[MAX_LDM_STM_OPS];
13154 int base_reg;
13155 rtx base_reg_rtx;
13156 HOST_WIDE_INT offset;
13157 int write_back = FALSE;
13158 int stm_case;
13159 rtx addr;
13160 bool base_reg_dies;
13162 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13163 mem_order, &base_reg, &offset, true);
13165 if (stm_case == 0)
13166 return false;
13168 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13170 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13171 if (TARGET_THUMB1)
13173 gcc_assert (base_reg_dies);
13174 write_back = TRUE;
13177 if (stm_case == 5)
13179 gcc_assert (base_reg_dies);
13180 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13181 offset = 0;
13184 addr = plus_constant (Pmode, base_reg_rtx, offset);
13186 for (i = 0; i < nops; i++)
13188 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13189 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13190 SImode, addr, 0);
13192 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13193 write_back ? offset + i * 4 : 0));
13194 return true;
13197 /* Called from a peephole2 expander to turn a sequence of stores that are
13198 preceded by constant loads into an STM instruction. OPERANDS are the
13199 operands found by the peephole matcher; NOPS indicates how many
13200 separate stores we are trying to combine; there are 2 * NOPS
13201 instructions in the peephole.
13202 Returns true iff we could generate a new instruction. */
13204 bool
13205 gen_const_stm_seq (rtx *operands, int nops)
13207 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13208 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13209 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13210 rtx mems[MAX_LDM_STM_OPS];
13211 int base_reg;
13212 rtx base_reg_rtx;
13213 HOST_WIDE_INT offset;
13214 int write_back = FALSE;
13215 int stm_case;
13216 rtx addr;
13217 bool base_reg_dies;
13218 int i, j;
13219 HARD_REG_SET allocated;
13221 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13222 mem_order, &base_reg, &offset, false);
13224 if (stm_case == 0)
13225 return false;
13227 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13229 /* If the same register is used more than once, try to find a free
13230 register. */
13231 CLEAR_HARD_REG_SET (allocated);
13232 for (i = 0; i < nops; i++)
13234 for (j = i + 1; j < nops; j++)
13235 if (regs[i] == regs[j])
13237 rtx t = peep2_find_free_register (0, nops * 2,
13238 TARGET_THUMB1 ? "l" : "r",
13239 SImode, &allocated);
13240 if (t == NULL_RTX)
13241 return false;
13242 reg_rtxs[i] = t;
13243 regs[i] = REGNO (t);
13247 /* Compute an ordering that maps the register numbers to an ascending
13248 sequence. */
13249 reg_order[0] = 0;
13250 for (i = 0; i < nops; i++)
13251 if (regs[i] < regs[reg_order[0]])
13252 reg_order[0] = i;
13254 for (i = 1; i < nops; i++)
13256 int this_order = reg_order[i - 1];
13257 for (j = 0; j < nops; j++)
13258 if (regs[j] > regs[reg_order[i - 1]]
13259 && (this_order == reg_order[i - 1]
13260 || regs[j] < regs[this_order]))
13261 this_order = j;
13262 reg_order[i] = this_order;
13265 /* Ensure that registers that must be live after the instruction end
13266 up with the correct value. */
13267 for (i = 0; i < nops; i++)
13269 int this_order = reg_order[i];
13270 if ((this_order != mem_order[i]
13271 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13272 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13273 return false;
13276 /* Load the constants. */
13277 for (i = 0; i < nops; i++)
13279 rtx op = operands[2 * nops + mem_order[i]];
13280 sorted_regs[i] = regs[reg_order[i]];
13281 emit_move_insn (reg_rtxs[reg_order[i]], op);
13284 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13286 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13287 if (TARGET_THUMB1)
13289 gcc_assert (base_reg_dies);
13290 write_back = TRUE;
13293 if (stm_case == 5)
13295 gcc_assert (base_reg_dies);
13296 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13297 offset = 0;
13300 addr = plus_constant (Pmode, base_reg_rtx, offset);
13302 for (i = 0; i < nops; i++)
13304 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13305 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13306 SImode, addr, 0);
13308 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13309 write_back ? offset + i * 4 : 0));
13310 return true;
13313 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13314 unaligned copies on processors which support unaligned semantics for those
13315 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13316 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13317 An interleave factor of 1 (the minimum) will perform no interleaving.
13318 Load/store multiple are used for aligned addresses where possible. */
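/* Illustrative sketch (added for exposition): with INTERLEAVE_FACTOR == 2,
   an unaligned source and a word-aligned destination, each 8-byte chunk of
   the straight-line copy emitted by the function below looks roughly like

       ldr   r0, [r4]         @ unaligned word loads from the source
       ldr   r1, [r4, #4]
       stmia r5!, {r0, r1}    @ store-multiple to the aligned destination

   where r4 and r5 stand for the source and destination address registers.  */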
13320 static void
13321 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13322 HOST_WIDE_INT length,
13323 unsigned int interleave_factor)
13325 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13326 int *regnos = XALLOCAVEC (int, interleave_factor);
13327 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13328 HOST_WIDE_INT i, j;
13329 HOST_WIDE_INT remaining = length, words;
13330 rtx halfword_tmp = NULL, byte_tmp = NULL;
13331 rtx dst, src;
13332 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13333 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13334 HOST_WIDE_INT srcoffset, dstoffset;
13335 HOST_WIDE_INT src_autoinc, dst_autoinc;
13336 rtx mem, addr;
13338 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13340 /* Use hard registers if we have aligned source or destination so we can use
13341 load/store multiple with contiguous registers. */
13342 if (dst_aligned || src_aligned)
13343 for (i = 0; i < interleave_factor; i++)
13344 regs[i] = gen_rtx_REG (SImode, i);
13345 else
13346 for (i = 0; i < interleave_factor; i++)
13347 regs[i] = gen_reg_rtx (SImode);
13349 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13350 src = copy_addr_to_reg (XEXP (srcbase, 0));
13352 srcoffset = dstoffset = 0;
13354 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13355 For copying the last bytes we want to subtract this offset again. */
13356 src_autoinc = dst_autoinc = 0;
13358 for (i = 0; i < interleave_factor; i++)
13359 regnos[i] = i;
13361 /* Copy BLOCK_SIZE_BYTES chunks. */
13363 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13365 /* Load words. */
13366 if (src_aligned && interleave_factor > 1)
13368 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13369 TRUE, srcbase, &srcoffset));
13370 src_autoinc += UNITS_PER_WORD * interleave_factor;
13372 else
13374 for (j = 0; j < interleave_factor; j++)
13376 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13377 - src_autoinc));
13378 mem = adjust_automodify_address (srcbase, SImode, addr,
13379 srcoffset + j * UNITS_PER_WORD);
13380 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13382 srcoffset += block_size_bytes;
13385 /* Store words. */
13386 if (dst_aligned && interleave_factor > 1)
13388 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13389 TRUE, dstbase, &dstoffset));
13390 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13392 else
13394 for (j = 0; j < interleave_factor; j++)
13396 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13397 - dst_autoinc));
13398 mem = adjust_automodify_address (dstbase, SImode, addr,
13399 dstoffset + j * UNITS_PER_WORD);
13400 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13402 dstoffset += block_size_bytes;
13405 remaining -= block_size_bytes;
13408 /* Copy any whole words left (note these aren't interleaved with any
13409 subsequent halfword/byte load/stores in the interests of simplicity). */
13411 words = remaining / UNITS_PER_WORD;
13413 gcc_assert (words < interleave_factor);
13415 if (src_aligned && words > 1)
13417 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13418 &srcoffset));
13419 src_autoinc += UNITS_PER_WORD * words;
13421 else
13423 for (j = 0; j < words; j++)
13425 addr = plus_constant (Pmode, src,
13426 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13427 mem = adjust_automodify_address (srcbase, SImode, addr,
13428 srcoffset + j * UNITS_PER_WORD);
13429 if (src_aligned)
13430 emit_move_insn (regs[j], mem);
13431 else
13432 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13434 srcoffset += words * UNITS_PER_WORD;
13437 if (dst_aligned && words > 1)
13439 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13440 &dstoffset));
13441 dst_autoinc += words * UNITS_PER_WORD;
13443 else
13445 for (j = 0; j < words; j++)
13447 addr = plus_constant (Pmode, dst,
13448 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13449 mem = adjust_automodify_address (dstbase, SImode, addr,
13450 dstoffset + j * UNITS_PER_WORD);
13451 if (dst_aligned)
13452 emit_move_insn (mem, regs[j]);
13453 else
13454 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13456 dstoffset += words * UNITS_PER_WORD;
13459 remaining -= words * UNITS_PER_WORD;
13461 gcc_assert (remaining < 4);
13463 /* Copy a halfword if necessary. */
13465 if (remaining >= 2)
13467 halfword_tmp = gen_reg_rtx (SImode);
13469 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13470 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13471 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13473 /* Either write out immediately, or delay until we've loaded the last
13474 byte, depending on interleave factor. */
13475 if (interleave_factor == 1)
13477 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13478 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13479 emit_insn (gen_unaligned_storehi (mem,
13480 gen_lowpart (HImode, halfword_tmp)));
13481 halfword_tmp = NULL;
13482 dstoffset += 2;
13485 remaining -= 2;
13486 srcoffset += 2;
13489 gcc_assert (remaining < 2);
13491 /* Copy last byte. */
13493 if ((remaining & 1) != 0)
13495 byte_tmp = gen_reg_rtx (SImode);
13497 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13498 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13499 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
13501 if (interleave_factor == 1)
13503 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13504 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13505 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13506 byte_tmp = NULL;
13507 dstoffset++;
13510 remaining--;
13511 srcoffset++;
13514 /* Store last halfword if we haven't done so already. */
13516 if (halfword_tmp)
13518 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13519 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13520 emit_insn (gen_unaligned_storehi (mem,
13521 gen_lowpart (HImode, halfword_tmp)));
13522 dstoffset += 2;
13525 /* Likewise for last byte. */
13527 if (byte_tmp)
13529 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13530 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13531 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13532 dstoffset++;
13535 gcc_assert (remaining == 0 && srcoffset == dstoffset);
13538 /* From mips_adjust_block_mem:
13540 Helper function for doing a loop-based block operation on memory
13541 reference MEM. Each iteration of the loop will operate on LENGTH
13542 bytes of MEM.
13544 Create a new base register for use within the loop and point it to
13545 the start of MEM. Create a new memory reference that uses this
13546 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13548 static void
13549 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
13550 rtx *loop_mem)
13552 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
13554 /* Although the new mem does not refer to a known location,
13555 it does keep up to LENGTH bytes of alignment. */
13556 *loop_mem = change_address (mem, BLKmode, *loop_reg);
13557 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
13560 /* From mips_block_move_loop:
13562 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
13563 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
13564 the memory regions do not overlap. */
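/* Illustrative sketch (added for exposition): for LENGTH == 100 and
   BYTES_PER_ITER == 16 the function below emits, roughly,

       final = src_reg + 96            @ 6 whole iterations
   .Lloop:
       <straight copy of 16 bytes using INTERLEAVE_FACTOR registers>
       src_reg  += 16
       dest_reg += 16
       cmp src_reg, final ; bne .Lloop
       <straight copy of the 4 leftover bytes>

   i.e. a counted loop over whole blocks followed by a straight-line copy of
   the remainder.  */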
13566 static void
13567 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
13568 unsigned int interleave_factor,
13569 HOST_WIDE_INT bytes_per_iter)
13571 rtx src_reg, dest_reg, final_src, test;
13572 HOST_WIDE_INT leftover;
13574 leftover = length % bytes_per_iter;
13575 length -= leftover;
13577 /* Create registers and memory references for use within the loop. */
13578 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
13579 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
13581 /* Calculate the value that SRC_REG should have after the last iteration of
13582 the loop. */
13583 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
13584 0, 0, OPTAB_WIDEN);
13586 /* Emit the start of the loop. */
13587 rtx_code_label *label = gen_label_rtx ();
13588 emit_label (label);
13590 /* Emit the loop body. */
13591 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
13592 interleave_factor);
13594 /* Move on to the next block. */
13595 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
13596 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
13598 /* Emit the loop condition. */
13599 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
13600 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
13602 /* Mop up any left-over bytes. */
13603 if (leftover)
13604 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
13607 /* Emit a block move when either the source or destination is unaligned (not
13608 aligned to a four-byte boundary). This may need further tuning depending on
13609 core type, optimize_size setting, etc. */
13611 static int
13612 arm_movmemqi_unaligned (rtx *operands)
13614 HOST_WIDE_INT length = INTVAL (operands[2]);
13616 if (optimize_size)
13618 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
13619 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
13620 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
13621 size of code if optimizing for size. We'll use ldm/stm if src_aligned
13622 or dst_aligned though: allow more interleaving in those cases since the
13623 resulting code can be smaller. */
13624 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
13625 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
13627 if (length > 12)
13628 arm_block_move_unaligned_loop (operands[0], operands[1], length,
13629 interleave_factor, bytes_per_iter);
13630 else
13631 arm_block_move_unaligned_straight (operands[0], operands[1], length,
13632 interleave_factor);
13634 else
13636 /* Note that the loop created by arm_block_move_unaligned_loop may be
13637 subject to loop unrolling, which makes tuning this condition a little
13638 redundant. */
13639 if (length > 32)
13640 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
13641 else
13642 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
13645 return 1;
13649 arm_gen_movmemqi (rtx *operands)
13651 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
13652 HOST_WIDE_INT srcoffset, dstoffset;
13653 int i;
13654 rtx src, dst, srcbase, dstbase;
13655 rtx part_bytes_reg = NULL;
13656 rtx mem;
13658 if (!CONST_INT_P (operands[2])
13659 || !CONST_INT_P (operands[3])
13660 || INTVAL (operands[2]) > 64)
13661 return 0;
13663 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
13664 return arm_movmemqi_unaligned (operands);
13666 if (INTVAL (operands[3]) & 3)
13667 return 0;
13669 dstbase = operands[0];
13670 srcbase = operands[1];
13672 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
13673 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
13675 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
13676 out_words_to_go = INTVAL (operands[2]) / 4;
13677 last_bytes = INTVAL (operands[2]) & 3;
13678 dstoffset = srcoffset = 0;
13680 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
13681 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
13683 for (i = 0; in_words_to_go >= 2; i+=4)
13685 if (in_words_to_go > 4)
13686 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
13687 TRUE, srcbase, &srcoffset));
13688 else
13689 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
13690 src, FALSE, srcbase,
13691 &srcoffset));
13693 if (out_words_to_go)
13695 if (out_words_to_go > 4)
13696 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
13697 TRUE, dstbase, &dstoffset));
13698 else if (out_words_to_go != 1)
13699 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
13700 out_words_to_go, dst,
13701 (last_bytes == 0
13702 ? FALSE : TRUE),
13703 dstbase, &dstoffset));
13704 else
13706 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
13707 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
13708 if (last_bytes != 0)
13710 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
13711 dstoffset += 4;
13716 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
13717 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
13720 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
13721 if (out_words_to_go)
13723 rtx sreg;
13725 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
13726 sreg = copy_to_reg (mem);
13728 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
13729 emit_move_insn (mem, sreg);
13730 in_words_to_go--;
13732 gcc_assert (!in_words_to_go); /* Sanity check */
13735 if (in_words_to_go)
13737 gcc_assert (in_words_to_go > 0);
13739 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
13740 part_bytes_reg = copy_to_mode_reg (SImode, mem);
13743 gcc_assert (!last_bytes || part_bytes_reg);
13745 if (BYTES_BIG_ENDIAN && last_bytes)
13747 rtx tmp = gen_reg_rtx (SImode);
13749 /* The bytes we want are in the top end of the word. */
13750 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
13751 GEN_INT (8 * (4 - last_bytes))));
13752 part_bytes_reg = tmp;
13754 while (last_bytes)
13756 mem = adjust_automodify_address (dstbase, QImode,
13757 plus_constant (Pmode, dst,
13758 last_bytes - 1),
13759 dstoffset + last_bytes - 1);
13760 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
13762 if (--last_bytes)
13764 tmp = gen_reg_rtx (SImode);
13765 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
13766 part_bytes_reg = tmp;
13771 else
13773 if (last_bytes > 1)
13775 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
13776 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
13777 last_bytes -= 2;
13778 if (last_bytes)
13780 rtx tmp = gen_reg_rtx (SImode);
13781 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
13782 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
13783 part_bytes_reg = tmp;
13784 dstoffset += 2;
13788 if (last_bytes)
13790 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
13791 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
13795 return 1;
13798 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
13799 by mode size. */
13800 inline static rtx
13801 next_consecutive_mem (rtx mem)
13803 machine_mode mode = GET_MODE (mem);
13804 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
13805 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
13807 return adjust_automodify_address (mem, mode, addr, offset);
13810 /* Copy using LDRD/STRD instructions whenever possible.
13811 Returns true upon success. */
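/* Illustrative sketch (added for exposition): for a 12-byte copy with both
   buffers word aligned, the function below emits roughly

       ldrd r0, r1, [r4]        @ one double-word
       strd r0, r1, [r5]
       ldr  r2, [r4, #8]        @ plus a trailing word
       str  r2, [r5, #8]

   with r4/r5 standing for the copied-to-register source and destination
   addresses; unaligned buffers fall back to pairs of unaligned word
   accesses instead of LDRD/STRD.  */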
13812 bool
13813 gen_movmem_ldrd_strd (rtx *operands)
13815 unsigned HOST_WIDE_INT len;
13816 HOST_WIDE_INT align;
13817 rtx src, dst, base;
13818 rtx reg0;
13819 bool src_aligned, dst_aligned;
13820 bool src_volatile, dst_volatile;
13822 gcc_assert (CONST_INT_P (operands[2]));
13823 gcc_assert (CONST_INT_P (operands[3]));
13825 len = UINTVAL (operands[2]);
13826 if (len > 64)
13827 return false;
13829 /* Maximum alignment we can assume for both src and dst buffers. */
13830 align = INTVAL (operands[3]);
13832 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
13833 return false;
13835 /* Place src and dst addresses in registers
13836 and update the corresponding mem rtx. */
13837 dst = operands[0];
13838 dst_volatile = MEM_VOLATILE_P (dst);
13839 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
13840 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
13841 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
13843 src = operands[1];
13844 src_volatile = MEM_VOLATILE_P (src);
13845 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
13846 base = copy_to_mode_reg (SImode, XEXP (src, 0));
13847 src = adjust_automodify_address (src, VOIDmode, base, 0);
13849 if (!unaligned_access && !(src_aligned && dst_aligned))
13850 return false;
13852 if (src_volatile || dst_volatile)
13853 return false;
13855 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
13856 if (!(dst_aligned || src_aligned))
13857 return arm_gen_movmemqi (operands);
13859 /* If either src or dst is unaligned we'll be accessing it as pairs
13860 of unaligned SImode accesses. Otherwise we can generate DImode
13861 ldrd/strd instructions. */
13862 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
13863 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
13865 while (len >= 8)
13867 len -= 8;
13868 reg0 = gen_reg_rtx (DImode);
13869 rtx low_reg = NULL_RTX;
13870 rtx hi_reg = NULL_RTX;
13872 if (!src_aligned || !dst_aligned)
13874 low_reg = gen_lowpart (SImode, reg0);
13875 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
13877 if (src_aligned)
13878 emit_move_insn (reg0, src);
13879 else
13881 emit_insn (gen_unaligned_loadsi (low_reg, src));
13882 src = next_consecutive_mem (src);
13883 emit_insn (gen_unaligned_loadsi (hi_reg, src));
13886 if (dst_aligned)
13887 emit_move_insn (dst, reg0);
13888 else
13890 emit_insn (gen_unaligned_storesi (dst, low_reg));
13891 dst = next_consecutive_mem (dst);
13892 emit_insn (gen_unaligned_storesi (dst, hi_reg));
13895 src = next_consecutive_mem (src);
13896 dst = next_consecutive_mem (dst);
13899 gcc_assert (len < 8);
13900 if (len >= 4)
13902 /* More than a word but less than a double-word to copy. Copy a word. */
13903 reg0 = gen_reg_rtx (SImode);
13904 src = adjust_address (src, SImode, 0);
13905 dst = adjust_address (dst, SImode, 0);
13906 if (src_aligned)
13907 emit_move_insn (reg0, src);
13908 else
13909 emit_insn (gen_unaligned_loadsi (reg0, src));
13911 if (dst_aligned)
13912 emit_move_insn (dst, reg0);
13913 else
13914 emit_insn (gen_unaligned_storesi (dst, reg0));
13916 src = next_consecutive_mem (src);
13917 dst = next_consecutive_mem (dst);
13918 len -= 4;
13921 if (len == 0)
13922 return true;
13924 /* Copy the remaining bytes. */
13925 if (len >= 2)
13927 dst = adjust_address (dst, HImode, 0);
13928 src = adjust_address (src, HImode, 0);
13929 reg0 = gen_reg_rtx (SImode);
13930 if (src_aligned)
13931 emit_insn (gen_zero_extendhisi2 (reg0, src));
13932 else
13933 emit_insn (gen_unaligned_loadhiu (reg0, src));
13935 if (dst_aligned)
13936 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
13937 else
13938 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
13940 src = next_consecutive_mem (src);
13941 dst = next_consecutive_mem (dst);
13942 if (len == 2)
13943 return true;
13946 dst = adjust_address (dst, QImode, 0);
13947 src = adjust_address (src, QImode, 0);
13948 reg0 = gen_reg_rtx (QImode);
13949 emit_move_insn (reg0, src);
13950 emit_move_insn (dst, reg0);
13951 return true;
13954 /* Select a dominance comparison mode if possible for a test of the general
13955 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
13956 COND_OR == DOM_CC_X_AND_Y => (X && Y)
13957 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
13958 COND_OR == DOM_CC_X_OR_Y => (X || Y)
13959 In all cases OP will be either EQ or NE, but we don't need to know which
13960 here. If we are unable to support a dominance comparison we return
13961 CC mode. This will then fail to match for the RTL expressions that
13962 generate this call. */
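/* Illustrative example (added for exposition): for a source-level test such
   as (x == 0 || y <= 0), COND_OR is DOM_CC_X_OR_Y, cond1 is EQ and cond2 is
   LE; since EQ dominates LE the function below returns CC_DLEmode, allowing
   both comparisons to be evaluated by a single compare / conditional-compare
   sequence.  */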
13963 machine_mode
13964 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
13966 enum rtx_code cond1, cond2;
13967 int swapped = 0;
13969 /* Currently we will probably get the wrong result if the individual
13970 comparisons are not simple. This also ensures that it is safe to
13971 reverse a comparison if necessary. */
13972 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
13973 != CCmode)
13974 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
13975 != CCmode))
13976 return CCmode;
13978 /* The if_then_else variant of this tests the second condition if the
13979 first passes, but is true if the first fails. Reverse the first
13980 condition to get a true "inclusive-or" expression. */
13981 if (cond_or == DOM_CC_NX_OR_Y)
13982 cond1 = reverse_condition (cond1);
13984 /* If the comparisons are not equal, and one doesn't dominate the other,
13985 then we can't do this. */
13986 if (cond1 != cond2
13987 && !comparison_dominates_p (cond1, cond2)
13988 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
13989 return CCmode;
13991 if (swapped)
13992 std::swap (cond1, cond2);
13994 switch (cond1)
13996 case EQ:
13997 if (cond_or == DOM_CC_X_AND_Y)
13998 return CC_DEQmode;
14000 switch (cond2)
14002 case EQ: return CC_DEQmode;
14003 case LE: return CC_DLEmode;
14004 case LEU: return CC_DLEUmode;
14005 case GE: return CC_DGEmode;
14006 case GEU: return CC_DGEUmode;
14007 default: gcc_unreachable ();
14010 case LT:
14011 if (cond_or == DOM_CC_X_AND_Y)
14012 return CC_DLTmode;
14014 switch (cond2)
14016 case LT:
14017 return CC_DLTmode;
14018 case LE:
14019 return CC_DLEmode;
14020 case NE:
14021 return CC_DNEmode;
14022 default:
14023 gcc_unreachable ();
14026 case GT:
14027 if (cond_or == DOM_CC_X_AND_Y)
14028 return CC_DGTmode;
14030 switch (cond2)
14032 case GT:
14033 return CC_DGTmode;
14034 case GE:
14035 return CC_DGEmode;
14036 case NE:
14037 return CC_DNEmode;
14038 default:
14039 gcc_unreachable ();
14042 case LTU:
14043 if (cond_or == DOM_CC_X_AND_Y)
14044 return CC_DLTUmode;
14046 switch (cond2)
14048 case LTU:
14049 return CC_DLTUmode;
14050 case LEU:
14051 return CC_DLEUmode;
14052 case NE:
14053 return CC_DNEmode;
14054 default:
14055 gcc_unreachable ();
14058 case GTU:
14059 if (cond_or == DOM_CC_X_AND_Y)
14060 return CC_DGTUmode;
14062 switch (cond2)
14064 case GTU:
14065 return CC_DGTUmode;
14066 case GEU:
14067 return CC_DGEUmode;
14068 case NE:
14069 return CC_DNEmode;
14070 default:
14071 gcc_unreachable ();
14074 /* The remaining cases only occur when both comparisons are the
14075 same. */
14076 case NE:
14077 gcc_assert (cond1 == cond2);
14078 return CC_DNEmode;
14080 case LE:
14081 gcc_assert (cond1 == cond2);
14082 return CC_DLEmode;
14084 case GE:
14085 gcc_assert (cond1 == cond2);
14086 return CC_DGEmode;
14088 case LEU:
14089 gcc_assert (cond1 == cond2);
14090 return CC_DLEUmode;
14092 case GEU:
14093 gcc_assert (cond1 == cond2);
14094 return CC_DGEUmode;
14096 default:
14097 gcc_unreachable ();
14101 machine_mode
14102 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14104 /* All floating point compares return CCFP if it is an equality
14105 comparison, and CCFPE otherwise. */
14106 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14108 switch (op)
14110 case EQ:
14111 case NE:
14112 case UNORDERED:
14113 case ORDERED:
14114 case UNLT:
14115 case UNLE:
14116 case UNGT:
14117 case UNGE:
14118 case UNEQ:
14119 case LTGT:
14120 return CCFPmode;
14122 case LT:
14123 case LE:
14124 case GT:
14125 case GE:
14126 return CCFPEmode;
14128 default:
14129 gcc_unreachable ();
14133 /* A compare with a shifted operand. Because of canonicalization, the
14134 comparison will have to be swapped when we emit the assembler. */
14135 if (GET_MODE (y) == SImode
14136 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14137 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14138 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14139 || GET_CODE (x) == ROTATERT))
14140 return CC_SWPmode;
14142 /* This operation is performed swapped, but since we only rely on the Z
14143 flag we don't need an additional mode. */
14144 if (GET_MODE (y) == SImode
14145 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14146 && GET_CODE (x) == NEG
14147 && (op == EQ || op == NE))
14148 return CC_Zmode;
14150 /* This is a special case that is used by combine to allow a
14151 comparison of a shifted byte load to be split into a zero-extend
14152 followed by a comparison of the shifted integer (only valid for
14153 equalities and unsigned inequalities). */
14154 if (GET_MODE (x) == SImode
14155 && GET_CODE (x) == ASHIFT
14156 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14157 && GET_CODE (XEXP (x, 0)) == SUBREG
14158 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14159 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14160 && (op == EQ || op == NE
14161 || op == GEU || op == GTU || op == LTU || op == LEU)
14162 && CONST_INT_P (y))
14163 return CC_Zmode;
14165 /* A construct for a conditional compare, if the false arm contains
14166 0, then both conditions must be true, otherwise either condition
14167 must be true. Not all conditions are possible, so CCmode is
14168 returned if it can't be done. */
14169 if (GET_CODE (x) == IF_THEN_ELSE
14170 && (XEXP (x, 2) == const0_rtx
14171 || XEXP (x, 2) == const1_rtx)
14172 && COMPARISON_P (XEXP (x, 0))
14173 && COMPARISON_P (XEXP (x, 1)))
14174 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14175 INTVAL (XEXP (x, 2)));
14177 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14178 if (GET_CODE (x) == AND
14179 && (op == EQ || op == NE)
14180 && COMPARISON_P (XEXP (x, 0))
14181 && COMPARISON_P (XEXP (x, 1)))
14182 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14183 DOM_CC_X_AND_Y);
14185 if (GET_CODE (x) == IOR
14186 && (op == EQ || op == NE)
14187 && COMPARISON_P (XEXP (x, 0))
14188 && COMPARISON_P (XEXP (x, 1)))
14189 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14190 DOM_CC_X_OR_Y);
14192 /* An operation (on Thumb) where we want to test for a single bit.
14193 This is done by shifting that bit up into the top bit of a
14194 scratch register; we can then branch on the sign bit. */
14195 if (TARGET_THUMB1
14196 && GET_MODE (x) == SImode
14197 && (op == EQ || op == NE)
14198 && GET_CODE (x) == ZERO_EXTRACT
14199 && XEXP (x, 1) == const1_rtx)
14200 return CC_Nmode;
14202 /* For an operation that sets the condition codes as a side-effect, the
14203 V flag is not set correctly, so we can only use comparisons where
14204 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14205 instead.) */
14206 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14207 if (GET_MODE (x) == SImode
14208 && y == const0_rtx
14209 && (op == EQ || op == NE || op == LT || op == GE)
14210 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14211 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14212 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14213 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14214 || GET_CODE (x) == LSHIFTRT
14215 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14216 || GET_CODE (x) == ROTATERT
14217 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14218 return CC_NOOVmode;
14220 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14221 return CC_Zmode;
14223 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14224 && GET_CODE (x) == PLUS
14225 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14226 return CC_Cmode;
14228 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14230 switch (op)
14232 case EQ:
14233 case NE:
14234 /* A DImode comparison against zero can be implemented by
14235 or'ing the two halves together. */
14236 if (y == const0_rtx)
14237 return CC_Zmode;
14239 /* We can do an equality test in three Thumb instructions. */
14240 if (!TARGET_32BIT)
14241 return CC_Zmode;
14243 /* FALLTHROUGH */
14245 case LTU:
14246 case LEU:
14247 case GTU:
14248 case GEU:
14249 /* DImode unsigned comparisons can be implemented by cmp +
14250 cmpeq without a scratch register. Not worth doing in
14251 Thumb-2. */
14252 if (TARGET_32BIT)
14253 return CC_CZmode;
14255 /* FALLTHROUGH */
14257 case LT:
14258 case LE:
14259 case GT:
14260 case GE:
14261 /* DImode signed and unsigned comparisons can be implemented
14262 by cmp + sbcs with a scratch register, but that does not
14263 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14264 gcc_assert (op != EQ && op != NE);
14265 return CC_NCVmode;
14267 default:
14268 gcc_unreachable ();
14272 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14273 return GET_MODE (x);
14275 return CCmode;
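/* For illustration (added for exposition): the DImode cases above correspond
   to sequences along the lines of

       @ unsigned comparison of xhi:xlo against yhi:ylo (CC_CZmode)
       cmp    xhi, yhi
       cmpeq  xlo, ylo          @ low words compared only if high words equal

       @ signed comparison (CC_NCVmode), scratch register required
       cmp    xlo, ylo
       sbcs   scratch, xhi, yhi

   where xhi/xlo/yhi/ylo/scratch are placeholder register names; the second
   sequence leaves N, C and V usable but not Z, which is why GT/LE/GTU/LEU
   must be reversed for it.  */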
14278 /* X and Y are two things to compare using CODE. Emit the compare insn and
14279 return the rtx for register 0 in the proper mode. FP means this is a
14280 floating point compare: I don't think that it is needed on the arm. */
14282 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14284 machine_mode mode;
14285 rtx cc_reg;
14286 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14288 /* We might have X as a constant, Y as a register because of the predicates
14289 used for cmpdi. If so, force X to a register here. */
14290 if (dimode_comparison && !REG_P (x))
14291 x = force_reg (DImode, x);
14293 mode = SELECT_CC_MODE (code, x, y);
14294 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14296 if (dimode_comparison
14297 && mode != CC_CZmode)
14299 rtx clobber, set;
14301 /* To compare two non-zero values for equality, XOR them and
14302 then compare against zero. Not used for ARM mode; there
14303 CC_CZmode is cheaper. */
14304 if (mode == CC_Zmode && y != const0_rtx)
14306 gcc_assert (!reload_completed);
14307 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14308 y = const0_rtx;
14311 /* A scratch register is required. */
14312 if (reload_completed)
14313 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14314 else
14315 scratch = gen_rtx_SCRATCH (SImode);
14317 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14318 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14319 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14321 else
14322 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14324 return cc_reg;
14327 /* Generate a sequence of insns that will generate the correct return
14328 address mask depending on the physical architecture that the program
14329 is running on. */
14331 arm_gen_return_addr_mask (void)
14333 rtx reg = gen_reg_rtx (Pmode);
14335 emit_insn (gen_return_addr_mask (reg));
14336 return reg;
14339 void
14340 arm_reload_in_hi (rtx *operands)
14342 rtx ref = operands[1];
14343 rtx base, scratch;
14344 HOST_WIDE_INT offset = 0;
14346 if (GET_CODE (ref) == SUBREG)
14348 offset = SUBREG_BYTE (ref);
14349 ref = SUBREG_REG (ref);
14352 if (REG_P (ref))
14354 /* We have a pseudo which has been spilt onto the stack; there
14355 are two cases here: the first where there is a simple
14356 stack-slot replacement and a second where the stack-slot is
14357 out of range, or is used as a subreg. */
14358 if (reg_equiv_mem (REGNO (ref)))
14360 ref = reg_equiv_mem (REGNO (ref));
14361 base = find_replacement (&XEXP (ref, 0));
14363 else
14364 /* The slot is out of range, or was dressed up in a SUBREG. */
14365 base = reg_equiv_address (REGNO (ref));
14367 /* PR 62554: If there is no equivalent memory location then just move
14368 the value as an SImode register move. This happens when the target
14369 architecture variant does not have an HImode register move. */
14370 if (base == NULL)
14372 gcc_assert (REG_P (operands[0]));
14373 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14374 gen_rtx_SUBREG (SImode, ref, 0)));
14375 return;
14378 else
14379 base = find_replacement (&XEXP (ref, 0));
14381 /* Handle the case where the address is too complex to be offset by 1. */
14382 if (GET_CODE (base) == MINUS
14383 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14385 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14387 emit_set_insn (base_plus, base);
14388 base = base_plus;
14390 else if (GET_CODE (base) == PLUS)
14392 /* The addend must be CONST_INT, or we would have dealt with it above. */
14393 HOST_WIDE_INT hi, lo;
14395 offset += INTVAL (XEXP (base, 1));
14396 base = XEXP (base, 0);
14398 /* Rework the address into a legal sequence of insns. */
14399 /* Valid range for lo is -4095 -> 4095 */
14400 lo = (offset >= 0
14401 ? (offset & 0xfff)
14402 : -((-offset) & 0xfff));
14404 /* Corner case, if lo is the max offset then we would be out of range
14405 once we have added the additional 1 below, so bump the msb into the
14406 pre-loading insn(s). */
14407 if (lo == 4095)
14408 lo &= 0x7ff;
14410 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14411 ^ (HOST_WIDE_INT) 0x80000000)
14412 - (HOST_WIDE_INT) 0x80000000);
14414 gcc_assert (hi + lo == offset);
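      /* Worked example (added for exposition): OFFSET == 0x2006 splits into
	 LO == 0x006 and HI == 0x2000; for the corner case OFFSET == 4095 the
	 adjustment above gives LO == 2047 and HI == 2048, so that both LO and
	 LO + 1 remain within the [-4095, 4095] range used by the byte loads
	 emitted below.  */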
14416 if (hi != 0)
14418 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14420 /* Get the base address; addsi3 knows how to handle constants
14421 that require more than one insn. */
14422 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14423 base = base_plus;
14424 offset = lo;
14428 /* Operands[2] may overlap operands[0] (though it won't overlap
14429 operands[1]); that's why we asked for a DImode reg -- so we can
14430 use the half that does not overlap. */
14431 if (REGNO (operands[2]) == REGNO (operands[0]))
14432 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14433 else
14434 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14436 emit_insn (gen_zero_extendqisi2 (scratch,
14437 gen_rtx_MEM (QImode,
14438 plus_constant (Pmode, base,
14439 offset))));
14440 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14441 gen_rtx_MEM (QImode,
14442 plus_constant (Pmode, base,
14443 offset + 1))));
14444 if (!BYTES_BIG_ENDIAN)
14445 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14446 gen_rtx_IOR (SImode,
14447 gen_rtx_ASHIFT
14448 (SImode,
14449 gen_rtx_SUBREG (SImode, operands[0], 0),
14450 GEN_INT (8)),
14451 scratch));
14452 else
14453 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14454 gen_rtx_IOR (SImode,
14455 gen_rtx_ASHIFT (SImode, scratch,
14456 GEN_INT (8)),
14457 gen_rtx_SUBREG (SImode, operands[0], 0)));
14460 /* Handle storing a half-word to memory during reload by synthesizing it as two
14461 byte stores. Take care not to clobber the input values until after we
14462 have moved them somewhere safe. This code assumes that if the DImode
14463 scratch in operands[2] overlaps either the input value or output address
14464 in some way, then that value must die in this insn (we absolutely need
14465 two scratch registers for some corner cases). */
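/* Illustrative sketch (added for exposition): on a little-endian target the
   function below replaces a half-word store with, roughly,

       strb  Rv, [Rb, #OFF]         @ low byte of the value
       lsr   Rs, Rv, #8
       strb  Rs, [Rb, #OFF + 1]     @ high byte via the scratch register

   with Rv/Rs/Rb as placeholder names for the value, scratch and base
   registers; on a big-endian target the low byte goes to OFF + 1 and the
   shifted byte to OFF.  */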
14466 void
14467 arm_reload_out_hi (rtx *operands)
14469 rtx ref = operands[0];
14470 rtx outval = operands[1];
14471 rtx base, scratch;
14472 HOST_WIDE_INT offset = 0;
14474 if (GET_CODE (ref) == SUBREG)
14476 offset = SUBREG_BYTE (ref);
14477 ref = SUBREG_REG (ref);
14480 if (REG_P (ref))
14482 /* We have a pseudo which has been spilt onto the stack; there
14483 are two cases here: the first where there is a simple
14484 stack-slot replacement and a second where the stack-slot is
14485 out of range, or is used as a subreg. */
14486 if (reg_equiv_mem (REGNO (ref)))
14488 ref = reg_equiv_mem (REGNO (ref));
14489 base = find_replacement (&XEXP (ref, 0));
14491 else
14492 /* The slot is out of range, or was dressed up in a SUBREG. */
14493 base = reg_equiv_address (REGNO (ref));
14495 /* PR 62254: If there is no equivalent memory location then just move
14496 the value as an SImode register move. This happens when the target
14497 architecture variant does not have an HImode register move. */
14498 if (base == NULL)
14500 gcc_assert (REG_P (outval) || SUBREG_P (outval));
14502 if (REG_P (outval))
14504 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14505 gen_rtx_SUBREG (SImode, outval, 0)));
14507 else /* SUBREG_P (outval) */
14509 if (GET_MODE (SUBREG_REG (outval)) == SImode)
14510 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14511 SUBREG_REG (outval)));
14512 else
14513 /* FIXME: Handle other cases ? */
14514 gcc_unreachable ();
14516 return;
14519 else
14520 base = find_replacement (&XEXP (ref, 0));
14522 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14524 /* Handle the case where the address is too complex to be offset by 1. */
14525 if (GET_CODE (base) == MINUS
14526 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14528 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14530 /* Be careful not to destroy OUTVAL. */
14531 if (reg_overlap_mentioned_p (base_plus, outval))
14533 /* Updating base_plus might destroy outval, see if we can
14534 swap the scratch and base_plus. */
14535 if (!reg_overlap_mentioned_p (scratch, outval))
14536 std::swap (scratch, base_plus);
14537 else
14539 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14541 /* Be conservative and copy OUTVAL into the scratch now,
14542 this should only be necessary if outval is a subreg
14543 of something larger than a word. */
14544 /* XXX Might this clobber base? I can't see how it can,
14545 since scratch is known to overlap with OUTVAL, and
14546 must be wider than a word. */
14547 emit_insn (gen_movhi (scratch_hi, outval));
14548 outval = scratch_hi;
14552 emit_set_insn (base_plus, base);
14553 base = base_plus;
14555 else if (GET_CODE (base) == PLUS)
14557 /* The addend must be CONST_INT, or we would have dealt with it above. */
14558 HOST_WIDE_INT hi, lo;
14560 offset += INTVAL (XEXP (base, 1));
14561 base = XEXP (base, 0);
14563 /* Rework the address into a legal sequence of insns. */
14564 /* Valid range for lo is -4095 -> 4095 */
14565 lo = (offset >= 0
14566 ? (offset & 0xfff)
14567 : -((-offset) & 0xfff));
14569 /* Corner case, if lo is the max offset then we would be out of range
14570 once we have added the additional 1 below, so bump the msb into the
14571 pre-loading insn(s). */
14572 if (lo == 4095)
14573 lo &= 0x7ff;
14575 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14576 ^ (HOST_WIDE_INT) 0x80000000)
14577 - (HOST_WIDE_INT) 0x80000000);
14579 gcc_assert (hi + lo == offset);
14581 if (hi != 0)
14583 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14585 /* Be careful not to destroy OUTVAL. */
14586 if (reg_overlap_mentioned_p (base_plus, outval))
14588 /* Updating base_plus might destroy outval, see if we
14589 can swap the scratch and base_plus. */
14590 if (!reg_overlap_mentioned_p (scratch, outval))
14591 std::swap (scratch, base_plus);
14592 else
14594 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14596 /* Be conservative and copy outval into scratch now,
14597 this should only be necessary if outval is a
14598 subreg of something larger than a word. */
14599 /* XXX Might this clobber base? I can't see how it
14600 can, since scratch is known to overlap with
14601 outval. */
14602 emit_insn (gen_movhi (scratch_hi, outval));
14603 outval = scratch_hi;
14607 /* Get the base address; addsi3 knows how to handle constants
14608 that require more than one insn. */
14609 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14610 base = base_plus;
14611 offset = lo;
14615 if (BYTES_BIG_ENDIAN)
14617 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
14618 plus_constant (Pmode, base,
14619 offset + 1)),
14620 gen_lowpart (QImode, outval)));
14621 emit_insn (gen_lshrsi3 (scratch,
14622 gen_rtx_SUBREG (SImode, outval, 0),
14623 GEN_INT (8)));
14624 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
14625 offset)),
14626 gen_lowpart (QImode, scratch)));
14628 else
14630 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
14631 offset)),
14632 gen_lowpart (QImode, outval)));
14633 emit_insn (gen_lshrsi3 (scratch,
14634 gen_rtx_SUBREG (SImode, outval, 0),
14635 GEN_INT (8)));
14636 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
14637 plus_constant (Pmode, base,
14638 offset + 1)),
14639 gen_lowpart (QImode, scratch)));
14643 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
14644 (padded to the size of a word) should be passed in a register. */
14646 static bool
14647 arm_must_pass_in_stack (machine_mode mode, const_tree type)
14649 if (TARGET_AAPCS_BASED)
14650 return must_pass_in_stack_var_size (mode, type);
14651 else
14652 return must_pass_in_stack_var_size_or_pad (mode, type);
14656 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
14657 Return true if an argument passed on the stack should be padded upwards,
14658 i.e. if the least-significant byte has useful data.
14659 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
14660 aggregate types are placed in the lowest memory address. */
14662 bool
14663 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
14665 if (!TARGET_AAPCS_BASED)
14666 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
14668 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
14669 return false;
14671 return true;
14675 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
14676 Return !BYTES_BIG_ENDIAN if the least significant byte of the
14677 register has useful data, and return the opposite if the most
14678 significant byte does. */
14680 bool
14681 arm_pad_reg_upward (machine_mode mode,
14682 tree type, int first ATTRIBUTE_UNUSED)
14684 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
14686 /* For AAPCS, small aggregates, small fixed-point types,
14687 and small complex types are always padded upwards. */
14688 if (type)
14690 if ((AGGREGATE_TYPE_P (type)
14691 || TREE_CODE (type) == COMPLEX_TYPE
14692 || FIXED_POINT_TYPE_P (type))
14693 && int_size_in_bytes (type) <= 4)
14694 return true;
14696 else
14698 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
14699 && GET_MODE_SIZE (mode) <= 4)
14700 return true;
14704 /* Otherwise, use default padding. */
14705 return !BYTES_BIG_ENDIAN;
14708 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
14709 assuming that the address in the base register is word aligned. */
14710 bool
14711 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
14713 HOST_WIDE_INT max_offset;
14715 /* Offset must be a multiple of 4 in Thumb mode. */
14716 if (TARGET_THUMB2 && ((offset & 3) != 0))
14717 return false;
14719 if (TARGET_THUMB2)
14720 max_offset = 1020;
14721 else if (TARGET_ARM)
14722 max_offset = 255;
14723 else
14724 return false;
14726 return ((offset <= max_offset) && (offset >= -max_offset));
14729 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
14730 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
14731 Assumes that the address in the base register RN is word aligned. Pattern
14732 guarantees that both memory accesses use the same base register,
14733 the offsets are constants within the range, and the gap between the offsets is 4.
14734 If reload is complete then check that the registers are legal. WBACK indicates whether
14735 address is updated. LOAD indicates whether memory access is load or store. */
14736 bool
14737 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
14738 bool wback, bool load)
14740 unsigned int t, t2, n;
14742 if (!reload_completed)
14743 return true;
14745 if (!offset_ok_for_ldrd_strd (offset))
14746 return false;
14748 t = REGNO (rt);
14749 t2 = REGNO (rt2);
14750 n = REGNO (rn);
14752 if ((TARGET_THUMB2)
14753 && ((wback && (n == t || n == t2))
14754 || (t == SP_REGNUM)
14755 || (t == PC_REGNUM)
14756 || (t2 == SP_REGNUM)
14757 || (t2 == PC_REGNUM)
14758 || (!load && (n == PC_REGNUM))
14759 || (load && (t == t2))
14760 /* Triggers Cortex-M3 LDRD errata. */
14761 || (!wback && load && fix_cm3_ldrd && (n == t))))
14762 return false;
14764 if ((TARGET_ARM)
14765 && ((wback && (n == t || n == t2))
14766 || (t2 == PC_REGNUM)
14767 || (t % 2 != 0) /* First destination register is not even. */
14768 || (t2 != t + 1)
14769 /* PC can be used as base register (for offset addressing only),
14770 but it is deprecated. */
14771 || (n == PC_REGNUM)))
14772 return false;
14774 return true;
14777 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
14778 operand MEM's address contains an immediate offset from the base
14779 register and has no side effects, in which case it sets BASE and
14780 OFFSET accordingly. */
14781 static bool
14782 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
14784 rtx addr;
14786 gcc_assert (base != NULL && offset != NULL);
14788 /* TODO: Handle more general memory operand patterns, such as
14789 PRE_DEC and PRE_INC. */
14791 if (side_effects_p (mem))
14792 return false;
14794 /* Can't deal with subregs. */
14795 if (GET_CODE (mem) == SUBREG)
14796 return false;
14798 gcc_assert (MEM_P (mem));
14800 *offset = const0_rtx;
14802 addr = XEXP (mem, 0);
14804 /* If addr isn't valid for DImode, then we can't handle it. */
14805 if (!arm_legitimate_address_p (DImode, addr,
14806 reload_in_progress || reload_completed))
14807 return false;
14809 if (REG_P (addr))
14811 *base = addr;
14812 return true;
14814 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
14816 *base = XEXP (addr, 0);
14817 *offset = XEXP (addr, 1);
14818 return (REG_P (*base) && CONST_INT_P (*offset));
14821 return false;
14824 /* Called from a peephole2 to replace two word-size accesses with a
14825 single LDRD/STRD instruction. Returns true iff we can generate a
14826 new instruction sequence. That is, both accesses use the same base
14827 register and the gap between constant offsets is 4. This function
14828 may reorder its operands to match ldrd/strd RTL templates.
14829 OPERANDS are the operands found by the peephole matcher;
14830 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
14831 corresponding memory operands. LOAD indicates whether the access
14832 is a load or a store. CONST_STORE indicates a store of constant
14833 integer values held in OPERANDS[4,5] and assumes that the pattern
14834 is 4 insns long, for the purpose of checking dead registers.
14835 COMMUTE indicates that register operands may be reordered. */
14836 bool
14837 gen_operands_ldrd_strd (rtx *operands, bool load,
14838 bool const_store, bool commute)
14840 int nops = 2;
14841 HOST_WIDE_INT offsets[2], offset;
14842 rtx base = NULL_RTX;
14843 rtx cur_base, cur_offset, tmp;
14844 int i, gap;
14845 HARD_REG_SET regset;
14847 gcc_assert (!const_store || !load);
14848 /* Check that the memory references are immediate offsets from the
14849 same base register. Extract the base register, the destination
14850 registers, and the corresponding memory offsets. */
14851 for (i = 0; i < nops; i++)
14853 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
14854 return false;
14856 if (i == 0)
14857 base = cur_base;
14858 else if (REGNO (base) != REGNO (cur_base))
14859 return false;
14861 offsets[i] = INTVAL (cur_offset);
14862 if (GET_CODE (operands[i]) == SUBREG)
14864 tmp = SUBREG_REG (operands[i]);
14865 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
14866 operands[i] = tmp;
14870 /* Make sure there is no dependency between the individual loads. */
14871 if (load && REGNO (operands[0]) == REGNO (base))
14872 return false; /* RAW */
14874 if (load && REGNO (operands[0]) == REGNO (operands[1]))
14875 return false; /* WAW */
14877 /* If the same input register is used in both stores
14878 when storing different constants, try to find a free register.
14879 For example, the code
14880 mov r0, 0
14881 str r0, [r2]
14882 mov r0, 1
14883 str r0, [r2, #4]
14884 can be transformed into
14885 mov r1, 0
14886 mov r0, 1
14887 strd r1, r0, [r2]
14888 in Thumb mode assuming that r1 is free.
14889 For ARM mode do the same but only if the starting register
14890 can be made to be even. */
14891 if (const_store
14892 && REGNO (operands[0]) == REGNO (operands[1])
14893 && INTVAL (operands[4]) != INTVAL (operands[5]))
14895 if (TARGET_THUMB2)
14897 CLEAR_HARD_REG_SET (regset);
14898 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
14899 if (tmp == NULL_RTX)
14900 return false;
14902 /* Use the new register in the first load to ensure that
14903 if the original input register is not dead after peephole,
14904 then it will have the correct constant value. */
14905 operands[0] = tmp;
14907 else if (TARGET_ARM)
14909 int regno = REGNO (operands[0]);
14910 if (!peep2_reg_dead_p (4, operands[0]))
14912 /* When the input register is even and is not dead after the
14913 pattern, it has to hold the second constant but we cannot
14914 form a legal STRD in ARM mode with this register as the second
14915 register. */
14916 if (regno % 2 == 0)
14917 return false;
14919 /* Is regno-1 free? */
14920 SET_HARD_REG_SET (regset);
14921 CLEAR_HARD_REG_BIT(regset, regno - 1);
14922 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
14923 if (tmp == NULL_RTX)
14924 return false;
14926 operands[0] = tmp;
14928 else
14930 /* Find a DImode register. */
14931 CLEAR_HARD_REG_SET (regset);
14932 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
14933 if (tmp != NULL_RTX)
14935 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
14936 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
14938 else
14940 /* Can we use the input register to form a DI register? */
14941 SET_HARD_REG_SET (regset);
14942 CLEAR_HARD_REG_BIT(regset,
14943 regno % 2 == 0 ? regno + 1 : regno - 1);
14944 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
14945 if (tmp == NULL_RTX)
14946 return false;
14947 operands[regno % 2 == 1 ? 0 : 1] = tmp;
14951 gcc_assert (operands[0] != NULL_RTX);
14952 gcc_assert (operands[1] != NULL_RTX);
14953 gcc_assert (REGNO (operands[0]) % 2 == 0);
14954 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
14958 /* Make sure the instructions are ordered with lower memory access first. */
14959 if (offsets[0] > offsets[1])
14961 gap = offsets[0] - offsets[1];
14962 offset = offsets[1];
14964 /* Swap the instructions such that lower memory is accessed first. */
14965 std::swap (operands[0], operands[1]);
14966 std::swap (operands[2], operands[3]);
14967 if (const_store)
14968 std::swap (operands[4], operands[5]);
14970 else
14972 gap = offsets[1] - offsets[0];
14973 offset = offsets[0];
14976 /* Make sure accesses are to consecutive memory locations. */
14977 if (gap != 4)
14978 return false;
14980 /* Make sure we generate legal instructions. */
14981 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
14982 false, load))
14983 return true;
14985 /* In Thumb state, where the LDRD/STRD register operands are almost
14986 unconstrained, there is little hope of fixing it by renaming registers. */
14987 if (TARGET_THUMB2)
14988 return false;
14990 if (load && commute)
14992 /* Try reordering registers. */
14993 std::swap (operands[0], operands[1]);
14994 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
14995 false, load))
14996 return true;
14999 if (const_store)
15001 /* If input registers are dead after this pattern, they can be
15002 reordered or replaced by other registers that are free in the
15003 current pattern. */
15004 if (!peep2_reg_dead_p (4, operands[0])
15005 || !peep2_reg_dead_p (4, operands[1]))
15006 return false;
15008 /* Try to reorder the input registers. */
15009 /* For example, the code
15010 mov r0, 0
15011 mov r1, 1
15012 str r1, [r2]
15013 str r0, [r2, #4]
15014 can be transformed into
15015 mov r1, 0
15016 mov r0, 1
15017 strd r0, r1, [r2]
15019 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15020 false, false))
15022 std::swap (operands[0], operands[1]);
15023 return true;
15026 /* Try to find a free DI register. */
15027 CLEAR_HARD_REG_SET (regset);
15028 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15029 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15030 while (true)
15032 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15033 if (tmp == NULL_RTX)
15034 return false;
15036 /* DREG must be an even-numbered register in DImode.
15037 Split it into SI registers. */
15038 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15039 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15040 gcc_assert (operands[0] != NULL_RTX);
15041 gcc_assert (operands[1] != NULL_RTX);
15042 gcc_assert (REGNO (operands[0]) % 2 == 0);
15043 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15045 return (operands_ok_ldrd_strd (operands[0], operands[1],
15046 base, offset,
15047 false, load));
15051 return false;
15057 /* Print a symbolic form of X to the debug file, F. */
15058 static void
15059 arm_print_value (FILE *f, rtx x)
15061 switch (GET_CODE (x))
15063 case CONST_INT:
15064 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15065 return;
15067 case CONST_DOUBLE:
15068 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15069 return;
15071 case CONST_VECTOR:
15073 int i;
15075 fprintf (f, "<");
15076 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15078 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15079 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15080 fputc (',', f);
15082 fprintf (f, ">");
15084 return;
15086 case CONST_STRING:
15087 fprintf (f, "\"%s\"", XSTR (x, 0));
15088 return;
15090 case SYMBOL_REF:
15091 fprintf (f, "`%s'", XSTR (x, 0));
15092 return;
15094 case LABEL_REF:
15095 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15096 return;
15098 case CONST:
15099 arm_print_value (f, XEXP (x, 0));
15100 return;
15102 case PLUS:
15103 arm_print_value (f, XEXP (x, 0));
15104 fprintf (f, "+");
15105 arm_print_value (f, XEXP (x, 1));
15106 return;
15108 case PC:
15109 fprintf (f, "pc");
15110 return;
15112 default:
15113 fprintf (f, "????");
15114 return;
15118 /* Routines for manipulation of the constant pool. */
15120 /* Arm instructions cannot load a large constant directly into a
15121 register; they have to come from a pc relative load. The constant
15122 must therefore be placed in the addressable range of the pc
15123 relative load. Depending on the precise pc relative load
15124 instruction the range is somewhere between 256 bytes and 4k. This
15125 means that we often have to dump a constant inside a function, and
15126 generate code to branch around it.
15128 It is important to minimize this, since the branches will slow
15129 things down and make the code larger.
15131 Normally we can hide the table after an existing unconditional
15132 branch so that there is no interruption of the flow, but in the
15133 worst case the code looks like this:
15135 ldr rn, L1
15137 b L2
15138 align
15139 L1: .long value
15143 ldr rn, L3
15145 b L4
15146 align
15147 L3: .long value
15151 We fix this by performing a scan after scheduling, which notices
15152 which instructions need to have their operands fetched from the
15153 constant table and builds the table.
15155 The algorithm starts by building a table of all the constants that
15156 need fixing up and all the natural barriers in the function (places
15157 where a constant table can be dropped without breaking the flow).
15158 For each fixup we note how far the pc-relative replacement will be
15159 able to reach and the offset of the instruction into the function.
15161 Having built the table we then group the fixes together to form
15162 tables that are as large as possible (subject to addressing
15163 constraints) and emit each table of constants after the last
15164 barrier that is within range of all the instructions in the group.
15165 If a group does not contain a barrier, then we forcibly create one
15166 by inserting a jump instruction into the flow. Once the table has
15167 been inserted, the insns are then modified to reference the
15168 relevant entry in the pool.
15170 Possible enhancements to the algorithm (not implemented) are:
15172 1) For some processors and object formats, there may be benefit in
15173 aligning the pools to the start of cache lines; this alignment
15174 would need to be taken into account when calculating addressability
15175 of a pool. */
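/* Illustrative sketch (not from the original sources; addresses and ranges
   are made up): assuming a Thumb-1 style pc-relative load with a forward
   range of roughly 1k bytes, the pass behaves as follows.

     addr 0x000:  ldr r0, =0x12345678   ; fix A, reachable up to ~0x400
     addr 0x200:  ldr r1, =0xdeadbeef   ; fix B, reachable up to ~0x600
     addr 0x300:  b   elsewhere         ; natural barrier

   Fixes A and B are grouped into one pool, which is emitted after the
   unconditional branch at 0x300 because that barrier is within range of
   both fixes; the two ldr instructions are then rewritten to load from
   the corresponding pool entries.  */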
15177 /* These typedefs are located at the start of this file, so that
15178 they can be used in the prototypes there. This comment is to
15179 remind readers of that fact so that the following structures
15180 can be understood more easily.
15182 typedef struct minipool_node Mnode;
15183 typedef struct minipool_fixup Mfix; */
15185 struct minipool_node
15187 /* Doubly linked chain of entries. */
15188 Mnode * next;
15189 Mnode * prev;
15190 /* The maximum offset into the code that this entry can be placed. While
15191 pushing fixes for forward references, all entries are sorted in order
15192 of increasing max_address. */
15193 HOST_WIDE_INT max_address;
15194 /* Similarly for an entry inserted for a backwards ref. */
15195 HOST_WIDE_INT min_address;
15196 /* The number of fixes referencing this entry. This can become zero
15197 if we "unpush" an entry. In this case we ignore the entry when we
15198 come to emit the code. */
15199 int refcount;
15200 /* The offset from the start of the minipool. */
15201 HOST_WIDE_INT offset;
15202 /* The value in the table. */
15203 rtx value;
15204 /* The mode of value. */
15205 machine_mode mode;
15206 /* The size of the value. With iWMMXt enabled
15207 sizes > 4 also imply an alignment of 8 bytes. */
15208 int fix_size;
15211 struct minipool_fixup
15213 Mfix * next;
15214 rtx_insn * insn;
15215 HOST_WIDE_INT address;
15216 rtx * loc;
15217 machine_mode mode;
15218 int fix_size;
15219 rtx value;
15220 Mnode * minipool;
15221 HOST_WIDE_INT forwards;
15222 HOST_WIDE_INT backwards;
15225 /* Fixes less than a word need padding out to a word boundary. */
15226 #define MINIPOOL_FIX_SIZE(mode) \
15227 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
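/* For example (illustrative only): an HImode fix (2 bytes) is padded to
   MINIPOOL_FIX_SIZE == 4, while DImode (8 bytes) and V4SImode (16 bytes)
   fixes keep their natural sizes.  */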
15229 static Mnode * minipool_vector_head;
15230 static Mnode * minipool_vector_tail;
15231 static rtx_code_label *minipool_vector_label;
15232 static int minipool_pad;
15234 /* The linked list of all minipool fixes required for this function. */
15235 Mfix * minipool_fix_head;
15236 Mfix * minipool_fix_tail;
15237 /* The fix entry for the current minipool, once it has been placed. */
15238 Mfix * minipool_barrier;
15240 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15241 #define JUMP_TABLES_IN_TEXT_SECTION 0
15242 #endif
15244 static HOST_WIDE_INT
15245 get_jump_table_size (rtx_jump_table_data *insn)
15247 /* ADDR_VECs only take room if read-only data goes into the text
15248 section. */
15249 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15251 rtx body = PATTERN (insn);
15252 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15253 HOST_WIDE_INT size;
15254 HOST_WIDE_INT modesize;
15256 modesize = GET_MODE_SIZE (GET_MODE (body));
15257 size = modesize * XVECLEN (body, elt);
15258 switch (modesize)
15260 case 1:
15261 /* Round up size of TBB table to a halfword boundary. */
15262 size = (size + 1) & ~HOST_WIDE_INT_1;
15263 break;
15264 case 2:
15265 /* No padding necessary for TBH. */
15266 break;
15267 case 4:
15268 /* Add two bytes for alignment on Thumb. */
15269 if (TARGET_THUMB)
15270 size += 2;
15271 break;
15272 default:
15273 gcc_unreachable ();
15275 return size;
15278 return 0;
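/* Worked example (illustrative): a QImode ADDR_DIFF_VEC with 5 entries
   used for a Thumb-2 TBB dispatch occupies 5 bytes, which the code above
   rounds up to 6 so that the following instruction is halfword aligned.
   When jump tables are not placed in the text section the function
   reports 0, since they then take no room in the instruction stream.  */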
15281 /* Return the maximum amount of padding that will be inserted before
15282 label LABEL. */
15284 static HOST_WIDE_INT
15285 get_label_padding (rtx label)
15287 HOST_WIDE_INT align, min_insn_size;
15289 align = 1 << label_to_alignment (label);
15290 min_insn_size = TARGET_THUMB ? 2 : 4;
15291 return align > min_insn_size ? align - min_insn_size : 0;
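/* Worked example (illustrative): if LABEL is aligned to 8 bytes and we
   are compiling for Thumb (minimum instruction size 2), up to 8 - 2 = 6
   bytes of padding may be inserted before it; for ARM (minimum
   instruction size 4) the worst case would be 8 - 4 = 4 bytes.  */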
15294 /* Move a minipool fix MP from its current location to before MAX_MP.
15295 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15296 constraints may need updating. */
15297 static Mnode *
15298 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15299 HOST_WIDE_INT max_address)
15301 /* The code below assumes these are different. */
15302 gcc_assert (mp != max_mp);
15304 if (max_mp == NULL)
15306 if (max_address < mp->max_address)
15307 mp->max_address = max_address;
15309 else
15311 if (max_address > max_mp->max_address - mp->fix_size)
15312 mp->max_address = max_mp->max_address - mp->fix_size;
15313 else
15314 mp->max_address = max_address;
15316 /* Unlink MP from its current position. Since max_mp is non-null,
15317 mp->prev must be non-null. */
15318 mp->prev->next = mp->next;
15319 if (mp->next != NULL)
15320 mp->next->prev = mp->prev;
15321 else
15322 minipool_vector_tail = mp->prev;
15324 /* Re-insert it before MAX_MP. */
15325 mp->next = max_mp;
15326 mp->prev = max_mp->prev;
15327 max_mp->prev = mp;
15329 if (mp->prev != NULL)
15330 mp->prev->next = mp;
15331 else
15332 minipool_vector_head = mp;
15335 /* Save the new entry. */
15336 max_mp = mp;
15338 /* Scan over the preceding entries and adjust their addresses as
15339 required. */
15340 while (mp->prev != NULL
15341 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15343 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15344 mp = mp->prev;
15347 return max_mp;
15350 /* Add a constant to the minipool for a forward reference. Returns the
15351 node added or NULL if the constant will not fit in this pool. */
15352 static Mnode *
15353 add_minipool_forward_ref (Mfix *fix)
15355 /* If set, max_mp is the first pool_entry that has a lower
15356 constraint than the one we are trying to add. */
15357 Mnode * max_mp = NULL;
15358 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15359 Mnode * mp;
15361 /* If the minipool starts before the end of FIX->INSN then this FIX
15362 cannot be placed into the current pool. Furthermore, adding the
15363 new constant pool entry may cause the pool to start FIX_SIZE bytes
15364 earlier. */
15365 if (minipool_vector_head &&
15366 (fix->address + get_attr_length (fix->insn)
15367 >= minipool_vector_head->max_address - fix->fix_size))
15368 return NULL;
15370 /* Scan the pool to see if a constant with the same value has
15371 already been added. While we are doing this, also note the
15372 location where we must insert the constant if it doesn't already
15373 exist. */
15374 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15376 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15377 && fix->mode == mp->mode
15378 && (!LABEL_P (fix->value)
15379 || (CODE_LABEL_NUMBER (fix->value)
15380 == CODE_LABEL_NUMBER (mp->value)))
15381 && rtx_equal_p (fix->value, mp->value))
15383 /* More than one fix references this entry. */
15384 mp->refcount++;
15385 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15388 /* Note the insertion point if necessary. */
15389 if (max_mp == NULL
15390 && mp->max_address > max_address)
15391 max_mp = mp;
15393 /* If we are inserting an 8-byte aligned quantity and
15394 we have not already found an insertion point, then
15395 make sure that all such 8-byte aligned quantities are
15396 placed at the start of the pool. */
15397 if (ARM_DOUBLEWORD_ALIGN
15398 && max_mp == NULL
15399 && fix->fix_size >= 8
15400 && mp->fix_size < 8)
15402 max_mp = mp;
15403 max_address = mp->max_address;
15407 /* The value is not currently in the minipool, so we need to create
15408 a new entry for it. If MAX_MP is NULL, the entry will be put on
15409 the end of the list since the placement is less constrained than
15410 any existing entry. Otherwise, we insert the new fix before
15411 MAX_MP and, if necessary, adjust the constraints on the other
15412 entries. */
15413 mp = XNEW (Mnode);
15414 mp->fix_size = fix->fix_size;
15415 mp->mode = fix->mode;
15416 mp->value = fix->value;
15417 mp->refcount = 1;
15418 /* Not yet required for a backwards ref. */
15419 mp->min_address = -65536;
15421 if (max_mp == NULL)
15423 mp->max_address = max_address;
15424 mp->next = NULL;
15425 mp->prev = minipool_vector_tail;
15427 if (mp->prev == NULL)
15429 minipool_vector_head = mp;
15430 minipool_vector_label = gen_label_rtx ();
15432 else
15433 mp->prev->next = mp;
15435 minipool_vector_tail = mp;
15437 else
15439 if (max_address > max_mp->max_address - mp->fix_size)
15440 mp->max_address = max_mp->max_address - mp->fix_size;
15441 else
15442 mp->max_address = max_address;
15444 mp->next = max_mp;
15445 mp->prev = max_mp->prev;
15446 max_mp->prev = mp;
15447 if (mp->prev != NULL)
15448 mp->prev->next = mp;
15449 else
15450 minipool_vector_head = mp;
15453 /* Save the new entry. */
15454 max_mp = mp;
15456 /* Scan over the preceding entries and adjust their addresses as
15457 required. */
15458 while (mp->prev != NULL
15459 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15461 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15462 mp = mp->prev;
15465 return max_mp;
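/* Illustrative example (numbers are made up): for a fix at address 0x100
   whose insn has a forward pool range of 1020 bytes, and with
   minipool_pad == 4, MAX_ADDRESS is 0x100 + 1020 - 4 = 0x4f8; the new
   entry must therefore be emitted no later than that address.  */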
15468 static Mnode *
15469 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15470 HOST_WIDE_INT min_address)
15472 HOST_WIDE_INT offset;
15474 /* The code below assumes these are different. */
15475 gcc_assert (mp != min_mp);
15477 if (min_mp == NULL)
15479 if (min_address > mp->min_address)
15480 mp->min_address = min_address;
15482 else
15484 /* We will adjust this below if it is too loose. */
15485 mp->min_address = min_address;
15487 /* Unlink MP from its current position. Since min_mp is non-null,
15488 mp->next must be non-null. */
15489 mp->next->prev = mp->prev;
15490 if (mp->prev != NULL)
15491 mp->prev->next = mp->next;
15492 else
15493 minipool_vector_head = mp->next;
15495 /* Reinsert it after MIN_MP. */
15496 mp->prev = min_mp;
15497 mp->next = min_mp->next;
15498 min_mp->next = mp;
15499 if (mp->next != NULL)
15500 mp->next->prev = mp;
15501 else
15502 minipool_vector_tail = mp;
15505 min_mp = mp;
15507 offset = 0;
15508 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15510 mp->offset = offset;
15511 if (mp->refcount > 0)
15512 offset += mp->fix_size;
15514 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
15515 mp->next->min_address = mp->min_address + mp->fix_size;
15518 return min_mp;
15521 /* Add a constant to the minipool for a backward reference. Returns the
15522 node added or NULL if the constant will not fit in this pool.
15524 Note that the code for insertion for a backwards reference can be
15525 somewhat confusing because the calculated offsets for each fix do
15526 not take into account the size of the pool (which is still under
15527 construction). */
15528 static Mnode *
15529 add_minipool_backward_ref (Mfix *fix)
15531 /* If set, min_mp is the last pool_entry that has a lower constraint
15532 than the one we are trying to add. */
15533 Mnode *min_mp = NULL;
15534 /* This can be negative, since it is only a constraint. */
15535 HOST_WIDE_INT min_address = fix->address - fix->backwards;
15536 Mnode *mp;
15538 /* If we can't reach the current pool from this insn, or if we can't
15539 insert this entry at the end of the pool without pushing other
15540 fixes out of range, then we don't try. This ensures that we
15541 can't fail later on. */
15542 if (min_address >= minipool_barrier->address
15543 || (minipool_vector_tail->min_address + fix->fix_size
15544 >= minipool_barrier->address))
15545 return NULL;
15547 /* Scan the pool to see if a constant with the same value has
15548 already been added. While we are doing this, also note the
15549 location where we must insert the constant if it doesn't already
15550 exist. */
15551 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
15553 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15554 && fix->mode == mp->mode
15555 && (!LABEL_P (fix->value)
15556 || (CODE_LABEL_NUMBER (fix->value)
15557 == CODE_LABEL_NUMBER (mp->value)))
15558 && rtx_equal_p (fix->value, mp->value)
15559 /* Check that there is enough slack to move this entry to the
15560 end of the table (this is conservative). */
15561 && (mp->max_address
15562 > (minipool_barrier->address
15563 + minipool_vector_tail->offset
15564 + minipool_vector_tail->fix_size)))
15566 mp->refcount++;
15567 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
15570 if (min_mp != NULL)
15571 mp->min_address += fix->fix_size;
15572 else
15574 /* Note the insertion point if necessary. */
15575 if (mp->min_address < min_address)
15577 /* For now, we do not allow the insertion of 8-byte alignment
15578 requiring nodes anywhere but at the start of the pool. */
15579 if (ARM_DOUBLEWORD_ALIGN
15580 && fix->fix_size >= 8 && mp->fix_size < 8)
15581 return NULL;
15582 else
15583 min_mp = mp;
15585 else if (mp->max_address
15586 < minipool_barrier->address + mp->offset + fix->fix_size)
15588 /* Inserting before this entry would push the fix beyond
15589 its maximum address (which can happen if we have
15590 re-located a forwards fix); force the new fix to come
15591 after it. */
15592 if (ARM_DOUBLEWORD_ALIGN
15593 && fix->fix_size >= 8 && mp->fix_size < 8)
15594 return NULL;
15595 else
15597 min_mp = mp;
15598 min_address = mp->min_address + fix->fix_size;
15601 /* Do not insert a non-8-byte aligned quantity before 8-byte
15602 aligned quantities. */
15603 else if (ARM_DOUBLEWORD_ALIGN
15604 && fix->fix_size < 8
15605 && mp->fix_size >= 8)
15607 min_mp = mp;
15608 min_address = mp->min_address + fix->fix_size;
15613 /* We need to create a new entry. */
15614 mp = XNEW (Mnode);
15615 mp->fix_size = fix->fix_size;
15616 mp->mode = fix->mode;
15617 mp->value = fix->value;
15618 mp->refcount = 1;
15619 mp->max_address = minipool_barrier->address + 65536;
15621 mp->min_address = min_address;
15623 if (min_mp == NULL)
15625 mp->prev = NULL;
15626 mp->next = minipool_vector_head;
15628 if (mp->next == NULL)
15630 minipool_vector_tail = mp;
15631 minipool_vector_label = gen_label_rtx ();
15633 else
15634 mp->next->prev = mp;
15636 minipool_vector_head = mp;
15638 else
15640 mp->next = min_mp->next;
15641 mp->prev = min_mp;
15642 min_mp->next = mp;
15644 if (mp->next != NULL)
15645 mp->next->prev = mp;
15646 else
15647 minipool_vector_tail = mp;
15650 /* Save the new entry. */
15651 min_mp = mp;
15653 if (mp->prev)
15654 mp = mp->prev;
15655 else
15656 mp->offset = 0;
15658 /* Scan over the following entries and adjust their offsets. */
15659 while (mp->next != NULL)
15661 if (mp->next->min_address < mp->min_address + mp->fix_size)
15662 mp->next->min_address = mp->min_address + mp->fix_size;
15664 if (mp->refcount)
15665 mp->next->offset = mp->offset + mp->fix_size;
15666 else
15667 mp->next->offset = mp->offset;
15669 mp = mp->next;
15672 return min_mp;
15675 static void
15676 assign_minipool_offsets (Mfix *barrier)
15678 HOST_WIDE_INT offset = 0;
15679 Mnode *mp;
15681 minipool_barrier = barrier;
15683 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15685 mp->offset = offset;
15687 if (mp->refcount > 0)
15688 offset += mp->fix_size;
15692 /* Output the literal table */
15693 static void
15694 dump_minipool (rtx_insn *scan)
15696 Mnode * mp;
15697 Mnode * nmp;
15698 int align64 = 0;
15700 if (ARM_DOUBLEWORD_ALIGN)
15701 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15702 if (mp->refcount > 0 && mp->fix_size >= 8)
15704 align64 = 1;
15705 break;
15708 if (dump_file)
15709 fprintf (dump_file,
15710 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
15711 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
15713 scan = emit_label_after (gen_label_rtx (), scan);
15714 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
15715 scan = emit_label_after (minipool_vector_label, scan);
15717 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
15719 if (mp->refcount > 0)
15721 if (dump_file)
15723 fprintf (dump_file,
15724 ";; Offset %u, min %ld, max %ld ",
15725 (unsigned) mp->offset, (unsigned long) mp->min_address,
15726 (unsigned long) mp->max_address);
15727 arm_print_value (dump_file, mp->value);
15728 fputc ('\n', dump_file);
15731 switch (GET_MODE_SIZE (mp->mode))
15733 #ifdef HAVE_consttable_1
15734 case 1:
15735 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
15736 break;
15738 #endif
15739 #ifdef HAVE_consttable_2
15740 case 2:
15741 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
15742 break;
15744 #endif
15745 #ifdef HAVE_consttable_4
15746 case 4:
15747 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
15748 break;
15750 #endif
15751 #ifdef HAVE_consttable_8
15752 case 8:
15753 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
15754 break;
15756 #endif
15757 #ifdef HAVE_consttable_16
15758 case 16:
15759 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
15760 break;
15762 #endif
15763 default:
15764 gcc_unreachable ();
15768 nmp = mp->next;
15769 free (mp);
15772 minipool_vector_head = minipool_vector_tail = NULL;
15773 scan = emit_insn_after (gen_consttable_end (), scan);
15774 scan = emit_barrier_after (scan);
15777 /* Return the cost of forcibly inserting a barrier after INSN. */
15778 static int
15779 arm_barrier_cost (rtx_insn *insn)
15781 /* Basing the location of the pool on the loop depth is preferable,
15782 but at the moment, the basic block information seems to be
15783 corrupt by this stage of the compilation. */
15784 int base_cost = 50;
15785 rtx_insn *next = next_nonnote_insn (insn);
15787 if (next != NULL && LABEL_P (next))
15788 base_cost -= 20;
15790 switch (GET_CODE (insn))
15792 case CODE_LABEL:
15793 /* It will always be better to place the table before the label, rather
15794 than after it. */
15795 return 50;
15797 case INSN:
15798 case CALL_INSN:
15799 return base_cost;
15801 case JUMP_INSN:
15802 return base_cost - 10;
15804 default:
15805 return base_cost + 10;
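/* Worked example (illustrative): for a JUMP_INSN that is immediately
   followed by a CODE_LABEL, base_cost drops from 50 to 30 and the
   returned cost is 30 - 10 = 20, making it a comparatively cheap place
   to force a barrier; a CODE_LABEL itself always costs 50 because the
   pool is better placed before the label than after it.  */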
15809 /* Find the best place in the insn stream in the range
15810 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
15811 Create the barrier by inserting a jump and add a new fix entry for
15812 it. */
15813 static Mfix *
15814 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
15816 HOST_WIDE_INT count = 0;
15817 rtx_barrier *barrier;
15818 rtx_insn *from = fix->insn;
15819 /* The instruction after which we will insert the jump. */
15820 rtx_insn *selected = NULL;
15821 int selected_cost;
15822 /* The address at which the jump instruction will be placed. */
15823 HOST_WIDE_INT selected_address;
15824 Mfix * new_fix;
15825 HOST_WIDE_INT max_count = max_address - fix->address;
15826 rtx_code_label *label = gen_label_rtx ();
15828 selected_cost = arm_barrier_cost (from);
15829 selected_address = fix->address;
15831 while (from && count < max_count)
15833 rtx_jump_table_data *tmp;
15834 int new_cost;
15836 /* This code shouldn't have been called if there was a natural barrier
15837 within range. */
15838 gcc_assert (!BARRIER_P (from));
15840 /* Count the length of this insn. This must stay in sync with the
15841 code that pushes minipool fixes. */
15842 if (LABEL_P (from))
15843 count += get_label_padding (from);
15844 else
15845 count += get_attr_length (from);
15847 /* If there is a jump table, add its length. */
15848 if (tablejump_p (from, NULL, &tmp))
15850 count += get_jump_table_size (tmp);
15852 /* Jump tables aren't in a basic block, so base the cost on
15853 the dispatch insn. If we select this location, we will
15854 still put the pool after the table. */
15855 new_cost = arm_barrier_cost (from);
15857 if (count < max_count
15858 && (!selected || new_cost <= selected_cost))
15860 selected = tmp;
15861 selected_cost = new_cost;
15862 selected_address = fix->address + count;
15865 /* Continue after the dispatch table. */
15866 from = NEXT_INSN (tmp);
15867 continue;
15870 new_cost = arm_barrier_cost (from);
15872 if (count < max_count
15873 && (!selected || new_cost <= selected_cost))
15875 selected = from;
15876 selected_cost = new_cost;
15877 selected_address = fix->address + count;
15880 from = NEXT_INSN (from);
15883 /* Make sure that we found a place to insert the jump. */
15884 gcc_assert (selected);
15886 /* Make sure we do not split a call and its corresponding
15887 CALL_ARG_LOCATION note. */
15888 if (CALL_P (selected))
15890 rtx_insn *next = NEXT_INSN (selected);
15891 if (next && NOTE_P (next)
15892 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
15893 selected = next;
15896 /* Create a new JUMP_INSN that branches around a barrier. */
15897 from = emit_jump_insn_after (gen_jump (label), selected);
15898 JUMP_LABEL (from) = label;
15899 barrier = emit_barrier_after (from);
15900 emit_label_after (label, barrier);
15902 /* Create a minipool barrier entry for the new barrier. */
15903 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
15904 new_fix->insn = barrier;
15905 new_fix->address = selected_address;
15906 new_fix->next = fix->next;
15907 fix->next = new_fix;
15909 return new_fix;
15912 /* Record that there is a natural barrier in the insn stream at
15913 ADDRESS. */
15914 static void
15915 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
15917 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
15919 fix->insn = insn;
15920 fix->address = address;
15922 fix->next = NULL;
15923 if (minipool_fix_head != NULL)
15924 minipool_fix_tail->next = fix;
15925 else
15926 minipool_fix_head = fix;
15928 minipool_fix_tail = fix;
15931 /* Record INSN, which will need fixing up to load a value from the
15932 minipool. ADDRESS is the offset of the insn since the start of the
15933 function; LOC is a pointer to the part of the insn which requires
15934 fixing; VALUE is the constant that must be loaded, which is of type
15935 MODE. */
15936 static void
15937 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
15938 machine_mode mode, rtx value)
15940 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
15942 fix->insn = insn;
15943 fix->address = address;
15944 fix->loc = loc;
15945 fix->mode = mode;
15946 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
15947 fix->value = value;
15948 fix->forwards = get_attr_pool_range (insn);
15949 fix->backwards = get_attr_neg_pool_range (insn);
15950 fix->minipool = NULL;
15952 /* If an insn doesn't have a range defined for it, then it isn't
15953 expecting to be reworked by this code. Better to stop now than
15954 to generate duff assembly code. */
15955 gcc_assert (fix->forwards || fix->backwards);
15957 /* If an entry requires 8-byte alignment then assume all constant pools
15958 require 4 bytes of padding. Trying to do this later on a per-pool
15959 basis is awkward because existing pool entries have to be modified. */
15960 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
15961 minipool_pad = 4;
15963 if (dump_file)
15965 fprintf (dump_file,
15966 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
15967 GET_MODE_NAME (mode),
15968 INSN_UID (insn), (unsigned long) address,
15969 -1 * (long)fix->backwards, (long)fix->forwards);
15970 arm_print_value (dump_file, fix->value);
15971 fprintf (dump_file, "\n");
15974 /* Add it to the chain of fixes. */
15975 fix->next = NULL;
15977 if (minipool_fix_head != NULL)
15978 minipool_fix_tail->next = fix;
15979 else
15980 minipool_fix_head = fix;
15982 minipool_fix_tail = fix;
15985 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
15986 Returns the maximum number of insns we are prepared to use, or 99 if
15987 we always want to synthesize the value. */
15989 arm_max_const_double_inline_cost ()
15991 /* Let the value get synthesized to avoid the use of literal pools. */
15992 if (arm_disable_literal_pool)
15993 return 99;
15995 return ((optimize_size || arm_ld_sched) ? 3 : 4);
15998 /* Return the cost of synthesizing a 64-bit constant VAL inline.
15999 Returns the number of insns needed, or 99 if we don't know how to
16000 do it. */
16002 arm_const_double_inline_cost (rtx val)
16004 rtx lowpart, highpart;
16005 machine_mode mode;
16007 mode = GET_MODE (val);
16009 if (mode == VOIDmode)
16010 mode = DImode;
16012 gcc_assert (GET_MODE_SIZE (mode) == 8);
16014 lowpart = gen_lowpart (SImode, val);
16015 highpart = gen_highpart_mode (SImode, mode, val);
16017 gcc_assert (CONST_INT_P (lowpart));
16018 gcc_assert (CONST_INT_P (highpart));
16020 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16021 NULL_RTX, NULL_RTX, 0, 0)
16022 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16023 NULL_RTX, NULL_RTX, 0, 0));
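/* Illustrative example: for the DImode constant 0x0000000100000005 the
   low word is 5 and the high word is 1, each of which arm_gen_constant
   can build with a single mov, so the reported cost is 2.  This cost is
   typically compared against arm_max_const_double_inline_cost when
   deciding between inline synthesis and a literal-pool load.  */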
16026 /* Cost of loading a SImode constant. */
16027 static inline int
16028 arm_const_inline_cost (enum rtx_code code, rtx val)
16030 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16031 NULL_RTX, NULL_RTX, 1, 0);
16034 /* Return true if it is worthwhile to split a 64-bit constant into two
16035 32-bit operations. This is the case if optimizing for size, or
16036 if we have load delay slots, or if one 32-bit part can be done with
16037 a single data operation. */
16038 bool
16039 arm_const_double_by_parts (rtx val)
16041 machine_mode mode = GET_MODE (val);
16042 rtx part;
16044 if (optimize_size || arm_ld_sched)
16045 return true;
16047 if (mode == VOIDmode)
16048 mode = DImode;
16050 part = gen_highpart_mode (SImode, mode, val);
16052 gcc_assert (CONST_INT_P (part));
16054 if (const_ok_for_arm (INTVAL (part))
16055 || const_ok_for_arm (~INTVAL (part)))
16056 return true;
16058 part = gen_lowpart (SImode, val);
16060 gcc_assert (CONST_INT_P (part));
16062 if (const_ok_for_arm (INTVAL (part))
16063 || const_ok_for_arm (~INTVAL (part)))
16064 return true;
16066 return false;
16069 /* Return true if it is possible to inline both the high and low parts
16070 of a 64-bit constant into 32-bit data processing instructions. */
16071 bool
16072 arm_const_double_by_immediates (rtx val)
16074 machine_mode mode = GET_MODE (val);
16075 rtx part;
16077 if (mode == VOIDmode)
16078 mode = DImode;
16080 part = gen_highpart_mode (SImode, mode, val);
16082 gcc_assert (CONST_INT_P (part));
16084 if (!const_ok_for_arm (INTVAL (part)))
16085 return false;
16087 part = gen_lowpart (SImode, val);
16089 gcc_assert (CONST_INT_P (part));
16091 if (!const_ok_for_arm (INTVAL (part)))
16092 return false;
16094 return true;
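/* Illustrative example: 0x000000ff000000ff can be built entirely from
   immediates (both halves are valid ARM rotated 8-bit immediates, so
   this returns true), whereas 0x1234567800000001 cannot, because
   0x12345678 is not representable as a rotated 8-bit immediate.  */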
16097 /* Scan INSN and note any of its operands that need fixing.
16098 If DO_PUSHES is false we do not actually push any of the fixups
16099 needed. */
16100 static void
16101 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16103 int opno;
16105 extract_constrain_insn (insn);
16107 if (recog_data.n_alternatives == 0)
16108 return;
16110 /* Fill in recog_op_alt with information about the constraints of
16111 this insn. */
16112 preprocess_constraints (insn);
16114 const operand_alternative *op_alt = which_op_alt ();
16115 for (opno = 0; opno < recog_data.n_operands; opno++)
16117 /* Things we need to fix can only occur in inputs. */
16118 if (recog_data.operand_type[opno] != OP_IN)
16119 continue;
16121 /* If this alternative is a memory reference, then any mention
16122 of constants in this alternative is really to fool reload
16123 into allowing us to accept one there. We need to fix them up
16124 now so that we output the right code. */
16125 if (op_alt[opno].memory_ok)
16127 rtx op = recog_data.operand[opno];
16129 if (CONSTANT_P (op))
16131 if (do_pushes)
16132 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16133 recog_data.operand_mode[opno], op);
16135 else if (MEM_P (op)
16136 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16137 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16139 if (do_pushes)
16141 rtx cop = avoid_constant_pool_reference (op);
16143 /* Casting the address of something to a mode narrower
16144 than a word can cause avoid_constant_pool_reference()
16145 to return the pool reference itself. That's no good to
16146 us here. Lets just hope that we can use the
16147 constant pool value directly. */
16148 if (op == cop)
16149 cop = get_pool_constant (XEXP (op, 0));
16151 push_minipool_fix (insn, address,
16152 recog_data.operand_loc[opno],
16153 recog_data.operand_mode[opno], cop);
16160 return;
16163 /* Rewrite move insn into subtract of 0 if the condition codes will
16164 be useful in next conditional jump insn. */
16166 static void
16167 thumb1_reorg (void)
16169 basic_block bb;
16171 FOR_EACH_BB_FN (bb, cfun)
16173 rtx dest, src;
16174 rtx cmp, op0, op1, set = NULL;
16175 rtx_insn *prev, *insn = BB_END (bb);
16176 bool insn_clobbered = false;
16178 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
16179 insn = PREV_INSN (insn);
16181 /* Find the last cbranchsi4_insn in basic block BB. */
16182 if (insn == BB_HEAD (bb)
16183 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16184 continue;
16186 /* Get the register with which we are comparing. */
16187 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
16188 op0 = XEXP (cmp, 0);
16189 op1 = XEXP (cmp, 1);
16191 /* Check that comparison is against ZERO. */
16192 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
16193 continue;
16195 /* Find the first flag setting insn before INSN in basic block BB. */
16196 gcc_assert (insn != BB_HEAD (bb));
16197 for (prev = PREV_INSN (insn);
16198 (!insn_clobbered
16199 && prev != BB_HEAD (bb)
16200 && (NOTE_P (prev)
16201 || DEBUG_INSN_P (prev)
16202 || ((set = single_set (prev)) != NULL
16203 && get_attr_conds (prev) == CONDS_NOCOND)));
16204 prev = PREV_INSN (prev))
16206 if (reg_set_p (op0, prev))
16207 insn_clobbered = true;
16210 /* Skip if op0 is clobbered by insn other than prev. */
16211 if (insn_clobbered)
16212 continue;
16214 if (!set)
16215 continue;
16217 dest = SET_DEST (set);
16218 src = SET_SRC (set);
16219 if (!low_register_operand (dest, SImode)
16220 || !low_register_operand (src, SImode))
16221 continue;
16223 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16224 in INSN. Both src and dest of the move insn are checked. */
16225 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16227 dest = copy_rtx (dest);
16228 src = copy_rtx (src);
16229 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16230 PATTERN (prev) = gen_rtx_SET (dest, src);
16231 INSN_CODE (prev) = -1;
16232 /* Set test register in INSN to dest. */
16233 XEXP (cmp, 0) = copy_rtx (dest);
16234 INSN_CODE (insn) = -1;
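/* Illustrative example (simplified RTL, register names made up): given

     r2 = r1                  ; plain move
     ...
     if (r2 == 0) goto L      ; cbranchsi4_insn against zero

   the move is rewritten as r2 = r1 - 0.  On Thumb-1 that is a
   flag-setting SUBS, so later passes can reuse the condition codes it
   leaves behind instead of materialising a separate compare with zero,
   provided nothing in between clobbers r1 or the flags.  */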
16239 /* Convert instructions to their cc-clobbering variant if possible, since
16240 that allows us to use smaller encodings. */
16242 static void
16243 thumb2_reorg (void)
16245 basic_block bb;
16246 regset_head live;
16248 INIT_REG_SET (&live);
16250 /* We are freeing block_for_insn in the toplev to keep compatibility
16251 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16252 compute_bb_for_insn ();
16253 df_analyze ();
16255 enum Convert_Action {SKIP, CONV, SWAP_CONV};
16257 FOR_EACH_BB_FN (bb, cfun)
16259 if ((current_tune->disparage_flag_setting_t16_encodings
16260 == tune_params::DISPARAGE_FLAGS_ALL)
16261 && optimize_bb_for_speed_p (bb))
16262 continue;
16264 rtx_insn *insn;
16265 Convert_Action action = SKIP;
16266 Convert_Action action_for_partial_flag_setting
16267 = ((current_tune->disparage_flag_setting_t16_encodings
16268 != tune_params::DISPARAGE_FLAGS_NEITHER)
16269 && optimize_bb_for_speed_p (bb))
16270 ? SKIP : CONV;
16272 COPY_REG_SET (&live, DF_LR_OUT (bb));
16273 df_simulate_initialize_backwards (bb, &live);
16274 FOR_BB_INSNS_REVERSE (bb, insn)
16276 if (NONJUMP_INSN_P (insn)
16277 && !REGNO_REG_SET_P (&live, CC_REGNUM)
16278 && GET_CODE (PATTERN (insn)) == SET)
16280 action = SKIP;
16281 rtx pat = PATTERN (insn);
16282 rtx dst = XEXP (pat, 0);
16283 rtx src = XEXP (pat, 1);
16284 rtx op0 = NULL_RTX, op1 = NULL_RTX;
16286 if (UNARY_P (src) || BINARY_P (src))
16287 op0 = XEXP (src, 0);
16289 if (BINARY_P (src))
16290 op1 = XEXP (src, 1);
16292 if (low_register_operand (dst, SImode))
16294 switch (GET_CODE (src))
16296 case PLUS:
16297 /* Adding two registers and storing the result
16298 in the first source is already a 16-bit
16299 operation. */
16300 if (rtx_equal_p (dst, op0)
16301 && register_operand (op1, SImode))
16302 break;
16304 if (low_register_operand (op0, SImode))
16306 /* ADDS <Rd>,<Rn>,<Rm> */
16307 if (low_register_operand (op1, SImode))
16308 action = CONV;
16309 /* ADDS <Rdn>,#<imm8> */
16310 /* SUBS <Rdn>,#<imm8> */
16311 else if (rtx_equal_p (dst, op0)
16312 && CONST_INT_P (op1)
16313 && IN_RANGE (INTVAL (op1), -255, 255))
16314 action = CONV;
16315 /* ADDS <Rd>,<Rn>,#<imm3> */
16316 /* SUBS <Rd>,<Rn>,#<imm3> */
16317 else if (CONST_INT_P (op1)
16318 && IN_RANGE (INTVAL (op1), -7, 7))
16319 action = CONV;
16321 /* ADCS <Rd>, <Rn> */
16322 else if (GET_CODE (XEXP (src, 0)) == PLUS
16323 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
16324 && low_register_operand (XEXP (XEXP (src, 0), 1),
16325 SImode)
16326 && COMPARISON_P (op1)
16327 && cc_register (XEXP (op1, 0), VOIDmode)
16328 && maybe_get_arm_condition_code (op1) == ARM_CS
16329 && XEXP (op1, 1) == const0_rtx)
16330 action = CONV;
16331 break;
16333 case MINUS:
16334 /* RSBS <Rd>,<Rn>,#0
16335 Not handled here: see NEG below. */
16336 /* SUBS <Rd>,<Rn>,#<imm3>
16337 SUBS <Rdn>,#<imm8>
16338 Not handled here: see PLUS above. */
16339 /* SUBS <Rd>,<Rn>,<Rm> */
16340 if (low_register_operand (op0, SImode)
16341 && low_register_operand (op1, SImode))
16342 action = CONV;
16343 break;
16345 case MULT:
16346 /* MULS <Rdm>,<Rn>,<Rdm>
16347 As an exception to the rule, this is only used
16348 when optimizing for size since MULS is slow on all
16349 known implementations. We do not even want to use
16350 MULS in cold code, if optimizing for speed, so we
16351 test the global flag here. */
16352 if (!optimize_size)
16353 break;
16354 /* Fall through. */
16355 case AND:
16356 case IOR:
16357 case XOR:
16358 /* ANDS <Rdn>,<Rm> */
16359 if (rtx_equal_p (dst, op0)
16360 && low_register_operand (op1, SImode))
16361 action = action_for_partial_flag_setting;
16362 else if (rtx_equal_p (dst, op1)
16363 && low_register_operand (op0, SImode))
16364 action = action_for_partial_flag_setting == SKIP
16365 ? SKIP : SWAP_CONV;
16366 break;
16368 case ASHIFTRT:
16369 case ASHIFT:
16370 case LSHIFTRT:
16371 /* ASRS <Rdn>,<Rm> */
16372 /* LSRS <Rdn>,<Rm> */
16373 /* LSLS <Rdn>,<Rm> */
16374 if (rtx_equal_p (dst, op0)
16375 && low_register_operand (op1, SImode))
16376 action = action_for_partial_flag_setting;
16377 /* ASRS <Rd>,<Rm>,#<imm5> */
16378 /* LSRS <Rd>,<Rm>,#<imm5> */
16379 /* LSLS <Rd>,<Rm>,#<imm5> */
16380 else if (low_register_operand (op0, SImode)
16381 && CONST_INT_P (op1)
16382 && IN_RANGE (INTVAL (op1), 0, 31))
16383 action = action_for_partial_flag_setting;
16384 break;
16386 case ROTATERT:
16387 /* RORS <Rdn>,<Rm> */
16388 if (rtx_equal_p (dst, op0)
16389 && low_register_operand (op1, SImode))
16390 action = action_for_partial_flag_setting;
16391 break;
16393 case NOT:
16394 /* MVNS <Rd>,<Rm> */
16395 if (low_register_operand (op0, SImode))
16396 action = action_for_partial_flag_setting;
16397 break;
16399 case NEG:
16400 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16401 if (low_register_operand (op0, SImode))
16402 action = CONV;
16403 break;
16405 case CONST_INT:
16406 /* MOVS <Rd>,#<imm8> */
16407 if (CONST_INT_P (src)
16408 && IN_RANGE (INTVAL (src), 0, 255))
16409 action = action_for_partial_flag_setting;
16410 break;
16412 case REG:
16413 /* MOVS and MOV<c> with registers have different
16414 encodings, so are not relevant here. */
16415 break;
16417 default:
16418 break;
16422 if (action != SKIP)
16424 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
16425 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
16426 rtvec vec;
16428 if (action == SWAP_CONV)
16430 src = copy_rtx (src);
16431 XEXP (src, 0) = op1;
16432 XEXP (src, 1) = op0;
16433 pat = gen_rtx_SET (dst, src);
16434 vec = gen_rtvec (2, pat, clobber);
16436 else /* action == CONV */
16437 vec = gen_rtvec (2, pat, clobber);
16439 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
16440 INSN_CODE (insn) = -1;
16444 if (NONDEBUG_INSN_P (insn))
16445 df_simulate_one_insn_backwards (bb, insn, &live);
16449 CLEAR_REG_SET (&live);
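/* Illustrative example: when the condition codes are dead after the
   instruction, a 32-bit Thumb-2 encoding such as

     and   r0, r0, r1      ; 4 bytes, flags preserved

   can be replaced by the flag-setting 16-bit form

     ands  r0, r1          ; 2 bytes, flags clobbered

   which is what the CONV/SWAP_CONV actions above arrange by adding a
   clobber of CC_REGNUM to the pattern.  */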
16452 /* GCC puts the pool in the wrong place for ARM, since we can only
16453 load addresses a limited distance around the pc. We do some
16454 special munging to move the constant pool values to the correct
16455 point in the code. */
16456 static void
16457 arm_reorg (void)
16459 rtx_insn *insn;
16460 HOST_WIDE_INT address = 0;
16461 Mfix * fix;
16463 if (TARGET_THUMB1)
16464 thumb1_reorg ();
16465 else if (TARGET_THUMB2)
16466 thumb2_reorg ();
16468 /* Ensure all insns that must be split have been split at this point.
16469 Otherwise, the pool placement code below may compute incorrect
16470 insn lengths. Note that when optimizing, all insns have already
16471 been split at this point. */
16472 if (!optimize)
16473 split_all_insns_noflow ();
16475 minipool_fix_head = minipool_fix_tail = NULL;
16477 /* The first insn must always be a note, or the code below won't
16478 scan it properly. */
16479 insn = get_insns ();
16480 gcc_assert (NOTE_P (insn));
16481 minipool_pad = 0;
16483 /* Scan all the insns and record the operands that will need fixing. */
16484 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
16486 if (BARRIER_P (insn))
16487 push_minipool_barrier (insn, address);
16488 else if (INSN_P (insn))
16490 rtx_jump_table_data *table;
16492 note_invalid_constants (insn, address, true);
16493 address += get_attr_length (insn);
16495 /* If the insn is a vector jump, add the size of the table
16496 and skip the table. */
16497 if (tablejump_p (insn, NULL, &table))
16499 address += get_jump_table_size (table);
16500 insn = table;
16503 else if (LABEL_P (insn))
16504 /* Add the worst-case padding due to alignment. We don't add
16505 the _current_ padding because the minipool insertions
16506 themselves might change it. */
16507 address += get_label_padding (insn);
16510 fix = minipool_fix_head;
16512 /* Now scan the fixups and perform the required changes. */
16513 while (fix)
16515 Mfix * ftmp;
16516 Mfix * fdel;
16517 Mfix * last_added_fix;
16518 Mfix * last_barrier = NULL;
16519 Mfix * this_fix;
16521 /* Skip any further barriers before the next fix. */
16522 while (fix && BARRIER_P (fix->insn))
16523 fix = fix->next;
16525 /* No more fixes. */
16526 if (fix == NULL)
16527 break;
16529 last_added_fix = NULL;
16531 for (ftmp = fix; ftmp; ftmp = ftmp->next)
16533 if (BARRIER_P (ftmp->insn))
16535 if (ftmp->address >= minipool_vector_head->max_address)
16536 break;
16538 last_barrier = ftmp;
16540 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
16541 break;
16543 last_added_fix = ftmp; /* Keep track of the last fix added. */
16546 /* If we found a barrier, drop back to that; any fixes that we
16547 could have reached but come after the barrier will now go in
16548 the next mini-pool. */
16549 if (last_barrier != NULL)
16551 /* Reduce the refcount for those fixes that won't go into this
16552 pool after all. */
16553 for (fdel = last_barrier->next;
16554 fdel && fdel != ftmp;
16555 fdel = fdel->next)
16557 fdel->minipool->refcount--;
16558 fdel->minipool = NULL;
16561 ftmp = last_barrier;
16563 else
16565 /* ftmp is the first fix that we can't fit into this pool and
16566 there are no natural barriers that we could use. Insert a
16567 new barrier in the code somewhere between the previous
16568 fix and this one, and arrange to jump around it. */
16569 HOST_WIDE_INT max_address;
16571 /* The last item on the list of fixes must be a barrier, so
16572 we can never run off the end of the list of fixes without
16573 last_barrier being set. */
16574 gcc_assert (ftmp);
16576 max_address = minipool_vector_head->max_address;
16577 /* Check that there isn't another fix that is in range that
16578 we couldn't fit into this pool because the pool was
16579 already too large: we need to put the pool before such an
16580 instruction. The pool itself may come just after the
16581 fix because create_fix_barrier also allows space for a
16582 jump instruction. */
16583 if (ftmp->address < max_address)
16584 max_address = ftmp->address + 1;
16586 last_barrier = create_fix_barrier (last_added_fix, max_address);
16589 assign_minipool_offsets (last_barrier);
16591 while (ftmp)
16593 if (!BARRIER_P (ftmp->insn)
16594 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
16595 == NULL))
16596 break;
16598 ftmp = ftmp->next;
16601 /* Scan over the fixes we have identified for this pool, fixing them
16602 up and adding the constants to the pool itself. */
16603 for (this_fix = fix; this_fix && ftmp != this_fix;
16604 this_fix = this_fix->next)
16605 if (!BARRIER_P (this_fix->insn))
16607 rtx addr
16608 = plus_constant (Pmode,
16609 gen_rtx_LABEL_REF (VOIDmode,
16610 minipool_vector_label),
16611 this_fix->minipool->offset);
16612 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
16615 dump_minipool (last_barrier->insn);
16616 fix = ftmp;
16619 /* From now on we must synthesize any constants that we can't handle
16620 directly. This can happen if the RTL gets split during final
16621 instruction generation. */
16622 cfun->machine->after_arm_reorg = 1;
16624 /* Free the minipool memory. */
16625 obstack_free (&minipool_obstack, minipool_startobj);
16628 /* Routines to output assembly language. */
16630 /* Return string representation of passed in real value. */
16631 static const char *
16632 fp_const_from_val (REAL_VALUE_TYPE *r)
16634 if (!fp_consts_inited)
16635 init_fp_table ();
16637 gcc_assert (real_equal (r, &value_fp0));
16638 return "0";
16641 /* OPERANDS[0] is the entire list of insns that constitute pop,
16642 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
16643 is in the list, UPDATE is true iff the list contains explicit
16644 update of base register. */
16645 void
16646 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
16647 bool update)
16649 int i;
16650 char pattern[100];
16651 int offset;
16652 const char *conditional;
16653 int num_saves = XVECLEN (operands[0], 0);
16654 unsigned int regno;
16655 unsigned int regno_base = REGNO (operands[1]);
16656 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
16658 offset = 0;
16659 offset += update ? 1 : 0;
16660 offset += return_pc ? 1 : 0;
16662 /* Is the base register in the list? */
16663 for (i = offset; i < num_saves; i++)
16665 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
16666 /* If SP is in the list, then the base register must be SP. */
16667 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
16668 /* If base register is in the list, there must be no explicit update. */
16669 if (regno == regno_base)
16670 gcc_assert (!update);
16673 conditional = reverse ? "%?%D0" : "%?%d0";
16674 /* Can't use POP if returning from an interrupt. */
16675 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
16676 sprintf (pattern, "pop%s\t{", conditional);
16677 else
16679 /* Output ldmfd when the base register is SP, otherwise output ldmia.
16680 It's just a convention, their semantics are identical. */
16681 if (regno_base == SP_REGNUM)
16682 sprintf (pattern, "ldmfd%s\t", conditional);
16683 else if (update)
16684 sprintf (pattern, "ldmia%s\t", conditional);
16685 else
16686 sprintf (pattern, "ldm%s\t", conditional);
16688 strcat (pattern, reg_names[regno_base]);
16689 if (update)
16690 strcat (pattern, "!, {");
16691 else
16692 strcat (pattern, ", {");
16695 /* Output the first destination register. */
16696 strcat (pattern,
16697 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
16699 /* Output the rest of the destination registers. */
16700 for (i = offset + 1; i < num_saves; i++)
16702 strcat (pattern, ", ");
16703 strcat (pattern,
16704 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
16707 strcat (pattern, "}");
16709 if (interrupt_p && return_pc)
16710 strcat (pattern, "^");
16712 output_asm_insn (pattern, &cond);
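/* Illustrative outputs (register choices are made up): popping r4, r5
   and the return address with SP as the base and writeback produces
   "pop {r4, r5, pc}", while the same operation when returning from an
   interrupt handler is emitted as "ldmfd sp!, {r4, r5, pc}^" so that
   the SPSR is restored to the CPSR on return.  */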
16716 /* Output the assembly for a store multiple. */
16718 const char *
16719 vfp_output_vstmd (rtx * operands)
16721 char pattern[100];
16722 int p;
16723 int base;
16724 int i;
16725 rtx addr_reg = REG_P (XEXP (operands[0], 0))
16726 ? XEXP (operands[0], 0)
16727 : XEXP (XEXP (operands[0], 0), 0);
16728 bool push_p = REGNO (addr_reg) == SP_REGNUM;
16730 if (push_p)
16731 strcpy (pattern, "vpush%?.64\t{%P1");
16732 else
16733 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
16735 p = strlen (pattern);
16737 gcc_assert (REG_P (operands[1]));
16739 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
16740 for (i = 1; i < XVECLEN (operands[2], 0); i++)
16742 p += sprintf (&pattern[p], ", d%d", base + i);
16744 strcpy (&pattern[p], "}");
16746 output_asm_insn (pattern, operands);
16747 return "";
16751 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
16752 number of bytes pushed. */
16754 static int
16755 vfp_emit_fstmd (int base_reg, int count)
16757 rtx par;
16758 rtx dwarf;
16759 rtx tmp, reg;
16760 int i;
16762 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
16763 register pairs are stored by a store multiple insn. We avoid this
16764 by pushing an extra pair. */
16765 if (count == 2 && !arm_arch6)
16767 if (base_reg == LAST_VFP_REGNUM - 3)
16768 base_reg -= 2;
16769 count++;
16772 /* FSTMD may not store more than 16 doubleword registers at once. Split
16773 larger stores into multiple parts (up to a maximum of two, in
16774 practice). */
16775 if (count > 16)
16777 int saved;
16778 /* NOTE: base_reg is an internal register number, so each D register
16779 counts as 2. */
16780 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
16781 saved += vfp_emit_fstmd (base_reg, 16);
16782 return saved;
16785 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
16786 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
16788 reg = gen_rtx_REG (DFmode, base_reg);
16789 base_reg += 2;
16791 XVECEXP (par, 0, 0)
16792 = gen_rtx_SET (gen_frame_mem
16793 (BLKmode,
16794 gen_rtx_PRE_MODIFY (Pmode,
16795 stack_pointer_rtx,
16796 plus_constant
16797 (Pmode, stack_pointer_rtx,
16798 - (count * 8)))
16800 gen_rtx_UNSPEC (BLKmode,
16801 gen_rtvec (1, reg),
16802 UNSPEC_PUSH_MULT));
16804 tmp = gen_rtx_SET (stack_pointer_rtx,
16805 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
16806 RTX_FRAME_RELATED_P (tmp) = 1;
16807 XVECEXP (dwarf, 0, 0) = tmp;
16809 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
16810 RTX_FRAME_RELATED_P (tmp) = 1;
16811 XVECEXP (dwarf, 0, 1) = tmp;
16813 for (i = 1; i < count; i++)
16815 reg = gen_rtx_REG (DFmode, base_reg);
16816 base_reg += 2;
16817 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
16819 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
16820 plus_constant (Pmode,
16821 stack_pointer_rtx,
16822 i * 8)),
16823 reg);
16824 RTX_FRAME_RELATED_P (tmp) = 1;
16825 XVECEXP (dwarf, 0, i + 1) = tmp;
16828 par = emit_insn (par);
16829 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16830 RTX_FRAME_RELATED_P (par) = 1;
16832 return count * 8;
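/* Worked example (illustrative): without arm_arch6, a request to push
   exactly two D registers triggers the errata workaround above and
   actually stores three, so 24 bytes are reported; a request for 20
   D registers is split into a 4-register store plus a 16-register
   store, 160 bytes in total.  */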
16835 /* Emit a call instruction with pattern PAT. ADDR is the address of
16836 the call target. */
16838 void
16839 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
16841 rtx insn;
16843 insn = emit_call_insn (pat);
16845 /* The PIC register is live on entry to VxWorks PIC PLT entries.
16846 If the call might use such an entry, add a use of the PIC register
16847 to the instruction's CALL_INSN_FUNCTION_USAGE. */
16848 if (TARGET_VXWORKS_RTP
16849 && flag_pic
16850 && !sibcall
16851 && GET_CODE (addr) == SYMBOL_REF
16852 && (SYMBOL_REF_DECL (addr)
16853 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
16854 : !SYMBOL_REF_LOCAL_P (addr)))
16856 require_pic_register ();
16857 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
16860 if (TARGET_AAPCS_BASED)
16862 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
16863 linker. We need to add an IP clobber to allow setting
16864 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
16865 is not needed since it's a fixed register. */
16866 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
16867 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
16871 /* Output a 'call' insn. */
16872 const char *
16873 output_call (rtx *operands)
16875 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
16877 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
16878 if (REGNO (operands[0]) == LR_REGNUM)
16880 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
16881 output_asm_insn ("mov%?\t%0, %|lr", operands);
16884 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
16886 if (TARGET_INTERWORK || arm_arch4t)
16887 output_asm_insn ("bx%?\t%0", operands);
16888 else
16889 output_asm_insn ("mov%?\t%|pc, %0", operands);
16891 return "";
16894 /* Output a move from arm registers to arm registers of a long double
16895 OPERANDS[0] is the destination.
16896 OPERANDS[1] is the source. */
16897 const char *
16898 output_mov_long_double_arm_from_arm (rtx *operands)
16900 /* We have to be careful here because the two might overlap. */
16901 int dest_start = REGNO (operands[0]);
16902 int src_start = REGNO (operands[1]);
16903 rtx ops[2];
16904 int i;
16906 if (dest_start < src_start)
16908 for (i = 0; i < 3; i++)
16910 ops[0] = gen_rtx_REG (SImode, dest_start + i);
16911 ops[1] = gen_rtx_REG (SImode, src_start + i);
16912 output_asm_insn ("mov%?\t%0, %1", ops);
16915 else
16917 for (i = 2; i >= 0; i--)
16919 ops[0] = gen_rtx_REG (SImode, dest_start + i);
16920 ops[1] = gen_rtx_REG (SImode, src_start + i);
16921 output_asm_insn ("mov%?\t%0, %1", ops);
16925 return "";
16928 void
16929 arm_emit_movpair (rtx dest, rtx src)
16931 rtx insn;
16933 /* If the src is an immediate, simplify it. */
16934 if (CONST_INT_P (src))
16936 HOST_WIDE_INT val = INTVAL (src);
16937 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
16938 if ((val >> 16) & 0x0000ffff)
16940 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
16941 GEN_INT (16)),
16942 GEN_INT ((val >> 16) & 0x0000ffff));
16943 insn = get_last_insn ();
16944 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
16946 return;
16948 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
16949 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
16950 insn = get_last_insn ();
16951 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
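/* Illustrative example: for the constant 0x12345678 this emits a set of
   the low half followed by a ZERO_EXTRACT set of the high half, which
   normally assembles to

     movw  r0, #0x5678
     movt  r0, #0x1234

   with a REG_EQUAL note recording the full constant.  */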
16954 /* Output a move between double words. It must be REG<-MEM
16955 or MEM<-REG. */
16956 const char *
16957 output_move_double (rtx *operands, bool emit, int *count)
16959 enum rtx_code code0 = GET_CODE (operands[0]);
16960 enum rtx_code code1 = GET_CODE (operands[1]);
16961 rtx otherops[3];
16962 if (count)
16963 *count = 1;
16965 /* The only case when this might happen is when
16966 you are looking at the length of a DImode instruction
16967 that has an invalid constant in it. */
16968 if (code0 == REG && code1 != MEM)
16970 gcc_assert (!emit);
16971 *count = 2;
16972 return "";
16975 if (code0 == REG)
16977 unsigned int reg0 = REGNO (operands[0]);
16979 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
16981 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
16983 switch (GET_CODE (XEXP (operands[1], 0)))
16985 case REG:
16987 if (emit)
16989 if (TARGET_LDRD
16990 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
16991 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
16992 else
16993 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
16995 break;
16997 case PRE_INC:
16998 gcc_assert (TARGET_LDRD);
16999 if (emit)
17000 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17001 break;
17003 case PRE_DEC:
17004 if (emit)
17006 if (TARGET_LDRD)
17007 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17008 else
17009 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17011 break;
17013 case POST_INC:
17014 if (emit)
17016 if (TARGET_LDRD)
17017 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
17018 else
17019 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
17021 break;
17023 case POST_DEC:
17024 gcc_assert (TARGET_LDRD);
17025 if (emit)
17026 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
17027 break;
17029 case PRE_MODIFY:
17030 case POST_MODIFY:
17031 /* Autoincrement addressing modes should never have overlapping
17032 base and destination registers, and overlapping index registers
17033 are already prohibited, so this doesn't need to worry about
17034 fix_cm3_ldrd. */
17035 otherops[0] = operands[0];
17036 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17037 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17039 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17041 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17043 /* Registers overlap so split out the increment. */
17044 if (emit)
17046 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17047 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
17049 if (count)
17050 *count = 2;
17052 else
17054 /* Use a single insn if we can.
17055 FIXME: IWMMXT allows offsets larger than ldrd can
17056 handle, fix these up with a pair of ldr. */
17057 if (TARGET_THUMB2
17058 || !CONST_INT_P (otherops[2])
17059 || (INTVAL (otherops[2]) > -256
17060 && INTVAL (otherops[2]) < 256))
17062 if (emit)
17063 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
17065 else
17067 if (emit)
17069 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17070 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17072 if (count)
17073 *count = 2;
17078 else
17080 /* Use a single insn if we can.
17081 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17082 fix these up with a pair of ldr. */
17083 if (TARGET_THUMB2
17084 || !CONST_INT_P (otherops[2])
17085 || (INTVAL (otherops[2]) > -256
17086 && INTVAL (otherops[2]) < 256))
17088 if (emit)
17089 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
17091 else
17093 if (emit)
17095 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17096 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17098 if (count)
17099 *count = 2;
17102 break;
17104 case LABEL_REF:
17105 case CONST:
17106 /* We might be able to use ldrd %0, %1 here. However, the range is
17107 different to ldr/adr, and it is broken on some ARMv7-M
17108 implementations. */
17109 /* Use the second register of the pair to avoid problematic
17110 overlap. */
17111 otherops[1] = operands[1];
17112 if (emit)
17113 output_asm_insn ("adr%?\t%0, %1", otherops);
17114 operands[1] = otherops[0];
17115 if (emit)
17117 if (TARGET_LDRD)
17118 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
17119 else
17120 output_asm_insn ("ldmia%?\t%1, %M0", operands);
17123 if (count)
17124 *count = 2;
17125 break;
17127 /* ??? This needs checking for thumb2. */
17128 default:
17129 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17130 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17132 otherops[0] = operands[0];
17133 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17134 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17136 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17138 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17140 switch ((int) INTVAL (otherops[2]))
17142 case -8:
17143 if (emit)
17144 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
17145 return "";
17146 case -4:
17147 if (TARGET_THUMB2)
17148 break;
17149 if (emit)
17150 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
17151 return "";
17152 case 4:
17153 if (TARGET_THUMB2)
17154 break;
17155 if (emit)
17156 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
17157 return "";
17160 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17161 operands[1] = otherops[0];
17162 if (TARGET_LDRD
17163 && (REG_P (otherops[2])
17164 || TARGET_THUMB2
17165 || (CONST_INT_P (otherops[2])
17166 && INTVAL (otherops[2]) > -256
17167 && INTVAL (otherops[2]) < 256)))
17169 if (reg_overlap_mentioned_p (operands[0],
17170 otherops[2]))
17172 /* Swap base and index registers over to
17173 avoid a conflict. */
17174 std::swap (otherops[1], otherops[2]);
17176 /* If both registers conflict, it will usually
17177 have been fixed by a splitter. */
17178 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17179 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17181 if (emit)
17183 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17184 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
17186 if (count)
17187 *count = 2;
17189 else
17191 otherops[0] = operands[0];
17192 if (emit)
17193 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
17195 return "";
17198 if (CONST_INT_P (otherops[2]))
17200 if (emit)
17202 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17203 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17204 else
17205 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17208 else
17210 if (emit)
17211 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17214 else
17216 if (emit)
17217 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
17220 if (count)
17221 *count = 2;
17223 if (TARGET_LDRD)
17224 return "ldrd%?\t%0, [%1]";
17226 return "ldmia%?\t%1, %M0";
17228 else
17230 otherops[1] = adjust_address (operands[1], SImode, 4);
17231 /* Take care of overlapping base/data reg. */
17232 if (reg_mentioned_p (operands[0], operands[1]))
17234 if (emit)
17236 output_asm_insn ("ldr%?\t%0, %1", otherops);
17237 output_asm_insn ("ldr%?\t%0, %1", operands);
17239 if (count)
17240 *count = 2;
17243 else
17245 if (emit)
17247 output_asm_insn ("ldr%?\t%0, %1", operands);
17248 output_asm_insn ("ldr%?\t%0, %1", otherops);
17250 if (count)
17251 *count = 2;
17256 else
17258 /* Constraints should ensure this. */
17259 gcc_assert (code0 == MEM && code1 == REG);
17260 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
17261 || (TARGET_ARM && TARGET_LDRD));
17263 switch (GET_CODE (XEXP (operands[0], 0)))
17265 case REG:
17266 if (emit)
17268 if (TARGET_LDRD)
17269 output_asm_insn ("strd%?\t%1, [%m0]", operands);
17270 else
17271 output_asm_insn ("stm%?\t%m0, %M1", operands);
17273 break;
17275 case PRE_INC:
17276 gcc_assert (TARGET_LDRD);
17277 if (emit)
17278 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
17279 break;
17281 case PRE_DEC:
17282 if (emit)
17284 if (TARGET_LDRD)
17285 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
17286 else
17287 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
17289 break;
17291 case POST_INC:
17292 if (emit)
17294 if (TARGET_LDRD)
17295 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
17296 else
17297 output_asm_insn ("stm%?\t%m0!, %M1", operands);
17299 break;
17301 case POST_DEC:
17302 gcc_assert (TARGET_LDRD);
17303 if (emit)
17304 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
17305 break;
17307 case PRE_MODIFY:
17308 case POST_MODIFY:
17309 otherops[0] = operands[1];
17310 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
17311 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
 17313 /* IWMMXT allows offsets larger than strd can handle,
 17314 fix these up with a pair of str. */
17315 if (!TARGET_THUMB2
17316 && CONST_INT_P (otherops[2])
17317 && (INTVAL(otherops[2]) <= -256
17318 || INTVAL(otherops[2]) >= 256))
17320 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17322 if (emit)
17324 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
17325 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17327 if (count)
17328 *count = 2;
17330 else
17332 if (emit)
17334 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17335 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
17337 if (count)
17338 *count = 2;
17341 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17343 if (emit)
17344 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
17346 else
17348 if (emit)
17349 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
17351 break;
17353 case PLUS:
17354 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
17355 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17357 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
17359 case -8:
17360 if (emit)
17361 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
17362 return "";
17364 case -4:
17365 if (TARGET_THUMB2)
17366 break;
17367 if (emit)
17368 output_asm_insn ("stmda%?\t%m0, %M1", operands);
17369 return "";
17371 case 4:
17372 if (TARGET_THUMB2)
17373 break;
17374 if (emit)
17375 output_asm_insn ("stmib%?\t%m0, %M1", operands);
17376 return "";
17379 if (TARGET_LDRD
17380 && (REG_P (otherops[2])
17381 || TARGET_THUMB2
17382 || (CONST_INT_P (otherops[2])
17383 && INTVAL (otherops[2]) > -256
17384 && INTVAL (otherops[2]) < 256)))
17386 otherops[0] = operands[1];
17387 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
17388 if (emit)
17389 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
17390 return "";
17392 /* Fall through */
17394 default:
17395 otherops[0] = adjust_address (operands[0], SImode, 4);
17396 otherops[1] = operands[1];
17397 if (emit)
17399 output_asm_insn ("str%?\t%1, %0", operands);
17400 output_asm_insn ("str%?\t%H1, %0", otherops);
17402 if (count)
17403 *count = 2;
17407 return "";
17410 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17411 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
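/* For example, a load of a quad-word value at [r4] into r0-r3 is printed as
   ldmia r4, {r0, r1, r2, r3}; the register-to-register case falls back to
   four separate mov instructions.  */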
17413 const char *
17414 output_move_quad (rtx *operands)
17416 if (REG_P (operands[0]))
17418 /* Load, or reg->reg move. */
17420 if (MEM_P (operands[1]))
17422 switch (GET_CODE (XEXP (operands[1], 0)))
17424 case REG:
17425 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17426 break;
17428 case LABEL_REF:
17429 case CONST:
17430 output_asm_insn ("adr%?\t%0, %1", operands);
17431 output_asm_insn ("ldmia%?\t%0, %M0", operands);
17432 break;
17434 default:
17435 gcc_unreachable ();
17438 else
17440 rtx ops[2];
17441 int dest, src, i;
17443 gcc_assert (REG_P (operands[1]));
17445 dest = REGNO (operands[0]);
17446 src = REGNO (operands[1]);
17448 /* This seems pretty dumb, but hopefully GCC won't try to do it
17449 very often. */
17450 if (dest < src)
17451 for (i = 0; i < 4; i++)
17453 ops[0] = gen_rtx_REG (SImode, dest + i);
17454 ops[1] = gen_rtx_REG (SImode, src + i);
17455 output_asm_insn ("mov%?\t%0, %1", ops);
17457 else
17458 for (i = 3; i >= 0; i--)
17460 ops[0] = gen_rtx_REG (SImode, dest + i);
17461 ops[1] = gen_rtx_REG (SImode, src + i);
17462 output_asm_insn ("mov%?\t%0, %1", ops);
17466 else
17468 gcc_assert (MEM_P (operands[0]));
17469 gcc_assert (REG_P (operands[1]));
17470 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
17472 switch (GET_CODE (XEXP (operands[0], 0)))
17474 case REG:
17475 output_asm_insn ("stm%?\t%m0, %M1", operands);
17476 break;
17478 default:
17479 gcc_unreachable ();
17483 return "";
17486 /* Output a VFP load or store instruction. */
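/* For example, a DFmode load from a plain register address is printed
   roughly as  vldr.64 d0, [r0]; integer modes use the same templates but
   append an "@ int" comment to the output.  */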
17488 const char *
17489 output_move_vfp (rtx *operands)
17491 rtx reg, mem, addr, ops[2];
17492 int load = REG_P (operands[0]);
17493 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
17494 int sp = (!TARGET_VFP_FP16INST
17495 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
17496 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
17497 const char *templ;
17498 char buff[50];
17499 machine_mode mode;
17501 reg = operands[!load];
17502 mem = operands[load];
17504 mode = GET_MODE (reg);
17506 gcc_assert (REG_P (reg));
17507 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
17508 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
17509 || mode == SFmode
17510 || mode == DFmode
17511 || mode == HImode
17512 || mode == SImode
17513 || mode == DImode
17514 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
17515 gcc_assert (MEM_P (mem));
17517 addr = XEXP (mem, 0);
17519 switch (GET_CODE (addr))
17521 case PRE_DEC:
17522 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
17523 ops[0] = XEXP (addr, 0);
17524 ops[1] = reg;
17525 break;
17527 case POST_INC:
17528 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
17529 ops[0] = XEXP (addr, 0);
17530 ops[1] = reg;
17531 break;
17533 default:
17534 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
17535 ops[0] = reg;
17536 ops[1] = mem;
17537 break;
17540 sprintf (buff, templ,
17541 load ? "ld" : "st",
17542 dp ? "64" : sp ? "32" : "16",
17543 dp ? "P" : "",
17544 integer_p ? "\t%@ int" : "");
17545 output_asm_insn (buff, ops);
17547 return "";
17550 /* Output a Neon double-word or quad-word load or store, or a load
17551 or store for larger structure modes.
17553 WARNING: The ordering of elements is weird in big-endian mode,
17554 because the EABI requires that vectors stored in memory appear
 17555 as though they were stored by a VSTM instruction.
17556 GCC RTL defines element ordering based on in-memory order.
17557 This can be different from the architectural ordering of elements
17558 within a NEON register. The intrinsics defined in arm_neon.h use the
17559 NEON register element ordering, not the GCC RTL element ordering.
 17561 For example, the in-memory ordering of a big-endian quadword
17562 vector with 16-bit elements when stored from register pair {d0,d1}
17563 will be (lowest address first, d0[N] is NEON register element N):
17565 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
17567 When necessary, quadword registers (dN, dN+1) are moved to ARM
17568 registers from rN in the order:
17570 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
17572 So that STM/LDM can be used on vectors in ARM registers, and the
17573 same memory layout will result as if VSTM/VLDM were used.
17575 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
17576 possible, which allows use of appropriate alignment tags.
17577 Note that the choice of "64" is independent of the actual vector
17578 element size; this size simply ensures that the behavior is
17579 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
17581 Due to limitations of those instructions, use of VST1.64/VLD1.64
17582 is not possible if:
17583 - the address contains PRE_DEC, or
17584 - the mode refers to more than 4 double-word registers
17586 In those cases, it would be possible to replace VSTM/VLDM by a
17587 sequence of instructions; this is not currently implemented since
17588 this is not certain to actually improve performance. */
17590 const char *
17591 output_move_neon (rtx *operands)
17593 rtx reg, mem, addr, ops[2];
17594 int regno, nregs, load = REG_P (operands[0]);
17595 const char *templ;
17596 char buff[50];
17597 machine_mode mode;
17599 reg = operands[!load];
17600 mem = operands[load];
17602 mode = GET_MODE (reg);
17604 gcc_assert (REG_P (reg));
17605 regno = REGNO (reg);
17606 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
17607 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
17608 || NEON_REGNO_OK_FOR_QUAD (regno));
17609 gcc_assert (VALID_NEON_DREG_MODE (mode)
17610 || VALID_NEON_QREG_MODE (mode)
17611 || VALID_NEON_STRUCT_MODE (mode));
17612 gcc_assert (MEM_P (mem));
17614 addr = XEXP (mem, 0);
17616 /* Strip off const from addresses like (const (plus (...))). */
17617 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
17618 addr = XEXP (addr, 0);
17620 switch (GET_CODE (addr))
17622 case POST_INC:
17623 /* We have to use vldm / vstm for too-large modes. */
17624 if (nregs > 4)
17626 templ = "v%smia%%?\t%%0!, %%h1";
17627 ops[0] = XEXP (addr, 0);
17629 else
17631 templ = "v%s1.64\t%%h1, %%A0";
17632 ops[0] = mem;
17634 ops[1] = reg;
17635 break;
17637 case PRE_DEC:
17638 /* We have to use vldm / vstm in this case, since there is no
17639 pre-decrement form of the vld1 / vst1 instructions. */
17640 templ = "v%smdb%%?\t%%0!, %%h1";
17641 ops[0] = XEXP (addr, 0);
17642 ops[1] = reg;
17643 break;
17645 case POST_MODIFY:
17646 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
17647 gcc_unreachable ();
17649 case REG:
17650 /* We have to use vldm / vstm for too-large modes. */
17651 if (nregs > 1)
17653 if (nregs > 4)
17654 templ = "v%smia%%?\t%%m0, %%h1";
17655 else
17656 templ = "v%s1.64\t%%h1, %%A0";
17658 ops[0] = mem;
17659 ops[1] = reg;
17660 break;
17662 /* Fall through. */
17663 case LABEL_REF:
17664 case PLUS:
17666 int i;
17667 int overlap = -1;
17668 for (i = 0; i < nregs; i++)
17670 /* We're only using DImode here because it's a convenient size. */
17671 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
17672 ops[1] = adjust_address (mem, DImode, 8 * i);
17673 if (reg_overlap_mentioned_p (ops[0], mem))
17675 gcc_assert (overlap == -1);
17676 overlap = i;
17678 else
17680 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
17681 output_asm_insn (buff, ops);
17684 if (overlap != -1)
17686 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
17687 ops[1] = adjust_address (mem, SImode, 8 * overlap);
17688 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
17689 output_asm_insn (buff, ops);
17692 return "";
17695 default:
17696 gcc_unreachable ();
17699 sprintf (buff, templ, load ? "ld" : "st");
17700 output_asm_insn (buff, ops);
17702 return "";
17705 /* Compute and return the length of neon_mov<mode>, where <mode> is
17706 one of VSTRUCT modes: EI, OI, CI or XI. */
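/* For example, a register-to-register OImode move takes two instructions
   (8 bytes), while a load from a reg+offset address needs one 4-byte
   instruction per D register.  */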
 17707 int
 17708 arm_attr_length_move_neon (rtx_insn *insn)
17710 rtx reg, mem, addr;
17711 int load;
17712 machine_mode mode;
17714 extract_insn_cached (insn);
17716 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
17718 mode = GET_MODE (recog_data.operand[0]);
17719 switch (mode)
17721 case EImode:
17722 case OImode:
17723 return 8;
17724 case CImode:
17725 return 12;
17726 case XImode:
17727 return 16;
17728 default:
17729 gcc_unreachable ();
17733 load = REG_P (recog_data.operand[0]);
17734 reg = recog_data.operand[!load];
17735 mem = recog_data.operand[load];
17737 gcc_assert (MEM_P (mem));
17739 mode = GET_MODE (reg);
17740 addr = XEXP (mem, 0);
17742 /* Strip off const from addresses like (const (plus (...))). */
17743 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
17744 addr = XEXP (addr, 0);
17746 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
17748 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
17749 return insns * 4;
17751 else
17752 return 4;
17755 /* Return nonzero if the offset in the address is an immediate. Otherwise,
17756 return zero. */
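/* For example, stores to [r3] or [r3, #8] return 1, while a register-indexed
   address such as [r3, r2] returns 0.  */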
 17758 int
 17759 arm_address_offset_is_imm (rtx_insn *insn)
17761 rtx mem, addr;
17763 extract_insn_cached (insn);
17765 if (REG_P (recog_data.operand[0]))
17766 return 0;
17768 mem = recog_data.operand[0];
17770 gcc_assert (MEM_P (mem));
17772 addr = XEXP (mem, 0);
17774 if (REG_P (addr)
17775 || (GET_CODE (addr) == PLUS
17776 && REG_P (XEXP (addr, 0))
17777 && CONST_INT_P (XEXP (addr, 1))))
17778 return 1;
17779 else
17780 return 0;
17783 /* Output an ADD r, s, #n where n may be too big for one instruction.
17784 If adding zero to one register, output nothing. */
17785 const char *
17786 output_add_immediate (rtx *operands)
17788 HOST_WIDE_INT n = INTVAL (operands[2]);
17790 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
17792 if (n < 0)
17793 output_multi_immediate (operands,
17794 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
17795 -n);
17796 else
17797 output_multi_immediate (operands,
17798 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
17802 return "";
17805 /* Output a multiple immediate operation.
17806 OPERANDS is the vector of operands referred to in the output patterns.
17807 INSTR1 is the output pattern to use for the first constant.
17808 INSTR2 is the output pattern to use for subsequent constants.
17809 IMMED_OP is the index of the constant slot in OPERANDS.
17810 N is the constant value. */
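/* For example, N = 0x10004 is split into the chunks 4 and 0x10000, emitted
   roughly as  add r0, r1, #4  followed by  add r0, r0, #65536.  */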
17811 static const char *
17812 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
17813 int immed_op, HOST_WIDE_INT n)
17815 #if HOST_BITS_PER_WIDE_INT > 32
17816 n &= 0xffffffff;
17817 #endif
17819 if (n == 0)
17821 /* Quick and easy output. */
17822 operands[immed_op] = const0_rtx;
17823 output_asm_insn (instr1, operands);
17825 else
17827 int i;
17828 const char * instr = instr1;
17830 /* Note that n is never zero here (which would give no output). */
17831 for (i = 0; i < 32; i += 2)
17833 if (n & (3 << i))
17835 operands[immed_op] = GEN_INT (n & (255 << i));
17836 output_asm_insn (instr, operands);
17837 instr = instr2;
17838 i += 6;
17843 return "";
17846 /* Return the name of a shifter operation. */
17847 static const char *
17848 arm_shift_nmem(enum rtx_code code)
17850 switch (code)
17852 case ASHIFT:
17853 return ARM_LSL_NAME;
17855 case ASHIFTRT:
17856 return "asr";
17858 case LSHIFTRT:
17859 return "lsr";
17861 case ROTATERT:
17862 return "ror";
17864 default:
17865 abort();
17869 /* Return the appropriate ARM instruction for the operation code.
17870 The returned result should not be overwritten. OP is the rtx of the
17871 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
17872 was shifted. */
17873 const char *
17874 arithmetic_instr (rtx op, int shift_first_arg)
17876 switch (GET_CODE (op))
17878 case PLUS:
17879 return "add";
17881 case MINUS:
17882 return shift_first_arg ? "rsb" : "sub";
17884 case IOR:
17885 return "orr";
17887 case XOR:
17888 return "eor";
17890 case AND:
17891 return "and";
17893 case ASHIFT:
17894 case ASHIFTRT:
17895 case LSHIFTRT:
17896 case ROTATERT:
17897 return arm_shift_nmem(GET_CODE(op));
17899 default:
17900 gcc_unreachable ();
17904 /* Ensure valid constant shifts and return the appropriate shift mnemonic
17905 for the operation code. The returned result should not be overwritten.
17906 OP is the rtx code of the shift.
 17907 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
 17908 constant amount if the shift is by a constant. */
17909 static const char *
17910 shift_op (rtx op, HOST_WIDE_INT *amountp)
17912 const char * mnem;
17913 enum rtx_code code = GET_CODE (op);
17915 switch (code)
17917 case ROTATE:
17918 if (!CONST_INT_P (XEXP (op, 1)))
17920 output_operand_lossage ("invalid shift operand");
17921 return NULL;
17924 code = ROTATERT;
17925 *amountp = 32 - INTVAL (XEXP (op, 1));
17926 mnem = "ror";
17927 break;
17929 case ASHIFT:
17930 case ASHIFTRT:
17931 case LSHIFTRT:
17932 case ROTATERT:
17933 mnem = arm_shift_nmem(code);
17934 if (CONST_INT_P (XEXP (op, 1)))
17936 *amountp = INTVAL (XEXP (op, 1));
17938 else if (REG_P (XEXP (op, 1)))
17940 *amountp = -1;
17941 return mnem;
17943 else
17945 output_operand_lossage ("invalid shift operand");
17946 return NULL;
17948 break;
17950 case MULT:
17951 /* We never have to worry about the amount being other than a
17952 power of 2, since this case can never be reloaded from a reg. */
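      /* For example, (mult x 16) is emitted as a left shift by 4, via the
	 exact_log2 conversion below.  */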
17953 if (!CONST_INT_P (XEXP (op, 1)))
17955 output_operand_lossage ("invalid shift operand");
17956 return NULL;
17959 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
17961 /* Amount must be a power of two. */
17962 if (*amountp & (*amountp - 1))
17964 output_operand_lossage ("invalid shift operand");
17965 return NULL;
17968 *amountp = exact_log2 (*amountp);
17969 gcc_assert (IN_RANGE (*amountp, 0, 31));
17970 return ARM_LSL_NAME;
17972 default:
17973 output_operand_lossage ("invalid shift operand");
17974 return NULL;
17977 /* This is not 100% correct, but follows from the desire to merge
17978 multiplication by a power of 2 with the recognizer for a
 17979 shift. >=32 is not a valid shift for "lsl", so we must try to
17980 output a shift that produces the correct arithmetical result.
17981 Using lsr #32 is identical except for the fact that the carry bit
17982 is not set correctly if we set the flags; but we never use the
17983 carry bit from such an operation, so we can ignore that. */
17984 if (code == ROTATERT)
17985 /* Rotate is just modulo 32. */
17986 *amountp &= 31;
17987 else if (*amountp != (*amountp & 31))
17989 if (code == ASHIFT)
17990 mnem = "lsr";
17991 *amountp = 32;
17994 /* Shifts of 0 are no-ops. */
17995 if (*amountp == 0)
17996 return NULL;
17998 return mnem;
18001 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18002 because /bin/as is horribly restrictive. The judgement about
18003 whether or not each character is 'printable' (and can be output as
18004 is) or not (and must be printed with an octal escape) must be made
18005 with reference to the *host* character set -- the situation is
18006 similar to that discussed in the comments above pp_c_char in
18007 c-pretty-print.c. */
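/* For example, the bytes 'H', '"', 0x01 are emitted as  .ascii "H\"\001",
   and a new .ascii directive is started whenever MAX_ASCII_LEN characters
   have been written on the current one.  */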
18009 #define MAX_ASCII_LEN 51
18011 void
18012 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18014 int i;
18015 int len_so_far = 0;
18017 fputs ("\t.ascii\t\"", stream);
18019 for (i = 0; i < len; i++)
18021 int c = p[i];
18023 if (len_so_far >= MAX_ASCII_LEN)
18025 fputs ("\"\n\t.ascii\t\"", stream);
18026 len_so_far = 0;
18029 if (ISPRINT (c))
18031 if (c == '\\' || c == '\"')
18033 putc ('\\', stream);
18034 len_so_far++;
18036 putc (c, stream);
18037 len_so_far++;
18039 else
18041 fprintf (stream, "\\%03o", c);
18042 len_so_far += 4;
18046 fputs ("\"\n", stream);
 18049 /* Whether a register is callee saved or not. This is necessary because high
 18050 registers, although callee saved, are marked as caller saved when optimizing
 18051 for size on Thumb-1 targets, in order to avoid using them. */
18052 #define callee_saved_reg_p(reg) \
18053 (!call_used_regs[reg] \
18054 || (TARGET_THUMB1 && optimize_size \
18055 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18057 /* Compute the register save mask for registers 0 through 12
18058 inclusive. This code is used by arm_compute_save_reg_mask. */
18060 static unsigned long
18061 arm_compute_save_reg0_reg12_mask (void)
18063 unsigned long func_type = arm_current_func_type ();
18064 unsigned long save_reg_mask = 0;
18065 unsigned int reg;
18067 if (IS_INTERRUPT (func_type))
18069 unsigned int max_reg;
18070 /* Interrupt functions must not corrupt any registers,
18071 even call clobbered ones. If this is a leaf function
18072 we can just examine the registers used by the RTL, but
18073 otherwise we have to assume that whatever function is
18074 called might clobber anything, and so we have to save
18075 all the call-clobbered registers as well. */
18076 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18077 /* FIQ handlers have registers r8 - r12 banked, so
 18078 we only need to check r0 - r7. Normal ISRs only
18079 bank r14 and r15, so we must check up to r12.
18080 r13 is the stack pointer which is always preserved,
18081 so we do not need to consider it here. */
18082 max_reg = 7;
18083 else
18084 max_reg = 12;
18086 for (reg = 0; reg <= max_reg; reg++)
18087 if (df_regs_ever_live_p (reg)
18088 || (! crtl->is_leaf && call_used_regs[reg]))
18089 save_reg_mask |= (1 << reg);
18091 /* Also save the pic base register if necessary. */
18092 if (flag_pic
18093 && !TARGET_SINGLE_PIC_BASE
18094 && arm_pic_register != INVALID_REGNUM
18095 && crtl->uses_pic_offset_table)
18096 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18098 else if (IS_VOLATILE(func_type))
18100 /* For noreturn functions we historically omitted register saves
18101 altogether. However this really messes up debugging. As a
18102 compromise save just the frame pointers. Combined with the link
18103 register saved elsewhere this should be sufficient to get
18104 a backtrace. */
18105 if (frame_pointer_needed)
18106 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18107 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18108 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18109 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18110 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18112 else
18114 /* In the normal case we only need to save those registers
18115 which are call saved and which are used by this function. */
18116 for (reg = 0; reg <= 11; reg++)
18117 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
18118 save_reg_mask |= (1 << reg);
18120 /* Handle the frame pointer as a special case. */
18121 if (frame_pointer_needed)
18122 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18124 /* If we aren't loading the PIC register,
18125 don't stack it even though it may be live. */
18126 if (flag_pic
18127 && !TARGET_SINGLE_PIC_BASE
18128 && arm_pic_register != INVALID_REGNUM
18129 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18130 || crtl->uses_pic_offset_table))
18131 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18133 /* The prologue will copy SP into R0, so save it. */
18134 if (IS_STACKALIGN (func_type))
18135 save_reg_mask |= 1;
18138 /* Save registers so the exception handler can modify them. */
18139 if (crtl->calls_eh_return)
18141 unsigned int i;
18143 for (i = 0; ; i++)
18145 reg = EH_RETURN_DATA_REGNO (i);
18146 if (reg == INVALID_REGNUM)
18147 break;
18148 save_reg_mask |= 1 << reg;
18152 return save_reg_mask;
18155 /* Return true if r3 is live at the start of the function. */
18157 static bool
18158 arm_r3_live_at_start_p (void)
18160 /* Just look at cfg info, which is still close enough to correct at this
18161 point. This gives false positives for broken functions that might use
18162 uninitialized data that happens to be allocated in r3, but who cares? */
18163 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
18166 /* Compute the number of bytes used to store the static chain register on the
18167 stack, above the stack frame. We need to know this accurately to get the
18168 alignment of the rest of the stack frame correct. */
18170 static int
18171 arm_compute_static_chain_stack_bytes (void)
18173 /* See the defining assertion in arm_expand_prologue. */
18174 if (IS_NESTED (arm_current_func_type ())
18175 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18176 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
18177 && !df_regs_ever_live_p (LR_REGNUM)))
18178 && arm_r3_live_at_start_p ()
18179 && crtl->args.pretend_args_size == 0)
18180 return 4;
18182 return 0;
18185 /* Compute a bit mask of which registers need to be
18186 saved on the stack for the current function.
18187 This is used by arm_get_frame_offsets, which may add extra registers. */
18189 static unsigned long
18190 arm_compute_save_reg_mask (void)
18192 unsigned int save_reg_mask = 0;
18193 unsigned long func_type = arm_current_func_type ();
18194 unsigned int reg;
18196 if (IS_NAKED (func_type))
18197 /* This should never really happen. */
18198 return 0;
18200 /* If we are creating a stack frame, then we must save the frame pointer,
18201 IP (which will hold the old stack pointer), LR and the PC. */
18202 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18203 save_reg_mask |=
18204 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18205 | (1 << IP_REGNUM)
18206 | (1 << LR_REGNUM)
18207 | (1 << PC_REGNUM);
18209 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18211 /* Decide if we need to save the link register.
18212 Interrupt routines have their own banked link register,
18213 so they never need to save it.
18214 Otherwise if we do not use the link register we do not need to save
18215 it. If we are pushing other registers onto the stack however, we
18216 can save an instruction in the epilogue by pushing the link register
18217 now and then popping it back into the PC. This incurs extra memory
18218 accesses though, so we only do it when optimizing for size, and only
18219 if we know that we will not need a fancy return sequence. */
18220 if (df_regs_ever_live_p (LR_REGNUM)
18221 || (save_reg_mask
18222 && optimize_size
18223 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
18224 && !crtl->tail_call_emit
18225 && !crtl->calls_eh_return))
18226 save_reg_mask |= 1 << LR_REGNUM;
18228 if (cfun->machine->lr_save_eliminated)
18229 save_reg_mask &= ~ (1 << LR_REGNUM);
18231 if (TARGET_REALLY_IWMMXT
18232 && ((bit_count (save_reg_mask)
18233 + ARM_NUM_INTS (crtl->args.pretend_args_size +
18234 arm_compute_static_chain_stack_bytes())
18235 ) % 2) != 0)
18237 /* The total number of registers that are going to be pushed
18238 onto the stack is odd. We need to ensure that the stack
18239 is 64-bit aligned before we start to save iWMMXt registers,
18240 and also before we start to create locals. (A local variable
18241 might be a double or long long which we will load/store using
18242 an iWMMXt instruction). Therefore we need to push another
18243 ARM register, so that the stack will be 64-bit aligned. We
 18244 try to avoid using the arg registers (r0 - r3) as they might be
18245 used to pass values in a tail call. */
18246 for (reg = 4; reg <= 12; reg++)
18247 if ((save_reg_mask & (1 << reg)) == 0)
18248 break;
18250 if (reg <= 12)
18251 save_reg_mask |= (1 << reg);
18252 else
18254 cfun->machine->sibcall_blocked = 1;
18255 save_reg_mask |= (1 << 3);
18259 /* We may need to push an additional register for use initializing the
18260 PIC base register. */
18261 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
18262 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
18264 reg = thumb_find_work_register (1 << 4);
18265 if (!call_used_regs[reg])
18266 save_reg_mask |= (1 << reg);
18269 return save_reg_mask;
18272 /* Compute a bit mask of which registers need to be
18273 saved on the stack for the current function. */
18274 static unsigned long
18275 thumb1_compute_save_reg_mask (void)
18277 unsigned long mask;
18278 unsigned reg;
18280 mask = 0;
18281 for (reg = 0; reg < 12; reg ++)
18282 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
18283 mask |= 1 << reg;
18285 if (flag_pic
18286 && !TARGET_SINGLE_PIC_BASE
18287 && arm_pic_register != INVALID_REGNUM
18288 && crtl->uses_pic_offset_table)
18289 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18291 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18292 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
18293 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18295 /* LR will also be pushed if any lo regs are pushed. */
18296 if (mask & 0xff || thumb_force_lr_save ())
18297 mask |= (1 << LR_REGNUM);
18299 /* Make sure we have a low work register if we need one.
18300 We will need one if we are going to push a high register,
18301 but we are not currently intending to push a low register. */
18302 if ((mask & 0xff) == 0
18303 && ((mask & 0x0f00) || TARGET_BACKTRACE))
18305 /* Use thumb_find_work_register to choose which register
18306 we will use. If the register is live then we will
18307 have to push it. Use LAST_LO_REGNUM as our fallback
18308 choice for the register to select. */
18309 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
18310 /* Make sure the register returned by thumb_find_work_register is
18311 not part of the return value. */
18312 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
18313 reg = LAST_LO_REGNUM;
18315 if (callee_saved_reg_p (reg))
18316 mask |= 1 << reg;
18319 /* The 504 below is 8 bytes less than 512 because there are two possible
18320 alignment words. We can't tell here if they will be present or not so we
18321 have to play it safe and assume that they are. */
18322 if ((CALLER_INTERWORKING_SLOT_SIZE +
18323 ROUND_UP_WORD (get_frame_size ()) +
18324 crtl->outgoing_args_size) >= 504)
18326 /* This is the same as the code in thumb1_expand_prologue() which
18327 determines which register to use for stack decrement. */
18328 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
18329 if (mask & (1 << reg))
18330 break;
18332 if (reg > LAST_LO_REGNUM)
18334 /* Make sure we have a register available for stack decrement. */
18335 mask |= 1 << LAST_LO_REGNUM;
18339 return mask;
18343 /* Return the number of bytes required to save VFP registers. */
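/* For example, if d8 and d9 are the only VFP registers that need saving,
   this returns 16 (24 on pre-ARMv6 cores because of the ARM10 VFPr1
   workaround below).  */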
18344 static int
18345 arm_get_vfp_saved_size (void)
18347 unsigned int regno;
18348 int count;
18349 int saved;
18351 saved = 0;
18352 /* Space for saved VFP registers. */
18353 if (TARGET_HARD_FLOAT)
18355 count = 0;
18356 for (regno = FIRST_VFP_REGNUM;
18357 regno < LAST_VFP_REGNUM;
18358 regno += 2)
18360 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
18361 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
18363 if (count > 0)
18365 /* Workaround ARM10 VFPr1 bug. */
18366 if (count == 2 && !arm_arch6)
18367 count++;
18368 saved += count * 8;
18370 count = 0;
18372 else
18373 count++;
18375 if (count > 0)
18377 if (count == 2 && !arm_arch6)
18378 count++;
18379 saved += count * 8;
18382 return saved;
18386 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18387 everything bar the final return instruction. If simple_return is true,
18388 then do not output epilogue, because it has already been emitted in RTL. */
18389 const char *
18390 output_return_instruction (rtx operand, bool really_return, bool reverse,
18391 bool simple_return)
18393 char conditional[10];
18394 char instr[100];
18395 unsigned reg;
18396 unsigned long live_regs_mask;
18397 unsigned long func_type;
18398 arm_stack_offsets *offsets;
18400 func_type = arm_current_func_type ();
18402 if (IS_NAKED (func_type))
18403 return "";
18405 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
18407 /* If this function was declared non-returning, and we have
18408 found a tail call, then we have to trust that the called
18409 function won't return. */
18410 if (really_return)
18412 rtx ops[2];
18414 /* Otherwise, trap an attempted return by aborting. */
18415 ops[0] = operand;
18416 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
18417 : "abort");
18418 assemble_external_libcall (ops[1]);
18419 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
18422 return "";
18425 gcc_assert (!cfun->calls_alloca || really_return);
18427 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
18429 cfun->machine->return_used_this_function = 1;
18431 offsets = arm_get_frame_offsets ();
18432 live_regs_mask = offsets->saved_regs_mask;
18434 if (!simple_return && live_regs_mask)
18436 const char * return_reg;
18438 /* If we do not have any special requirements for function exit
18439 (e.g. interworking) then we can load the return address
18440 directly into the PC. Otherwise we must load it into LR. */
18441 if (really_return
18442 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
18443 return_reg = reg_names[PC_REGNUM];
18444 else
18445 return_reg = reg_names[LR_REGNUM];
18447 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
18449 /* There are three possible reasons for the IP register
18450 being saved. 1) a stack frame was created, in which case
18451 IP contains the old stack pointer, or 2) an ISR routine
18452 corrupted it, or 3) it was saved to align the stack on
18453 iWMMXt. In case 1, restore IP into SP, otherwise just
18454 restore IP. */
18455 if (frame_pointer_needed)
18457 live_regs_mask &= ~ (1 << IP_REGNUM);
18458 live_regs_mask |= (1 << SP_REGNUM);
18460 else
18461 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
18464 /* On some ARM architectures it is faster to use LDR rather than
18465 LDM to load a single register. On other architectures, the
18466 cost is the same. In 26 bit mode, or for exception handlers,
18467 we have to use LDM to load the PC so that the CPSR is also
18468 restored. */
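	/* For example, a normal function whose only saved register is LR can
	   return here with a single  ldr pc, [sp], #4.  */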
18469 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
18470 if (live_regs_mask == (1U << reg))
18471 break;
18473 if (reg <= LAST_ARM_REGNUM
18474 && (reg != LR_REGNUM
18475 || ! really_return
18476 || ! IS_INTERRUPT (func_type)))
18478 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
18479 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
18481 else
18483 char *p;
18484 int first = 1;
18486 /* Generate the load multiple instruction to restore the
18487 registers. Note we can get here, even if
18488 frame_pointer_needed is true, but only if sp already
18489 points to the base of the saved core registers. */
18490 if (live_regs_mask & (1 << SP_REGNUM))
18492 unsigned HOST_WIDE_INT stack_adjust;
18494 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
18495 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
18497 if (stack_adjust && arm_arch5 && TARGET_ARM)
18498 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
18499 else
18501 /* If we can't use ldmib (SA110 bug),
18502 then try to pop r3 instead. */
18503 if (stack_adjust)
18504 live_regs_mask |= 1 << 3;
18506 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
18509 /* For interrupt returns we have to use an LDM rather than
18510 a POP so that we can use the exception return variant. */
18511 else if (IS_INTERRUPT (func_type))
18512 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
18513 else
18514 sprintf (instr, "pop%s\t{", conditional);
18516 p = instr + strlen (instr);
18518 for (reg = 0; reg <= SP_REGNUM; reg++)
18519 if (live_regs_mask & (1 << reg))
18521 int l = strlen (reg_names[reg]);
18523 if (first)
18524 first = 0;
18525 else
18527 memcpy (p, ", ", 2);
18528 p += 2;
18531 memcpy (p, "%|", 2);
18532 memcpy (p + 2, reg_names[reg], l);
18533 p += l + 2;
18536 if (live_regs_mask & (1 << LR_REGNUM))
18538 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
18539 /* If returning from an interrupt, restore the CPSR. */
18540 if (IS_INTERRUPT (func_type))
18541 strcat (p, "^");
18543 else
18544 strcpy (p, "}");
18547 output_asm_insn (instr, & operand);
18549 /* See if we need to generate an extra instruction to
18550 perform the actual function return. */
18551 if (really_return
18552 && func_type != ARM_FT_INTERWORKED
18553 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
18555 /* The return has already been handled
18556 by loading the LR into the PC. */
18557 return "";
18561 if (really_return)
18563 switch ((int) ARM_FUNC_TYPE (func_type))
18565 case ARM_FT_ISR:
18566 case ARM_FT_FIQ:
18567 /* ??? This is wrong for unified assembly syntax. */
18568 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
18569 break;
18571 case ARM_FT_INTERWORKED:
18572 gcc_assert (arm_arch5 || arm_arch4t);
18573 sprintf (instr, "bx%s\t%%|lr", conditional);
18574 break;
18576 case ARM_FT_EXCEPTION:
18577 /* ??? This is wrong for unified assembly syntax. */
18578 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
18579 break;
18581 default:
18582 /* Use bx if it's available. */
18583 if (arm_arch5 || arm_arch4t)
18584 sprintf (instr, "bx%s\t%%|lr", conditional);
18585 else
18586 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
18587 break;
18590 output_asm_insn (instr, & operand);
18593 return "";
18596 /* Write the function name into the code section, directly preceding
18597 the function prologue.
18599 Code will be output similar to this:
18601 .ascii "arm_poke_function_name", 0
18602 .align
18604 .word 0xff000000 + (t1 - t0)
18605 arm_poke_function_name
18606 mov ip, sp
18607 stmfd sp!, {fp, ip, lr, pc}
18608 sub fp, ip, #4
18610 When performing a stack backtrace, code can inspect the value
18611 of 'pc' stored at 'fp' + 0. If the trace function then looks
18612 at location pc - 12 and the top 8 bits are set, then we know
18613 that there is a function name embedded immediately preceding this
 18614 location, and that its length is ((pc[-3]) & ~0xff000000).
18616 We assume that pc is declared as a pointer to an unsigned long.
18618 It is of no benefit to output the function name if we are assembling
18619 a leaf function. These function types will not contain a stack
18620 backtrace structure, therefore it is not possible to determine the
18621 function name. */
18622 void
18623 arm_poke_function_name (FILE *stream, const char *name)
18625 unsigned long alignlength;
18626 unsigned long length;
18627 rtx x;
18629 length = strlen (name) + 1;
18630 alignlength = ROUND_UP_WORD (length);
18632 ASM_OUTPUT_ASCII (stream, name, length);
18633 ASM_OUTPUT_ALIGN (stream, 2);
18634 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
18635 assemble_aligned_integer (UNITS_PER_WORD, x);
18638 /* Place some comments into the assembler stream
18639 describing the current function. */
18640 static void
18641 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
18643 unsigned long func_type;
18645 /* ??? Do we want to print some of the below anyway? */
18646 if (TARGET_THUMB1)
18647 return;
18649 /* Sanity check. */
18650 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
18652 func_type = arm_current_func_type ();
18654 switch ((int) ARM_FUNC_TYPE (func_type))
18656 default:
18657 case ARM_FT_NORMAL:
18658 break;
18659 case ARM_FT_INTERWORKED:
18660 asm_fprintf (f, "\t%@ Function supports interworking.\n");
18661 break;
18662 case ARM_FT_ISR:
18663 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
18664 break;
18665 case ARM_FT_FIQ:
18666 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
18667 break;
18668 case ARM_FT_EXCEPTION:
18669 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
18670 break;
18673 if (IS_NAKED (func_type))
18674 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
18676 if (IS_VOLATILE (func_type))
18677 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
18679 if (IS_NESTED (func_type))
18680 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
18681 if (IS_STACKALIGN (func_type))
18682 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
18684 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
18685 crtl->args.size,
18686 crtl->args.pretend_args_size, frame_size);
18688 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
18689 frame_pointer_needed,
18690 cfun->machine->uses_anonymous_args);
18692 if (cfun->machine->lr_save_eliminated)
18693 asm_fprintf (f, "\t%@ link register save eliminated.\n");
18695 if (crtl->calls_eh_return)
18696 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
18700 static void
18701 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
18702 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
18704 arm_stack_offsets *offsets;
18706 if (TARGET_THUMB1)
18708 int regno;
18710 /* Emit any call-via-reg trampolines that are needed for v4t support
18711 of call_reg and call_value_reg type insns. */
18712 for (regno = 0; regno < LR_REGNUM; regno++)
18714 rtx label = cfun->machine->call_via[regno];
18716 if (label != NULL)
18718 switch_to_section (function_section (current_function_decl));
18719 targetm.asm_out.internal_label (asm_out_file, "L",
18720 CODE_LABEL_NUMBER (label));
18721 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
18725 /* ??? Probably not safe to set this here, since it assumes that a
18726 function will be emitted as assembly immediately after we generate
18727 RTL for it. This does not happen for inline functions. */
18728 cfun->machine->return_used_this_function = 0;
18730 else /* TARGET_32BIT */
18732 /* We need to take into account any stack-frame rounding. */
18733 offsets = arm_get_frame_offsets ();
18735 gcc_assert (!use_return_insn (FALSE, NULL)
18736 || (cfun->machine->return_used_this_function != 0)
18737 || offsets->saved_regs == offsets->outgoing_args
18738 || frame_pointer_needed);
18742 /* Generate and emit a sequence of insns equivalent to PUSH, but using
18743 STR and STRD. If an even number of registers are being pushed, one
18744 or more STRD patterns are created for each register pair. If an
18745 odd number of registers are pushed, emit an initial STR followed by
18746 as many STRD instructions as are needed. This works best when the
18747 stack is initially 64-bit aligned (the normal case), since it
18748 ensures that each STRD is also 64-bit aligned. */
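/* For example, pushing {r4, r5, r6} emits roughly
     str r4, [sp, #-12]!
     strd r5, r6, [sp, #4]  */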
18749 static void
18750 thumb2_emit_strd_push (unsigned long saved_regs_mask)
18752 int num_regs = 0;
18753 int i;
18754 int regno;
18755 rtx par = NULL_RTX;
18756 rtx dwarf = NULL_RTX;
18757 rtx tmp;
18758 bool first = true;
18760 num_regs = bit_count (saved_regs_mask);
18762 /* Must be at least one register to save, and can't save SP or PC. */
18763 gcc_assert (num_regs > 0 && num_regs <= 14);
18764 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
18765 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
18767 /* Create sequence for DWARF info. All the frame-related data for
18768 debugging is held in this wrapper. */
18769 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
18771 /* Describe the stack adjustment. */
18772 tmp = gen_rtx_SET (stack_pointer_rtx,
18773 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
18774 RTX_FRAME_RELATED_P (tmp) = 1;
18775 XVECEXP (dwarf, 0, 0) = tmp;
18777 /* Find the first register. */
 18778 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
 18779 continue;
18781 i = 0;
 18783 /* If there's an odd number of registers to push, start off by
18784 pushing a single register. This ensures that subsequent strd
18785 operations are dword aligned (assuming that SP was originally
18786 64-bit aligned). */
18787 if ((num_regs & 1) != 0)
18789 rtx reg, mem, insn;
18791 reg = gen_rtx_REG (SImode, regno);
18792 if (num_regs == 1)
18793 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
18794 stack_pointer_rtx));
18795 else
18796 mem = gen_frame_mem (Pmode,
18797 gen_rtx_PRE_MODIFY
18798 (Pmode, stack_pointer_rtx,
18799 plus_constant (Pmode, stack_pointer_rtx,
18800 -4 * num_regs)));
18802 tmp = gen_rtx_SET (mem, reg);
18803 RTX_FRAME_RELATED_P (tmp) = 1;
18804 insn = emit_insn (tmp);
18805 RTX_FRAME_RELATED_P (insn) = 1;
18806 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18807 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
18808 RTX_FRAME_RELATED_P (tmp) = 1;
18809 i++;
18810 regno++;
18811 XVECEXP (dwarf, 0, i) = tmp;
18812 first = false;
18815 while (i < num_regs)
18816 if (saved_regs_mask & (1 << regno))
18818 rtx reg1, reg2, mem1, mem2;
18819 rtx tmp0, tmp1, tmp2;
18820 int regno2;
18822 /* Find the register to pair with this one. */
18823 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
 18824 regno2++)
 18825 continue;
18827 reg1 = gen_rtx_REG (SImode, regno);
18828 reg2 = gen_rtx_REG (SImode, regno2);
18830 if (first)
18832 rtx insn;
18834 first = false;
18835 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
18836 stack_pointer_rtx,
18837 -4 * num_regs));
18838 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
18839 stack_pointer_rtx,
18840 -4 * (num_regs - 1)));
18841 tmp0 = gen_rtx_SET (stack_pointer_rtx,
18842 plus_constant (Pmode, stack_pointer_rtx,
18843 -4 * (num_regs)));
18844 tmp1 = gen_rtx_SET (mem1, reg1);
18845 tmp2 = gen_rtx_SET (mem2, reg2);
18846 RTX_FRAME_RELATED_P (tmp0) = 1;
18847 RTX_FRAME_RELATED_P (tmp1) = 1;
18848 RTX_FRAME_RELATED_P (tmp2) = 1;
18849 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
18850 XVECEXP (par, 0, 0) = tmp0;
18851 XVECEXP (par, 0, 1) = tmp1;
18852 XVECEXP (par, 0, 2) = tmp2;
18853 insn = emit_insn (par);
18854 RTX_FRAME_RELATED_P (insn) = 1;
18855 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18857 else
18859 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
18860 stack_pointer_rtx,
18861 4 * i));
18862 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
18863 stack_pointer_rtx,
18864 4 * (i + 1)));
18865 tmp1 = gen_rtx_SET (mem1, reg1);
18866 tmp2 = gen_rtx_SET (mem2, reg2);
18867 RTX_FRAME_RELATED_P (tmp1) = 1;
18868 RTX_FRAME_RELATED_P (tmp2) = 1;
18869 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
18870 XVECEXP (par, 0, 0) = tmp1;
18871 XVECEXP (par, 0, 1) = tmp2;
18872 emit_insn (par);
18875 /* Create unwind information. This is an approximation. */
18876 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
18877 plus_constant (Pmode,
18878 stack_pointer_rtx,
18879 4 * i)),
18880 reg1);
18881 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
18882 plus_constant (Pmode,
18883 stack_pointer_rtx,
18884 4 * (i + 1))),
18885 reg2);
18887 RTX_FRAME_RELATED_P (tmp1) = 1;
18888 RTX_FRAME_RELATED_P (tmp2) = 1;
18889 XVECEXP (dwarf, 0, i + 1) = tmp1;
18890 XVECEXP (dwarf, 0, i + 2) = tmp2;
18891 i += 2;
18892 regno = regno2 + 1;
18894 else
18895 regno++;
18897 return;
18900 /* STRD in ARM mode requires consecutive registers. This function emits STRD
18901 whenever possible, otherwise it emits single-word stores. The first store
18902 also allocates stack space for all saved registers, using writeback with
18903 post-addressing mode. All other stores use offset addressing. If no STRD
18904 can be emitted, this function emits a sequence of single-word stores,
 18905 and not an STM as before, because single-word stores provide more freedom for
18906 scheduling and can be turned into an STM by peephole optimizations. */
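/* For example, pushing {r4, r5, r7} emits roughly
     strd r4, r5, [sp, #-12]!
     str r7, [sp, #8]  */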
18907 static void
18908 arm_emit_strd_push (unsigned long saved_regs_mask)
18910 int num_regs = 0;
18911 int i, j, dwarf_index = 0;
18912 int offset = 0;
18913 rtx dwarf = NULL_RTX;
18914 rtx insn = NULL_RTX;
18915 rtx tmp, mem;
18917 /* TODO: A more efficient code can be emitted by changing the
18918 layout, e.g., first push all pairs that can use STRD to keep the
18919 stack aligned, and then push all other registers. */
18920 for (i = 0; i <= LAST_ARM_REGNUM; i++)
18921 if (saved_regs_mask & (1 << i))
18922 num_regs++;
18924 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
18925 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
18926 gcc_assert (num_regs > 0);
18928 /* Create sequence for DWARF info. */
18929 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
18931 /* For dwarf info, we generate explicit stack update. */
18932 tmp = gen_rtx_SET (stack_pointer_rtx,
18933 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
18934 RTX_FRAME_RELATED_P (tmp) = 1;
18935 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
18937 /* Save registers. */
18938 offset = - 4 * num_regs;
18939 j = 0;
18940 while (j <= LAST_ARM_REGNUM)
18941 if (saved_regs_mask & (1 << j))
18943 if ((j % 2 == 0)
18944 && (saved_regs_mask & (1 << (j + 1))))
18946 /* Current register and previous register form register pair for
18947 which STRD can be generated. */
18948 if (offset < 0)
18950 /* Allocate stack space for all saved registers. */
18951 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
18952 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
18953 mem = gen_frame_mem (DImode, tmp);
18954 offset = 0;
18956 else if (offset > 0)
18957 mem = gen_frame_mem (DImode,
18958 plus_constant (Pmode,
18959 stack_pointer_rtx,
18960 offset));
18961 else
18962 mem = gen_frame_mem (DImode, stack_pointer_rtx);
18964 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
18965 RTX_FRAME_RELATED_P (tmp) = 1;
18966 tmp = emit_insn (tmp);
18968 /* Record the first store insn. */
18969 if (dwarf_index == 1)
18970 insn = tmp;
18972 /* Generate dwarf info. */
18973 mem = gen_frame_mem (SImode,
18974 plus_constant (Pmode,
18975 stack_pointer_rtx,
18976 offset));
18977 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
18978 RTX_FRAME_RELATED_P (tmp) = 1;
18979 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
18981 mem = gen_frame_mem (SImode,
18982 plus_constant (Pmode,
18983 stack_pointer_rtx,
18984 offset + 4));
18985 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
18986 RTX_FRAME_RELATED_P (tmp) = 1;
18987 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
18989 offset += 8;
18990 j += 2;
18992 else
18994 /* Emit a single word store. */
18995 if (offset < 0)
18997 /* Allocate stack space for all saved registers. */
18998 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
18999 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19000 mem = gen_frame_mem (SImode, tmp);
19001 offset = 0;
19003 else if (offset > 0)
19004 mem = gen_frame_mem (SImode,
19005 plus_constant (Pmode,
19006 stack_pointer_rtx,
19007 offset));
19008 else
19009 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19011 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19012 RTX_FRAME_RELATED_P (tmp) = 1;
19013 tmp = emit_insn (tmp);
19015 /* Record the first store insn. */
19016 if (dwarf_index == 1)
19017 insn = tmp;
19019 /* Generate dwarf info. */
19020 mem = gen_frame_mem (SImode,
19021 plus_constant(Pmode,
19022 stack_pointer_rtx,
19023 offset));
19024 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19025 RTX_FRAME_RELATED_P (tmp) = 1;
19026 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19028 offset += 4;
19029 j += 1;
19032 else
19033 j++;
19035 /* Attach dwarf info to the first insn we generate. */
19036 gcc_assert (insn != NULL_RTX);
19037 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19038 RTX_FRAME_RELATED_P (insn) = 1;
19041 /* Generate and emit an insn that we will recognize as a push_multi.
19042 Unfortunately, since this insn does not reflect very well the actual
19043 semantics of the operation, we need to annotate the insn for the benefit
19044 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19045 MASK for registers that should be annotated for DWARF2 frame unwind
19046 information. */
19047 static rtx
19048 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19050 int num_regs = 0;
19051 int num_dwarf_regs = 0;
19052 int i, j;
19053 rtx par;
19054 rtx dwarf;
19055 int dwarf_par_index;
19056 rtx tmp, reg;
19058 /* We don't record the PC in the dwarf frame information. */
19059 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19061 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19063 if (mask & (1 << i))
19064 num_regs++;
19065 if (dwarf_regs_mask & (1 << i))
19066 num_dwarf_regs++;
19069 gcc_assert (num_regs && num_regs <= 16);
19070 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19072 /* For the body of the insn we are going to generate an UNSPEC in
19073 parallel with several USEs. This allows the insn to be recognized
19074 by the push_multi pattern in the arm.md file.
19076 The body of the insn looks something like this:
19078 (parallel [
19079 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19080 (const_int:SI <num>)))
19081 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19082 (use (reg:SI XX))
19083 (use (reg:SI YY))
19087 For the frame note however, we try to be more explicit and actually
19088 show each register being stored into the stack frame, plus a (single)
19089 decrement of the stack pointer. We do it this way in order to be
19090 friendly to the stack unwinding code, which only wants to see a single
19091 stack decrement per instruction. The RTL we generate for the note looks
19092 something like this:
19094 (sequence [
19095 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19096 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19097 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19098 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19102 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19103 instead we'd have a parallel expression detailing all
19104 the stores to the various memory addresses so that debug
 19105 information is more up-to-date. Remember, however, when writing
 19106 this to take care of the constraints imposed by the push instruction.
19108 Note also that this has to be taken care of for the VFP registers.
19110 For more see PR43399. */
19112 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19113 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19114 dwarf_par_index = 1;
19116 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19118 if (mask & (1 << i))
19120 reg = gen_rtx_REG (SImode, i);
19122 XVECEXP (par, 0, 0)
19123 = gen_rtx_SET (gen_frame_mem
19124 (BLKmode,
19125 gen_rtx_PRE_MODIFY (Pmode,
19126 stack_pointer_rtx,
19127 plus_constant
19128 (Pmode, stack_pointer_rtx,
19129 -4 * num_regs))
19131 gen_rtx_UNSPEC (BLKmode,
19132 gen_rtvec (1, reg),
19133 UNSPEC_PUSH_MULT));
19135 if (dwarf_regs_mask & (1 << i))
19137 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
19138 reg);
19139 RTX_FRAME_RELATED_P (tmp) = 1;
19140 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19143 break;
19147 for (j = 1, i++; j < num_regs; i++)
19149 if (mask & (1 << i))
19151 reg = gen_rtx_REG (SImode, i);
19153 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19155 if (dwarf_regs_mask & (1 << i))
 19157 tmp
 19158 = gen_rtx_SET (gen_frame_mem
19159 (SImode,
19160 plus_constant (Pmode, stack_pointer_rtx,
19161 4 * j)),
19162 reg);
19163 RTX_FRAME_RELATED_P (tmp) = 1;
19164 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19167 j++;
19171 par = emit_insn (par);
19173 tmp = gen_rtx_SET (stack_pointer_rtx,
19174 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19175 RTX_FRAME_RELATED_P (tmp) = 1;
19176 XVECEXP (dwarf, 0, 0) = tmp;
19178 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19180 return par;
19183 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19184 SIZE is the offset to be adjusted.
19185 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19186 static void
19187 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19189 rtx dwarf;
19191 RTX_FRAME_RELATED_P (insn) = 1;
19192 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
19193 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19196 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19197 SAVED_REGS_MASK shows which registers need to be restored.
19199 Unfortunately, since this insn does not reflect very well the actual
19200 semantics of the operation, we need to annotate the insn for the benefit
19201 of DWARF2 frame unwind information. */
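/* For example, restoring {r4, r5, lr} emits a parallel that the pop_multi
   pattern prints roughly as  pop {r4, r5, lr}, with REG_CFA_RESTORE notes
   and a 12-byte CFA adjustment attached for the unwinder.  */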
19202 static void
19203 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
19205 int num_regs = 0;
19206 int i, j;
19207 rtx par;
19208 rtx dwarf = NULL_RTX;
19209 rtx tmp, reg;
19210 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
19211 int offset_adj;
19212 int emit_update;
19214 offset_adj = return_in_pc ? 1 : 0;
19215 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19216 if (saved_regs_mask & (1 << i))
19217 num_regs++;
19219 gcc_assert (num_regs && num_regs <= 16);
19221 /* If SP is in reglist, then we don't emit SP update insn. */
19222 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
19224 /* The parallel needs to hold num_regs SETs
19225 and one SET for the stack update. */
19226 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
19228 if (return_in_pc)
19229 XVECEXP (par, 0, 0) = ret_rtx;
19231 if (emit_update)
19233 /* Increment the stack pointer, based on there being
19234 num_regs 4-byte registers to restore. */
19235 tmp = gen_rtx_SET (stack_pointer_rtx,
19236 plus_constant (Pmode,
19237 stack_pointer_rtx,
19238 4 * num_regs));
19239 RTX_FRAME_RELATED_P (tmp) = 1;
19240 XVECEXP (par, 0, offset_adj) = tmp;
19243 /* Now restore every reg, which may include PC. */
19244 for (j = 0, i = 0; j < num_regs; i++)
19245 if (saved_regs_mask & (1 << i))
19247 reg = gen_rtx_REG (SImode, i);
19248 if ((num_regs == 1) && emit_update && !return_in_pc)
19250 /* Emit single load with writeback. */
19251 tmp = gen_frame_mem (SImode,
19252 gen_rtx_POST_INC (Pmode,
19253 stack_pointer_rtx));
19254 tmp = emit_insn (gen_rtx_SET (reg, tmp));
19255 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19256 return;
19259 tmp = gen_rtx_SET (reg,
19260 gen_frame_mem
19261 (SImode,
19262 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
19263 RTX_FRAME_RELATED_P (tmp) = 1;
19264 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
19266 /* We need to maintain a sequence for DWARF info too. As dwarf info
19267 should not have PC, skip PC. */
19268 if (i != PC_REGNUM)
19269 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19271 j++;
19274 if (return_in_pc)
19275 par = emit_jump_insn (par);
19276 else
19277 par = emit_insn (par);
19279 REG_NOTES (par) = dwarf;
19280 if (!return_in_pc)
19281 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
19282 stack_pointer_rtx, stack_pointer_rtx);
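/* For example (register choice purely illustrative), a SAVED_REGS_MASK
   covering r4, r5 and lr, with neither SP nor PC set, produces a single
   parallel equivalent to

     pop {r4, r5, lr}

   carrying REG_CFA_RESTORE notes for the three registers and a
   REG_CFA_ADJUST_CFA note for the 12-byte stack adjustment.  */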
19285 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19286 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19288 Unfortunately, since this insn does not reflect very well the actual
19289 semantics of the operation, we need to annotate the insn for the benefit
19290 of DWARF2 frame unwind information. */
19291 static void
19292 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
19294 int i, j;
19295 rtx par;
19296 rtx dwarf = NULL_RTX;
19297 rtx tmp, reg;
19299 gcc_assert (num_regs && num_regs <= 32);
19301 /* Workaround ARM10 VFPr1 bug. */
19302 if (num_regs == 2 && !arm_arch6)
19304 if (first_reg == 15)
19305 first_reg--;
19307 num_regs++;
19310 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19311 there could be up to 32 D-registers to restore.
19312 If there are more than 16 D-registers, make two recursive calls,
19313 each of which emits one pop_multi instruction. */
19314 if (num_regs > 16)
19316 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
19317 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
19318 return;
19321 /* The parallel needs to hold num_regs SETs
19322 and one SET for the stack update. */
19323 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
19325 /* Increment the stack pointer, based on there being
19326 num_regs 8-byte registers to restore. */
19327 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
19328 RTX_FRAME_RELATED_P (tmp) = 1;
19329 XVECEXP (par, 0, 0) = tmp;
19331 /* Now show every reg that will be restored, using a SET for each. */
19332 for (j = 0, i=first_reg; j < num_regs; i += 2)
19334 reg = gen_rtx_REG (DFmode, i);
19336 tmp = gen_rtx_SET (reg,
19337 gen_frame_mem
19338 (DFmode,
19339 plus_constant (Pmode, base_reg, 8 * j)));
19340 RTX_FRAME_RELATED_P (tmp) = 1;
19341 XVECEXP (par, 0, j + 1) = tmp;
19343 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19345 j++;
19348 par = emit_insn (par);
19349 REG_NOTES (par) = dwarf;
19351 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
19352 if (REGNO (base_reg) == IP_REGNUM)
19354 RTX_FRAME_RELATED_P (par) = 1;
19355 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
19357 else
19358 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
19359 base_reg, base_reg);
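/* For instance, restoring four D registers starting at d8 from the stack
   pointer builds one parallel: a SET adding 32 to the base register plus one
   SET per register, i.e. the equivalent of a single VLDM/VPOP of d8-d11,
   with a REG_CFA_RESTORE note chained for each register.  */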
19362 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
19363 number of registers are being popped, multiple LDRD patterns are created for
19364 all register pairs. If odd number of registers are popped, last register is
19365 loaded by using LDR pattern. */
19366 static void
19367 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
19369 int num_regs = 0;
19370 int i, j;
19371 rtx par = NULL_RTX;
19372 rtx dwarf = NULL_RTX;
19373 rtx tmp, reg, tmp1;
19374 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
19376 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19377 if (saved_regs_mask & (1 << i))
19378 num_regs++;
19380 gcc_assert (num_regs && num_regs <= 16);
19382 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
19383 to be popped. So, if num_regs is even, now it will become odd,
19384 and we can generate pop with PC. If num_regs is odd, it will be
19385 even now, and ldr with return can be generated for PC. */
19386 if (return_in_pc)
19387 num_regs--;
19389 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19391 /* Var j iterates over all the registers to gather the registers in
19392 saved_regs_mask. Var i gives the index of a saved register in the stack frame.
19393 A PARALLEL RTX of register-pair is created here, so that pattern for
19394 LDRD can be matched. As PC is always last register to be popped, and
19395 we have already decremented num_regs if PC, we don't have to worry
19396 about PC in this loop. */
19397 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
19398 if (saved_regs_mask & (1 << j))
19400 /* Create RTX for memory load. */
19401 reg = gen_rtx_REG (SImode, j);
19402 tmp = gen_rtx_SET (reg,
19403 gen_frame_mem (SImode,
19404 plus_constant (Pmode,
19405 stack_pointer_rtx, 4 * i)));
19406 RTX_FRAME_RELATED_P (tmp) = 1;
19408 if (i % 2 == 0)
19410 /* When saved-register index (i) is even, the RTX to be emitted is
19411 yet to be created. Hence create it first. The LDRD pattern we
19412 are generating is :
19413 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19414 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19415 where target registers need not be consecutive. */
19416 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19417 dwarf = NULL_RTX;
19420 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
19421 added as 0th element and if i is odd, reg_i is added as 1st element
19422 of LDRD pattern shown above. */
19423 XVECEXP (par, 0, (i % 2)) = tmp;
19424 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19426 if ((i % 2) == 1)
19428 /* When saved-register index (i) is odd, RTXs for both the registers
19429 to be loaded are generated in above given LDRD pattern, and the
19430 pattern can be emitted now. */
19431 par = emit_insn (par);
19432 REG_NOTES (par) = dwarf;
19433 RTX_FRAME_RELATED_P (par) = 1;
19436 i++;
19439 /* If the number of registers pushed is odd AND return_in_pc is false, OR
19440 the number of registers is even AND return_in_pc is true, the last register
19441 is popped using LDR. It can be PC as well. Hence, adjust the stack first
19442 and then use LDR with post-increment.
19444 /* Increment the stack pointer, based on there being
19445 num_regs 4-byte registers to restore. */
19446 tmp = gen_rtx_SET (stack_pointer_rtx,
19447 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
19448 RTX_FRAME_RELATED_P (tmp) = 1;
19449 tmp = emit_insn (tmp);
19450 if (!return_in_pc)
19452 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
19453 stack_pointer_rtx, stack_pointer_rtx);
19456 dwarf = NULL_RTX;
19458 if (((num_regs % 2) == 1 && !return_in_pc)
19459 || ((num_regs % 2) == 0 && return_in_pc))
19461 /* Scan for the single register to be popped. Skip until the saved
19462 register is found. */
19463 for (; (saved_regs_mask & (1 << j)) == 0; j++);
19465 /* Gen LDR with post increment here. */
19466 tmp1 = gen_rtx_MEM (SImode,
19467 gen_rtx_POST_INC (SImode,
19468 stack_pointer_rtx));
19469 set_mem_alias_set (tmp1, get_frame_alias_set ());
19471 reg = gen_rtx_REG (SImode, j);
19472 tmp = gen_rtx_SET (reg, tmp1);
19473 RTX_FRAME_RELATED_P (tmp) = 1;
19474 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19476 if (return_in_pc)
19478 /* If return_in_pc, j must be PC_REGNUM. */
19479 gcc_assert (j == PC_REGNUM);
19480 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19481 XVECEXP (par, 0, 0) = ret_rtx;
19482 XVECEXP (par, 0, 1) = tmp;
19483 par = emit_jump_insn (par);
19485 else
19487 par = emit_insn (tmp);
19488 REG_NOTES (par) = dwarf;
19489 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
19490 stack_pointer_rtx, stack_pointer_rtx);
19494 else if ((num_regs % 2) == 1 && return_in_pc)
19496 /* There are 2 registers to be popped. So, generate the pattern
19497 pop_multiple_with_stack_update_and_return to pop in PC. */
19498 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
19501 return;
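/* A sketch of the result for SAVED_REGS_MASK = {r4, r5, r7} with no PC:
   one LDRD-style parallel loading r4 from [sp] and r5 from [sp, #4],
   then "add sp, sp, #8", and finally "ldr r7, [sp], #4" with
   post-increment for the odd register left over.  */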
19504 /* LDRD in ARM mode needs consecutive registers as operands. This function
19505 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
19506 offset addressing and then generates one separate stack update. This provides
19507 more scheduling freedom, compared to writeback on every load. However,
19508 if the function returns using load into PC directly
19509 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
19510 before the last load. TODO: Add a peephole optimization to recognize
19511 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
19512 peephole optimization to merge the load at stack-offset zero
19513 with the stack update instruction using load with writeback
19514 in post-index addressing mode. */
19515 static void
19516 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
19518 int j = 0;
19519 int offset = 0;
19520 rtx par = NULL_RTX;
19521 rtx dwarf = NULL_RTX;
19522 rtx tmp, mem;
19524 /* Restore saved registers. */
19525 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
19526 j = 0;
19527 while (j <= LAST_ARM_REGNUM)
19528 if (saved_regs_mask & (1 << j))
19530 if ((j % 2) == 0
19531 && (saved_regs_mask & (1 << (j + 1)))
19532 && (j + 1) != PC_REGNUM)
19534 /* The current register and the next register form a register pair for which
19535 LDRD can be generated. PC is always the last register popped, and
19536 we handle it separately. */
19537 if (offset > 0)
19538 mem = gen_frame_mem (DImode,
19539 plus_constant (Pmode,
19540 stack_pointer_rtx,
19541 offset));
19542 else
19543 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19545 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
19546 tmp = emit_insn (tmp);
19547 RTX_FRAME_RELATED_P (tmp) = 1;
19549 /* Generate dwarf info. */
19551 dwarf = alloc_reg_note (REG_CFA_RESTORE,
19552 gen_rtx_REG (SImode, j),
19553 NULL_RTX);
19554 dwarf = alloc_reg_note (REG_CFA_RESTORE,
19555 gen_rtx_REG (SImode, j + 1),
19556 dwarf);
19558 REG_NOTES (tmp) = dwarf;
19560 offset += 8;
19561 j += 2;
19563 else if (j != PC_REGNUM)
19565 /* Emit a single word load. */
19566 if (offset > 0)
19567 mem = gen_frame_mem (SImode,
19568 plus_constant (Pmode,
19569 stack_pointer_rtx,
19570 offset));
19571 else
19572 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19574 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
19575 tmp = emit_insn (tmp);
19576 RTX_FRAME_RELATED_P (tmp) = 1;
19578 /* Generate dwarf info. */
19579 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
19580 gen_rtx_REG (SImode, j),
19581 NULL_RTX);
19583 offset += 4;
19584 j += 1;
19586 else /* j == PC_REGNUM */
19587 j++;
19589 else
19590 j++;
19592 /* Update the stack. */
19593 if (offset > 0)
19595 tmp = gen_rtx_SET (stack_pointer_rtx,
19596 plus_constant (Pmode,
19597 stack_pointer_rtx,
19598 offset));
19599 tmp = emit_insn (tmp);
19600 arm_add_cfa_adjust_cfa_note (tmp, offset,
19601 stack_pointer_rtx, stack_pointer_rtx);
19602 offset = 0;
19605 if (saved_regs_mask & (1 << PC_REGNUM))
19607 /* Only PC is to be popped. */
19608 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19609 XVECEXP (par, 0, 0) = ret_rtx;
19610 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
19611 gen_frame_mem (SImode,
19612 gen_rtx_POST_INC (SImode,
19613 stack_pointer_rtx)));
19614 RTX_FRAME_RELATED_P (tmp) = 1;
19615 XVECEXP (par, 0, 1) = tmp;
19616 par = emit_jump_insn (par);
19618 /* Generate dwarf info. */
19619 dwarf = alloc_reg_note (REG_CFA_RESTORE,
19620 gen_rtx_REG (SImode, PC_REGNUM),
19621 NULL_RTX);
19622 REG_NOTES (par) = dwarf;
19623 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
19624 stack_pointer_rtx, stack_pointer_rtx);
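/* A sketch of the result for SAVED_REGS_MASK = {r4, r5, r6} in ARM mode:
   "ldrd r4, r5, [sp]" followed by "ldr r6, [sp, #8]", then a single
   "add sp, sp, #12" that carries the REG_CFA_ADJUST_CFA note.  */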
19628 /* Calculate the size of the return value that is passed in registers. */
19629 static unsigned
19630 arm_size_return_regs (void)
19632 machine_mode mode;
19634 if (crtl->return_rtx != 0)
19635 mode = GET_MODE (crtl->return_rtx);
19636 else
19637 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19639 return GET_MODE_SIZE (mode);
19642 /* Return true if the current function needs to save/restore LR. */
19643 static bool
19644 thumb_force_lr_save (void)
19646 return !cfun->machine->lr_save_eliminated
19647 && (!leaf_function_p ()
19648 || thumb_far_jump_used_p ()
19649 || df_regs_ever_live_p (LR_REGNUM));
19652 /* We do not know whether r3 will be available, because
19653 an indirect tail call is happening in this
19654 particular case. */
19655 static bool
19656 is_indirect_tailcall_p (rtx call)
19658 rtx pat = PATTERN (call);
19660 /* Indirect tail call. */
19661 pat = XVECEXP (pat, 0, 0);
19662 if (GET_CODE (pat) == SET)
19663 pat = SET_SRC (pat);
19665 pat = XEXP (XEXP (pat, 0), 0);
19666 return REG_P (pat);
19669 /* Return true if r3 is used by any of the tail call insns in the
19670 current function. */
19671 static bool
19672 any_sibcall_could_use_r3 (void)
19674 edge_iterator ei;
19675 edge e;
19677 if (!crtl->tail_call_emit)
19678 return false;
19679 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
19680 if (e->flags & EDGE_SIBCALL)
19682 rtx_insn *call = BB_END (e->src);
19683 if (!CALL_P (call))
19684 call = prev_nonnote_nondebug_insn (call);
19685 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
19686 if (find_regno_fusage (call, USE, 3)
19687 || is_indirect_tailcall_p (call))
19688 return true;
19690 return false;
19694 /* Compute the distance from register FROM to register TO.
19695 These can be the arg pointer (26), the soft frame pointer (25),
19696 the stack pointer (13) or the hard frame pointer (11).
19697 In thumb mode r7 is used as the soft frame pointer, if needed.
19698 Typical stack layout looks like this:
19700 old stack pointer -> | |
19701 ----
19702 | | \
19703 | | saved arguments for
19704 | | vararg functions
19705 | | /
19707 hard FP & arg pointer -> | | \
19708 | | stack
19709 | | frame
19710 | | /
19712 | | \
19713 | | call saved
19714 | | registers
19715 soft frame pointer -> | | /
19717 | | \
19718 | | local
19719 | | variables
19720 locals base pointer -> | | /
19722 | | \
19723 | | outgoing
19724 | | arguments
19725 current stack pointer -> | | /
19728 For a given function some or all of these stack components
19729 may not be needed, giving rise to the possibility of
19730 eliminating some of the registers.
19732 The values returned by this function must reflect the behavior
19733 of arm_expand_prologue() and arm_compute_save_reg_mask().
19735 The sign of the number returned reflects the direction of stack
19736 growth, so the values are positive for all eliminations except
19737 from the soft frame pointer to the hard frame pointer.
19739 SFP may point just inside the local variables block to ensure correct
19740 alignment. */
19743 /* Calculate stack offsets. These are used to calculate register elimination
19744 offsets and in prologue/epilogue code. Also calculates which registers
19745 should be saved. */
19747 static arm_stack_offsets *
19748 arm_get_frame_offsets (void)
19750 struct arm_stack_offsets *offsets;
19751 unsigned long func_type;
19752 int leaf;
19753 int saved;
19754 int core_saved;
19755 HOST_WIDE_INT frame_size;
19756 int i;
19758 offsets = &cfun->machine->stack_offsets;
19760 /* We need to know if we are a leaf function. Unfortunately, it
19761 is possible to be called after start_sequence has been called,
19762 which causes get_insns to return the insns for the sequence,
19763 not the function, which will cause leaf_function_p to return
19764 the incorrect result.
19766 However, we only need to know about leaf functions once reload has
19767 completed, and the frame size cannot be changed after that time, so
19768 we can safely use the cached value. */
19770 if (reload_completed)
19771 return offsets;
19773 /* Initially this is the size of the local variables. It will be translated
19774 into an offset once we have determined the size of preceding data. */
19775 frame_size = ROUND_UP_WORD (get_frame_size ());
19777 leaf = leaf_function_p ();
19779 /* Space for variadic functions. */
19780 offsets->saved_args = crtl->args.pretend_args_size;
19782 /* In Thumb mode this is incorrect, but never used. */
19783 offsets->frame
19784 = (offsets->saved_args
19785 + arm_compute_static_chain_stack_bytes ()
19786 + (frame_pointer_needed ? 4 : 0));
19788 if (TARGET_32BIT)
19790 unsigned int regno;
19792 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
19793 core_saved = bit_count (offsets->saved_regs_mask) * 4;
19794 saved = core_saved;
19796 /* We know that SP will be doubleword aligned on entry, and we must
19797 preserve that condition at any subroutine call. We also require the
19798 soft frame pointer to be doubleword aligned. */
19800 if (TARGET_REALLY_IWMMXT)
19802 /* Check for the call-saved iWMMXt registers. */
19803 for (regno = FIRST_IWMMXT_REGNUM;
19804 regno <= LAST_IWMMXT_REGNUM;
19805 regno++)
19806 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
19807 saved += 8;
19810 func_type = arm_current_func_type ();
19811 /* Space for saved VFP registers. */
19812 if (! IS_VOLATILE (func_type)
19813 && TARGET_HARD_FLOAT)
19814 saved += arm_get_vfp_saved_size ();
19816 else /* TARGET_THUMB1 */
19818 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
19819 core_saved = bit_count (offsets->saved_regs_mask) * 4;
19820 saved = core_saved;
19821 if (TARGET_BACKTRACE)
19822 saved += 16;
19825 /* Saved registers include the stack frame. */
19826 offsets->saved_regs
19827 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
19828 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
19830 /* A leaf function does not need any stack alignment if it has nothing
19831 on the stack. */
19832 if (leaf && frame_size == 0
19833 /* However if it calls alloca(), we have a dynamically allocated
19834 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
19835 && ! cfun->calls_alloca)
19837 offsets->outgoing_args = offsets->soft_frame;
19838 offsets->locals_base = offsets->soft_frame;
19839 return offsets;
19842 /* Ensure SFP has the correct alignment. */
19843 if (ARM_DOUBLEWORD_ALIGN
19844 && (offsets->soft_frame & 7))
19846 offsets->soft_frame += 4;
19847 /* Try to align stack by pushing an extra reg. Don't bother doing this
19848 when there is a stack frame as the alignment will be rolled into
19849 the normal stack adjustment. */
19850 if (frame_size + crtl->outgoing_args_size == 0)
19852 int reg = -1;
19854 /* Register r3 is caller-saved. Normally it does not need to be
19855 saved on entry by the prologue. However if we choose to save
19856 it for padding then we may confuse the compiler into thinking
19857 a prologue sequence is required when in fact it is not. This
19858 will occur when shrink-wrapping if r3 is used as a scratch
19859 register and there are no other callee-saved writes.
19861 This situation can be avoided when other callee-saved registers
19862 are available and r3 is not mandatory: in that case we prefer a
19863 callee-saved register for the padding. */
19864 bool prefer_callee_reg_p = false;
19866 /* If it is safe to use r3, then do so. This sometimes
19867 generates better code on Thumb-2 by avoiding the need to
19868 use 32-bit push/pop instructions. */
19869 if (! any_sibcall_could_use_r3 ()
19870 && arm_size_return_regs () <= 12
19871 && (offsets->saved_regs_mask & (1 << 3)) == 0
19872 && (TARGET_THUMB2
19873 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
19875 reg = 3;
19876 if (!TARGET_THUMB2)
19877 prefer_callee_reg_p = true;
19879 if (reg == -1
19880 || prefer_callee_reg_p)
19882 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
19884 /* Avoid fixed registers; they may be changed at
19885 arbitrary times so it's unsafe to restore them
19886 during the epilogue. */
19887 if (!fixed_regs[i]
19888 && (offsets->saved_regs_mask & (1 << i)) == 0)
19890 reg = i;
19891 break;
19896 if (reg != -1)
19898 offsets->saved_regs += 4;
19899 offsets->saved_regs_mask |= (1 << reg);
19904 offsets->locals_base = offsets->soft_frame + frame_size;
19905 offsets->outgoing_args = (offsets->locals_base
19906 + crtl->outgoing_args_size);
19908 if (ARM_DOUBLEWORD_ALIGN)
19910 /* Ensure SP remains doubleword aligned. */
19911 if (offsets->outgoing_args & 7)
19912 offsets->outgoing_args += 4;
19913 gcc_assert (!(offsets->outgoing_args & 7));
19916 return offsets;
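/* A rough worked example for a 32-bit function that saves {r4, r5, lr}
   (12 bytes), has 16 bytes of locals, no pretend args, no static chain,
   no coprocessor saves, no outgoing arguments and a zero-sized
   interworking slot:

     saved_args    = 0
     saved_regs    = 12
     soft_frame    = 16   (12 rounded up for doubleword alignment)
     locals_base   = 32
     outgoing_args = 32.  */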
19920 /* Calculate the relative offsets for the different stack pointers. Positive
19921 offsets are in the direction of stack growth. */
19923 HOST_WIDE_INT
19924 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19926 arm_stack_offsets *offsets;
19928 offsets = arm_get_frame_offsets ();
19930 /* OK, now we have enough information to compute the distances.
19931 There must be an entry in these switch tables for each pair
19932 of registers in ELIMINABLE_REGS, even if some of the entries
19933 seem to be redundant or useless. */
19934 switch (from)
19936 case ARG_POINTER_REGNUM:
19937 switch (to)
19939 case THUMB_HARD_FRAME_POINTER_REGNUM:
19940 return 0;
19942 case FRAME_POINTER_REGNUM:
19943 /* This is the reverse of the soft frame pointer
19944 to hard frame pointer elimination below. */
19945 return offsets->soft_frame - offsets->saved_args;
19947 case ARM_HARD_FRAME_POINTER_REGNUM:
19948 /* This is only non-zero in the case where the static chain register
19949 is stored above the frame. */
19950 return offsets->frame - offsets->saved_args - 4;
19952 case STACK_POINTER_REGNUM:
19953 /* If nothing has been pushed on the stack at all
19954 then this will return -4. This *is* correct! */
19955 return offsets->outgoing_args - (offsets->saved_args + 4);
19957 default:
19958 gcc_unreachable ();
19960 gcc_unreachable ();
19962 case FRAME_POINTER_REGNUM:
19963 switch (to)
19965 case THUMB_HARD_FRAME_POINTER_REGNUM:
19966 return 0;
19968 case ARM_HARD_FRAME_POINTER_REGNUM:
19969 /* The hard frame pointer points to the top entry in the
19970 stack frame. The soft frame pointer to the bottom entry
19971 in the stack frame. If there is no stack frame at all,
19972 then they are identical. */
19974 return offsets->frame - offsets->soft_frame;
19976 case STACK_POINTER_REGNUM:
19977 return offsets->outgoing_args - offsets->soft_frame;
19979 default:
19980 gcc_unreachable ();
19982 gcc_unreachable ();
19984 default:
19985 /* You cannot eliminate from the stack pointer.
19986 In theory you could eliminate from the hard frame
19987 pointer to the stack pointer, but this will never
19988 happen, since if a stack frame is not needed the
19989 hard frame pointer will never be used. */
19990 gcc_unreachable ();
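/* Continuing the worked example given after arm_get_frame_offsets:
   eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM yields
   outgoing_args - (saved_args + 4) = 32 - 4 = 28, while eliminating
   FRAME_POINTER_REGNUM into STACK_POINTER_REGNUM yields
   outgoing_args - soft_frame = 32 - 16 = 16.  */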
19994 /* Given FROM and TO register numbers, say whether this elimination is
19995 allowed. Frame pointer elimination is automatically handled.
19997 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
19998 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
19999 pointer, we must eliminate FRAME_POINTER_REGNUM into
20000 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20001 ARG_POINTER_REGNUM. */
20003 bool
20004 arm_can_eliminate (const int from, const int to)
20006 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20007 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20008 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20009 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20010 true);
20013 /* Emit RTL to save coprocessor registers on function entry. Returns the
20014 number of bytes pushed. */
20016 static int
20017 arm_save_coproc_regs(void)
20019 int saved_size = 0;
20020 unsigned reg;
20021 unsigned start_reg;
20022 rtx insn;
20024 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20025 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20027 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20028 insn = gen_rtx_MEM (V2SImode, insn);
20029 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20030 RTX_FRAME_RELATED_P (insn) = 1;
20031 saved_size += 8;
20034 if (TARGET_HARD_FLOAT)
20036 start_reg = FIRST_VFP_REGNUM;
20038 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20040 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20041 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20043 if (start_reg != reg)
20044 saved_size += vfp_emit_fstmd (start_reg,
20045 (reg - start_reg) / 2);
20046 start_reg = reg + 2;
20049 if (start_reg != reg)
20050 saved_size += vfp_emit_fstmd (start_reg,
20051 (reg - start_reg) / 2);
20053 return saved_size;
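/* As an example of the VFP grouping above: if d8, d9 and d11 are live and
   call-saved while d10 is not, the loop flushes one FSTMD/VPUSH covering
   d8-d9 when the gap at d10 is found, and a separate one for d11 at the
   end of the scan.  */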
20057 /* Set the Thumb frame pointer from the stack pointer. */
20059 static void
20060 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20062 HOST_WIDE_INT amount;
20063 rtx insn, dwarf;
20065 amount = offsets->outgoing_args - offsets->locals_base;
20066 if (amount < 1024)
20067 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20068 stack_pointer_rtx, GEN_INT (amount)));
20069 else
20071 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20072 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20073 expects the first two operands to be the same. */
20074 if (TARGET_THUMB2)
20076 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20077 stack_pointer_rtx,
20078 hard_frame_pointer_rtx));
20080 else
20082 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20083 hard_frame_pointer_rtx,
20084 stack_pointer_rtx));
20086 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
20087 plus_constant (Pmode, stack_pointer_rtx, amount));
20088 RTX_FRAME_RELATED_P (dwarf) = 1;
20089 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20092 RTX_FRAME_RELATED_P (insn) = 1;
20095 struct scratch_reg {
20096 rtx reg;
20097 bool saved;
20100 /* Return a short-lived scratch register for use as a 2nd scratch register on
20101 function entry after the registers are saved in the prologue. This register
20102 must be released by means of release_scratch_register_on_entry. IP is not
20103 considered since it is always used as the 1st scratch register if available.
20105 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
20106 mask of live registers. */
20108 static void
20109 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
20110 unsigned long live_regs)
20112 int regno = -1;
20114 sr->saved = false;
20116 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
20117 regno = LR_REGNUM;
20118 else
20120 unsigned int i;
20122 for (i = 4; i < 11; i++)
20123 if (regno1 != i && (live_regs & (1 << i)) != 0)
20125 regno = i;
20126 break;
20129 if (regno < 0)
20131 /* If IP is used as the 1st scratch register for a nested function,
20132 then either r3 wasn't available or is used to preserve IP. */
20133 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
20134 regno1 = 3;
20135 regno = (regno1 == 3 ? 2 : 3);
20136 sr->saved
20137 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
20138 regno);
20142 sr->reg = gen_rtx_REG (SImode, regno);
20143 if (sr->saved)
20145 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20146 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
20147 rtx x = gen_rtx_SET (stack_pointer_rtx,
20148 plus_constant (Pmode, stack_pointer_rtx, -4));
20149 RTX_FRAME_RELATED_P (insn) = 1;
20150 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
20154 /* Release a scratch register obtained from the preceding function. */
20156 static void
20157 release_scratch_register_on_entry (struct scratch_reg *sr)
20159 if (sr->saved)
20161 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
20162 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
20163 rtx x = gen_rtx_SET (stack_pointer_rtx,
20164 plus_constant (Pmode, stack_pointer_rtx, 4));
20165 RTX_FRAME_RELATED_P (insn) = 1;
20166 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
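/* The two helpers above are used as a bracket, as in
   arm_emit_probe_stack_range below:

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr, regno1, live_regs);
     ...use sr.reg as a temporary...
     release_scratch_register_on_entry (&sr);

   If no suitable register was free, the bracket degenerates into a push
   of the chosen register before the use and a pop after it.  */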
20170 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
20172 #if PROBE_INTERVAL > 4096
20173 #error Cannot use indexed addressing mode for stack probing
20174 #endif
20176 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
20177 inclusive. These are offsets from the current stack pointer. REGNO1
20178 is the index number of the 1st scratch register and LIVE_REGS is the
20179 mask of live registers. */
20181 static void
20182 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
20183 unsigned int regno1, unsigned long live_regs)
20185 rtx reg1 = gen_rtx_REG (Pmode, regno1);
20187 /* See if we have a constant small number of probes to generate. If so,
20188 that's the easy case. */
20189 if (size <= PROBE_INTERVAL)
20191 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
20192 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
20193 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
20196 /* The run-time loop is made up of 10 insns in the generic case while the
20197 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
20198 else if (size <= 5 * PROBE_INTERVAL)
20200 HOST_WIDE_INT i, rem;
20202 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
20203 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
20204 emit_stack_probe (reg1);
20206 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
20207 it exceeds SIZE. If only two probes are needed, this will not
20208 generate any code. Then probe at FIRST + SIZE. */
20209 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
20211 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
20212 emit_stack_probe (reg1);
20215 rem = size - (i - PROBE_INTERVAL);
20216 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
20218 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
20219 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
20221 else
20222 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
20225 /* Otherwise, do the same as above, but in a loop. Note that we must be
20226 extra careful with variables wrapping around because we might be at
20227 the very top (or the very bottom) of the address space and we have
20228 to be able to handle this case properly; in particular, we use an
20229 equality test for the loop condition. */
20230 else
20232 HOST_WIDE_INT rounded_size;
20233 struct scratch_reg sr;
20235 get_scratch_register_on_entry (&sr, regno1, live_regs);
20237 emit_move_insn (reg1, GEN_INT (first));
20240 /* Step 1: round SIZE to the previous multiple of the interval. */
20242 rounded_size = size & -PROBE_INTERVAL;
20243 emit_move_insn (sr.reg, GEN_INT (rounded_size));
20246 /* Step 2: compute initial and final value of the loop counter. */
20248 /* TEST_ADDR = SP + FIRST. */
20249 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
20251 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
20252 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
20255 /* Step 3: the loop
20259 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
20260 probe at TEST_ADDR
20262 while (TEST_ADDR != LAST_ADDR)
20264 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
20265 until it is equal to ROUNDED_SIZE. */
20267 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
20270 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
20271 that SIZE is equal to ROUNDED_SIZE. */
20273 if (size != rounded_size)
20275 HOST_WIDE_INT rem = size - rounded_size;
20277 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
20279 emit_set_insn (sr.reg,
20280 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
20281 emit_stack_probe (plus_constant (Pmode, sr.reg,
20282 PROBE_INTERVAL - rem));
20284 else
20285 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
20288 release_scratch_register_on_entry (&sr);
20291 /* Make sure nothing is scheduled before we are done. */
20292 emit_insn (gen_blockage ());
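/* A small worked example of the first case above, assuming the default
   4096-byte PROBE_INTERVAL with FIRST == 4096 and SIZE == 1024; the
   emitted sequence is roughly:

     mov  reg1, #8192          @ FIRST + PROBE_INTERVAL
     sub  reg1, sp, reg1
     str  r0, [reg1, #3072]    @ probe at SP - (FIRST + SIZE)

   i.e. a single probe at the far end of the protected area.  */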
20295 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
20296 absolute addresses. */
20298 const char *
20299 output_probe_stack_range (rtx reg1, rtx reg2)
20301 static int labelno = 0;
20302 char loop_lab[32];
20303 rtx xops[2];
20305 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
20307 /* Loop. */
20308 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
20310 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
20311 xops[0] = reg1;
20312 xops[1] = GEN_INT (PROBE_INTERVAL);
20313 output_asm_insn ("sub\t%0, %0, %1", xops);
20315 /* Probe at TEST_ADDR. */
20316 output_asm_insn ("str\tr0, [%0, #0]", xops);
20318 /* Test if TEST_ADDR == LAST_ADDR. */
20319 xops[1] = reg2;
20320 output_asm_insn ("cmp\t%0, %1", xops);
20322 /* Branch. */
20323 fputs ("\tbne\t", asm_out_file);
20324 assemble_name_raw (asm_out_file, loop_lab);
20325 fputc ('\n', asm_out_file);
20327 return "";
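/* With the default 4096-byte PROBE_INTERVAL, and taking r4/r5 as example
   choices for REG1/REG2, the loop printed above is:

     .LPSRL0:
       sub  r4, r4, #4096
       str  r0, [r4, #0]
       cmp  r4, r5
       bne  .LPSRL0
   */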
20330 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20331 function. */
20332 void
20333 arm_expand_prologue (void)
20335 rtx amount;
20336 rtx insn;
20337 rtx ip_rtx;
20338 unsigned long live_regs_mask;
20339 unsigned long func_type;
20340 int fp_offset = 0;
20341 int saved_pretend_args = 0;
20342 int saved_regs = 0;
20343 unsigned HOST_WIDE_INT args_to_push;
20344 HOST_WIDE_INT size;
20345 arm_stack_offsets *offsets;
20346 bool clobber_ip;
20348 func_type = arm_current_func_type ();
20350 /* Naked functions don't have prologues. */
20351 if (IS_NAKED (func_type))
20353 if (flag_stack_usage_info)
20354 current_function_static_stack_size = 0;
20355 return;
20358 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20359 args_to_push = crtl->args.pretend_args_size;
20361 /* Compute which register we will have to save onto the stack. */
20362 offsets = arm_get_frame_offsets ();
20363 live_regs_mask = offsets->saved_regs_mask;
20365 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20367 if (IS_STACKALIGN (func_type))
20369 rtx r0, r1;
20371 /* Handle a word-aligned stack pointer. We generate the following:
20373 mov r0, sp
20374 bic r1, r0, #7
20375 mov sp, r1
20376 <save and restore r0 in normal prologue/epilogue>
20377 mov sp, r0
20378 bx lr
20380 The unwinder doesn't need to know about the stack realignment.
20381 Just tell it we saved SP in r0. */
20382 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20384 r0 = gen_rtx_REG (SImode, R0_REGNUM);
20385 r1 = gen_rtx_REG (SImode, R1_REGNUM);
20387 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20388 RTX_FRAME_RELATED_P (insn) = 1;
20389 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20391 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20393 /* ??? The CFA changes here, which may cause GDB to conclude that it
20394 has entered a different function. That said, the unwind info is
20395 correct, individually, before and after this instruction because
20396 we've described the save of SP, which will override the default
20397 handling of SP as restoring from the CFA. */
20398 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20401 /* The static chain register is the same as the IP register. If it is
20402 clobbered when creating the frame, we need to save and restore it. */
20403 clobber_ip = IS_NESTED (func_type)
20404 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20405 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
20406 && !df_regs_ever_live_p (LR_REGNUM)
20407 && arm_r3_live_at_start_p ()));
20409 /* Find somewhere to store IP whilst the frame is being created.
20410 We try the following places in order:
20412 1. The last argument register r3 if it is available.
20413 2. A slot on the stack above the frame if there are no
20414 arguments to push onto the stack.
20415 3. Register r3 again, after pushing the argument registers
20416 onto the stack, if this is a varargs function.
20417 4. The last slot on the stack created for the arguments to
20418 push, if this isn't a varargs function.
20420 Note - we only need to tell the dwarf2 backend about the SP
20421 adjustment in the second variant; the static chain register
20422 doesn't need to be unwound, as it doesn't contain a value
20423 inherited from the caller. */
20424 if (clobber_ip)
20426 if (!arm_r3_live_at_start_p ())
20427 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20428 else if (args_to_push == 0)
20430 rtx addr, dwarf;
20432 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20433 saved_regs += 4;
20435 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20436 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
20437 fp_offset = 4;
20439 /* Just tell the dwarf backend that we adjusted SP. */
20440 dwarf = gen_rtx_SET (stack_pointer_rtx,
20441 plus_constant (Pmode, stack_pointer_rtx,
20442 -fp_offset));
20443 RTX_FRAME_RELATED_P (insn) = 1;
20444 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20446 else
20448 /* Store the args on the stack. */
20449 if (cfun->machine->uses_anonymous_args)
20451 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
20452 (0xf0 >> (args_to_push / 4)) & 0xf);
20453 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20454 saved_pretend_args = 1;
20456 else
20458 rtx addr, dwarf;
20460 if (args_to_push == 4)
20461 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20462 else
20463 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
20464 plus_constant (Pmode,
20465 stack_pointer_rtx,
20466 -args_to_push));
20468 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
20470 /* Just tell the dwarf backend that we adjusted SP. */
20471 dwarf = gen_rtx_SET (stack_pointer_rtx,
20472 plus_constant (Pmode, stack_pointer_rtx,
20473 -args_to_push));
20474 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20477 RTX_FRAME_RELATED_P (insn) = 1;
20478 fp_offset = args_to_push;
20479 args_to_push = 0;
20483 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20485 if (IS_INTERRUPT (func_type))
20487 /* Interrupt functions must not corrupt any registers.
19488 Creating a frame pointer, however, corrupts the IP
20489 register, so we must push it first. */
20490 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
20492 /* Do not set RTX_FRAME_RELATED_P on this insn.
20493 The dwarf stack unwinding code only wants to see one
20494 stack decrement per function, and this is not it. If
20495 this instruction is labeled as being part of the frame
20496 creation sequence then dwarf2out_frame_debug_expr will
20497 die when it encounters the assignment of IP to FP
20498 later on, since the use of SP here establishes SP as
20499 the CFA register and not IP.
20501 Anyway this instruction is not really part of the stack
20502 frame creation although it is part of the prologue. */
20505 insn = emit_set_insn (ip_rtx,
20506 plus_constant (Pmode, stack_pointer_rtx,
20507 fp_offset));
20508 RTX_FRAME_RELATED_P (insn) = 1;
20511 if (args_to_push)
20513 /* Push the argument registers, or reserve space for them. */
20514 if (cfun->machine->uses_anonymous_args)
20515 insn = emit_multi_reg_push
20516 ((0xf0 >> (args_to_push / 4)) & 0xf,
20517 (0xf0 >> (args_to_push / 4)) & 0xf);
20518 else
20519 insn = emit_insn
20520 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20521 GEN_INT (- args_to_push)));
20522 RTX_FRAME_RELATED_P (insn) = 1;
20525 /* If this is an interrupt service routine, and the link register
19526 is going to be pushed, and we're not generating an extra
19527 push of IP (needed when a frame is required and the frame layout is APCS),
20528 subtracting four from LR now will mean that the function return
20529 can be done with a single instruction. */
20530 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
20531 && (live_regs_mask & (1 << LR_REGNUM)) != 0
20532 && !(frame_pointer_needed && TARGET_APCS_FRAME)
20533 && TARGET_ARM)
20535 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
20537 emit_set_insn (lr, plus_constant (SImode, lr, -4));
20540 if (live_regs_mask)
20542 unsigned long dwarf_regs_mask = live_regs_mask;
20544 saved_regs += bit_count (live_regs_mask) * 4;
20545 if (optimize_size && !frame_pointer_needed
20546 && saved_regs == offsets->saved_regs - offsets->saved_args)
20548 /* If no coprocessor registers are being pushed and we don't have
20549 to worry about a frame pointer then push extra registers to
19550 create the stack frame. This is done in a way that does not
19551 alter the frame layout, so it is independent of the epilogue. */
20552 int n;
20553 int frame;
20554 n = 0;
20555 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
20556 n++;
20557 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
20558 if (frame && n * 4 >= frame)
20560 n = frame / 4;
20561 live_regs_mask |= (1 << n) - 1;
20562 saved_regs += frame;
20566 if (TARGET_LDRD
20567 && current_tune->prefer_ldrd_strd
20568 && !optimize_function_for_size_p (cfun))
20570 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
20571 if (TARGET_THUMB2)
20572 thumb2_emit_strd_push (live_regs_mask);
20573 else if (TARGET_ARM
20574 && !TARGET_APCS_FRAME
20575 && !IS_INTERRUPT (func_type))
20576 arm_emit_strd_push (live_regs_mask);
20577 else
20579 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
20580 RTX_FRAME_RELATED_P (insn) = 1;
20583 else
20585 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
20586 RTX_FRAME_RELATED_P (insn) = 1;
20590 if (! IS_VOLATILE (func_type))
20591 saved_regs += arm_save_coproc_regs ();
20593 if (frame_pointer_needed && TARGET_ARM)
20595 /* Create the new frame pointer. */
20596 if (TARGET_APCS_FRAME)
20598 insn = GEN_INT (-(4 + args_to_push + fp_offset));
20599 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
20600 RTX_FRAME_RELATED_P (insn) = 1;
20602 else
20604 insn = GEN_INT (saved_regs - (4 + fp_offset));
20605 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20606 stack_pointer_rtx, insn));
20607 RTX_FRAME_RELATED_P (insn) = 1;
20611 size = offsets->outgoing_args - offsets->saved_args;
20612 if (flag_stack_usage_info)
20613 current_function_static_stack_size = size;
20615 /* If this isn't an interrupt service routine and we have a frame, then do
20616 stack checking. We use IP as the first scratch register, except for the
20617 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
20618 if (!IS_INTERRUPT (func_type)
20619 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
20621 unsigned int regno;
20623 if (!IS_NESTED (func_type) || clobber_ip)
20624 regno = IP_REGNUM;
20625 else if (df_regs_ever_live_p (LR_REGNUM))
20626 regno = LR_REGNUM;
20627 else
20628 regno = 3;
20630 if (crtl->is_leaf && !cfun->calls_alloca)
20632 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
20633 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
20634 size - STACK_CHECK_PROTECT,
20635 regno, live_regs_mask);
20637 else if (size > 0)
20638 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
20639 regno, live_regs_mask);
20642 /* Recover the static chain register. */
20643 if (clobber_ip)
20645 if (!arm_r3_live_at_start_p () || saved_pretend_args)
20646 insn = gen_rtx_REG (SImode, 3);
20647 else
20649 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
20650 insn = gen_frame_mem (SImode, insn);
20652 emit_set_insn (ip_rtx, insn);
20653 emit_insn (gen_force_register_use (ip_rtx));
20656 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
20658 /* This add can produce multiple insns for a large constant, so we
20659 need to get tricky. */
20660 rtx_insn *last = get_last_insn ();
20662 amount = GEN_INT (offsets->saved_args + saved_regs
20663 - offsets->outgoing_args);
20665 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20666 amount));
20669 last = last ? NEXT_INSN (last) : get_insns ();
20670 RTX_FRAME_RELATED_P (last) = 1;
20672 while (last != insn);
20674 /* If the frame pointer is needed, emit a special barrier that
20675 will prevent the scheduler from moving stores to the frame
20676 before the stack adjustment. */
20677 if (frame_pointer_needed)
20678 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
20679 hard_frame_pointer_rtx));
20683 if (frame_pointer_needed && TARGET_THUMB2)
20684 thumb_set_frame_pointer (offsets);
20686 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20688 unsigned long mask;
20690 mask = live_regs_mask;
20691 mask &= THUMB2_WORK_REGS;
20692 if (!IS_NESTED (func_type))
20693 mask |= (1 << IP_REGNUM);
20694 arm_load_pic_register (mask);
20697 /* If we are profiling, make sure no instructions are scheduled before
20698 the call to mcount. Similarly if the user has requested no
20699 scheduling in the prolog. Similarly if we want non-call exceptions
20700 using the EABI unwinder, to prevent faulting instructions from being
20701 swapped with a stack adjustment. */
20702 if (crtl->profile || !TARGET_SCHED_PROLOG
20703 || (arm_except_unwind_info (&global_options) == UI_TARGET
20704 && cfun->can_throw_non_call_exceptions))
20705 emit_insn (gen_blockage ());
20707 /* If the link register is being kept alive, with the return address in it,
20708 then make sure that it does not get reused by the ce2 pass. */
20709 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
20710 cfun->machine->lr_save_eliminated = 1;
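/* Putting the pieces together, a typical -mapcs-frame ARM prologue with a
   frame pointer looks roughly like:

     mov   ip, sp
     push  {fp, ip, lr, pc}        @ plus any other live registers
     sub   fp, ip, #4
     sub   sp, sp, #<frame size>

   while the non-APCS paths simply push the live registers (or use
   STRD-based pushes) and then drop SP by the remaining frame size.  */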
20713 /* Print condition code to STREAM. Helper function for arm_print_operand. */
20714 static void
20715 arm_print_condition (FILE *stream)
20717 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
20719 /* Branch conversion is not implemented for Thumb-2. */
20720 if (TARGET_THUMB)
20722 output_operand_lossage ("predicated Thumb instruction");
20723 return;
20725 if (current_insn_predicate != NULL)
20727 output_operand_lossage
20728 ("predicated instruction in conditional sequence");
20729 return;
20732 fputs (arm_condition_codes[arm_current_cc], stream);
20734 else if (current_insn_predicate)
20736 enum arm_cond_code code;
20738 if (TARGET_THUMB1)
20740 output_operand_lossage ("predicated Thumb instruction");
20741 return;
20744 code = get_arm_condition_code (current_insn_predicate);
20745 fputs (arm_condition_codes[code], stream);
20750 /* Globally reserved letters: acln
20751 Punctuation letters currently used: @_|?().!#
20752 Lower case letters currently used: bcdefhimpqtvwxyz
20753 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
20754 Letters previously used, but now deprecated/obsolete: sVWXYZ.
20756 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
20758 If CODE is 'd', then the X is a condition operand and the instruction
20759 should only be executed if the condition is true.
20760 if CODE is 'D', then the X is a condition operand and the instruction
20761 should only be executed if the condition is false: however, if the mode
20762 of the comparison is CCFPEmode, then always execute the instruction -- we
20763 do this because in these circumstances !GE does not necessarily imply LT;
20764 in these cases the instruction pattern will take care to make sure that
20765 an instruction containing %d will follow, thereby undoing the effects of
20766 doing this instruction unconditionally.
20767 If CODE is 'N' then X is a floating point operand that must be negated
20768 before output.
20769 If CODE is 'B' then output a bitwise inverted value of X (a const int).
20770 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
20771 static void
20772 arm_print_operand (FILE *stream, rtx x, int code)
20774 switch (code)
20776 case '@':
20777 fputs (ASM_COMMENT_START, stream);
20778 return;
20780 case '_':
20781 fputs (user_label_prefix, stream);
20782 return;
20784 case '|':
20785 fputs (REGISTER_PREFIX, stream);
20786 return;
20788 case '?':
20789 arm_print_condition (stream);
20790 return;
20792 case '.':
20793 /* The current condition code for a condition code setting instruction.
20794 Preceded by 's' in unified syntax, otherwise followed by 's'. */
20795 fputc('s', stream);
20796 arm_print_condition (stream);
20797 return;
20799 case '!':
20800 /* If the instruction is conditionally executed then print
20801 the current condition code, otherwise print 's'. */
20802 gcc_assert (TARGET_THUMB2);
20803 if (current_insn_predicate)
20804 arm_print_condition (stream);
20805 else
20806 fputc('s', stream);
20807 break;
20809 /* %# is a "break" sequence. It doesn't output anything, but is used to
20810 separate e.g. operand numbers from following text, if that text consists
20811 of further digits which we don't want to be part of the operand
20812 number. */
20813 case '#':
20814 return;
20816 case 'N':
20818 REAL_VALUE_TYPE r;
20819 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
20820 fprintf (stream, "%s", fp_const_from_val (&r));
20822 return;
20824 /* An integer or symbol address without a preceding # sign. */
20825 case 'c':
20826 switch (GET_CODE (x))
20828 case CONST_INT:
20829 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
20830 break;
20832 case SYMBOL_REF:
20833 output_addr_const (stream, x);
20834 break;
20836 case CONST:
20837 if (GET_CODE (XEXP (x, 0)) == PLUS
20838 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
20840 output_addr_const (stream, x);
20841 break;
20843 /* Fall through. */
20845 default:
20846 output_operand_lossage ("Unsupported operand for code '%c'", code);
20848 return;
20850 /* An integer that we want to print in HEX. */
20851 case 'x':
20852 switch (GET_CODE (x))
20854 case CONST_INT:
20855 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
20856 break;
20858 default:
20859 output_operand_lossage ("Unsupported operand for code '%c'", code);
20861 return;
20863 case 'B':
20864 if (CONST_INT_P (x))
20866 HOST_WIDE_INT val;
20867 val = ARM_SIGN_EXTEND (~INTVAL (x));
20868 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
20870 else
20872 putc ('~', stream);
20873 output_addr_const (stream, x);
20875 return;
20877 case 'b':
20878 /* Print the log2 of a CONST_INT. */
20880 HOST_WIDE_INT val;
20882 if (!CONST_INT_P (x)
20883 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
20884 output_operand_lossage ("Unsupported operand for code '%c'", code);
20885 else
20886 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
20888 return;
20890 case 'L':
20891 /* The low 16 bits of an immediate constant. */
20892 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
20893 return;
20895 case 'i':
20896 fprintf (stream, "%s", arithmetic_instr (x, 1));
20897 return;
20899 case 'I':
20900 fprintf (stream, "%s", arithmetic_instr (x, 0));
20901 return;
20903 case 'S':
20905 HOST_WIDE_INT val;
20906 const char *shift;
20908 shift = shift_op (x, &val);
20910 if (shift)
20912 fprintf (stream, ", %s ", shift);
20913 if (val == -1)
20914 arm_print_operand (stream, XEXP (x, 1), 0);
20915 else
20916 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
20919 return;
20921 /* An explanation of the 'Q', 'R' and 'H' register operands:
20923 In a pair of registers containing a DI or DF value the 'Q'
20924 operand returns the register number of the register containing
20925 the least significant part of the value. The 'R' operand returns
20926 the register number of the register containing the most
20927 significant part of the value.
20929 The 'H' operand returns the higher of the two register numbers.
20930 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
20931 same as the 'Q' operand, since the most significant part of the
20932 value is held in the lower number register. The reverse is true
20933 on systems where WORDS_BIG_ENDIAN is false.
20935 The purpose of these operands is to distinguish between cases
20936 where the endian-ness of the values is important (for example
20937 when they are added together), and cases where the endian-ness
20938 is irrelevant, but the order of register operations is important.
20939 For example when loading a value from memory into a register
20940 pair, the endian-ness does not matter. Provided that the value
20941 from the lower memory address is put into the lower numbered
20942 register, and the value from the higher address is put into the
20943 higher numbered register, the load will work regardless of whether
20944 the value being loaded is big-wordian or little-wordian. The
20945 order of the two register loads can matter however, if the address
20946 of the memory location is actually held in one of the registers
20947 being overwritten by the load.
20949 The 'Q' and 'R' constraints are also available for 64-bit
20950 constants. */
20951 case 'Q':
20952 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
20954 rtx part = gen_lowpart (SImode, x);
20955 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
20956 return;
20959 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20961 output_operand_lossage ("invalid operand for code '%c'", code);
20962 return;
20965 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
20966 return;
20968 case 'R':
20969 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
20971 machine_mode mode = GET_MODE (x);
20972 rtx part;
20974 if (mode == VOIDmode)
20975 mode = DImode;
20976 part = gen_highpart_mode (SImode, mode, x);
20977 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
20978 return;
20981 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20983 output_operand_lossage ("invalid operand for code '%c'", code);
20984 return;
20987 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
20988 return;
20990 case 'H':
20991 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20993 output_operand_lossage ("invalid operand for code '%c'", code);
20994 return;
20997 asm_fprintf (stream, "%r", REGNO (x) + 1);
20998 return;
21000 case 'J':
21001 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21003 output_operand_lossage ("invalid operand for code '%c'", code);
21004 return;
21007 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21008 return;
21010 case 'K':
21011 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21013 output_operand_lossage ("invalid operand for code '%c'", code);
21014 return;
21017 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21018 return;
21020 case 'm':
21021 asm_fprintf (stream, "%r",
21022 REG_P (XEXP (x, 0))
21023 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21024 return;
21026 case 'M':
21027 asm_fprintf (stream, "{%r-%r}",
21028 REGNO (x),
21029 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21030 return;
21032 /* Like 'M', but writing doubleword vector registers, for use by Neon
21033 insns. */
21034 case 'h':
21036 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21037 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21038 if (numregs == 1)
21039 asm_fprintf (stream, "{d%d}", regno);
21040 else
21041 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21043 return;
21045 case 'd':
21046 /* CONST_TRUE_RTX means always -- that's the default. */
21047 if (x == const_true_rtx)
21048 return;
21050 if (!COMPARISON_P (x))
21052 output_operand_lossage ("invalid operand for code '%c'", code);
21053 return;
21056 fputs (arm_condition_codes[get_arm_condition_code (x)],
21057 stream);
21058 return;
21060 case 'D':
21061 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21062 want to do that. */
21063 if (x == const_true_rtx)
21065 output_operand_lossage ("instruction never executed");
21066 return;
21068 if (!COMPARISON_P (x))
21070 output_operand_lossage ("invalid operand for code '%c'", code);
21071 return;
21074 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21075 (get_arm_condition_code (x))],
21076 stream);
21077 return;
21079 case 's':
21080 case 'V':
21081 case 'W':
21082 case 'X':
21083 case 'Y':
21084 case 'Z':
21085 /* Former Maverick support, removed after GCC-4.7. */
21086 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21087 return;
21089 case 'U':
21090 if (!REG_P (x)
21091 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21092 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21093 /* Bad value for wCG register number. */
21095 output_operand_lossage ("invalid operand for code '%c'", code);
21096 return;
21099 else
21100 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21101 return;
21103 /* Print an iWMMXt control register name. */
21104 case 'w':
21105 if (!CONST_INT_P (x)
21106 || INTVAL (x) < 0
21107 || INTVAL (x) >= 16)
21108 /* Bad value for wC register number. */
21110 output_operand_lossage ("invalid operand for code '%c'", code);
21111 return;
21114 else
21116 static const char * wc_reg_names [16] =
21118 "wCID", "wCon", "wCSSF", "wCASF",
21119 "wC4", "wC5", "wC6", "wC7",
21120 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21121 "wC12", "wC13", "wC14", "wC15"
21124 fputs (wc_reg_names [INTVAL (x)], stream);
21126 return;
21128 /* Print the high single-precision register of a VFP double-precision
21129 register. */
21130 case 'p':
21132 machine_mode mode = GET_MODE (x);
21133 int regno;
21135 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21137 output_operand_lossage ("invalid operand for code '%c'", code);
21138 return;
21141 regno = REGNO (x);
21142 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21144 output_operand_lossage ("invalid operand for code '%c'", code);
21145 return;
21148 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21150 return;
21152 /* Print a VFP/Neon double precision or quad precision register name. */
21153 case 'P':
21154 case 'q':
21156 machine_mode mode = GET_MODE (x);
21157 int is_quad = (code == 'q');
21158 int regno;
21160 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21162 output_operand_lossage ("invalid operand for code '%c'", code);
21163 return;
21166 if (!REG_P (x)
21167 || !IS_VFP_REGNUM (REGNO (x)))
21169 output_operand_lossage ("invalid operand for code '%c'", code);
21170 return;
21173 regno = REGNO (x);
21174 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21175 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21177 output_operand_lossage ("invalid operand for code '%c'", code);
21178 return;
21181 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21182 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21184 return;
21186 /* These two codes print the low/high doubleword register of a Neon quad
21187 register, respectively. For pair-structure types, can also print
21188 low/high quadword registers. */
21189 case 'e':
21190 case 'f':
21192 machine_mode mode = GET_MODE (x);
21193 int regno;
21195 if ((GET_MODE_SIZE (mode) != 16
21196 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21198 output_operand_lossage ("invalid operand for code '%c'", code);
21199 return;
21202 regno = REGNO (x);
21203 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21205 output_operand_lossage ("invalid operand for code '%c'", code);
21206 return;
21209 if (GET_MODE_SIZE (mode) == 16)
21210 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21211 + (code == 'f' ? 1 : 0));
21212 else
21213 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21214 + (code == 'f' ? 1 : 0));
21216 return;
21218 /* Print a VFPv3 floating-point constant, represented as an integer
21219 index. */
21220 case 'G':
21222 int index = vfp3_const_double_index (x);
21223 gcc_assert (index != -1);
21224 fprintf (stream, "%d", index);
21226 return;
21228 /* Print bits representing opcode features for Neon.
21230 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21231 and polynomials as unsigned.
21233 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21235 Bit 2 is 1 for rounding functions, 0 otherwise. */
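/* For example (value illustrative): bits == 5 (binary 101, a signed integer
   operation with rounding) makes 'T' print 's', 'F' print 'i', 't' print 's'
   and 'O' print "r".  */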
21237 /* Identify the type as 's', 'u', 'p' or 'f'. */
21238 case 'T':
21240 HOST_WIDE_INT bits = INTVAL (x);
21241 fputc ("uspf"[bits & 3], stream);
21243 return;
21245 /* Likewise, but signed and unsigned integers are both 'i'. */
21246 case 'F':
21248 HOST_WIDE_INT bits = INTVAL (x);
21249 fputc ("iipf"[bits & 3], stream);
21251 return;
21253 /* As for 'T', but emit 'u' instead of 'p'. */
21254 case 't':
21256 HOST_WIDE_INT bits = INTVAL (x);
21257 fputc ("usuf"[bits & 3], stream);
21259 return;
21261 /* Bit 2: rounding (vs none). */
21262 case 'O':
21264 HOST_WIDE_INT bits = INTVAL (x);
21265 fputs ((bits & 4) != 0 ? "r" : "", stream);
21267 return;
21269 /* Memory operand for vld1/vst1 instruction. */
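/* For example (registers illustrative): a 16-byte access whose alignment is
   known to be at least 16 bytes prints as "[r0:128]"; "!" is appended for a
   post-increment address and ", r1" for a post-modify by register.  */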
21270 case 'A':
21272 rtx addr;
21273 bool postinc = FALSE;
21274 rtx postinc_reg = NULL;
21275 unsigned align, memsize, align_bits;
21277 gcc_assert (MEM_P (x));
21278 addr = XEXP (x, 0);
21279 if (GET_CODE (addr) == POST_INC)
21281 postinc = 1;
21282 addr = XEXP (addr, 0);
21284 if (GET_CODE (addr) == POST_MODIFY)
21286 postinc_reg = XEXP( XEXP (addr, 1), 1);
21287 addr = XEXP (addr, 0);
21289 asm_fprintf (stream, "[%r", REGNO (addr));
21291 /* We know the alignment of this access, so we can emit a hint in the
21292 instruction (for some alignments) as an aid to the memory subsystem
21293 of the target. */
21294 align = MEM_ALIGN (x) >> 3;
21295 memsize = MEM_SIZE (x);
21297 /* Only certain alignment specifiers are supported by the hardware. */
21298 if (memsize == 32 && (align % 32) == 0)
21299 align_bits = 256;
21300 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21301 align_bits = 128;
21302 else if (memsize >= 8 && (align % 8) == 0)
21303 align_bits = 64;
21304 else
21305 align_bits = 0;
21307 if (align_bits != 0)
21308 asm_fprintf (stream, ":%d", align_bits);
21310 asm_fprintf (stream, "]");
21312 if (postinc)
21313 fputs("!", stream);
21314 if (postinc_reg)
21315 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
21317 return;
21319 case 'C':
21321 rtx addr;
21323 gcc_assert (MEM_P (x));
21324 addr = XEXP (x, 0);
21325 gcc_assert (REG_P (addr));
21326 asm_fprintf (stream, "[%r]", REGNO (addr));
21328 return;
21330 /* Translate an S register number into a D register number and element index. */
21331 case 'y':
21333 machine_mode mode = GET_MODE (x);
21334 int regno;
21336 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21338 output_operand_lossage ("invalid operand for code '%c'", code);
21339 return;
21342 regno = REGNO (x);
21343 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21345 output_operand_lossage ("invalid operand for code '%c'", code);
21346 return;
21349 regno = regno - FIRST_VFP_REGNUM;
21350 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21352 return;
21354 case 'v':
21355 gcc_assert (CONST_DOUBLE_P (x));
21356 int result;
21357 result = vfp3_const_double_for_fract_bits (x);
21358 if (result == 0)
21359 result = vfp3_const_double_for_bits (x);
21360 fprintf (stream, "#%d", result);
21361 return;
21363 /* Register specifier for vld1.16/vst1.16. Translate the S register
21364 number into a D register number and element index. */
21365 case 'z':
21367 machine_mode mode = GET_MODE (x);
21368 int regno;
21370 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21372 output_operand_lossage ("invalid operand for code '%c'", code);
21373 return;
21376 regno = REGNO (x);
21377 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21379 output_operand_lossage ("invalid operand for code '%c'", code);
21380 return;
21383 regno = regno - FIRST_VFP_REGNUM;
21384 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21386 return;
21388 default:
21389 if (x == 0)
21391 output_operand_lossage ("missing operand");
21392 return;
21395 switch (GET_CODE (x))
21397 case REG:
21398 asm_fprintf (stream, "%r", REGNO (x));
21399 break;
21401 case MEM:
21402 output_address (GET_MODE (x), XEXP (x, 0));
21403 break;
21405 case CONST_DOUBLE:
21407 char fpstr[20];
21408 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21409 sizeof (fpstr), 0, 1);
21410 fprintf (stream, "#%s", fpstr);
21412 break;
21414 default:
21415 gcc_assert (GET_CODE (x) != NEG);
21416 fputc ('#', stream);
21417 if (GET_CODE (x) == HIGH)
21419 fputs (":lower16:", stream);
21420 x = XEXP (x, 0);
21423 output_addr_const (stream, x);
21424 break;
21429 /* Target hook for printing a memory address. */
21430 static void
21431 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
21433 if (TARGET_32BIT)
21435 int is_minus = GET_CODE (x) == MINUS;
21437 if (REG_P (x))
21438 asm_fprintf (stream, "[%r]", REGNO (x));
21439 else if (GET_CODE (x) == PLUS || is_minus)
21441 rtx base = XEXP (x, 0);
21442 rtx index = XEXP (x, 1);
21443 HOST_WIDE_INT offset = 0;
21444 if (!REG_P (base)
21445 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21447 /* Ensure that BASE is a register. */
21448 /* (one of them must be). */
21449 /* Also ensure the SP is not used as an index register. */
21450 std::swap (base, index);
21452 switch (GET_CODE (index))
21454 case CONST_INT:
21455 offset = INTVAL (index);
21456 if (is_minus)
21457 offset = -offset;
21458 asm_fprintf (stream, "[%r, #%wd]",
21459 REGNO (base), offset);
21460 break;
21462 case REG:
21463 asm_fprintf (stream, "[%r, %s%r]",
21464 REGNO (base), is_minus ? "-" : "",
21465 REGNO (index));
21466 break;
21468 case MULT:
21469 case ASHIFTRT:
21470 case LSHIFTRT:
21471 case ASHIFT:
21472 case ROTATERT:
21474 asm_fprintf (stream, "[%r, %s%r",
21475 REGNO (base), is_minus ? "-" : "",
21476 REGNO (XEXP (index, 0)));
21477 arm_print_operand (stream, index, 'S');
21478 fputs ("]", stream);
21479 break;
21482 default:
21483 gcc_unreachable ();
21486 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
21487 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
21489 gcc_assert (REG_P (XEXP (x, 0)));
21491 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
21492 asm_fprintf (stream, "[%r, #%s%d]!",
21493 REGNO (XEXP (x, 0)),
21494 GET_CODE (x) == PRE_DEC ? "-" : "",
21495 GET_MODE_SIZE (mode));
21496 else
21497 asm_fprintf (stream, "[%r], #%s%d",
21498 REGNO (XEXP (x, 0)),
21499 GET_CODE (x) == POST_DEC ? "-" : "",
21500 GET_MODE_SIZE (mode));
21502 else if (GET_CODE (x) == PRE_MODIFY)
21504 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
21505 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21506 asm_fprintf (stream, "#%wd]!",
21507 INTVAL (XEXP (XEXP (x, 1), 1)));
21508 else
21509 asm_fprintf (stream, "%r]!",
21510 REGNO (XEXP (XEXP (x, 1), 1)));
21512 else if (GET_CODE (x) == POST_MODIFY)
21514 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
21515 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21516 asm_fprintf (stream, "#%wd",
21517 INTVAL (XEXP (XEXP (x, 1), 1)));
21518 else
21519 asm_fprintf (stream, "%r",
21520 REGNO (XEXP (XEXP (x, 1), 1)));
21522 else output_addr_const (stream, x);
21524 else
21526 if (REG_P (x))
21527 asm_fprintf (stream, "[%r]", REGNO (x));
21528 else if (GET_CODE (x) == POST_INC)
21529 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
21530 else if (GET_CODE (x) == PLUS)
21532 gcc_assert (REG_P (XEXP (x, 0)));
21533 if (CONST_INT_P (XEXP (x, 1)))
21534 asm_fprintf (stream, "[%r, #%wd]",
21535 REGNO (XEXP (x, 0)),
21536 INTVAL (XEXP (x, 1)));
21537 else
21538 asm_fprintf (stream, "[%r, %r]",
21539 REGNO (XEXP (x, 0)),
21540 REGNO (XEXP (x, 1)));
21542 else
21543 output_addr_const (stream, x);
21547 /* Target hook for indicating whether a punctuation character for
21548 TARGET_PRINT_OPERAND is valid. */
21549 static bool
21550 arm_print_operand_punct_valid_p (unsigned char code)
21552 return (code == '@' || code == '|' || code == '.'
21553 || code == '(' || code == ')' || code == '#'
21554 || (TARGET_32BIT && (code == '?'))
21555 || (TARGET_THUMB2 && (code == '!'))
21556 || (TARGET_THUMB && (code == '_')));
21559 /* Target hook for assembling integer objects. The ARM version needs to
21560 handle word-sized values specially. */
21561 static bool
21562 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
21564 machine_mode mode;
21566 if (size == UNITS_PER_WORD && aligned_p)
21568 fputs ("\t.word\t", asm_out_file);
21569 output_addr_const (asm_out_file, x);
21571 /* Mark symbols as position independent. We only do this in the
21572 .text segment, not in the .data segment. */
21573 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
21574 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
21576 /* See legitimize_pic_address for an explanation of the
21577 TARGET_VXWORKS_RTP check. */
21578 if (!arm_pic_data_is_text_relative
21579 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
21580 fputs ("(GOT)", asm_out_file);
21581 else
21582 fputs ("(GOTOFF)", asm_out_file);
21584 fputc ('\n', asm_out_file);
21585 return true;
21588 mode = GET_MODE (x);
21590 if (arm_vector_mode_supported_p (mode))
21592 int i, units;
21594 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21596 units = CONST_VECTOR_NUNITS (x);
21597 size = GET_MODE_UNIT_SIZE (mode);
21599 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21600 for (i = 0; i < units; i++)
21602 rtx elt = CONST_VECTOR_ELT (x, i);
21603 assemble_integer
21604 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
21606 else
21607 for (i = 0; i < units; i++)
21609 rtx elt = CONST_VECTOR_ELT (x, i);
21610 assemble_real
21611 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
21612 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
21615 return true;
21618 return default_assemble_integer (x, size, aligned_p);
21621 static void
21622 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
21624 section *s;
21626 if (!TARGET_AAPCS_BASED)
21628 (is_ctor ?
21629 default_named_section_asm_out_constructor
21630 : default_named_section_asm_out_destructor) (symbol, priority);
21631 return;
21634 /* Put these in the .init_array section, using a special relocation. */
21635 if (priority != DEFAULT_INIT_PRIORITY)
21637 char buf[18];
21638 sprintf (buf, "%s.%.5u",
21639 is_ctor ? ".init_array" : ".fini_array",
21640 priority);
21641 s = get_section (buf, SECTION_WRITE, NULL_TREE);
21643 else if (is_ctor)
21644 s = ctors_section;
21645 else
21646 s = dtors_section;
21648 switch_to_section (s);
21649 assemble_align (POINTER_SIZE);
21650 fputs ("\t.word\t", asm_out_file);
21651 output_addr_const (asm_out_file, symbol);
21652 fputs ("(target1)\n", asm_out_file);
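/* For example (symbol name illustrative): a constructor registered with
   priority 101 on an AAPCS-based target is placed in section
   ".init_array.00101" and emitted as "\t.word\tsym(target1)".  */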
21655 /* Add a function to the list of static constructors. */
21657 static void
21658 arm_elf_asm_constructor (rtx symbol, int priority)
21660 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
21663 /* Add a function to the list of static destructors. */
21665 static void
21666 arm_elf_asm_destructor (rtx symbol, int priority)
21668 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
21671 /* A finite state machine takes care of noticing whether or not instructions
21672 can be conditionally executed, thus decreasing execution time and code
21673 size by deleting branch instructions. The fsm is controlled by
21674 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21676 /* The states of the fsm controlling condition codes are:
21677 0: normal, do nothing special
21678 1: make ASM_OUTPUT_OPCODE not output this instruction
21679 2: make ASM_OUTPUT_OPCODE not output this instruction
21680 3: make instructions conditional
21681 4: make instructions conditional
21683 State transitions (state->state by whom under condition):
21684 0 -> 1 final_prescan_insn if the `target' is a label
21685 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21686 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21687 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21688 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21689 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21690 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21691 (the target insn is arm_target_insn).
21693 If the jump clobbers the conditions then we use states 2 and 4.
21695 A similar thing can be done with conditional return insns.
21697 XXX In case the `target' is an unconditional branch, this conditionalising
21698 of the instructions always reduces code size, but not always execution
21699 time. But then, I want to reduce the code size to somewhere near what
21700 /bin/cc produces. */
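/* As an illustration (registers and label arbitrary), a sequence such as

	bne	.L1
	mov	r0, #1
   .L1:

   is output with the branch suppressed and the skipped instruction
   conditionalised on the inverse condition, i.e. as "moveq r0, #1".  */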
21702 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21703 instructions. When a COND_EXEC instruction is seen the subsequent
21704 instructions are scanned so that multiple conditional instructions can be
21705 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21706 specify the length and true/false mask for the IT block. These will be
21707 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
21709 /* Returns the index of the ARM condition code string in
21710 `arm_condition_codes', or ARM_NV if the comparison is invalid.
21711 COMPARISON should be an rtx like `(eq (...) (...))'. */
21713 enum arm_cond_code
21714 maybe_get_arm_condition_code (rtx comparison)
21716 machine_mode mode = GET_MODE (XEXP (comparison, 0));
21717 enum arm_cond_code code;
21718 enum rtx_code comp_code = GET_CODE (comparison);
21720 if (GET_MODE_CLASS (mode) != MODE_CC)
21721 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
21722 XEXP (comparison, 1));
21724 switch (mode)
21726 case CC_DNEmode: code = ARM_NE; goto dominance;
21727 case CC_DEQmode: code = ARM_EQ; goto dominance;
21728 case CC_DGEmode: code = ARM_GE; goto dominance;
21729 case CC_DGTmode: code = ARM_GT; goto dominance;
21730 case CC_DLEmode: code = ARM_LE; goto dominance;
21731 case CC_DLTmode: code = ARM_LT; goto dominance;
21732 case CC_DGEUmode: code = ARM_CS; goto dominance;
21733 case CC_DGTUmode: code = ARM_HI; goto dominance;
21734 case CC_DLEUmode: code = ARM_LS; goto dominance;
21735 case CC_DLTUmode: code = ARM_CC;
21737 dominance:
21738 if (comp_code == EQ)
21739 return ARM_INVERSE_CONDITION_CODE (code);
21740 if (comp_code == NE)
21741 return code;
21742 return ARM_NV;
21744 case CC_NOOVmode:
21745 switch (comp_code)
21747 case NE: return ARM_NE;
21748 case EQ: return ARM_EQ;
21749 case GE: return ARM_PL;
21750 case LT: return ARM_MI;
21751 default: return ARM_NV;
21754 case CC_Zmode:
21755 switch (comp_code)
21757 case NE: return ARM_NE;
21758 case EQ: return ARM_EQ;
21759 default: return ARM_NV;
21762 case CC_Nmode:
21763 switch (comp_code)
21765 case NE: return ARM_MI;
21766 case EQ: return ARM_PL;
21767 default: return ARM_NV;
21770 case CCFPEmode:
21771 case CCFPmode:
21772 /* We can handle all cases except UNEQ and LTGT. */
21773 switch (comp_code)
21775 case GE: return ARM_GE;
21776 case GT: return ARM_GT;
21777 case LE: return ARM_LS;
21778 case LT: return ARM_MI;
21779 case NE: return ARM_NE;
21780 case EQ: return ARM_EQ;
21781 case ORDERED: return ARM_VC;
21782 case UNORDERED: return ARM_VS;
21783 case UNLT: return ARM_LT;
21784 case UNLE: return ARM_LE;
21785 case UNGT: return ARM_HI;
21786 case UNGE: return ARM_PL;
21787 /* UNEQ and LTGT do not have a representation. */
21788 case UNEQ: /* Fall through. */
21789 case LTGT: /* Fall through. */
21790 default: return ARM_NV;
21793 case CC_SWPmode:
21794 switch (comp_code)
21796 case NE: return ARM_NE;
21797 case EQ: return ARM_EQ;
21798 case GE: return ARM_LE;
21799 case GT: return ARM_LT;
21800 case LE: return ARM_GE;
21801 case LT: return ARM_GT;
21802 case GEU: return ARM_LS;
21803 case GTU: return ARM_CC;
21804 case LEU: return ARM_CS;
21805 case LTU: return ARM_HI;
21806 default: return ARM_NV;
21809 case CC_Cmode:
21810 switch (comp_code)
21812 case LTU: return ARM_CS;
21813 case GEU: return ARM_CC;
21814 case NE: return ARM_CS;
21815 case EQ: return ARM_CC;
21816 default: return ARM_NV;
21819 case CC_CZmode:
21820 switch (comp_code)
21822 case NE: return ARM_NE;
21823 case EQ: return ARM_EQ;
21824 case GEU: return ARM_CS;
21825 case GTU: return ARM_HI;
21826 case LEU: return ARM_LS;
21827 case LTU: return ARM_CC;
21828 default: return ARM_NV;
21831 case CC_NCVmode:
21832 switch (comp_code)
21834 case GE: return ARM_GE;
21835 case LT: return ARM_LT;
21836 case GEU: return ARM_CS;
21837 case LTU: return ARM_CC;
21838 default: return ARM_NV;
21841 case CC_Vmode:
21842 switch (comp_code)
21844 case NE: return ARM_VS;
21845 case EQ: return ARM_VC;
21846 default: return ARM_NV;
21849 case CCmode:
21850 switch (comp_code)
21852 case NE: return ARM_NE;
21853 case EQ: return ARM_EQ;
21854 case GE: return ARM_GE;
21855 case GT: return ARM_GT;
21856 case LE: return ARM_LE;
21857 case LT: return ARM_LT;
21858 case GEU: return ARM_CS;
21859 case GTU: return ARM_HI;
21860 case LEU: return ARM_LS;
21861 case LTU: return ARM_CC;
21862 default: return ARM_NV;
21865 default: gcc_unreachable ();
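/* For example, a GE comparison against a CCmode flags register maps to
   ARM_GE, while the same comparison in CC_NOOVmode (set by operations for
   which the V flag is not meaningful) maps to ARM_PL, i.e. only the sign
   bit is tested.  */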
21869 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
21870 static enum arm_cond_code
21871 get_arm_condition_code (rtx comparison)
21873 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
21874 gcc_assert (code != ARM_NV);
21875 return code;
21878 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
21879 instructions. */
21880 void
21881 thumb2_final_prescan_insn (rtx_insn *insn)
21883 rtx_insn *first_insn = insn;
21884 rtx body = PATTERN (insn);
21885 rtx predicate;
21886 enum arm_cond_code code;
21887 int n;
21888 int mask;
21889 int max;
21891 /* max_insns_skipped in the tune was already taken into account in the
21892 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
21893 just emit the IT blocks as best we can. It does not make sense to split
21894 the IT blocks. */
21895 max = MAX_INSN_PER_IT_BLOCK;
21897 /* Remove the previous insn from the count of insns to be output. */
21898 if (arm_condexec_count)
21899 arm_condexec_count--;
21901 /* Nothing to do if we are already inside a conditional block. */
21902 if (arm_condexec_count)
21903 return;
21905 if (GET_CODE (body) != COND_EXEC)
21906 return;
21908 /* Conditional jumps are implemented directly. */
21909 if (JUMP_P (insn))
21910 return;
21912 predicate = COND_EXEC_TEST (body);
21913 arm_current_cc = get_arm_condition_code (predicate);
21915 n = get_attr_ce_count (insn);
21916 arm_condexec_count = 1;
21917 arm_condexec_mask = (1 << n) - 1;
21918 arm_condexec_masklen = n;
21919 /* See if subsequent instructions can be combined into the same block. */
21920 for (;;)
21922 insn = next_nonnote_insn (insn);
21924 /* Jumping into the middle of an IT block is illegal, so a label or
21925 barrier terminates the block. */
21926 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
21927 break;
21929 body = PATTERN (insn);
21930 /* USE and CLOBBER aren't really insns, so just skip them. */
21931 if (GET_CODE (body) == USE
21932 || GET_CODE (body) == CLOBBER)
21933 continue;
21935 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
21936 if (GET_CODE (body) != COND_EXEC)
21937 break;
21938 /* Maximum number of conditionally executed instructions in a block. */
21939 n = get_attr_ce_count (insn);
21940 if (arm_condexec_masklen + n > max)
21941 break;
21943 predicate = COND_EXEC_TEST (body);
21944 code = get_arm_condition_code (predicate);
21945 mask = (1 << n) - 1;
21946 if (arm_current_cc == code)
21947 arm_condexec_mask |= (mask << arm_condexec_masklen);
21948 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
21949 break;
21951 arm_condexec_count++;
21952 arm_condexec_masklen += n;
21954 /* A jump must be the last instruction in a conditional block. */
21955 if (JUMP_P (insn))
21956 break;
21958 /* Restore recog_data (getting the attributes of other insns can
21959 destroy this array, but final.c assumes that it remains intact
21960 across this call). */
21961 extract_constrain_insn_cached (first_insn);
21964 void
21965 arm_final_prescan_insn (rtx_insn *insn)
21967 /* BODY will hold the body of INSN. */
21968 rtx body = PATTERN (insn);
21970 /* This will be 1 if trying to repeat the trick, and things need to be
21971 reversed if it appears to fail. */
21972 int reverse = 0;
21974 /* If we start with a return insn, we only succeed if we find another one. */
21975 int seeking_return = 0;
21976 enum rtx_code return_code = UNKNOWN;
21978 /* START_INSN will hold the insn from where we start looking. This is the
21979 first insn after the following code_label if REVERSE is true. */
21980 rtx_insn *start_insn = insn;
21982 /* If in state 4, check if the target branch is reached, in order to
21983 change back to state 0. */
21984 if (arm_ccfsm_state == 4)
21986 if (insn == arm_target_insn)
21988 arm_target_insn = NULL;
21989 arm_ccfsm_state = 0;
21991 return;
21994 /* If in state 3, it is possible to repeat the trick, if this insn is an
21995 unconditional branch to a label, and immediately following this branch
21996 is the previous target label which is only used once, and the label this
21997 branch jumps to is not too far off. */
21998 if (arm_ccfsm_state == 3)
22000 if (simplejump_p (insn))
22002 start_insn = next_nonnote_insn (start_insn);
22003 if (BARRIER_P (start_insn))
22005 /* XXX Isn't this always a barrier? */
22006 start_insn = next_nonnote_insn (start_insn);
22008 if (LABEL_P (start_insn)
22009 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22010 && LABEL_NUSES (start_insn) == 1)
22011 reverse = TRUE;
22012 else
22013 return;
22015 else if (ANY_RETURN_P (body))
22017 start_insn = next_nonnote_insn (start_insn);
22018 if (BARRIER_P (start_insn))
22019 start_insn = next_nonnote_insn (start_insn);
22020 if (LABEL_P (start_insn)
22021 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22022 && LABEL_NUSES (start_insn) == 1)
22024 reverse = TRUE;
22025 seeking_return = 1;
22026 return_code = GET_CODE (body);
22028 else
22029 return;
22031 else
22032 return;
22035 gcc_assert (!arm_ccfsm_state || reverse);
22036 if (!JUMP_P (insn))
22037 return;
22039 /* This jump might be paralleled with a clobber of the condition codes;
22040 the jump should always come first. */
22041 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22042 body = XVECEXP (body, 0, 0);
22044 if (reverse
22045 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22046 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22048 int insns_skipped;
22049 int fail = FALSE, succeed = FALSE;
22050 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22051 int then_not_else = TRUE;
22052 rtx_insn *this_insn = start_insn;
22053 rtx label = 0;
22055 /* Register the insn jumped to. */
22056 if (reverse)
22058 if (!seeking_return)
22059 label = XEXP (SET_SRC (body), 0);
22061 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22062 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22063 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22065 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22066 then_not_else = FALSE;
22068 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22070 seeking_return = 1;
22071 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22073 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22075 seeking_return = 1;
22076 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22077 then_not_else = FALSE;
22079 else
22080 gcc_unreachable ();
22082 /* See how many insns this branch skips, and what kind of insns. If all
22083 insns are okay, and the label or unconditional branch to the same
22084 label is not too far away, succeed. */
22085 for (insns_skipped = 0;
22086 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22088 rtx scanbody;
22090 this_insn = next_nonnote_insn (this_insn);
22091 if (!this_insn)
22092 break;
22094 switch (GET_CODE (this_insn))
22096 case CODE_LABEL:
22097 /* Succeed if it is the target label, otherwise fail since
22098 control falls in from somewhere else. */
22099 if (this_insn == label)
22101 arm_ccfsm_state = 1;
22102 succeed = TRUE;
22104 else
22105 fail = TRUE;
22106 break;
22108 case BARRIER:
22109 /* Succeed if the following insn is the target label.
22110 Otherwise fail.
22111 If return insns are used then the last insn in a function
22112 will be a barrier. */
22113 this_insn = next_nonnote_insn (this_insn);
22114 if (this_insn && this_insn == label)
22116 arm_ccfsm_state = 1;
22117 succeed = TRUE;
22119 else
22120 fail = TRUE;
22121 break;
22123 case CALL_INSN:
22124 /* The AAPCS says that conditional calls should not be
22125 used since they make interworking inefficient (the
22126 linker can't transform BL<cond> into BLX). That's
22127 only a problem if the machine has BLX. */
22128 if (arm_arch5)
22130 fail = TRUE;
22131 break;
22134 /* Succeed if the following insn is the target label, or
22135 if the following two insns are a barrier and the
22136 target label. */
22137 this_insn = next_nonnote_insn (this_insn);
22138 if (this_insn && BARRIER_P (this_insn))
22139 this_insn = next_nonnote_insn (this_insn);
22141 if (this_insn && this_insn == label
22142 && insns_skipped < max_insns_skipped)
22144 arm_ccfsm_state = 1;
22145 succeed = TRUE;
22147 else
22148 fail = TRUE;
22149 break;
22151 case JUMP_INSN:
22152 /* If this is an unconditional branch to the same label, succeed.
22153 If it is to another label, do nothing. If it is conditional,
22154 fail. */
22155 /* XXX Probably, the tests for SET and the PC are
22156 unnecessary. */
22158 scanbody = PATTERN (this_insn);
22159 if (GET_CODE (scanbody) == SET
22160 && GET_CODE (SET_DEST (scanbody)) == PC)
22162 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22163 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22165 arm_ccfsm_state = 2;
22166 succeed = TRUE;
22168 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22169 fail = TRUE;
22171 /* Fail if a conditional return is undesirable (e.g. on a
22172 StrongARM), but still allow this if optimizing for size. */
22173 else if (GET_CODE (scanbody) == return_code
22174 && !use_return_insn (TRUE, NULL)
22175 && !optimize_size)
22176 fail = TRUE;
22177 else if (GET_CODE (scanbody) == return_code)
22179 arm_ccfsm_state = 2;
22180 succeed = TRUE;
22182 else if (GET_CODE (scanbody) == PARALLEL)
22184 switch (get_attr_conds (this_insn))
22186 case CONDS_NOCOND:
22187 break;
22188 default:
22189 fail = TRUE;
22190 break;
22193 else
22194 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22196 break;
22198 case INSN:
22199 /* Instructions using or affecting the condition codes make it
22200 fail. */
22201 scanbody = PATTERN (this_insn);
22202 if (!(GET_CODE (scanbody) == SET
22203 || GET_CODE (scanbody) == PARALLEL)
22204 || get_attr_conds (this_insn) != CONDS_NOCOND)
22205 fail = TRUE;
22206 break;
22208 default:
22209 break;
22212 if (succeed)
22214 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22215 arm_target_label = CODE_LABEL_NUMBER (label);
22216 else
22218 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22220 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22222 this_insn = next_nonnote_insn (this_insn);
22223 gcc_assert (!this_insn
22224 || (!BARRIER_P (this_insn)
22225 && !LABEL_P (this_insn)));
22227 if (!this_insn)
22229 /* Oh, dear! We ran off the end; give up. */
22230 extract_constrain_insn_cached (insn);
22231 arm_ccfsm_state = 0;
22232 arm_target_insn = NULL;
22233 return;
22235 arm_target_insn = this_insn;
22238 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22239 what it was. */
22240 if (!reverse)
22241 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22243 if (reverse || then_not_else)
22244 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22247 /* Restore recog_data (getting the attributes of other insns can
22248 destroy this array, but final.c assumes that it remains intact
22249 across this call). */
22250 extract_constrain_insn_cached (insn);
22254 /* Output IT instructions. */
22255 void
22256 thumb2_asm_output_opcode (FILE * stream)
22258 char buff[5];
22259 int n;
22261 if (arm_condexec_mask)
22263 for (n = 0; n < arm_condexec_masklen; n++)
22264 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22265 buff[n] = 0;
22266 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22267 arm_condition_codes[arm_current_cc]);
22268 arm_condexec_mask = 0;
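/* For example, with arm_condexec_masklen == 3 and arm_condexec_mask == 0x3
   (first two insns take the condition, the third its inverse), BUFF becomes
   "tte" and "itte\t<cond>" is emitted ahead of the first instruction of the
   block.  */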
22272 /* Returns true if REGNO is a valid register
22273 for holding a quantity of type MODE. */
22274 int
22275 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22277 if (GET_MODE_CLASS (mode) == MODE_CC)
22278 return (regno == CC_REGNUM
22279 || (TARGET_HARD_FLOAT
22280 && regno == VFPCC_REGNUM));
22282 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
22283 return false;
22285 if (TARGET_THUMB1)
22286 /* For the Thumb we only allow values bigger than SImode in
22287 registers 0 - 6, so that there is always a second low
22288 register available to hold the upper part of the value.
22289 We probably ought to ensure that the register is the
22290 start of an even numbered register pair. */
22291 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22293 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
22295 if (mode == SFmode || mode == SImode)
22296 return VFP_REGNO_OK_FOR_SINGLE (regno);
22298 if (mode == DFmode)
22299 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22301 if (mode == HFmode)
22302 return VFP_REGNO_OK_FOR_SINGLE (regno);
22304 /* VFP registers can hold HImode values. */
22305 if (mode == HImode)
22306 return VFP_REGNO_OK_FOR_SINGLE (regno);
22308 if (TARGET_NEON)
22309 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22310 || (VALID_NEON_QREG_MODE (mode)
22311 && NEON_REGNO_OK_FOR_QUAD (regno))
22312 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22313 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22314 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22315 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22316 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22318 return FALSE;
22321 if (TARGET_REALLY_IWMMXT)
22323 if (IS_IWMMXT_GR_REGNUM (regno))
22324 return mode == SImode;
22326 if (IS_IWMMXT_REGNUM (regno))
22327 return VALID_IWMMXT_REG_MODE (mode);
22330 /* We allow almost any value to be stored in the general registers.
22331 Restrict doubleword quantities to even register pairs in ARM state
22332 so that we can use ldrd. Do not allow very large Neon structure
22333 opaque modes in general registers; they would use too many. */
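/* For instance, when TARGET_LDRD holds in ARM state, a DImode value may be
   placed in {r2, r3} but not in {r1, r2}: an odd starting register would
   prevent the use of ldrd/strd.  */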
22334 if (regno <= LAST_ARM_REGNUM)
22336 if (ARM_NUM_REGS (mode) > 4)
22337 return FALSE;
22339 if (TARGET_THUMB2)
22340 return TRUE;
22342 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
22345 if (regno == FRAME_POINTER_REGNUM
22346 || regno == ARG_POINTER_REGNUM)
22347 /* We only allow integers in the fake hard registers. */
22348 return GET_MODE_CLASS (mode) == MODE_INT;
22350 return FALSE;
22353 /* Implement MODES_TIEABLE_P. */
22355 bool
22356 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
22358 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22359 return true;
22361 /* We specifically want to allow elements of "structure" modes to
22362 be tieable to the structure. This more general condition allows
22363 other rarer situations too. */
22364 if (TARGET_NEON
22365 && (VALID_NEON_DREG_MODE (mode1)
22366 || VALID_NEON_QREG_MODE (mode1)
22367 || VALID_NEON_STRUCT_MODE (mode1))
22368 && (VALID_NEON_DREG_MODE (mode2)
22369 || VALID_NEON_QREG_MODE (mode2)
22370 || VALID_NEON_STRUCT_MODE (mode2)))
22371 return true;
22373 return false;
22376 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22377 not used in arm mode. */
22379 enum reg_class
22380 arm_regno_class (int regno)
22382 if (regno == PC_REGNUM)
22383 return NO_REGS;
22385 if (TARGET_THUMB1)
22387 if (regno == STACK_POINTER_REGNUM)
22388 return STACK_REG;
22389 if (regno == CC_REGNUM)
22390 return CC_REG;
22391 if (regno < 8)
22392 return LO_REGS;
22393 return HI_REGS;
22396 if (TARGET_THUMB2 && regno < 8)
22397 return LO_REGS;
22399 if ( regno <= LAST_ARM_REGNUM
22400 || regno == FRAME_POINTER_REGNUM
22401 || regno == ARG_POINTER_REGNUM)
22402 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22404 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22405 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22407 if (IS_VFP_REGNUM (regno))
22409 if (regno <= D7_VFP_REGNUM)
22410 return VFP_D0_D7_REGS;
22411 else if (regno <= LAST_LO_VFP_REGNUM)
22412 return VFP_LO_REGS;
22413 else
22414 return VFP_HI_REGS;
22417 if (IS_IWMMXT_REGNUM (regno))
22418 return IWMMXT_REGS;
22420 if (IS_IWMMXT_GR_REGNUM (regno))
22421 return IWMMXT_GR_REGS;
22423 return NO_REGS;
22426 /* Handle a special case when computing the offset
22427 of an argument from the frame pointer. */
22428 int
22429 arm_debugger_arg_offset (int value, rtx addr)
22431 rtx_insn *insn;
22433 /* We are only interested if dbxout_parms() failed to compute the offset. */
22434 if (value != 0)
22435 return 0;
22437 /* We can only cope with the case where the address is held in a register. */
22438 if (!REG_P (addr))
22439 return 0;
22441 /* If we are using the frame pointer to point at the argument, then
22442 an offset of 0 is correct. */
22443 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22444 return 0;
22446 /* If we are using the stack pointer to point at the
22447 argument, then an offset of 0 is correct. */
22448 /* ??? Check this is consistent with thumb2 frame layout. */
22449 if ((TARGET_THUMB || !frame_pointer_needed)
22450 && REGNO (addr) == SP_REGNUM)
22451 return 0;
22453 /* Oh dear. The argument is pointed to by a register rather
22454 than being held in a register, or being stored at a known
22455 offset from the frame pointer. Since GDB only understands
22456 those two kinds of argument we must translate the address
22457 held in the register into an offset from the frame pointer.
22458 We do this by searching through the insns for the function
22459 looking to see where this register gets its value. If the
22460 register is initialized from the frame pointer plus an offset
22461 then we are in luck and we can continue, otherwise we give up.
22463 This code is exercised by producing debugging information
22464 for a function with arguments like this:
22466 double func (double a, double b, int c, double d) {return d;}
22468 Without this code the stab for parameter 'd' will be set to
22469 an offset of 0 from the frame pointer, rather than 8. */
22471 /* The if() statement says:
22473 If the insn is a normal instruction
22474 and if the insn is setting the value in a register
22475 and if the register being set is the register holding the address of the argument
22476 and if the address is computed by an addition
22477 that involves adding to a register
22478 which is the frame pointer
22479 a constant integer
22481 then... */
22483 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22485 if ( NONJUMP_INSN_P (insn)
22486 && GET_CODE (PATTERN (insn)) == SET
22487 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
22488 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
22489 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
22490 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22491 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
22494 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
22496 break;
22500 if (value == 0)
22502 debug_rtx (addr);
22503 warning (0, "unable to compute real location of stacked parameter");
22504 value = 8; /* XXX magic hack */
22507 return value;
22510 /* Implement TARGET_PROMOTED_TYPE. */
22512 static tree
22513 arm_promoted_type (const_tree t)
22515 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
22516 return float_type_node;
22517 return NULL_TREE;
22520 /* Implement TARGET_CONVERT_TO_TYPE.
22521 Specifically, this hook implements the peculiarity of the ARM
22522 half-precision floating-point C semantics that requires conversions
22523 between __fp16 and double to go through an intermediate conversion to float. */
22525 static tree
22526 arm_convert_to_type (tree type, tree expr)
22528 tree fromtype = TREE_TYPE (expr);
22529 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
22530 return NULL_TREE;
22531 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
22532 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
22533 return convert (type, convert (float_type_node, expr));
22534 return NULL_TREE;
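/* Illustratively, for "__fp16 h; double d = h;" the conversion is carried
   out as (double)(float) h, and a conversion from double back to __fp16
   likewise passes through float.  */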
22537 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
22538 This simply adds HFmode as a supported mode; even though we don't
22539 implement arithmetic on this type directly, it's supported by
22540 optabs conversions, much the way the double-word arithmetic is
22541 special-cased in the default hook. */
22543 static bool
22544 arm_scalar_mode_supported_p (machine_mode mode)
22546 if (mode == HFmode)
22547 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
22548 else if (ALL_FIXED_POINT_MODE_P (mode))
22549 return true;
22550 else
22551 return default_scalar_mode_supported_p (mode);
22554 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
22555 not to early-clobber SRC registers in the process.
22557 We assume that the operands described by SRC and DEST represent a
22558 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
22559 number of components into which the copy has been decomposed. */
22560 void
22561 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
22563 unsigned int i;
22565 if (!reg_overlap_mentioned_p (operands[0], operands[1])
22566 || REGNO (operands[0]) < REGNO (operands[1]))
22568 for (i = 0; i < count; i++)
22570 operands[2 * i] = dest[i];
22571 operands[2 * i + 1] = src[i];
22574 else
22576 for (i = 0; i < count; i++)
22578 operands[2 * i] = dest[count - i - 1];
22579 operands[2 * i + 1] = src[count - i - 1];
22584 /* Split operands into moves from op[1] + op[2] into op[0]. */
22586 void
22587 neon_split_vcombine (rtx operands[3])
22589 unsigned int dest = REGNO (operands[0]);
22590 unsigned int src1 = REGNO (operands[1]);
22591 unsigned int src2 = REGNO (operands[2]);
22592 machine_mode halfmode = GET_MODE (operands[1]);
22593 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
22594 rtx destlo, desthi;
22596 if (src1 == dest && src2 == dest + halfregs)
22598 /* No-op move. Can't split to nothing; emit something. */
22599 emit_note (NOTE_INSN_DELETED);
22600 return;
22603 /* Preserve register attributes for variable tracking. */
22604 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
22605 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
22606 GET_MODE_SIZE (halfmode));
22608 /* Special case of reversed high/low parts. Use VSWP. */
22609 if (src2 == dest && src1 == dest + halfregs)
22611 rtx x = gen_rtx_SET (destlo, operands[1]);
22612 rtx y = gen_rtx_SET (desthi, operands[2]);
22613 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
22614 return;
22617 if (!reg_overlap_mentioned_p (operands[2], destlo))
22619 /* Try to avoid unnecessary moves if part of the result
22620 is in the right place already. */
22621 if (src1 != dest)
22622 emit_move_insn (destlo, operands[1]);
22623 if (src2 != dest + halfregs)
22624 emit_move_insn (desthi, operands[2]);
22626 else
22628 if (src2 != dest + halfregs)
22629 emit_move_insn (desthi, operands[2]);
22630 if (src1 != dest)
22631 emit_move_insn (destlo, operands[1]);
22635 /* Return the number (counting from 0) of
22636 the least significant set bit in MASK. */
22638 inline static int
22639 number_of_first_bit_set (unsigned mask)
22641 return ctz_hwi (mask);
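/* E.g. number_of_first_bit_set (0x18) == 3.  */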
22644 /* Like emit_multi_reg_push, but allowing for a different set of
22645 registers to be described as saved. MASK is the set of registers
22646 to be saved; REAL_REGS is the set of registers to be described as
22647 saved. If REAL_REGS is 0, only describe the stack adjustment. */
22649 static rtx_insn *
22650 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
22652 unsigned long regno;
22653 rtx par[10], tmp, reg;
22654 rtx_insn *insn;
22655 int i, j;
22657 /* Build the parallel of the registers actually being stored. */
22658 for (i = 0; mask; ++i, mask &= mask - 1)
22660 regno = ctz_hwi (mask);
22661 reg = gen_rtx_REG (SImode, regno);
22663 if (i == 0)
22664 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
22665 else
22666 tmp = gen_rtx_USE (VOIDmode, reg);
22668 par[i] = tmp;
22671 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
22672 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22673 tmp = gen_frame_mem (BLKmode, tmp);
22674 tmp = gen_rtx_SET (tmp, par[0]);
22675 par[0] = tmp;
22677 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
22678 insn = emit_insn (tmp);
22680 /* Always build the stack adjustment note for unwind info. */
22681 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
22682 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
22683 par[0] = tmp;
22685 /* Build the parallel of the registers recorded as saved for unwind. */
22686 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
22688 regno = ctz_hwi (real_regs);
22689 reg = gen_rtx_REG (SImode, regno);
22691 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
22692 tmp = gen_frame_mem (SImode, tmp);
22693 tmp = gen_rtx_SET (tmp, reg);
22694 RTX_FRAME_RELATED_P (tmp) = 1;
22695 par[j + 1] = tmp;
22698 if (j == 0)
22699 tmp = par[0];
22700 else
22702 RTX_FRAME_RELATED_P (par[0]) = 1;
22703 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
22706 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
22708 return insn;
22711 /* Emit code to push or pop registers to or from the stack. F is the
22712 assembly file. MASK is the registers to pop. */
22713 static void
22714 thumb_pop (FILE *f, unsigned long mask)
22716 int regno;
22717 int lo_mask = mask & 0xFF;
22718 int pushed_words = 0;
22720 gcc_assert (mask);
22722 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
22724 /* Special case. Do not generate a POP PC statement here, do it in
22725 thumb_exit() */
22726 thumb_exit (f, -1);
22727 return;
22730 fprintf (f, "\tpop\t{");
22732 /* Look at the low registers first. */
22733 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
22735 if (lo_mask & 1)
22737 asm_fprintf (f, "%r", regno);
22739 if ((lo_mask & ~1) != 0)
22740 fprintf (f, ", ");
22742 pushed_words++;
22746 if (mask & (1 << PC_REGNUM))
22748 /* Catch popping the PC. */
22749 if (TARGET_INTERWORK || TARGET_BACKTRACE
22750 || crtl->calls_eh_return)
22752 /* The PC is never popped directly; instead
22753 it is popped into r3 and then BX is used. */
22754 fprintf (f, "}\n");
22756 thumb_exit (f, -1);
22758 return;
22760 else
22762 if (mask & 0xFF)
22763 fprintf (f, ", ");
22765 asm_fprintf (f, "%r", PC_REGNUM);
22769 fprintf (f, "}\n");
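/* For example, thumb_pop (f, (1 << 4) | (1 << 5)) emits "pop {r4, r5}";
   a mask consisting only of the PC bit is handled entirely by thumb_exit.  */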
22772 /* Generate code to return from a thumb function.
22773 If 'reg_containing_return_addr' is -1, then the return address is
22774 actually on the stack, at the stack pointer. */
22775 static void
22776 thumb_exit (FILE *f, int reg_containing_return_addr)
22778 unsigned regs_available_for_popping;
22779 unsigned regs_to_pop;
22780 int pops_needed;
22781 unsigned available;
22782 unsigned required;
22783 machine_mode mode;
22784 int size;
22785 int restore_a4 = FALSE;
22787 /* Compute the registers we need to pop. */
22788 regs_to_pop = 0;
22789 pops_needed = 0;
22791 if (reg_containing_return_addr == -1)
22793 regs_to_pop |= 1 << LR_REGNUM;
22794 ++pops_needed;
22797 if (TARGET_BACKTRACE)
22799 /* Restore the (ARM) frame pointer and stack pointer. */
22800 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
22801 pops_needed += 2;
22804 /* If there is nothing to pop then just emit the BX instruction and
22805 return. */
22806 if (pops_needed == 0)
22808 if (crtl->calls_eh_return)
22809 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
22811 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
22812 return;
22814 /* Otherwise if we are not supporting interworking and we have not created
22815 a backtrace structure, the function was not entered in ARM mode, and we are
22816 not returning via __builtin_eh_return, then just pop the return address straight into the PC. */
22817 else if (!TARGET_INTERWORK
22818 && !TARGET_BACKTRACE
22819 && !is_called_in_ARM_mode (current_function_decl)
22820 && !crtl->calls_eh_return)
22822 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
22823 return;
22826 /* Find out how many of the (return) argument registers we can corrupt. */
22827 regs_available_for_popping = 0;
22829 /* If returning via __builtin_eh_return, the bottom three registers
22830 all contain information needed for the return. */
22831 if (crtl->calls_eh_return)
22832 size = 12;
22833 else
22835 /* We can deduce the registers used from the function's
22836 return value. This is more reliable than examining
22837 df_regs_ever_live_p () because that will be set if the register is
22838 ever used in the function, not just if the register is used
22839 to hold a return value. */
22841 if (crtl->return_rtx != 0)
22842 mode = GET_MODE (crtl->return_rtx);
22843 else
22844 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22846 size = GET_MODE_SIZE (mode);
22848 if (size == 0)
22850 /* In a void function we can use any argument register.
22851 In a function that returns a structure on the stack
22852 we can use the second and third argument registers. */
22853 if (mode == VOIDmode)
22854 regs_available_for_popping =
22855 (1 << ARG_REGISTER (1))
22856 | (1 << ARG_REGISTER (2))
22857 | (1 << ARG_REGISTER (3));
22858 else
22859 regs_available_for_popping =
22860 (1 << ARG_REGISTER (2))
22861 | (1 << ARG_REGISTER (3));
22863 else if (size <= 4)
22864 regs_available_for_popping =
22865 (1 << ARG_REGISTER (2))
22866 | (1 << ARG_REGISTER (3));
22867 else if (size <= 8)
22868 regs_available_for_popping =
22869 (1 << ARG_REGISTER (3));
22872 /* Match registers to be popped with registers into which we pop them. */
22873 for (available = regs_available_for_popping,
22874 required = regs_to_pop;
22875 required != 0 && available != 0;
22876 available &= ~(available & - available),
22877 required &= ~(required & - required))
22878 -- pops_needed;
22880 /* If we have any popping registers left over, remove them. */
22881 if (available > 0)
22882 regs_available_for_popping &= ~available;
22884 /* Otherwise if we need another popping register we can use
22885 the fourth argument register. */
22886 else if (pops_needed)
22888 /* If we have not found any free argument registers and
22889 reg a4 contains the return address, we must move it. */
22890 if (regs_available_for_popping == 0
22891 && reg_containing_return_addr == LAST_ARG_REGNUM)
22893 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
22894 reg_containing_return_addr = LR_REGNUM;
22896 else if (size > 12)
22898 /* Register a4 is being used to hold part of the return value,
22899 but we have dire need of a free, low register. */
22900 restore_a4 = TRUE;
22902 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
22905 if (reg_containing_return_addr != LAST_ARG_REGNUM)
22907 /* The fourth argument register is available. */
22908 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
22910 --pops_needed;
22914 /* Pop as many registers as we can. */
22915 thumb_pop (f, regs_available_for_popping);
22917 /* Process the registers we popped. */
22918 if (reg_containing_return_addr == -1)
22920 /* The return address was popped into the lowest numbered register. */
22921 regs_to_pop &= ~(1 << LR_REGNUM);
22923 reg_containing_return_addr =
22924 number_of_first_bit_set (regs_available_for_popping);
22926 /* Remove this register from the mask of available registers, so that
22927 the return address will not be corrupted by further pops. */
22928 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
22931 /* If we popped other registers then handle them here. */
22932 if (regs_available_for_popping)
22934 int frame_pointer;
22936 /* Work out which register currently contains the frame pointer. */
22937 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
22939 /* Move it into the correct place. */
22940 asm_fprintf (f, "\tmov\t%r, %r\n",
22941 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
22943 /* (Temporarily) remove it from the mask of popped registers. */
22944 regs_available_for_popping &= ~(1 << frame_pointer);
22945 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
22947 if (regs_available_for_popping)
22949 int stack_pointer;
22951 /* We popped the stack pointer as well,
22952 find the register that contains it. */
22953 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
22955 /* Move it into the stack register. */
22956 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
22958 /* At this point we have popped all necessary registers, so
22959 do not worry about restoring regs_available_for_popping
22960 to its correct value:
22962 assert (pops_needed == 0)
22963 assert (regs_available_for_popping == (1 << frame_pointer))
22964 assert (regs_to_pop == (1 << STACK_POINTER)) */
22966 else
22968 /* Since we have just moved the popped value into the frame
22969 pointer, the popping register is available for reuse, and
22970 we know that we still have the stack pointer left to pop. */
22971 regs_available_for_popping |= (1 << frame_pointer);
22975 /* If we still have registers left on the stack, but we no longer have
22976 any registers into which we can pop them, then we must move the return
22977 address into the link register and make available the register that
22978 contained it. */
22979 if (regs_available_for_popping == 0 && pops_needed > 0)
22981 regs_available_for_popping |= 1 << reg_containing_return_addr;
22983 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
22984 reg_containing_return_addr);
22986 reg_containing_return_addr = LR_REGNUM;
22989 /* If we have registers left on the stack then pop some more.
22990 We know that at most we will want to pop FP and SP. */
22991 if (pops_needed > 0)
22993 int popped_into;
22994 int move_to;
22996 thumb_pop (f, regs_available_for_popping);
22998 /* We have popped either FP or SP.
22999 Move whichever one it is into the correct register. */
23000 popped_into = number_of_first_bit_set (regs_available_for_popping);
23001 move_to = number_of_first_bit_set (regs_to_pop);
23003 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23005 regs_to_pop &= ~(1 << move_to);
23007 --pops_needed;
23010 /* If we still have not popped everything then we must have only
23011 had one register available to us and we are now popping the SP. */
23012 if (pops_needed > 0)
23014 int popped_into;
23016 thumb_pop (f, regs_available_for_popping);
23018 popped_into = number_of_first_bit_set (regs_available_for_popping);
23020 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23022 /* assert (regs_to_pop == (1 << STACK_POINTER))
23023    assert (pops_needed == 1)  */
23027 /* If necessary restore the a4 register. */
23028 if (restore_a4)
23030 if (reg_containing_return_addr != LR_REGNUM)
23032 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23033 reg_containing_return_addr = LR_REGNUM;
23036 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23039 if (crtl->calls_eh_return)
23040 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23042 /* Return to caller. */
23043 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23046 /* Scan INSN just before assembler is output for it.
23047 For Thumb-1, we track the status of the condition codes; this
23048 information is used in the cbranchsi4_insn pattern. */
23049 void
23050 thumb1_final_prescan_insn (rtx_insn *insn)
23052 if (flag_print_asm_name)
23053 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23054 INSN_ADDRESSES (INSN_UID (insn)));
23055 /* Don't overwrite the previous setter when we get to a cbranch. */
23056 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23058 enum attr_conds conds;
23060 if (cfun->machine->thumb1_cc_insn)
23062 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23063 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23064 CC_STATUS_INIT;
23066 conds = get_attr_conds (insn);
23067 if (conds == CONDS_SET)
23069 rtx set = single_set (insn);
23070 cfun->machine->thumb1_cc_insn = insn;
23071 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23072 cfun->machine->thumb1_cc_op1 = const0_rtx;
23073 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23074 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23076 rtx src1 = XEXP (SET_SRC (set), 1);
23077 if (src1 == const0_rtx)
23078 cfun->machine->thumb1_cc_mode = CCmode;
23080 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23082 /* Record the src register operand instead of dest because
23083 cprop_hardreg pass propagates src. */
23084 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23087 else if (conds != CONDS_NOCOND)
23088 cfun->machine->thumb1_cc_insn = NULL_RTX;
23091 /* Check if unexpected far jump is used. */
23092 if (cfun->machine->lr_save_eliminated
23093 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23094 internal_error("Unexpected thumb1 far jump");
23097 int
23098 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23100 unsigned HOST_WIDE_INT mask = 0xff;
23101 int i;
23103 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23104 if (val == 0) /* XXX */
23105 return 0;
23107 for (i = 0; i < 25; i++)
23108 if ((val & (mask << i)) == val)
23109 return 1;
23111 return 0;
23114 /* Returns nonzero if the current function contains,
23115 or might contain a far jump. */
23116 static int
23117 thumb_far_jump_used_p (void)
23119 rtx_insn *insn;
23120 bool far_jump = false;
23121 unsigned int func_size = 0;
23123 /* This test is only important for leaf functions. */
23124 /* assert (!leaf_function_p ()); */
23126 /* If we have already decided that far jumps may be used,
23127 do not bother checking again, and always return true even if
23128 it turns out that they are not being used. Once we have made
23129 the decision that far jumps are present (and that hence the link
23130 register will be pushed onto the stack) we cannot go back on it. */
23131 if (cfun->machine->far_jump_used)
23132 return 1;
23134 /* If this function is not being called from the prologue/epilogue
23135 generation code then it must be being called from the
23136 INITIAL_ELIMINATION_OFFSET macro. */
23137 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23139 /* In this case we know that we are being asked about the elimination
23140 of the arg pointer register. If that register is not being used,
23141 then there are no arguments on the stack, and we do not have to
23142 worry that a far jump might force the prologue to push the link
23143 register, changing the stack offsets. In this case we can just
23144 return false, since the presence of far jumps in the function will
23145 not affect stack offsets.
23147 If the arg pointer is live (or if it was live, but has now been
23148 eliminated and so set to dead) then we do have to test to see if
23149 the function might contain a far jump. This test can lead to some
23150 false negatives, since before reload is completed, the length of
23151 branch instructions is not known, so gcc defaults to returning their
23152 longest length, which in turn sets the far jump attribute to true.
23154 A false negative will not result in bad code being generated, but it
23155 will result in a needless push and pop of the link register. We
23156 hope that this does not occur too often.
23158 If we need doubleword stack alignment this could affect the other
23159 elimination offsets so we can't risk getting it wrong. */
23160 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23161 cfun->machine->arg_pointer_live = 1;
23162 else if (!cfun->machine->arg_pointer_live)
23163 return 0;
23166 /* We should not change far_jump_used during or after reload, as there is
23167 no chance to change stack frame layout. */
23168 if (reload_in_progress || reload_completed)
23169 return 0;
23171 /* Check to see if the function contains a branch
23172 insn with the far jump attribute set. */
23173 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23175 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23177 far_jump = true;
23179 func_size += get_attr_length (insn);
23182 /* Attribute far_jump will always be true for thumb1 before
23183 shorten_branch pass. So checking far_jump attribute before
23184 shorten_branch isn't very useful.
23186 Following heuristic tries to estimate more accurately if a far jump
23187 may finally be used. The heuristic is very conservative as there is
23188 no chance to roll-back the decision of not to use far jump.
23190 Thumb1 long branch offset is -2048 to 2046. The worst case is each
23191 2-byte insn is associated with a 4 byte constant pool. Using
23192 function size 2048/3 as the threshold is conservative enough. */
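  /* Worked example (sizes illustrative): a function whose insns total
     700 bytes may occupy up to 700 + 2 * 700 = 2100 bytes once
     worst-case constant pools are counted, exceeding the +/-2048
     branch range; 700 * 3 >= 2048, so far_jump_used is recorded.  A
     600-byte function stays below the threshold (1800 < 2048).  */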
23193 if (far_jump)
23195 if ((func_size * 3) >= 2048)
23197 /* Record the fact that we have decided that
23198 the function does use far jumps. */
23199 cfun->machine->far_jump_used = 1;
23200 return 1;
23204 return 0;
23207 /* Return nonzero if FUNC must be entered in ARM mode. */
23208 static bool
23209 is_called_in_ARM_mode (tree func)
23211 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23213 /* Ignore the problem about functions whose address is taken. */
23214 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23215 return true;
23217 #ifdef ARM_PE
23218 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23219 #else
23220 return false;
23221 #endif
23224 /* Given the stack offsets and register mask in OFFSETS, decide how
23225 many additional registers to push instead of subtracting a constant
23226 from SP. For epilogues the principle is the same except we use pop.
23227 FOR_PROLOGUE indicates which we're generating. */
23228 static int
23229 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23231 HOST_WIDE_INT amount;
23232 unsigned long live_regs_mask = offsets->saved_regs_mask;
23233 /* Extract a mask of the ones we can give to the Thumb's push/pop
23234 instruction. */
23235 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23236 /* Then count how many other high registers will need to be pushed. */
23237 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23238 int n_free, reg_base, size;
23240 if (!for_prologue && frame_pointer_needed)
23241 amount = offsets->locals_base - offsets->saved_regs;
23242 else
23243 amount = offsets->outgoing_args - offsets->saved_regs;
23245 /* If the stack frame size is 512 exactly, we can save one load
23246 instruction, which should make this a win even when optimizing
23247 for speed. */
23248 if (!optimize_size && amount != 512)
23249 return 0;
23251 /* Can't do this if there are high registers to push. */
23252 if (high_regs_pushed != 0)
23253 return 0;
23255 /* Shouldn't do it in the prologue if no registers would normally
23256 be pushed at all. In the epilogue, also allow it if we'll have
23257 a pop insn for the PC. */
23258 if (l_mask == 0
23259 && (for_prologue
23260 || TARGET_BACKTRACE
23261 || (live_regs_mask & 1 << LR_REGNUM) == 0
23262 || TARGET_INTERWORK
23263 || crtl->args.pretend_args_size != 0))
23264 return 0;
23266 /* Don't do this if thumb_expand_prologue wants to emit instructions
23267 between the push and the stack frame allocation. */
23268 if (for_prologue
23269 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
23270 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
23271 return 0;
23273 reg_base = 0;
23274 n_free = 0;
23275 if (!for_prologue)
23277 size = arm_size_return_regs ();
23278 reg_base = ARM_NUM_INTS (size);
23279 live_regs_mask >>= reg_base;
23282 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
23283 && (for_prologue || call_used_regs[reg_base + n_free]))
23285 live_regs_mask >>= 1;
23286 n_free++;
23289 if (n_free == 0)
23290 return 0;
23291 gcc_assert (amount / 4 * 4 == amount);
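  /* Worked examples (values illustrative): with amount == 512 and
     n_free == 1 we return (512 - 508) / 4 == 1, so one extra register
     is pushed/popped and the remaining 508 bytes fit a single SP
     adjustment.  With amount == 8 and n_free >= 2 we return 2 and no
     SP adjustment is needed at all.  */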
23293 if (amount >= 512 && (amount - n_free * 4) < 512)
23294 return (amount - 508) / 4;
23295 if (amount <= n_free * 4)
23296 return amount / 4;
23297 return 0;
23300 /* The bits which aren't usefully expanded as rtl. */
23301 const char *
23302 thumb1_unexpanded_epilogue (void)
23304 arm_stack_offsets *offsets;
23305 int regno;
23306 unsigned long live_regs_mask = 0;
23307 int high_regs_pushed = 0;
23308 int extra_pop;
23309 int had_to_push_lr;
23310 int size;
23312 if (cfun->machine->return_used_this_function != 0)
23313 return "";
23315 if (IS_NAKED (arm_current_func_type ()))
23316 return "";
23318 offsets = arm_get_frame_offsets ();
23319 live_regs_mask = offsets->saved_regs_mask;
23320 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23322 /* We can deduce the registers used from the function's return value.
23323 This is more reliable than examining df_regs_ever_live_p () because that
23324 will be set if the register is ever used in the function, not just if
23325 the register is used to hold a return value. */
23326 size = arm_size_return_regs ();
23328 extra_pop = thumb1_extra_regs_pushed (offsets, false);
23329 if (extra_pop > 0)
23331 unsigned long extra_mask = (1 << extra_pop) - 1;
23332 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
23335 /* The prolog may have pushed some high registers to use as
23336 work registers. e.g. the testsuite file:
23337 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
23338 compiles to produce:
23339 push {r4, r5, r6, r7, lr}
23340 mov r7, r9
23341 mov r6, r8
23342 push {r6, r7}
23343 as part of the prolog. We have to undo that pushing here. */
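  /* For the example above the matching epilogue sequence is roughly
     (low registers illustrative; they depend on the return value size):
     pop {r2, r3}
     mov r8, r2
     mov r9, r3
     followed by the pop of the low registers and the return.  */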
23345 if (high_regs_pushed)
23347 unsigned long mask = live_regs_mask & 0xff;
23348 int next_hi_reg;
23350 /* The available low registers depend on the size of the value we are
23351 returning. */
23352 if (size <= 12)
23353 mask |= 1 << 3;
23354 if (size <= 8)
23355 mask |= 1 << 2;
23357 if (mask == 0)
23358 /* Oh dear! We have no low registers into which we can pop
23359 high registers! */
23360 internal_error
23361 ("no low registers available for popping high registers");
23363 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
23364 if (live_regs_mask & (1 << next_hi_reg))
23365 break;
23367 while (high_regs_pushed)
23369 /* Find lo register(s) into which the high register(s) can
23370 be popped. */
23371 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
23373 if (mask & (1 << regno))
23374 high_regs_pushed--;
23375 if (high_regs_pushed == 0)
23376 break;
23379 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
23381 /* Pop the values into the low register(s). */
23382 thumb_pop (asm_out_file, mask);
23384 /* Move the value(s) into the high registers. */
23385 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
23387 if (mask & (1 << regno))
23389 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
23390 regno);
23392 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
23393 if (live_regs_mask & (1 << next_hi_reg))
23394 break;
23398 live_regs_mask &= ~0x0f00;
23401 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
23402 live_regs_mask &= 0xff;
23404 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
23406 /* Pop the return address into the PC. */
23407 if (had_to_push_lr)
23408 live_regs_mask |= 1 << PC_REGNUM;
23410 /* Either no argument registers were pushed or a backtrace
23411 structure was created which includes an adjusted stack
23412 pointer, so just pop everything. */
23413 if (live_regs_mask)
23414 thumb_pop (asm_out_file, live_regs_mask);
23416 /* We have either just popped the return address into the
23417 PC or it was kept in LR for the entire function.
23418 Note that thumb_pop has already called thumb_exit if the
23419 PC was in the list. */
23420 if (!had_to_push_lr)
23421 thumb_exit (asm_out_file, LR_REGNUM);
23423 else
23425 /* Pop everything but the return address. */
23426 if (live_regs_mask)
23427 thumb_pop (asm_out_file, live_regs_mask);
23429 if (had_to_push_lr)
23431 if (size > 12)
23433 /* We have no free low regs, so save one. */
23434 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
23435 LAST_ARG_REGNUM);
23438 /* Get the return address into a temporary register. */
23439 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
23441 if (size > 12)
23443 /* Move the return address to lr. */
23444 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
23445 LAST_ARG_REGNUM);
23446 /* Restore the low register. */
23447 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
23448 IP_REGNUM);
23449 regno = LR_REGNUM;
23451 else
23452 regno = LAST_ARG_REGNUM;
23454 else
23455 regno = LR_REGNUM;
23457 /* Remove the argument registers that were pushed onto the stack. */
23458 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
23459 SP_REGNUM, SP_REGNUM,
23460 crtl->args.pretend_args_size);
23462 thumb_exit (asm_out_file, regno);
23465 return "";
23468 /* Functions to save and restore machine-specific function data. */
23469 static struct machine_function *
23470 arm_init_machine_status (void)
23472 struct machine_function *machine;
23473 machine = ggc_cleared_alloc<machine_function> ();
23475 #if ARM_FT_UNKNOWN != 0
23476 machine->func_type = ARM_FT_UNKNOWN;
23477 #endif
23478 return machine;
23481 /* Return an RTX indicating where the return address to the
23482 calling function can be found. */
23484 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
23486 if (count != 0)
23487 return NULL_RTX;
23489 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
23492 /* Do anything needed before RTL is emitted for each function. */
23493 void
23494 arm_init_expanders (void)
23496 /* Arrange to initialize and mark the machine per-function status. */
23497 init_machine_status = arm_init_machine_status;
23499 /* This is to stop the combine pass optimizing away the alignment
23500 adjustment of va_arg. */
23501 /* ??? It is claimed that this should not be necessary. */
23502 if (cfun)
23503 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
23506 /* Check that FUNC is called with a different mode. */
23508 bool
23509 arm_change_mode_p (tree func)
23511 if (TREE_CODE (func) != FUNCTION_DECL)
23512 return false;
23514 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
23516 if (!callee_tree)
23517 callee_tree = target_option_default_node;
23519 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
23520 int flags = callee_opts->x_target_flags;
23522 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
23525 /* Like arm_compute_initial_elimination_offset. Simpler because there
23526 isn't an ABI specified frame pointer for Thumb. Instead, we set it
23527 to point at the base of the local variables after static stack
23528 space for a function has been allocated. */
23530 HOST_WIDE_INT
23531 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23533 arm_stack_offsets *offsets;
23535 offsets = arm_get_frame_offsets ();
23537 switch (from)
23539 case ARG_POINTER_REGNUM:
23540 switch (to)
23542 case STACK_POINTER_REGNUM:
23543 return offsets->outgoing_args - offsets->saved_args;
23545 case FRAME_POINTER_REGNUM:
23546 return offsets->soft_frame - offsets->saved_args;
23548 case ARM_HARD_FRAME_POINTER_REGNUM:
23549 return offsets->saved_regs - offsets->saved_args;
23551 case THUMB_HARD_FRAME_POINTER_REGNUM:
23552 return offsets->locals_base - offsets->saved_args;
23554 default:
23555 gcc_unreachable ();
23557 break;
23559 case FRAME_POINTER_REGNUM:
23560 switch (to)
23562 case STACK_POINTER_REGNUM:
23563 return offsets->outgoing_args - offsets->soft_frame;
23565 case ARM_HARD_FRAME_POINTER_REGNUM:
23566 return offsets->saved_regs - offsets->soft_frame;
23568 case THUMB_HARD_FRAME_POINTER_REGNUM:
23569 return offsets->locals_base - offsets->soft_frame;
23571 default:
23572 gcc_unreachable ();
23574 break;
23576 default:
23577 gcc_unreachable ();
23581 /* Generate the function's prologue. */
23583 void
23584 thumb1_expand_prologue (void)
23586 rtx_insn *insn;
23588 HOST_WIDE_INT amount;
23589 HOST_WIDE_INT size;
23590 arm_stack_offsets *offsets;
23591 unsigned long func_type;
23592 int regno;
23593 unsigned long live_regs_mask;
23594 unsigned long l_mask;
23595 unsigned high_regs_pushed = 0;
23596 bool lr_needs_saving;
23598 func_type = arm_current_func_type ();
23600 /* Naked functions don't have prologues. */
23601 if (IS_NAKED (func_type))
23603 if (flag_stack_usage_info)
23604 current_function_static_stack_size = 0;
23605 return;
23608 if (IS_INTERRUPT (func_type))
23610 error ("interrupt Service Routines cannot be coded in Thumb mode");
23611 return;
23614 if (is_called_in_ARM_mode (current_function_decl))
23615 emit_insn (gen_prologue_thumb1_interwork ());
23617 offsets = arm_get_frame_offsets ();
23618 live_regs_mask = offsets->saved_regs_mask;
23619 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
23621 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
23622 l_mask = live_regs_mask & 0x40ff;
23623 /* Then count how many other high registers will need to be pushed. */
23624 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23626 if (crtl->args.pretend_args_size)
23628 rtx x = GEN_INT (-crtl->args.pretend_args_size);
23630 if (cfun->machine->uses_anonymous_args)
23632 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
23633 unsigned long mask;
23635 mask = 1ul << (LAST_ARG_REGNUM + 1);
23636 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
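  /* Worked example (assuming r0-r3 are the argument registers, so
     LAST_ARG_REGNUM == 3): pretend_args_size == 8 gives num_pushes == 2,
     hence mask == (1 << 4) - (1 << 2) == 0xc and we emit push {r2, r3}.  */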
23638 insn = thumb1_emit_multi_reg_push (mask, 0);
23640 else
23642 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23643 stack_pointer_rtx, x));
23645 RTX_FRAME_RELATED_P (insn) = 1;
23648 if (TARGET_BACKTRACE)
23650 HOST_WIDE_INT offset = 0;
23651 unsigned work_register;
23652 rtx work_reg, x, arm_hfp_rtx;
23654 /* We have been asked to create a stack backtrace structure.
23655 The code looks like this:
23657 0 .align 2
23658 0 func:
23659 0 sub SP, #16 Reserve space for 4 registers.
23660 2 push {R7} Push low registers.
23661 4 add R7, SP, #20 Get the stack pointer before the push.
23662 6 str R7, [SP, #8] Store the stack pointer
23663 (before reserving the space).
23664 8 mov R7, PC Get hold of the start of this code + 12.
23665 10 str R7, [SP, #16] Store it.
23666 12 mov R7, FP Get hold of the current frame pointer.
23667 14 str R7, [SP, #4] Store it.
23668 16 mov R7, LR Get hold of the current return address.
23669 18 str R7, [SP, #12] Store it.
23670 20 add R7, SP, #16 Point at the start of the
23671 backtrace structure.
23672 22 mov FP, R7 Put this value into the frame pointer. */
23674 work_register = thumb_find_work_register (live_regs_mask);
23675 work_reg = gen_rtx_REG (SImode, work_register);
23676 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
23678 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23679 stack_pointer_rtx, GEN_INT (-16)));
23680 RTX_FRAME_RELATED_P (insn) = 1;
23682 if (l_mask)
23684 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
23685 RTX_FRAME_RELATED_P (insn) = 1;
23686 lr_needs_saving = false;
23688 offset = bit_count (l_mask) * UNITS_PER_WORD;
23691 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
23692 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
23694 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
23695 x = gen_frame_mem (SImode, x);
23696 emit_move_insn (x, work_reg);
23698 /* Make sure that the instruction fetching the PC is in the right place
23699 to calculate "start of backtrace creation code + 12". */
23700 /* ??? The stores using the common WORK_REG ought to be enough to
23701 prevent the scheduler from doing anything weird. Failing that
23702 we could always move all of the following into an UNSPEC_VOLATILE. */
23703 if (l_mask)
23705 x = gen_rtx_REG (SImode, PC_REGNUM);
23706 emit_move_insn (work_reg, x);
23708 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
23709 x = gen_frame_mem (SImode, x);
23710 emit_move_insn (x, work_reg);
23712 emit_move_insn (work_reg, arm_hfp_rtx);
23714 x = plus_constant (Pmode, stack_pointer_rtx, offset);
23715 x = gen_frame_mem (SImode, x);
23716 emit_move_insn (x, work_reg);
23718 else
23720 emit_move_insn (work_reg, arm_hfp_rtx);
23722 x = plus_constant (Pmode, stack_pointer_rtx, offset);
23723 x = gen_frame_mem (SImode, x);
23724 emit_move_insn (x, work_reg);
23726 x = gen_rtx_REG (SImode, PC_REGNUM);
23727 emit_move_insn (work_reg, x);
23729 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
23730 x = gen_frame_mem (SImode, x);
23731 emit_move_insn (x, work_reg);
23734 x = gen_rtx_REG (SImode, LR_REGNUM);
23735 emit_move_insn (work_reg, x);
23737 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
23738 x = gen_frame_mem (SImode, x);
23739 emit_move_insn (x, work_reg);
23741 x = GEN_INT (offset + 12);
23742 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
23744 emit_move_insn (arm_hfp_rtx, work_reg);
23746 /* Optimization: If we are not pushing any low registers but we are going
23747 to push some high registers then delay our first push. This will just
23748 be a push of LR and we can combine it with the push of the first high
23749 register. */
23750 else if ((l_mask & 0xff) != 0
23751 || (high_regs_pushed == 0 && lr_needs_saving))
23753 unsigned long mask = l_mask;
23754 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
23755 insn = thumb1_emit_multi_reg_push (mask, mask);
23756 RTX_FRAME_RELATED_P (insn) = 1;
23757 lr_needs_saving = false;
23760 if (high_regs_pushed)
23762 unsigned pushable_regs;
23763 unsigned next_hi_reg;
23764 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
23765 : crtl->args.info.nregs;
23766 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
23768 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
23769 if (live_regs_mask & (1 << next_hi_reg))
23770 break;
23772 /* Here we need to mask out registers used for passing arguments,
23773 even if they can be pushed. This is to avoid using them to stash the high
23774 registers; such a stash could clobber arguments that are still live. */
23775 pushable_regs = l_mask & (~arg_regs_mask);
23776 if (lr_needs_saving)
23777 pushable_regs &= ~(1 << LR_REGNUM);
23779 if (pushable_regs == 0)
23780 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
23782 while (high_regs_pushed > 0)
23784 unsigned long real_regs_mask = 0;
23785 unsigned long push_mask = 0;
23787 for (regno = LR_REGNUM; regno >= 0; regno --)
23789 if (pushable_regs & (1 << regno))
23791 emit_move_insn (gen_rtx_REG (SImode, regno),
23792 gen_rtx_REG (SImode, next_hi_reg));
23794 high_regs_pushed --;
23795 real_regs_mask |= (1 << next_hi_reg);
23796 push_mask |= (1 << regno);
23798 if (high_regs_pushed)
23800 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
23801 next_hi_reg --)
23802 if (live_regs_mask & (1 << next_hi_reg))
23803 break;
23805 else
23806 break;
23810 /* If we had to find a work register and we have not yet
23811 saved the LR then add it to the list of regs to push. */
23812 if (lr_needs_saving)
23814 push_mask |= 1 << LR_REGNUM;
23815 real_regs_mask |= 1 << LR_REGNUM;
23816 lr_needs_saving = false;
23819 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
23820 RTX_FRAME_RELATED_P (insn) = 1;
23824 /* Load the pic register before setting the frame pointer,
23825 so we can use r7 as a temporary work register. */
23826 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23827 arm_load_pic_register (live_regs_mask);
23829 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
23830 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
23831 stack_pointer_rtx);
23833 size = offsets->outgoing_args - offsets->saved_args;
23834 if (flag_stack_usage_info)
23835 current_function_static_stack_size = size;
23837 /* If we have a frame, then do stack checking. FIXME: not implemented. */
23838 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
23839 sorry ("-fstack-check=specific for Thumb-1");
23841 amount = offsets->outgoing_args - offsets->saved_regs;
23842 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
23843 if (amount)
23845 if (amount < 512)
23847 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23848 GEN_INT (- amount)));
23849 RTX_FRAME_RELATED_P (insn) = 1;
23851 else
23853 rtx reg, dwarf;
23855 /* The stack decrement is too big for an immediate value in a single
23856 insn. In theory we could issue multiple subtracts, but after
23857 three of them it becomes more space efficient to place the full
23858 value in the constant pool and load into a register. (Also the
23859 ARM debugger really likes to see only one stack decrement per
23860 function). So instead we look for a scratch register into which
23861 we can load the decrement, and then we subtract this from the
23862 stack pointer. Unfortunately on the thumb the only available
23863 scratch registers are the argument registers, and we cannot use
23864 these as they may hold arguments to the function. Instead we
23865 attempt to locate a call preserved register which is used by this
23866 function. If we can find one, then we know that it will have
23867 been pushed at the start of the prologue and so we can corrupt
23868 it now. */
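  /* Worked example (values illustrative): for amount == 1024 with r4
     live we emit roughly
     ldr r4, <pool entry holding -1024>
     add sp, sp, r4
     and describe the net effect to the unwinder with a
     REG_FRAME_RELATED_EXPR note equivalent to sp = sp - 1024.  */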
23869 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
23870 if (live_regs_mask & (1 << regno))
23871 break;
23873 gcc_assert(regno <= LAST_LO_REGNUM);
23875 reg = gen_rtx_REG (SImode, regno);
23877 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
23879 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23880 stack_pointer_rtx, reg));
23882 dwarf = gen_rtx_SET (stack_pointer_rtx,
23883 plus_constant (Pmode, stack_pointer_rtx,
23884 -amount));
23885 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23886 RTX_FRAME_RELATED_P (insn) = 1;
23890 if (frame_pointer_needed)
23891 thumb_set_frame_pointer (offsets);
23893 /* If we are profiling, make sure no instructions are scheduled before
23894 the call to mcount. Similarly if the user has requested no
23895 scheduling in the prolog. Similarly if we want non-call exceptions
23896 using the EABI unwinder, to prevent faulting instructions from being
23897 swapped with a stack adjustment. */
23898 if (crtl->profile || !TARGET_SCHED_PROLOG
23899 || (arm_except_unwind_info (&global_options) == UI_TARGET
23900 && cfun->can_throw_non_call_exceptions))
23901 emit_insn (gen_blockage ());
23903 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
23904 if (live_regs_mask & 0xff)
23905 cfun->machine->lr_save_eliminated = 0;
23908 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
23909 POP instruction can be generated. LR should be replaced by PC. All
23910 the checks required are already done by USE_RETURN_INSN (). Hence,
23911 all we really need to check here is whether a single register or
23912 multiple registers are to be returned. */
23913 void
23914 thumb2_expand_return (bool simple_return)
23916 int i, num_regs;
23917 unsigned long saved_regs_mask;
23918 arm_stack_offsets *offsets;
23920 offsets = arm_get_frame_offsets ();
23921 saved_regs_mask = offsets->saved_regs_mask;
23923 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
23924 if (saved_regs_mask & (1 << i))
23925 num_regs++;
23927 if (!simple_return && saved_regs_mask)
23929 if (num_regs == 1)
23931 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
23932 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
23933 rtx addr = gen_rtx_MEM (SImode,
23934 gen_rtx_POST_INC (SImode,
23935 stack_pointer_rtx));
23936 set_mem_alias_set (addr, get_frame_alias_set ());
23937 XVECEXP (par, 0, 0) = ret_rtx;
23938 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
23939 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
23940 emit_jump_insn (par);
23942 else
23944 saved_regs_mask &= ~ (1 << LR_REGNUM);
23945 saved_regs_mask |= (1 << PC_REGNUM);
23946 arm_emit_multi_reg_pop (saved_regs_mask);
23949 else
23951 emit_jump_insn (simple_return_rtx);
23955 void
23956 thumb1_expand_epilogue (void)
23958 HOST_WIDE_INT amount;
23959 arm_stack_offsets *offsets;
23960 int regno;
23962 /* Naked functions don't have epilogues. */
23963 if (IS_NAKED (arm_current_func_type ()))
23964 return;
23966 offsets = arm_get_frame_offsets ();
23967 amount = offsets->outgoing_args - offsets->saved_regs;
23969 if (frame_pointer_needed)
23971 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
23972 amount = offsets->locals_base - offsets->saved_regs;
23974 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
23976 gcc_assert (amount >= 0);
23977 if (amount)
23979 emit_insn (gen_blockage ());
23981 if (amount < 512)
23982 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23983 GEN_INT (amount)));
23984 else
23986 /* r3 is always free in the epilogue. */
23987 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
23989 emit_insn (gen_movsi (reg, GEN_INT (amount)));
23990 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
23994 /* Emit a USE (stack_pointer_rtx), so that
23995 the stack adjustment will not be deleted. */
23996 emit_insn (gen_force_register_use (stack_pointer_rtx));
23998 if (crtl->profile || !TARGET_SCHED_PROLOG)
23999 emit_insn (gen_blockage ());
24001 /* Emit a clobber for each insn that will be restored in the epilogue,
24002 so that flow2 will get register lifetimes correct. */
24003 for (regno = 0; regno < 13; regno++)
24004 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24005 emit_clobber (gen_rtx_REG (SImode, regno));
24007 if (! df_regs_ever_live_p (LR_REGNUM))
24008 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24011 /* Epilogue code for APCS frame. */
24012 static void
24013 arm_expand_epilogue_apcs_frame (bool really_return)
24015 unsigned long func_type;
24016 unsigned long saved_regs_mask;
24017 int num_regs = 0;
24018 int i;
24019 int floats_from_frame = 0;
24020 arm_stack_offsets *offsets;
24022 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24023 func_type = arm_current_func_type ();
24025 /* Get frame offsets for ARM. */
24026 offsets = arm_get_frame_offsets ();
24027 saved_regs_mask = offsets->saved_regs_mask;
24029 /* Find the offset of the floating-point save area in the frame. */
24030 floats_from_frame
24031 = (offsets->saved_args
24032 + arm_compute_static_chain_stack_bytes ()
24033 - offsets->frame);
24035 /* Compute how many core registers are saved and how far away the floats are. */
24036 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24037 if (saved_regs_mask & (1 << i))
24039 num_regs++;
24040 floats_from_frame += 4;
24043 if (TARGET_HARD_FLOAT)
24045 int start_reg;
24046 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24048 /* The offset is from IP_REGNUM. */
24049 int saved_size = arm_get_vfp_saved_size ();
24050 if (saved_size > 0)
24052 rtx_insn *insn;
24053 floats_from_frame += saved_size;
24054 insn = emit_insn (gen_addsi3 (ip_rtx,
24055 hard_frame_pointer_rtx,
24056 GEN_INT (-floats_from_frame)));
24057 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24058 ip_rtx, hard_frame_pointer_rtx);
24061 /* Generate VFP register multi-pop. */
24062 start_reg = FIRST_VFP_REGNUM;
24064 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24065 /* Look for a case where a reg does not need restoring. */
24066 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24067 && (!df_regs_ever_live_p (i + 1)
24068 || call_used_regs[i + 1]))
24070 if (start_reg != i)
24071 arm_emit_vfp_multi_reg_pop (start_reg,
24072 (i - start_reg) / 2,
24073 gen_rtx_REG (SImode,
24074 IP_REGNUM));
24075 start_reg = i + 2;
24078 /* Restore the remaining regs that we have discovered (or possibly
24079 even all of them, if the conditional in the for loop never
24080 fired). */
24081 if (start_reg != i)
24082 arm_emit_vfp_multi_reg_pop (start_reg,
24083 (i - start_reg) / 2,
24084 gen_rtx_REG (SImode, IP_REGNUM));
24087 if (TARGET_IWMMXT)
24089 /* The frame pointer is guaranteed to be non-double-word aligned, as
24090 it is set to double-word-aligned old_stack_pointer - 4. */
24091 rtx_insn *insn;
24092 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24094 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24095 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24097 rtx addr = gen_frame_mem (V2SImode,
24098 plus_constant (Pmode, hard_frame_pointer_rtx,
24099 - lrm_count * 4));
24100 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24101 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24102 gen_rtx_REG (V2SImode, i),
24103 NULL_RTX);
24104 lrm_count += 2;
24108 /* saved_regs_mask should contain IP which contains old stack pointer
24109 at the time of activation creation. Since SP and IP are adjacent registers,
24110 we can restore the value directly into SP. */
24111 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24112 saved_regs_mask &= ~(1 << IP_REGNUM);
24113 saved_regs_mask |= (1 << SP_REGNUM);
24115 /* There are two registers left in saved_regs_mask - LR and PC. We
24116 only need to restore LR (the return address), but to
24117 save time we can load it directly into PC, unless we need a
24118 special function exit sequence, or we are not really returning. */
24119 if (really_return
24120 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24121 && !crtl->calls_eh_return)
24122 /* Delete LR from the register mask, so that LR on
24123 the stack is loaded into the PC in the register mask. */
24124 saved_regs_mask &= ~(1 << LR_REGNUM);
24125 else
24126 saved_regs_mask &= ~(1 << PC_REGNUM);
24128 num_regs = bit_count (saved_regs_mask);
24129 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24131 rtx_insn *insn;
24132 emit_insn (gen_blockage ());
24133 /* Unwind the stack to just below the saved registers. */
24134 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24135 hard_frame_pointer_rtx,
24136 GEN_INT (- 4 * num_regs)));
24138 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24139 stack_pointer_rtx, hard_frame_pointer_rtx);
24142 arm_emit_multi_reg_pop (saved_regs_mask);
24144 if (IS_INTERRUPT (func_type))
24146 /* Interrupt handlers will have pushed the
24147 IP onto the stack, so restore it now. */
24148 rtx_insn *insn;
24149 rtx addr = gen_rtx_MEM (SImode,
24150 gen_rtx_POST_INC (SImode,
24151 stack_pointer_rtx));
24152 set_mem_alias_set (addr, get_frame_alias_set ());
24153 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24154 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24155 gen_rtx_REG (SImode, IP_REGNUM),
24156 NULL_RTX);
24159 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24160 return;
24162 if (crtl->calls_eh_return)
24163 emit_insn (gen_addsi3 (stack_pointer_rtx,
24164 stack_pointer_rtx,
24165 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24167 if (IS_STACKALIGN (func_type))
24168 /* Restore the original stack pointer. Before prologue, the stack was
24169 realigned and the original stack pointer saved in r0. For details,
24170 see comment in arm_expand_prologue. */
24171 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
24173 emit_jump_insn (simple_return_rtx);
24176 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24177 function is not a sibcall. */
24178 void
24179 arm_expand_epilogue (bool really_return)
24181 unsigned long func_type;
24182 unsigned long saved_regs_mask;
24183 int num_regs = 0;
24184 int i;
24185 int amount;
24186 arm_stack_offsets *offsets;
24188 func_type = arm_current_func_type ();
24190 /* Naked functions don't have an epilogue. Hence, generate a return pattern and
24191 let output_return_instruction take care of any instruction emission. */
24192 if (IS_NAKED (func_type)
24193 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24195 if (really_return)
24196 emit_jump_insn (simple_return_rtx);
24197 return;
24200 /* If we are throwing an exception, then we really must be doing a
24201 return, so we can't tail-call. */
24202 gcc_assert (!crtl->calls_eh_return || really_return);
24204 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24206 arm_expand_epilogue_apcs_frame (really_return);
24207 return;
24210 /* Get frame offsets for ARM. */
24211 offsets = arm_get_frame_offsets ();
24212 saved_regs_mask = offsets->saved_regs_mask;
24213 num_regs = bit_count (saved_regs_mask);
24215 if (frame_pointer_needed)
24217 rtx_insn *insn;
24218 /* Restore stack pointer if necessary. */
24219 if (TARGET_ARM)
24221 /* In ARM mode, frame pointer points to first saved register.
24222 Restore stack pointer to last saved register. */
24223 amount = offsets->frame - offsets->saved_regs;
24225 /* Force out any pending memory operations that reference stacked data
24226 before stack de-allocation occurs. */
24227 emit_insn (gen_blockage ());
24228 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24229 hard_frame_pointer_rtx,
24230 GEN_INT (amount)));
24231 arm_add_cfa_adjust_cfa_note (insn, amount,
24232 stack_pointer_rtx,
24233 hard_frame_pointer_rtx);
24235 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24236 deleted. */
24237 emit_insn (gen_force_register_use (stack_pointer_rtx));
24239 else
24241 /* In Thumb-2 mode, the frame pointer points to the last saved
24242 register. */
24243 amount = offsets->locals_base - offsets->saved_regs;
24244 if (amount)
24246 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24247 hard_frame_pointer_rtx,
24248 GEN_INT (amount)));
24249 arm_add_cfa_adjust_cfa_note (insn, amount,
24250 hard_frame_pointer_rtx,
24251 hard_frame_pointer_rtx);
24254 /* Force out any pending memory operations that reference stacked data
24255 before stack de-allocation occurs. */
24256 emit_insn (gen_blockage ());
24257 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24258 hard_frame_pointer_rtx));
24259 arm_add_cfa_adjust_cfa_note (insn, 0,
24260 stack_pointer_rtx,
24261 hard_frame_pointer_rtx);
24262 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24263 deleted. */
24264 emit_insn (gen_force_register_use (stack_pointer_rtx));
24267 else
24269 /* Pop off outgoing args and local frame to adjust stack pointer to
24270 last saved register. */
24271 amount = offsets->outgoing_args - offsets->saved_regs;
24272 if (amount)
24274 rtx_insn *tmp;
24275 /* Force out any pending memory operations that reference stacked data
24276 before stack de-allocation occurs. */
24277 emit_insn (gen_blockage ());
24278 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24279 stack_pointer_rtx,
24280 GEN_INT (amount)));
24281 arm_add_cfa_adjust_cfa_note (tmp, amount,
24282 stack_pointer_rtx, stack_pointer_rtx);
24283 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24284 not deleted. */
24285 emit_insn (gen_force_register_use (stack_pointer_rtx));
24289 if (TARGET_HARD_FLOAT)
24291 /* Generate VFP register multi-pop. */
24292 int end_reg = LAST_VFP_REGNUM + 1;
24294 /* Scan the registers in reverse order. We need to match
24295 any groupings made in the prologue and generate matching
24296 vldm operations. The need to match groups is because,
24297 unlike pop, vldm can only do consecutive regs. */
24298 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
24299 /* Look for a case where a reg does not need restoring. */
24300 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24301 && (!df_regs_ever_live_p (i + 1)
24302 || call_used_regs[i + 1]))
24304 /* Restore the regs discovered so far (from reg+2 to
24305 end_reg). */
24306 if (end_reg > i + 2)
24307 arm_emit_vfp_multi_reg_pop (i + 2,
24308 (end_reg - (i + 2)) / 2,
24309 stack_pointer_rtx);
24310 end_reg = i;
24313 /* Restore the remaining regs that we have discovered (or possibly
24314 even all of them, if the conditional in the for loop never
24315 fired). */
24316 if (end_reg > i + 2)
24317 arm_emit_vfp_multi_reg_pop (i + 2,
24318 (end_reg - (i + 2)) / 2,
24319 stack_pointer_rtx);
24322 if (TARGET_IWMMXT)
24323 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
24324 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24326 rtx_insn *insn;
24327 rtx addr = gen_rtx_MEM (V2SImode,
24328 gen_rtx_POST_INC (SImode,
24329 stack_pointer_rtx));
24330 set_mem_alias_set (addr, get_frame_alias_set ());
24331 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24332 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24333 gen_rtx_REG (V2SImode, i),
24334 NULL_RTX);
24335 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24336 stack_pointer_rtx, stack_pointer_rtx);
24339 if (saved_regs_mask)
24341 rtx insn;
24342 bool return_in_pc = false;
24344 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
24345 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
24346 && !IS_STACKALIGN (func_type)
24347 && really_return
24348 && crtl->args.pretend_args_size == 0
24349 && saved_regs_mask & (1 << LR_REGNUM)
24350 && !crtl->calls_eh_return)
24352 saved_regs_mask &= ~(1 << LR_REGNUM);
24353 saved_regs_mask |= (1 << PC_REGNUM);
24354 return_in_pc = true;
24357 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
24359 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24360 if (saved_regs_mask & (1 << i))
24362 rtx addr = gen_rtx_MEM (SImode,
24363 gen_rtx_POST_INC (SImode,
24364 stack_pointer_rtx));
24365 set_mem_alias_set (addr, get_frame_alias_set ());
24367 if (i == PC_REGNUM)
24369 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24370 XVECEXP (insn, 0, 0) = ret_rtx;
24371 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
24372 addr);
24373 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
24374 insn = emit_jump_insn (insn);
24376 else
24378 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
24379 addr));
24380 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24381 gen_rtx_REG (SImode, i),
24382 NULL_RTX);
24383 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24384 stack_pointer_rtx,
24385 stack_pointer_rtx);
24389 else
24391 if (TARGET_LDRD
24392 && current_tune->prefer_ldrd_strd
24393 && !optimize_function_for_size_p (cfun))
24395 if (TARGET_THUMB2)
24396 thumb2_emit_ldrd_pop (saved_regs_mask);
24397 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
24398 arm_emit_ldrd_pop (saved_regs_mask);
24399 else
24400 arm_emit_multi_reg_pop (saved_regs_mask);
24402 else
24403 arm_emit_multi_reg_pop (saved_regs_mask);
24406 if (return_in_pc)
24407 return;
24410 amount
24411 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
24412 if (amount)
24414 int i, j;
24415 rtx dwarf = NULL_RTX;
24416 rtx_insn *tmp =
24417 emit_insn (gen_addsi3 (stack_pointer_rtx,
24418 stack_pointer_rtx,
24419 GEN_INT (amount)));
24421 RTX_FRAME_RELATED_P (tmp) = 1;
24423 if (cfun->machine->uses_anonymous_args)
24425 /* Restore pretend args. Refer to arm_expand_prologue for how the
24426 pretend args are saved on the stack. */
24427 int num_regs = crtl->args.pretend_args_size / 4;
24428 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
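  /* Worked example (illustrative): pretend_args_size == 8 gives
     num_regs == 2, so saved_regs_mask == (0xf0 >> 2) & 0xf == 0xc and
     REG_CFA_RESTORE notes are attached for r2 and r3, mirroring the
     prologue's push of those registers.  */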
24429 for (j = 0, i = 0; j < num_regs; i++)
24430 if (saved_regs_mask & (1 << i))
24432 rtx reg = gen_rtx_REG (SImode, i);
24433 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
24434 j++;
24436 REG_NOTES (tmp) = dwarf;
24438 arm_add_cfa_adjust_cfa_note (tmp, amount,
24439 stack_pointer_rtx, stack_pointer_rtx);
24442 if (!really_return)
24443 return;
24445 if (crtl->calls_eh_return)
24446 emit_insn (gen_addsi3 (stack_pointer_rtx,
24447 stack_pointer_rtx,
24448 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24450 if (IS_STACKALIGN (func_type))
24451 /* Restore the original stack pointer. Before prologue, the stack was
24452 realigned and the original stack pointer saved in r0. For details,
24453 see comment in arm_expand_prologue. */
24454 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
24456 emit_jump_insn (simple_return_rtx);
24459 /* Implementation of insn prologue_thumb1_interwork. This is the first
24460 "instruction" of a function called in ARM mode. Swap to thumb mode. */
24462 const char *
24463 thumb1_output_interwork (void)
24465 const char * name;
24466 FILE *f = asm_out_file;
24468 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
24469 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
24470 == SYMBOL_REF);
24471 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
24473 /* Generate code sequence to switch us into Thumb mode. */
24474 /* The .code 32 directive has already been emitted by
24475 ASM_DECLARE_FUNCTION_NAME. */
24476 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
24477 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
24479 /* Generate a label, so that the debugger will notice the
24480 change in instruction sets. This label is also used by
24481 the assembler to bypass the ARM code when this function
24482 is called from a Thumb encoded function elsewhere in the
24483 same file. Hence the definition of STUB_NAME here must
24484 agree with the definition in gas/config/tc-arm.c. */
24486 #define STUB_NAME ".real_start_of"
24488 fprintf (f, "\t.code\t16\n");
24489 #ifdef ARM_PE
24490 if (arm_dllexport_name_p (name))
24491 name = arm_strip_name_encoding (name);
24492 #endif
24493 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
24494 fprintf (f, "\t.thumb_func\n");
24495 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
24497 return "";
24500 /* Handle the case of a double word load into a low register from
24501 a computed memory address. The computed address may involve a
24502 register which is overwritten by the load. */
24503 const char *
24504 thumb_load_double_from_address (rtx *operands)
24506 rtx addr;
24507 rtx base;
24508 rtx offset;
24509 rtx arg1;
24510 rtx arg2;
24512 gcc_assert (REG_P (operands[0]));
24513 gcc_assert (MEM_P (operands[1]));
24515 /* Get the memory address. */
24516 addr = XEXP (operands[1], 0);
24518 /* Work out how the memory address is computed. */
24519 switch (GET_CODE (addr))
24521 case REG:
24522 operands[2] = adjust_address (operands[1], SImode, 4);
24524 if (REGNO (operands[0]) == REGNO (addr))
24526 output_asm_insn ("ldr\t%H0, %2", operands);
24527 output_asm_insn ("ldr\t%0, %1", operands);
24529 else
24531 output_asm_insn ("ldr\t%0, %1", operands);
24532 output_asm_insn ("ldr\t%H0, %2", operands);
24534 break;
24536 case CONST:
24537 /* Compute <address> + 4 for the high order load. */
24538 operands[2] = adjust_address (operands[1], SImode, 4);
24540 output_asm_insn ("ldr\t%0, %1", operands);
24541 output_asm_insn ("ldr\t%H0, %2", operands);
24542 break;
24544 case PLUS:
24545 arg1 = XEXP (addr, 0);
24546 arg2 = XEXP (addr, 1);
24548 if (CONSTANT_P (arg1))
24549 base = arg2, offset = arg1;
24550 else
24551 base = arg1, offset = arg2;
24553 gcc_assert (REG_P (base));
24555 /* Catch the case of <address> = <reg> + <reg> */
24556 if (REG_P (offset))
24558 int reg_offset = REGNO (offset);
24559 int reg_base = REGNO (base);
24560 int reg_dest = REGNO (operands[0]);
24562 /* Add the base and offset registers together into the
24563 higher destination register. */
24564 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
24565 reg_dest + 1, reg_base, reg_offset);
24567 /* Load the lower destination register from the address in
24568 the higher destination register. */
24569 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
24570 reg_dest, reg_dest + 1);
24572 /* Load the higher destination register from its own address
24573 plus 4. */
24574 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
24575 reg_dest + 1, reg_dest + 1);
24577 else
24579 /* Compute <address> + 4 for the high order load. */
24580 operands[2] = adjust_address (operands[1], SImode, 4);
24582 /* If the computed address is held in the low order register
24583 then load the high order register first, otherwise always
24584 load the low order register first. */
24585 if (REGNO (operands[0]) == REGNO (base))
24587 output_asm_insn ("ldr\t%H0, %2", operands);
24588 output_asm_insn ("ldr\t%0, %1", operands);
24590 else
24592 output_asm_insn ("ldr\t%0, %1", operands);
24593 output_asm_insn ("ldr\t%H0, %2", operands);
24596 break;
24598 case LABEL_REF:
24599 /* With no registers to worry about we can just load the value
24600 directly. */
24601 operands[2] = adjust_address (operands[1], SImode, 4);
24603 output_asm_insn ("ldr\t%H0, %2", operands);
24604 output_asm_insn ("ldr\t%0, %1", operands);
24605 break;
24607 default:
24608 gcc_unreachable ();
24611 return "";
24614 const char *
24615 thumb_output_move_mem_multiple (int n, rtx *operands)
24617 switch (n)
24619 case 2:
24620 if (REGNO (operands[4]) > REGNO (operands[5]))
24621 std::swap (operands[4], operands[5]);
24623 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
24624 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
24625 break;
24627 case 3:
24628 if (REGNO (operands[4]) > REGNO (operands[5]))
24629 std::swap (operands[4], operands[5]);
24630 if (REGNO (operands[5]) > REGNO (operands[6]))
24631 std::swap (operands[5], operands[6]);
24632 if (REGNO (operands[4]) > REGNO (operands[5]))
24633 std::swap (operands[4], operands[5]);
24635 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
24636 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
24637 break;
24639 default:
24640 gcc_unreachable ();
24643 return "";
24646 /* Output a call-via instruction for thumb state. */
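/* The call is emitted as a "bl" to a per-register label; the label is
   defined later (in arm_file_end for the text section, or per section
   when using function sections) as a "bx" through that register.
   Roughly (label name illustrative):
   bl .Lcall_via_r4   ...   .Lcall_via_r4: bx r4  */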
24647 const char *
24648 thumb_call_via_reg (rtx reg)
24650 int regno = REGNO (reg);
24651 rtx *labelp;
24653 gcc_assert (regno < LR_REGNUM);
24655 /* If we are in the normal text section we can use a single instance
24656 per compilation unit. If we are doing function sections, then we need
24657 an entry per section, since we can't rely on reachability. */
24658 if (in_section == text_section)
24660 thumb_call_reg_needed = 1;
24662 if (thumb_call_via_label[regno] == NULL)
24663 thumb_call_via_label[regno] = gen_label_rtx ();
24664 labelp = thumb_call_via_label + regno;
24666 else
24668 if (cfun->machine->call_via[regno] == NULL)
24669 cfun->machine->call_via[regno] = gen_label_rtx ();
24670 labelp = cfun->machine->call_via + regno;
24673 output_asm_insn ("bl\t%a0", labelp);
24674 return "";
24677 /* Routines for generating rtl. */
24678 void
24679 thumb_expand_movmemqi (rtx *operands)
24681 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
24682 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
24683 HOST_WIDE_INT len = INTVAL (operands[2]);
24684 HOST_WIDE_INT offset = 0;
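  /* The block is copied in 12-, 8-, 4-, 2- and 1-byte chunks.  Worked
     example (length illustrative): len == 27 uses two movmem12b
     transfers (24 bytes, with IN and OUT advancing), then a halfword at
     offset 0 and a final byte at offset 2 from the updated pointers.  */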
24686 while (len >= 12)
24688 emit_insn (gen_movmem12b (out, in, out, in));
24689 len -= 12;
24692 if (len >= 8)
24694 emit_insn (gen_movmem8b (out, in, out, in));
24695 len -= 8;
24698 if (len >= 4)
24700 rtx reg = gen_reg_rtx (SImode);
24701 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
24702 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
24703 len -= 4;
24704 offset += 4;
24707 if (len >= 2)
24709 rtx reg = gen_reg_rtx (HImode);
24710 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
24711 plus_constant (Pmode, in,
24712 offset))));
24713 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
24714 offset)),
24715 reg));
24716 len -= 2;
24717 offset += 2;
24720 if (len)
24722 rtx reg = gen_reg_rtx (QImode);
24723 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
24724 plus_constant (Pmode, in,
24725 offset))));
24726 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
24727 offset)),
24728 reg));
24732 void
24733 thumb_reload_out_hi (rtx *operands)
24735 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
24738 /* Return the length of a function name prefix
24739 that starts with the character 'c'. */
24740 static int
24741 arm_get_strip_length (int c)
24743 switch (c)
24745 ARM_NAME_ENCODING_LENGTHS
24746 default: return 0;
24750 /* Return a pointer to a function's name with any
24751 and all prefix encodings stripped from it. */
24752 const char *
24753 arm_strip_name_encoding (const char *name)
24755 int skip;
24757 while ((skip = arm_get_strip_length (* name)))
24758 name += skip;
24760 return name;
24763 /* If there is a '*' anywhere in the name's prefix, then
24764 emit the stripped name verbatim, otherwise prepend an
24765 underscore if leading underscores are being used. */
24766 void
24767 arm_asm_output_labelref (FILE *stream, const char *name)
24769 int skip;
24770 int verbatim = 0;
24772 while ((skip = arm_get_strip_length (* name)))
24774 verbatim |= (*name == '*');
24775 name += skip;
24778 if (verbatim)
24779 fputs (name, stream);
24780 else
24781 asm_fprintf (stream, "%U%s", name);
24784 /* This function is used to emit an EABI tag and its associated value.
24785 We emit the numerical value of the tag in case the assembler does not
24786 support textual tags (e.g. gas prior to 2.20). If requested we include
24787 the tag name in a comment so that anyone reading the assembler output
24788 will know which tag is being set.
24790 This function is not static because arm-c.c needs it too. */
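/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1)
   emits "\t.eabi_attribute 19, 1"; with -fverbose-asm or -dA the tag
   name is appended after the assembler comment character (typically
   "@" for ARM).  */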
24792 void
24793 arm_emit_eabi_attribute (const char *name, int num, int val)
24795 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
24796 if (flag_verbose_asm || flag_debug_asm)
24797 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
24798 asm_fprintf (asm_out_file, "\n");
24801 /* This function is used to print CPU tuning information as a comment
24802 in the assembler file. Pointers are not printed for now. */
24804 void
24805 arm_print_tune_info (void)
24807 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
24808 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
24809 current_tune->constant_limit);
24810 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
24811 current_tune->max_insns_skipped);
24812 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
24813 current_tune->prefetch.num_slots);
24814 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
24815 current_tune->prefetch.l1_cache_size);
24816 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
24817 current_tune->prefetch.l1_cache_line_size);
24818 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
24819 (int) current_tune->prefer_constant_pool);
24820 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
24821 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
24822 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
24823 current_tune->branch_cost (false, false));
24824 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
24825 current_tune->branch_cost (false, true));
24826 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
24827 current_tune->branch_cost (true, false));
24828 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
24829 current_tune->branch_cost (true, true));
24830 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
24831 (int) current_tune->prefer_ldrd_strd);
24832 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
24833 (int) current_tune->logical_op_non_short_circuit_thumb,
24834 (int) current_tune->logical_op_non_short_circuit_arm);
24835 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
24836 (int) current_tune->prefer_neon_for_64bits);
24837 asm_fprintf (asm_out_file,
24838 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
24839 (int) current_tune->disparage_flag_setting_t16_encodings);
24840 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
24841 (int) current_tune->string_ops_prefer_neon);
24842 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
24843 current_tune->max_insns_inline_memset);
24844 asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
24845 current_tune->fusible_ops);
24846 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
24847 (int) current_tune->sched_autopref);
24850 static void
24851 arm_file_start (void)
24853 int val;
24855 if (TARGET_BPABI)
24857 if (arm_selected_arch)
24859 /* armv7ve doesn't support any extensions. */
24860 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
24862 /* Keep backward compatibility for assemblers
24863 which don't support armv7ve. */
24864 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
24865 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
24866 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
24867 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
24868 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
24870 else
24872 const char* pos = strchr (arm_selected_arch->name, '+');
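  /* An architecture name may carry an extension after '+'.  For
     example (name illustrative), "armv8-a+crc" would be emitted as
     ".arch armv8-a" followed by ".arch_extension crc".  */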
24873 if (pos)
24875 char buf[32];
24876 gcc_assert (strlen (arm_selected_arch->name)
24877 <= sizeof (buf) / sizeof (*pos));
24878 strncpy (buf, arm_selected_arch->name,
24879 (pos - arm_selected_arch->name) * sizeof (*pos));
24880 buf[pos - arm_selected_arch->name] = '\0';
24881 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
24882 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
24884 else
24885 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
24888 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
24889 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
24890 else
24892 const char* truncated_name
24893 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
24894 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
24897 if (print_tune_info)
24898 arm_print_tune_info ();
24900 if (! TARGET_SOFT_FLOAT)
24902 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
24903 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
24905 if (TARGET_HARD_FLOAT_ABI)
24906 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
24909 /* Some of these attributes only apply when the corresponding features
24910 are used. However we don't have any easy way of figuring this out.
24911 Conservatively record the setting that would have been used. */
24913 if (flag_rounding_math)
24914 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
24916 if (!flag_unsafe_math_optimizations)
24918 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
24919 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
24921 if (flag_signaling_nans)
24922 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
24924 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
24925 flag_finite_math_only ? 1 : 3);
24927 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
24928 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
24929 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
24930 flag_short_enums ? 1 : 2);
24932 /* Tag_ABI_optimization_goals. */
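  /* The mapping below is: -Os -> 4, -O2 or higher -> 2, -O1 -> 1,
     no optimization -> 6; e.g. compiling with -Os emits
     ".eabi_attribute 30, 4".  */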
24933 if (optimize_size)
24934 val = 4;
24935 else if (optimize >= 2)
24936 val = 2;
24937 else if (optimize)
24938 val = 1;
24939 else
24940 val = 6;
24941 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
24943 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
24944 unaligned_access);
24946 if (arm_fp16_format)
24947 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
24948 (int) arm_fp16_format);
24950 if (arm_lang_output_object_attributes_hook)
24951 arm_lang_output_object_attributes_hook();
24954 default_file_start ();
24957 static void
24958 arm_file_end (void)
24960 int regno;
24962 if (NEED_INDICATE_EXEC_STACK)
24963 /* Add .note.GNU-stack. */
24964 file_end_indicate_exec_stack ();
24966 if (! thumb_call_reg_needed)
24967 return;
24969 switch_to_section (text_section);
24970 asm_fprintf (asm_out_file, "\t.code 16\n");
24971 ASM_OUTPUT_ALIGN (asm_out_file, 1);
24973 for (regno = 0; regno < LR_REGNUM; regno++)
24975 rtx label = thumb_call_via_label[regno];
24977 if (label != 0)
24979 targetm.asm_out.internal_label (asm_out_file, "L",
24980 CODE_LABEL_NUMBER (label));
24981 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
24986 #ifndef ARM_PE
24987 /* Symbols in the text segment can be accessed without indirecting via the
24988 constant pool; it may take an extra binary operation, but this is still
24989 faster than indirecting via memory. Don't do this when not optimizing,
24990 since we won't be calculating all of the offsets necessary to do this
24991 simplification. */
24993 static void
24994 arm_encode_section_info (tree decl, rtx rtl, int first)
24996 if (optimize > 0 && TREE_CONSTANT (decl))
24997 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
24999 default_encode_section_info (decl, rtl, first);
25001 #endif /* !ARM_PE */
25003 static void
25004 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25006 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25007 && !strcmp (prefix, "L"))
25009 arm_ccfsm_state = 0;
25010 arm_target_insn = NULL;
25012 default_internal_label (stream, prefix, labelno);
25015 /* Output code to add DELTA to the first argument, and then jump
25016 to FUNCTION. Used for C++ multiple inheritance. */
25018 static void
25019 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
25020 HOST_WIDE_INT, tree function)
25022 static int thunk_label = 0;
25023 char label[256];
25024 char labelpc[256];
25025 int mi_delta = delta;
25026 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25027 int shift = 0;
25028 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25029 ? 1 : 0);
25030 if (mi_delta < 0)
25031 mi_delta = - mi_delta;
25033 final_start_function (emit_barrier (), file, 1);
25035 if (TARGET_THUMB1)
25037 int labelno = thunk_label++;
25038 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25039 /* Thunks are entered in ARM mode when available. */
25040 if (TARGET_THUMB1_ONLY)
25042 /* push r3 so we can use it as a temporary. */
25043 /* TODO: Omit this save if r3 is not used. */
25044 fputs ("\tpush {r3}\n", file);
25045 fputs ("\tldr\tr3, ", file);
25047 else
25049 fputs ("\tldr\tr12, ", file);
25051 assemble_name (file, label);
25052 fputc ('\n', file);
25053 if (flag_pic)
25055 /* If we are generating PIC, the ldr instruction below loads
25056 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25057 the address of the add + 8, so we have:
25059 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25060 = target + 1.
25062 Note that we have "+ 1" because some versions of GNU ld
25063 don't set the low bit of the result for R_ARM_REL32
25064 relocations against thumb function symbols.
25065 On ARMv6M this is +4, not +8. */
25066 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25067 assemble_name (file, labelpc);
25068 fputs (":\n", file);
25069 if (TARGET_THUMB1_ONLY)
25071 /* This is 2 insns after the start of the thunk, so we know it
25072 is 4-byte aligned. */
25073 fputs ("\tadd\tr3, pc, r3\n", file);
25074 fputs ("\tmov r12, r3\n", file);
25076 else
25077 fputs ("\tadd\tr12, pc, r12\n", file);
25079 else if (TARGET_THUMB1_ONLY)
25080 fputs ("\tmov r12, r3\n", file);
25082 if (TARGET_THUMB1_ONLY)
25084 if (mi_delta > 255)
25086 fputs ("\tldr\tr3, ", file);
25087 assemble_name (file, label);
25088 fputs ("+4\n", file);
25089 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25090 mi_op, this_regno, this_regno);
25092 else if (mi_delta != 0)
25094 /* Thumb1 unified syntax requires an s suffix in the instruction name when
25095 one of the operands is an immediate. */
25096 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25097 mi_op, this_regno, this_regno,
25098 mi_delta);
25101 else
25103 /* TODO: Use movw/movt for large constants when available. */
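/* The loop below peels DELTA into 8-bit chunks at even bit positions, as the
   ARM immediate encoding requires.  For example, a delta of 0x12345 is
   emitted as three instructions with immediates #0x45, #0x2300 and #0x10000.  */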
25104 while (mi_delta != 0)
25106 if ((mi_delta & (3 << shift)) == 0)
25107 shift += 2;
25108 else
25110 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25111 mi_op, this_regno, this_regno,
25112 mi_delta & (0xff << shift));
25113 mi_delta &= ~(0xff << shift);
25114 shift += 8;
25118 if (TARGET_THUMB1)
25120 if (TARGET_THUMB1_ONLY)
25121 fputs ("\tpop\t{r3}\n", file);
25123 fprintf (file, "\tbx\tr12\n");
25124 ASM_OUTPUT_ALIGN (file, 2);
25125 assemble_name (file, label);
25126 fputs (":\n", file);
25127 if (flag_pic)
25129 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25130 rtx tem = XEXP (DECL_RTL (function), 0);
25131 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25132 pipeline offset is four rather than eight. Adjust the offset
25133 accordingly. */
25134 tem = plus_constant (GET_MODE (tem), tem,
25135 TARGET_THUMB1_ONLY ? -3 : -7);
25136 tem = gen_rtx_MINUS (GET_MODE (tem),
25137 tem,
25138 gen_rtx_SYMBOL_REF (Pmode,
25139 ggc_strdup (labelpc)));
25140 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25142 else
25143 /* Output ".word .LTHUNKn". */
25144 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25146 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25147 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25149 else
25151 fputs ("\tb\t", file);
25152 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25153 if (NEED_PLT_RELOC)
25154 fputs ("(PLT)", file);
25155 fputc ('\n', file);
25158 final_end_function ();
25161 /* MI thunk handling for TARGET_32BIT. */
25163 static void
25164 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
25165 HOST_WIDE_INT vcall_offset, tree function)
25167 /* On ARM, this_regno is R0 or R1 depending on
25168 whether the function returns an aggregate or not. */
25170 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
25171 function)
25172 ? R1_REGNUM : R0_REGNUM);
25174 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
25175 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
25176 reload_completed = 1;
25177 emit_note (NOTE_INSN_PROLOGUE_END);
25179 /* Add DELTA to THIS_RTX. */
25180 if (delta != 0)
25181 arm_split_constant (PLUS, Pmode, NULL_RTX,
25182 delta, this_rtx, this_rtx, false);
25184 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
25185 if (vcall_offset != 0)
25187 /* Load *THIS_RTX. */
25188 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
25189 /* Compute *THIS_RTX + VCALL_OFFSET. */
25190 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
25191 false);
25192 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
25193 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
25194 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
25197 /* Generate a tail call to the target function. */
25198 if (!TREE_USED (function))
25200 assemble_external (function);
25201 TREE_USED (function) = 1;
25203 rtx funexp = XEXP (DECL_RTL (function), 0);
25204 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
25205 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
25206 SIBLING_CALL_P (insn) = 1;
25208 insn = get_insns ();
25209 shorten_branches (insn);
25210 final_start_function (insn, file, 1);
25211 final (insn, file, 1);
25212 final_end_function ();
25214 /* Stop pretending this is a post-reload pass. */
25215 reload_completed = 0;
25218 /* Output code to add DELTA to the first argument, and then jump
25219 to FUNCTION. Used for C++ multiple inheritance. */
25221 static void
25222 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
25223 HOST_WIDE_INT vcall_offset, tree function)
25225 if (TARGET_32BIT)
25226 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
25227 else
25228 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
25232 arm_emit_vector_const (FILE *file, rtx x)
25234 int i;
25235 const char * pattern;
25237 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25239 switch (GET_MODE (x))
25241 case V2SImode: pattern = "%08x"; break;
25242 case V4HImode: pattern = "%04x"; break;
25243 case V8QImode: pattern = "%02x"; break;
25244 default: gcc_unreachable ();
25247 fprintf (file, "0x");
25248 for (i = CONST_VECTOR_NUNITS (x); i--;)
25250 rtx element;
25252 element = CONST_VECTOR_ELT (x, i);
25253 fprintf (file, pattern, INTVAL (element));
25256 return 1;
25259 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
25260 HFmode constant pool entries are actually loaded with ldr. */
25261 void
25262 arm_emit_fp16_const (rtx c)
25264 long bits;
25266 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
25267 if (WORDS_BIG_ENDIAN)
25268 assemble_zeros (2);
25269 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25270 if (!WORDS_BIG_ENDIAN)
25271 assemble_zeros (2);
25274 const char *
25275 arm_output_load_gr (rtx *operands)
25277 rtx reg;
25278 rtx offset;
25279 rtx wcgr;
25280 rtx sum;
25282 if (!MEM_P (operands [1])
25283 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25284 || !REG_P (reg = XEXP (sum, 0))
25285 || !CONST_INT_P (offset = XEXP (sum, 1))
25286 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25287 return "wldrw%?\t%0, %1";
25289 /* Fix up an out-of-range load of a GR register. */
25290 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25291 wcgr = operands[0];
25292 operands[0] = reg;
25293 output_asm_insn ("ldr%?\t%0, %1", operands);
25295 operands[0] = wcgr;
25296 operands[1] = reg;
25297 output_asm_insn ("tmcr%?\t%0, %1", operands);
25298 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25300 return "";
25303 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25305 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25306 named arg and all anonymous args onto the stack.
25307 XXX I know the prologue shouldn't be pushing registers, but it is faster
25308 that way. */
25310 static void
25311 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25312 machine_mode mode,
25313 tree type,
25314 int *pretend_size,
25315 int second_time ATTRIBUTE_UNUSED)
25317 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25318 int nregs;
25320 cfun->machine->uses_anonymous_args = 1;
25321 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25323 nregs = pcum->aapcs_ncrn;
25324 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25325 nregs++;
25327 else
25328 nregs = pcum->nregs;
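/* For example, a function with one named SImode argument followed by "..."
   reaches this point with NREGS == 1; the prologue will then push r1-r3 and
   *pretend_size becomes 12 bytes (assuming no doubleword realignment).  */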
25330 if (nregs < NUM_ARG_REGS)
25331 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25334 /* We can't rely on the caller doing the proper promotion when
25335 using APCS or ATPCS. */
25337 static bool
25338 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25340 return !TARGET_AAPCS_BASED;
25343 static machine_mode
25344 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25345 machine_mode mode,
25346 int *punsignedp ATTRIBUTE_UNUSED,
25347 const_tree fntype ATTRIBUTE_UNUSED,
25348 int for_return ATTRIBUTE_UNUSED)
25350 if (GET_MODE_CLASS (mode) == MODE_INT
25351 && GET_MODE_SIZE (mode) < 4)
25352 return SImode;
25354 return mode;
25357 /* AAPCS based ABIs use short enums by default. */
25359 static bool
25360 arm_default_short_enums (void)
25362 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25366 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25368 static bool
25369 arm_align_anon_bitfield (void)
25371 return TARGET_AAPCS_BASED;
25375 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25377 static tree
25378 arm_cxx_guard_type (void)
25380 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25384 /* The EABI says test the least significant bit of a guard variable. */
25386 static bool
25387 arm_cxx_guard_mask_bit (void)
25389 return TARGET_AAPCS_BASED;
25393 /* The EABI specifies that all array cookies are 8 bytes long. */
25395 static tree
25396 arm_get_cookie_size (tree type)
25398 tree size;
25400 if (!TARGET_AAPCS_BASED)
25401 return default_cxx_get_cookie_size (type);
25403 size = build_int_cst (sizetype, 8);
25404 return size;
25408 /* The EABI says that array cookies should also contain the element size. */
25410 static bool
25411 arm_cookie_has_size (void)
25413 return TARGET_AAPCS_BASED;
25417 /* The EABI says constructors and destructors should return a pointer to
25418 the object constructed/destroyed. */
25420 static bool
25421 arm_cxx_cdtor_returns_this (void)
25423 return TARGET_AAPCS_BASED;
25426 /* The EABI says that an inline function may never be the key
25427 method. */
25429 static bool
25430 arm_cxx_key_method_may_be_inline (void)
25432 return !TARGET_AAPCS_BASED;
25435 static void
25436 arm_cxx_determine_class_data_visibility (tree decl)
25438 if (!TARGET_AAPCS_BASED
25439 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
25440 return;
25442 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
25443 is exported. However, on systems without dynamic vague linkage,
25444 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
25445 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
25446 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
25447 else
25448 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
25449 DECL_VISIBILITY_SPECIFIED (decl) = 1;
25452 static bool
25453 arm_cxx_class_data_always_comdat (void)
25455 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
25456 vague linkage if the class has no key function. */
25457 return !TARGET_AAPCS_BASED;
25461 /* The EABI says __aeabi_atexit should be used to register static
25462 destructors. */
25464 static bool
25465 arm_cxx_use_aeabi_atexit (void)
25467 return TARGET_AAPCS_BASED;
25471 void
25472 arm_set_return_address (rtx source, rtx scratch)
25474 arm_stack_offsets *offsets;
25475 HOST_WIDE_INT delta;
25476 rtx addr;
25477 unsigned long saved_regs;
25479 offsets = arm_get_frame_offsets ();
25480 saved_regs = offsets->saved_regs_mask;
25482 if ((saved_regs & (1 << LR_REGNUM)) == 0)
25483 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
25484 else
25486 if (frame_pointer_needed)
25487 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
25488 else
25490 /* LR will be the first saved register. */
25491 delta = offsets->outgoing_args - (offsets->frame + 4);
25494 if (delta >= 4096)
25496 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
25497 GEN_INT (delta & ~4095)));
25498 addr = scratch;
25499 delta &= 4095;
25501 else
25502 addr = stack_pointer_rtx;
25504 addr = plus_constant (Pmode, addr, delta);
25506 /* The store needs to be marked as frame related in order to prevent
25507 DSE from deleting it as dead if it is based on fp. */
25508 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
25509 RTX_FRAME_RELATED_P (insn) = 1;
25510 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
25515 void
25516 thumb_set_return_address (rtx source, rtx scratch)
25518 arm_stack_offsets *offsets;
25519 HOST_WIDE_INT delta;
25520 HOST_WIDE_INT limit;
25521 int reg;
25522 rtx addr;
25523 unsigned long mask;
25525 emit_use (source);
25527 offsets = arm_get_frame_offsets ();
25528 mask = offsets->saved_regs_mask;
25529 if (mask & (1 << LR_REGNUM))
25531 limit = 1024;
25532 /* Find the saved regs. */
25533 if (frame_pointer_needed)
25535 delta = offsets->soft_frame - offsets->saved_args;
25536 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
25537 if (TARGET_THUMB1)
25538 limit = 128;
25540 else
25542 delta = offsets->outgoing_args - offsets->saved_args;
25543 reg = SP_REGNUM;
25545 /* Allow for the stack frame. */
25546 if (TARGET_THUMB1 && TARGET_BACKTRACE)
25547 delta -= 16;
25548 /* The link register is always the first saved register. */
25549 delta -= 4;
25551 /* Construct the address. */
25552 addr = gen_rtx_REG (SImode, reg);
25553 if (delta > limit)
25555 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
25556 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
25557 addr = scratch;
25559 else
25560 addr = plus_constant (Pmode, addr, delta);
25562 /* The store needs to be marked as frame related in order to prevent
25563 DSE from deleting it as dead if it is based on fp. */
25564 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
25565 RTX_FRAME_RELATED_P (insn) = 1;
25566 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
25568 else
25569 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
25572 /* Implements target hook vector_mode_supported_p. */
25573 bool
25574 arm_vector_mode_supported_p (machine_mode mode)
25576 /* Neon also supports V2SImode, etc. listed in the clause below. */
25577 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
25578 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
25579 || mode == V2DImode || mode == V8HFmode))
25580 return true;
25582 if ((TARGET_NEON || TARGET_IWMMXT)
25583 && ((mode == V2SImode)
25584 || (mode == V4HImode)
25585 || (mode == V8QImode)))
25586 return true;
25588 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
25589 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
25590 || mode == V2HAmode))
25591 return true;
25593 return false;
25596 /* Implements target hook array_mode_supported_p. */
25598 static bool
25599 arm_array_mode_supported_p (machine_mode mode,
25600 unsigned HOST_WIDE_INT nelems)
25602 if (TARGET_NEON
25603 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
25604 && (nelems >= 2 && nelems <= 4))
25605 return true;
25607 return false;
25610 /* Use the option -mvectorize-with-neon-double to override the use of quadword
25611 registers when autovectorizing for Neon, at least until multiple vector
25612 widths are supported properly by the middle-end. */
25614 static machine_mode
25615 arm_preferred_simd_mode (machine_mode mode)
25617 if (TARGET_NEON)
25618 switch (mode)
25620 case SFmode:
25621 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
25622 case SImode:
25623 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
25624 case HImode:
25625 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
25626 case QImode:
25627 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
25628 case DImode:
25629 if (!TARGET_NEON_VECTORIZE_DOUBLE)
25630 return V2DImode;
25631 break;
25633 default:;
25636 if (TARGET_REALLY_IWMMXT)
25637 switch (mode)
25639 case SImode:
25640 return V2SImode;
25641 case HImode:
25642 return V4HImode;
25643 case QImode:
25644 return V8QImode;
25646 default:;
25649 return word_mode;
25652 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
25654 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
25655 using r0-r4 for function arguments, r7 for the stack frame and don't have
25656 enough left over to do doubleword arithmetic. For Thumb-2 all the
25657 potentially problematic instructions accept high registers so this is not
25658 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
25659 that require many low registers. */
25660 static bool
25661 arm_class_likely_spilled_p (reg_class_t rclass)
25663 if ((TARGET_THUMB1 && rclass == LO_REGS)
25664 || rclass == CC_REG)
25665 return true;
25667 return false;
25670 /* Implements target hook small_register_classes_for_mode_p. */
25671 bool
25672 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
25674 return TARGET_THUMB1;
25677 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
25678 ARM insns and therefore guarantee that the shift count is modulo 256.
25679 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
25680 guarantee no particular behavior for out-of-range counts. */
25682 static unsigned HOST_WIDE_INT
25683 arm_shift_truncation_mask (machine_mode mode)
25685 return mode == SImode ? 255 : 0;
25689 /* Map internal gcc register numbers to DWARF2 register numbers. */
25691 unsigned int
25692 arm_dbx_register_number (unsigned int regno)
25694 if (regno < 16)
25695 return regno;
25697 if (IS_VFP_REGNUM (regno))
25699 /* See comment in arm_dwarf_register_span. */
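/* For example, s5 maps to the legacy DWARF number 64 + 5 = 69, while d16
   (which has no single-precision alias) maps to 256 + 16 = 272.  */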
25700 if (VFP_REGNO_OK_FOR_SINGLE (regno))
25701 return 64 + regno - FIRST_VFP_REGNUM;
25702 else
25703 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
25706 if (IS_IWMMXT_GR_REGNUM (regno))
25707 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
25709 if (IS_IWMMXT_REGNUM (regno))
25710 return 112 + regno - FIRST_IWMMXT_REGNUM;
25712 return DWARF_FRAME_REGISTERS;
25715 /* Dwarf models VFPv3 registers as 32 64-bit registers.
25716 GCC models them as 64 32-bit registers, so we need to describe this to
25717 the DWARF generation code. Other registers can use the default. */
25718 static rtx
25719 arm_dwarf_register_span (rtx rtl)
25721 machine_mode mode;
25722 unsigned regno;
25723 rtx parts[16];
25724 int nregs;
25725 int i;
25727 regno = REGNO (rtl);
25728 if (!IS_VFP_REGNUM (regno))
25729 return NULL_RTX;
25731 /* XXX FIXME: The EABI defines two VFP register ranges:
25732 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
25733 256-287: D0-D31
25734 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
25735 corresponding D register. Until GDB supports this, we shall use the
25736 legacy encodings. We also use these encodings for D0-D15 for
25737 compatibility with older debuggers. */
25738 mode = GET_MODE (rtl);
25739 if (GET_MODE_SIZE (mode) < 8)
25740 return NULL_RTX;
25742 if (VFP_REGNO_OK_FOR_SINGLE (regno))
25744 nregs = GET_MODE_SIZE (mode) / 4;
25745 for (i = 0; i < nregs; i += 2)
25746 if (TARGET_BIG_END)
25748 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
25749 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
25751 else
25753 parts[i] = gen_rtx_REG (SImode, regno + i);
25754 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
25757 else
25759 nregs = GET_MODE_SIZE (mode) / 8;
25760 for (i = 0; i < nregs; i++)
25761 parts[i] = gen_rtx_REG (DImode, regno + i);
25764 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
25767 #if ARM_UNWIND_INFO
25768 /* Emit unwind directives for a store-multiple instruction or stack pointer
25769 push during alignment.
25770 These should only ever be generated by the function prologue code, so
25771 expect them to have a particular form.
25772 The store-multiple instruction sometimes pushes pc as the last register,
25773 although it should not be tracked in the unwind information; for -Os it
25774 sometimes pushes some dummy registers before the first register that needs
25775 to be tracked in the unwind information. Such dummy registers are there
25776 just to avoid a separate stack adjustment, and will not be restored in the
25777 epilogue. */
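/* For example, a prologue "push {r4, r5, lr}" arrives as a PARALLEL whose
   first SET drops the stack pointer by 12 and whose remaining SETs store
   r4, r5 and lr; for it we emit ".save {r4, r5, lr}".  */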
25779 static void
25780 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
25782 int i;
25783 HOST_WIDE_INT offset;
25784 HOST_WIDE_INT nregs;
25785 int reg_size;
25786 unsigned reg;
25787 unsigned lastreg;
25788 unsigned padfirst = 0, padlast = 0;
25789 rtx e;
25791 e = XVECEXP (p, 0, 0);
25792 gcc_assert (GET_CODE (e) == SET);
25794 /* First insn will adjust the stack pointer. */
25795 gcc_assert (GET_CODE (e) == SET
25796 && REG_P (SET_DEST (e))
25797 && REGNO (SET_DEST (e)) == SP_REGNUM
25798 && GET_CODE (SET_SRC (e)) == PLUS);
25800 offset = -INTVAL (XEXP (SET_SRC (e), 1));
25801 nregs = XVECLEN (p, 0) - 1;
25802 gcc_assert (nregs);
25804 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
25805 if (reg < 16)
25807 /* For -Os dummy registers can be pushed at the beginning to
25808 avoid separate stack pointer adjustment. */
25809 e = XVECEXP (p, 0, 1);
25810 e = XEXP (SET_DEST (e), 0);
25811 if (GET_CODE (e) == PLUS)
25812 padfirst = INTVAL (XEXP (e, 1));
25813 gcc_assert (padfirst == 0 || optimize_size);
25814 /* The function prologue may also push pc, but not annotate it as it is
25815 never restored. We turn this into a stack pointer adjustment. */
25816 e = XVECEXP (p, 0, nregs);
25817 e = XEXP (SET_DEST (e), 0);
25818 if (GET_CODE (e) == PLUS)
25819 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
25820 else
25821 padlast = offset - 4;
25822 gcc_assert (padlast == 0 || padlast == 4);
25823 if (padlast == 4)
25824 fprintf (asm_out_file, "\t.pad #4\n");
25825 reg_size = 4;
25826 fprintf (asm_out_file, "\t.save {");
25828 else if (IS_VFP_REGNUM (reg))
25830 reg_size = 8;
25831 fprintf (asm_out_file, "\t.vsave {");
25833 else
25834 /* Unknown register type. */
25835 gcc_unreachable ();
25837 /* If the stack increment doesn't match the size of the saved registers,
25838 something has gone horribly wrong. */
25839 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
25841 offset = padfirst;
25842 lastreg = 0;
25843 /* The remaining insns will describe the stores. */
25844 for (i = 1; i <= nregs; i++)
25846 /* Expect (set (mem <addr>) (reg)).
25847 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
25848 e = XVECEXP (p, 0, i);
25849 gcc_assert (GET_CODE (e) == SET
25850 && MEM_P (SET_DEST (e))
25851 && REG_P (SET_SRC (e)));
25853 reg = REGNO (SET_SRC (e));
25854 gcc_assert (reg >= lastreg);
25856 if (i != 1)
25857 fprintf (asm_out_file, ", ");
25858 /* We can't use %r for vfp because we need to use the
25859 double precision register names. */
25860 if (IS_VFP_REGNUM (reg))
25861 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
25862 else
25863 asm_fprintf (asm_out_file, "%r", reg);
25865 if (flag_checking)
25867 /* Check that the addresses are consecutive. */
25868 e = XEXP (SET_DEST (e), 0);
25869 if (GET_CODE (e) == PLUS)
25870 gcc_assert (REG_P (XEXP (e, 0))
25871 && REGNO (XEXP (e, 0)) == SP_REGNUM
25872 && CONST_INT_P (XEXP (e, 1))
25873 && offset == INTVAL (XEXP (e, 1)));
25874 else
25875 gcc_assert (i == 1
25876 && REG_P (e)
25877 && REGNO (e) == SP_REGNUM);
25878 offset += reg_size;
25881 fprintf (asm_out_file, "}\n");
25882 if (padfirst)
25883 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
25886 /* Emit unwind directives for a SET. */
25888 static void
25889 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
25891 rtx e0;
25892 rtx e1;
25893 unsigned reg;
25895 e0 = XEXP (p, 0);
25896 e1 = XEXP (p, 1);
25897 switch (GET_CODE (e0))
25899 case MEM:
25900 /* Pushing a single register. */
25901 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
25902 || !REG_P (XEXP (XEXP (e0, 0), 0))
25903 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
25904 abort ();
25906 asm_fprintf (asm_out_file, "\t.save ");
25907 if (IS_VFP_REGNUM (REGNO (e1)))
25908 asm_fprintf(asm_out_file, "{d%d}\n",
25909 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
25910 else
25911 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
25912 break;
25914 case REG:
25915 if (REGNO (e0) == SP_REGNUM)
25917 /* A stack increment. */
25918 if (GET_CODE (e1) != PLUS
25919 || !REG_P (XEXP (e1, 0))
25920 || REGNO (XEXP (e1, 0)) != SP_REGNUM
25921 || !CONST_INT_P (XEXP (e1, 1)))
25922 abort ();
25924 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
25925 -INTVAL (XEXP (e1, 1)));
25927 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
25929 HOST_WIDE_INT offset;
25931 if (GET_CODE (e1) == PLUS)
25933 if (!REG_P (XEXP (e1, 0))
25934 || !CONST_INT_P (XEXP (e1, 1)))
25935 abort ();
25936 reg = REGNO (XEXP (e1, 0));
25937 offset = INTVAL (XEXP (e1, 1));
25938 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
25939 HARD_FRAME_POINTER_REGNUM, reg,
25940 offset);
25942 else if (REG_P (e1))
25944 reg = REGNO (e1);
25945 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
25946 HARD_FRAME_POINTER_REGNUM, reg);
25948 else
25949 abort ();
25951 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
25953 /* Move from sp to reg. */
25954 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
25956 else if (GET_CODE (e1) == PLUS
25957 && REG_P (XEXP (e1, 0))
25958 && REGNO (XEXP (e1, 0)) == SP_REGNUM
25959 && CONST_INT_P (XEXP (e1, 1)))
25961 /* Set reg to offset from sp. */
25962 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
25963 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
25965 else
25966 abort ();
25967 break;
25969 default:
25970 abort ();
25975 /* Emit unwind directives for the given insn. */
25977 static void
25978 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
25980 rtx note, pat;
25981 bool handled_one = false;
25983 if (arm_except_unwind_info (&global_options) != UI_TARGET)
25984 return;
25986 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
25987 && (TREE_NOTHROW (current_function_decl)
25988 || crtl->all_throwers_are_sibcalls))
25989 return;
25991 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
25992 return;
25994 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
25996 switch (REG_NOTE_KIND (note))
25998 case REG_FRAME_RELATED_EXPR:
25999 pat = XEXP (note, 0);
26000 goto found;
26002 case REG_CFA_REGISTER:
26003 pat = XEXP (note, 0);
26004 if (pat == NULL)
26006 pat = PATTERN (insn);
26007 if (GET_CODE (pat) == PARALLEL)
26008 pat = XVECEXP (pat, 0, 0);
26011 /* Only emitted for IS_STACKALIGN re-alignment. */
26013 rtx dest, src;
26014 unsigned reg;
26016 src = SET_SRC (pat);
26017 dest = SET_DEST (pat);
26019 gcc_assert (src == stack_pointer_rtx);
26020 reg = REGNO (dest);
26021 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26022 reg + 0x90, reg);
26024 handled_one = true;
26025 break;
26027 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26028 to get correct dwarf information for shrink-wrapping. We should not
26029 emit unwind information for it because these notes are used either for
26030 pretend arguments or to adjust sp and restore registers from the
26031 stack. */
26032 case REG_CFA_DEF_CFA:
26033 case REG_CFA_ADJUST_CFA:
26034 case REG_CFA_RESTORE:
26035 return;
26037 case REG_CFA_EXPRESSION:
26038 case REG_CFA_OFFSET:
26039 /* ??? Only handling here what we actually emit. */
26040 gcc_unreachable ();
26042 default:
26043 break;
26046 if (handled_one)
26047 return;
26048 pat = PATTERN (insn);
26049 found:
26051 switch (GET_CODE (pat))
26053 case SET:
26054 arm_unwind_emit_set (asm_out_file, pat);
26055 break;
26057 case SEQUENCE:
26058 /* Store multiple. */
26059 arm_unwind_emit_sequence (asm_out_file, pat);
26060 break;
26062 default:
26063 abort();
26068 /* Output a reference from a function exception table to the type_info
26069 object X. The EABI specifies that the symbol should be relocated by
26070 an R_ARM_TARGET2 relocation. */
26072 static bool
26073 arm_output_ttype (rtx x)
26075 fputs ("\t.word\t", asm_out_file);
26076 output_addr_const (asm_out_file, x);
26077 /* Use special relocations for symbol references. */
26078 if (!CONST_INT_P (x))
26079 fputs ("(TARGET2)", asm_out_file);
26080 fputc ('\n', asm_out_file);
26082 return TRUE;
26085 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26087 static void
26088 arm_asm_emit_except_personality (rtx personality)
26090 fputs ("\t.personality\t", asm_out_file);
26091 output_addr_const (asm_out_file, personality);
26092 fputc ('\n', asm_out_file);
26094 #endif /* ARM_UNWIND_INFO */
26096 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26098 static void
26099 arm_asm_init_sections (void)
26101 #if ARM_UNWIND_INFO
26102 exception_section = get_unnamed_section (0, output_section_asm_op,
26103 "\t.handlerdata");
26104 #endif /* ARM_UNWIND_INFO */
26106 #ifdef OBJECT_FORMAT_ELF
26107 if (target_pure_code)
26108 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
26109 #endif
26112 /* Output unwind directives for the start/end of a function. */
26114 void
26115 arm_output_fn_unwind (FILE * f, bool prologue)
26117 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26118 return;
26120 if (prologue)
26121 fputs ("\t.fnstart\n", f);
26122 else
26124 /* If this function will never be unwound, then mark it as such.
26125 The same condition is used in arm_unwind_emit to suppress
26126 the frame annotations. */
26127 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26128 && (TREE_NOTHROW (current_function_decl)
26129 || crtl->all_throwers_are_sibcalls))
26130 fputs("\t.cantunwind\n", f);
26132 fputs ("\t.fnend\n", f);
26136 static bool
26137 arm_emit_tls_decoration (FILE *fp, rtx x)
26139 enum tls_reloc reloc;
26140 rtx val;
26142 val = XVECEXP (x, 0, 0);
26143 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26145 output_addr_const (fp, val);
26147 switch (reloc)
26149 case TLS_GD32:
26150 fputs ("(tlsgd)", fp);
26151 break;
26152 case TLS_LDM32:
26153 fputs ("(tlsldm)", fp);
26154 break;
26155 case TLS_LDO32:
26156 fputs ("(tlsldo)", fp);
26157 break;
26158 case TLS_IE32:
26159 fputs ("(gottpoff)", fp);
26160 break;
26161 case TLS_LE32:
26162 fputs ("(tpoff)", fp);
26163 break;
26164 case TLS_DESCSEQ:
26165 fputs ("(tlsdesc)", fp);
26166 break;
26167 default:
26168 gcc_unreachable ();
26171 switch (reloc)
26173 case TLS_GD32:
26174 case TLS_LDM32:
26175 case TLS_IE32:
26176 case TLS_DESCSEQ:
26177 fputs (" + (. - ", fp);
26178 output_addr_const (fp, XVECEXP (x, 0, 2));
26179 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26180 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26181 output_addr_const (fp, XVECEXP (x, 0, 3));
26182 fputc (')', fp);
26183 break;
26184 default:
26185 break;
26188 return TRUE;
26191 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26193 static void
26194 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26196 gcc_assert (size == 4);
26197 fputs ("\t.word\t", file);
26198 output_addr_const (file, x);
26199 fputs ("(tlsldo)", file);
26202 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26204 static bool
26205 arm_output_addr_const_extra (FILE *fp, rtx x)
26207 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26208 return arm_emit_tls_decoration (fp, x);
26209 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26211 char label[256];
26212 int labelno = INTVAL (XVECEXP (x, 0, 0));
26214 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26215 assemble_name_raw (fp, label);
26217 return TRUE;
26219 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26221 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26222 if (GOT_PCREL)
26223 fputs ("+.", fp);
26224 fputs ("-(", fp);
26225 output_addr_const (fp, XVECEXP (x, 0, 0));
26226 fputc (')', fp);
26227 return TRUE;
26229 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26231 output_addr_const (fp, XVECEXP (x, 0, 0));
26232 if (GOT_PCREL)
26233 fputs ("+.", fp);
26234 fputs ("-(", fp);
26235 output_addr_const (fp, XVECEXP (x, 0, 1));
26236 fputc (')', fp);
26237 return TRUE;
26239 else if (GET_CODE (x) == CONST_VECTOR)
26240 return arm_emit_vector_const (fp, x);
26242 return FALSE;
26245 /* Output assembly for a shift instruction.
26246 SET_FLAGS determines how the instruction modifies the condition codes.
26247 0 - Do not set condition codes.
26248 1 - Set condition codes.
26249 2 - Use smallest instruction. */
26250 const char *
26251 arm_output_shift(rtx * operands, int set_flags)
26253 char pattern[100];
26254 static const char flag_chars[3] = {'?', '.', '!'};
26255 const char *shift;
26256 HOST_WIDE_INT val;
26257 char c;
26259 c = flag_chars[set_flags];
26260 shift = shift_op(operands[3], &val);
26261 if (shift)
26263 if (val != -1)
26264 operands[2] = GEN_INT(val);
26265 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26267 else
26268 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26270 output_asm_insn (pattern, operands);
26271 return "";
26274 /* Output assembly for a WMMX immediate shift instruction. */
26275 const char *
26276 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26278 int shift = INTVAL (operands[2]);
26279 char templ[50];
26280 machine_mode opmode = GET_MODE (operands[0]);
26282 gcc_assert (shift >= 0);
26284 /* If the shift value in the register versions is > 63 (for the D qualifier),
26285 31 (for the W qualifier) or 15 (for the H qualifier), handle it specially below. */
26286 if (((opmode == V4HImode) && (shift > 15))
26287 || ((opmode == V2SImode) && (shift > 31))
26288 || ((opmode == DImode) && (shift > 63)))
26290 if (wror_or_wsra)
26292 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26293 output_asm_insn (templ, operands);
26294 if (opmode == DImode)
26296 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26297 output_asm_insn (templ, operands);
26300 else
26302 /* The destination register will contain all zeros. */
26303 sprintf (templ, "wzero\t%%0");
26304 output_asm_insn (templ, operands);
26306 return "";
26309 if ((opmode == DImode) && (shift > 32))
26311 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26312 output_asm_insn (templ, operands);
26313 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26314 output_asm_insn (templ, operands);
26316 else
26318 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26319 output_asm_insn (templ, operands);
26321 return "";
26324 /* Output assembly for a WMMX tinsr instruction. */
26325 const char *
26326 arm_output_iwmmxt_tinsr (rtx *operands)
26328 int mask = INTVAL (operands[3]);
26329 int i;
26330 char templ[50];
26331 int units = mode_nunits[GET_MODE (operands[0])];
26332 gcc_assert ((mask & (mask - 1)) == 0);
26333 for (i = 0; i < units; ++i)
26335 if ((mask & 0x01) == 1)
26337 break;
26339 mask >>= 1;
26341 gcc_assert (i < units);
26343 switch (GET_MODE (operands[0]))
26345 case V8QImode:
26346 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26347 break;
26348 case V4HImode:
26349 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26350 break;
26351 case V2SImode:
26352 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26353 break;
26354 default:
26355 gcc_unreachable ();
26356 break;
26358 output_asm_insn (templ, operands);
26360 return "";
26363 /* Output a Thumb-1 casesi dispatch sequence. */
26364 const char *
26365 thumb1_output_casesi (rtx *operands)
26367 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
26369 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26371 switch (GET_MODE(diff_vec))
26373 case QImode:
26374 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26375 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26376 case HImode:
26377 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26378 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26379 case SImode:
26380 return "bl\t%___gnu_thumb1_case_si";
26381 default:
26382 gcc_unreachable ();
26386 /* Output a Thumb-2 casesi instruction. */
26387 const char *
26388 thumb2_output_casesi (rtx *operands)
26390 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
26392 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26394 output_asm_insn ("cmp\t%0, %1", operands);
26395 output_asm_insn ("bhi\t%l3", operands);
26396 switch (GET_MODE(diff_vec))
26398 case QImode:
26399 return "tbb\t[%|pc, %0]";
26400 case HImode:
26401 return "tbh\t[%|pc, %0, lsl #1]";
26402 case SImode:
26403 if (flag_pic)
26405 output_asm_insn ("adr\t%4, %l2", operands);
26406 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
26407 output_asm_insn ("add\t%4, %4, %5", operands);
26408 return "bx\t%4";
26410 else
26412 output_asm_insn ("adr\t%4, %l2", operands);
26413 return "ldr\t%|pc, [%4, %0, lsl #2]";
26415 default:
26416 gcc_unreachable ();
26420 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
26421 per-core tuning structs. */
26422 static int
26423 arm_issue_rate (void)
26425 return current_tune->issue_rate;
26428 /* Return how many instructions the scheduler should look ahead in order to
26429 choose the best one. */
26430 static int
26431 arm_first_cycle_multipass_dfa_lookahead (void)
26433 int issue_rate = arm_issue_rate ();
26435 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
26438 /* Enable modeling of L2 auto-prefetcher. */
26439 static int
26440 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
26442 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
26445 const char *
26446 arm_mangle_type (const_tree type)
26448 /* The ARM ABI documents (10th October 2008) say that "__va_list"
26449 has to be mangled as if it were in the "std" namespace.
26450 if (TARGET_AAPCS_BASED
26451 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
26452 return "St9__va_list";
26454 /* Half-precision float. */
26455 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
26456 return "Dh";
26458 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
26459 builtin type. */
26460 if (TYPE_NAME (type) != NULL)
26461 return arm_mangle_builtin_type (type);
26463 /* Use the default mangling. */
26464 return NULL;
26467 /* Order of allocation of core registers for Thumb: this allocation is
26468 written over the corresponding initial entries of the array
26469 initialized with REG_ALLOC_ORDER. We allocate all low registers
26470 first. Saving and restoring a low register is usually cheaper than
26471 using a call-clobbered high register. */
26473 static const int thumb_core_reg_alloc_order[] =
26475 3, 2, 1, 0, 4, 5, 6, 7,
26476 14, 12, 8, 9, 10, 11
26479 /* Adjust register allocation order when compiling for Thumb. */
26481 void
26482 arm_order_regs_for_local_alloc (void)
26484 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
26485 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
26486 if (TARGET_THUMB)
26487 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
26488 sizeof (thumb_core_reg_alloc_order));
26491 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
26493 bool
26494 arm_frame_pointer_required (void)
26496 if (SUBTARGET_FRAME_POINTER_REQUIRED)
26497 return true;
26499 /* If the function receives nonlocal gotos, it needs to save the frame
26500 pointer in the nonlocal_goto_save_area object. */
26501 if (cfun->has_nonlocal_label)
26502 return true;
26504 /* The frame pointer is required for non-leaf APCS frames. */
26505 if (TARGET_ARM && TARGET_APCS_FRAME && !leaf_function_p ())
26506 return true;
26508 /* If we are probing the stack in the prologue, we will have a faulting
26509 instruction prior to the stack adjustment and this requires a frame
26510 pointer if we want to catch the exception using the EABI unwinder. */
26511 if (!IS_INTERRUPT (arm_current_func_type ())
26512 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
26513 && arm_except_unwind_info (&global_options) == UI_TARGET
26514 && cfun->can_throw_non_call_exceptions)
26516 HOST_WIDE_INT size = get_frame_size ();
26518 /* That's irrelevant if there is no stack adjustment. */
26519 if (size <= 0)
26520 return false;
26522 /* That's relevant only if there is a stack probe. */
26523 if (crtl->is_leaf && !cfun->calls_alloca)
26525 /* We don't have the final size of the frame so adjust. */
26526 size += 32 * UNITS_PER_WORD;
26527 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
26528 return true;
26530 else
26531 return true;
26534 return false;
26537 /* Only Thumb-1 lacks conditional execution, so return true if the target
26538 is not Thumb-1. */
26539 static bool
26540 arm_have_conditional_execution (void)
26542 return !TARGET_THUMB1;
26545 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
26546 static HOST_WIDE_INT
26547 arm_vector_alignment (const_tree type)
26549 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
26551 if (TARGET_AAPCS_BASED)
26552 align = MIN (align, 64);
26554 return align;
26557 static unsigned int
26558 arm_autovectorize_vector_sizes (void)
26560 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
26563 static bool
26564 arm_vector_alignment_reachable (const_tree type, bool is_packed)
26566 /* Vectors which aren't in packed structures will not be less aligned than
26567 the natural alignment of their element type, so this is safe. */
26568 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
26569 return !is_packed;
26571 return default_builtin_vector_alignment_reachable (type, is_packed);
26574 static bool
26575 arm_builtin_support_vector_misalignment (machine_mode mode,
26576 const_tree type, int misalignment,
26577 bool is_packed)
26579 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
26581 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
26583 if (is_packed)
26584 return align == 1;
26586 /* If the misalignment is unknown, we should be able to handle the access
26587 so long as it is not to a member of a packed data structure. */
26588 if (misalignment == -1)
26589 return true;
26591 /* Return true if the misalignment is a multiple of the natural alignment
26592 of the vector's element type. This is probably always going to be
26593 true in practice, since we've already established that this isn't a
26594 packed access. */
26595 return ((misalignment % align) == 0);
26598 return default_builtin_support_vector_misalignment (mode, type, misalignment,
26599 is_packed);
26602 static void
26603 arm_conditional_register_usage (void)
26605 int regno;
26607 if (TARGET_THUMB1 && optimize_size)
26609 /* When optimizing for size on Thumb-1, it's better not
26610 to use the HI regs, because of the overhead of
26611 stacking them. */
26612 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
26613 fixed_regs[regno] = call_used_regs[regno] = 1;
26616 /* The link register can be clobbered by any branch insn,
26617 but we have no way to track that at present, so mark
26618 it as unavailable. */
26619 if (TARGET_THUMB1)
26620 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
26622 if (TARGET_32BIT && TARGET_HARD_FLOAT)
26624 /* VFPv3 registers are disabled when earlier VFP
26625 versions are selected due to the definition of
26626 LAST_VFP_REGNUM. */
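/* Under the AAPCS, d8-d15 (s16-s31) are callee-saved; the remaining VFP
   registers are marked call-used here.  */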
26627 for (regno = FIRST_VFP_REGNUM;
26628 regno <= LAST_VFP_REGNUM; ++ regno)
26630 fixed_regs[regno] = 0;
26631 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
26632 || regno >= FIRST_VFP_REGNUM + 32;
26636 if (TARGET_REALLY_IWMMXT)
26638 regno = FIRST_IWMMXT_GR_REGNUM;
26639 /* The 2002/10/09 revision of the XScale ABI has wCG0
26640 and wCG1 as call-preserved registers. The 2002/11/21
26641 revision changed this so that all wCG registers are
26642 scratch registers. */
26643 for (regno = FIRST_IWMMXT_GR_REGNUM;
26644 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
26645 fixed_regs[regno] = 0;
26646 /* The XScale ABI has wR0 - wR9 as scratch registers,
26647 the rest as call-preserved registers. */
26648 for (regno = FIRST_IWMMXT_REGNUM;
26649 regno <= LAST_IWMMXT_REGNUM; ++ regno)
26651 fixed_regs[regno] = 0;
26652 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
26656 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
26658 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
26659 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
26661 else if (TARGET_APCS_STACK)
26663 fixed_regs[10] = 1;
26664 call_used_regs[10] = 1;
26666 /* -mcaller-super-interworking reserves r11 for calls to
26667 _interwork_r11_call_via_rN(). Making the register global
26668 is an easy way of ensuring that it remains valid for all
26669 calls. */
26670 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
26671 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
26673 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26674 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26675 if (TARGET_CALLER_INTERWORKING)
26676 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26678 SUBTARGET_CONDITIONAL_REGISTER_USAGE
26681 static reg_class_t
26682 arm_preferred_rename_class (reg_class_t rclass)
26684 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
26685 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS
26686 so that code size can be reduced. */
26687 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
26688 return LO_REGS;
26689 else
26690 return NO_REGS;
26693 /* Compute the attribute "length" of insn "*push_multi".
26694 So this function MUST be kept in sync with that insn pattern. */
26696 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
26698 int i, regno, hi_reg;
26699 int num_saves = XVECLEN (parallel_op, 0);
26701 /* ARM mode. */
26702 if (TARGET_ARM)
26703 return 4;
26704 /* Thumb1 mode. */
26705 if (TARGET_THUMB1)
26706 return 2;
26708 /* Thumb2 mode. */
26709 regno = REGNO (first_op);
26710 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
26711 list is 8-bit. Normally this means all registers in the list must be
26712 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use the
26713 32-bit encodings. There is one exception for PUSH: LR, although in HI_REGS,
26714 can be used with the 16-bit encoding. */
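/* For example, "push {r0-r7, lr}" can use the 16-bit encoding (length 2),
   while "push {r0, r8}" needs the 32-bit encoding (length 4).  */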
26715 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
26716 for (i = 1; i < num_saves && !hi_reg; i++)
26718 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
26719 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
26722 if (!hi_reg)
26723 return 2;
26724 return 4;
26727 /* Compute the attribute "length" of an insn. Currently, this function is used
26728 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
26729 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
26730 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
26731 true if OPERANDS contains an insn which explicitly updates the base register. */
26734 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
26736 /* ARM mode. */
26737 if (TARGET_ARM)
26738 return 4;
26739 /* Thumb1 mode. */
26740 if (TARGET_THUMB1)
26741 return 2;
26743 rtx parallel_op = operands[0];
26744 /* Initialize to the index of the last element of the PARALLEL. */
26745 unsigned indx = XVECLEN (parallel_op, 0) - 1;
26746 /* Initialize to the base register number. */
26747 unsigned regno = REGNO (operands[1]);
26748 /* Skip the return and write-back patterns.
26749 We only need the register pop patterns for later analysis. */
26750 unsigned first_indx = 0;
26751 first_indx += return_pc ? 1 : 0;
26752 first_indx += write_back_p ? 1 : 0;
26754 /* A pop operation can be done through LDM or POP. If the base register is SP
26755 and it has write back, then an LDM is an alias of POP. */
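/* For example, "pop {r4-r7, pc}" stays 16-bit, whereas an LDM whose base
   register is a high register, or whose register list includes PC, needs
   the 32-bit encoding.  */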
26756 bool pop_p = (regno == SP_REGNUM && write_back_p);
26757 bool ldm_p = !pop_p;
26759 /* Check base register for LDM. */
26760 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
26761 return 4;
26763 /* Check each register in the list. */
26764 for (; indx >= first_indx; indx--)
26766 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
26767 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
26768 comment in arm_attr_length_push_multi. */
26769 if (REGNO_REG_CLASS (regno) == HI_REGS
26770 && (regno != PC_REGNUM || ldm_p))
26771 return 4;
26774 return 2;
26777 /* Compute the number of instructions emitted by output_move_double. */
26779 arm_count_output_move_double_insns (rtx *operands)
26781 int count;
26782 rtx ops[2];
26783 /* output_move_double may modify the operands array, so call it
26784 here on a copy of the array. */
26785 ops[0] = operands[0];
26786 ops[1] = operands[1];
26787 output_move_double (ops, false, &count);
26788 return count;
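/* If OPERAND is a CONST_DOUBLE whose reciprocal is an exact power of two,
   return the log2 of that reciprocal, i.e. the number of fraction bits
   (e.g. 0.125 yields 3); otherwise return 0.  */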
26792 vfp3_const_double_for_fract_bits (rtx operand)
26794 REAL_VALUE_TYPE r0;
26796 if (!CONST_DOUBLE_P (operand))
26797 return 0;
26799 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
26800 if (exact_real_inverse (DFmode, &r0)
26801 && !REAL_VALUE_NEGATIVE (r0))
26803 if (exact_real_truncate (DFmode, &r0))
26805 HOST_WIDE_INT value = real_to_integer (&r0);
26806 value = value & 0xffffffff;
26807 if ((value != 0) && ( (value & (value - 1)) == 0))
26809 int ret = exact_log2 (value);
26810 gcc_assert (IN_RANGE (ret, 0, 31));
26811 return ret;
26815 return 0;
26818 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
26819 log2 is in [1, 32], return that log2. Otherwise return -1.
26820 This is used in the patterns for vcvt.s32.f32 floating-point to
26821 fixed-point conversions. */
26824 vfp3_const_double_for_bits (rtx x)
26826 const REAL_VALUE_TYPE *r;
26828 if (!CONST_DOUBLE_P (x))
26829 return -1;
26831 r = CONST_DOUBLE_REAL_VALUE (x);
26833 if (REAL_VALUE_NEGATIVE (*r)
26834 || REAL_VALUE_ISNAN (*r)
26835 || REAL_VALUE_ISINF (*r)
26836 || !real_isinteger (r, SFmode))
26837 return -1;
26839 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
26841 /* The exact_log2 above will have returned -1 if this is
26842 not an exact log2. */
26843 if (!IN_RANGE (hwint, 1, 32))
26844 return -1;
26846 return hwint;
26850 /* Emit a memory barrier around an atomic sequence according to MODEL. */
26852 static void
26853 arm_pre_atomic_barrier (enum memmodel model)
26855 if (need_atomic_barrier_p (model, true))
26856 emit_insn (gen_memory_barrier ());
26859 static void
26860 arm_post_atomic_barrier (enum memmodel model)
26862 if (need_atomic_barrier_p (model, false))
26863 emit_insn (gen_memory_barrier ());
26866 /* Emit the load-exclusive and store-exclusive instructions.
26867 Use acquire and release versions if necessary. */
26869 static void
26870 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
26872 rtx (*gen) (rtx, rtx);
26874 if (acq)
26876 switch (mode)
26878 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
26879 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
26880 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
26881 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
26882 default:
26883 gcc_unreachable ();
26886 else
26888 switch (mode)
26890 case QImode: gen = gen_arm_load_exclusiveqi; break;
26891 case HImode: gen = gen_arm_load_exclusivehi; break;
26892 case SImode: gen = gen_arm_load_exclusivesi; break;
26893 case DImode: gen = gen_arm_load_exclusivedi; break;
26894 default:
26895 gcc_unreachable ();
26899 emit_insn (gen (rval, mem));
26902 static void
26903 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
26904 rtx mem, bool rel)
26906 rtx (*gen) (rtx, rtx, rtx);
26908 if (rel)
26910 switch (mode)
26912 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
26913 case HImode: gen = gen_arm_store_release_exclusivehi; break;
26914 case SImode: gen = gen_arm_store_release_exclusivesi; break;
26915 case DImode: gen = gen_arm_store_release_exclusivedi; break;
26916 default:
26917 gcc_unreachable ();
26920 else
26922 switch (mode)
26924 case QImode: gen = gen_arm_store_exclusiveqi; break;
26925 case HImode: gen = gen_arm_store_exclusivehi; break;
26926 case SImode: gen = gen_arm_store_exclusivesi; break;
26927 case DImode: gen = gen_arm_store_exclusivedi; break;
26928 default:
26929 gcc_unreachable ();
26933 emit_insn (gen (bval, rval, mem));
26936 /* Emit jump pattern INSN and mark it as very unlikely to be taken. */
26938 static void
26939 emit_unlikely_jump (rtx insn)
26941 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
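/* With REG_BR_PROB_BASE of 10000 this is 99, i.e. a taken probability of
   just under 1%.  */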
26943 insn = emit_jump_insn (insn);
26944 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
26947 /* Expand a compare and swap pattern. */
26949 void
26950 arm_expand_compare_and_swap (rtx operands[])
26952 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
26953 machine_mode mode;
26954 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
26956 bval = operands[0];
26957 rval = operands[1];
26958 mem = operands[2];
26959 oldval = operands[3];
26960 newval = operands[4];
26961 is_weak = operands[5];
26962 mod_s = operands[6];
26963 mod_f = operands[7];
26964 mode = GET_MODE (mem);
26966 /* Normally the succ memory model must be stronger than fail, but in the
26967 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
26968 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
26970 if (TARGET_HAVE_LDACQ
26971 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
26972 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
26973 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
26975 switch (mode)
26977 case QImode:
26978 case HImode:
26979 /* For narrow modes, we're going to perform the comparison in SImode,
26980 so do the zero-extension now. */
26981 rval = gen_reg_rtx (SImode);
26982 oldval = convert_modes (SImode, mode, oldval, true);
26983 /* FALLTHRU */
26985 case SImode:
26986 /* Force the value into a register if needed. We waited until after
26987 the zero-extension above to do this properly. */
26988 if (!arm_add_operand (oldval, SImode))
26989 oldval = force_reg (SImode, oldval);
26990 break;
26992 case DImode:
26993 if (!cmpdi_operand (oldval, mode))
26994 oldval = force_reg (mode, oldval);
26995 break;
26997 default:
26998 gcc_unreachable ();
27001 switch (mode)
27003 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27004 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27005 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27006 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27007 default:
27008 gcc_unreachable ();
27011 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CCmode, CC_REGNUM);
27012 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27014 if (mode == QImode || mode == HImode)
27015 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27017 /* In all cases, we arrange for success to be signaled by Z set.
27018 This arrangement allows for the boolean result to be used directly
27019 in a subsequent branch, after optimization. For Thumb-1 targets, the
27020 boolean negation of the result is also stored in bval, because the
27021 Thumb-1 backend lacks dependency tracking for the CC flag, as
27022 flag-setting is not represented at the RTL level. */
27023 if (TARGET_THUMB1)
27024 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
27025 else
27027 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
27028 emit_insn (gen_rtx_SET (bval, x));
27032 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27033 another memory store between the load-exclusive and store-exclusive can
27034 reset the monitor from Exclusive to Open state. This means we must wait
27035 until after reload to split the pattern, lest we get a register spill in
27036 the middle of the atomic sequence. Success of the compare and swap is
27037 indicated by the Z flag being set for 32-bit targets and by neg_bval
27038 being zero for Thumb-1 targets (i.e. the negation of the boolean value
27039 returned by the atomic_compare_and_swapmode standard pattern in operand 0). */
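/* Illustrative sketch (not emitted verbatim; the exact code depends on the
   target and the memory model): a strong SImode compare-and-swap splits
   into roughly

       .Lretry:
         ldrex   r_val, [r_mem]
         cmp     r_val, r_old
         bne     .Lfail            @ leaves Z clear on mismatch
         strex   r_tmp, r_new, [r_mem]
         cmp     r_tmp, #0
         bne     .Lretry           @ retry if the exclusive store failed
       .Lfail:

   with barriers or acquire/release variants added as the memory model
   requires.  Register names and labels are placeholders.  */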
27041 void
27042 arm_split_compare_and_swap (rtx operands[])
27044 rtx rval, mem, oldval, newval, neg_bval;
27045 machine_mode mode;
27046 enum memmodel mod_s, mod_f;
27047 bool is_weak;
27048 rtx_code_label *label1, *label2;
27049 rtx x, cond;
27051 rval = operands[1];
27052 mem = operands[2];
27053 oldval = operands[3];
27054 newval = operands[4];
27055 is_weak = (operands[5] != const0_rtx);
27056 mod_s = memmodel_from_int (INTVAL (operands[6]));
27057 mod_f = memmodel_from_int (INTVAL (operands[7]));
27058 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
27059 mode = GET_MODE (mem);
27061 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
27063 bool use_acquire = TARGET_HAVE_LDACQ
27064 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27065 || is_mm_release (mod_s));
27067 bool use_release = TARGET_HAVE_LDACQ
27068 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27069 || is_mm_acquire (mod_s));
27071 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
27072 a full barrier is emitted after the store-release. */
27073 if (is_armv8_sync)
27074 use_acquire = false;
27076 /* Checks whether a barrier is needed and emits one accordingly. */
27077 if (!(use_acquire || use_release))
27078 arm_pre_atomic_barrier (mod_s);
27080 label1 = NULL;
27081 if (!is_weak)
27083 label1 = gen_label_rtx ();
27084 emit_label (label1);
27086 label2 = gen_label_rtx ();
27088 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27090 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1) if oldval != rval,
27091 as required to communicate with arm_expand_compare_and_swap. */
27092 if (TARGET_32BIT)
27094 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
27095 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27096 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27097 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27098 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27100 else
27102 emit_move_insn (neg_bval, const1_rtx);
27103 cond = gen_rtx_NE (VOIDmode, rval, oldval);
27104 if (thumb1_cmpneg_operand (oldval, SImode))
27105 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
27106 label2, cond));
27107 else
27108 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
27111 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
27113 /* Weak or strong, we want EQ to be true for success, so that we
27114 match the flags that we got from the compare above. */
27115 if (TARGET_32BIT)
27117 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27118 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
27119 emit_insn (gen_rtx_SET (cond, x));
27122 if (!is_weak)
27124 /* Z is set to boolean value of !neg_bval, as required to communicate
27125 with arm_expand_compare_and_swap. */
27126 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
27127 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
27130 if (!is_mm_relaxed (mod_f))
27131 emit_label (label2);
27133 /* Checks whether a barrier is needed and emits one accordingly. */
27134 if (is_armv8_sync
27135 || !(use_acquire || use_release))
27136 arm_post_atomic_barrier (mod_s);
27138 if (is_mm_relaxed (mod_f))
27139 emit_label (label2);
27142 /* Split an atomic operation pattern. Operation is given by CODE and is one
27143 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
27144 operation). Operation is performed on the content at MEM and on VALUE
27145 following the memory model MODEL_RTX. The content at MEM before and after
27146 the operation is returned in OLD_OUT and NEW_OUT respectively while the
27147 success of the operation is returned in COND. Using a scratch register or
27148 an operand register for these determines what result is returned for that
27149 pattern. */
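/* Illustrative sketch (not emitted verbatim): an SImode atomic
   fetch-and-add with a relaxed memory model splits into roughly

       .Lretry:
         ldrex   r_old, [r_mem]
         add     r_new, r_old, r_value
         strex   r_tmp, r_new, [r_mem]
         cmp     r_tmp, #0
         bne     .Lretry

   where OLD_OUT receives the loaded value, NEW_OUT the computed value,
   and COND the STREX status; barriers are added around the loop as
   MODEL_RTX dictates.  Register names and labels are placeholders.  */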
27151 void
27152 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27153 rtx value, rtx model_rtx, rtx cond)
27155 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
27156 machine_mode mode = GET_MODE (mem);
27157 machine_mode wmode = (mode == DImode ? DImode : SImode);
27158 rtx_code_label *label;
27159 bool all_low_regs, bind_old_new;
27160 rtx x;
27162 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
27164 bool use_acquire = TARGET_HAVE_LDACQ
27165 && !(is_mm_relaxed (model) || is_mm_consume (model)
27166 || is_mm_release (model));
27168 bool use_release = TARGET_HAVE_LDACQ
27169 && !(is_mm_relaxed (model) || is_mm_consume (model)
27170 || is_mm_acquire (model));
27172 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
27173 a full barrier is emitted after the store-release. */
27174 if (is_armv8_sync)
27175 use_acquire = false;
27177 /* Checks whether a barrier is needed and emits one accordingly. */
27178 if (!(use_acquire || use_release))
27179 arm_pre_atomic_barrier (model);
27181 label = gen_label_rtx ();
27182 emit_label (label);
27184 if (new_out)
27185 new_out = gen_lowpart (wmode, new_out);
27186 if (old_out)
27187 old_out = gen_lowpart (wmode, old_out);
27188 else
27189 old_out = new_out;
27190 value = simplify_gen_subreg (wmode, value, mode, 0);
27192 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27194 /* Does the operation require destination and first operand to use the same
27195 register? This is decided by register constraints of relevant insn
27196 patterns in thumb1.md. */
27197 gcc_assert (!new_out || REG_P (new_out));
27198 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
27199 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
27200 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
27201 bind_old_new =
27202 (TARGET_THUMB1
27203 && code != SET
27204 && code != MINUS
27205 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
27207 /* We want to return the old value while putting the result of the operation
27208 in the same register as the old value so copy the old value over to the
27209 destination register and use that register for the operation. */
27210 if (old_out && bind_old_new)
27212 emit_move_insn (new_out, old_out);
27213 old_out = new_out;
27216 switch (code)
27218 case SET:
27219 new_out = value;
27220 break;
27222 case NOT:
27223 x = gen_rtx_AND (wmode, old_out, value);
27224 emit_insn (gen_rtx_SET (new_out, x));
27225 x = gen_rtx_NOT (wmode, new_out);
27226 emit_insn (gen_rtx_SET (new_out, x));
27227 break;
27229 case MINUS:
27230 if (CONST_INT_P (value))
27232 value = GEN_INT (-INTVAL (value));
27233 code = PLUS;
27235 /* FALLTHRU */
27237 case PLUS:
27238 if (mode == DImode)
27240 /* DImode plus/minus need to clobber flags. */
27241 /* The adddi3 and subdi3 patterns are incorrectly written so that
27242 they require matching operands, even when we could easily support
27243 three operands. Thankfully, this can be fixed up post-splitting,
27244 as the individual add+adc patterns do accept three operands and
27245 post-reload cprop can make these moves go away. */
27246 emit_move_insn (new_out, old_out);
27247 if (code == PLUS)
27248 x = gen_adddi3 (new_out, new_out, value);
27249 else
27250 x = gen_subdi3 (new_out, new_out, value);
27251 emit_insn (x);
27252 break;
27254 /* FALLTHRU */
27256 default:
27257 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27258 emit_insn (gen_rtx_SET (new_out, x));
27259 break;
27262 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27263 use_release);
27265 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27266 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27268 /* Checks whether a barrier is needed and emits one accordingly. */
27269 if (is_armv8_sync
27270 || !(use_acquire || use_release))
27271 arm_post_atomic_barrier (model);
27274 #define MAX_VECT_LEN 16
27276 struct expand_vec_perm_d
27278 rtx target, op0, op1;
27279 unsigned char perm[MAX_VECT_LEN];
27280 machine_mode vmode;
27281 unsigned char nelt;
27282 bool one_vector_p;
27283 bool testing_p;
27286 /* Generate a variable permutation. */
27288 static void
27289 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27291 machine_mode vmode = GET_MODE (target);
27292 bool one_vector_p = rtx_equal_p (op0, op1);
27294 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27295 gcc_checking_assert (GET_MODE (op0) == vmode);
27296 gcc_checking_assert (GET_MODE (op1) == vmode);
27297 gcc_checking_assert (GET_MODE (sel) == vmode);
27298 gcc_checking_assert (TARGET_NEON);
27300 if (one_vector_p)
27302 if (vmode == V8QImode)
27303 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27304 else
27305 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27307 else
27309 rtx pair;
27311 if (vmode == V8QImode)
27313 pair = gen_reg_rtx (V16QImode);
27314 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27315 pair = gen_lowpart (TImode, pair);
27316 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27318 else
27320 pair = gen_reg_rtx (OImode);
27321 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27322 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27327 void
27328 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27330 machine_mode vmode = GET_MODE (target);
27331 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27332 bool one_vector_p = rtx_equal_p (op0, op1);
27333 rtx rmask[MAX_VECT_LEN], mask;
27335 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27336 numbering of elements for big-endian, we must reverse the order. */
27337 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27339 /* The VTBL instruction does not use a modulo index, so we must take care
27340 of that ourselves. */
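/* For example (illustration only): with two distinct V8QImode operands
   the valid lane numbers are 0..15, so a selector element of 17 must
   wrap around to lane 1; the AND with 2 * nelt - 1 (15 here) below
   implements that wrap-around.  */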
27341 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27342 for (i = 0; i < nelt; ++i)
27343 rmask[i] = mask;
27344 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27345 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27347 arm_expand_vec_perm_1 (target, op0, op1, sel);
27350 /* Map lane ordering between architectural lane order, and GCC lane order,
27351 taking into account ABI. See comment above output_move_neon for details. */
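/* Worked example (illustration only): for V4SImode on a big-endian
   target, lane 0 is first reversed to lane 3 and, because the mode is
   16 bytes wide, then XORed with nelems / 2 == 2, giving lane 1.  On a
   little-endian target lanes map to themselves.  */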
27353 static int
27354 neon_endian_lane_map (machine_mode mode, int lane)
27356 if (BYTES_BIG_ENDIAN)
27358 int nelems = GET_MODE_NUNITS (mode);
27359 /* Reverse lane order. */
27360 lane = (nelems - 1 - lane);
27361 /* Reverse D register order, to match ABI. */
27362 if (GET_MODE_SIZE (mode) == 16)
27363 lane = lane ^ (nelems / 2);
27365 return lane;
27368 /* Some permutations index into pairs of vectors, this is a helper function
27369 to map indexes into those pairs of vectors. */
27371 static int
27372 neon_pair_endian_lane_map (machine_mode mode, int lane)
27374 int nelem = GET_MODE_NUNITS (mode);
27375 if (BYTES_BIG_ENDIAN)
27376 lane =
27377 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
27378 return lane;
27381 /* Generate or test for an insn that supports a constant permutation. */
27383 /* Recognize patterns for the VUZP insns. */
27385 static bool
27386 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27388 unsigned int i, odd, mask, nelt = d->nelt;
27389 rtx out0, out1, in0, in1;
27390 rtx (*gen)(rtx, rtx, rtx, rtx);
27391 int first_elem;
27392 int swap_nelt;
27394 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27395 return false;
27397 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
27398 big-endian pattern on 64-bit vectors, so we correct for that. */
27399 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
27400 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
27402 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
27404 if (first_elem == neon_endian_lane_map (d->vmode, 0))
27405 odd = 0;
27406 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
27407 odd = 1;
27408 else
27409 return false;
27410 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27412 for (i = 0; i < nelt; i++)
27414 unsigned elt =
27415 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
27416 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
27417 return false;
27420 /* Success! */
27421 if (d->testing_p)
27422 return true;
27424 switch (d->vmode)
27426 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27427 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27428 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27429 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27430 case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
27431 case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
27432 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27433 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27434 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27435 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27436 default:
27437 gcc_unreachable ();
27440 in0 = d->op0;
27441 in1 = d->op1;
27442 if (swap_nelt != 0)
27443 std::swap (in0, in1);
27445 out0 = d->target;
27446 out1 = gen_reg_rtx (d->vmode);
27447 if (odd)
27448 std::swap (out0, out1);
27450 emit_insn (gen (out0, in0, in1, out1));
27451 return true;
27454 /* Recognize patterns for the VZIP insns. */
27456 static bool
27457 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27459 unsigned int i, high, mask, nelt = d->nelt;
27460 rtx out0, out1, in0, in1;
27461 rtx (*gen)(rtx, rtx, rtx, rtx);
27462 int first_elem;
27463 bool is_swapped;
27465 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27466 return false;
27468 is_swapped = BYTES_BIG_ENDIAN;
27470 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
27472 high = nelt / 2;
27473 if (first_elem == neon_endian_lane_map (d->vmode, high))
27475 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
27476 high = 0;
27477 else
27478 return false;
27479 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27481 for (i = 0; i < nelt / 2; i++)
27483 unsigned elt =
27484 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
27485 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
27486 != elt)
27487 return false;
27488 elt =
27489 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
27490 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
27491 != elt)
27492 return false;
27495 /* Success! */
27496 if (d->testing_p)
27497 return true;
27499 switch (d->vmode)
27501 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27502 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27503 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27504 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27505 case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
27506 case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
27507 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27508 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27509 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27510 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27511 default:
27512 gcc_unreachable ();
27515 in0 = d->op0;
27516 in1 = d->op1;
27517 if (is_swapped)
27518 std::swap (in0, in1);
27520 out0 = d->target;
27521 out1 = gen_reg_rtx (d->vmode);
27522 if (high)
27523 std::swap (out0, out1);
27525 emit_insn (gen (out0, in0, in1, out1));
27526 return true;
27529 /* Recognize patterns for the VREV insns. */
27531 static bool
27532 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27534 unsigned int i, j, diff, nelt = d->nelt;
27535 rtx (*gen)(rtx, rtx);
27537 if (!d->one_vector_p)
27538 return false;
27540 diff = d->perm[0];
27541 switch (diff)
27543 case 7:
27544 switch (d->vmode)
27546 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27547 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27548 default:
27549 return false;
27551 break;
27552 case 3:
27553 switch (d->vmode)
27555 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27556 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27557 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27558 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27559 case V8HFmode: gen = gen_neon_vrev64v8hf; break;
27560 case V4HFmode: gen = gen_neon_vrev64v4hf; break;
27561 default:
27562 return false;
27564 break;
27565 case 1:
27566 switch (d->vmode)
27568 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27569 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27570 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27571 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27572 case V4SImode: gen = gen_neon_vrev64v4si; break;
27573 case V2SImode: gen = gen_neon_vrev64v2si; break;
27574 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27575 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27576 default:
27577 return false;
27579 break;
27580 default:
27581 return false;
27584 for (i = 0; i < nelt ; i += diff + 1)
27585 for (j = 0; j <= diff; j += 1)
27587 /* This is guaranteed to be true as the value of diff
27588 is 7, 3 or 1, and we should have enough elements in the
27589 queue to generate this. Getting a vector mask with a
27590 value of diff other than these implies that
27591 something is wrong by the time we get here. */
27592 gcc_assert (i + j < nelt);
27593 if (d->perm[i + j] != i + diff - j)
27594 return false;
27597 /* Success! */
27598 if (d->testing_p)
27599 return true;
27601 emit_insn (gen (d->target, d->op0));
27602 return true;
27605 /* Recognize patterns for the VTRN insns. */
27607 static bool
27608 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27610 unsigned int i, odd, mask, nelt = d->nelt;
27611 rtx out0, out1, in0, in1;
27612 rtx (*gen)(rtx, rtx, rtx, rtx);
27614 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27615 return false;
27617 /* Note that these are little-endian tests. Adjust for big-endian later. */
27618 if (d->perm[0] == 0)
27619 odd = 0;
27620 else if (d->perm[0] == 1)
27621 odd = 1;
27622 else
27623 return false;
27624 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27626 for (i = 0; i < nelt; i += 2)
27628 if (d->perm[i] != i + odd)
27629 return false;
27630 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
27631 return false;
27634 /* Success! */
27635 if (d->testing_p)
27636 return true;
27638 switch (d->vmode)
27640 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
27641 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
27642 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
27643 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
27644 case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
27645 case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
27646 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
27647 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
27648 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
27649 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
27650 default:
27651 gcc_unreachable ();
27654 in0 = d->op0;
27655 in1 = d->op1;
27656 if (BYTES_BIG_ENDIAN)
27658 std::swap (in0, in1);
27659 odd = !odd;
27662 out0 = d->target;
27663 out1 = gen_reg_rtx (d->vmode);
27664 if (odd)
27665 std::swap (out0, out1);
27667 emit_insn (gen (out0, in0, in1, out1));
27668 return true;
27671 /* Recognize patterns for the VEXT insns. */
27673 static bool
27674 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
27676 unsigned int i, nelt = d->nelt;
27677 rtx (*gen) (rtx, rtx, rtx, rtx);
27678 rtx offset;
27680 unsigned int location;
27682 unsigned int next = d->perm[0] + 1;
27684 /* TODO: Handle GCC's numbering of elements for big-endian. */
27685 if (BYTES_BIG_ENDIAN)
27686 return false;
27688 /* Check if the extracted indexes are increasing by one. */
27689 for (i = 1; i < nelt; next++, i++)
27691 /* If we hit the most significant element of the 2nd vector in
27692 the previous iteration, no need to test further. */
27693 if (next == 2 * nelt)
27694 return false;
27696 /* If we are operating on only one vector: it could be a
27697 rotation. If there are only two elements of size < 64, let
27698 arm_evpc_neon_vrev catch it. */
27699 if (d->one_vector_p && (next == nelt))
27701 if ((nelt == 2) && (d->vmode != V2DImode))
27702 return false;
27703 else
27704 next = 0;
27707 if (d->perm[i] != next)
27708 return false;
27711 location = d->perm[0];
27713 switch (d->vmode)
27715 case V16QImode: gen = gen_neon_vextv16qi; break;
27716 case V8QImode: gen = gen_neon_vextv8qi; break;
27717 case V4HImode: gen = gen_neon_vextv4hi; break;
27718 case V8HImode: gen = gen_neon_vextv8hi; break;
27719 case V2SImode: gen = gen_neon_vextv2si; break;
27720 case V4SImode: gen = gen_neon_vextv4si; break;
27721 case V4HFmode: gen = gen_neon_vextv4hf; break;
27722 case V8HFmode: gen = gen_neon_vextv8hf; break;
27723 case V2SFmode: gen = gen_neon_vextv2sf; break;
27724 case V4SFmode: gen = gen_neon_vextv4sf; break;
27725 case V2DImode: gen = gen_neon_vextv2di; break;
27726 default:
27727 return false;
27730 /* Success! */
27731 if (d->testing_p)
27732 return true;
27734 offset = GEN_INT (location);
27735 emit_insn (gen (d->target, d->op0, d->op1, offset));
27736 return true;
27739 /* The NEON VTBL instruction is a fully variable permutation that's even
27740 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
27741 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
27742 can do slightly better by expanding this as a constant where we don't
27743 have to apply a mask. */
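/* For example (illustration only): a constant V8QImode selector such as
   { 0, 2, 4, 6, 8, 10, 12, 14 } is already in range, so it can be
   loaded directly and fed to VTBL without the AND masking that
   arm_expand_vec_perm must apply to variable selectors.  */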
27745 static bool
27746 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
27748 rtx rperm[MAX_VECT_LEN], sel;
27749 machine_mode vmode = d->vmode;
27750 unsigned int i, nelt = d->nelt;
27752 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27753 numbering of elements for big-endian, we must reverse the order. */
27754 if (BYTES_BIG_ENDIAN)
27755 return false;
27757 if (d->testing_p)
27758 return true;
27760 /* Generic code will try constant permutation twice. Once with the
27761 original mode and again with the elements lowered to QImode.
27762 So wait and don't do the selector expansion ourselves. */
27763 if (vmode != V8QImode && vmode != V16QImode)
27764 return false;
27766 for (i = 0; i < nelt; ++i)
27767 rperm[i] = GEN_INT (d->perm[i]);
27768 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
27769 sel = force_reg (vmode, sel);
27771 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
27772 return true;
27775 static bool
27776 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
27778 /* Check if the input mask matches vext before reordering the
27779 operands. */
27780 if (TARGET_NEON)
27781 if (arm_evpc_neon_vext (d))
27782 return true;
27784 /* The pattern matching functions above are written to look for a small
27785 number to begin the sequence (0, 1, N/2). If we begin with an index
27786 from the second operand, we can swap the operands. */
27787 if (d->perm[0] >= d->nelt)
27789 unsigned i, nelt = d->nelt;
27791 for (i = 0; i < nelt; ++i)
27792 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
27794 std::swap (d->op0, d->op1);
27797 if (TARGET_NEON)
27799 if (arm_evpc_neon_vuzp (d))
27800 return true;
27801 if (arm_evpc_neon_vzip (d))
27802 return true;
27803 if (arm_evpc_neon_vrev (d))
27804 return true;
27805 if (arm_evpc_neon_vtrn (d))
27806 return true;
27807 return arm_evpc_neon_vtbl (d);
27809 return false;
27812 /* Expand a vec_perm_const pattern. */
27814 bool
27815 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
27817 struct expand_vec_perm_d d;
27818 int i, nelt, which;
27820 d.target = target;
27821 d.op0 = op0;
27822 d.op1 = op1;
27824 d.vmode = GET_MODE (target);
27825 gcc_assert (VECTOR_MODE_P (d.vmode));
27826 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
27827 d.testing_p = false;
27829 for (i = which = 0; i < nelt; ++i)
27831 rtx e = XVECEXP (sel, 0, i);
27832 int ei = INTVAL (e) & (2 * nelt - 1);
27833 which |= (ei < nelt ? 1 : 2);
27834 d.perm[i] = ei;
27837 switch (which)
27839 default:
27840 gcc_unreachable();
27842 case 3:
27843 d.one_vector_p = false;
27844 if (!rtx_equal_p (op0, op1))
27845 break;
27847 /* The elements of PERM do not suggest that only the first operand
27848 is used, but both operands are identical. Allow easier matching
27849 of the permutation by folding the permutation into the single
27850 input vector. */
27851 /* FALLTHRU */
27852 case 2:
27853 for (i = 0; i < nelt; ++i)
27854 d.perm[i] &= nelt - 1;
27855 d.op0 = op1;
27856 d.one_vector_p = true;
27857 break;
27859 case 1:
27860 d.op1 = op0;
27861 d.one_vector_p = true;
27862 break;
27865 return arm_expand_vec_perm_const_1 (&d);
27868 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
27870 static bool
27871 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
27872 const unsigned char *sel)
27874 struct expand_vec_perm_d d;
27875 unsigned int i, nelt, which;
27876 bool ret;
27878 d.vmode = vmode;
27879 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
27880 d.testing_p = true;
27881 memcpy (d.perm, sel, nelt);
27883 /* Categorize the set of elements in the selector. */
27884 for (i = which = 0; i < nelt; ++i)
27886 unsigned char e = d.perm[i];
27887 gcc_assert (e < 2 * nelt);
27888 which |= (e < nelt ? 1 : 2);
27891 /* For all elements from second vector, fold the elements to first. */
27892 if (which == 2)
27893 for (i = 0; i < nelt; ++i)
27894 d.perm[i] -= nelt;
27896 /* Check whether the mask can be applied to the vector type. */
27897 d.one_vector_p = (which != 3);
27899 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
27900 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
27901 if (!d.one_vector_p)
27902 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
27904 start_sequence ();
27905 ret = arm_expand_vec_perm_const_1 (&d);
27906 end_sequence ();
27908 return ret;
27911 bool
27912 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
27914 /* If we are soft float and either have LDRD or the mode is no wider
27915 than a word, then all auto-increment forms are OK. */
27916 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
27917 return true;
27919 switch (code)
27921 /* Post increment and Pre Decrement are supported for all
27922 instruction forms except for vector forms. */
27923 case ARM_POST_INC:
27924 case ARM_PRE_DEC:
27925 if (VECTOR_MODE_P (mode))
27927 if (code != ARM_PRE_DEC)
27928 return true;
27929 else
27930 return false;
27933 return true;
27935 case ARM_POST_DEC:
27936 case ARM_PRE_INC:
27937 /* Without LDRD, and with a mode size greater than the
27938 word size, there is no point in auto-incrementing
27939 because ldm and stm do not have these forms. */
27940 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
27941 return false;
27943 /* Vector and floating point modes do not support
27944 these auto increment forms. */
27945 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
27946 return false;
27948 return true;
27950 default:
27951 return false;
27955 return false;
27958 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
27959 on ARM, since we know that shifts by negative amounts are no-ops.
27960 Additionally, the default expansion code is not available or suitable
27961 for post-reload insn splits (this can occur when the register allocator
27962 chooses not to do a shift in NEON).
27964 This function is used in both initial expand and post-reload splits, and
27965 handles all kinds of 64-bit shifts.
27967 Input requirements:
27968 - It is safe for the input and output to be the same register, but
27969 early-clobber rules apply for the shift amount and scratch registers.
27970 - Shift by register requires both scratch registers. In all other cases
27971 the scratch registers may be NULL.
27972 - Ashiftrt by a register also clobbers the CC register. */
27973 void
27974 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
27975 rtx amount, rtx scratch1, rtx scratch2)
27977 rtx out_high = gen_highpart (SImode, out);
27978 rtx out_low = gen_lowpart (SImode, out);
27979 rtx in_high = gen_highpart (SImode, in);
27980 rtx in_low = gen_lowpart (SImode, in);
27982 /* Terminology:
27983 in = the register pair containing the input value.
27984 out = the destination register pair.
27985 up = the high- or low-part of each pair.
27986 down = the opposite part to "up".
27987 In a shift, we can consider bits to shift from "up"-stream to
27988 "down"-stream, so in a left-shift "up" is the low-part and "down"
27989 is the high-part of each register pair. */
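/* Worked example (illustration only): for a left shift by the constant
   5, "up" is the low word and "down" is the high word, and the
   shift-by-constant case below amounts to

       out_high = (in_high << 5) | (in_low >> 27);
       out_low  = in_low << 5;  */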
27991 rtx out_up = code == ASHIFT ? out_low : out_high;
27992 rtx out_down = code == ASHIFT ? out_high : out_low;
27993 rtx in_up = code == ASHIFT ? in_low : in_high;
27994 rtx in_down = code == ASHIFT ? in_high : in_low;
27996 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
27997 gcc_assert (out
27998 && (REG_P (out) || GET_CODE (out) == SUBREG)
27999 && GET_MODE (out) == DImode);
28000 gcc_assert (in
28001 && (REG_P (in) || GET_CODE (in) == SUBREG)
28002 && GET_MODE (in) == DImode);
28003 gcc_assert (amount
28004 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28005 && GET_MODE (amount) == SImode)
28006 || CONST_INT_P (amount)));
28007 gcc_assert (scratch1 == NULL
28008 || (GET_CODE (scratch1) == SCRATCH)
28009 || (GET_MODE (scratch1) == SImode
28010 && REG_P (scratch1)));
28011 gcc_assert (scratch2 == NULL
28012 || (GET_CODE (scratch2) == SCRATCH)
28013 || (GET_MODE (scratch2) == SImode
28014 && REG_P (scratch2)));
28015 gcc_assert (!REG_P (out) || !REG_P (amount)
28016 || !HARD_REGISTER_P (out)
28017 || (REGNO (out) != REGNO (amount)
28018 && REGNO (out) + 1 != REGNO (amount)));
28020 /* Macros to make following code more readable. */
28021 #define SUB_32(DEST,SRC) \
28022 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28023 #define RSB_32(DEST,SRC) \
28024 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28025 #define SUB_S_32(DEST,SRC) \
28026 gen_addsi3_compare0 ((DEST), (SRC), \
28027 GEN_INT (-32))
28028 #define SET(DEST,SRC) \
28029 gen_rtx_SET ((DEST), (SRC))
28030 #define SHIFT(CODE,SRC,AMOUNT) \
28031 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28032 #define LSHIFT(CODE,SRC,AMOUNT) \
28033 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28034 SImode, (SRC), (AMOUNT))
28035 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28036 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28037 SImode, (SRC), (AMOUNT))
28038 #define ORR(A,B) \
28039 gen_rtx_IOR (SImode, (A), (B))
28040 #define BRANCH(COND,LABEL) \
28041 gen_arm_cond_branch ((LABEL), \
28042 gen_rtx_ ## COND (CCmode, cc_reg, \
28043 const0_rtx), \
28044 cc_reg)
28046 /* Shifts by register and shifts by constant are handled separately. */
28047 if (CONST_INT_P (amount))
28049 /* We have a shift-by-constant. */
28051 /* First, handle out-of-range shift amounts.
28052 In both cases we try to match the result that an ARM instruction in a
28053 shift-by-register would give. This helps reduce execution
28054 differences between optimization levels, but it won't stop other
28055 parts of the compiler doing different things. This is "undefined
28056 behavior", in any case. */
28057 if (INTVAL (amount) <= 0)
28058 emit_insn (gen_movdi (out, in));
28059 else if (INTVAL (amount) >= 64)
28061 if (code == ASHIFTRT)
28063 rtx const31_rtx = GEN_INT (31);
28064 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28065 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28067 else
28068 emit_insn (gen_movdi (out, const0_rtx));
28071 /* Now handle valid shifts. */
28072 else if (INTVAL (amount) < 32)
28074 /* Shifts by a constant less than 32. */
28075 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28077 /* Clearing the out register in DImode first avoids lots
28078 of spilling and results in less stack usage.
28079 Later this redundant insn is completely removed.
28080 Do that only if "in" and "out" are different registers. */
28081 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
28082 emit_insn (SET (out, const0_rtx));
28083 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28084 emit_insn (SET (out_down,
28085 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28086 out_down)));
28087 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28089 else
28091 /* Shifts by a constant greater than 31. */
28092 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28094 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
28095 emit_insn (SET (out, const0_rtx));
28096 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28097 if (code == ASHIFTRT)
28098 emit_insn (gen_ashrsi3 (out_up, in_up,
28099 GEN_INT (31)));
28100 else
28101 emit_insn (SET (out_up, const0_rtx));
28104 else
28106 /* We have a shift-by-register. */
28107 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28109 /* This alternative requires the scratch registers. */
28110 gcc_assert (scratch1 && REG_P (scratch1));
28111 gcc_assert (scratch2 && REG_P (scratch2));
28113 /* We will need the values "amount-32" and "32-amount" later.
28114 Swapping them around now allows the later code to be more general. */
28115 switch (code)
28117 case ASHIFT:
28118 emit_insn (SUB_32 (scratch1, amount));
28119 emit_insn (RSB_32 (scratch2, amount));
28120 break;
28121 case ASHIFTRT:
28122 emit_insn (RSB_32 (scratch1, amount));
28123 /* Also set CC = amount > 32. */
28124 emit_insn (SUB_S_32 (scratch2, amount));
28125 break;
28126 case LSHIFTRT:
28127 emit_insn (RSB_32 (scratch1, amount));
28128 emit_insn (SUB_32 (scratch2, amount));
28129 break;
28130 default:
28131 gcc_unreachable ();
28134 /* Emit code like this:
28136 arithmetic-left:
28137 out_down = in_down << amount;
28138 out_down = (in_up << (amount - 32)) | out_down;
28139 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28140 out_up = in_up << amount;
28142 arithmetic-right:
28143 out_down = in_down >> amount;
28144 out_down = (in_up << (32 - amount)) | out_down;
28145 if (amount < 32)
28146 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28147 out_up = in_up << amount;
28149 logical-right:
28150 out_down = in_down >> amount;
28151 out_down = (in_up << (32 - amount)) | out_down;
28152 if (amount < 32)
28153 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28154 out_up = in_up << amount;
28156 The ARM and Thumb2 variants are the same but implemented slightly
28157 differently. If this were only called during expand we could just
28158 use the Thumb2 case and let combine do the right thing, but this
28159 can also be called from post-reload splitters. */
28161 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28163 if (!TARGET_THUMB2)
28165 /* Emit code for ARM mode. */
28166 emit_insn (SET (out_down,
28167 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28168 if (code == ASHIFTRT)
28170 rtx_code_label *done_label = gen_label_rtx ();
28171 emit_jump_insn (BRANCH (LT, done_label));
28172 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28173 out_down)));
28174 emit_label (done_label);
28176 else
28177 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28178 out_down)));
28180 else
28182 /* Emit code for Thumb2 mode.
28183 Thumb2 can't do shift and or in one insn. */
28184 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28185 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28187 if (code == ASHIFTRT)
28189 rtx_code_label *done_label = gen_label_rtx ();
28190 emit_jump_insn (BRANCH (LT, done_label));
28191 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28192 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28193 emit_label (done_label);
28195 else
28197 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28198 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28202 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28205 #undef SUB_32
28206 #undef RSB_32
28207 #undef SUB_S_32
28208 #undef SET
28209 #undef SHIFT
28210 #undef LSHIFT
28211 #undef REV_LSHIFT
28212 #undef ORR
28213 #undef BRANCH
28216 /* Returns true if the pattern is a valid symbolic address, which is either a
28217 symbol_ref or (symbol_ref + addend).
28219 According to the ARM ELF ABI, the initial addend of REL-type relocations
28220 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
28221 literal field of the instruction as a 16-bit signed value in the range
28222 -32768 <= A < 32768. */
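/* For example (illustration only): "sym" and "sym + 100" are accepted,
   since the addend 100 fits in the signed 16-bit range, whereas
   "sym + 0x10000" is not.  */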
28224 bool
28225 arm_valid_symbolic_address_p (rtx addr)
28227 rtx xop0, xop1 = NULL_RTX;
28228 rtx tmp = addr;
28230 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
28231 return true;
28233 /* (const (plus: symbol_ref const_int)) */
28234 if (GET_CODE (addr) == CONST)
28235 tmp = XEXP (addr, 0);
28237 if (GET_CODE (tmp) == PLUS)
28239 xop0 = XEXP (tmp, 0);
28240 xop1 = XEXP (tmp, 1);
28242 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
28243 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
28246 return false;
28249 /* Return true if *COMPARISON is a valid comparison operation, and put
28250 the operands into a form that is valid for it. */
28251 bool
28252 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28254 enum rtx_code code = GET_CODE (*comparison);
28255 int code_int;
28256 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28257 ? GET_MODE (*op2) : GET_MODE (*op1);
28259 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28261 if (code == UNEQ || code == LTGT)
28262 return false;
28264 code_int = (int)code;
28265 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28266 PUT_CODE (*comparison, (enum rtx_code)code_int);
28268 switch (mode)
28270 case SImode:
28271 if (!arm_add_operand (*op1, mode))
28272 *op1 = force_reg (mode, *op1);
28273 if (!arm_add_operand (*op2, mode))
28274 *op2 = force_reg (mode, *op2);
28275 return true;
28277 case DImode:
28278 if (!cmpdi_operand (*op1, mode))
28279 *op1 = force_reg (mode, *op1);
28280 if (!cmpdi_operand (*op2, mode))
28281 *op2 = force_reg (mode, *op2);
28282 return true;
28284 case HFmode:
28285 if (!TARGET_VFP_FP16INST)
28286 break;
28287 /* FP16 comparisons are done in SF mode. */
28288 mode = SFmode;
28289 *op1 = convert_to_mode (mode, *op1, 1);
28290 *op2 = convert_to_mode (mode, *op2, 1);
28291 /* Fall through. */
28292 case SFmode:
28293 case DFmode:
28294 if (!vfp_compare_operand (*op1, mode))
28295 *op1 = force_reg (mode, *op1);
28296 if (!vfp_compare_operand (*op2, mode))
28297 *op2 = force_reg (mode, *op2);
28298 return true;
28299 default:
28300 break;
28303 return false;
28307 /* Maximum number of instructions to set block of memory. */
28308 static int
28309 arm_block_set_max_insns (void)
28311 if (optimize_function_for_size_p (cfun))
28312 return 4;
28313 else
28314 return current_tune->max_insns_inline_memset;
28317 /* Return TRUE if it's profitable to set block of memory for
28318 non-vectorized case. VAL is the value to set the memory
28319 with. LENGTH is the number of bytes to set. ALIGN is the
28320 alignment of the destination memory in bytes. UNALIGNED_P
28321 is TRUE if we can only set the memory with instructions
28322 meeting alignment requirements. USE_STRD_P is TRUE if we
28323 can use strd to set the memory. */
28324 static bool
28325 arm_block_set_non_vect_profit_p (rtx val,
28326 unsigned HOST_WIDE_INT length,
28327 unsigned HOST_WIDE_INT align,
28328 bool unaligned_p, bool use_strd_p)
28330 int num = 0;
28331 /* For a leftover of 0-7 bytes, we can set the memory block using
28332 strb/strh/str with the minimum number of instructions. */
28333 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
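/* For example (illustration only): a leftover of 7 bytes needs
   str + strh + strb, i.e. 3 instructions, hence leftover[7] == 3.  */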
28335 if (unaligned_p)
28337 num = arm_const_inline_cost (SET, val);
28338 num += length / align + length % align;
28340 else if (use_strd_p)
28342 num = arm_const_double_inline_cost (val);
28343 num += (length >> 3) + leftover[length & 7];
28345 else
28347 num = arm_const_inline_cost (SET, val);
28348 num += (length >> 2) + leftover[length & 3];
28351 /* We may be able to combine last pair STRH/STRB into a single STR
28352 by shifting one byte back. */
28353 if (unaligned_access && length > 3 && (length & 3) == 3)
28354 num--;
28356 return (num <= arm_block_set_max_insns ());
28359 /* Return TRUE if it's profitable to set block of memory for
28360 vectorized case. LENGTH is the number of bytes to set.
28361 ALIGN is the alignment of destination memory in bytes.
28362 MODE is the vector mode used to set the memory. */
28363 static bool
28364 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28365 unsigned HOST_WIDE_INT align,
28366 machine_mode mode)
28368 int num;
28369 bool unaligned_p = ((align & 3) != 0);
28370 unsigned int nelt = GET_MODE_NUNITS (mode);
28372 /* Instruction loading constant value. */
28373 num = 1;
28374 /* Instructions storing the memory. */
28375 num += (length + nelt - 1) / nelt;
28376 /* Instructions adjusting the address expression. We only need to
28377 adjust the address expression if it's 4-byte aligned and the
28378 leftover bytes can only be stored by a misaligned store instruction. */
28379 if (!unaligned_p && (length & 3) != 0)
28380 num++;
28382 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28383 if (!unaligned_p && mode == V16QImode)
28384 num--;
28386 return (num <= arm_block_set_max_insns ());
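/* Worked example (illustration only): length == 25 with 4-byte
   alignment and V16QImode gives 1 (load the constant vector)
   + 2 (stores) + 1 (address adjustment for the tail) - 1 (first
   aligned vst1 credit) = 3 instructions, to be compared against
   arm_block_set_max_insns ().  */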
28389 /* Set a block of memory using vectorization instructions for the
28390 unaligned case. We fill the first LENGTH bytes of the memory
28391 area starting from DSTBASE with byte constant VALUE. ALIGN is
28392 the alignment requirement of memory. Return TRUE if succeeded. */
28393 static bool
28394 arm_block_set_unaligned_vect (rtx dstbase,
28395 unsigned HOST_WIDE_INT length,
28396 unsigned HOST_WIDE_INT value,
28397 unsigned HOST_WIDE_INT align)
28399 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28400 rtx dst, mem;
28401 rtx val_elt, val_vec, reg;
28402 rtx rval[MAX_VECT_LEN];
28403 rtx (*gen_func) (rtx, rtx);
28404 machine_mode mode;
28405 unsigned HOST_WIDE_INT v = value;
28406 unsigned int offset = 0;
28407 gcc_assert ((align & 0x3) != 0);
28408 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28409 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28410 if (length >= nelt_v16)
28412 mode = V16QImode;
28413 gen_func = gen_movmisalignv16qi;
28415 else
28417 mode = V8QImode;
28418 gen_func = gen_movmisalignv8qi;
28420 nelt_mode = GET_MODE_NUNITS (mode);
28421 gcc_assert (length >= nelt_mode);
28422 /* Skip if it isn't profitable. */
28423 if (!arm_block_set_vect_profit_p (length, align, mode))
28424 return false;
28426 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28427 mem = adjust_automodify_address (dstbase, mode, dst, offset);
28429 v = sext_hwi (v, BITS_PER_WORD);
28430 val_elt = GEN_INT (v);
28431 for (j = 0; j < nelt_mode; j++)
28432 rval[j] = val_elt;
28434 reg = gen_reg_rtx (mode);
28435 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28436 /* Emit instruction loading the constant value. */
28437 emit_move_insn (reg, val_vec);
28439 /* Handle nelt_mode bytes in a vector. */
28440 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28442 emit_insn ((*gen_func) (mem, reg));
28443 if (i + 2 * nelt_mode <= length)
28445 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28446 offset += nelt_mode;
28447 mem = adjust_automodify_address (dstbase, mode, dst, offset);
28451 /* If at least nelt_v8 bytes are left over, we must be in
28452 V16QImode. */
28453 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28455 /* Handle (8, 16) bytes leftover. */
28456 if (i + nelt_v8 < length)
28458 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28459 offset += length - i;
28460 mem = adjust_automodify_address (dstbase, mode, dst, offset);
28462 /* We are shifting bytes back, set the alignment accordingly. */
28463 if ((length & 1) != 0 && align >= 2)
28464 set_mem_align (mem, BITS_PER_UNIT);
28466 emit_insn (gen_movmisalignv16qi (mem, reg));
28468 /* Handle (0, 8] bytes leftover. */
28469 else if (i < length && i + nelt_v8 >= length)
28471 if (mode == V16QImode)
28472 reg = gen_lowpart (V8QImode, reg);
28474 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28475 + (nelt_mode - nelt_v8))));
28476 offset += (length - i) + (nelt_mode - nelt_v8);
28477 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
28479 /* We are shifting bytes back, set the alignment accordingly. */
28480 if ((length & 1) != 0 && align >= 2)
28481 set_mem_align (mem, BITS_PER_UNIT);
28483 emit_insn (gen_movmisalignv8qi (mem, reg));
28486 return true;
28489 /* Set a block of memory using vectorization instructions for the
28490 aligned case. We fill the first LENGTH bytes of the memory area
28491 starting from DSTBASE with byte constant VALUE. ALIGN is the
28492 alignment requirement of memory. Return TRUE if succeeded. */
28493 static bool
28494 arm_block_set_aligned_vect (rtx dstbase,
28495 unsigned HOST_WIDE_INT length,
28496 unsigned HOST_WIDE_INT value,
28497 unsigned HOST_WIDE_INT align)
28499 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28500 rtx dst, addr, mem;
28501 rtx val_elt, val_vec, reg;
28502 rtx rval[MAX_VECT_LEN];
28503 machine_mode mode;
28504 unsigned HOST_WIDE_INT v = value;
28505 unsigned int offset = 0;
28507 gcc_assert ((align & 0x3) == 0);
28508 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28509 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28510 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28511 mode = V16QImode;
28512 else
28513 mode = V8QImode;
28515 nelt_mode = GET_MODE_NUNITS (mode);
28516 gcc_assert (length >= nelt_mode);
28517 /* Skip if it isn't profitable. */
28518 if (!arm_block_set_vect_profit_p (length, align, mode))
28519 return false;
28521 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28523 v = sext_hwi (v, BITS_PER_WORD);
28524 val_elt = GEN_INT (v);
28525 for (j = 0; j < nelt_mode; j++)
28526 rval[j] = val_elt;
28528 reg = gen_reg_rtx (mode);
28529 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28530 /* Emit instruction loading the constant value. */
28531 emit_move_insn (reg, val_vec);
28533 i = 0;
28534 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28535 if (mode == V16QImode)
28537 mem = adjust_automodify_address (dstbase, mode, dst, offset);
28538 emit_insn (gen_movmisalignv16qi (mem, reg));
28539 i += nelt_mode;
28540 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28541 if (i + nelt_v8 < length && i + nelt_v16 > length)
28543 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28544 offset += length - nelt_mode;
28545 mem = adjust_automodify_address (dstbase, mode, dst, offset);
28546 /* We are shifting bytes back, set the alignment accordingly. */
28547 if ((length & 0x3) == 0)
28548 set_mem_align (mem, BITS_PER_UNIT * 4);
28549 else if ((length & 0x1) == 0)
28550 set_mem_align (mem, BITS_PER_UNIT * 2);
28551 else
28552 set_mem_align (mem, BITS_PER_UNIT);
28554 emit_insn (gen_movmisalignv16qi (mem, reg));
28555 return true;
28557 /* Fall through for bytes leftover. */
28558 mode = V8QImode;
28559 nelt_mode = GET_MODE_NUNITS (mode);
28560 reg = gen_lowpart (V8QImode, reg);
28563 /* Handle 8 bytes in a vector. */
28564 for (; (i + nelt_mode <= length); i += nelt_mode)
28566 addr = plus_constant (Pmode, dst, i);
28567 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
28568 emit_move_insn (mem, reg);
28571 /* Handle single word leftover by shifting 4 bytes back. We can
28572 use aligned access for this case. */
28573 if (i + UNITS_PER_WORD == length)
28575 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28576 offset += i - UNITS_PER_WORD;
28577 mem = adjust_automodify_address (dstbase, mode, addr, offset);
28578 /* We are shifting 4 bytes back, set the alignment accordingly. */
28579 if (align > UNITS_PER_WORD)
28580 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28582 emit_move_insn (mem, reg);
28584 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28585 We have to use unaligned access for this case. */
28586 else if (i < length)
28588 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28589 offset += length - nelt_mode;
28590 mem = adjust_automodify_address (dstbase, mode, dst, offset);
28591 /* We are shifting bytes back, set the alignment accordingly. */
28592 if ((length & 1) == 0)
28593 set_mem_align (mem, BITS_PER_UNIT * 2);
28594 else
28595 set_mem_align (mem, BITS_PER_UNIT);
28597 emit_insn (gen_movmisalignv8qi (mem, reg));
28600 return true;
28603 /* Set a block of memory using plain strh/strb instructions, only
28604 using instructions allowed by ALIGN on the processor. We fill the
28605 first LENGTH bytes of the memory area starting from DSTBASE
28606 with byte constant VALUE. ALIGN is the alignment requirement
28607 of memory. */
28608 static bool
28609 arm_block_set_unaligned_non_vect (rtx dstbase,
28610 unsigned HOST_WIDE_INT length,
28611 unsigned HOST_WIDE_INT value,
28612 unsigned HOST_WIDE_INT align)
28614 unsigned int i;
28615 rtx dst, addr, mem;
28616 rtx val_exp, val_reg, reg;
28617 machine_mode mode;
28618 HOST_WIDE_INT v = value;
28620 gcc_assert (align == 1 || align == 2);
28622 if (align == 2)
28623 v |= (value << BITS_PER_UNIT);
28625 v = sext_hwi (v, BITS_PER_WORD);
28626 val_exp = GEN_INT (v);
28627 /* Skip if it isn't profitable. */
28628 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28629 align, true, false))
28630 return false;
28632 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28633 mode = (align == 2 ? HImode : QImode);
28634 val_reg = force_reg (SImode, val_exp);
28635 reg = gen_lowpart (mode, val_reg);
28637 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
28639 addr = plus_constant (Pmode, dst, i);
28640 mem = adjust_automodify_address (dstbase, mode, addr, i);
28641 emit_move_insn (mem, reg);
28644 /* Handle single byte leftover. */
28645 if (i + 1 == length)
28647 reg = gen_lowpart (QImode, val_reg);
28648 addr = plus_constant (Pmode, dst, i);
28649 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28650 emit_move_insn (mem, reg);
28651 i++;
28654 gcc_assert (i == length);
28655 return true;
28658 /* Set a block of memory using plain strd/str/strh/strb instructions,
28659 to permit unaligned copies on processors which support unaligned
28660 semantics for those instructions. We fill the first LENGTH bytes
28661 of the memory area starting from DSTBASE with byte constant VALUE.
28662 ALIGN is the alignment requirement of memory. */
28663 static bool
28664 arm_block_set_aligned_non_vect (rtx dstbase,
28665 unsigned HOST_WIDE_INT length,
28666 unsigned HOST_WIDE_INT value,
28667 unsigned HOST_WIDE_INT align)
28669 unsigned int i;
28670 rtx dst, addr, mem;
28671 rtx val_exp, val_reg, reg;
28672 unsigned HOST_WIDE_INT v;
28673 bool use_strd_p;
28675 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
28676 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
28678 v = (value | (value << 8) | (value << 16) | (value << 24));
28679 if (length < UNITS_PER_WORD)
28680 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
28682 if (use_strd_p)
28683 v |= (v << BITS_PER_WORD);
28684 else
28685 v = sext_hwi (v, BITS_PER_WORD);
28687 val_exp = GEN_INT (v);
28688 /* Skip if it isn't profitable. */
28689 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28690 align, false, use_strd_p))
28692 if (!use_strd_p)
28693 return false;
28695 /* Try without strd. */
28696 v = (v >> BITS_PER_WORD);
28697 v = sext_hwi (v, BITS_PER_WORD);
28698 val_exp = GEN_INT (v);
28699 use_strd_p = false;
28700 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28701 align, false, use_strd_p))
28702 return false;
28705 i = 0;
28706 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28707 /* Handle double words using strd if possible. */
28708 if (use_strd_p)
28710 val_reg = force_reg (DImode, val_exp);
28711 reg = val_reg;
28712 for (; (i + 8 <= length); i += 8)
28714 addr = plus_constant (Pmode, dst, i);
28715 mem = adjust_automodify_address (dstbase, DImode, addr, i);
28716 emit_move_insn (mem, reg);
28719 else
28720 val_reg = force_reg (SImode, val_exp);
28722 /* Handle words. */
28723 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
28724 for (; (i + 4 <= length); i += 4)
28726 addr = plus_constant (Pmode, dst, i);
28727 mem = adjust_automodify_address (dstbase, SImode, addr, i);
28728 if ((align & 3) == 0)
28729 emit_move_insn (mem, reg);
28730 else
28731 emit_insn (gen_unaligned_storesi (mem, reg));
28734 /* Merge last pair of STRH and STRB into a STR if possible. */
28735 if (unaligned_access && i > 0 && (i + 3) == length)
28737 addr = plus_constant (Pmode, dst, i - 1);
28738 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
28739 /* We are shifting one byte back, set the alignment accordingly. */
28740 if ((align & 1) == 0)
28741 set_mem_align (mem, BITS_PER_UNIT);
28743 /* Most likely this is an unaligned access, and we can't tell at
28744 compilation time. */
28745 emit_insn (gen_unaligned_storesi (mem, reg));
28746 return true;
28749 /* Handle half word leftover. */
28750 if (i + 2 <= length)
28752 reg = gen_lowpart (HImode, val_reg);
28753 addr = plus_constant (Pmode, dst, i);
28754 mem = adjust_automodify_address (dstbase, HImode, addr, i);
28755 if ((align & 1) == 0)
28756 emit_move_insn (mem, reg);
28757 else
28758 emit_insn (gen_unaligned_storehi (mem, reg));
28760 i += 2;
28763 /* Handle single byte leftover. */
28764 if (i + 1 == length)
28766 reg = gen_lowpart (QImode, val_reg);
28767 addr = plus_constant (Pmode, dst, i);
28768 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28769 emit_move_insn (mem, reg);
28772 return true;
28775 /* Set a block of memory using vectorization instructions for both
28776 aligned and unaligned cases. We fill the first LENGTH bytes of
28777 the memory area starting from DSTBASE with byte constant VALUE.
28778 ALIGN is the alignment requirement of memory. */
28779 static bool
28780 arm_block_set_vect (rtx dstbase,
28781 unsigned HOST_WIDE_INT length,
28782 unsigned HOST_WIDE_INT value,
28783 unsigned HOST_WIDE_INT align)
28785 /* Check whether we need to use unaligned store instruction. */
28786 if (((align & 3) != 0 || (length & 3) != 0)
28787 /* Check whether unaligned store instruction is available. */
28788 && (!unaligned_access || BYTES_BIG_ENDIAN))
28789 return false;
28791 if ((align & 3) == 0)
28792 return arm_block_set_aligned_vect (dstbase, length, value, align);
28793 else
28794 return arm_block_set_unaligned_vect (dstbase, length, value, align);
28797 /* Expand a memory-set (setmem) operation. First we try to do it using
28798 vectorization instructions, then fall back to ARM unaligned access and
28799 double-word stores if profitable. OPERANDS[0] is the destination,
28800 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value used to
28801 initialize the memory, and OPERANDS[3] is the known alignment of the
28802 destination. */
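/* Usage sketch (illustration only): this is reached when expanding the
   setmem pattern for a constant-length store such as
   memset (buf, 0xab, 15); lengths above 64 bytes are rejected here and
   left to the generic fall-back (typically a library call).  */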
28803 bool
28804 arm_gen_setmem (rtx *operands)
28806 rtx dstbase = operands[0];
28807 unsigned HOST_WIDE_INT length;
28808 unsigned HOST_WIDE_INT value;
28809 unsigned HOST_WIDE_INT align;
28811 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
28812 return false;
28814 length = UINTVAL (operands[1]);
28815 if (length > 64)
28816 return false;
28818 value = (UINTVAL (operands[2]) & 0xFF);
28819 align = UINTVAL (operands[3]);
28820 if (TARGET_NEON && length >= 8
28821 && current_tune->string_ops_prefer_neon
28822 && arm_block_set_vect (dstbase, length, value, align))
28823 return true;
28825 if (!unaligned_access && (align & 3) != 0)
28826 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
28828 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
28832 static bool
28833 arm_macro_fusion_p (void)
28835 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
28838 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
28839 for MOVW / MOVT macro fusion. */
28841 static bool
28842 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
28844 /* We are trying to fuse
28845 movw imm / movt imm
28846 instructions as a group that gets scheduled together. */
28848 rtx set_dest = SET_DEST (curr_set);
28850 if (GET_MODE (set_dest) != SImode)
28851 return false;
28853 /* We are trying to match:
28854 prev (movw) == (set (reg r0) (const_int imm16))
28855 curr (movt) == (set (zero_extract (reg r0)
28856 (const_int 16)
28857 (const_int 16))
28858 (const_int imm16_1))
28860 prev (movw) == (set (reg r1)
28861 (high (symbol_ref ("SYM"))))
28862 curr (movt) == (set (reg r0)
28863 (lo_sum (reg r1)
28864 (symbol_ref ("SYM")))) */
28866 if (GET_CODE (set_dest) == ZERO_EXTRACT)
28868 if (CONST_INT_P (SET_SRC (curr_set))
28869 && CONST_INT_P (SET_SRC (prev_set))
28870 && REG_P (XEXP (set_dest, 0))
28871 && REG_P (SET_DEST (prev_set))
28872 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
28873 return true;
28876 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
28877 && REG_P (SET_DEST (curr_set))
28878 && REG_P (SET_DEST (prev_set))
28879 && GET_CODE (SET_SRC (prev_set)) == HIGH
28880 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
28881 return true;
28883 return false;
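/* Editor's sketch: the back-to-back pair this predicate matches, shown as
   illustrative assembly for the symbolic case:

       movw    r0, #:lower16:SYM
       movt    r0, #:upper16:SYM

   For the immediate case, PREV is a plain SImode set of a register from a
   16-bit constant and CURR writes the upper 16 bits of the same register
   through a zero_extract, exactly as matched above.  */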
28886 static bool
28887 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
28889 rtx prev_set = single_set (prev);
28890 rtx curr_set = single_set (curr);
28892 if (!prev_set
28893 || !curr_set)
28894 return false;
28896 if (any_condjump_p (curr))
28897 return false;
28899 if (!arm_macro_fusion_p ())
28900 return false;
28902 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
28903 && aarch_crypto_can_dual_issue (prev, curr))
28904 return true;
28906 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
28907 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
28908 return true;
28910 return false;
28913 /* Return true iff the instruction fusion described by OP is enabled. */
28914 bool
28915 arm_fusion_enabled_p (tune_params::fuse_ops op)
28917 return current_tune->fusible_ops & op;
28920 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
28922 static unsigned HOST_WIDE_INT
28923 arm_asan_shadow_offset (void)
28925 return HOST_WIDE_INT_1U << 29;
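/* Editor's note (assumption based on the generic ASan mapping, which is
   not spelled out here): with this offset the shadow address is computed
   as shadow = (addr >> 3) + (1 << 29), i.e. the shadow region starts at
   0x20000000 in the 32-bit address space.  */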
28929 /* This is a temporary fix for PR60655. Ideally we need
28930 to handle most of these cases in the generic part but
28931 currently we reject minus (..) (sym_ref). We try to
28932 ameliorate the case with minus (sym_ref1) (sym_ref2)
28933 where they are in the same section. */
28935 static bool
28936 arm_const_not_ok_for_debug_p (rtx p)
28938 tree decl_op0 = NULL;
28939 tree decl_op1 = NULL;
28941 if (GET_CODE (p) == MINUS)
28943 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
28945 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
28946 if (decl_op1
28947 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
28948 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
28950 if ((VAR_P (decl_op1)
28951 || TREE_CODE (decl_op1) == CONST_DECL)
28952 && (VAR_P (decl_op0)
28953 || TREE_CODE (decl_op0) == CONST_DECL))
28954 return (get_variable_section (decl_op1, false)
28955 != get_variable_section (decl_op0, false));
28957 if (TREE_CODE (decl_op1) == LABEL_DECL
28958 && TREE_CODE (decl_op0) == LABEL_DECL)
28959 return (DECL_CONTEXT (decl_op1)
28960 != DECL_CONTEXT (decl_op0));
28963 return true;
28967 return false;
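/* Editor's sketch (hypothetical example): for two file-scope variables
   that end up in the same section, say

       static int a;
       static int b;

   a debug constant of the form &b - &a is accepted (the hook returns
   false), whereas a difference of symbols from different sections, or any
   other minus (..) (sym_ref) expression, is still rejected.  */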
28970 /* Return TRUE if X is a reference to a value in a constant pool. */
28971 extern bool
28972 arm_is_constant_pool_ref (rtx x)
28974 return (MEM_P (x)
28975 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
28976 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
28979 /* Remember the last target of arm_set_current_function. */
28980 static GTY(()) tree arm_previous_fndecl;
28982 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
28984 void
28985 save_restore_target_globals (tree new_tree)
28987 /* If we have a previous state, use it. */
28988 if (TREE_TARGET_GLOBALS (new_tree))
28989 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
28990 else if (new_tree == target_option_default_node)
28991 restore_target_globals (&default_target_globals);
28992 else
28994 /* Call target_reinit and save the state for TARGET_GLOBALS. */
28995 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
28998 arm_option_params_internal ();
29001 /* Invalidate arm_previous_fndecl. */
29003 void
29004 arm_reset_previous_fndecl (void)
29006 arm_previous_fndecl = NULL_TREE;
29009 /* Establish appropriate back-end context for processing the function
29010 FNDECL. The argument might be NULL to indicate processing at top
29011 level, outside of any function scope. */
29013 static void
29014 arm_set_current_function (tree fndecl)
29016 if (!fndecl || fndecl == arm_previous_fndecl)
29017 return;
29019 tree old_tree = (arm_previous_fndecl
29020 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
29021 : NULL_TREE);
29023 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
29025 /* If current function has no attributes but previous one did,
29026 use the default node. */
29027 if (! new_tree && old_tree)
29028 new_tree = target_option_default_node;
29030 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
29031 the default have been handled by save_restore_target_globals from
29032 arm_pragma_target_parse. */
29033 if (old_tree == new_tree)
29034 return;
29036 arm_previous_fndecl = fndecl;
29038 /* First set the target options. */
29039 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
29041 save_restore_target_globals (new_tree);
29044 /* Implement TARGET_OPTION_PRINT. */
29046 static void
29047 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
29049 int flags = ptr->x_target_flags;
29050 const struct arm_fpu_desc *fpu_desc = &all_fpus[ptr->x_arm_fpu_index];
29052 fprintf (file, "%*sselected arch %s\n", indent, "",
29053 TARGET_THUMB2_P (flags) ? "thumb2" :
29054 TARGET_THUMB_P (flags) ? "thumb1" :
29055 "arm");
29057 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_desc->name);
29060 /* Hook to determine if one function can safely inline another. */
29062 static bool
29063 arm_can_inline_p (tree caller, tree callee)
29065 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
29066 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
29068 struct cl_target_option *caller_opts
29069 = TREE_TARGET_OPTION (caller_tree ? caller_tree
29070 : target_option_default_node);
29072 struct cl_target_option *callee_opts
29073 = TREE_TARGET_OPTION (callee_tree ? callee_tree
29074 : target_option_default_node);
29076 const struct arm_fpu_desc *caller_fpu
29077 = &all_fpus[caller_opts->x_arm_fpu_index];
29078 const struct arm_fpu_desc *callee_fpu
29079 = &all_fpus[callee_opts->x_arm_fpu_index];
29081 /* Callee's fpu features should be a subset of the caller's. */
29082 if ((caller_fpu->features & callee_fpu->features) != callee_fpu->features)
29083 return false;
29085 /* Need same FPU regs. */
29086 if (caller_fpu->regs != callee_fpu->regs)
29087 return false;
29089 /* OK to inline between different modes.
29090 Function with mode specific instructions, e.g using asm,
29091 must be explicitly protected with noinline. */
29092 return true;
29095 /* Hook to fix function's alignment affected by target attribute. */
29097 static void
29098 arm_relayout_function (tree fndecl)
29100 if (DECL_USER_ALIGN (fndecl))
29101 return;
29103 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
29105 if (!callee_tree)
29106 callee_tree = target_option_default_node;
29108 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
29109 SET_DECL_ALIGN (fndecl, FUNCTION_BOUNDARY_P (opts->x_target_flags));
29112 /* Inner function to process the attribute((target(...))), take an argument and
29113 set the current options from the argument. If we have a list, recursively
29114 go over the list. */
29116 static bool
29117 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
29119 if (TREE_CODE (args) == TREE_LIST)
29121 bool ret = true;
29123 for (; args; args = TREE_CHAIN (args))
29124 if (TREE_VALUE (args)
29125 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
29126 ret = false;
29127 return ret;
29130 else if (TREE_CODE (args) != STRING_CST)
29132 error ("attribute %<target%> argument not a string");
29133 return false;
29136 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
29137 char *q;
29139 while ((q = strtok (argstr, ",")) != NULL)
29141 while (ISSPACE (*q)) ++q;
29143 argstr = NULL;
29144 if (!strncmp (q, "thumb", 5))
29145 opts->x_target_flags |= MASK_THUMB;
29147 else if (!strncmp (q, "arm", 3))
29148 opts->x_target_flags &= ~MASK_THUMB;
29150 else if (!strncmp (q, "fpu=", 4))
29152 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
29153 &opts->x_arm_fpu_index, CL_TARGET))
29155 error ("invalid fpu for attribute(target(\"%s\"))", q);
29156 return false;
29159 else
29161 error ("attribute(target(\"%s\")) is unknown", q);
29162 return false;
29165 arm_option_check_internal (opts);
29168 return true;
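/* Editor's sketch (hypothetical declarations): attribute strings the
   parser above accepts.  "vfpv3" is assumed to be a valid -mfpu name for
   the target.

       __attribute__ ((target ("thumb"))) int f (int);
       __attribute__ ((target ("arm,fpu=vfpv3"))) int g (int);

   Anything other than "thumb", "arm" or "fpu=<name>" is diagnosed as an
   unknown target attribute.  */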
29171 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
29173 tree
29174 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
29175 struct gcc_options *opts_set)
29177 if (!arm_valid_target_attribute_rec (args, opts))
29178 return NULL_TREE;
29180 /* Do any overrides, such as global options arch=xxx. */
29181 arm_option_override_internal (opts, opts_set);
29183 return build_target_option_node (opts);
29186 static void
29187 add_attribute (const char * mode, tree *attributes)
29189 size_t len = strlen (mode);
29190 tree value = build_string (len, mode);
29192 TREE_TYPE (value) = build_array_type (char_type_node,
29193 build_index_type (size_int (len)));
29195 *attributes = tree_cons (get_identifier ("target"),
29196 build_tree_list (NULL_TREE, value),
29197 *attributes);
29200 /* For testing.  Insert thumb and arm modes alternately on functions. */
29202 static void
29203 arm_insert_attributes (tree fndecl, tree * attributes)
29205 const char *mode;
29207 if (! TARGET_FLIP_THUMB)
29208 return;
29210 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
29211 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
29212 return;
29214 /* Nested definitions must inherit mode. */
29215 if (current_function_decl)
29217 mode = TARGET_THUMB ? "thumb" : "arm";
29218 add_attribute (mode, attributes);
29219 return;
29222 /* If there is already a setting don't change it. */
29223 if (lookup_attribute ("target", *attributes) != NULL)
29224 return;
29226 mode = thumb_flipper ? "thumb" : "arm";
29227 add_attribute (mode, attributes);
29229 thumb_flipper = !thumb_flipper;
29232 /* Hook to validate attribute((target("string"))). */
29234 static bool
29235 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
29236 tree args, int ARG_UNUSED (flags))
29238 bool ret = true;
29239 struct gcc_options func_options;
29240 tree cur_tree, new_optimize;
29241 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
29243 /* Get the optimization options of the current function. */
29244 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
29246 /* If the function changed the optimization levels as well as setting target
29247 options, start with the optimizations specified. */
29248 if (!func_optimize)
29249 func_optimize = optimization_default_node;
29251 /* Init func_options. */
29252 memset (&func_options, 0, sizeof (func_options));
29253 init_options_struct (&func_options, NULL);
29254 lang_hooks.init_options_struct (&func_options);
29256 /* Initialize func_options to the defaults. */
29257 cl_optimization_restore (&func_options,
29258 TREE_OPTIMIZATION (func_optimize));
29260 cl_target_option_restore (&func_options,
29261 TREE_TARGET_OPTION (target_option_default_node));
29263 /* Set func_options flags with new target mode. */
29264 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
29265 &global_options_set);
29267 if (cur_tree == NULL_TREE)
29268 ret = false;
29270 new_optimize = build_optimization_node (&func_options);
29272 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
29274 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
29276 finalize_options_struct (&func_options);
29278 return ret;
29281 void
29282 arm_declare_function_name (FILE *stream, const char *name, tree decl)
29285 fprintf (stream, "\t.syntax unified\n");
29287 if (TARGET_THUMB)
29289 if (is_called_in_ARM_mode (decl)
29290 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
29291 && cfun->is_thunk))
29292 fprintf (stream, "\t.code 32\n");
29293 else if (TARGET_THUMB1)
29294 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
29295 else
29296 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
29298 else
29299 fprintf (stream, "\t.arm\n");
29301 asm_fprintf (asm_out_file, "\t.fpu %s\n",
29302 TARGET_SOFT_FLOAT ? "softvfp" : TARGET_FPU_NAME);
29304 if (TARGET_POKE_FUNCTION_NAME)
29305 arm_poke_function_name (stream, (const char *) name);
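/* Editor's sketch: the directives the hook above typically emits for a
   Thumb-2, soft-float function (output illustrative; the exact .fpu name
   depends on the configuration):

       .syntax unified
       .thumb
       .thumb_func
       .fpu softvfp
*/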
29308 /* If MEM is in the form of [base+offset], extract the two parts
29309 of the address into BASE and OFFSET and return true; otherwise
29310 clear BASE and OFFSET and return false. */
29312 static bool
29313 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29315 rtx addr;
29317 gcc_assert (MEM_P (mem));
29319 addr = XEXP (mem, 0);
29321 /* Strip off const from addresses like (const (addr)). */
29322 if (GET_CODE (addr) == CONST)
29323 addr = XEXP (addr, 0);
29325 if (GET_CODE (addr) == REG)
29327 *base = addr;
29328 *offset = const0_rtx;
29329 return true;
29332 if (GET_CODE (addr) == PLUS
29333 && GET_CODE (XEXP (addr, 0)) == REG
29334 && CONST_INT_P (XEXP (addr, 1)))
29336 *base = XEXP (addr, 0);
29337 *offset = XEXP (addr, 1);
29338 return true;
29341 *base = NULL_RTX;
29342 *offset = NULL_RTX;
29344 return false;
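/* Editor's sketch: the address shapes the helper above decomposes.

       (mem (reg r1))                       -> BASE = r1, OFFSET = 0
       (mem (plus (reg r1) (const_int 8)))  -> BASE = r1, OFFSET = 8

   A wrapping (const ...) is stripped first; any other form clears BASE
   and OFFSET and returns false.  */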
29347 /* If INSN is a load or store whose address is in the form [base+offset],
29348 extract the two parts into BASE and OFFSET.  IS_LOAD is set
29349 to TRUE if it is a load.  Return TRUE if INSN is such an instruction,
29350 otherwise return FALSE. */
29352 static bool
29353 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29355 rtx x, dest, src;
29357 gcc_assert (INSN_P (insn));
29358 x = PATTERN (insn);
29359 if (GET_CODE (x) != SET)
29360 return false;
29362 src = SET_SRC (x);
29363 dest = SET_DEST (x);
29364 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29366 *is_load = false;
29367 extract_base_offset_in_addr (dest, base, offset);
29369 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29371 *is_load = true;
29372 extract_base_offset_in_addr (src, base, offset);
29374 else
29375 return false;
29377 return (*base != NULL_RTX && *offset != NULL_RTX);
29380 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29382 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29383 and PRI are only calculated for these instructions.  For other instructions,
29384 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
29385 instruction fusion can be supported by returning different priorities.
29387 It's important that irrelevant instructions get the largest FUSION_PRI. */
29389 static void
29390 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29391 int *fusion_pri, int *pri)
29393 int tmp, off_val;
29394 bool is_load;
29395 rtx base, offset;
29397 gcc_assert (INSN_P (insn));
29399 tmp = max_pri - 1;
29400 if (!fusion_load_store (insn, &base, &offset, &is_load))
29402 *pri = tmp;
29403 *fusion_pri = tmp;
29404 return;
29407 /* Load goes first. */
29408 if (is_load)
29409 *fusion_pri = tmp - 1;
29410 else
29411 *fusion_pri = tmp - 2;
29413 tmp /= 2;
29415 /* INSN with smaller base register goes first. */
29416 tmp -= ((REGNO (base) & 0xff) << 20);
29418 /* INSN with smaller offset goes first. */
29419 off_val = (int)(INTVAL (offset));
29420 if (off_val >= 0)
29421 tmp -= (off_val & 0xfffff);
29422 else
29423 tmp += ((- off_val) & 0xfffff);
29425 *pri = tmp;
29426 return;
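/* Editor's worked example (values illustrative): with MAX_PRI == 100,
   instructions that are not simple ldr/str get pri = fusion_pri = 99;
   a fusible load gets fusion_pri 98 and a store gets 97, so loads sort
   ahead of stores.  Within PRI, a smaller base register number and a
   smaller non-negative offset both leave a larger value, which is what
   "goes first" means in the comments above.  */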
29430 /* Construct and return a PARALLEL RTX vector with elements numbering the
29431 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
29432 the vector - from the perspective of the architecture. This does not
29433 line up with GCC's perspective on lane numbers, so we end up with
29434 different masks depending on our target endian-ness. The diagram
29435 below may help. We must draw the distinction when building masks
29436 which select one half of the vector. An instruction selecting
29437 architectural low-lanes for a big-endian target, must be described using
29438 a mask selecting GCC high-lanes.
29440 Big-Endian Little-Endian
29442 GCC 0 1 2 3 3 2 1 0
29443 | x | x | x | x | | x | x | x | x |
29444 Architecture 3 2 1 0 3 2 1 0
29446 Low Mask: { 2, 3 } { 0, 1 }
29447 High Mask: { 0, 1 } { 2, 3 } */
29451 rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
29453 int nunits = GET_MODE_NUNITS (mode);
29454 rtvec v = rtvec_alloc (nunits / 2);
29455 int high_base = nunits / 2;
29456 int low_base = 0;
29457 int base;
29458 rtx t1;
29459 int i;
29461 if (BYTES_BIG_ENDIAN)
29462 base = high ? low_base : high_base;
29463 else
29464 base = high ? high_base : low_base;
29466 for (i = 0; i < nunits / 2; i++)
29467 RTVEC_ELT (v, i) = GEN_INT (base + i);
29469 t1 = gen_rtx_PARALLEL (mode, v);
29470 return t1;
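/* Editor's worked example: for V4SImode (4 lanes) the PARALLEL returned
   above is

       little-endian, HIGH == true  -> (parallel [(const_int 2) (const_int 3)])
       big-endian,    HIGH == true  -> (parallel [(const_int 0) (const_int 1)])

   matching the "High Mask" row of the diagram.  */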
29473 /* Check OP for validity as a PARALLEL RTX vector with elements
29474 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
29475 from the perspective of the architecture. See the diagram above
29476 arm_simd_vect_par_cnst_half for more details. */
29478 bool
29479 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
29480 bool high)
29482 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
29483 HOST_WIDE_INT count_op = XVECLEN (op, 0);
29484 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
29485 int i = 0;
29487 if (!VECTOR_MODE_P (mode))
29488 return false;
29490 if (count_op != count_ideal)
29491 return false;
29493 for (i = 0; i < count_ideal; i++)
29495 rtx elt_op = XVECEXP (op, 0, i);
29496 rtx elt_ideal = XVECEXP (ideal, 0, i);
29498 if (!CONST_INT_P (elt_op)
29499 || INTVAL (elt_ideal) != INTVAL (elt_op))
29500 return false;
29502 return true;
29505 /* Can output mi_thunk for all cases except for non-zero vcall_offset
29506 in Thumb1. */
29507 static bool
29508 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
29509 const_tree)
29511 /* For now, we punt and do not handle this for TARGET_THUMB1. */
29512 if (vcall_offset && TARGET_THUMB1)
29513 return false;
29515 /* Otherwise ok. */
29516 return true;
29519 /* Generate RTL for a conditional branch with rtx comparison CODE in
29520 mode CC_MODE. The destination of the unlikely conditional branch
29521 is LABEL_REF. */
29523 void
29524 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
29525 rtx label_ref)
29527 rtx x;
29528 x = gen_rtx_fmt_ee (code, VOIDmode,
29529 gen_rtx_REG (cc_mode, CC_REGNUM),
29530 const0_rtx);
29532 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29533 gen_rtx_LABEL_REF (VOIDmode, label_ref),
29534 pc_rtx);
29535 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
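/* Editor's sketch: for CODE == NE the jump RTL built above is

       (set (pc) (if_then_else (ne (reg CC_REGNUM) (const_int 0))
                               (label_ref LABEL_REF)
                               (pc)))

   with the comparison register in mode CC_MODE; emit_unlikely_jump then
   marks the branch to the label as unlikely to be taken.  */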
29538 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
29540 For pure-code sections there is no letter code for this attribute, so
29541 output all the section flags numerically when this is needed. */
29543 static bool
29544 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
29547 if (flags & SECTION_ARM_PURECODE)
29549 *num = 0x20000000;
29551 if (!(flags & SECTION_DEBUG))
29552 *num |= 0x2;
29553 if (flags & SECTION_EXCLUDE)
29554 *num |= 0x80000000;
29555 if (flags & SECTION_WRITE)
29556 *num |= 0x1;
29557 if (flags & SECTION_CODE)
29558 *num |= 0x4;
29559 if (flags & SECTION_MERGE)
29560 *num |= 0x10;
29561 if (flags & SECTION_STRINGS)
29562 *num |= 0x20;
29563 if (flags & SECTION_TLS)
29564 *num |= 0x400;
29565 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
29566 *num |= 0x200;
29568 return true;
29571 return false;
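/* Editor's worked example: for an executable, read-only, non-debug
   pure-code section the bits above combine to

       0x20000000 | 0x2 | 0x4 = 0x20000006

   which is the numeric flag word handed back in *NUM for the section
   directive.  */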
29574 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
29576 If pure-code is passed as an option, make sure all functions are in
29577 sections that have the SHF_ARM_PURECODE attribute. */
29579 static section *
29580 arm_function_section (tree decl, enum node_frequency freq,
29581 bool startup, bool exit)
29583 const char * section_name;
29584 section * sec;
29586 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
29587 return default_function_section (decl, freq, startup, exit);
29589 if (!target_pure_code)
29590 return default_function_section (decl, freq, startup, exit);
29593 section_name = DECL_SECTION_NAME (decl);
29595 /* If a function is not in a named section then it falls under the 'default'
29596 text section, also known as '.text'. We can preserve previous behavior as
29597 the default text section already has the SHF_ARM_PURECODE section
29598 attribute. */
29599 if (!section_name)
29601 section *default_sec = default_function_section (decl, freq, startup,
29602 exit);
29604 /* If default_sec is not null, then it must be a special section like for
29605 example .text.startup. We set the pure-code attribute and return the
29606 same section to preserve existing behavior. */
29607 if (default_sec)
29608 default_sec->common.flags |= SECTION_ARM_PURECODE;
29609 return default_sec;
29612 /* Otherwise look whether a section has already been created with
29613 'section_name'. */
29614 sec = get_named_section (decl, section_name, 0);
29615 if (!sec)
29616 /* If that is not the case passing NULL as the section's name to
29617 'get_named_section' will create a section with the declaration's
29618 section name. */
29619 sec = get_named_section (decl, NULL, 0);
29621 /* Set the SHF_ARM_PURECODE attribute. */
29622 sec->common.flags |= SECTION_ARM_PURECODE;
29624 return sec;
29627 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
29629 If DECL is a function declaration and pure-code is passed as an option
29630 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
29631 section's name and RELOC indicates whether the declaration's initializer may
29632 contain runtime relocations. */
29634 static unsigned int
29635 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
29637 unsigned int flags = default_section_type_flags (decl, name, reloc);
29639 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
29640 flags |= SECTION_ARM_PURECODE;
29642 return flags;
29645 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
29647 static void
29648 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
29649 rtx op0, rtx op1,
29650 rtx *quot_p, rtx *rem_p)
29652 if (mode == SImode)
29653 gcc_assert (!TARGET_IDIV);
29655 machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode),
29656 MODE_INT);
29658 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
29659 libval_mode, 2,
29660 op0, GET_MODE (op0),
29661 op1, GET_MODE (op1));
29663 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
29664 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
29665 GET_MODE_SIZE (mode));
29667 gcc_assert (quotient);
29668 gcc_assert (remainder);
29670 *quot_p = quotient;
29671 *rem_p = remainder;
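/* Editor's sketch: for SImode the LIBFUNC here is an AEABI divmod routine
   such as __aeabi_idivmod or __aeabi_uidivmod.  Its combined result is
   treated as a single DImode value, and the two SImode subregs at byte
   offsets 0 and GET_MODE_SIZE (SImode) == 4 recover the quotient and the
   remainder, so one library call stands in for

       q = a / b;  r = a % b;

   (variable names are illustrative).  */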
29674 #include "gt-arm.h"