1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "cfghooks.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "emit-rtl.h"
37 #include "recog.h"
38 #include "cgraph.h"
39 #include "diagnostic-core.h"
40 #include "alias.h"
41 #include "fold-const.h"
42 #include "stor-layout.h"
43 #include "calls.h"
44 #include "varasm.h"
45 #include "output.h"
46 #include "insn-attr.h"
47 #include "flags.h"
48 #include "reload.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "cfgrtl.h"
52 #include "sched-int.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
55 #include "intl.h"
56 #include "libfuncs.h"
57 #include "params.h"
58 #include "opts.h"
59 #include "dumpfile.h"
60 #include "target-globals.h"
61 #include "builtins.h"
62 #include "tm-constrs.h"
63 #include "rtl-iter.h"
65 /* This file should be included last. */
66 #include "target-def.h"
68 /* Forward definitions of types. */
69 typedef struct minipool_node Mnode;
70 typedef struct minipool_fixup Mfix;
72 void (*arm_lang_output_object_attributes_hook)(void);
74 struct four_ints
76 int i[4];
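/* four_ints is the return channel of optimal_immediate_sequence (declared
   below): it holds the up-to-four immediates from which a 32-bit constant
   is synthesised, each of which must fit ARM's "8-bit value rotated right
   by an even amount" encoding.  The helper below is only an illustrative
   sketch of that encoding test (hypothetical name; the backend's own
   checks live elsewhere in this file).  */

static int
is_rotated_imm8_sketch (unsigned int val)
{
  int rot;

  /* Assumes a 32-bit unsigned int, as on ARM targets.  */
  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotating VAL left by ROT undoes a rotate-right of the 8-bit field.  */
      unsigned int v = rot ? (val << rot) | (val >> (32 - rot)) : val;

      if (v <= 0xff)
	return 1;
    }
  return 0;
}

/* For example, 0x12340000 is not itself encodable, but splits into two
   such immediates, 0x12000000 (0x12 ror 8) and 0x00340000 (0x34 ror 16),
   i.e. a MOV followed by an ORR.  */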
79 /* Forward function declarations. */
80 static bool arm_const_not_ok_for_debug_p (rtx);
81 static bool arm_needs_doubleword_align (machine_mode, const_tree);
82 static int arm_compute_static_chain_stack_bytes (void);
83 static arm_stack_offsets *arm_get_frame_offsets (void);
84 static void arm_add_gc_roots (void);
85 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
86 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
87 static unsigned bit_count (unsigned long);
88 static unsigned feature_count (const arm_feature_set*);
89 static int arm_address_register_rtx_p (rtx, int);
90 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
91 static bool is_called_in_ARM_mode (tree);
92 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
93 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
94 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
95 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
96 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
97 inline static int thumb1_index_register_rtx_p (rtx, int);
98 static int thumb_far_jump_used_p (void);
99 static bool thumb_force_lr_save (void);
100 static unsigned arm_size_return_regs (void);
101 static bool arm_assemble_integer (rtx, unsigned int, int);
102 static void arm_print_operand (FILE *, rtx, int);
103 static void arm_print_operand_address (FILE *, machine_mode, rtx);
104 static bool arm_print_operand_punct_valid_p (unsigned char code);
105 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
106 static arm_cc get_arm_condition_code (rtx);
107 static const char *output_multi_immediate (rtx *, const char *, const char *,
108 int, HOST_WIDE_INT);
109 static const char *shift_op (rtx, HOST_WIDE_INT *);
110 static struct machine_function *arm_init_machine_status (void);
111 static void thumb_exit (FILE *, int);
112 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
113 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
114 static Mnode *add_minipool_forward_ref (Mfix *);
115 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
116 static Mnode *add_minipool_backward_ref (Mfix *);
117 static void assign_minipool_offsets (Mfix *);
118 static void arm_print_value (FILE *, rtx);
119 static void dump_minipool (rtx_insn *);
120 static int arm_barrier_cost (rtx_insn *);
121 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
122 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
123 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
124 machine_mode, rtx);
125 static void arm_reorg (void);
126 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
127 static unsigned long arm_compute_save_reg0_reg12_mask (void);
128 static unsigned long arm_compute_save_reg_mask (void);
129 static unsigned long arm_isr_value (tree);
130 static unsigned long arm_compute_func_type (void);
131 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
132 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
133 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
134 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
135 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
136 #endif
137 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
138 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
139 static int arm_comp_type_attributes (const_tree, const_tree);
140 static void arm_set_default_type_attributes (tree);
141 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
142 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
143 static int optimal_immediate_sequence (enum rtx_code code,
144 unsigned HOST_WIDE_INT val,
145 struct four_ints *return_sequence);
146 static int optimal_immediate_sequence_1 (enum rtx_code code,
147 unsigned HOST_WIDE_INT val,
148 struct four_ints *return_sequence,
149 int i);
150 static int arm_get_strip_length (int);
151 static bool arm_function_ok_for_sibcall (tree, tree);
152 static machine_mode arm_promote_function_mode (const_tree,
153 machine_mode, int *,
154 const_tree, int);
155 static bool arm_return_in_memory (const_tree, const_tree);
156 static rtx arm_function_value (const_tree, const_tree, bool);
157 static rtx arm_libcall_value_1 (machine_mode);
158 static rtx arm_libcall_value (machine_mode, const_rtx);
159 static bool arm_function_value_regno_p (const unsigned int);
160 static void arm_internal_label (FILE *, const char *, unsigned long);
161 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
162 tree);
163 static bool arm_have_conditional_execution (void);
164 static bool arm_cannot_force_const_mem (machine_mode, rtx);
165 static bool arm_legitimate_constant_p (machine_mode, rtx);
166 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
167 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
168 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
169 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
170 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
171 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
172 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
173 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
174 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
175 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
176 static void emit_constant_insn (rtx cond, rtx pattern);
177 static rtx_insn *emit_set_insn (rtx, rtx);
178 static rtx emit_multi_reg_push (unsigned long, unsigned long);
179 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
180 tree, bool);
181 static rtx arm_function_arg (cumulative_args_t, machine_mode,
182 const_tree, bool);
183 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
184 const_tree, bool);
185 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
186 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
187 const_tree);
188 static rtx aapcs_libcall_value (machine_mode);
189 static int aapcs_select_return_coproc (const_tree, const_tree);
191 #ifdef OBJECT_FORMAT_ELF
192 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
193 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
194 #endif
195 #ifndef ARM_PE
196 static void arm_encode_section_info (tree, rtx, int);
197 #endif
199 static void arm_file_end (void);
200 static void arm_file_start (void);
201 static void arm_insert_attributes (tree, tree *);
203 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
204 tree, int *, int);
205 static bool arm_pass_by_reference (cumulative_args_t,
206 machine_mode, const_tree, bool);
207 static bool arm_promote_prototypes (const_tree);
208 static bool arm_default_short_enums (void);
209 static bool arm_align_anon_bitfield (void);
210 static bool arm_return_in_msb (const_tree);
211 static bool arm_must_pass_in_stack (machine_mode, const_tree);
212 static bool arm_return_in_memory (const_tree, const_tree);
213 #if ARM_UNWIND_INFO
214 static void arm_unwind_emit (FILE *, rtx_insn *);
215 static bool arm_output_ttype (rtx);
216 static void arm_asm_emit_except_personality (rtx);
217 static void arm_asm_init_sections (void);
218 #endif
219 static rtx arm_dwarf_register_span (rtx);
221 static tree arm_cxx_guard_type (void);
222 static bool arm_cxx_guard_mask_bit (void);
223 static tree arm_get_cookie_size (tree);
224 static bool arm_cookie_has_size (void);
225 static bool arm_cxx_cdtor_returns_this (void);
226 static bool arm_cxx_key_method_may_be_inline (void);
227 static void arm_cxx_determine_class_data_visibility (tree);
228 static bool arm_cxx_class_data_always_comdat (void);
229 static bool arm_cxx_use_aeabi_atexit (void);
230 static void arm_init_libfuncs (void);
231 static tree arm_build_builtin_va_list (void);
232 static void arm_expand_builtin_va_start (tree, rtx);
233 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
234 static void arm_option_override (void);
235 static void arm_override_options_after_change (void);
236 static void arm_option_print (FILE *, int, struct cl_target_option *);
237 static void arm_set_current_function (tree);
238 static bool arm_can_inline_p (tree, tree);
239 static void arm_relayout_function (tree);
240 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
241 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
242 static bool arm_macro_fusion_p (void);
243 static bool arm_cannot_copy_insn_p (rtx_insn *);
244 static int arm_issue_rate (void);
245 static int arm_first_cycle_multipass_dfa_lookahead (void);
246 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
247 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
248 static bool arm_output_addr_const_extra (FILE *, rtx);
249 static bool arm_allocate_stack_slots_for_args (void);
250 static bool arm_warn_func_return (tree);
251 static tree arm_promoted_type (const_tree t);
252 static tree arm_convert_to_type (tree type, tree expr);
253 static bool arm_scalar_mode_supported_p (machine_mode);
254 static bool arm_frame_pointer_required (void);
255 static bool arm_can_eliminate (const int, const int);
256 static void arm_asm_trampoline_template (FILE *);
257 static void arm_trampoline_init (rtx, tree, rtx);
258 static rtx arm_trampoline_adjust_address (rtx);
259 static rtx arm_pic_static_addr (rtx orig, rtx reg);
260 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
261 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
262 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
263 static bool arm_array_mode_supported_p (machine_mode,
264 unsigned HOST_WIDE_INT);
265 static machine_mode arm_preferred_simd_mode (machine_mode);
266 static bool arm_class_likely_spilled_p (reg_class_t);
267 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
268 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
269 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
270 const_tree type,
271 int misalignment,
272 bool is_packed);
273 static void arm_conditional_register_usage (void);
274 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
275 static unsigned int arm_autovectorize_vector_sizes (void);
276 static int arm_default_branch_cost (bool, bool);
277 static int arm_cortex_a5_branch_cost (bool, bool);
278 static int arm_cortex_m_branch_cost (bool, bool);
279 static int arm_cortex_m7_branch_cost (bool, bool);
281 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
282 const unsigned char *sel);
284 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
286 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
287 tree vectype,
288 int misalign ATTRIBUTE_UNUSED);
289 static unsigned arm_add_stmt_cost (void *data, int count,
290 enum vect_cost_for_stmt kind,
291 struct _stmt_vec_info *stmt_info,
292 int misalign,
293 enum vect_cost_model_location where);
295 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
296 bool op0_preserve_value);
297 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
299 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
300 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
301 const_tree);
304 /* Table of machine attributes. */
305 static const struct attribute_spec arm_attribute_table[] =
307 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
308 affects_type_identity } */
309 /* Function calls made to this symbol must be done indirectly, because
310 it may lie outside of the 26 bit addressing range of a normal function
311 call. */
312 { "long_call", 0, 0, false, true, true, NULL, false },
313 /* Whereas these functions are always known to reside within the 26 bit
314 addressing range. */
315 { "short_call", 0, 0, false, true, true, NULL, false },
316 /* Specify the procedure call conventions for a function. */
317 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
318 false },
319 /* Interrupt Service Routines have special prologue and epilogue requirements. */
320 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
321 false },
322 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
323 false },
324 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
325 false },
326 #ifdef ARM_PE
327 /* ARM/PE has three new attributes:
328 interfacearm - ?
329 dllexport - for exporting a function/variable that will live in a dll
330 dllimport - for importing a function/variable from a dll
332 Microsoft allows multiple declspecs in one __declspec, separating
333 them with spaces. We do NOT support this. Instead, use __declspec
334 multiple times.
336 { "dllimport", 0, 0, true, false, false, NULL, false },
337 { "dllexport", 0, 0, true, false, false, NULL, false },
338 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
339 false },
340 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
342 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
343 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
344 false },
345 #endif
346 { NULL, 0, 0, false, false, false, NULL, false }
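/* Illustrative source-level uses of some of the attributes above
   (hypothetical declarations, for context only):

     void far_away (void) __attribute__ ((long_call));
     void nearby (void) __attribute__ ((short_call));
     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
     void __attribute__ ((naked)) reset_entry (void);

   "long_call" forces an indirect call sequence, "short_call" allows a
   plain BL, "isr"/"interrupt" request the special prologue and epilogue,
   and "naked" suppresses prologue/epilogue generation entirely.  */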
349 /* Initialize the GCC target structure. */
350 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
351 #undef TARGET_MERGE_DECL_ATTRIBUTES
352 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
353 #endif
355 #undef TARGET_LEGITIMIZE_ADDRESS
356 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
358 #undef TARGET_LRA_P
359 #define TARGET_LRA_P hook_bool_void_true
361 #undef TARGET_ATTRIBUTE_TABLE
362 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
364 #undef TARGET_INSERT_ATTRIBUTES
365 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
367 #undef TARGET_ASM_FILE_START
368 #define TARGET_ASM_FILE_START arm_file_start
369 #undef TARGET_ASM_FILE_END
370 #define TARGET_ASM_FILE_END arm_file_end
372 #undef TARGET_ASM_ALIGNED_SI_OP
373 #define TARGET_ASM_ALIGNED_SI_OP NULL
374 #undef TARGET_ASM_INTEGER
375 #define TARGET_ASM_INTEGER arm_assemble_integer
377 #undef TARGET_PRINT_OPERAND
378 #define TARGET_PRINT_OPERAND arm_print_operand
379 #undef TARGET_PRINT_OPERAND_ADDRESS
380 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
381 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
382 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
384 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
385 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
387 #undef TARGET_ASM_FUNCTION_PROLOGUE
388 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
390 #undef TARGET_ASM_FUNCTION_EPILOGUE
391 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
393 #undef TARGET_CAN_INLINE_P
394 #define TARGET_CAN_INLINE_P arm_can_inline_p
396 #undef TARGET_RELAYOUT_FUNCTION
397 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
399 #undef TARGET_OPTION_OVERRIDE
400 #define TARGET_OPTION_OVERRIDE arm_option_override
402 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
403 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
405 #undef TARGET_OPTION_PRINT
406 #define TARGET_OPTION_PRINT arm_option_print
408 #undef TARGET_COMP_TYPE_ATTRIBUTES
409 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
411 #undef TARGET_SCHED_MACRO_FUSION_P
412 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
414 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
415 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
417 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
418 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
420 #undef TARGET_SCHED_ADJUST_COST
421 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
423 #undef TARGET_SET_CURRENT_FUNCTION
424 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
426 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
427 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
429 #undef TARGET_SCHED_REORDER
430 #define TARGET_SCHED_REORDER arm_sched_reorder
432 #undef TARGET_REGISTER_MOVE_COST
433 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
435 #undef TARGET_MEMORY_MOVE_COST
436 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
438 #undef TARGET_ENCODE_SECTION_INFO
439 #ifdef ARM_PE
440 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
441 #else
442 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
443 #endif
445 #undef TARGET_STRIP_NAME_ENCODING
446 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
448 #undef TARGET_ASM_INTERNAL_LABEL
449 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
451 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
452 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
454 #undef TARGET_FUNCTION_VALUE
455 #define TARGET_FUNCTION_VALUE arm_function_value
457 #undef TARGET_LIBCALL_VALUE
458 #define TARGET_LIBCALL_VALUE arm_libcall_value
460 #undef TARGET_FUNCTION_VALUE_REGNO_P
461 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
463 #undef TARGET_ASM_OUTPUT_MI_THUNK
464 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
465 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
466 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
468 #undef TARGET_RTX_COSTS
469 #define TARGET_RTX_COSTS arm_rtx_costs
470 #undef TARGET_ADDRESS_COST
471 #define TARGET_ADDRESS_COST arm_address_cost
473 #undef TARGET_SHIFT_TRUNCATION_MASK
474 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
475 #undef TARGET_VECTOR_MODE_SUPPORTED_P
476 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
477 #undef TARGET_ARRAY_MODE_SUPPORTED_P
478 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
479 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
480 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
481 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
482 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
483 arm_autovectorize_vector_sizes
485 #undef TARGET_MACHINE_DEPENDENT_REORG
486 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
488 #undef TARGET_INIT_BUILTINS
489 #define TARGET_INIT_BUILTINS arm_init_builtins
490 #undef TARGET_EXPAND_BUILTIN
491 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
492 #undef TARGET_BUILTIN_DECL
493 #define TARGET_BUILTIN_DECL arm_builtin_decl
495 #undef TARGET_INIT_LIBFUNCS
496 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
498 #undef TARGET_PROMOTE_FUNCTION_MODE
499 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
500 #undef TARGET_PROMOTE_PROTOTYPES
501 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
502 #undef TARGET_PASS_BY_REFERENCE
503 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
504 #undef TARGET_ARG_PARTIAL_BYTES
505 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
506 #undef TARGET_FUNCTION_ARG
507 #define TARGET_FUNCTION_ARG arm_function_arg
508 #undef TARGET_FUNCTION_ARG_ADVANCE
509 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
510 #undef TARGET_FUNCTION_ARG_BOUNDARY
511 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
513 #undef TARGET_SETUP_INCOMING_VARARGS
514 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
516 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
517 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
519 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
520 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
521 #undef TARGET_TRAMPOLINE_INIT
522 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
523 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
524 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
526 #undef TARGET_WARN_FUNC_RETURN
527 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
529 #undef TARGET_DEFAULT_SHORT_ENUMS
530 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
532 #undef TARGET_ALIGN_ANON_BITFIELD
533 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
535 #undef TARGET_NARROW_VOLATILE_BITFIELD
536 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
538 #undef TARGET_CXX_GUARD_TYPE
539 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
541 #undef TARGET_CXX_GUARD_MASK_BIT
542 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
544 #undef TARGET_CXX_GET_COOKIE_SIZE
545 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
547 #undef TARGET_CXX_COOKIE_HAS_SIZE
548 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
550 #undef TARGET_CXX_CDTOR_RETURNS_THIS
551 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
553 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
554 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
556 #undef TARGET_CXX_USE_AEABI_ATEXIT
557 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
559 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
560 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
561 arm_cxx_determine_class_data_visibility
563 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
564 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
566 #undef TARGET_RETURN_IN_MSB
567 #define TARGET_RETURN_IN_MSB arm_return_in_msb
569 #undef TARGET_RETURN_IN_MEMORY
570 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
572 #undef TARGET_MUST_PASS_IN_STACK
573 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
575 #if ARM_UNWIND_INFO
576 #undef TARGET_ASM_UNWIND_EMIT
577 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
579 /* EABI unwinding tables use a different format for the typeinfo tables. */
580 #undef TARGET_ASM_TTYPE
581 #define TARGET_ASM_TTYPE arm_output_ttype
583 #undef TARGET_ARM_EABI_UNWINDER
584 #define TARGET_ARM_EABI_UNWINDER true
586 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
587 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
589 #undef TARGET_ASM_INIT_SECTIONS
590 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
591 #endif /* ARM_UNWIND_INFO */
593 #undef TARGET_DWARF_REGISTER_SPAN
594 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
596 #undef TARGET_CANNOT_COPY_INSN_P
597 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
599 #ifdef HAVE_AS_TLS
600 #undef TARGET_HAVE_TLS
601 #define TARGET_HAVE_TLS true
602 #endif
604 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
605 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
607 #undef TARGET_LEGITIMATE_CONSTANT_P
608 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
610 #undef TARGET_CANNOT_FORCE_CONST_MEM
611 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
613 #undef TARGET_MAX_ANCHOR_OFFSET
614 #define TARGET_MAX_ANCHOR_OFFSET 4095
616 /* The minimum is set such that the total size of the block
617 for a particular anchor is -4088 + 1 + 4095 bytes, which is
618 divisible by eight, ensuring natural spacing of anchors. */
619 #undef TARGET_MIN_ANCHOR_OFFSET
620 #define TARGET_MIN_ANCHOR_OFFSET -4088
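/* That is, anchored offsets run from -4088 up to +4095, so each block
   spans 4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023.  */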
622 #undef TARGET_SCHED_ISSUE_RATE
623 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
625 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
626 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
627 arm_first_cycle_multipass_dfa_lookahead
629 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
630 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
631 arm_first_cycle_multipass_dfa_lookahead_guard
633 #undef TARGET_MANGLE_TYPE
634 #define TARGET_MANGLE_TYPE arm_mangle_type
636 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
637 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
639 #undef TARGET_BUILD_BUILTIN_VA_LIST
640 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
641 #undef TARGET_EXPAND_BUILTIN_VA_START
642 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
643 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
644 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
646 #ifdef HAVE_AS_TLS
647 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
648 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
649 #endif
651 #undef TARGET_LEGITIMATE_ADDRESS_P
652 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
654 #undef TARGET_PREFERRED_RELOAD_CLASS
655 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
657 #undef TARGET_PROMOTED_TYPE
658 #define TARGET_PROMOTED_TYPE arm_promoted_type
660 #undef TARGET_CONVERT_TO_TYPE
661 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
663 #undef TARGET_SCALAR_MODE_SUPPORTED_P
664 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
666 #undef TARGET_FRAME_POINTER_REQUIRED
667 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
669 #undef TARGET_CAN_ELIMINATE
670 #define TARGET_CAN_ELIMINATE arm_can_eliminate
672 #undef TARGET_CONDITIONAL_REGISTER_USAGE
673 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
675 #undef TARGET_CLASS_LIKELY_SPILLED_P
676 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
678 #undef TARGET_VECTORIZE_BUILTINS
679 #define TARGET_VECTORIZE_BUILTINS
681 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
682 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
683 arm_builtin_vectorized_function
685 #undef TARGET_VECTOR_ALIGNMENT
686 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
688 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
689 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
690 arm_vector_alignment_reachable
692 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
693 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
694 arm_builtin_support_vector_misalignment
696 #undef TARGET_PREFERRED_RENAME_CLASS
697 #define TARGET_PREFERRED_RENAME_CLASS \
698 arm_preferred_rename_class
700 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
701 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
702 arm_vectorize_vec_perm_const_ok
704 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
705 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
706 arm_builtin_vectorization_cost
707 #undef TARGET_VECTORIZE_ADD_STMT_COST
708 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
710 #undef TARGET_CANONICALIZE_COMPARISON
711 #define TARGET_CANONICALIZE_COMPARISON \
712 arm_canonicalize_comparison
714 #undef TARGET_ASAN_SHADOW_OFFSET
715 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
717 #undef MAX_INSN_PER_IT_BLOCK
718 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
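/* For example, without -mrestrict-it a two-way conditional fits in one
   IT block (illustrative Thumb-2 only):

	ite	eq
	moveq	r0, #1
	movne	r0, #0

   whereas with -mrestrict-it each predicated instruction needs its own
   single-instruction IT block.  */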
720 #undef TARGET_CAN_USE_DOLOOP_P
721 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
723 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
724 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
726 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
727 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
729 #undef TARGET_SCHED_FUSION_PRIORITY
730 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
732 struct gcc_target targetm = TARGET_INITIALIZER;
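/* The #undef/#define pairs above override individual fields of the
   default target vector; TARGET_INITIALIZER (from target-def.h, which is
   why that header must be included last) then expands to an initializer
   that picks up every such override when populating targetm.  */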
734 /* Obstack for minipool constant handling. */
735 static struct obstack minipool_obstack;
736 static char * minipool_startobj;
738 /* The maximum number of insns skipped which
739 will be conditionalised if possible. */
740 static int max_insns_skipped = 5;
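/* In ARM state this bounds if-conversion of short branches; e.g.
   (illustrative register assignment)

	if (x == 0) y = a; else y = b;

   can become

	cmp	r0, #0
	moveq	r1, r2
	movne	r1, r3

   predicating up to max_insns_skipped instructions instead of branching
   around them.  */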
742 extern FILE * asm_out_file;
744 /* True if we are currently building a constant table. */
745 int making_const_table;
747 /* The processor for which instructions should be scheduled. */
748 enum processor_type arm_tune = arm_none;
750 /* The current tuning set. */
751 const struct tune_params *current_tune;
753 /* Which floating point hardware to schedule for. */
754 int arm_fpu_attr;
756 /* Used for Thumb call_via trampolines. */
757 rtx thumb_call_via_label[14];
758 static int thumb_call_reg_needed;
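/* On Thumb-1 targets without BLX, an indirect call through, say, r3 is
   routed via a small shared thunk, roughly (label naming illustrative):

	bl	<call_via_r3>
	...
   <call_via_r3>:
	bx	r3

   thumb_call_via_label holds the per-register labels and
   thumb_call_reg_needed records whether any were emitted.  */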
760 /* The bits in this mask specify which
761 instructions we are allowed to generate. */
762 arm_feature_set insn_flags = ARM_FSET_EMPTY;
764 /* The bits in this mask specify which instruction scheduling options should
765 be used. */
766 arm_feature_set tune_flags = ARM_FSET_EMPTY;
768 /* The highest ARM architecture version supported by the
769 target. */
770 enum base_architecture arm_base_arch = BASE_ARCH_0;
772 /* The following are used in the arm.md file as equivalents to bits
773 in the above two flag variables. */
775 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
776 int arm_arch3m = 0;
778 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
779 int arm_arch4 = 0;
781 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
782 int arm_arch4t = 0;
784 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
785 int arm_arch5 = 0;
787 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
788 int arm_arch5e = 0;
790 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
791 int arm_arch6 = 0;
793 /* Nonzero if this chip supports the ARM 6K extensions. */
794 int arm_arch6k = 0;
796 /* Nonzero if this chip supports the ARM 6KZ extensions. */
797 int arm_arch6kz = 0;
799 /* Nonzero if instructions present in ARMv6-M can be used. */
800 int arm_arch6m = 0;
802 /* Nonzero if this chip supports the ARM 7 extensions. */
803 int arm_arch7 = 0;
805 /* Nonzero if instructions not present in the 'M' profile can be used. */
806 int arm_arch_notm = 0;
808 /* Nonzero if instructions present in ARMv7E-M can be used. */
809 int arm_arch7em = 0;
811 /* Nonzero if instructions present in ARMv8 can be used. */
812 int arm_arch8 = 0;
814 /* Nonzero if this chip supports the ARMv8.1 extensions. */
815 int arm_arch8_1 = 0;
817 /* Nonzero if this chip can benefit from load scheduling. */
818 int arm_ld_sched = 0;
820 /* Nonzero if this chip is a StrongARM. */
821 int arm_tune_strongarm = 0;
823 /* Nonzero if this chip supports Intel Wireless MMX technology. */
824 int arm_arch_iwmmxt = 0;
826 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
827 int arm_arch_iwmmxt2 = 0;
829 /* Nonzero if this chip is an XScale. */
830 int arm_arch_xscale = 0;
832 /* Nonzero if tuning for XScale. */
833 int arm_tune_xscale = 0;
835 /* Nonzero if we want to tune for stores that access the write-buffer.
836 This typically means an ARM6 or ARM7 with MMU or MPU. */
837 int arm_tune_wbuf = 0;
839 /* Nonzero if tuning for Cortex-A9. */
840 int arm_tune_cortex_a9 = 0;
842 /* Nonzero if we should define __THUMB_INTERWORK__ in the
843 preprocessor.
844 XXX This is a bit of a hack, it's intended to help work around
845 problems in GLD which doesn't understand that armv5t code is
846 interworking clean. */
847 int arm_cpp_interwork = 0;
849 /* Nonzero if chip supports Thumb 1. */
850 int arm_arch_thumb1;
852 /* Nonzero if chip supports Thumb 2. */
853 int arm_arch_thumb2;
855 /* Nonzero if chip supports integer division instruction. */
856 int arm_arch_arm_hwdiv;
857 int arm_arch_thumb_hwdiv;
859 /* Nonzero if chip disallows volatile memory access in IT block. */
860 int arm_arch_no_volatile_ce;
862 /* Nonzero if we should use Neon to handle 64-bit operations rather
863 than core registers. */
864 int prefer_neon_for_64bits = 0;
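/* E.g. a 64-bit addition can then be a single VADD.I64 on a D register
   instead of an ADDS/ADC pair on two core registers.  */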
866 /* Nonzero if we shouldn't use literal pools. */
867 bool arm_disable_literal_pool = false;
869 /* The register number to be used for the PIC offset register. */
870 unsigned arm_pic_register = INVALID_REGNUM;
872 enum arm_pcs arm_pcs_default;
874 /* For an explanation of these variables, see final_prescan_insn below. */
875 int arm_ccfsm_state;
876 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
877 enum arm_cond_code arm_current_cc;
879 rtx arm_target_insn;
880 int arm_target_label;
881 /* The number of conditionally executed insns, including the current insn. */
882 int arm_condexec_count = 0;
883 /* A bitmask specifying the patterns for the IT block.
884 Zero means do not output an IT block before this insn. */
885 int arm_condexec_mask = 0;
886 /* The number of bits used in arm_condexec_mask. */
887 int arm_condexec_masklen = 0;
889 /* Nonzero if chip supports the ARMv8 CRC instructions. */
890 int arm_arch_crc = 0;
892 /* Nonzero if the core has a very small, high-latency multiply unit. */
893 int arm_m_profile_small_mul = 0;
895 /* The condition codes of the ARM, and the inverse function. */
896 static const char * const arm_condition_codes[] =
898 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
899 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
902 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
903 int arm_regs_in_sequence[] =
905 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
908 #define ARM_LSL_NAME "lsl"
909 #define streq(string1, string2) (strcmp (string1, string2) == 0)
911 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
912 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
913 | (1 << PIC_OFFSET_TABLE_REGNUM)))
915 /* Initialization code. */
917 struct processors
919 const char *const name;
920 enum processor_type core;
921 const char *arch;
922 enum base_architecture base_arch;
923 const arm_feature_set flags;
924 const struct tune_params *const tune;
928 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
929 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
930 { \
931 num_slots, \
932 l1_size, \
933 l1_line_size \
934 }
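/* A hypothetical instantiation such as ARM_PREFETCH_BENEFICIAL (4, 32, 64)
   therefore yields the initializer { 4, 32, 64 } for the prefetching
   fields of tune_params: the number of prefetch slots, the L1 cache size
   and the L1 cache line size (values shown are illustrative only).  */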
936 /* arm generic vectorizer costs. */
937 static const
938 struct cpu_vec_costs arm_default_vec_cost = {
939 1, /* scalar_stmt_cost. */
940 1, /* scalar load_cost. */
941 1, /* scalar_store_cost. */
942 1, /* vec_stmt_cost. */
943 1, /* vec_to_scalar_cost. */
944 1, /* scalar_to_vec_cost. */
945 1, /* vec_align_load_cost. */
946 1, /* vec_unalign_load_cost. */
947 1, /* vec_unalign_store_cost. */
948 1, /* vec_store_cost. */
949 3, /* cond_taken_branch_cost. */
950 1, /* cond_not_taken_branch_cost. */
953 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
954 #include "aarch-cost-tables.h"
958 const struct cpu_cost_table cortexa9_extra_costs =
960 /* ALU */
962 0, /* arith. */
963 0, /* logical. */
964 0, /* shift. */
965 COSTS_N_INSNS (1), /* shift_reg. */
966 COSTS_N_INSNS (1), /* arith_shift. */
967 COSTS_N_INSNS (2), /* arith_shift_reg. */
968 0, /* log_shift. */
969 COSTS_N_INSNS (1), /* log_shift_reg. */
970 COSTS_N_INSNS (1), /* extend. */
971 COSTS_N_INSNS (2), /* extend_arith. */
972 COSTS_N_INSNS (1), /* bfi. */
973 COSTS_N_INSNS (1), /* bfx. */
974 0, /* clz. */
975 0, /* rev. */
976 0, /* non_exec. */
977 true /* non_exec_costs_exec. */
980 /* MULT SImode */
982 COSTS_N_INSNS (3), /* simple. */
983 COSTS_N_INSNS (3), /* flag_setting. */
984 COSTS_N_INSNS (2), /* extend. */
985 COSTS_N_INSNS (3), /* add. */
986 COSTS_N_INSNS (2), /* extend_add. */
987 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
989 /* MULT DImode */
991 0, /* simple (N/A). */
992 0, /* flag_setting (N/A). */
993 COSTS_N_INSNS (4), /* extend. */
994 0, /* add (N/A). */
995 COSTS_N_INSNS (4), /* extend_add. */
996 0 /* idiv (N/A). */
999 /* LD/ST */
1001 COSTS_N_INSNS (2), /* load. */
1002 COSTS_N_INSNS (2), /* load_sign_extend. */
1003 COSTS_N_INSNS (2), /* ldrd. */
1004 COSTS_N_INSNS (2), /* ldm_1st. */
1005 1, /* ldm_regs_per_insn_1st. */
1006 2, /* ldm_regs_per_insn_subsequent. */
1007 COSTS_N_INSNS (5), /* loadf. */
1008 COSTS_N_INSNS (5), /* loadd. */
1009 COSTS_N_INSNS (1), /* load_unaligned. */
1010 COSTS_N_INSNS (2), /* store. */
1011 COSTS_N_INSNS (2), /* strd. */
1012 COSTS_N_INSNS (2), /* stm_1st. */
1013 1, /* stm_regs_per_insn_1st. */
1014 2, /* stm_regs_per_insn_subsequent. */
1015 COSTS_N_INSNS (1), /* storef. */
1016 COSTS_N_INSNS (1), /* stored. */
1017 COSTS_N_INSNS (1), /* store_unaligned. */
1018 COSTS_N_INSNS (1), /* loadv. */
1019 COSTS_N_INSNS (1) /* storev. */
1022 /* FP SFmode */
1024 COSTS_N_INSNS (14), /* div. */
1025 COSTS_N_INSNS (4), /* mult. */
1026 COSTS_N_INSNS (7), /* mult_addsub. */
1027 COSTS_N_INSNS (30), /* fma. */
1028 COSTS_N_INSNS (3), /* addsub. */
1029 COSTS_N_INSNS (1), /* fpconst. */
1030 COSTS_N_INSNS (1), /* neg. */
1031 COSTS_N_INSNS (3), /* compare. */
1032 COSTS_N_INSNS (3), /* widen. */
1033 COSTS_N_INSNS (3), /* narrow. */
1034 COSTS_N_INSNS (3), /* toint. */
1035 COSTS_N_INSNS (3), /* fromint. */
1036 COSTS_N_INSNS (3) /* roundint. */
1038 /* FP DFmode */
1040 COSTS_N_INSNS (24), /* div. */
1041 COSTS_N_INSNS (5), /* mult. */
1042 COSTS_N_INSNS (8), /* mult_addsub. */
1043 COSTS_N_INSNS (30), /* fma. */
1044 COSTS_N_INSNS (3), /* addsub. */
1045 COSTS_N_INSNS (1), /* fpconst. */
1046 COSTS_N_INSNS (1), /* neg. */
1047 COSTS_N_INSNS (3), /* compare. */
1048 COSTS_N_INSNS (3), /* widen. */
1049 COSTS_N_INSNS (3), /* narrow. */
1050 COSTS_N_INSNS (3), /* toint. */
1051 COSTS_N_INSNS (3), /* fromint. */
1052 COSTS_N_INSNS (3) /* roundint. */
1055 /* Vector */
1057 COSTS_N_INSNS (1) /* alu. */
1061 const struct cpu_cost_table cortexa8_extra_costs =
1063 /* ALU */
1065 0, /* arith. */
1066 0, /* logical. */
1067 COSTS_N_INSNS (1), /* shift. */
1068 0, /* shift_reg. */
1069 COSTS_N_INSNS (1), /* arith_shift. */
1070 0, /* arith_shift_reg. */
1071 COSTS_N_INSNS (1), /* log_shift. */
1072 0, /* log_shift_reg. */
1073 0, /* extend. */
1074 0, /* extend_arith. */
1075 0, /* bfi. */
1076 0, /* bfx. */
1077 0, /* clz. */
1078 0, /* rev. */
1079 0, /* non_exec. */
1080 true /* non_exec_costs_exec. */
1083 /* MULT SImode */
1085 COSTS_N_INSNS (1), /* simple. */
1086 COSTS_N_INSNS (1), /* flag_setting. */
1087 COSTS_N_INSNS (1), /* extend. */
1088 COSTS_N_INSNS (1), /* add. */
1089 COSTS_N_INSNS (1), /* extend_add. */
1090 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1092 /* MULT DImode */
1094 0, /* simple (N/A). */
1095 0, /* flag_setting (N/A). */
1096 COSTS_N_INSNS (2), /* extend. */
1097 0, /* add (N/A). */
1098 COSTS_N_INSNS (2), /* extend_add. */
1099 0 /* idiv (N/A). */
1102 /* LD/ST */
1104 COSTS_N_INSNS (1), /* load. */
1105 COSTS_N_INSNS (1), /* load_sign_extend. */
1106 COSTS_N_INSNS (1), /* ldrd. */
1107 COSTS_N_INSNS (1), /* ldm_1st. */
1108 1, /* ldm_regs_per_insn_1st. */
1109 2, /* ldm_regs_per_insn_subsequent. */
1110 COSTS_N_INSNS (1), /* loadf. */
1111 COSTS_N_INSNS (1), /* loadd. */
1112 COSTS_N_INSNS (1), /* load_unaligned. */
1113 COSTS_N_INSNS (1), /* store. */
1114 COSTS_N_INSNS (1), /* strd. */
1115 COSTS_N_INSNS (1), /* stm_1st. */
1116 1, /* stm_regs_per_insn_1st. */
1117 2, /* stm_regs_per_insn_subsequent. */
1118 COSTS_N_INSNS (1), /* storef. */
1119 COSTS_N_INSNS (1), /* stored. */
1120 COSTS_N_INSNS (1), /* store_unaligned. */
1121 COSTS_N_INSNS (1), /* loadv. */
1122 COSTS_N_INSNS (1) /* storev. */
1125 /* FP SFmode */
1127 COSTS_N_INSNS (36), /* div. */
1128 COSTS_N_INSNS (11), /* mult. */
1129 COSTS_N_INSNS (20), /* mult_addsub. */
1130 COSTS_N_INSNS (30), /* fma. */
1131 COSTS_N_INSNS (9), /* addsub. */
1132 COSTS_N_INSNS (3), /* fpconst. */
1133 COSTS_N_INSNS (3), /* neg. */
1134 COSTS_N_INSNS (6), /* compare. */
1135 COSTS_N_INSNS (4), /* widen. */
1136 COSTS_N_INSNS (4), /* narrow. */
1137 COSTS_N_INSNS (8), /* toint. */
1138 COSTS_N_INSNS (8), /* fromint. */
1139 COSTS_N_INSNS (8) /* roundint. */
1141 /* FP DFmode */
1143 COSTS_N_INSNS (64), /* div. */
1144 COSTS_N_INSNS (16), /* mult. */
1145 COSTS_N_INSNS (25), /* mult_addsub. */
1146 COSTS_N_INSNS (30), /* fma. */
1147 COSTS_N_INSNS (9), /* addsub. */
1148 COSTS_N_INSNS (3), /* fpconst. */
1149 COSTS_N_INSNS (3), /* neg. */
1150 COSTS_N_INSNS (6), /* compare. */
1151 COSTS_N_INSNS (6), /* widen. */
1152 COSTS_N_INSNS (6), /* narrow. */
1153 COSTS_N_INSNS (8), /* toint. */
1154 COSTS_N_INSNS (8), /* fromint. */
1155 COSTS_N_INSNS (8) /* roundint. */
1158 /* Vector */
1160 COSTS_N_INSNS (1) /* alu. */
1164 const struct cpu_cost_table cortexa5_extra_costs =
1166 /* ALU */
1168 0, /* arith. */
1169 0, /* logical. */
1170 COSTS_N_INSNS (1), /* shift. */
1171 COSTS_N_INSNS (1), /* shift_reg. */
1172 COSTS_N_INSNS (1), /* arith_shift. */
1173 COSTS_N_INSNS (1), /* arith_shift_reg. */
1174 COSTS_N_INSNS (1), /* log_shift. */
1175 COSTS_N_INSNS (1), /* log_shift_reg. */
1176 COSTS_N_INSNS (1), /* extend. */
1177 COSTS_N_INSNS (1), /* extend_arith. */
1178 COSTS_N_INSNS (1), /* bfi. */
1179 COSTS_N_INSNS (1), /* bfx. */
1180 COSTS_N_INSNS (1), /* clz. */
1181 COSTS_N_INSNS (1), /* rev. */
1182 0, /* non_exec. */
1183 true /* non_exec_costs_exec. */
1187 /* MULT SImode */
1189 0, /* simple. */
1190 COSTS_N_INSNS (1), /* flag_setting. */
1191 COSTS_N_INSNS (1), /* extend. */
1192 COSTS_N_INSNS (1), /* add. */
1193 COSTS_N_INSNS (1), /* extend_add. */
1194 COSTS_N_INSNS (7) /* idiv. */
1196 /* MULT DImode */
1198 0, /* simple (N/A). */
1199 0, /* flag_setting (N/A). */
1200 COSTS_N_INSNS (1), /* extend. */
1201 0, /* add. */
1202 COSTS_N_INSNS (2), /* extend_add. */
1203 0 /* idiv (N/A). */
1206 /* LD/ST */
1208 COSTS_N_INSNS (1), /* load. */
1209 COSTS_N_INSNS (1), /* load_sign_extend. */
1210 COSTS_N_INSNS (6), /* ldrd. */
1211 COSTS_N_INSNS (1), /* ldm_1st. */
1212 1, /* ldm_regs_per_insn_1st. */
1213 2, /* ldm_regs_per_insn_subsequent. */
1214 COSTS_N_INSNS (2), /* loadf. */
1215 COSTS_N_INSNS (4), /* loadd. */
1216 COSTS_N_INSNS (1), /* load_unaligned. */
1217 COSTS_N_INSNS (1), /* store. */
1218 COSTS_N_INSNS (3), /* strd. */
1219 COSTS_N_INSNS (1), /* stm_1st. */
1220 1, /* stm_regs_per_insn_1st. */
1221 2, /* stm_regs_per_insn_subsequent. */
1222 COSTS_N_INSNS (2), /* storef. */
1223 COSTS_N_INSNS (2), /* stored. */
1224 COSTS_N_INSNS (1), /* store_unaligned. */
1225 COSTS_N_INSNS (1), /* loadv. */
1226 COSTS_N_INSNS (1) /* storev. */
1229 /* FP SFmode */
1231 COSTS_N_INSNS (15), /* div. */
1232 COSTS_N_INSNS (3), /* mult. */
1233 COSTS_N_INSNS (7), /* mult_addsub. */
1234 COSTS_N_INSNS (7), /* fma. */
1235 COSTS_N_INSNS (3), /* addsub. */
1236 COSTS_N_INSNS (3), /* fpconst. */
1237 COSTS_N_INSNS (3), /* neg. */
1238 COSTS_N_INSNS (3), /* compare. */
1239 COSTS_N_INSNS (3), /* widen. */
1240 COSTS_N_INSNS (3), /* narrow. */
1241 COSTS_N_INSNS (3), /* toint. */
1242 COSTS_N_INSNS (3), /* fromint. */
1243 COSTS_N_INSNS (3) /* roundint. */
1245 /* FP DFmode */
1247 COSTS_N_INSNS (30), /* div. */
1248 COSTS_N_INSNS (6), /* mult. */
1249 COSTS_N_INSNS (10), /* mult_addsub. */
1250 COSTS_N_INSNS (7), /* fma. */
1251 COSTS_N_INSNS (3), /* addsub. */
1252 COSTS_N_INSNS (3), /* fpconst. */
1253 COSTS_N_INSNS (3), /* neg. */
1254 COSTS_N_INSNS (3), /* compare. */
1255 COSTS_N_INSNS (3), /* widen. */
1256 COSTS_N_INSNS (3), /* narrow. */
1257 COSTS_N_INSNS (3), /* toint. */
1258 COSTS_N_INSNS (3), /* fromint. */
1259 COSTS_N_INSNS (3) /* roundint. */
1262 /* Vector */
1264 COSTS_N_INSNS (1) /* alu. */
1269 const struct cpu_cost_table cortexa7_extra_costs =
1271 /* ALU */
1273 0, /* arith. */
1274 0, /* logical. */
1275 COSTS_N_INSNS (1), /* shift. */
1276 COSTS_N_INSNS (1), /* shift_reg. */
1277 COSTS_N_INSNS (1), /* arith_shift. */
1278 COSTS_N_INSNS (1), /* arith_shift_reg. */
1279 COSTS_N_INSNS (1), /* log_shift. */
1280 COSTS_N_INSNS (1), /* log_shift_reg. */
1281 COSTS_N_INSNS (1), /* extend. */
1282 COSTS_N_INSNS (1), /* extend_arith. */
1283 COSTS_N_INSNS (1), /* bfi. */
1284 COSTS_N_INSNS (1), /* bfx. */
1285 COSTS_N_INSNS (1), /* clz. */
1286 COSTS_N_INSNS (1), /* rev. */
1287 0, /* non_exec. */
1288 true /* non_exec_costs_exec. */
1292 /* MULT SImode */
1294 0, /* simple. */
1295 COSTS_N_INSNS (1), /* flag_setting. */
1296 COSTS_N_INSNS (1), /* extend. */
1297 COSTS_N_INSNS (1), /* add. */
1298 COSTS_N_INSNS (1), /* extend_add. */
1299 COSTS_N_INSNS (7) /* idiv. */
1301 /* MULT DImode */
1303 0, /* simple (N/A). */
1304 0, /* flag_setting (N/A). */
1305 COSTS_N_INSNS (1), /* extend. */
1306 0, /* add. */
1307 COSTS_N_INSNS (2), /* extend_add. */
1308 0 /* idiv (N/A). */
1311 /* LD/ST */
1313 COSTS_N_INSNS (1), /* load. */
1314 COSTS_N_INSNS (1), /* load_sign_extend. */
1315 COSTS_N_INSNS (3), /* ldrd. */
1316 COSTS_N_INSNS (1), /* ldm_1st. */
1317 1, /* ldm_regs_per_insn_1st. */
1318 2, /* ldm_regs_per_insn_subsequent. */
1319 COSTS_N_INSNS (2), /* loadf. */
1320 COSTS_N_INSNS (2), /* loadd. */
1321 COSTS_N_INSNS (1), /* load_unaligned. */
1322 COSTS_N_INSNS (1), /* store. */
1323 COSTS_N_INSNS (3), /* strd. */
1324 COSTS_N_INSNS (1), /* stm_1st. */
1325 1, /* stm_regs_per_insn_1st. */
1326 2, /* stm_regs_per_insn_subsequent. */
1327 COSTS_N_INSNS (2), /* storef. */
1328 COSTS_N_INSNS (2), /* stored. */
1329 COSTS_N_INSNS (1), /* store_unaligned. */
1330 COSTS_N_INSNS (1), /* loadv. */
1331 COSTS_N_INSNS (1) /* storev. */
1334 /* FP SFmode */
1336 COSTS_N_INSNS (15), /* div. */
1337 COSTS_N_INSNS (3), /* mult. */
1338 COSTS_N_INSNS (7), /* mult_addsub. */
1339 COSTS_N_INSNS (7), /* fma. */
1340 COSTS_N_INSNS (3), /* addsub. */
1341 COSTS_N_INSNS (3), /* fpconst. */
1342 COSTS_N_INSNS (3), /* neg. */
1343 COSTS_N_INSNS (3), /* compare. */
1344 COSTS_N_INSNS (3), /* widen. */
1345 COSTS_N_INSNS (3), /* narrow. */
1346 COSTS_N_INSNS (3), /* toint. */
1347 COSTS_N_INSNS (3), /* fromint. */
1348 COSTS_N_INSNS (3) /* roundint. */
1350 /* FP DFmode */
1352 COSTS_N_INSNS (30), /* div. */
1353 COSTS_N_INSNS (6), /* mult. */
1354 COSTS_N_INSNS (10), /* mult_addsub. */
1355 COSTS_N_INSNS (7), /* fma. */
1356 COSTS_N_INSNS (3), /* addsub. */
1357 COSTS_N_INSNS (3), /* fpconst. */
1358 COSTS_N_INSNS (3), /* neg. */
1359 COSTS_N_INSNS (3), /* compare. */
1360 COSTS_N_INSNS (3), /* widen. */
1361 COSTS_N_INSNS (3), /* narrow. */
1362 COSTS_N_INSNS (3), /* toint. */
1363 COSTS_N_INSNS (3), /* fromint. */
1364 COSTS_N_INSNS (3) /* roundint. */
1367 /* Vector */
1369 COSTS_N_INSNS (1) /* alu. */
1373 const struct cpu_cost_table cortexa12_extra_costs =
1375 /* ALU */
1377 0, /* arith. */
1378 0, /* logical. */
1379 0, /* shift. */
1380 COSTS_N_INSNS (1), /* shift_reg. */
1381 COSTS_N_INSNS (1), /* arith_shift. */
1382 COSTS_N_INSNS (1), /* arith_shift_reg. */
1383 COSTS_N_INSNS (1), /* log_shift. */
1384 COSTS_N_INSNS (1), /* log_shift_reg. */
1385 0, /* extend. */
1386 COSTS_N_INSNS (1), /* extend_arith. */
1387 0, /* bfi. */
1388 COSTS_N_INSNS (1), /* bfx. */
1389 COSTS_N_INSNS (1), /* clz. */
1390 COSTS_N_INSNS (1), /* rev. */
1391 0, /* non_exec. */
1392 true /* non_exec_costs_exec. */
1394 /* MULT SImode */
1397 COSTS_N_INSNS (2), /* simple. */
1398 COSTS_N_INSNS (3), /* flag_setting. */
1399 COSTS_N_INSNS (2), /* extend. */
1400 COSTS_N_INSNS (3), /* add. */
1401 COSTS_N_INSNS (2), /* extend_add. */
1402 COSTS_N_INSNS (18) /* idiv. */
1404 /* MULT DImode */
1406 0, /* simple (N/A). */
1407 0, /* flag_setting (N/A). */
1408 COSTS_N_INSNS (3), /* extend. */
1409 0, /* add (N/A). */
1410 COSTS_N_INSNS (3), /* extend_add. */
1411 0 /* idiv (N/A). */
1414 /* LD/ST */
1416 COSTS_N_INSNS (3), /* load. */
1417 COSTS_N_INSNS (3), /* load_sign_extend. */
1418 COSTS_N_INSNS (3), /* ldrd. */
1419 COSTS_N_INSNS (3), /* ldm_1st. */
1420 1, /* ldm_regs_per_insn_1st. */
1421 2, /* ldm_regs_per_insn_subsequent. */
1422 COSTS_N_INSNS (3), /* loadf. */
1423 COSTS_N_INSNS (3), /* loadd. */
1424 0, /* load_unaligned. */
1425 0, /* store. */
1426 0, /* strd. */
1427 0, /* stm_1st. */
1428 1, /* stm_regs_per_insn_1st. */
1429 2, /* stm_regs_per_insn_subsequent. */
1430 COSTS_N_INSNS (2), /* storef. */
1431 COSTS_N_INSNS (2), /* stored. */
1432 0, /* store_unaligned. */
1433 COSTS_N_INSNS (1), /* loadv. */
1434 COSTS_N_INSNS (1) /* storev. */
1437 /* FP SFmode */
1439 COSTS_N_INSNS (17), /* div. */
1440 COSTS_N_INSNS (4), /* mult. */
1441 COSTS_N_INSNS (8), /* mult_addsub. */
1442 COSTS_N_INSNS (8), /* fma. */
1443 COSTS_N_INSNS (4), /* addsub. */
1444 COSTS_N_INSNS (2), /* fpconst. */
1445 COSTS_N_INSNS (2), /* neg. */
1446 COSTS_N_INSNS (2), /* compare. */
1447 COSTS_N_INSNS (4), /* widen. */
1448 COSTS_N_INSNS (4), /* narrow. */
1449 COSTS_N_INSNS (4), /* toint. */
1450 COSTS_N_INSNS (4), /* fromint. */
1451 COSTS_N_INSNS (4) /* roundint. */
1453 /* FP DFmode */
1455 COSTS_N_INSNS (31), /* div. */
1456 COSTS_N_INSNS (4), /* mult. */
1457 COSTS_N_INSNS (8), /* mult_addsub. */
1458 COSTS_N_INSNS (8), /* fma. */
1459 COSTS_N_INSNS (4), /* addsub. */
1460 COSTS_N_INSNS (2), /* fpconst. */
1461 COSTS_N_INSNS (2), /* neg. */
1462 COSTS_N_INSNS (2), /* compare. */
1463 COSTS_N_INSNS (4), /* widen. */
1464 COSTS_N_INSNS (4), /* narrow. */
1465 COSTS_N_INSNS (4), /* toint. */
1466 COSTS_N_INSNS (4), /* fromint. */
1467 COSTS_N_INSNS (4) /* roundint. */
1470 /* Vector */
1472 COSTS_N_INSNS (1) /* alu. */
1476 const struct cpu_cost_table cortexa15_extra_costs =
1478 /* ALU */
1480 0, /* arith. */
1481 0, /* logical. */
1482 0, /* shift. */
1483 0, /* shift_reg. */
1484 COSTS_N_INSNS (1), /* arith_shift. */
1485 COSTS_N_INSNS (1), /* arith_shift_reg. */
1486 COSTS_N_INSNS (1), /* log_shift. */
1487 COSTS_N_INSNS (1), /* log_shift_reg. */
1488 0, /* extend. */
1489 COSTS_N_INSNS (1), /* extend_arith. */
1490 COSTS_N_INSNS (1), /* bfi. */
1491 0, /* bfx. */
1492 0, /* clz. */
1493 0, /* rev. */
1494 0, /* non_exec. */
1495 true /* non_exec_costs_exec. */
1497 /* MULT SImode */
1500 COSTS_N_INSNS (2), /* simple. */
1501 COSTS_N_INSNS (3), /* flag_setting. */
1502 COSTS_N_INSNS (2), /* extend. */
1503 COSTS_N_INSNS (2), /* add. */
1504 COSTS_N_INSNS (2), /* extend_add. */
1505 COSTS_N_INSNS (18) /* idiv. */
1507 /* MULT DImode */
1509 0, /* simple (N/A). */
1510 0, /* flag_setting (N/A). */
1511 COSTS_N_INSNS (3), /* extend. */
1512 0, /* add (N/A). */
1513 COSTS_N_INSNS (3), /* extend_add. */
1514 0 /* idiv (N/A). */
1517 /* LD/ST */
1519 COSTS_N_INSNS (3), /* load. */
1520 COSTS_N_INSNS (3), /* load_sign_extend. */
1521 COSTS_N_INSNS (3), /* ldrd. */
1522 COSTS_N_INSNS (4), /* ldm_1st. */
1523 1, /* ldm_regs_per_insn_1st. */
1524 2, /* ldm_regs_per_insn_subsequent. */
1525 COSTS_N_INSNS (4), /* loadf. */
1526 COSTS_N_INSNS (4), /* loadd. */
1527 0, /* load_unaligned. */
1528 0, /* store. */
1529 0, /* strd. */
1530 COSTS_N_INSNS (1), /* stm_1st. */
1531 1, /* stm_regs_per_insn_1st. */
1532 2, /* stm_regs_per_insn_subsequent. */
1533 0, /* storef. */
1534 0, /* stored. */
1535 0, /* store_unaligned. */
1536 COSTS_N_INSNS (1), /* loadv. */
1537 COSTS_N_INSNS (1) /* storev. */
1540 /* FP SFmode */
1542 COSTS_N_INSNS (17), /* div. */
1543 COSTS_N_INSNS (4), /* mult. */
1544 COSTS_N_INSNS (8), /* mult_addsub. */
1545 COSTS_N_INSNS (8), /* fma. */
1546 COSTS_N_INSNS (4), /* addsub. */
1547 COSTS_N_INSNS (2), /* fpconst. */
1548 COSTS_N_INSNS (2), /* neg. */
1549 COSTS_N_INSNS (5), /* compare. */
1550 COSTS_N_INSNS (4), /* widen. */
1551 COSTS_N_INSNS (4), /* narrow. */
1552 COSTS_N_INSNS (4), /* toint. */
1553 COSTS_N_INSNS (4), /* fromint. */
1554 COSTS_N_INSNS (4) /* roundint. */
1556 /* FP DFmode */
1558 COSTS_N_INSNS (31), /* div. */
1559 COSTS_N_INSNS (4), /* mult. */
1560 COSTS_N_INSNS (8), /* mult_addsub. */
1561 COSTS_N_INSNS (8), /* fma. */
1562 COSTS_N_INSNS (4), /* addsub. */
1563 COSTS_N_INSNS (2), /* fpconst. */
1564 COSTS_N_INSNS (2), /* neg. */
1565 COSTS_N_INSNS (2), /* compare. */
1566 COSTS_N_INSNS (4), /* widen. */
1567 COSTS_N_INSNS (4), /* narrow. */
1568 COSTS_N_INSNS (4), /* toint. */
1569 COSTS_N_INSNS (4), /* fromint. */
1570 COSTS_N_INSNS (4) /* roundint. */
1573 /* Vector */
1575 COSTS_N_INSNS (1) /* alu. */
1579 const struct cpu_cost_table v7m_extra_costs =
1581 /* ALU */
1583 0, /* arith. */
1584 0, /* logical. */
1585 0, /* shift. */
1586 0, /* shift_reg. */
1587 0, /* arith_shift. */
1588 COSTS_N_INSNS (1), /* arith_shift_reg. */
1589 0, /* log_shift. */
1590 COSTS_N_INSNS (1), /* log_shift_reg. */
1591 0, /* extend. */
1592 COSTS_N_INSNS (1), /* extend_arith. */
1593 0, /* bfi. */
1594 0, /* bfx. */
1595 0, /* clz. */
1596 0, /* rev. */
1597 COSTS_N_INSNS (1), /* non_exec. */
1598 false /* non_exec_costs_exec. */
1601 /* MULT SImode */
1603 COSTS_N_INSNS (1), /* simple. */
1604 COSTS_N_INSNS (1), /* flag_setting. */
1605 COSTS_N_INSNS (2), /* extend. */
1606 COSTS_N_INSNS (1), /* add. */
1607 COSTS_N_INSNS (3), /* extend_add. */
1608 COSTS_N_INSNS (8) /* idiv. */
1610 /* MULT DImode */
1612 0, /* simple (N/A). */
1613 0, /* flag_setting (N/A). */
1614 COSTS_N_INSNS (2), /* extend. */
1615 0, /* add (N/A). */
1616 COSTS_N_INSNS (3), /* extend_add. */
1617 0 /* idiv (N/A). */
1620 /* LD/ST */
1622 COSTS_N_INSNS (2), /* load. */
1623 0, /* load_sign_extend. */
1624 COSTS_N_INSNS (3), /* ldrd. */
1625 COSTS_N_INSNS (2), /* ldm_1st. */
1626 1, /* ldm_regs_per_insn_1st. */
1627 1, /* ldm_regs_per_insn_subsequent. */
1628 COSTS_N_INSNS (2), /* loadf. */
1629 COSTS_N_INSNS (3), /* loadd. */
1630 COSTS_N_INSNS (1), /* load_unaligned. */
1631 COSTS_N_INSNS (2), /* store. */
1632 COSTS_N_INSNS (3), /* strd. */
1633 COSTS_N_INSNS (2), /* stm_1st. */
1634 1, /* stm_regs_per_insn_1st. */
1635 1, /* stm_regs_per_insn_subsequent. */
1636 COSTS_N_INSNS (2), /* storef. */
1637 COSTS_N_INSNS (3), /* stored. */
1638 COSTS_N_INSNS (1), /* store_unaligned. */
1639 COSTS_N_INSNS (1), /* loadv. */
1640 COSTS_N_INSNS (1) /* storev. */
1643 /* FP SFmode */
1645 COSTS_N_INSNS (7), /* div. */
1646 COSTS_N_INSNS (2), /* mult. */
1647 COSTS_N_INSNS (5), /* mult_addsub. */
1648 COSTS_N_INSNS (3), /* fma. */
1649 COSTS_N_INSNS (1), /* addsub. */
1650 0, /* fpconst. */
1651 0, /* neg. */
1652 0, /* compare. */
1653 0, /* widen. */
1654 0, /* narrow. */
1655 0, /* toint. */
1656 0, /* fromint. */
1657 0 /* roundint. */
1659 /* FP DFmode */
1661 COSTS_N_INSNS (15), /* div. */
1662 COSTS_N_INSNS (5), /* mult. */
1663 COSTS_N_INSNS (7), /* mult_addsub. */
1664 COSTS_N_INSNS (7), /* fma. */
1665 COSTS_N_INSNS (3), /* addsub. */
1666 0, /* fpconst. */
1667 0, /* neg. */
1668 0, /* compare. */
1669 0, /* widen. */
1670 0, /* narrow. */
1671 0, /* toint. */
1672 0, /* fromint. */
1673 0 /* roundint. */
1676 /* Vector */
1678 COSTS_N_INSNS (1) /* alu. */
1682 const struct tune_params arm_slowmul_tune =
1684 arm_slowmul_rtx_costs,
1685 NULL, /* Insn extra costs. */
1686 NULL, /* Sched adj cost. */
1687 arm_default_branch_cost,
1688 &arm_default_vec_cost,
1689 3, /* Constant limit. */
1690 5, /* Max cond insns. */
1691 8, /* Memset max inline. */
1692 1, /* Issue rate. */
1693 ARM_PREFETCH_NOT_BENEFICIAL,
1694 tune_params::PREF_CONST_POOL_TRUE,
1695 tune_params::PREF_LDRD_FALSE,
1696 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1697 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1698 tune_params::DISPARAGE_FLAGS_NEITHER,
1699 tune_params::PREF_NEON_64_FALSE,
1700 tune_params::PREF_NEON_STRINGOPS_FALSE,
1701 tune_params::FUSE_NOTHING,
1702 tune_params::SCHED_AUTOPREF_OFF
1705 const struct tune_params arm_fastmul_tune =
1707 arm_fastmul_rtx_costs,
1708 NULL, /* Insn extra costs. */
1709 NULL, /* Sched adj cost. */
1710 arm_default_branch_cost,
1711 &arm_default_vec_cost,
1712 1, /* Constant limit. */
1713 5, /* Max cond insns. */
1714 8, /* Memset max inline. */
1715 1, /* Issue rate. */
1716 ARM_PREFETCH_NOT_BENEFICIAL,
1717 tune_params::PREF_CONST_POOL_TRUE,
1718 tune_params::PREF_LDRD_FALSE,
1719 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1720 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1721 tune_params::DISPARAGE_FLAGS_NEITHER,
1722 tune_params::PREF_NEON_64_FALSE,
1723 tune_params::PREF_NEON_STRINGOPS_FALSE,
1724 tune_params::FUSE_NOTHING,
1725 tune_params::SCHED_AUTOPREF_OFF
1728 /* StrongARM has early execution of branches, so a sequence that is worth
1729 skipping is shorter. Set max_insns_skipped to a lower value. */
1731 const struct tune_params arm_strongarm_tune =
1733 arm_fastmul_rtx_costs,
1734 NULL, /* Insn extra costs. */
1735 NULL, /* Sched adj cost. */
1736 arm_default_branch_cost,
1737 &arm_default_vec_cost,
1738 1, /* Constant limit. */
1739 3, /* Max cond insns. */
1740 8, /* Memset max inline. */
1741 1, /* Issue rate. */
1742 ARM_PREFETCH_NOT_BENEFICIAL,
1743 tune_params::PREF_CONST_POOL_TRUE,
1744 tune_params::PREF_LDRD_FALSE,
1745 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1746 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1747 tune_params::DISPARAGE_FLAGS_NEITHER,
1748 tune_params::PREF_NEON_64_FALSE,
1749 tune_params::PREF_NEON_STRINGOPS_FALSE,
1750 tune_params::FUSE_NOTHING,
1751 tune_params::SCHED_AUTOPREF_OFF
1754 const struct tune_params arm_xscale_tune =
1756 arm_xscale_rtx_costs,
1757 NULL, /* Insn extra costs. */
1758 xscale_sched_adjust_cost,
1759 arm_default_branch_cost,
1760 &arm_default_vec_cost,
1761 2, /* Constant limit. */
1762 3, /* Max cond insns. */
1763 8, /* Memset max inline. */
1764 1, /* Issue rate. */
1765 ARM_PREFETCH_NOT_BENEFICIAL,
1766 tune_params::PREF_CONST_POOL_TRUE,
1767 tune_params::PREF_LDRD_FALSE,
1768 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1769 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1770 tune_params::DISPARAGE_FLAGS_NEITHER,
1771 tune_params::PREF_NEON_64_FALSE,
1772 tune_params::PREF_NEON_STRINGOPS_FALSE,
1773 tune_params::FUSE_NOTHING,
1774 tune_params::SCHED_AUTOPREF_OFF
1777 const struct tune_params arm_9e_tune =
1779 arm_9e_rtx_costs,
1780 NULL, /* Insn extra costs. */
1781 NULL, /* Sched adj cost. */
1782 arm_default_branch_cost,
1783 &arm_default_vec_cost,
1784 1, /* Constant limit. */
1785 5, /* Max cond insns. */
1786 8, /* Memset max inline. */
1787 1, /* Issue rate. */
1788 ARM_PREFETCH_NOT_BENEFICIAL,
1789 tune_params::PREF_CONST_POOL_TRUE,
1790 tune_params::PREF_LDRD_FALSE,
1791 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1792 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1793 tune_params::DISPARAGE_FLAGS_NEITHER,
1794 tune_params::PREF_NEON_64_FALSE,
1795 tune_params::PREF_NEON_STRINGOPS_FALSE,
1796 tune_params::FUSE_NOTHING,
1797 tune_params::SCHED_AUTOPREF_OFF
1800 const struct tune_params arm_marvell_pj4_tune =
1802 arm_9e_rtx_costs,
1803 NULL, /* Insn extra costs. */
1804 NULL, /* Sched adj cost. */
1805 arm_default_branch_cost,
1806 &arm_default_vec_cost,
1807 1, /* Constant limit. */
1808 5, /* Max cond insns. */
1809 8, /* Memset max inline. */
1810 2, /* Issue rate. */
1811 ARM_PREFETCH_NOT_BENEFICIAL,
1812 tune_params::PREF_CONST_POOL_TRUE,
1813 tune_params::PREF_LDRD_FALSE,
1814 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1815 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1816 tune_params::DISPARAGE_FLAGS_NEITHER,
1817 tune_params::PREF_NEON_64_FALSE,
1818 tune_params::PREF_NEON_STRINGOPS_FALSE,
1819 tune_params::FUSE_NOTHING,
1820 tune_params::SCHED_AUTOPREF_OFF
1823 const struct tune_params arm_v6t2_tune =
1825 arm_9e_rtx_costs,
1826 NULL, /* Insn extra costs. */
1827 NULL, /* Sched adj cost. */
1828 arm_default_branch_cost,
1829 &arm_default_vec_cost,
1830 1, /* Constant limit. */
1831 5, /* Max cond insns. */
1832 8, /* Memset max inline. */
1833 1, /* Issue rate. */
1834 ARM_PREFETCH_NOT_BENEFICIAL,
1835 tune_params::PREF_CONST_POOL_FALSE,
1836 tune_params::PREF_LDRD_FALSE,
1837 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1838 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1839 tune_params::DISPARAGE_FLAGS_NEITHER,
1840 tune_params::PREF_NEON_64_FALSE,
1841 tune_params::PREF_NEON_STRINGOPS_FALSE,
1842 tune_params::FUSE_NOTHING,
1843 tune_params::SCHED_AUTOPREF_OFF
1847 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1848 const struct tune_params arm_cortex_tune =
1850 arm_9e_rtx_costs,
1851 &generic_extra_costs,
1852 NULL, /* Sched adj cost. */
1853 arm_default_branch_cost,
1854 &arm_default_vec_cost,
1855 1, /* Constant limit. */
1856 5, /* Max cond insns. */
1857 8, /* Memset max inline. */
1858 2, /* Issue rate. */
1859 ARM_PREFETCH_NOT_BENEFICIAL,
1860 tune_params::PREF_CONST_POOL_FALSE,
1861 tune_params::PREF_LDRD_FALSE,
1862 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1863 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1864 tune_params::DISPARAGE_FLAGS_NEITHER,
1865 tune_params::PREF_NEON_64_FALSE,
1866 tune_params::PREF_NEON_STRINGOPS_FALSE,
1867 tune_params::FUSE_NOTHING,
1868 tune_params::SCHED_AUTOPREF_OFF
1871 const struct tune_params arm_cortex_a8_tune =
1873 arm_9e_rtx_costs,
1874 &cortexa8_extra_costs,
1875 NULL, /* Sched adj cost. */
1876 arm_default_branch_cost,
1877 &arm_default_vec_cost,
1878 1, /* Constant limit. */
1879 5, /* Max cond insns. */
1880 8, /* Memset max inline. */
1881 2, /* Issue rate. */
1882 ARM_PREFETCH_NOT_BENEFICIAL,
1883 tune_params::PREF_CONST_POOL_FALSE,
1884 tune_params::PREF_LDRD_FALSE,
1885 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1886 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1887 tune_params::DISPARAGE_FLAGS_NEITHER,
1888 tune_params::PREF_NEON_64_FALSE,
1889 tune_params::PREF_NEON_STRINGOPS_TRUE,
1890 tune_params::FUSE_NOTHING,
1891 tune_params::SCHED_AUTOPREF_OFF
1894 const struct tune_params arm_cortex_a7_tune =
1896 arm_9e_rtx_costs,
1897 &cortexa7_extra_costs,
1898 NULL, /* Sched adj cost. */
1899 arm_default_branch_cost,
1900 &arm_default_vec_cost,
1901 1, /* Constant limit. */
1902 5, /* Max cond insns. */
1903 8, /* Memset max inline. */
1904 2, /* Issue rate. */
1905 ARM_PREFETCH_NOT_BENEFICIAL,
1906 tune_params::PREF_CONST_POOL_FALSE,
1907 tune_params::PREF_LDRD_FALSE,
1908 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1909 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1910 tune_params::DISPARAGE_FLAGS_NEITHER,
1911 tune_params::PREF_NEON_64_FALSE,
1912 tune_params::PREF_NEON_STRINGOPS_TRUE,
1913 tune_params::FUSE_NOTHING,
1914 tune_params::SCHED_AUTOPREF_OFF
1917 const struct tune_params arm_cortex_a15_tune =
1919 arm_9e_rtx_costs,
1920 &cortexa15_extra_costs,
1921 NULL, /* Sched adj cost. */
1922 arm_default_branch_cost,
1923 &arm_default_vec_cost,
1924 1, /* Constant limit. */
1925 2, /* Max cond insns. */
1926 8, /* Memset max inline. */
1927 3, /* Issue rate. */
1928 ARM_PREFETCH_NOT_BENEFICIAL,
1929 tune_params::PREF_CONST_POOL_FALSE,
1930 tune_params::PREF_LDRD_TRUE,
1931 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1932 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1933 tune_params::DISPARAGE_FLAGS_ALL,
1934 tune_params::PREF_NEON_64_FALSE,
1935 tune_params::PREF_NEON_STRINGOPS_TRUE,
1936 tune_params::FUSE_NOTHING,
1937 tune_params::SCHED_AUTOPREF_FULL
1940 const struct tune_params arm_cortex_a35_tune =
1942 arm_9e_rtx_costs,
1943 &cortexa53_extra_costs,
1944 NULL, /* Sched adj cost. */
1945 arm_default_branch_cost,
1946 &arm_default_vec_cost,
1947 1, /* Constant limit. */
1948 5, /* Max cond insns. */
1949 8, /* Memset max inline. */
1950 1, /* Issue rate. */
1951 ARM_PREFETCH_NOT_BENEFICIAL,
1952 tune_params::PREF_CONST_POOL_FALSE,
1953 tune_params::PREF_LDRD_FALSE,
1954 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1955 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1956 tune_params::DISPARAGE_FLAGS_NEITHER,
1957 tune_params::PREF_NEON_64_FALSE,
1958 tune_params::PREF_NEON_STRINGOPS_TRUE,
1959 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1960 tune_params::SCHED_AUTOPREF_OFF
1963 const struct tune_params arm_cortex_a53_tune =
1965 arm_9e_rtx_costs,
1966 &cortexa53_extra_costs,
1967 NULL, /* Sched adj cost. */
1968 arm_default_branch_cost,
1969 &arm_default_vec_cost,
1970 1, /* Constant limit. */
1971 5, /* Max cond insns. */
1972 8, /* Memset max inline. */
1973 2, /* Issue rate. */
1974 ARM_PREFETCH_NOT_BENEFICIAL,
1975 tune_params::PREF_CONST_POOL_FALSE,
1976 tune_params::PREF_LDRD_FALSE,
1977 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1978 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1979 tune_params::DISPARAGE_FLAGS_NEITHER,
1980 tune_params::PREF_NEON_64_FALSE,
1981 tune_params::PREF_NEON_STRINGOPS_TRUE,
1982 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
1983 tune_params::SCHED_AUTOPREF_OFF
1986 const struct tune_params arm_cortex_a57_tune =
1988 arm_9e_rtx_costs,
1989 &cortexa57_extra_costs,
1990 NULL, /* Sched adj cost. */
1991 arm_default_branch_cost,
1992 &arm_default_vec_cost,
1993 1, /* Constant limit. */
1994 2, /* Max cond insns. */
1995 8, /* Memset max inline. */
1996 3, /* Issue rate. */
1997 ARM_PREFETCH_NOT_BENEFICIAL,
1998 tune_params::PREF_CONST_POOL_FALSE,
1999 tune_params::PREF_LDRD_TRUE,
2000 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2002 tune_params::DISPARAGE_FLAGS_ALL,
2003 tune_params::PREF_NEON_64_FALSE,
2004 tune_params::PREF_NEON_STRINGOPS_TRUE,
2005 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2006 tune_params::SCHED_AUTOPREF_FULL
2009 const struct tune_params arm_exynosm1_tune =
2011 arm_9e_rtx_costs,
2012 &exynosm1_extra_costs,
2013 NULL, /* Sched adj cost. */
2014 arm_default_branch_cost,
2015 &arm_default_vec_cost,
2016 1, /* Constant limit. */
2017 2, /* Max cond insns. */
2018 8, /* Memset max inline. */
2019 3, /* Issue rate. */
2020 ARM_PREFETCH_NOT_BENEFICIAL,
2021 tune_params::PREF_CONST_POOL_FALSE,
2022 tune_params::PREF_LDRD_TRUE,
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2025 tune_params::DISPARAGE_FLAGS_ALL,
2026 tune_params::PREF_NEON_64_FALSE,
2027 tune_params::PREF_NEON_STRINGOPS_TRUE,
2028 tune_params::FUSE_NOTHING,
2029 tune_params::SCHED_AUTOPREF_OFF
2032 const struct tune_params arm_xgene1_tune =
2034 arm_9e_rtx_costs,
2035 &xgene1_extra_costs,
2036 NULL, /* Sched adj cost. */
2037 arm_default_branch_cost,
2038 &arm_default_vec_cost,
2039 1, /* Constant limit. */
2040 2, /* Max cond insns. */
2041 32, /* Memset max inline. */
2042 4, /* Issue rate. */
2043 ARM_PREFETCH_NOT_BENEFICIAL,
2044 tune_params::PREF_CONST_POOL_FALSE,
2045 tune_params::PREF_LDRD_TRUE,
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2047 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2048 tune_params::DISPARAGE_FLAGS_ALL,
2049 tune_params::PREF_NEON_64_FALSE,
2050 tune_params::PREF_NEON_STRINGOPS_FALSE,
2051 tune_params::FUSE_NOTHING,
2052 tune_params::SCHED_AUTOPREF_OFF
2055 const struct tune_params arm_qdf24xx_tune =
2057 arm_9e_rtx_costs,
2058 &qdf24xx_extra_costs,
2059 NULL, /* Scheduler cost adjustment. */
2060 arm_default_branch_cost,
2061 &arm_default_vec_cost, /* Vectorizer costs. */
2062 1, /* Constant limit. */
2063 2, /* Max cond insns. */
2064 8, /* Memset max inline. */
2065 4, /* Issue rate. */
2066 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2067 tune_params::PREF_CONST_POOL_FALSE,
2068 tune_params::PREF_LDRD_TRUE,
2069 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2071 tune_params::DISPARAGE_FLAGS_ALL,
2072 tune_params::PREF_NEON_64_FALSE,
2073 tune_params::PREF_NEON_STRINGOPS_TRUE,
2074 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2075 tune_params::SCHED_AUTOPREF_FULL
2078 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2079 less appealing. Set max_insns_skipped to a low value. */
2081 const struct tune_params arm_cortex_a5_tune =
2083 arm_9e_rtx_costs,
2084 &cortexa5_extra_costs,
2085 NULL, /* Sched adj cost. */
2086 arm_cortex_a5_branch_cost,
2087 &arm_default_vec_cost,
2088 1, /* Constant limit. */
2089 1, /* Max cond insns. */
2090 8, /* Memset max inline. */
2091 2, /* Issue rate. */
2092 ARM_PREFETCH_NOT_BENEFICIAL,
2093 tune_params::PREF_CONST_POOL_FALSE,
2094 tune_params::PREF_LDRD_FALSE,
2095 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2096 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2097 tune_params::DISPARAGE_FLAGS_NEITHER,
2098 tune_params::PREF_NEON_64_FALSE,
2099 tune_params::PREF_NEON_STRINGOPS_TRUE,
2100 tune_params::FUSE_NOTHING,
2101 tune_params::SCHED_AUTOPREF_OFF
2104 const struct tune_params arm_cortex_a9_tune =
2106 arm_9e_rtx_costs,
2107 &cortexa9_extra_costs,
2108 cortex_a9_sched_adjust_cost,
2109 arm_default_branch_cost,
2110 &arm_default_vec_cost,
2111 1, /* Constant limit. */
2112 5, /* Max cond insns. */
2113 8, /* Memset max inline. */
2114 2, /* Issue rate. */
2115 ARM_PREFETCH_BENEFICIAL(4,32,32),
2116 tune_params::PREF_CONST_POOL_FALSE,
2117 tune_params::PREF_LDRD_FALSE,
2118 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2119 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2120 tune_params::DISPARAGE_FLAGS_NEITHER,
2121 tune_params::PREF_NEON_64_FALSE,
2122 tune_params::PREF_NEON_STRINGOPS_FALSE,
2123 tune_params::FUSE_NOTHING,
2124 tune_params::SCHED_AUTOPREF_OFF
2127 const struct tune_params arm_cortex_a12_tune =
2129 arm_9e_rtx_costs,
2130 &cortexa12_extra_costs,
2131 NULL, /* Sched adj cost. */
2132 arm_default_branch_cost,
2133 &arm_default_vec_cost, /* Vectorizer costs. */
2134 1, /* Constant limit. */
2135 2, /* Max cond insns. */
2136 8, /* Memset max inline. */
2137 2, /* Issue rate. */
2138 ARM_PREFETCH_NOT_BENEFICIAL,
2139 tune_params::PREF_CONST_POOL_FALSE,
2140 tune_params::PREF_LDRD_TRUE,
2141 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2142 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2143 tune_params::DISPARAGE_FLAGS_ALL,
2144 tune_params::PREF_NEON_64_FALSE,
2145 tune_params::PREF_NEON_STRINGOPS_TRUE,
2146 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2147 tune_params::SCHED_AUTOPREF_OFF
2150 const struct tune_params arm_cortex_a73_tune =
2152 arm_9e_rtx_costs,
2153 &cortexa57_extra_costs,
2154 NULL, /* Sched adj cost. */
2155 arm_default_branch_cost,
2156 &arm_default_vec_cost, /* Vectorizer costs. */
2157 1, /* Constant limit. */
2158 2, /* Max cond insns. */
2159 8, /* Memset max inline. */
2160 2, /* Issue rate. */
2161 ARM_PREFETCH_NOT_BENEFICIAL,
2162 tune_params::PREF_CONST_POOL_FALSE,
2163 tune_params::PREF_LDRD_TRUE,
2164 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2165 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2166 tune_params::DISPARAGE_FLAGS_ALL,
2167 tune_params::PREF_NEON_64_FALSE,
2168 tune_params::PREF_NEON_STRINGOPS_TRUE,
2169 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2170 tune_params::SCHED_AUTOPREF_FULL
2173 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2174 single cycle to execute, so a MOVW/MOVT pair costs two cycles. An LDR from
2175 the constant pool also takes two cycles to execute, but mildly increases
2176 pipelining opportunity (consecutive loads/stores can be pipelined together,
2177 saving one cycle), and may also improve icache utilisation. Hence we prefer
2178 the constant pool for such processors. */
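/* Illustrative sketch (not taken from the sources): the two ways of
   materialising a 32-bit constant that the comment above weighs against each
   other, shown with an arbitrary example value.

       @ MOVW/MOVT pair: two instructions, no literal pool entry.
       movw    r0, #0x5678            @ r0 = 0x00005678
       movt    r0, #0x1234            @ r0 = 0x12345678

       @ Constant-pool load: one instruction plus a pool entry; consecutive
       @ pool loads can be pipelined on these cores.
       ldr     r0, .LC0
       ...
   .LC0:
       .word   0x12345678  */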
2180 const struct tune_params arm_v7m_tune =
2182 arm_9e_rtx_costs,
2183 &v7m_extra_costs,
2184 NULL, /* Sched adj cost. */
2185 arm_cortex_m_branch_cost,
2186 &arm_default_vec_cost,
2187 1, /* Constant limit. */
2188 2, /* Max cond insns. */
2189 8, /* Memset max inline. */
2190 1, /* Issue rate. */
2191 ARM_PREFETCH_NOT_BENEFICIAL,
2192 tune_params::PREF_CONST_POOL_TRUE,
2193 tune_params::PREF_LDRD_FALSE,
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2195 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2196 tune_params::DISPARAGE_FLAGS_NEITHER,
2197 tune_params::PREF_NEON_64_FALSE,
2198 tune_params::PREF_NEON_STRINGOPS_FALSE,
2199 tune_params::FUSE_NOTHING,
2200 tune_params::SCHED_AUTOPREF_OFF
2203 /* Cortex-M7 tuning. */
2205 const struct tune_params arm_cortex_m7_tune =
2207 arm_9e_rtx_costs,
2208 &v7m_extra_costs,
2209 NULL, /* Sched adj cost. */
2210 arm_cortex_m7_branch_cost,
2211 &arm_default_vec_cost,
2212 0, /* Constant limit. */
2213 1, /* Max cond insns. */
2214 8, /* Memset max inline. */
2215 2, /* Issue rate. */
2216 ARM_PREFETCH_NOT_BENEFICIAL,
2217 tune_params::PREF_CONST_POOL_TRUE,
2218 tune_params::PREF_LDRD_FALSE,
2219 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2220 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2221 tune_params::DISPARAGE_FLAGS_NEITHER,
2222 tune_params::PREF_NEON_64_FALSE,
2223 tune_params::PREF_NEON_STRINGOPS_FALSE,
2224 tune_params::FUSE_NOTHING,
2225 tune_params::SCHED_AUTOPREF_OFF
2228 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2229 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2230 const struct tune_params arm_v6m_tune =
2232 arm_9e_rtx_costs,
2233 NULL, /* Insn extra costs. */
2234 NULL, /* Sched adj cost. */
2235 arm_default_branch_cost,
2236 &arm_default_vec_cost, /* Vectorizer costs. */
2237 1, /* Constant limit. */
2238 5, /* Max cond insns. */
2239 8, /* Memset max inline. */
2240 1, /* Issue rate. */
2241 ARM_PREFETCH_NOT_BENEFICIAL,
2242 tune_params::PREF_CONST_POOL_FALSE,
2243 tune_params::PREF_LDRD_FALSE,
2244 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2245 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2246 tune_params::DISPARAGE_FLAGS_NEITHER,
2247 tune_params::PREF_NEON_64_FALSE,
2248 tune_params::PREF_NEON_STRINGOPS_FALSE,
2249 tune_params::FUSE_NOTHING,
2250 tune_params::SCHED_AUTOPREF_OFF
2253 const struct tune_params arm_fa726te_tune =
2255 arm_9e_rtx_costs,
2256 NULL, /* Insn extra costs. */
2257 fa726te_sched_adjust_cost,
2258 arm_default_branch_cost,
2259 &arm_default_vec_cost,
2260 1, /* Constant limit. */
2261 5, /* Max cond insns. */
2262 8, /* Memset max inline. */
2263 2, /* Issue rate. */
2264 ARM_PREFETCH_NOT_BENEFICIAL,
2265 tune_params::PREF_CONST_POOL_TRUE,
2266 tune_params::PREF_LDRD_FALSE,
2267 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2268 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2269 tune_params::DISPARAGE_FLAGS_NEITHER,
2270 tune_params::PREF_NEON_64_FALSE,
2271 tune_params::PREF_NEON_STRINGOPS_FALSE,
2272 tune_params::FUSE_NOTHING,
2273 tune_params::SCHED_AUTOPREF_OFF
2277 /* Not all of these give usefully different compilation alternatives,
2278 but there is no simple way of generalizing them. */
2279 static const struct processors all_cores[] =
2281 /* ARM Cores */
2282 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2283 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2284 FLAGS, &arm_##COSTS##_tune},
2285 #include "arm-cores.def"
2286 #undef ARM_CORE
2287 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
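/* Illustration of the X-macro expansion above (hypothetical entry, not taken
   from arm-cores.def): an invocation such as

       ARM_CORE ("examplecore", examplecore, examplecore, 7A, FLAGS, cortex)

   would expand to

       {"examplecore", examplecore, "7A", BASE_ARCH_7A, FLAGS, &arm_cortex_tune},

   so each line of the .def file contributes one element of all_cores.  */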
2290 static const struct processors all_architectures[] =
2292 /* ARM Architectures */
2293 /* We don't specify tuning costs here as it will be figured out
2294 from the core. */
2296 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2297 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2298 #include "arm-arches.def"
2299 #undef ARM_ARCH
2300 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
2304 /* These are populated as commandline arguments are processed, or NULL
2305 if not specified. */
2306 static const struct processors *arm_selected_arch;
2307 static const struct processors *arm_selected_cpu;
2308 static const struct processors *arm_selected_tune;
2310 /* The name of the preprocessor macro to define for this architecture. PROFILE
2311 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2312 is thus chosen to be big enough to hold the longest architecture name. */
2314 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
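/* For illustration (drawn from the sprintf in arm_option_override below): if
   the selected architecture's name is "7A", the buffer ends up holding
   "__ARM_ARCH_7A__", i.e.

       sprintf (arm_arch_name, "__ARM_ARCH_%s__", "7A");

   which fits because the "PROFILE" placeholder was sized to cover the longest
   architecture name used here.  */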
2316 /* Available values for -mfpu=. */
2318 const struct arm_fpu_desc all_fpus[] =
2320 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, FEATURES) \
2321 { NAME, MODEL, REV, VFP_REGS, FEATURES },
2322 #include "arm-fpus.def"
2323 #undef ARM_FPU
2326 /* Supported TLS relocations. */
2328 enum tls_reloc {
2329 TLS_GD32,
2330 TLS_LDM32,
2331 TLS_LDO32,
2332 TLS_IE32,
2333 TLS_LE32,
2334 TLS_DESCSEQ /* GNU scheme */
2337 /* The maximum number of insns to be used when loading a constant. */
2338 inline static int
2339 arm_constant_limit (bool size_p)
2341 return size_p ? 1 : current_tune->constant_limit;
2344 /* Emit an insn that's a simple single-set. Both the operands must be known
2345 to be valid. */
2346 inline static rtx_insn *
2347 emit_set_insn (rtx x, rtx y)
2349 return emit_insn (gen_rtx_SET (x, y));
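/* Usage sketch (illustrative only): the helper just wraps the common
   "emit a bare SET" idiom, so

       rtx_insn *insn = emit_set_insn (dest_reg, src_reg);

   is shorthand for emit_insn (gen_rtx_SET (dest_reg, src_reg)), where
   dest_reg and src_reg stand for operands already known to be valid.  */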
2352 /* Return the number of bits set in VALUE. */
2353 static unsigned
2354 bit_count (unsigned long value)
2356 unsigned long count = 0;
2358 while (value)
2360 count++;
2361 value &= value - 1; /* Clear the least-significant set bit. */
2364 return count;
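/* Worked example (illustrative): value &= value - 1 clears exactly one set
   bit per iteration, so the loop runs once per set bit.  For value = 0x29
   (binary 101001):

       101001 -> 101000 -> 100000 -> 000000

   three iterations, hence bit_count (0x29) == 3.  */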
2367 /* Return the number of features in feature-set SET. */
2368 static unsigned
2369 feature_count (const arm_feature_set * set)
2371 return (bit_count (ARM_FSET_CPU1 (*set))
2372 + bit_count (ARM_FSET_CPU2 (*set)));
2375 typedef struct
2377 machine_mode mode;
2378 const char *name;
2379 } arm_fixed_mode_set;
2381 /* A small helper for setting fixed-point library libfuncs. */
2383 static void
2384 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2385 const char *funcname, const char *modename,
2386 int num_suffix)
2388 char buffer[50];
2390 if (num_suffix == 0)
2391 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2392 else
2393 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2395 set_optab_libfunc (optable, mode, buffer);
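/* Example (derived from the sprintf above; the mode/name pairing mirrors the
   tables used further down): a call such as

       arm_set_fixed_optab_libfunc (ssadd_optab, SAmode, "ssadd", "sa", 3);

   binds ssadd_optab for SAmode to the libcall name "__gnu_ssaddsa3", while a
   num_suffix of 0 would drop the trailing digit altogether.  */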
2398 static void
2399 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2400 machine_mode from, const char *funcname,
2401 const char *toname, const char *fromname)
2403 char buffer[50];
2404 const char *maybe_suffix_2 = "";
2406 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2407 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2408 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2409 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2410 maybe_suffix_2 = "2";
2412 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2413 maybe_suffix_2);
2415 set_conv_libfunc (optable, to, from, buffer);
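/* Example (illustrative, following the fixed-bit.h rule above): converting
   between two signed fract modes keeps the "2" suffix, so

       arm_set_fixed_conv_libfunc (fract_optab, HQmode, QQmode,
                                   "fract", "hq", "qq");

   yields the name "__gnu_fractqqhq2", whereas a conversion from QQmode to a
   plain integer mode such as SImode is not fixed-to-fixed and so is named
   just "__gnu_fractqqsi".  */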
2418 /* Set up library functions unique to ARM. */
2420 static void
2421 arm_init_libfuncs (void)
2423 /* For Linux, we have access to kernel support for atomic operations. */
2424 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2425 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2427 /* There are no special library functions unless we are using the
2428 ARM BPABI. */
2429 if (!TARGET_BPABI)
2430 return;
2432 /* The functions below are described in Section 4 of the "Run-Time
2433 ABI for the ARM architecture", Version 1.0. */
2435 /* Double-precision floating-point arithmetic. Table 2. */
2436 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2437 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2438 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2439 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2440 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2442 /* Double-precision comparisons. Table 3. */
2443 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2444 set_optab_libfunc (ne_optab, DFmode, NULL);
2445 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2446 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2447 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2448 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2449 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2451 /* Single-precision floating-point arithmetic. Table 4. */
2452 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2453 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2454 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2455 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2456 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2458 /* Single-precision comparisons. Table 5. */
2459 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2460 set_optab_libfunc (ne_optab, SFmode, NULL);
2461 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2462 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2463 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2464 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2465 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2467 /* Floating-point to integer conversions. Table 6. */
2468 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2469 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2470 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2471 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2472 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2473 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2474 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2475 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2477 /* Conversions between floating types. Table 7. */
2478 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2479 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2481 /* Integer to floating-point conversions. Table 8. */
2482 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2483 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2484 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2485 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2486 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2487 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2488 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2489 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2491 /* Long long. Table 9. */
2492 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2493 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2494 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2495 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2496 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2497 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2498 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2499 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2501 /* Integer (32/32->32) division. \S 4.3.1. */
2502 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2503 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2505 /* The divmod functions are designed so that they can be used for
2506 plain division, even though they return both the quotient and the
2507 remainder. The quotient is returned in the usual location (i.e.,
2508 r0 for SImode, {r0, r1} for DImode), just as would be expected
2509 for an ordinary division routine. Because the AAPCS calling
2510 conventions specify that all of { r0, r1, r2, r3 } are
2511 call-clobbered (caller-saved) registers, there is no need to tell the compiler
2512 explicitly that those registers are clobbered by these
2513 routines. */
2514 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2515 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
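/* Illustrative note (the register layout is an assumption taken from the
   run-time ABI, not from this file): given the conventions described above,
   a plain 64-bit division such as

       long long quot = num / den;    // compiles to a call to __aeabi_ldivmod

   simply ignores the remainder the routine also returns, which is why the
   same entry point can back both sdiv_optab and sdivmod_optab.  */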
2517 /* For SImode division the ABI provides div-without-mod routines,
2518 which are faster. */
2519 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2520 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2522 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2523 divmod libcalls instead. */
2524 set_optab_libfunc (smod_optab, DImode, NULL);
2525 set_optab_libfunc (umod_optab, DImode, NULL);
2526 set_optab_libfunc (smod_optab, SImode, NULL);
2527 set_optab_libfunc (umod_optab, SImode, NULL);
2529 /* Half-precision float operations. The compiler handles all operations
2530 with NULL libfuncs by converting to SFmode. */
2531 switch (arm_fp16_format)
2533 case ARM_FP16_FORMAT_IEEE:
2534 case ARM_FP16_FORMAT_ALTERNATIVE:
2536 /* Conversions. */
2537 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2538 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2539 ? "__gnu_f2h_ieee"
2540 : "__gnu_f2h_alternative"));
2541 set_conv_libfunc (sext_optab, SFmode, HFmode,
2542 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2543 ? "__gnu_h2f_ieee"
2544 : "__gnu_h2f_alternative"));
2546 /* Arithmetic. */
2547 set_optab_libfunc (add_optab, HFmode, NULL);
2548 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2549 set_optab_libfunc (smul_optab, HFmode, NULL);
2550 set_optab_libfunc (neg_optab, HFmode, NULL);
2551 set_optab_libfunc (sub_optab, HFmode, NULL);
2553 /* Comparisons. */
2554 set_optab_libfunc (eq_optab, HFmode, NULL);
2555 set_optab_libfunc (ne_optab, HFmode, NULL);
2556 set_optab_libfunc (lt_optab, HFmode, NULL);
2557 set_optab_libfunc (le_optab, HFmode, NULL);
2558 set_optab_libfunc (ge_optab, HFmode, NULL);
2559 set_optab_libfunc (gt_optab, HFmode, NULL);
2560 set_optab_libfunc (unord_optab, HFmode, NULL);
2561 break;
2563 default:
2564 break;
2567 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2569 const arm_fixed_mode_set fixed_arith_modes[] =
2571 { QQmode, "qq" },
2572 { UQQmode, "uqq" },
2573 { HQmode, "hq" },
2574 { UHQmode, "uhq" },
2575 { SQmode, "sq" },
2576 { USQmode, "usq" },
2577 { DQmode, "dq" },
2578 { UDQmode, "udq" },
2579 { TQmode, "tq" },
2580 { UTQmode, "utq" },
2581 { HAmode, "ha" },
2582 { UHAmode, "uha" },
2583 { SAmode, "sa" },
2584 { USAmode, "usa" },
2585 { DAmode, "da" },
2586 { UDAmode, "uda" },
2587 { TAmode, "ta" },
2588 { UTAmode, "uta" }
2590 const arm_fixed_mode_set fixed_conv_modes[] =
2592 { QQmode, "qq" },
2593 { UQQmode, "uqq" },
2594 { HQmode, "hq" },
2595 { UHQmode, "uhq" },
2596 { SQmode, "sq" },
2597 { USQmode, "usq" },
2598 { DQmode, "dq" },
2599 { UDQmode, "udq" },
2600 { TQmode, "tq" },
2601 { UTQmode, "utq" },
2602 { HAmode, "ha" },
2603 { UHAmode, "uha" },
2604 { SAmode, "sa" },
2605 { USAmode, "usa" },
2606 { DAmode, "da" },
2607 { UDAmode, "uda" },
2608 { TAmode, "ta" },
2609 { UTAmode, "uta" },
2610 { QImode, "qi" },
2611 { HImode, "hi" },
2612 { SImode, "si" },
2613 { DImode, "di" },
2614 { TImode, "ti" },
2615 { SFmode, "sf" },
2616 { DFmode, "df" }
2618 unsigned int i, j;
2620 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2622 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2623 "add", fixed_arith_modes[i].name, 3);
2624 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2625 "ssadd", fixed_arith_modes[i].name, 3);
2626 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2627 "usadd", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2629 "sub", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2631 "sssub", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2633 "ussub", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2635 "mul", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2637 "ssmul", fixed_arith_modes[i].name, 3);
2638 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2639 "usmul", fixed_arith_modes[i].name, 3);
2640 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2641 "div", fixed_arith_modes[i].name, 3);
2642 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2643 "udiv", fixed_arith_modes[i].name, 3);
2644 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2645 "ssdiv", fixed_arith_modes[i].name, 3);
2646 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2647 "usdiv", fixed_arith_modes[i].name, 3);
2648 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2649 "neg", fixed_arith_modes[i].name, 2);
2650 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2651 "ssneg", fixed_arith_modes[i].name, 2);
2652 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2653 "usneg", fixed_arith_modes[i].name, 2);
2654 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2655 "ashl", fixed_arith_modes[i].name, 3);
2656 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2657 "ashr", fixed_arith_modes[i].name, 3);
2658 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2659 "lshr", fixed_arith_modes[i].name, 3);
2660 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2661 "ssashl", fixed_arith_modes[i].name, 3);
2662 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2663 "usashl", fixed_arith_modes[i].name, 3);
2664 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2665 "cmp", fixed_arith_modes[i].name, 2);
2668 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2669 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2671 if (i == j
2672 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2673 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2674 continue;
2676 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2677 fixed_conv_modes[j].mode, "fract",
2678 fixed_conv_modes[i].name,
2679 fixed_conv_modes[j].name);
2680 arm_set_fixed_conv_libfunc (satfract_optab,
2681 fixed_conv_modes[i].mode,
2682 fixed_conv_modes[j].mode, "satfract",
2683 fixed_conv_modes[i].name,
2684 fixed_conv_modes[j].name);
2685 arm_set_fixed_conv_libfunc (fractuns_optab,
2686 fixed_conv_modes[i].mode,
2687 fixed_conv_modes[j].mode, "fractuns",
2688 fixed_conv_modes[i].name,
2689 fixed_conv_modes[j].name);
2690 arm_set_fixed_conv_libfunc (satfractuns_optab,
2691 fixed_conv_modes[i].mode,
2692 fixed_conv_modes[j].mode, "satfractuns",
2693 fixed_conv_modes[i].name,
2694 fixed_conv_modes[j].name);
2698 if (TARGET_AAPCS_BASED)
2699 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2702 /* On AAPCS systems, this is the "struct __va_list". */
2703 static GTY(()) tree va_list_type;
2705 /* Return the type to use as __builtin_va_list. */
2706 static tree
2707 arm_build_builtin_va_list (void)
2709 tree va_list_name;
2710 tree ap_field;
2712 if (!TARGET_AAPCS_BASED)
2713 return std_build_builtin_va_list ();
2715 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2716 defined as:
2718 struct __va_list
2720 void *__ap;
2723 The C Library ABI further reinforces this definition in \S
2724 4.1.
2726 We must follow this definition exactly. The structure tag
2727 name is visible in C++ mangled names, and thus forms a part
2728 of the ABI. The field name may be used by people who
2729 #include <stdarg.h>. */
2730 /* Create the type. */
2731 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2732 /* Give it the required name. */
2733 va_list_name = build_decl (BUILTINS_LOCATION,
2734 TYPE_DECL,
2735 get_identifier ("__va_list"),
2736 va_list_type);
2737 DECL_ARTIFICIAL (va_list_name) = 1;
2738 TYPE_NAME (va_list_type) = va_list_name;
2739 TYPE_STUB_DECL (va_list_type) = va_list_name;
2740 /* Create the __ap field. */
2741 ap_field = build_decl (BUILTINS_LOCATION,
2742 FIELD_DECL,
2743 get_identifier ("__ap"),
2744 ptr_type_node);
2745 DECL_ARTIFICIAL (ap_field) = 1;
2746 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2747 TYPE_FIELDS (va_list_type) = ap_field;
2748 /* Compute its layout. */
2749 layout_type (va_list_type);
2751 return va_list_type;
2754 /* Return an expression of type "void *" pointing to the next
2755 available argument in a variable-argument list. VALIST is the
2756 user-level va_list object, of type __builtin_va_list. */
2757 static tree
2758 arm_extract_valist_ptr (tree valist)
2760 if (TREE_TYPE (valist) == error_mark_node)
2761 return error_mark_node;
2763 /* On an AAPCS target, the pointer is stored within "struct
2764 va_list". */
2765 if (TARGET_AAPCS_BASED)
2767 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2768 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2769 valist, ap_field, NULL_TREE);
2772 return valist;
2775 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2776 static void
2777 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2779 valist = arm_extract_valist_ptr (valist);
2780 std_expand_builtin_va_start (valist, nextarg);
2783 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2784 static tree
2785 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2786 gimple_seq *post_p)
2788 valist = arm_extract_valist_ptr (valist);
2789 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2792 /* Check any incompatible options that the user has specified. */
2793 static void
2794 arm_option_check_internal (struct gcc_options *opts)
2796 int flags = opts->x_target_flags;
2797 const struct arm_fpu_desc *fpu_desc = &all_fpus[opts->x_arm_fpu_index];
2799 /* iWMMXt and NEON are incompatible. */
2800 if (TARGET_IWMMXT && TARGET_VFP
2801 && ARM_FPU_FSET_HAS (fpu_desc->features, FPU_FL_NEON))
2802 error ("iWMMXt and NEON are incompatible");
2804 /* Make sure that the processor choice does not conflict with any of the
2805 other command line choices. */
2806 if (TARGET_ARM_P (flags) && !ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM))
2807 error ("target CPU does not support ARM mode");
2809 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2810 from here where no function is being compiled currently. */
2811 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2812 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2814 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2815 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2817 /* If this target is normally configured to use APCS frames, warn if they
2818 are turned off and debugging is turned on. */
2819 if (TARGET_ARM_P (flags)
2820 && write_symbols != NO_DEBUG
2821 && !TARGET_APCS_FRAME
2822 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2823 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2825 /* iWMMXt unsupported under Thumb mode. */
2826 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2827 error ("iWMMXt unsupported under Thumb mode");
2829 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2830 error ("can not use -mtp=cp15 with 16-bit Thumb");
2832 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2834 error ("RTP PIC is incompatible with Thumb");
2835 flag_pic = 0;
2838 /* We only support -mslow-flash-data on armv7-m targets. */
2839 if (target_slow_flash_data
2840 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2841 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2842 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2845 /* Recompute the global settings depending on target attribute options. */
2847 static void
2848 arm_option_params_internal (void)
2850 /* If we are not using the default (ARM mode) section anchor offset
2851 ranges, then set the correct ranges now. */
2852 if (TARGET_THUMB1)
2854 /* Thumb-1 LDR instructions cannot have negative offsets.
2855 Permissible positive offset ranges are 5-bit (for byte loads),
2856 6-bit (for halfword loads), or 7-bit (for word loads).
2857 Empirical results suggest a 7-bit anchor range gives the best
2858 overall code size. */
2859 targetm.min_anchor_offset = 0;
2860 targetm.max_anchor_offset = 127;
2862 else if (TARGET_THUMB2)
2864 /* The minimum is set such that the total size of the block
2865 for a particular anchor is 248 + 1 + 4095 bytes, which is
2866 divisible by eight, ensuring natural spacing of anchors. */
2867 targetm.min_anchor_offset = -248;
2868 targetm.max_anchor_offset = 4095;
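/* Spelled out for illustration: 248 + 1 + 4095 = 4344 = 8 * 543, so the
   [-248, 4095] window around an anchor is indeed a multiple of eight bytes.  */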
2870 else
2872 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2873 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2876 if (optimize_size)
2878 /* If optimizing for size, bump the number of instructions that we
2879 are prepared to conditionally execute (even on a StrongARM). */
2880 max_insns_skipped = 6;
2882 /* For THUMB2, we limit the conditional sequence to one IT block. */
2883 if (TARGET_THUMB2)
2884 max_insns_skipped = arm_restrict_it ? 1 : 4;
2886 else
2887 /* When -mrestrict-it is in use, tone down the if-conversion. */
2888 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2889 ? 1 : current_tune->max_insns_skipped;
2892 /* True if -mflip-thumb should next add an attribute for the default
2893 mode, false if it should next add an attribute for the opposite mode. */
2894 static GTY(()) bool thumb_flipper;
2896 /* Options after initial target override. */
2897 static GTY(()) tree init_optimize;
2899 static void
2900 arm_override_options_after_change_1 (struct gcc_options *opts)
2902 if (opts->x_align_functions <= 0)
2903 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2904 && opts->x_optimize_size ? 2 : 4;
2907 /* Implement targetm.override_options_after_change. */
2909 static void
2910 arm_override_options_after_change (void)
2912 arm_override_options_after_change_1 (&global_options);
2915 /* Reset options between modes that the user has specified. */
2916 static void
2917 arm_option_override_internal (struct gcc_options *opts,
2918 struct gcc_options *opts_set)
2920 arm_override_options_after_change_1 (opts);
2922 if (TARGET_INTERWORK && !ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB))
2924 /* The default is to enable interworking, so this warning message would
2925 be confusing to users who have just compiled with, eg, -march=armv3. */
2926 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2927 opts->x_target_flags &= ~MASK_INTERWORK;
2930 if (TARGET_THUMB_P (opts->x_target_flags)
2931 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB)))
2933 warning (0, "target CPU does not support THUMB instructions");
2934 opts->x_target_flags &= ~MASK_THUMB;
2937 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2939 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2940 opts->x_target_flags &= ~MASK_APCS_FRAME;
2943 /* Callee super interworking implies thumb interworking. Adding
2944 this to the flags here simplifies the logic elsewhere. */
2945 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2946 opts->x_target_flags |= MASK_INTERWORK;
2948 /* Need to remember initial values so combinations of options like
2949 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2950 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2952 if (! opts_set->x_arm_restrict_it)
2953 opts->x_arm_restrict_it = arm_arch8;
2955 /* ARM execution state and M profile don't have [restrict] IT. */
2956 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2957 opts->x_arm_restrict_it = 0;
2959 /* Enable -munaligned-access by default for
2960 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2961 i.e. Thumb2 and ARM state only.
2962 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2963 - ARMv8 architecture-based processors.
2965 Disable -munaligned-access by default for
2966 - all pre-ARMv6 architecture-based processors
2967 - ARMv6-M architecture-based processors
2968 - ARMv8-M Baseline processors. */
2970 if (! opts_set->x_unaligned_access)
2972 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2973 && arm_arch6 && (arm_arch_notm || arm_arch7));
2975 else if (opts->x_unaligned_access == 1
2976 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2978 warning (0, "target CPU does not support unaligned accesses");
2979 opts->x_unaligned_access = 0;
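/* Worked example of the default computed when the option is not given
   explicitly (illustrative): on an ARMv7-M target arm_arch6 and arm_arch7
   are both set, so the expression is true and unaligned access is enabled
   even though arm_arch_notm is false; on an ARMv6-M target only arm_arch6
   is set, so the default stays off, matching the rules in the comment
   above.  */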
2982 /* Don't warn since it's on by default in -O2. */
2983 if (TARGET_THUMB1_P (opts->x_target_flags))
2984 opts->x_flag_schedule_insns = 0;
2985 else
2986 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
2988 /* Disable shrink-wrap when optimizing function for size, since it tends to
2989 generate additional returns. */
2990 if (optimize_function_for_size_p (cfun)
2991 && TARGET_THUMB2_P (opts->x_target_flags))
2992 opts->x_flag_shrink_wrap = false;
2993 else
2994 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
2996 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2997 - epilogue_insns - does not accurately model the corresponding insns
2998 emitted in the asm file. In particular, see the comment in thumb_exit
2999 'Find out how many of the (return) argument registers we can corrupt'.
3000 As a consequence, the epilogue may clobber registers without fipa-ra
3001 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3002 TODO: Accurately model clobbers for epilogue_insns and reenable
3003 fipa-ra. */
3004 if (TARGET_THUMB1_P (opts->x_target_flags))
3005 opts->x_flag_ipa_ra = 0;
3006 else
3007 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3009 /* Thumb2 inline assembly code should always use unified syntax.
3010 This will apply to ARM and Thumb1 eventually. */
3011 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3013 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3014 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3015 #endif
3018 /* Fix up any incompatible options that the user has specified. */
3019 static void
3020 arm_option_override (void)
3022 arm_selected_arch = NULL;
3023 arm_selected_cpu = NULL;
3024 arm_selected_tune = NULL;
3026 if (global_options_set.x_arm_arch_option)
3027 arm_selected_arch = &all_architectures[arm_arch_option];
3029 if (global_options_set.x_arm_cpu_option)
3031 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
3032 arm_selected_tune = &all_cores[(int) arm_cpu_option];
3035 if (global_options_set.x_arm_tune_option)
3036 arm_selected_tune = &all_cores[(int) arm_tune_option];
3038 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3039 SUBTARGET_OVERRIDE_OPTIONS;
3040 #endif
3042 if (arm_selected_arch)
3044 if (arm_selected_cpu)
3046 const arm_feature_set tuning_flags = ARM_FSET_MAKE_CPU1 (FL_TUNE);
3047 arm_feature_set selected_flags;
3048 ARM_FSET_XOR (selected_flags, arm_selected_cpu->flags,
3049 arm_selected_arch->flags);
3050 ARM_FSET_EXCLUDE (selected_flags, selected_flags, tuning_flags);
3051 /* Check for conflict between mcpu and march. */
3052 if (!ARM_FSET_IS_EMPTY (selected_flags))
3054 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3055 arm_selected_cpu->name, arm_selected_arch->name);
3056 /* -march wins for code generation.
3057 -mcpu wins for default tuning. */
3058 if (!arm_selected_tune)
3059 arm_selected_tune = arm_selected_cpu;
3061 arm_selected_cpu = arm_selected_arch;
3063 else
3064 /* -mcpu wins. */
3065 arm_selected_arch = NULL;
3067 else
3068 /* Pick a CPU based on the architecture. */
3069 arm_selected_cpu = arm_selected_arch;
3072 /* If the user did not specify a processor, choose one for them. */
3073 if (!arm_selected_cpu)
3075 const struct processors * sel;
3076 arm_feature_set sought = ARM_FSET_EMPTY;
3078 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3079 if (!arm_selected_cpu->name)
3081 #ifdef SUBTARGET_CPU_DEFAULT
3082 /* Use the subtarget default CPU if none was specified by
3083 configure. */
3084 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
3085 #endif
3086 /* Default to ARM6. */
3087 if (!arm_selected_cpu->name)
3088 arm_selected_cpu = &all_cores[arm6];
3091 sel = arm_selected_cpu;
3092 insn_flags = sel->flags;
3094 /* Now check to see if the user has specified some command line
3095 switches that require certain abilities from the cpu. */
3097 if (TARGET_INTERWORK || TARGET_THUMB)
3099 ARM_FSET_ADD_CPU1 (sought, FL_THUMB);
3100 ARM_FSET_ADD_CPU1 (sought, FL_MODE32);
3102 /* There are no ARM processors that support both APCS-26 and
3103 interworking. Therefore we force FL_MODE26 to be removed
3104 from insn_flags here (if it was set), so that the search
3105 below will always be able to find a compatible processor. */
3106 ARM_FSET_DEL_CPU1 (insn_flags, FL_MODE26);
3109 if (!ARM_FSET_IS_EMPTY (sought)
3110 && !(ARM_FSET_CPU_SUBSET (sought, insn_flags)))
3112 /* Try to locate a CPU type that supports all of the abilities
3113 of the default CPU, plus the extra abilities requested by
3114 the user. */
3115 for (sel = all_cores; sel->name != NULL; sel++)
3116 if (ARM_FSET_CPU_SUBSET (sought, sel->flags))
3117 break;
3119 if (sel->name == NULL)
3121 unsigned current_bit_count = 0;
3122 const struct processors * best_fit = NULL;
3124 /* Ideally we would like to issue an error message here
3125 saying that it was not possible to find a CPU compatible
3126 with the default CPU, but which also supports the command
3127 line options specified by the programmer, and so they
3128 ought to use the -mcpu=<name> command line option to
3129 override the default CPU type.
3131 If we cannot find a cpu that has both the
3132 characteristics of the default cpu and the given
3133 command line options we scan the array again looking
3134 for a best match. */
3135 for (sel = all_cores; sel->name != NULL; sel++)
3137 arm_feature_set required = ARM_FSET_EMPTY;
3138 ARM_FSET_UNION (required, sought, insn_flags);
3139 if (ARM_FSET_CPU_SUBSET (required, sel->flags))
3141 unsigned count;
3142 arm_feature_set flags;
3143 ARM_FSET_INTER (flags, sel->flags, insn_flags);
3144 count = feature_count (&flags);
3146 if (count >= current_bit_count)
3148 best_fit = sel;
3149 current_bit_count = count;
3153 gcc_assert (best_fit);
3154 sel = best_fit;
3157 arm_selected_cpu = sel;
3161 gcc_assert (arm_selected_cpu);
3162 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3163 if (!arm_selected_tune)
3164 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3166 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
3167 insn_flags = arm_selected_cpu->flags;
3168 arm_base_arch = arm_selected_cpu->base_arch;
3170 arm_tune = arm_selected_tune->core;
3171 tune_flags = arm_selected_tune->flags;
3172 current_tune = arm_selected_tune->tune;
3174 /* TBD: Dwarf info for apcs frame is not handled yet. */
3175 if (TARGET_APCS_FRAME)
3176 flag_shrink_wrap = false;
3178 /* BPABI targets use linker tricks to allow interworking on cores
3179 without thumb support. */
3180 if (TARGET_INTERWORK
3181 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB) || TARGET_BPABI))
3183 warning (0, "target CPU does not support interworking" );
3184 target_flags &= ~MASK_INTERWORK;
3187 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3189 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3190 target_flags |= MASK_APCS_FRAME;
3193 if (TARGET_POKE_FUNCTION_NAME)
3194 target_flags |= MASK_APCS_FRAME;
3196 if (TARGET_APCS_REENT && flag_pic)
3197 error ("-fpic and -mapcs-reent are incompatible");
3199 if (TARGET_APCS_REENT)
3200 warning (0, "APCS reentrant code not supported. Ignored");
3202 if (TARGET_APCS_FLOAT)
3203 warning (0, "passing floating point arguments in fp regs not yet supported");
3205 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3206 arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M);
3207 arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4);
3208 arm_arch4t = arm_arch4 && (ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB));
3209 arm_arch5 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5);
3210 arm_arch5e = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5E);
3211 arm_arch6 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6);
3212 arm_arch6k = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6K);
3213 arm_arch6kz = arm_arch6k && ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6KZ);
3214 arm_arch_notm = ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM);
3215 arm_arch6m = arm_arch6 && !arm_arch_notm;
3216 arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7);
3217 arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM);
3218 arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8);
3219 arm_arch8_1 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_1);
3220 arm_arch_thumb1 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB);
3221 arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2);
3222 arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE);
3224 arm_ld_sched = ARM_FSET_HAS_CPU1 (tune_flags, FL_LDSCHED);
3225 arm_tune_strongarm = ARM_FSET_HAS_CPU1 (tune_flags, FL_STRONG);
3226 arm_tune_wbuf = ARM_FSET_HAS_CPU1 (tune_flags, FL_WBUF);
3227 arm_tune_xscale = ARM_FSET_HAS_CPU1 (tune_flags, FL_XSCALE);
3228 arm_arch_iwmmxt = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT);
3229 arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2);
3230 arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV);
3231 arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV);
3232 arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
3233 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
3234 arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
3235 arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL);
3237 /* V5 code we generate is completely interworking capable, so we turn off
3238 TARGET_INTERWORK here to avoid many tests later on. */
3240 /* XXX However, we must pass the right pre-processor defines to CPP
3241 or GLD can get confused. This is a hack. */
3242 if (TARGET_INTERWORK)
3243 arm_cpp_interwork = 1;
3245 if (arm_arch5)
3246 target_flags &= ~MASK_INTERWORK;
3248 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3249 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3251 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3252 error ("iwmmxt abi requires an iwmmxt capable cpu");
3254 if (!global_options_set.x_arm_fpu_index)
3256 const char *target_fpu_name;
3257 bool ok;
3259 #ifdef FPUTYPE_DEFAULT
3260 target_fpu_name = FPUTYPE_DEFAULT;
3261 #else
3262 target_fpu_name = "vfp";
3263 #endif
3265 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
3266 CL_TARGET);
3267 gcc_assert (ok);
3270 /* If soft-float is specified then don't use FPU. */
3271 if (TARGET_SOFT_FLOAT)
3272 arm_fpu_attr = FPU_NONE;
3273 else if (TARGET_VFP)
3274 arm_fpu_attr = FPU_VFP;
3275 else
3276 gcc_unreachable();
3278 if (TARGET_AAPCS_BASED)
3280 if (TARGET_CALLER_INTERWORKING)
3281 error ("AAPCS does not support -mcaller-super-interworking");
3282 else
3283 if (TARGET_CALLEE_INTERWORKING)
3284 error ("AAPCS does not support -mcallee-super-interworking");
3287 /* __fp16 support currently assumes the core has ldrh. */
3288 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3289 sorry ("__fp16 and no ldrh");
3291 if (TARGET_AAPCS_BASED)
3293 if (arm_abi == ARM_ABI_IWMMXT)
3294 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3295 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3296 && TARGET_HARD_FLOAT
3297 && TARGET_VFP)
3298 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3299 else
3300 arm_pcs_default = ARM_PCS_AAPCS;
3302 else
3304 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
3305 sorry ("-mfloat-abi=hard and VFP");
3307 if (arm_abi == ARM_ABI_APCS)
3308 arm_pcs_default = ARM_PCS_APCS;
3309 else
3310 arm_pcs_default = ARM_PCS_ATPCS;
3313 /* For arm2/3 there is no need to do any scheduling if we are doing
3314 software floating-point. */
3315 if (TARGET_SOFT_FLOAT && !ARM_FSET_HAS_CPU1 (tune_flags, FL_MODE32))
3316 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3318 /* Use the cp15 method if it is available. */
3319 if (target_thread_pointer == TP_AUTO)
3321 if (arm_arch6k && !TARGET_THUMB1)
3322 target_thread_pointer = TP_CP15;
3323 else
3324 target_thread_pointer = TP_SOFT;
3327 /* Override the default structure alignment for AAPCS ABI. */
3328 if (!global_options_set.x_arm_structure_size_boundary)
3330 if (TARGET_AAPCS_BASED)
3331 arm_structure_size_boundary = 8;
3333 else
3335 if (arm_structure_size_boundary != 8
3336 && arm_structure_size_boundary != 32
3337 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3339 if (ARM_DOUBLEWORD_ALIGN)
3340 warning (0,
3341 "structure size boundary can only be set to 8, 32 or 64");
3342 else
3343 warning (0, "structure size boundary can only be set to 8 or 32");
3344 arm_structure_size_boundary
3345 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3349 if (TARGET_VXWORKS_RTP)
3351 if (!global_options_set.x_arm_pic_data_is_text_relative)
3352 arm_pic_data_is_text_relative = 0;
3354 else if (flag_pic
3355 && !arm_pic_data_is_text_relative
3356 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3357 /* When text & data segments don't have a fixed displacement, the
3358 intended use is with a single, read only, pic base register.
3359 Unless the user explicitly requested not to do that, set
3360 it. */
3361 target_flags |= MASK_SINGLE_PIC_BASE;
3363 /* If stack checking is disabled, we can use r10 as the PIC register,
3364 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3365 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3367 if (TARGET_VXWORKS_RTP)
3368 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3369 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3372 if (flag_pic && TARGET_VXWORKS_RTP)
3373 arm_pic_register = 9;
3375 if (arm_pic_register_string != NULL)
3377 int pic_register = decode_reg_name (arm_pic_register_string);
3379 if (!flag_pic)
3380 warning (0, "-mpic-register= is useless without -fpic");
3382 /* Prevent the user from choosing an obviously stupid PIC register. */
3383 else if (pic_register < 0 || call_used_regs[pic_register]
3384 || pic_register == HARD_FRAME_POINTER_REGNUM
3385 || pic_register == STACK_POINTER_REGNUM
3386 || pic_register >= PC_REGNUM
3387 || (TARGET_VXWORKS_RTP
3388 && (unsigned int) pic_register != arm_pic_register))
3389 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3390 else
3391 arm_pic_register = pic_register;
3394 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3395 if (fix_cm3_ldrd == 2)
3397 if (arm_selected_cpu->core == cortexm3)
3398 fix_cm3_ldrd = 1;
3399 else
3400 fix_cm3_ldrd = 0;
3403 /* Hot/Cold partitioning is not currently supported, since we can't
3404 handle literal pool placement in that case. */
3405 if (flag_reorder_blocks_and_partition)
3407 inform (input_location,
3408 "-freorder-blocks-and-partition not supported on this architecture");
3409 flag_reorder_blocks_and_partition = 0;
3410 flag_reorder_blocks = 1;
3413 if (flag_pic)
3414 /* Hoisting PIC address calculations more aggressively provides a small,
3415 but measurable, size reduction for PIC code. Therefore, we decrease
3416 the bar for unrestricted expression hoisting to the cost of PIC address
3417 calculation, which is 2 instructions. */
3418 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3419 global_options.x_param_values,
3420 global_options_set.x_param_values);
3422 /* ARM EABI defaults to strict volatile bitfields. */
3423 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3424 && abi_version_at_least(2))
3425 flag_strict_volatile_bitfields = 1;
3427 /* Enable software prefetching at -O3 for CPUs that have prefetch, and we
3428 have deemed it beneficial (signified by setting
3429 prefetch.num_slots to 1 or more). */
3430 if (flag_prefetch_loop_arrays < 0
3431 && HAVE_prefetch
3432 && optimize >= 3
3433 && current_tune->prefetch.num_slots > 0)
3434 flag_prefetch_loop_arrays = 1;
3436 /* Set up parameters to be used in the prefetching algorithm. Do not
3437 override the defaults unless we are tuning for a core we have
3438 researched values for. */
3439 if (current_tune->prefetch.num_slots > 0)
3440 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3441 current_tune->prefetch.num_slots,
3442 global_options.x_param_values,
3443 global_options_set.x_param_values);
3444 if (current_tune->prefetch.l1_cache_line_size >= 0)
3445 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3446 current_tune->prefetch.l1_cache_line_size,
3447 global_options.x_param_values,
3448 global_options_set.x_param_values);
3449 if (current_tune->prefetch.l1_cache_size >= 0)
3450 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3451 current_tune->prefetch.l1_cache_size,
3452 global_options.x_param_values,
3453 global_options_set.x_param_values);
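/* For illustration (hypothetical values, not from any particular tuning
   table): a core whose tune_params give prefetch.num_slots == 2,
   l1_cache_line_size == 64 and l1_cache_size == 32 would seed
   --param simultaneous-prefetches=2, --param l1-cache-line-size=64 and
   --param l1-cache-size=32 above, unless the user set those params
   explicitly on the command line.  */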
3455 /* Use Neon to perform 64-bit operations rather than core
3456 registers. */
3457 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3458 if (use_neon_for_64bits == 1)
3459 prefer_neon_for_64bits = true;
3461 /* Use the alternative scheduling-pressure algorithm by default. */
3462 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3463 global_options.x_param_values,
3464 global_options_set.x_param_values);
3466 /* Look through the ready list and all of the queue for instructions
3467 relevant to the L2 auto-prefetcher. */
3468 int param_sched_autopref_queue_depth;
3470 switch (current_tune->sched_autopref)
3472 case tune_params::SCHED_AUTOPREF_OFF:
3473 param_sched_autopref_queue_depth = -1;
3474 break;
3476 case tune_params::SCHED_AUTOPREF_RANK:
3477 param_sched_autopref_queue_depth = 0;
3478 break;
3480 case tune_params::SCHED_AUTOPREF_FULL:
3481 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3482 break;
3484 default:
3485 gcc_unreachable ();
3488 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3489 param_sched_autopref_queue_depth,
3490 global_options.x_param_values,
3491 global_options_set.x_param_values);
3493 /* Currently, for slow flash data, we just disable literal pools. */
3494 if (target_slow_flash_data)
3495 arm_disable_literal_pool = true;
3497 /* Disable scheduling fusion by default unless this is an armv7
3498 processor that prefers ldrd/strd. */
3499 if (flag_schedule_fusion == 2
3500 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3501 flag_schedule_fusion = 0;
3503 /* Need to remember initial options before they are overridden. */
3504 init_optimize = build_optimization_node (&global_options);
3506 arm_option_override_internal (&global_options, &global_options_set);
3507 arm_option_check_internal (&global_options);
3508 arm_option_params_internal ();
3510 /* Register global variables with the garbage collector. */
3511 arm_add_gc_roots ();
3513 /* Save the initial options in case the user does function specific
3514 options or #pragma target. */
3515 target_option_default_node = target_option_current_node
3516 = build_target_option_node (&global_options);
3518 /* Init initial mode for testing. */
3519 thumb_flipper = TARGET_THUMB;
3522 static void
3523 arm_add_gc_roots (void)
3525 gcc_obstack_init(&minipool_obstack);
3526 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3529 /* A table of known ARM exception types.
3530 For use with the interrupt function attribute. */
3532 typedef struct
3534 const char *const arg;
3535 const unsigned long return_value;
3537 isr_attribute_arg;
3539 static const isr_attribute_arg isr_attribute_args [] =
3541 { "IRQ", ARM_FT_ISR },
3542 { "irq", ARM_FT_ISR },
3543 { "FIQ", ARM_FT_FIQ },
3544 { "fiq", ARM_FT_FIQ },
3545 { "ABORT", ARM_FT_ISR },
3546 { "abort", ARM_FT_ISR },
3547 { "ABORT", ARM_FT_ISR },
3548 { "abort", ARM_FT_ISR },
3549 { "UNDEF", ARM_FT_EXCEPTION },
3550 { "undef", ARM_FT_EXCEPTION },
3551 { "SWI", ARM_FT_EXCEPTION },
3552 { "swi", ARM_FT_EXCEPTION },
3553 { NULL, ARM_FT_NORMAL }
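/* For example, a handler declared as
     void handler (void) __attribute__ ((interrupt ("IRQ")));
   is matched against this table and gets type ARM_FT_ISR, while an
   unrecognized string yields ARM_FT_UNKNOWN in arm_isr_value below.  */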
3556 /* Returns the (interrupt) function type of the current
3557 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3559 static unsigned long
3560 arm_isr_value (tree argument)
3562 const isr_attribute_arg * ptr;
3563 const char * arg;
3565 if (!arm_arch_notm)
3566 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3568 /* No argument - default to IRQ. */
3569 if (argument == NULL_TREE)
3570 return ARM_FT_ISR;
3572 /* Get the value of the argument. */
3573 if (TREE_VALUE (argument) == NULL_TREE
3574 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3575 return ARM_FT_UNKNOWN;
3577 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3579 /* Check it against the list of known arguments. */
3580 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3581 if (streq (arg, ptr->arg))
3582 return ptr->return_value;
3584 /* An unrecognized interrupt type. */
3585 return ARM_FT_UNKNOWN;
3588 /* Computes the type of the current function. */
3590 static unsigned long
3591 arm_compute_func_type (void)
3593 unsigned long type = ARM_FT_UNKNOWN;
3594 tree a;
3595 tree attr;
3597 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3599 /* Decide if the current function is volatile. Such functions
3600 never return, and many memory cycles can be saved by not storing
3601 register values that will never be needed again. This optimization
3602 was added to speed up context switching in a kernel application. */
3603 if (optimize > 0
3604 && (TREE_NOTHROW (current_function_decl)
3605 || !(flag_unwind_tables
3606 || (flag_exceptions
3607 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3608 && TREE_THIS_VOLATILE (current_function_decl))
3609 type |= ARM_FT_VOLATILE;
3611 if (cfun->static_chain_decl != NULL)
3612 type |= ARM_FT_NESTED;
3614 attr = DECL_ATTRIBUTES (current_function_decl);
3616 a = lookup_attribute ("naked", attr);
3617 if (a != NULL_TREE)
3618 type |= ARM_FT_NAKED;
3620 a = lookup_attribute ("isr", attr);
3621 if (a == NULL_TREE)
3622 a = lookup_attribute ("interrupt", attr);
3624 if (a == NULL_TREE)
3625 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3626 else
3627 type |= arm_isr_value (TREE_VALUE (a));
3629 return type;
3632 /* Returns the type of the current function. */
3634 unsigned long
3635 arm_current_func_type (void)
3637 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3638 cfun->machine->func_type = arm_compute_func_type ();
3640 return cfun->machine->func_type;
3643 bool
3644 arm_allocate_stack_slots_for_args (void)
3646 /* Naked functions should not allocate stack slots for arguments. */
3647 return !IS_NAKED (arm_current_func_type ());
3650 static bool
3651 arm_warn_func_return (tree decl)
3653 /* Naked functions are implemented entirely in assembly, including the
3654 return sequence, so suppress warnings about this. */
3655 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3659 /* Output assembler code for a block containing the constant parts
3660 of a trampoline, leaving space for the variable parts.
3662 On the ARM, (if r8 is the static chain regnum, and remembering that
3663 referencing pc adds an offset of 8) the trampoline looks like:
3664 ldr r8, [pc, #0]
3665 ldr pc, [pc]
3666 .word static chain value
3667 .word function's address
3668 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3670 static void
3671 arm_asm_trampoline_template (FILE *f)
3673 fprintf (f, "\t.syntax unified\n");
3675 if (TARGET_ARM)
3677 fprintf (f, "\t.arm\n");
3678 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3679 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3681 else if (TARGET_THUMB2)
3683 fprintf (f, "\t.thumb\n");
3684 /* The Thumb-2 trampoline is similar to the arm implementation.
3685 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3686 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3687 STATIC_CHAIN_REGNUM, PC_REGNUM);
3688 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3690 else
3692 ASM_OUTPUT_ALIGN (f, 2);
3693 fprintf (f, "\t.code\t16\n");
3694 fprintf (f, ".Ltrampoline_start:\n");
3695 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3696 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3697 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3698 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3699 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3700 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3702 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3703 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3706 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3708 static void
3709 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3711 rtx fnaddr, mem, a_tramp;
3713 emit_block_move (m_tramp, assemble_trampoline_template (),
3714 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3716 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3717 emit_move_insn (mem, chain_value);
3719 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3720 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3721 emit_move_insn (mem, fnaddr);
3723 a_tramp = XEXP (m_tramp, 0);
3724 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3725 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3726 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
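/* On a 32-bit target the initialized trampoline therefore looks like:
     words 0-1 (offsets 0 and 4):  the two instructions from the template
     word 2 (offset 8):            the static chain value
     word 3 (offset 12):           the address of the nested function
   followed by the __clear_cache call above so that the copied code is
   visible to the instruction stream.  */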
3729 /* Thumb trampolines should be entered in thumb mode, so set
3730 the bottom bit of the address. */
3732 static rtx
3733 arm_trampoline_adjust_address (rtx addr)
3735 if (TARGET_THUMB)
3736 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3737 NULL, 0, OPTAB_LIB_WIDEN);
3738 return addr;
3741 /* Return 1 if it is possible to return using a single instruction.
3742 If SIBLING is non-null, this is a test for a return before a sibling
3743 call. SIBLING is the call insn, so we can examine its register usage. */
3746 use_return_insn (int iscond, rtx sibling)
3748 int regno;
3749 unsigned int func_type;
3750 unsigned long saved_int_regs;
3751 unsigned HOST_WIDE_INT stack_adjust;
3752 arm_stack_offsets *offsets;
3754 /* Never use a return instruction before reload has run. */
3755 if (!reload_completed)
3756 return 0;
3758 func_type = arm_current_func_type ();
3760 /* Naked, volatile and stack alignment functions need special
3761 consideration. */
3762 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3763 return 0;
3765 /* So do interrupt functions that use the frame pointer and Thumb
3766 interrupt functions. */
3767 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3768 return 0;
3770 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3771 && !optimize_function_for_size_p (cfun))
3772 return 0;
3774 offsets = arm_get_frame_offsets ();
3775 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3777 /* As do variadic functions. */
3778 if (crtl->args.pretend_args_size
3779 || cfun->machine->uses_anonymous_args
3780 /* Or if the function calls __builtin_eh_return () */
3781 || crtl->calls_eh_return
3782 /* Or if the function calls alloca */
3783 || cfun->calls_alloca
3784 /* Or if there is a stack adjustment. However, if the stack pointer
3785 is saved on the stack, we can use a pre-incrementing stack load. */
3786 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3787 && stack_adjust == 4))
3788 /* Or if the static chain register was saved above the frame, under the
3789 assumption that the stack pointer isn't saved on the stack. */
3790 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3791 && arm_compute_static_chain_stack_bytes() != 0))
3792 return 0;
3794 saved_int_regs = offsets->saved_regs_mask;
3796 /* Unfortunately, the insn
3798 ldmib sp, {..., sp, ...}
3800 triggers a bug on most SA-110 based devices, such that the stack
3801 pointer won't be correctly restored if the instruction takes a
3802 page fault. We work around this problem by popping r3 along with
3803 the other registers, since that is never slower than executing
3804 another instruction.
3806 We test for !arm_arch5 here, because code for any architecture
3807 less than this could potentially be run on one of the buggy
3808 chips. */
3809 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3811 /* Validate that r3 is a call-clobbered register (always true in
3812 the default abi) ... */
3813 if (!call_used_regs[3])
3814 return 0;
3816 /* ... that it isn't being used for a return value ... */
3817 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3818 return 0;
3820 /* ... or for a tail-call argument ... */
3821 if (sibling)
3823 gcc_assert (CALL_P (sibling));
3825 if (find_regno_fusage (sibling, USE, 3))
3826 return 0;
3829 /* ... and that there are no call-saved registers in r0-r2
3830 (always true in the default ABI). */
3831 if (saved_int_regs & 0x7)
3832 return 0;
3835 /* Can't be done if interworking with Thumb, and any registers have been
3836 stacked. */
3837 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3838 return 0;
3840 /* On StrongARM, conditional returns are expensive if they aren't
3841 taken and multiple registers have been stacked. */
3842 if (iscond && arm_tune_strongarm)
3844 /* Conditional return when just the LR is stored is a simple
3845 conditional-load instruction, that's not expensive. */
3846 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3847 return 0;
3849 if (flag_pic
3850 && arm_pic_register != INVALID_REGNUM
3851 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3852 return 0;
3855 /* If there are saved registers but the LR isn't saved, then we need
3856 two instructions for the return. */
3857 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3858 return 0;
3860 /* Can't be done if any of the VFP regs are pushed,
3861 since this also requires an insn. */
3862 if (TARGET_HARD_FLOAT && TARGET_VFP)
3863 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3864 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3865 return 0;
3867 if (TARGET_REALLY_IWMMXT)
3868 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3869 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3870 return 0;
3872 return 1;
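/* As a simple illustration: a leaf ARM-mode function that saves no
   registers and needs no stack adjustment typically passes all of the
   tests above and can return with a single instruction ("bx lr" or
   "mov pc, lr"), whereas a function that calls alloca, or that must
   restore the stack pointer, forces a full epilogue.  */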
3875 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3876 shrink-wrapping if possible. This is the case if we need to emit a
3877 prologue, which we can test by looking at the offsets. */
3878 bool
3879 use_simple_return_p (void)
3881 arm_stack_offsets *offsets;
3883 offsets = arm_get_frame_offsets ();
3884 return offsets->outgoing_args != 0;
3887 /* Return TRUE if int I is a valid immediate ARM constant. */
3890 const_ok_for_arm (HOST_WIDE_INT i)
3892 int lowbit;
3894 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3895 be all zero, or all one. */
3896 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3897 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3898 != ((~(unsigned HOST_WIDE_INT) 0)
3899 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3900 return FALSE;
3902 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3904 /* Fast return for 0 and small values. We must do this for zero, since
3905 the code below can't handle that one case. */
3906 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3907 return TRUE;
3909 /* Get the number of trailing zeros. */
3910 lowbit = ffs((int) i) - 1;
3912 /* Only even shifts are allowed in ARM mode so round down to the
3913 nearest even number. */
3914 if (TARGET_ARM)
3915 lowbit &= ~1;
3917 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3918 return TRUE;
3920 if (TARGET_ARM)
3922 /* Allow rotated constants in ARM mode. */
3923 if (lowbit <= 4
3924 && ((i & ~0xc000003f) == 0
3925 || (i & ~0xf000000f) == 0
3926 || (i & ~0xfc000003) == 0))
3927 return TRUE;
3929 else
3931 HOST_WIDE_INT v;
3933 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3934 v = i & 0xff;
3935 v |= v << 16;
3936 if (i == v || i == (v | (v << 8)))
3937 return TRUE;
3939 /* Allow repeated pattern 0xXY00XY00. */
3940 v = i & 0xff00;
3941 v |= v << 16;
3942 if (i == v)
3943 return TRUE;
3946 return FALSE;
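/* Worked examples of the test above (ARM mode): 0x000000ff, 0x00000ff0
   and 0xff000000 are each an 8-bit value rotated right by an even amount
   and so are valid immediates, while 0x00000101 and 0x0000ff01 need more
   than eight significant bits and are not.  In Thumb-2, replicated byte
   patterns such as 0x00ff00ff and 0xe0e0e0e0 are additionally accepted.  */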
3949 /* Return true if I is a valid constant for the operation CODE. */
3951 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3953 if (const_ok_for_arm (i))
3954 return 1;
3956 switch (code)
3958 case SET:
3959 /* See if we can use movw. */
3960 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
3961 return 1;
3962 else
3963 /* Otherwise, try mvn. */
3964 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3966 case PLUS:
3967 /* See if we can use addw or subw. */
3968 if (TARGET_THUMB2
3969 && ((i & 0xfffff000) == 0
3970 || ((-i) & 0xfffff000) == 0))
3971 return 1;
3972 /* else fall through. */
3974 case COMPARE:
3975 case EQ:
3976 case NE:
3977 case GT:
3978 case LE:
3979 case LT:
3980 case GE:
3981 case GEU:
3982 case LTU:
3983 case GTU:
3984 case LEU:
3985 case UNORDERED:
3986 case ORDERED:
3987 case UNEQ:
3988 case UNGE:
3989 case UNLT:
3990 case UNGT:
3991 case UNLE:
3992 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3994 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3995 case XOR:
3996 return 0;
3998 case IOR:
3999 if (TARGET_THUMB2)
4000 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4001 return 0;
4003 case AND:
4004 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4006 default:
4007 gcc_unreachable ();
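/* For example, PLUS with I == -16 (0xfffffff0) is rejected by
   const_ok_for_arm but accepted here, because the negated value 16 is a
   valid immediate and the addition can be emitted as a subtraction.
   Likewise AND with I == 0xffffff00 succeeds via the inverted form,
   since ~I == 0xff is valid and the operation can use BIC.  */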
4011 /* Return true if I is a valid di mode constant for the operation CODE. */
4013 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4015 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4016 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4017 rtx hi = GEN_INT (hi_val);
4018 rtx lo = GEN_INT (lo_val);
4020 if (TARGET_THUMB1)
4021 return 0;
4023 switch (code)
4025 case AND:
4026 case IOR:
4027 case XOR:
4028 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4029 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4030 case PLUS:
4031 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4033 default:
4034 return 0;
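/* E.g. an IOR of a DImode value with 0x00000000000000ff is accepted:
   the high word 0x00000000 and the low word 0x000000ff are each either
   a no-op or a valid SImode immediate for ORR, so the 64-bit operation
   can be split into two 32-bit ones.  */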
4038 /* Emit a sequence of insns to handle a large constant.
4039 CODE is the code of the operation required, it can be any of SET, PLUS,
4040 IOR, AND, XOR, MINUS;
4041 MODE is the mode in which the operation is being performed;
4042 VAL is the integer to operate on;
4043 SOURCE is the other operand (a register, or a null-pointer for SET);
4044 SUBTARGETS means it is safe to create scratch registers if that will
4045 either produce a simpler sequence, or we will want to cse the values.
4046 Return value is the number of insns emitted. */
4048 /* ??? Tweak this for thumb2. */
4050 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4051 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4053 rtx cond;
4055 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4056 cond = COND_EXEC_TEST (PATTERN (insn));
4057 else
4058 cond = NULL_RTX;
4060 if (subtargets || code == SET
4061 || (REG_P (target) && REG_P (source)
4062 && REGNO (target) != REGNO (source)))
4064 /* After arm_reorg has been called, we can't fix up expensive
4065 constants by pushing them into memory so we must synthesize
4066 them in-line, regardless of the cost. This is only likely to
4067 be more costly on chips that have load delay slots and we are
4068 compiling without running the scheduler (so no splitting
4069 occurred before the final instruction emission).
4071 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4073 if (!cfun->machine->after_arm_reorg
4074 && !cond
4075 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4076 1, 0)
4077 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4078 + (code != SET))))
4080 if (code == SET)
4082 /* Currently SET is the only monadic value for CODE; all
4083 the rest are dyadic. */
4084 if (TARGET_USE_MOVT)
4085 arm_emit_movpair (target, GEN_INT (val));
4086 else
4087 emit_set_insn (target, GEN_INT (val));
4089 return 1;
4091 else
4093 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4095 if (TARGET_USE_MOVT)
4096 arm_emit_movpair (temp, GEN_INT (val));
4097 else
4098 emit_set_insn (temp, GEN_INT (val));
4100 /* For MINUS, the value is subtracted from, since we never
4101 have subtraction of a constant. */
4102 if (code == MINUS)
4103 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4104 else
4105 emit_set_insn (target,
4106 gen_rtx_fmt_ee (code, mode, source, temp));
4107 return 2;
4112 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4116 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4117 ARM/THUMB2 immediates and add up to VAL.
4118 The function return value gives the number of insns required. */
4119 static int
4120 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4121 struct four_ints *return_sequence)
4123 int best_consecutive_zeros = 0;
4124 int i;
4125 int best_start = 0;
4126 int insns1, insns2;
4127 struct four_ints tmp_sequence;
4129 /* If we aren't targeting ARM, the best place to start is always at
4130 the bottom, otherwise look more closely. */
4131 if (TARGET_ARM)
4133 for (i = 0; i < 32; i += 2)
4135 int consecutive_zeros = 0;
4137 if (!(val & (3 << i)))
4139 while ((i < 32) && !(val & (3 << i)))
4141 consecutive_zeros += 2;
4142 i += 2;
4144 if (consecutive_zeros > best_consecutive_zeros)
4146 best_consecutive_zeros = consecutive_zeros;
4147 best_start = i - consecutive_zeros;
4149 i -= 2;
4154 /* So long as it won't require any more insns to do so, it's
4155 desirable to emit a small constant (in bits 0...9) in the last
4156 insn. This way there is more chance that it can be combined with
4157 a later addressing insn to form a pre-indexed load or store
4158 operation. Consider:
4160 *((volatile int *)0xe0000100) = 1;
4161 *((volatile int *)0xe0000110) = 2;
4163 We want this to wind up as:
4165 mov rA, #0xe0000000
4166 mov rB, #1
4167 str rB, [rA, #0x100]
4168 mov rB, #2
4169 str rB, [rA, #0x110]
4171 rather than having to synthesize both large constants from scratch.
4173 Therefore, we calculate how many insns would be required to emit
4174 the constant starting from `best_start', and also starting from
4175 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4176 yield a shorter sequence, we may as well use zero. */
4177 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4178 if (best_start != 0
4179 && ((HOST_WIDE_INT_1U << best_start) < val))
4181 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4182 if (insns2 <= insns1)
4184 *return_sequence = tmp_sequence;
4185 insns1 = insns2;
4189 return insns1;
4192 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4193 static int
4194 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4195 struct four_ints *return_sequence, int i)
4197 int remainder = val & 0xffffffff;
4198 int insns = 0;
4200 /* Try and find a way of doing the job in either two or three
4201 instructions.
4203 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4204 location. We start at position I. This may be the MSB, or
4205 optimal_immediate_sequence may have positioned it at the largest block
4206 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4207 wrapping around to the top of the word when we drop off the bottom.
4208 In the worst case this code should produce no more than four insns.
4210 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4211 constants, shifted to any arbitrary location. We should always start
4212 at the MSB. */
4215 int end;
4216 unsigned int b1, b2, b3, b4;
4217 unsigned HOST_WIDE_INT result;
4218 int loc;
4220 gcc_assert (insns < 4);
4222 if (i <= 0)
4223 i += 32;
4225 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4226 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4228 loc = i;
4229 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4230 /* We can use addw/subw for the last 12 bits. */
4231 result = remainder;
4232 else
4234 /* Use an 8-bit shifted/rotated immediate. */
4235 end = i - 8;
4236 if (end < 0)
4237 end += 32;
4238 result = remainder & ((0x0ff << end)
4239 | ((i < end) ? (0xff >> (32 - end))
4240 : 0));
4241 i -= 8;
4244 else
4246 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4247 arbitrary shifts. */
4248 i -= TARGET_ARM ? 2 : 1;
4249 continue;
4252 /* Next, see if we can do a better job with a thumb2 replicated
4253 constant.
4255 We do it this way around to catch the cases like 0x01F001E0 where
4256 two 8-bit immediates would work, but a replicated constant would
4257 make it worse.
4259 TODO: 16-bit constants that don't clear all the bits, but still win.
4260 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4261 if (TARGET_THUMB2)
4263 b1 = (remainder & 0xff000000) >> 24;
4264 b2 = (remainder & 0x00ff0000) >> 16;
4265 b3 = (remainder & 0x0000ff00) >> 8;
4266 b4 = remainder & 0xff;
4268 if (loc > 24)
4270 /* The 8-bit immediate already found clears b1 (and maybe b2),
4271 but must leave b3 and b4 alone. */
4273 /* First try to find a 32-bit replicated constant that clears
4274 almost everything. We can assume that we can't do it in one,
4275 or else we wouldn't be here. */
4276 unsigned int tmp = b1 & b2 & b3 & b4;
4277 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4278 + (tmp << 24);
4279 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4280 + (tmp == b3) + (tmp == b4);
4281 if (tmp
4282 && (matching_bytes >= 3
4283 || (matching_bytes == 2
4284 && const_ok_for_op (remainder & ~tmp2, code))))
4286 /* At least 3 of the bytes match, and the fourth has at
4287 least as many bits set, or two of the bytes match
4288 and it will only require one more insn to finish. */
4289 result = tmp2;
4290 i = tmp != b1 ? 32
4291 : tmp != b2 ? 24
4292 : tmp != b3 ? 16
4293 : 8;
4296 /* Second, try to find a 16-bit replicated constant that can
4297 leave three of the bytes clear. If b2 or b4 is already
4298 zero, then we can. If the 8-bit immediate from above would not
4299 clear b2 anyway, then we still win. */
4300 else if (b1 == b3 && (!b2 || !b4
4301 || (remainder & 0x00ff0000 & ~result)))
4303 result = remainder & 0xff00ff00;
4304 i = 24;
4307 else if (loc > 16)
4309 /* The 8-bit immediate already found clears b2 (and maybe b3)
4310 and we don't get here unless b1 is already clear, but it will
4311 leave b4 unchanged. */
4313 /* If we can clear b2 and b4 at once, then we win, since the
4314 8-bits couldn't possibly reach that far. */
4315 if (b2 == b4)
4317 result = remainder & 0x00ff00ff;
4318 i = 16;
4323 return_sequence->i[insns++] = result;
4324 remainder &= ~result;
4326 if (code == SET || code == MINUS)
4327 code = PLUS;
4329 while (remainder);
4331 return insns;
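/* For illustration: 0x0000f5f0 has twelve significant bits, so in ARM
   mode it cannot be encoded as one rotated 8-bit immediate, but it can
   be covered by two (for instance 0x0000f500 and 0x000000f0), giving a
   two-entry RETURN_SEQUENCE here.  */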
4334 /* Emit an instruction with the indicated PATTERN. If COND is
4335 non-NULL, conditionalize the execution of the instruction on COND
4336 being true. */
4338 static void
4339 emit_constant_insn (rtx cond, rtx pattern)
4341 if (cond)
4342 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4343 emit_insn (pattern);
4346 /* As above, but extra parameter GENERATE which, if clear, suppresses
4347 RTL generation. */
4349 static int
4350 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4351 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4352 int subtargets, int generate)
4354 int can_invert = 0;
4355 int can_negate = 0;
4356 int final_invert = 0;
4357 int i;
4358 int set_sign_bit_copies = 0;
4359 int clear_sign_bit_copies = 0;
4360 int clear_zero_bit_copies = 0;
4361 int set_zero_bit_copies = 0;
4362 int insns = 0, neg_insns, inv_insns;
4363 unsigned HOST_WIDE_INT temp1, temp2;
4364 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4365 struct four_ints *immediates;
4366 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4368 /* Find out which operations are safe for a given CODE. Also do a quick
4369 check for degenerate cases; these can occur when DImode operations
4370 are split. */
4371 switch (code)
4373 case SET:
4374 can_invert = 1;
4375 break;
4377 case PLUS:
4378 can_negate = 1;
4379 break;
4381 case IOR:
4382 if (remainder == 0xffffffff)
4384 if (generate)
4385 emit_constant_insn (cond,
4386 gen_rtx_SET (target,
4387 GEN_INT (ARM_SIGN_EXTEND (val))));
4388 return 1;
4391 if (remainder == 0)
4393 if (reload_completed && rtx_equal_p (target, source))
4394 return 0;
4396 if (generate)
4397 emit_constant_insn (cond, gen_rtx_SET (target, source));
4398 return 1;
4400 break;
4402 case AND:
4403 if (remainder == 0)
4405 if (generate)
4406 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4407 return 1;
4409 if (remainder == 0xffffffff)
4411 if (reload_completed && rtx_equal_p (target, source))
4412 return 0;
4413 if (generate)
4414 emit_constant_insn (cond, gen_rtx_SET (target, source));
4415 return 1;
4417 can_invert = 1;
4418 break;
4420 case XOR:
4421 if (remainder == 0)
4423 if (reload_completed && rtx_equal_p (target, source))
4424 return 0;
4425 if (generate)
4426 emit_constant_insn (cond, gen_rtx_SET (target, source));
4427 return 1;
4430 if (remainder == 0xffffffff)
4432 if (generate)
4433 emit_constant_insn (cond,
4434 gen_rtx_SET (target,
4435 gen_rtx_NOT (mode, source)));
4436 return 1;
4438 final_invert = 1;
4439 break;
4441 case MINUS:
4442 /* We treat MINUS as (val - source), since (source - val) is always
4443 passed as (source + (-val)). */
4444 if (remainder == 0)
4446 if (generate)
4447 emit_constant_insn (cond,
4448 gen_rtx_SET (target,
4449 gen_rtx_NEG (mode, source)));
4450 return 1;
4452 if (const_ok_for_arm (val))
4454 if (generate)
4455 emit_constant_insn (cond,
4456 gen_rtx_SET (target,
4457 gen_rtx_MINUS (mode, GEN_INT (val),
4458 source)));
4459 return 1;
4462 break;
4464 default:
4465 gcc_unreachable ();
4468 /* If we can do it in one insn get out quickly. */
4469 if (const_ok_for_op (val, code))
4471 if (generate)
4472 emit_constant_insn (cond,
4473 gen_rtx_SET (target,
4474 (source
4475 ? gen_rtx_fmt_ee (code, mode, source,
4476 GEN_INT (val))
4477 : GEN_INT (val))));
4478 return 1;
4481 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4482 insn. */
4483 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4484 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4486 if (generate)
4488 if (mode == SImode && i == 16)
4489 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4490 smaller insn. */
4491 emit_constant_insn (cond,
4492 gen_zero_extendhisi2
4493 (target, gen_lowpart (HImode, source)));
4494 else
4495 /* Extz only supports SImode, but we can coerce the operands
4496 into that mode. */
4497 emit_constant_insn (cond,
4498 gen_extzv_t2 (gen_lowpart (SImode, target),
4499 gen_lowpart (SImode, source),
4500 GEN_INT (i), const0_rtx));
4503 return 1;
4506 /* Calculate a few attributes that may be useful for specific
4507 optimizations. */
4508 /* Count number of leading zeros. */
4509 for (i = 31; i >= 0; i--)
4511 if ((remainder & (1 << i)) == 0)
4512 clear_sign_bit_copies++;
4513 else
4514 break;
4517 /* Count number of leading 1's. */
4518 for (i = 31; i >= 0; i--)
4520 if ((remainder & (1 << i)) != 0)
4521 set_sign_bit_copies++;
4522 else
4523 break;
4527 /* Count number of trailing zeros. */
4527 for (i = 0; i <= 31; i++)
4529 if ((remainder & (1 << i)) == 0)
4530 clear_zero_bit_copies++;
4531 else
4532 break;
4535 /* Count number of trailing 1's. */
4536 for (i = 0; i <= 31; i++)
4538 if ((remainder & (1 << i)) != 0)
4539 set_zero_bit_copies++;
4540 else
4541 break;
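/* For example, with remainder == 0xfff00000 the counts computed above
   are: clear_sign_bit_copies == 0, set_sign_bit_copies == 12,
   clear_zero_bit_copies == 20 and set_zero_bit_copies == 0.  */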
4544 switch (code)
4546 case SET:
4547 /* See if we can do this by sign_extending a constant that is known
4548 to be negative. This is a good way of doing it, since the shift
4549 may well merge into a subsequent insn. */
4550 if (set_sign_bit_copies > 1)
4552 if (const_ok_for_arm
4553 (temp1 = ARM_SIGN_EXTEND (remainder
4554 << (set_sign_bit_copies - 1))))
4556 if (generate)
4558 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4559 emit_constant_insn (cond,
4560 gen_rtx_SET (new_src, GEN_INT (temp1)));
4561 emit_constant_insn (cond,
4562 gen_ashrsi3 (target, new_src,
4563 GEN_INT (set_sign_bit_copies - 1)));
4565 return 2;
4567 /* For an inverted constant, we will need to set the low bits,
4568 these will be shifted out of harm's way. */
4569 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4570 if (const_ok_for_arm (~temp1))
4572 if (generate)
4574 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4575 emit_constant_insn (cond,
4576 gen_rtx_SET (new_src, GEN_INT (temp1)));
4577 emit_constant_insn (cond,
4578 gen_ashrsi3 (target, new_src,
4579 GEN_INT (set_sign_bit_copies - 1)));
4581 return 2;
4585 /* See if we can calculate the value as the difference between two
4586 valid immediates. */
4587 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4589 int topshift = clear_sign_bit_copies & ~1;
4591 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4592 & (0xff000000 >> topshift));
4594 /* If temp1 is zero, then that means the 9 most significant
4595 bits of remainder were 1 and we've caused it to overflow.
4596 When topshift is 0 we don't need to do anything since we
4597 can borrow from 'bit 32'. */
4598 if (temp1 == 0 && topshift != 0)
4599 temp1 = 0x80000000 >> (topshift - 1);
4601 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4603 if (const_ok_for_arm (temp2))
4605 if (generate)
4607 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4608 emit_constant_insn (cond,
4609 gen_rtx_SET (new_src, GEN_INT (temp1)));
4610 emit_constant_insn (cond,
4611 gen_addsi3 (target, new_src,
4612 GEN_INT (-temp2)));
4615 return 2;
4619 /* See if we can generate this by setting the bottom (or the top)
4620 16 bits, and then shifting these into the other half of the
4621 word. We only look for the simplest cases; to do more would cost
4622 too much. Be careful, however, not to generate this when the
4623 alternative would take fewer insns. */
4624 if (val & 0xffff0000)
4626 temp1 = remainder & 0xffff0000;
4627 temp2 = remainder & 0x0000ffff;
4629 /* Overlaps outside this range are best done using other methods. */
4630 for (i = 9; i < 24; i++)
4632 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4633 && !const_ok_for_arm (temp2))
4635 rtx new_src = (subtargets
4636 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4637 : target);
4638 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4639 source, subtargets, generate);
4640 source = new_src;
4641 if (generate)
4642 emit_constant_insn
4643 (cond,
4644 gen_rtx_SET
4645 (target,
4646 gen_rtx_IOR (mode,
4647 gen_rtx_ASHIFT (mode, source,
4648 GEN_INT (i)),
4649 source)));
4650 return insns + 1;
4654 /* Don't duplicate cases already considered. */
4655 for (i = 17; i < 24; i++)
4657 if (((temp1 | (temp1 >> i)) == remainder)
4658 && !const_ok_for_arm (temp1))
4660 rtx new_src = (subtargets
4661 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4662 : target);
4663 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4664 source, subtargets, generate);
4665 source = new_src;
4666 if (generate)
4667 emit_constant_insn
4668 (cond,
4669 gen_rtx_SET (target,
4670 gen_rtx_IOR
4671 (mode,
4672 gen_rtx_LSHIFTRT (mode, source,
4673 GEN_INT (i)),
4674 source)));
4675 return insns + 1;
4679 break;
4681 case IOR:
4682 case XOR:
4683 /* If we have IOR or XOR, and the constant can be loaded in a
4684 single instruction, and we can find a temporary to put it in,
4685 then this can be done in two instructions instead of 3-4. */
4686 if (subtargets
4687 /* TARGET can't be NULL if SUBTARGETS is 0 */
4688 || (reload_completed && !reg_mentioned_p (target, source)))
4690 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4692 if (generate)
4694 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4696 emit_constant_insn (cond,
4697 gen_rtx_SET (sub, GEN_INT (val)));
4698 emit_constant_insn (cond,
4699 gen_rtx_SET (target,
4700 gen_rtx_fmt_ee (code, mode,
4701 source, sub)));
4703 return 2;
4707 if (code == XOR)
4708 break;
4710 /* Convert.
4711 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4712 and the remainder 0s for e.g. 0xfff00000)
4713 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4715 This can be done in 2 instructions by using shifts with mov or mvn.
4716 e.g. for
4717 x = x | 0xfff00000;
4718 we generate.
4719 mvn r0, r0, asl #12
4720 mvn r0, r0, lsr #12 */
4721 if (set_sign_bit_copies > 8
4722 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4724 if (generate)
4726 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4727 rtx shift = GEN_INT (set_sign_bit_copies);
4729 emit_constant_insn
4730 (cond,
4731 gen_rtx_SET (sub,
4732 gen_rtx_NOT (mode,
4733 gen_rtx_ASHIFT (mode,
4734 source,
4735 shift))));
4736 emit_constant_insn
4737 (cond,
4738 gen_rtx_SET (target,
4739 gen_rtx_NOT (mode,
4740 gen_rtx_LSHIFTRT (mode, sub,
4741 shift))));
4743 return 2;
4746 /* Convert
4747 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4749 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4751 For example, r0 = r0 | 0xfff
4752 mvn r0, r0, lsr #12
4753 mvn r0, r0, asl #12
4756 if (set_zero_bit_copies > 8
4757 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4759 if (generate)
4761 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4762 rtx shift = GEN_INT (set_zero_bit_copies);
4764 emit_constant_insn
4765 (cond,
4766 gen_rtx_SET (sub,
4767 gen_rtx_NOT (mode,
4768 gen_rtx_LSHIFTRT (mode,
4769 source,
4770 shift))));
4771 emit_constant_insn
4772 (cond,
4773 gen_rtx_SET (target,
4774 gen_rtx_NOT (mode,
4775 gen_rtx_ASHIFT (mode, sub,
4776 shift))));
4778 return 2;
4781 /* This will never be reached for Thumb2 because orn is a valid
4782 instruction. This is for Thumb1 and the ARM 32 bit cases.
4784 x = y | constant (such that ~constant is a valid constant)
4785 Transform this to
4786 x = ~(~y & ~constant).
4788 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4790 if (generate)
4792 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4793 emit_constant_insn (cond,
4794 gen_rtx_SET (sub,
4795 gen_rtx_NOT (mode, source)));
4796 source = sub;
4797 if (subtargets)
4798 sub = gen_reg_rtx (mode);
4799 emit_constant_insn (cond,
4800 gen_rtx_SET (sub,
4801 gen_rtx_AND (mode, source,
4802 GEN_INT (temp1))));
4803 emit_constant_insn (cond,
4804 gen_rtx_SET (target,
4805 gen_rtx_NOT (mode, sub)));
4807 return 3;
4809 break;
4811 case AND:
4812 /* See if two shifts will do 2 or more insn's worth of work. */
4813 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4815 HOST_WIDE_INT shift_mask = ((0xffffffff
4816 << (32 - clear_sign_bit_copies))
4817 & 0xffffffff);
4819 if ((remainder | shift_mask) != 0xffffffff)
4821 HOST_WIDE_INT new_val
4822 = ARM_SIGN_EXTEND (remainder | shift_mask);
4824 if (generate)
4826 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4827 insns = arm_gen_constant (AND, SImode, cond, new_val,
4828 new_src, source, subtargets, 1);
4829 source = new_src;
4831 else
4833 rtx targ = subtargets ? NULL_RTX : target;
4834 insns = arm_gen_constant (AND, mode, cond, new_val,
4835 targ, source, subtargets, 0);
4839 if (generate)
4841 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4842 rtx shift = GEN_INT (clear_sign_bit_copies);
4844 emit_insn (gen_ashlsi3 (new_src, source, shift));
4845 emit_insn (gen_lshrsi3 (target, new_src, shift));
4848 return insns + 2;
4851 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4853 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4855 if ((remainder | shift_mask) != 0xffffffff)
4857 HOST_WIDE_INT new_val
4858 = ARM_SIGN_EXTEND (remainder | shift_mask);
4859 if (generate)
4861 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4863 insns = arm_gen_constant (AND, mode, cond, new_val,
4864 new_src, source, subtargets, 1);
4865 source = new_src;
4867 else
4869 rtx targ = subtargets ? NULL_RTX : target;
4871 insns = arm_gen_constant (AND, mode, cond, new_val,
4872 targ, source, subtargets, 0);
4876 if (generate)
4878 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4879 rtx shift = GEN_INT (clear_zero_bit_copies);
4881 emit_insn (gen_lshrsi3 (new_src, source, shift));
4882 emit_insn (gen_ashlsi3 (target, new_src, shift));
4885 return insns + 2;
4888 break;
4890 default:
4891 break;
4894 /* Calculate what the instruction sequences would be if we generated it
4895 normally, negated, or inverted. */
4896 if (code == AND)
4897 /* AND cannot be split into multiple insns, so invert and use BIC. */
4898 insns = 99;
4899 else
4900 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4902 if (can_negate)
4903 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4904 &neg_immediates);
4905 else
4906 neg_insns = 99;
4908 if (can_invert || final_invert)
4909 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4910 &inv_immediates);
4911 else
4912 inv_insns = 99;
4914 immediates = &pos_immediates;
4916 /* Is the negated immediate sequence more efficient? */
4917 if (neg_insns < insns && neg_insns <= inv_insns)
4919 insns = neg_insns;
4920 immediates = &neg_immediates;
4922 else
4923 can_negate = 0;
4925 /* Is the inverted immediate sequence more efficient?
4926 We must allow for an extra NOT instruction for XOR operations, although
4927 there is some chance that the final 'mvn' will get optimized later. */
4928 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4930 insns = inv_insns;
4931 immediates = &inv_immediates;
4933 else
4935 can_invert = 0;
4936 final_invert = 0;
4939 /* Now output the chosen sequence as instructions. */
4940 if (generate)
4942 for (i = 0; i < insns; i++)
4944 rtx new_src, temp1_rtx;
4946 temp1 = immediates->i[i];
4948 if (code == SET || code == MINUS)
4949 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4950 else if ((final_invert || i < (insns - 1)) && subtargets)
4951 new_src = gen_reg_rtx (mode);
4952 else
4953 new_src = target;
4955 if (can_invert)
4956 temp1 = ~temp1;
4957 else if (can_negate)
4958 temp1 = -temp1;
4960 temp1 = trunc_int_for_mode (temp1, mode);
4961 temp1_rtx = GEN_INT (temp1);
4963 if (code == SET)
4965 else if (code == MINUS)
4966 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4967 else
4968 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4970 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4971 source = new_src;
4973 if (code == SET)
4975 can_negate = can_invert;
4976 can_invert = 0;
4977 code = PLUS;
4979 else if (code == MINUS)
4980 code = PLUS;
4984 if (final_invert)
4986 if (generate)
4987 emit_constant_insn (cond, gen_rtx_SET (target,
4988 gen_rtx_NOT (mode, source)));
4989 insns++;
4992 return insns;
4995 /* Canonicalize a comparison so that we are more likely to recognize it.
4996 This can be done for a few constant compares, where we can make the
4997 immediate value easier to load. */
4999 static void
5000 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5001 bool op0_preserve_value)
5003 machine_mode mode;
5004 unsigned HOST_WIDE_INT i, maxval;
5006 mode = GET_MODE (*op0);
5007 if (mode == VOIDmode)
5008 mode = GET_MODE (*op1);
5010 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5012 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5013 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5014 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5015 for GTU/LEU in Thumb mode. */
5016 if (mode == DImode)
5019 if (*code == GT || *code == LE
5020 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5022 /* Missing comparison. First try to use an available
5023 comparison. */
5024 if (CONST_INT_P (*op1))
5026 i = INTVAL (*op1);
5027 switch (*code)
5029 case GT:
5030 case LE:
5031 if (i != maxval
5032 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5034 *op1 = GEN_INT (i + 1);
5035 *code = *code == GT ? GE : LT;
5036 return;
5038 break;
5039 case GTU:
5040 case LEU:
5041 if (i != ~((unsigned HOST_WIDE_INT) 0)
5042 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5044 *op1 = GEN_INT (i + 1);
5045 *code = *code == GTU ? GEU : LTU;
5046 return;
5048 break;
5049 default:
5050 gcc_unreachable ();
5054 /* If that did not work, reverse the condition. */
5055 if (!op0_preserve_value)
5057 std::swap (*op0, *op1);
5058 *code = (int)swap_condition ((enum rtx_code)*code);
5061 return;
5064 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5065 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5066 to facilitate possible combining with a cmp into 'ands'. */
5067 if (mode == SImode
5068 && GET_CODE (*op0) == ZERO_EXTEND
5069 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5070 && GET_MODE (XEXP (*op0, 0)) == QImode
5071 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5072 && subreg_lowpart_p (XEXP (*op0, 0))
5073 && *op1 == const0_rtx)
5074 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5075 GEN_INT (255));
5077 /* Comparisons smaller than DImode. Only adjust comparisons against
5078 an out-of-range constant. */
5079 if (!CONST_INT_P (*op1)
5080 || const_ok_for_arm (INTVAL (*op1))
5081 || const_ok_for_arm (- INTVAL (*op1)))
5082 return;
5084 i = INTVAL (*op1);
5086 switch (*code)
5088 case EQ:
5089 case NE:
5090 return;
5092 case GT:
5093 case LE:
5094 if (i != maxval
5095 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5097 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5098 *code = *code == GT ? GE : LT;
5099 return;
5101 break;
5103 case GE:
5104 case LT:
5105 if (i != ~maxval
5106 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5108 *op1 = GEN_INT (i - 1);
5109 *code = *code == GE ? GT : LE;
5110 return;
5112 break;
5114 case GTU:
5115 case LEU:
5116 if (i != ~((unsigned HOST_WIDE_INT) 0)
5117 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5119 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5120 *code = *code == GTU ? GEU : LTU;
5121 return;
5123 break;
5125 case GEU:
5126 case LTU:
5127 if (i != 0
5128 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5130 *op1 = GEN_INT (i - 1);
5131 *code = *code == GEU ? GTU : LEU;
5132 return;
5134 break;
5136 default:
5137 gcc_unreachable ();
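/* Example of the adjustment above: "x <= 0x3ff" uses a constant that is
   not a valid immediate, but 0x400 is, so the comparison is rewritten
   as "x < 0x400" (LE becomes LT) and can then use a plain CMP.  */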
5142 /* Define how to find the value returned by a function. */
5144 static rtx
5145 arm_function_value(const_tree type, const_tree func,
5146 bool outgoing ATTRIBUTE_UNUSED)
5148 machine_mode mode;
5149 int unsignedp ATTRIBUTE_UNUSED;
5150 rtx r ATTRIBUTE_UNUSED;
5152 mode = TYPE_MODE (type);
5154 if (TARGET_AAPCS_BASED)
5155 return aapcs_allocate_return_reg (mode, type, func);
5157 /* Promote integer types. */
5158 if (INTEGRAL_TYPE_P (type))
5159 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5161 /* Promote small structs returned in a register to full-word size
5162 for big-endian AAPCS. */
5163 if (arm_return_in_msb (type))
5165 HOST_WIDE_INT size = int_size_in_bytes (type);
5166 if (size % UNITS_PER_WORD != 0)
5168 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5169 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5173 return arm_libcall_value_1 (mode);
5176 /* libcall hashtable helpers. */
5178 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5180 static inline hashval_t hash (const rtx_def *);
5181 static inline bool equal (const rtx_def *, const rtx_def *);
5182 static inline void remove (rtx_def *);
5185 inline bool
5186 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5188 return rtx_equal_p (p1, p2);
5191 inline hashval_t
5192 libcall_hasher::hash (const rtx_def *p1)
5194 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5197 typedef hash_table<libcall_hasher> libcall_table_type;
5199 static void
5200 add_libcall (libcall_table_type *htab, rtx libcall)
5202 *htab->find_slot (libcall, INSERT) = libcall;
5205 static bool
5206 arm_libcall_uses_aapcs_base (const_rtx libcall)
5208 static bool init_done = false;
5209 static libcall_table_type *libcall_htab = NULL;
5211 if (!init_done)
5213 init_done = true;
5215 libcall_htab = new libcall_table_type (31);
5216 add_libcall (libcall_htab,
5217 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5218 add_libcall (libcall_htab,
5219 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5220 add_libcall (libcall_htab,
5221 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5222 add_libcall (libcall_htab,
5223 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5225 add_libcall (libcall_htab,
5226 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5227 add_libcall (libcall_htab,
5228 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5229 add_libcall (libcall_htab,
5230 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5231 add_libcall (libcall_htab,
5232 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5234 add_libcall (libcall_htab,
5235 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5236 add_libcall (libcall_htab,
5237 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5238 add_libcall (libcall_htab,
5239 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5240 add_libcall (libcall_htab,
5241 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5242 add_libcall (libcall_htab,
5243 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5244 add_libcall (libcall_htab,
5245 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5246 add_libcall (libcall_htab,
5247 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5248 add_libcall (libcall_htab,
5249 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5251 /* Values from double-precision helper functions are returned in core
5252 registers if the selected core only supports single-precision
5253 arithmetic, even if we are using the hard-float ABI. The same is
5254 true for single-precision helpers, but we will never be using the
5255 hard-float ABI on a CPU which doesn't support single-precision
5256 operations in hardware. */
5257 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5258 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5259 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5260 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5261 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5262 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5263 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5264 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5265 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5266 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5267 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5268 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5269 SFmode));
5270 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5271 DFmode));
5274 return libcall && libcall_htab->find (libcall) != NULL;
5277 static rtx
5278 arm_libcall_value_1 (machine_mode mode)
5280 if (TARGET_AAPCS_BASED)
5281 return aapcs_libcall_value (mode);
5282 else if (TARGET_IWMMXT_ABI
5283 && arm_vector_mode_supported_p (mode))
5284 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5285 else
5286 return gen_rtx_REG (mode, ARG_REGISTER (1));
5289 /* Define how to find the value returned by a library function
5290 assuming the value has mode MODE. */
5292 static rtx
5293 arm_libcall_value (machine_mode mode, const_rtx libcall)
5295 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5296 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5298 /* The following libcalls return their result in integer registers,
5299 even though they return a floating point value. */
5300 if (arm_libcall_uses_aapcs_base (libcall))
5301 return gen_rtx_REG (mode, ARG_REGISTER(1));
5305 return arm_libcall_value_1 (mode);
5308 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5310 static bool
5311 arm_function_value_regno_p (const unsigned int regno)
5313 if (regno == ARG_REGISTER (1)
5314 || (TARGET_32BIT
5315 && TARGET_AAPCS_BASED
5316 && TARGET_VFP
5317 && TARGET_HARD_FLOAT
5318 && regno == FIRST_VFP_REGNUM)
5319 || (TARGET_IWMMXT_ABI
5320 && regno == FIRST_IWMMXT_REGNUM))
5321 return true;
5323 return false;
5326 /* Determine the amount of memory needed to store the possible return
5327 registers of an untyped call. */
5329 arm_apply_result_size (void)
5331 int size = 16;
5333 if (TARGET_32BIT)
5335 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5336 size += 32;
5337 if (TARGET_IWMMXT_ABI)
5338 size += 8;
5341 return size;
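/* E.g. on a 32-bit target using the VFP hard-float ABI but without
   iWMMXt, this returns 16 + 32 = 48 bytes of possible return-register
   space.  */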
5344 /* Decide whether TYPE should be returned in memory (true)
5345 or in a register (false). FNTYPE is the type of the function making
5346 the call. */
5347 static bool
5348 arm_return_in_memory (const_tree type, const_tree fntype)
5350 HOST_WIDE_INT size;
5352 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5354 if (TARGET_AAPCS_BASED)
5356 /* Simple, non-aggregate types (i.e. not including vectors and
5357 complex) are always returned in a register (or registers).
5358 We don't care about which register here, so we can short-cut
5359 some of the detail. */
5360 if (!AGGREGATE_TYPE_P (type)
5361 && TREE_CODE (type) != VECTOR_TYPE
5362 && TREE_CODE (type) != COMPLEX_TYPE)
5363 return false;
5365 /* Any return value that is no larger than one word can be
5366 returned in r0. */
5367 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5368 return false;
5370 /* Check any available co-processors to see if they accept the
5371 type as a register candidate (VFP, for example, can return
5372 some aggregates in consecutive registers). These aren't
5373 available if the call is variadic. */
5374 if (aapcs_select_return_coproc (type, fntype) >= 0)
5375 return false;
5377 /* Vector values should be returned using ARM registers, not
5378 memory (unless they're over 16 bytes, which will break since
5379 we only have four call-clobbered registers to play with). */
5380 if (TREE_CODE (type) == VECTOR_TYPE)
5381 return (size < 0 || size > (4 * UNITS_PER_WORD));
5383 /* The rest go in memory. */
5384 return true;
5387 if (TREE_CODE (type) == VECTOR_TYPE)
5388 return (size < 0 || size > (4 * UNITS_PER_WORD));
5390 if (!AGGREGATE_TYPE_P (type) &&
5391 (TREE_CODE (type) != VECTOR_TYPE))
5392 /* All simple types are returned in registers. */
5393 return false;
5395 if (arm_abi != ARM_ABI_APCS)
5397 /* ATPCS and later return aggregate types in memory only if they are
5398 larger than a word (or are variable size). */
5399 return (size < 0 || size > UNITS_PER_WORD);
5402 /* For the arm-wince targets we choose to be compatible with Microsoft's
5403 ARM and Thumb compilers, which always return aggregates in memory. */
5404 #ifndef ARM_WINCE
5405 /* All structures/unions bigger than one word are returned in memory.
5406 Also catch the case where int_size_in_bytes returns -1. In this case
5407 the aggregate is either huge or of variable size, and in either case
5408 we will want to return it via memory and not in a register. */
5409 if (size < 0 || size > UNITS_PER_WORD)
5410 return true;
5412 if (TREE_CODE (type) == RECORD_TYPE)
5414 tree field;
5416 /* For a struct the APCS says that we only return in a register
5417 if the type is 'integer like' and every addressable element
5418 has an offset of zero. For practical purposes this means
5419 that the structure can have at most one non bit-field element
5420 and that this element must be the first one in the structure. */
5422 /* Find the first field, ignoring non FIELD_DECL things which will
5423 have been created by C++. */
5424 for (field = TYPE_FIELDS (type);
5425 field && TREE_CODE (field) != FIELD_DECL;
5426 field = DECL_CHAIN (field))
5427 continue;
5429 if (field == NULL)
5430 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5432 /* Check that the first field is valid for returning in a register. */
5434 /* ... Floats are not allowed */
5435 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5436 return true;
5438 /* ... Aggregates that are not themselves valid for returning in
5439 a register are not allowed. */
5440 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5441 return true;
5443 /* Now check the remaining fields, if any. Only bitfields are allowed,
5444 since they are not addressable. */
5445 for (field = DECL_CHAIN (field);
5446 field;
5447 field = DECL_CHAIN (field))
5449 if (TREE_CODE (field) != FIELD_DECL)
5450 continue;
5452 if (!DECL_BIT_FIELD_TYPE (field))
5453 return true;
5456 return false;
5459 if (TREE_CODE (type) == UNION_TYPE)
5461 tree field;
5463 /* Unions can be returned in registers if every element is
5464 integral, or can be returned in an integer register. */
5465 for (field = TYPE_FIELDS (type);
5466 field;
5467 field = DECL_CHAIN (field))
5469 if (TREE_CODE (field) != FIELD_DECL)
5470 continue;
5472 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5473 return true;
5475 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5476 return true;
5479 return false;
5481 #endif /* not ARM_WINCE */
5483 /* Return all other types in memory. */
5484 return true;
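/* Some concrete cases for the rules above (illustrative sketches only,
   assuming a little-endian AAPCS target):

       struct s4  { int x; };          (one word: returned in r0)
       struct s8  { int x, y; };       (two words: returned in memory, with
                                        the caller passing the address of
                                        the result buffer in r0)
       struct hfa { float a, b; };     (a homogeneous aggregate: returned in
                                        s0/s1 when the VFP variant of the
                                        PCS applies, in memory otherwise)

   Under the legacy APCS rules only "integer like" structures no bigger
   than a word, such as s4, come back in a register.  */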
5487 const struct pcs_attribute_arg
5489 const char *arg;
5490 enum arm_pcs value;
5491 } pcs_attribute_args[] =
5493 {"aapcs", ARM_PCS_AAPCS},
5494 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5495 #if 0
5496 /* We could recognize these, but changes would be needed elsewhere
5497 * to implement them. */
5498 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5499 {"atpcs", ARM_PCS_ATPCS},
5500 {"apcs", ARM_PCS_APCS},
5501 #endif
5502 {NULL, ARM_PCS_UNKNOWN}
5505 static enum arm_pcs
5506 arm_pcs_from_attribute (tree attr)
5508 const struct pcs_attribute_arg *ptr;
5509 const char *arg;
5511 /* Get the value of the argument. */
5512 if (TREE_VALUE (attr) == NULL_TREE
5513 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5514 return ARM_PCS_UNKNOWN;
5516 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5518 /* Check it against the list of known arguments. */
5519 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5520 if (streq (arg, ptr->arg))
5521 return ptr->value;
5524 /* An unrecognized PCS variant. */
5524 return ARM_PCS_UNKNOWN;
5527 /* Get the PCS variant to use for this call. TYPE is the function's type
5528 specification, DECL is the specific declaration. DECL may be null if
5529 the call could be indirect or if this is a library call. */
5530 static enum arm_pcs
5531 arm_get_pcs_model (const_tree type, const_tree decl)
5533 bool user_convention = false;
5534 enum arm_pcs user_pcs = arm_pcs_default;
5535 tree attr;
5537 gcc_assert (type);
5539 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5540 if (attr)
5542 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5543 user_convention = true;
5546 if (TARGET_AAPCS_BASED)
5548 /* Detect varargs functions. These always use the base rules
5549 (no argument is ever a candidate for a co-processor
5550 register). */
5551 bool base_rules = stdarg_p (type);
5553 if (user_convention)
5555 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5556 sorry ("non-AAPCS derived PCS variant");
5557 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5558 error ("variadic functions must use the base AAPCS variant");
5561 if (base_rules)
5562 return ARM_PCS_AAPCS;
5563 else if (user_convention)
5564 return user_pcs;
5565 else if (decl && flag_unit_at_a_time)
5567 /* Local functions never leak outside this compilation unit,
5568 so we are free to use whatever conventions are
5569 appropriate. */
5570 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5571 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5572 if (i && i->local)
5573 return ARM_PCS_AAPCS_LOCAL;
5576 else if (user_convention && user_pcs != arm_pcs_default)
5577 sorry ("PCS variant");
5579 /* For everything else we use the target's default. */
5580 return arm_pcs_default;
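/* A usage sketch for the attribute consulted above (hypothetical
   declarations, not taken from this file):

       double base_fn (double, double) __attribute__((pcs ("aapcs")));
       double vfp_fn  (double, double) __attribute__((pcs ("aapcs-vfp")));

   base_fn passes and returns its doubles in core registers even on a
   hard-float target, while vfp_fn uses d0/d1 when VFP is available.
   Variadic functions always fall back to the base rules, which is why a
   conflicting "pcs" value on one is rejected with the error above.  */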
5584 static void
5585 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5586 const_tree fntype ATTRIBUTE_UNUSED,
5587 rtx libcall ATTRIBUTE_UNUSED,
5588 const_tree fndecl ATTRIBUTE_UNUSED)
5590 /* Record the unallocated VFP registers. */
5591 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5592 pcum->aapcs_vfp_reg_alloc = 0;
5595 /* Walk down the type tree of TYPE counting consecutive base elements.
5596 If *MODEP is VOIDmode, then set it to the first valid floating point
5597 type. If a non-floating point type is found, or if a floating point
5598 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5599 otherwise return the count in the sub-tree. */
5600 static int
5601 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5603 machine_mode mode;
5604 HOST_WIDE_INT size;
5606 switch (TREE_CODE (type))
5608 case REAL_TYPE:
5609 mode = TYPE_MODE (type);
5610 if (mode != DFmode && mode != SFmode && mode != HFmode)
5611 return -1;
5613 if (*modep == VOIDmode)
5614 *modep = mode;
5616 if (*modep == mode)
5617 return 1;
5619 break;
5621 case COMPLEX_TYPE:
5622 mode = TYPE_MODE (TREE_TYPE (type));
5623 if (mode != DFmode && mode != SFmode)
5624 return -1;
5626 if (*modep == VOIDmode)
5627 *modep = mode;
5629 if (*modep == mode)
5630 return 2;
5632 break;
5634 case VECTOR_TYPE:
5635 /* Use V2SImode and V4SImode as representatives of all 64-bit
5636 and 128-bit vector types, whether or not those modes are
5637 supported with the present options. */
5638 size = int_size_in_bytes (type);
5639 switch (size)
5641 case 8:
5642 mode = V2SImode;
5643 break;
5644 case 16:
5645 mode = V4SImode;
5646 break;
5647 default:
5648 return -1;
5651 if (*modep == VOIDmode)
5652 *modep = mode;
5654 /* Vector modes are considered to be opaque: two vectors are
5655 equivalent for the purposes of being homogeneous aggregates
5656 if they are the same size. */
5657 if (*modep == mode)
5658 return 1;
5660 break;
5662 case ARRAY_TYPE:
5664 int count;
5665 tree index = TYPE_DOMAIN (type);
5667 /* Can't handle incomplete types nor sizes that are not
5668 fixed. */
5669 if (!COMPLETE_TYPE_P (type)
5670 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5671 return -1;
5673 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5674 if (count == -1
5675 || !index
5676 || !TYPE_MAX_VALUE (index)
5677 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5678 || !TYPE_MIN_VALUE (index)
5679 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5680 || count < 0)
5681 return -1;
5683 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5684 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5686 /* There must be no padding. */
5687 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5688 return -1;
5690 return count;
5693 case RECORD_TYPE:
5695 int count = 0;
5696 int sub_count;
5697 tree field;
5699 /* Can't handle incomplete types nor sizes that are not
5700 fixed. */
5701 if (!COMPLETE_TYPE_P (type)
5702 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5703 return -1;
5705 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5707 if (TREE_CODE (field) != FIELD_DECL)
5708 continue;
5710 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5711 if (sub_count < 0)
5712 return -1;
5713 count += sub_count;
5716 /* There must be no padding. */
5717 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5718 return -1;
5720 return count;
5723 case UNION_TYPE:
5724 case QUAL_UNION_TYPE:
5726 /* These aren't very interesting except in a degenerate case. */
5727 int count = 0;
5728 int sub_count;
5729 tree field;
5731 /* Can't handle incomplete types nor sizes that are not
5732 fixed. */
5733 if (!COMPLETE_TYPE_P (type)
5734 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5735 return -1;
5737 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5739 if (TREE_CODE (field) != FIELD_DECL)
5740 continue;
5742 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5743 if (sub_count < 0)
5744 return -1;
5745 count = count > sub_count ? count : sub_count;
5748 /* There must be no padding. */
5749 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5750 return -1;
5752 return count;
5755 default:
5756 break;
5759 return -1;
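/* Illustrative inputs to the walk above (sketches only):

       struct c1 { float a, b, c; };        (count 3, *modep == SFmode)
       struct c2 { double d[4]; };          (count 4, *modep == DFmode)
       struct c3 { _Complex double z; };    (count 2, *modep == DFmode)
       struct c4 { float a; double b; };    (mixed base types: returns -1)
       struct c5 { double d[5]; };          (count 5: rejected later by the
                                             caller's 1..4 element check)

   Only the first three are homogeneous aggregates that can travel in
   consecutive VFP registers.  */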
5762 /* Return true if PCS_VARIANT should use VFP registers. */
5763 static bool
5764 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5766 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5768 static bool seen_thumb1_vfp = false;
5770 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5772 sorry ("Thumb-1 hard-float VFP ABI");
5773 /* sorry() is not immediately fatal, so only display this once. */
5774 seen_thumb1_vfp = true;
5777 return true;
5780 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5781 return false;
5783 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5784 (TARGET_VFP_DOUBLE || !is_double));
5787 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5788 suitable for passing or returning in VFP registers for the PCS
5789 variant selected. If it is, then *BASE_MODE is updated to contain
5790 a machine mode describing each element of the argument's type and
5791 *COUNT to hold the number of such elements. */
5792 static bool
5793 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5794 machine_mode mode, const_tree type,
5795 machine_mode *base_mode, int *count)
5797 machine_mode new_mode = VOIDmode;
5799 /* If we have the type information, prefer that to working things
5800 out from the mode. */
5801 if (type)
5803 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5805 if (ag_count > 0 && ag_count <= 4)
5806 *count = ag_count;
5807 else
5808 return false;
5810 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5811 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5812 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5814 *count = 1;
5815 new_mode = mode;
5817 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5819 *count = 2;
5820 new_mode = (mode == DCmode ? DFmode : SFmode);
5822 else
5823 return false;
5826 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5827 return false;
5829 *base_mode = new_mode;
5830 return true;
5833 static bool
5834 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5835 machine_mode mode, const_tree type)
5837 int count ATTRIBUTE_UNUSED;
5838 machine_mode ag_mode ATTRIBUTE_UNUSED;
5840 if (!use_vfp_abi (pcs_variant, false))
5841 return false;
5842 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5843 &ag_mode, &count);
5846 static bool
5847 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5848 const_tree type)
5850 if (!use_vfp_abi (pcum->pcs_variant, false))
5851 return false;
5853 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5854 &pcum->aapcs_vfp_rmode,
5855 &pcum->aapcs_vfp_rcount);
5858 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
5859 for the behaviour of this function. */
5861 static bool
5862 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5863 const_tree type ATTRIBUTE_UNUSED)
5865 int rmode_size
5866 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
5867 int shift = rmode_size / GET_MODE_SIZE (SFmode);
5868 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5869 int regno;
5871 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5872 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5874 pcum->aapcs_vfp_reg_alloc = mask << regno;
5875 if (mode == BLKmode
5876 || (mode == TImode && ! TARGET_NEON)
5877 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5879 int i;
5880 int rcount = pcum->aapcs_vfp_rcount;
5881 int rshift = shift;
5882 machine_mode rmode = pcum->aapcs_vfp_rmode;
5883 rtx par;
5884 if (!TARGET_NEON)
5886 /* Avoid using unsupported vector modes. */
5887 if (rmode == V2SImode)
5888 rmode = DImode;
5889 else if (rmode == V4SImode)
5891 rmode = DImode;
5892 rcount *= 2;
5893 rshift /= 2;
5896 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5897 for (i = 0; i < rcount; i++)
5899 rtx tmp = gen_rtx_REG (rmode,
5900 FIRST_VFP_REGNUM + regno + i * rshift);
5901 tmp = gen_rtx_EXPR_LIST
5902 (VOIDmode, tmp,
5903 GEN_INT (i * GET_MODE_SIZE (rmode)));
5904 XVECEXP (par, 0, i) = tmp;
5907 pcum->aapcs_reg = par;
5909 else
5910 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5911 return true;
5913 return false;
5916 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
5917 comment there for the behaviour of this function. */
5919 static rtx
5920 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5921 machine_mode mode,
5922 const_tree type ATTRIBUTE_UNUSED)
5924 if (!use_vfp_abi (pcs_variant, false))
5925 return NULL;
5927 if (mode == BLKmode
5928 || (GET_MODE_CLASS (mode) == MODE_INT
5929 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
5930 && !TARGET_NEON))
5932 int count;
5933 machine_mode ag_mode;
5934 int i;
5935 rtx par;
5936 int shift;
5938 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5939 &ag_mode, &count);
5941 if (!TARGET_NEON)
5943 if (ag_mode == V2SImode)
5944 ag_mode = DImode;
5945 else if (ag_mode == V4SImode)
5947 ag_mode = DImode;
5948 count *= 2;
5951 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5952 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5953 for (i = 0; i < count; i++)
5955 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5956 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5957 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5958 XVECEXP (par, 0, i) = tmp;
5961 return par;
5964 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5967 static void
5968 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5969 machine_mode mode ATTRIBUTE_UNUSED,
5970 const_tree type ATTRIBUTE_UNUSED)
5972 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5973 pcum->aapcs_vfp_reg_alloc = 0;
5974 return;
5977 #define AAPCS_CP(X) \
5979 aapcs_ ## X ## _cum_init, \
5980 aapcs_ ## X ## _is_call_candidate, \
5981 aapcs_ ## X ## _allocate, \
5982 aapcs_ ## X ## _is_return_candidate, \
5983 aapcs_ ## X ## _allocate_return_reg, \
5984 aapcs_ ## X ## _advance \
5987 /* Table of co-processors that can be used to pass arguments in
5988 registers. Ideally no argument should be a candidate for more than
5989 one co-processor table entry, but the table is processed in order
5990 and stops after the first match. If that entry then fails to put
5991 the argument into a co-processor register, the argument will go on
5992 the stack. */
5993 static struct
5995 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5996 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5998 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5999 BLKmode) is a candidate for this co-processor's registers; this
6000 function should ignore any position-dependent state in
6001 CUMULATIVE_ARGS and only use call-type dependent information. */
6002 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6004 /* Return true if the argument does get a co-processor register; it
6005 should set aapcs_reg to an RTX of the register allocated as is
6006 required for a return from FUNCTION_ARG. */
6007 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6009 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6010 be returned in this co-processor's registers. */
6011 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6013 /* Allocate and return an RTX element to hold the return type of a call. This
6014 routine must not fail and will only be called if is_return_candidate
6015 returned true with the same parameters. */
6016 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6018 /* Finish processing this argument and prepare to start processing
6019 the next one. */
6020 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6021 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6023 AAPCS_CP(vfp)
6026 #undef AAPCS_CP
6028 static int
6029 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6030 const_tree type)
6032 int i;
6034 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6035 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6036 return i;
6038 return -1;
6041 static int
6042 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6044 /* We aren't passed a decl, so we can't check that a call is local.
6045 However, it isn't clear that that would be a win anyway, since it
6046 might limit some tail-calling opportunities. */
6047 enum arm_pcs pcs_variant;
6049 if (fntype)
6051 const_tree fndecl = NULL_TREE;
6053 if (TREE_CODE (fntype) == FUNCTION_DECL)
6055 fndecl = fntype;
6056 fntype = TREE_TYPE (fntype);
6059 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6061 else
6062 pcs_variant = arm_pcs_default;
6064 if (pcs_variant != ARM_PCS_AAPCS)
6066 int i;
6068 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6069 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6070 TYPE_MODE (type),
6071 type))
6072 return i;
6074 return -1;
6077 static rtx
6078 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6079 const_tree fntype)
6081 /* We aren't passed a decl, so we can't check that a call is local.
6082 However, it isn't clear that that would be a win anyway, since it
6083 might limit some tail-calling opportunities. */
6084 enum arm_pcs pcs_variant;
6085 int unsignedp ATTRIBUTE_UNUSED;
6087 if (fntype)
6089 const_tree fndecl = NULL_TREE;
6091 if (TREE_CODE (fntype) == FUNCTION_DECL)
6093 fndecl = fntype;
6094 fntype = TREE_TYPE (fntype);
6097 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6099 else
6100 pcs_variant = arm_pcs_default;
6102 /* Promote integer types. */
6103 if (type && INTEGRAL_TYPE_P (type))
6104 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6106 if (pcs_variant != ARM_PCS_AAPCS)
6108 int i;
6110 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6111 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6112 type))
6113 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6114 mode, type);
6117 /* Promotes small structs returned in a register to full-word size
6118 for big-endian AAPCS. */
6119 if (type && arm_return_in_msb (type))
6121 HOST_WIDE_INT size = int_size_in_bytes (type);
6122 if (size % UNITS_PER_WORD != 0)
6124 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6125 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6129 return gen_rtx_REG (mode, R0_REGNUM);
6132 static rtx
6133 aapcs_libcall_value (machine_mode mode)
6135 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6136 && GET_MODE_SIZE (mode) <= 4)
6137 mode = SImode;
6139 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6142 /* Lay out a function argument using the AAPCS rules. The rule
6143 numbers referred to here are those in the AAPCS. */
6144 static void
6145 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6146 const_tree type, bool named)
6148 int nregs, nregs2;
6149 int ncrn;
6151 /* We only need to do this once per argument. */
6152 if (pcum->aapcs_arg_processed)
6153 return;
6155 pcum->aapcs_arg_processed = true;
6157 /* Special case: if named is false then we are handling an incoming
6158 anonymous argument which is on the stack. */
6159 if (!named)
6160 return;
6162 /* Is this a potential co-processor register candidate? */
6163 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6165 int slot = aapcs_select_call_coproc (pcum, mode, type);
6166 pcum->aapcs_cprc_slot = slot;
6168 /* We don't have to apply any of the rules from part B of the
6169 preparation phase, these are handled elsewhere in the
6170 compiler. */
6172 if (slot >= 0)
6174 /* A Co-processor register candidate goes either in its own
6175 class of registers or on the stack. */
6176 if (!pcum->aapcs_cprc_failed[slot])
6178 /* C1.cp - Try to allocate the argument to co-processor
6179 registers. */
6180 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6181 return;
6183 /* C2.cp - Put the argument on the stack and note that we
6184 can't assign any more candidates in this slot. We also
6185 need to note that we have allocated stack space, so that
6186 we won't later try to split a non-cprc candidate between
6187 core registers and the stack. */
6188 pcum->aapcs_cprc_failed[slot] = true;
6189 pcum->can_split = false;
6192 /* We didn't get a register, so this argument goes on the
6193 stack. */
6194 gcc_assert (pcum->can_split == false);
6195 return;
6199 /* C3 - For double-word aligned arguments, round the NCRN up to the
6200 next even number. */
6201 ncrn = pcum->aapcs_ncrn;
6202 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6203 ncrn++;
6205 nregs = ARM_NUM_REGS2(mode, type);
6207 /* Sigh, this test should really assert that nregs > 0, but a GCC
6208 extension allows empty structs and then gives them empty size; it
6209 then allows such a structure to be passed by value. For some of
6210 the code below we have to pretend that such an argument has
6211 non-zero size so that we 'locate' it correctly either in
6212 registers or on the stack. */
6213 gcc_assert (nregs >= 0);
6215 nregs2 = nregs ? nregs : 1;
6217 /* C4 - Argument fits entirely in core registers. */
6218 if (ncrn + nregs2 <= NUM_ARG_REGS)
6220 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6221 pcum->aapcs_next_ncrn = ncrn + nregs;
6222 return;
6225 /* C5 - Some core registers left and there are no arguments already
6226 on the stack: split this argument between the remaining core
6227 registers and the stack. */
6228 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6230 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6231 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6232 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6233 return;
6236 /* C6 - NCRN is set to 4. */
6237 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6239 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6240 return;
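/* A worked example of rules C3-C5 above (illustrative only).  For

       void f (int a, long long b, int c);

   'a' takes r0 (C4); 'b' needs doubleword alignment, so C3 rounds the
   NCRN up from 1 to 2 and 'b' occupies r2/r3; 'c' then no longer fits in
   core registers and goes on the stack (C6 onwards), leaving r1 unused.
   If the second argument were instead a 16-byte structure, C5 would split
   it between r1-r3 and the stack, provided no earlier argument had
   already been placed on the stack.  */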
6243 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6244 for a call to a function whose data type is FNTYPE.
6245 For a library call, FNTYPE is NULL. */
6246 void
6247 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6248 rtx libname,
6249 tree fndecl ATTRIBUTE_UNUSED)
6251 /* Long call handling. */
6252 if (fntype)
6253 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6254 else
6255 pcum->pcs_variant = arm_pcs_default;
6257 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6259 if (arm_libcall_uses_aapcs_base (libname))
6260 pcum->pcs_variant = ARM_PCS_AAPCS;
6262 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6263 pcum->aapcs_reg = NULL_RTX;
6264 pcum->aapcs_partial = 0;
6265 pcum->aapcs_arg_processed = false;
6266 pcum->aapcs_cprc_slot = -1;
6267 pcum->can_split = true;
6269 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6271 int i;
6273 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6275 pcum->aapcs_cprc_failed[i] = false;
6276 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6279 return;
6282 /* Legacy ABIs */
6284 /* On the ARM, the offset starts at 0. */
6285 pcum->nregs = 0;
6286 pcum->iwmmxt_nregs = 0;
6287 pcum->can_split = true;
6289 /* Varargs vectors are treated the same as long long.
6290 named_count avoids having to change the way arm handles 'named' */
6291 pcum->named_count = 0;
6292 pcum->nargs = 0;
6294 if (TARGET_REALLY_IWMMXT && fntype)
6296 tree fn_arg;
6298 for (fn_arg = TYPE_ARG_TYPES (fntype);
6299 fn_arg;
6300 fn_arg = TREE_CHAIN (fn_arg))
6301 pcum->named_count += 1;
6303 if (! pcum->named_count)
6304 pcum->named_count = INT_MAX;
6308 /* Return true if mode/type need doubleword alignment. */
6309 static bool
6310 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6312 if (!type)
6313 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6315 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6316 if (!AGGREGATE_TYPE_P (type))
6317 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6319 /* Array types: Use member alignment of element type. */
6320 if (TREE_CODE (type) == ARRAY_TYPE)
6321 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6323 /* Record/aggregate types: Use greatest member alignment of any member. */
6324 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6325 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6326 return true;
6328 return false;
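/* For example (assuming the usual AAPCS 8-byte alignment of long long
   and double):

       void g (int a, double d);      (d is doubleword aligned, so under
                                       the base PCS it goes in the even
                                       pair r2/r3 and r1 is skipped)
       struct sa { char c; };         (alignment 1: the answer is false)
       struct sb { long long l; };    (a member with 8-byte alignment:
                                       the answer is true)

   The field walk at the end is what distinguishes sb from sa.  */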
6332 /* Determine where to put an argument to a function.
6333 Value is zero to push the argument on the stack,
6334 or a hard register in which to store the argument.
6336 MODE is the argument's machine mode.
6337 TYPE is the data type of the argument (as a tree).
6338 This is null for libcalls where that information may
6339 not be available.
6340 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6341 the preceding args and about the function being called.
6342 NAMED is nonzero if this argument is a named parameter
6343 (otherwise it is an extra parameter matching an ellipsis).
6345 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6346 other arguments are passed on the stack. If (NAMED == 0) (which happens
6347 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6348 defined), say it is passed in the stack (function_prologue will
6349 indeed make it pass in the stack if necessary). */
6351 static rtx
6352 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6353 const_tree type, bool named)
6355 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6356 int nregs;
6358 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6359 a call insn (op3 of a call_value insn). */
6360 if (mode == VOIDmode)
6361 return const0_rtx;
6363 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6365 aapcs_layout_arg (pcum, mode, type, named);
6366 return pcum->aapcs_reg;
6369 /* Varargs vectors are treated the same as long long.
6370 named_count avoids having to change the way arm handles 'named' */
6371 if (TARGET_IWMMXT_ABI
6372 && arm_vector_mode_supported_p (mode)
6373 && pcum->named_count > pcum->nargs + 1)
6375 if (pcum->iwmmxt_nregs <= 9)
6376 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6377 else
6379 pcum->can_split = false;
6380 return NULL_RTX;
6384 /* Put doubleword aligned quantities in even register pairs. */
6385 if (pcum->nregs & 1
6386 && ARM_DOUBLEWORD_ALIGN
6387 && arm_needs_doubleword_align (mode, type))
6388 pcum->nregs++;
6390 /* Only allow splitting an arg between regs and memory if all preceding
6391 args were allocated to regs. For args passed by reference we only count
6392 the reference pointer. */
6393 if (pcum->can_split)
6394 nregs = 1;
6395 else
6396 nregs = ARM_NUM_REGS2 (mode, type);
6398 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6399 return NULL_RTX;
6401 return gen_rtx_REG (mode, pcum->nregs);
6404 static unsigned int
6405 arm_function_arg_boundary (machine_mode mode, const_tree type)
6407 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6408 ? DOUBLEWORD_ALIGNMENT
6409 : PARM_BOUNDARY);
6412 static int
6413 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6414 tree type, bool named)
6416 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6417 int nregs = pcum->nregs;
6419 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6421 aapcs_layout_arg (pcum, mode, type, named);
6422 return pcum->aapcs_partial;
6425 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6426 return 0;
6428 if (NUM_ARG_REGS > nregs
6429 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6430 && pcum->can_split)
6431 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6433 return 0;
6436 /* Update the data in PCUM to advance over an argument
6437 of mode MODE and data type TYPE.
6438 (TYPE is null for libcalls where that information may not be available.) */
6440 static void
6441 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6442 const_tree type, bool named)
6444 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6446 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6448 aapcs_layout_arg (pcum, mode, type, named);
6450 if (pcum->aapcs_cprc_slot >= 0)
6452 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6453 type);
6454 pcum->aapcs_cprc_slot = -1;
6457 /* Generic stuff. */
6458 pcum->aapcs_arg_processed = false;
6459 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6460 pcum->aapcs_reg = NULL_RTX;
6461 pcum->aapcs_partial = 0;
6463 else
6465 pcum->nargs += 1;
6466 if (arm_vector_mode_supported_p (mode)
6467 && pcum->named_count > pcum->nargs
6468 && TARGET_IWMMXT_ABI)
6469 pcum->iwmmxt_nregs += 1;
6470 else
6471 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6475 /* Variable sized types are passed by reference. This is a GCC
6476 extension to the ARM ABI. */
6478 static bool
6479 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6480 machine_mode mode ATTRIBUTE_UNUSED,
6481 const_tree type, bool named ATTRIBUTE_UNUSED)
6483 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6486 /* Encode the current state of the #pragma [no_]long_calls. */
6487 typedef enum
6489 OFF, /* No #pragma [no_]long_calls is in effect. */
6490 LONG, /* #pragma long_calls is in effect. */
6491 SHORT /* #pragma no_long_calls is in effect. */
6492 } arm_pragma_enum;
6494 static arm_pragma_enum arm_pragma_long_calls = OFF;
6496 void
6497 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6499 arm_pragma_long_calls = LONG;
6502 void
6503 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6505 arm_pragma_long_calls = SHORT;
6508 void
6509 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6511 arm_pragma_long_calls = OFF;
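/* Typical use of these pragmas in user code (illustrative):

       #pragma long_calls
       void far_away (void);          (calls use a full 32-bit address)
       #pragma no_long_calls
       void nearby (void);            (calls use a plain BL)
       #pragma long_calls_off
                                      (back to the command-line default)

   The state recorded here is consumed by arm_set_default_type_attributes
   and arm_is_long_call_p below.  */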
6514 /* Handle an attribute requiring a FUNCTION_DECL;
6515 arguments as in struct attribute_spec.handler. */
6516 static tree
6517 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6518 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6520 if (TREE_CODE (*node) != FUNCTION_DECL)
6522 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6523 name);
6524 *no_add_attrs = true;
6527 return NULL_TREE;
6530 /* Handle an "interrupt" or "isr" attribute;
6531 arguments as in struct attribute_spec.handler. */
6532 static tree
6533 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6534 bool *no_add_attrs)
6536 if (DECL_P (*node))
6538 if (TREE_CODE (*node) != FUNCTION_DECL)
6540 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6541 name);
6542 *no_add_attrs = true;
6544 /* FIXME: the argument if any is checked for type attributes;
6545 should it be checked for decl ones? */
6547 else
6549 if (TREE_CODE (*node) == FUNCTION_TYPE
6550 || TREE_CODE (*node) == METHOD_TYPE)
6552 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6554 warning (OPT_Wattributes, "%qE attribute ignored",
6555 name);
6556 *no_add_attrs = true;
6559 else if (TREE_CODE (*node) == POINTER_TYPE
6560 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6561 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6562 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6564 *node = build_variant_type_copy (*node);
6565 TREE_TYPE (*node) = build_type_attribute_variant
6566 (TREE_TYPE (*node),
6567 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6568 *no_add_attrs = true;
6570 else
6572 /* Possibly pass this attribute on from the type to a decl. */
6573 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6574 | (int) ATTR_FLAG_FUNCTION_NEXT
6575 | (int) ATTR_FLAG_ARRAY_NEXT))
6577 *no_add_attrs = true;
6578 return tree_cons (name, args, NULL_TREE);
6580 else
6582 warning (OPT_Wattributes, "%qE attribute ignored",
6583 name);
6588 return NULL_TREE;
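/* Declarations this handler is intended to accept (illustrative):

       void irq_handler (void) __attribute__((interrupt ("IRQ")));
       void fiq_handler (void) __attribute__((isr ("FIQ")));

   An unrecognized argument string maps to ARM_FT_UNKNOWN in arm_isr_value
   and is diagnosed above; on a pointer-to-function type the attribute is
   pushed down onto the pointed-to function type via a new type
   variant.  */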
6591 /* Handle a "pcs" attribute; arguments as in struct
6592 attribute_spec.handler. */
6593 static tree
6594 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6595 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6597 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6599 warning (OPT_Wattributes, "%qE attribute ignored", name);
6600 *no_add_attrs = true;
6602 return NULL_TREE;
6605 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6606 /* Handle the "notshared" attribute. This attribute is another way of
6607 requesting hidden visibility. ARM's compiler supports
6608 "__declspec(notshared)"; we support the same thing via an
6609 attribute. */
6611 static tree
6612 arm_handle_notshared_attribute (tree *node,
6613 tree name ATTRIBUTE_UNUSED,
6614 tree args ATTRIBUTE_UNUSED,
6615 int flags ATTRIBUTE_UNUSED,
6616 bool *no_add_attrs)
6618 tree decl = TYPE_NAME (*node);
6620 if (decl)
6622 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6623 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6624 *no_add_attrs = false;
6626 return NULL_TREE;
6628 #endif
6630 /* Return 0 if the attributes for two types are incompatible, 1 if they
6631 are compatible, and 2 if they are nearly compatible (which causes a
6632 warning to be generated). */
6633 static int
6634 arm_comp_type_attributes (const_tree type1, const_tree type2)
6636 int l1, l2, s1, s2;
6638 /* Check for mismatch of non-default calling convention. */
6639 if (TREE_CODE (type1) != FUNCTION_TYPE)
6640 return 1;
6642 /* Check for mismatched call attributes. */
6643 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6644 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6645 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6646 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6648 /* Only bother to check if an attribute is defined. */
6649 if (l1 | l2 | s1 | s2)
6651 /* If one type has an attribute, the other must have the same attribute. */
6652 if ((l1 != l2) || (s1 != s2))
6653 return 0;
6655 /* Disallow mixed attributes. */
6656 if ((l1 & s2) || (l2 & s1))
6657 return 0;
6660 /* Check for mismatched ISR attribute. */
6661 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6662 if (! l1)
6663 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6664 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6665 if (! l2)
6666 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6667 if (l1 != l2)
6668 return 0;
6670 return 1;
6673 /* Assigns default attributes to newly defined type. This is used to
6674 set short_call/long_call attributes for function types of
6675 functions defined inside corresponding #pragma scopes. */
6676 static void
6677 arm_set_default_type_attributes (tree type)
6679 /* Add __attribute__ ((long_call)) to all functions, when
6680 inside #pragma long_calls or __attribute__ ((short_call)),
6681 when inside #pragma no_long_calls. */
6682 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6684 tree type_attr_list, attr_name;
6685 type_attr_list = TYPE_ATTRIBUTES (type);
6687 if (arm_pragma_long_calls == LONG)
6688 attr_name = get_identifier ("long_call");
6689 else if (arm_pragma_long_calls == SHORT)
6690 attr_name = get_identifier ("short_call");
6691 else
6692 return;
6694 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6695 TYPE_ATTRIBUTES (type) = type_attr_list;
6699 /* Return true if DECL is known to be linked into section SECTION. */
6701 static bool
6702 arm_function_in_section_p (tree decl, section *section)
6704 /* We can only be certain about the prevailing symbol definition. */
6705 if (!decl_binds_to_current_def_p (decl))
6706 return false;
6708 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6709 if (!DECL_SECTION_NAME (decl))
6711 /* Make sure that we will not create a unique section for DECL. */
6712 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6713 return false;
6716 return function_section (decl) == section;
6719 /* Return nonzero if a 32-bit "long_call" should be generated for
6720 a call from the current function to DECL. We generate a long_call
6721 if the function:
6723 a. has an __attribute__((long call))
6724 or b. is within the scope of a #pragma long_calls
6725 or c. the -mlong-calls command line switch has been specified
6727 However we do not generate a long call if the function:
6729 d. has an __attribute__ ((short_call))
6730 or e. is inside the scope of a #pragma no_long_calls
6731 or f. is defined in the same section as the current function. */
6733 bool
6734 arm_is_long_call_p (tree decl)
6736 tree attrs;
6738 if (!decl)
6739 return TARGET_LONG_CALLS;
6741 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6742 if (lookup_attribute ("short_call", attrs))
6743 return false;
6745 /* For "f", be conservative, and only cater for cases in which the
6746 whole of the current function is placed in the same section. */
6747 if (!flag_reorder_blocks_and_partition
6748 && TREE_CODE (decl) == FUNCTION_DECL
6749 && arm_function_in_section_p (decl, current_function_section ()))
6750 return false;
6752 if (lookup_attribute ("long_call", attrs))
6753 return true;
6755 return TARGET_LONG_CALLS;
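/* A usage sketch for the rules above:

       void ext (void) __attribute__((long_call));    (rule a: always a
                                                       32-bit call)
       void loc (void) __attribute__((short_call));   (rule d: always a
                                                       plain BL)

   With -mlong-calls, every other external call is also made via a full
   32-bit address unless the callee is known to land in the same section
   as the caller (rule f).  */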
6758 /* Return nonzero if it is ok to make a tail-call to DECL. */
6759 static bool
6760 arm_function_ok_for_sibcall (tree decl, tree exp)
6762 unsigned long func_type;
6764 if (cfun->machine->sibcall_blocked)
6765 return false;
6767 /* Never tailcall something if we are generating code for Thumb-1. */
6768 if (TARGET_THUMB1)
6769 return false;
6771 /* The PIC register is live on entry to VxWorks PLT entries, so we
6772 must make the call before restoring the PIC register. */
6773 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
6774 return false;
6776 /* If we are interworking and the function is not declared static
6777 then we can't tail-call it unless we know that it exists in this
6778 compilation unit (since it might be a Thumb routine). */
6779 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6780 && !TREE_ASM_WRITTEN (decl))
6781 return false;
6783 func_type = arm_current_func_type ();
6784 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6785 if (IS_INTERRUPT (func_type))
6786 return false;
6788 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6790 /* Check that the return value locations are the same. For
6791 example that we aren't returning a value from the sibling in
6792 a VFP register but then need to transfer it to a core
6793 register. */
6794 rtx a, b;
6795 tree decl_or_type = decl;
6797 /* If it is an indirect function pointer, get the function type. */
6798 if (!decl)
6799 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
6801 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
6802 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6803 cfun->decl, false);
6804 if (!rtx_equal_p (a, b))
6805 return false;
6808 /* Never tailcall if function may be called with a misaligned SP. */
6809 if (IS_STACKALIGN (func_type))
6810 return false;
6812 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6813 references should become a NOP. Don't convert such calls into
6814 sibling calls. */
6815 if (TARGET_AAPCS_BASED
6816 && arm_abi == ARM_ABI_AAPCS
6817 && decl
6818 && DECL_WEAK (decl))
6819 return false;
6821 /* Everything else is ok. */
6822 return true;
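/* The weak-reference restriction above in user terms (a sketch):

       extern void maybe_there (void) __attribute__((weak));
       void run (void) { maybe_there (); }

   On bare-metal AAPCS targets the linker resolves a BL to an undefined
   weak symbol to a no-op, so run still returns normally; if the call were
   converted to a sibling-call branch, the no-op would simply fall through
   into whatever follows instead of returning to run's caller.  */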
6826 /* Addressing mode support functions. */
6828 /* Return nonzero if X is a legitimate immediate operand when compiling
6829 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6831 legitimate_pic_operand_p (rtx x)
6833 if (GET_CODE (x) == SYMBOL_REF
6834 || (GET_CODE (x) == CONST
6835 && GET_CODE (XEXP (x, 0)) == PLUS
6836 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6837 return 0;
6839 return 1;
6842 /* Record that the current function needs a PIC register. Initialize
6843 cfun->machine->pic_reg if we have not already done so. */
6845 static void
6846 require_pic_register (void)
6848 /* A lot of the logic here is made obscure by the fact that this
6849 routine gets called as part of the rtx cost estimation process.
6850 We don't want those calls to affect any assumptions about the real
6851 function; and further, we can't call entry_of_function() until we
6852 start the real expansion process. */
6853 if (!crtl->uses_pic_offset_table)
6855 gcc_assert (can_create_pseudo_p ());
6856 if (arm_pic_register != INVALID_REGNUM
6857 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6859 if (!cfun->machine->pic_reg)
6860 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6862 /* Play games to avoid marking the function as needing pic
6863 if we are being called as part of the cost-estimation
6864 process. */
6865 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6866 crtl->uses_pic_offset_table = 1;
6868 else
6870 rtx_insn *seq, *insn;
6872 if (!cfun->machine->pic_reg)
6873 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6875 /* Play games to avoid marking the function as needing pic
6876 if we are being called as part of the cost-estimation
6877 process. */
6878 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6880 crtl->uses_pic_offset_table = 1;
6881 start_sequence ();
6883 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6884 && arm_pic_register > LAST_LO_REGNUM)
6885 emit_move_insn (cfun->machine->pic_reg,
6886 gen_rtx_REG (Pmode, arm_pic_register));
6887 else
6888 arm_load_pic_register (0UL);
6890 seq = get_insns ();
6891 end_sequence ();
6893 for (insn = seq; insn; insn = NEXT_INSN (insn))
6894 if (INSN_P (insn))
6895 INSN_LOCATION (insn) = prologue_location;
6897 /* We can be called during expansion of PHI nodes, where
6898 we can't yet emit instructions directly in the final
6899 insn stream. Queue the insns on the entry edge, they will
6900 be committed after everything else is expanded. */
6901 insert_insn_on_edge (seq,
6902 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6909 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6911 if (GET_CODE (orig) == SYMBOL_REF
6912 || GET_CODE (orig) == LABEL_REF)
6914 rtx insn;
6916 if (reg == 0)
6918 gcc_assert (can_create_pseudo_p ());
6919 reg = gen_reg_rtx (Pmode);
6922 /* VxWorks does not impose a fixed gap between segments; the run-time
6923 gap can be different from the object-file gap. We therefore can't
6924 use GOTOFF unless we are absolutely sure that the symbol is in the
6925 same segment as the GOT. Unfortunately, the flexibility of linker
6926 scripts means that we can't be sure of that in general, so assume
6927 that GOTOFF is never valid on VxWorks. */
6928 if ((GET_CODE (orig) == LABEL_REF
6929 || (GET_CODE (orig) == SYMBOL_REF &&
6930 SYMBOL_REF_LOCAL_P (orig)))
6931 && NEED_GOT_RELOC
6932 && arm_pic_data_is_text_relative)
6933 insn = arm_pic_static_addr (orig, reg);
6934 else
6936 rtx pat;
6937 rtx mem;
6939 /* If this function doesn't have a pic register, create one now. */
6940 require_pic_register ();
6942 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6944 /* Make the MEM as close to a constant as possible. */
6945 mem = SET_SRC (pat);
6946 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6947 MEM_READONLY_P (mem) = 1;
6948 MEM_NOTRAP_P (mem) = 1;
6950 insn = emit_insn (pat);
6953 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6954 by loop. */
6955 set_unique_reg_note (insn, REG_EQUAL, orig);
6957 return reg;
6959 else if (GET_CODE (orig) == CONST)
6961 rtx base, offset;
6963 if (GET_CODE (XEXP (orig, 0)) == PLUS
6964 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6965 return orig;
6967 /* Handle the case where we have: const (UNSPEC_TLS). */
6968 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6969 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6970 return orig;
6972 /* Handle the case where we have:
6973 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6974 CONST_INT. */
6975 if (GET_CODE (XEXP (orig, 0)) == PLUS
6976 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6977 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6979 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6980 return orig;
6983 if (reg == 0)
6985 gcc_assert (can_create_pseudo_p ());
6986 reg = gen_reg_rtx (Pmode);
6989 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6991 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6992 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6993 base == reg ? 0 : reg);
6995 if (CONST_INT_P (offset))
6997 /* The base register doesn't really matter, we only want to
6998 test the index for the appropriate mode. */
6999 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7001 gcc_assert (can_create_pseudo_p ());
7002 offset = force_reg (Pmode, offset);
7005 if (CONST_INT_P (offset))
7006 return plus_constant (Pmode, base, INTVAL (offset));
7009 if (GET_MODE_SIZE (mode) > 4
7010 && (GET_MODE_CLASS (mode) == MODE_INT
7011 || TARGET_SOFT_FLOAT))
7013 emit_insn (gen_addsi3 (reg, base, offset));
7014 return reg;
7017 return gen_rtx_PLUS (Pmode, base, offset);
7020 return orig;
7024 /* Find a spare register to use during the prolog of a function. */
7026 static int
7027 thumb_find_work_register (unsigned long pushed_regs_mask)
7029 int reg;
7031 /* Check the argument registers first as these are call-used. The
7032 register allocation order means that sometimes r3 might be used
7033 but earlier argument registers might not, so check them all. */
7034 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7035 if (!df_regs_ever_live_p (reg))
7036 return reg;
7038 /* Before going on to check the call-saved registers we can try a couple
7039 more ways of deducing that r3 is available. The first is when we are
7040 pushing anonymous arguments onto the stack and we have less than 4
7041 registers worth of fixed arguments(*). In this case r3 will be part of
7042 the variable argument list and so we can be sure that it will be
7043 pushed right at the start of the function. Hence it will be available
7044 for the rest of the prologue.
7045 (*): ie crtl->args.pretend_args_size is greater than 0. */
7046 if (cfun->machine->uses_anonymous_args
7047 && crtl->args.pretend_args_size > 0)
7048 return LAST_ARG_REGNUM;
7050 /* The other case is when we have fixed arguments but less than 4 registers
7051 worth. In this case r3 might be used in the body of the function, but
7052 it is not being used to convey an argument into the function. In theory
7053 we could just check crtl->args.size to see how many bytes are
7054 being passed in argument registers, but it seems that it is unreliable.
7055 Sometimes it will have the value 0 when in fact arguments are being
7056 passed. (See testcase execute/20021111-1.c for an example). So we also
7057 check the args_info.nregs field as well. The problem with this field is
7058 that it makes no allowances for arguments that are passed to the
7059 function but which are not used. Hence we could miss an opportunity
7060 when a function has an unused argument in r3. But it is better to be
7061 safe than to be sorry. */
7062 if (! cfun->machine->uses_anonymous_args
7063 && crtl->args.size >= 0
7064 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7065 && (TARGET_AAPCS_BASED
7066 ? crtl->args.info.aapcs_ncrn < 4
7067 : crtl->args.info.nregs < 4))
7068 return LAST_ARG_REGNUM;
7070 /* Otherwise look for a call-saved register that is going to be pushed. */
7071 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7072 if (pushed_regs_mask & (1 << reg))
7073 return reg;
7075 if (TARGET_THUMB2)
7077 /* Thumb-2 can use high regs. */
7078 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7079 if (pushed_regs_mask & (1 << reg))
7080 return reg;
7082 /* Something went wrong - thumb_compute_save_reg_mask()
7083 should have arranged for a suitable register to be pushed. */
7084 gcc_unreachable ();
7087 static GTY(()) int pic_labelno;
7089 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7090 low register. */
7092 void
7093 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7095 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7097 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7098 return;
7100 gcc_assert (flag_pic);
7102 pic_reg = cfun->machine->pic_reg;
7103 if (TARGET_VXWORKS_RTP)
7105 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7106 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7107 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7109 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7111 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7112 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7114 else
7116 /* We use an UNSPEC rather than a LABEL_REF because this label
7117 never appears in the code stream. */
7119 labelno = GEN_INT (pic_labelno++);
7120 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7121 l1 = gen_rtx_CONST (VOIDmode, l1);
7123 /* On the ARM the PC register contains 'dot + 8' at the time of the
7124 addition, on the Thumb it is 'dot + 4'. */
7125 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7126 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7127 UNSPEC_GOTSYM_OFF);
7128 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7130 if (TARGET_32BIT)
7132 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7134 else /* TARGET_THUMB1 */
7136 if (arm_pic_register != INVALID_REGNUM
7137 && REGNO (pic_reg) > LAST_LO_REGNUM)
7139 /* We will have pushed the pic register, so we should always be
7140 able to find a work register. */
7141 pic_tmp = gen_rtx_REG (SImode,
7142 thumb_find_work_register (saved_regs));
7143 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7144 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7145 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7147 else if (arm_pic_register != INVALID_REGNUM
7148 && arm_pic_register > LAST_LO_REGNUM
7149 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7151 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7152 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7153 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7155 else
7156 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7160 /* Need to emit this whether or not we obey regdecls,
7161 since setjmp/longjmp can cause life info to screw up. */
7162 emit_use (pic_reg);
7165 /* Generate code to load the address of a static var when flag_pic is set. */
7166 static rtx
7167 arm_pic_static_addr (rtx orig, rtx reg)
7169 rtx l1, labelno, offset_rtx, insn;
7171 gcc_assert (flag_pic);
7173 /* We use an UNSPEC rather than a LABEL_REF because this label
7174 never appears in the code stream. */
7175 labelno = GEN_INT (pic_labelno++);
7176 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7177 l1 = gen_rtx_CONST (VOIDmode, l1);
7179 /* On the ARM the PC register contains 'dot + 8' at the time of the
7180 addition, on the Thumb it is 'dot + 4'. */
7181 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7182 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7183 UNSPEC_SYMBOL_OFFSET);
7184 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7186 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7187 return insn;
7190 /* Return nonzero if X is valid as an ARM state addressing register. */
7191 static int
7192 arm_address_register_rtx_p (rtx x, int strict_p)
7194 int regno;
7196 if (!REG_P (x))
7197 return 0;
7199 regno = REGNO (x);
7201 if (strict_p)
7202 return ARM_REGNO_OK_FOR_BASE_P (regno);
7204 return (regno <= LAST_ARM_REGNUM
7205 || regno >= FIRST_PSEUDO_REGISTER
7206 || regno == FRAME_POINTER_REGNUM
7207 || regno == ARG_POINTER_REGNUM);
7210 /* Return TRUE if this rtx is the difference of a symbol and a label,
7211 and will reduce to a PC-relative relocation in the object file.
7212 Expressions like this can be left alone when generating PIC, rather
7213 than forced through the GOT. */
7214 static int
7215 pcrel_constant_p (rtx x)
7217 if (GET_CODE (x) == MINUS)
7218 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7220 return FALSE;
7223 /* Return true if X will surely end up in an index register after next
7224 splitting pass. */
7225 static bool
7226 will_be_in_index_register (const_rtx x)
7228 /* arm.md: calculate_pic_address will split this into a register. */
7229 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7232 /* Return nonzero if X is a valid ARM state address operand. */
7234 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7235 int strict_p)
7237 bool use_ldrd;
7238 enum rtx_code code = GET_CODE (x);
7240 if (arm_address_register_rtx_p (x, strict_p))
7241 return 1;
7243 use_ldrd = (TARGET_LDRD
7244 && (mode == DImode
7245 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7247 if (code == POST_INC || code == PRE_DEC
7248 || ((code == PRE_INC || code == POST_DEC)
7249 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7250 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7252 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7253 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7254 && GET_CODE (XEXP (x, 1)) == PLUS
7255 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7257 rtx addend = XEXP (XEXP (x, 1), 1);
7259 /* Don't allow ldrd post increment by register because it's hard
7260 to fixup invalid register choices. */
7261 if (use_ldrd
7262 && GET_CODE (x) == POST_MODIFY
7263 && REG_P (addend))
7264 return 0;
7266 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7267 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7270 /* After reload constants split into minipools will have addresses
7271 from a LABEL_REF. */
7272 else if (reload_completed
7273 && (code == LABEL_REF
7274 || (code == CONST
7275 && GET_CODE (XEXP (x, 0)) == PLUS
7276 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7277 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7278 return 1;
7280 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7281 return 0;
7283 else if (code == PLUS)
7285 rtx xop0 = XEXP (x, 0);
7286 rtx xop1 = XEXP (x, 1);
7288 return ((arm_address_register_rtx_p (xop0, strict_p)
7289 && ((CONST_INT_P (xop1)
7290 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7291 || (!strict_p && will_be_in_index_register (xop1))))
7292 || (arm_address_register_rtx_p (xop1, strict_p)
7293 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7296 #if 0
7297 /* Reload currently can't handle MINUS, so disable this for now */
7298 else if (GET_CODE (x) == MINUS)
7300 rtx xop0 = XEXP (x, 0);
7301 rtx xop1 = XEXP (x, 1);
7303 return (arm_address_register_rtx_p (xop0, strict_p)
7304 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7306 #endif
7308 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7309 && code == SYMBOL_REF
7310 && CONSTANT_POOL_ADDRESS_P (x)
7311 && ! (flag_pic
7312 && symbol_mentioned_p (get_pool_constant (x))
7313 && ! pcrel_constant_p (get_pool_constant (x))))
7314 return 1;
7316 return 0;
7319 /* Return nonzero if X is a valid Thumb-2 address operand. */
7320 static int
7321 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7323 bool use_ldrd;
7324 enum rtx_code code = GET_CODE (x);
7326 if (arm_address_register_rtx_p (x, strict_p))
7327 return 1;
7329 use_ldrd = (TARGET_LDRD
7330 && (mode == DImode
7331 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7333 if (code == POST_INC || code == PRE_DEC
7334 || ((code == PRE_INC || code == POST_DEC)
7335 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7336 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7338 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7339 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7340 && GET_CODE (XEXP (x, 1)) == PLUS
7341 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7343 /* Thumb-2 only has autoincrement by constant. */
7344 rtx addend = XEXP (XEXP (x, 1), 1);
7345 HOST_WIDE_INT offset;
7347 if (!CONST_INT_P (addend))
7348 return 0;
7350 offset = INTVAL(addend);
7351 if (GET_MODE_SIZE (mode) <= 4)
7352 return (offset > -256 && offset < 256);
7354 return (use_ldrd && offset > -1024 && offset < 1024
7355 && (offset & 3) == 0);
7358 /* After reload constants split into minipools will have addresses
7359 from a LABEL_REF. */
7360 else if (reload_completed
7361 && (code == LABEL_REF
7362 || (code == CONST
7363 && GET_CODE (XEXP (x, 0)) == PLUS
7364 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7365 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7366 return 1;
7368 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7369 return 0;
7371 else if (code == PLUS)
7373 rtx xop0 = XEXP (x, 0);
7374 rtx xop1 = XEXP (x, 1);
7376 return ((arm_address_register_rtx_p (xop0, strict_p)
7377 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7378 || (!strict_p && will_be_in_index_register (xop1))))
7379 || (arm_address_register_rtx_p (xop1, strict_p)
7380 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7383   /* Normally we can assign constant values to target registers without
7384      the help of the constant pool.  But there are cases where we have to
7385      use the constant pool, such as:
7386      1) assigning a label to a register.
7387      2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7389      A constant pool access of the form:
7390      (set (reg r0) (mem (symbol_ref (".LC0"))))
7391      will cause the use of the literal pool (later, in function arm_reorg).
7392      So here we mark such a format as invalid, and the compiler will then
7393      adjust it into:
7394 (set (reg r0) (symbol_ref (".LC0")))
7395 (set (reg r0) (mem (reg r0))).
7396 No extra register is required, and (mem (reg r0)) won't cause the use
7397 of literal pools. */
7398 else if (arm_disable_literal_pool && code == SYMBOL_REF
7399 && CONSTANT_POOL_ADDRESS_P (x))
7400 return 0;
7402 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7403 && code == SYMBOL_REF
7404 && CONSTANT_POOL_ADDRESS_P (x)
7405 && ! (flag_pic
7406 && symbol_mentioned_p (get_pool_constant (x))
7407 && ! pcrel_constant_p (get_pool_constant (x))))
7408 return 1;
7410 return 0;
7413 /* Return nonzero if INDEX is valid for an address index operand in
7414 ARM state. */
7415 static int
7416 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7417 int strict_p)
7419 HOST_WIDE_INT range;
7420 enum rtx_code code = GET_CODE (index);
7422 /* Standard coprocessor addressing modes. */
7423 if (TARGET_HARD_FLOAT
7424 && TARGET_VFP
7425 && (mode == SFmode || mode == DFmode))
7426 return (code == CONST_INT && INTVAL (index) < 1024
7427 && INTVAL (index) > -1024
7428 && (INTVAL (index) & 3) == 0);
7430 /* For quad modes, we restrict the constant offset to be slightly less
7431 than what the instruction format permits. We do this because for
7432 quad mode moves, we will actually decompose them into two separate
7433 double-mode reads or writes. INDEX must therefore be a valid
7434 (double-mode) offset and so should INDEX+8. */
7435 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7436 return (code == CONST_INT
7437 && INTVAL (index) < 1016
7438 && INTVAL (index) > -1024
7439 && (INTVAL (index) & 3) == 0);
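  /* Concretely: the largest word-aligned double-mode offset accepted below is
     1020, so a quad-mode offset OFF must also satisfy OFF + 8 <= 1020, i.e.
     OFF <= 1012; the "< 1016" test above enforces exactly that for
     word-aligned values.  (Illustrative note based on the checks here.)  */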
7441 /* We have no such constraint on double mode offsets, so we permit the
7442 full range of the instruction format. */
7443 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7444 return (code == CONST_INT
7445 && INTVAL (index) < 1024
7446 && INTVAL (index) > -1024
7447 && (INTVAL (index) & 3) == 0);
7449 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7450 return (code == CONST_INT
7451 && INTVAL (index) < 1024
7452 && INTVAL (index) > -1024
7453 && (INTVAL (index) & 3) == 0);
7455 if (arm_address_register_rtx_p (index, strict_p)
7456 && (GET_MODE_SIZE (mode) <= 4))
7457 return 1;
7459 if (mode == DImode || mode == DFmode)
7461 if (code == CONST_INT)
7463 HOST_WIDE_INT val = INTVAL (index);
7465 if (TARGET_LDRD)
7466 return val > -256 && val < 256;
7467 else
7468 return val > -4096 && val < 4092;
7471 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7474 if (GET_MODE_SIZE (mode) <= 4
7475 && ! (arm_arch4
7476 && (mode == HImode
7477 || mode == HFmode
7478 || (mode == QImode && outer == SIGN_EXTEND))))
7480 if (code == MULT)
7482 rtx xiop0 = XEXP (index, 0);
7483 rtx xiop1 = XEXP (index, 1);
7485 return ((arm_address_register_rtx_p (xiop0, strict_p)
7486 && power_of_two_operand (xiop1, SImode))
7487 || (arm_address_register_rtx_p (xiop1, strict_p)
7488 && power_of_two_operand (xiop0, SImode)));
7490 else if (code == LSHIFTRT || code == ASHIFTRT
7491 || code == ASHIFT || code == ROTATERT)
7493 rtx op = XEXP (index, 1);
7495 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7496 && CONST_INT_P (op)
7497 && INTVAL (op) > 0
7498 && INTVAL (op) <= 31);
7502 /* For ARM v4 we may be doing a sign-extend operation during the
7503 load. */
7504 if (arm_arch4)
7506 if (mode == HImode
7507 || mode == HFmode
7508 || (outer == SIGN_EXTEND && mode == QImode))
7509 range = 256;
7510 else
7511 range = 4096;
7513 else
7514 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7516 return (code == CONST_INT
7517 && INTVAL (index) < range
7518 && INTVAL (index) > -range);
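  /* To illustrate the ranges chosen above: ldr/ldrb take a 12-bit immediate,
     so plain word and byte accesses accept indexes up to +/-4095; the
     halfword and signed-byte loads available from ARMv4 (ldrh/ldrsh/ldrsb)
     only take an 8-bit immediate, hence the +/-255 limit when MODE is
     HImode/HFmode or a sign-extended QImode.  */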
7521 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7522 index operand. i.e. 1, 2, 4 or 8. */
7523 static bool
7524 thumb2_index_mul_operand (rtx op)
7526 HOST_WIDE_INT val;
7528 if (!CONST_INT_P (op))
7529 return false;
7531 val = INTVAL(op);
7532 return (val == 1 || val == 2 || val == 4 || val == 8);
7535 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7536 static int
7537 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7539 enum rtx_code code = GET_CODE (index);
7541 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7542 /* Standard coprocessor addressing modes. */
7543 if (TARGET_HARD_FLOAT
7544 && TARGET_VFP
7545 && (mode == SFmode || mode == DFmode))
7546 return (code == CONST_INT && INTVAL (index) < 1024
7547 	  /* Thumb-2 allows only a > -256 index range for its core register
7548 load/stores. Since we allow SF/DF in core registers, we have
7549 to use the intersection between -256~4096 (core) and -1024~1024
7550 (coprocessor). */
7551 && INTVAL (index) > -256
7552 && (INTVAL (index) & 3) == 0);
7554 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7556 /* For DImode assume values will usually live in core regs
7557 and only allow LDRD addressing modes. */
7558 if (!TARGET_LDRD || mode != DImode)
7559 return (code == CONST_INT
7560 && INTVAL (index) < 1024
7561 && INTVAL (index) > -1024
7562 && (INTVAL (index) & 3) == 0);
7565 /* For quad modes, we restrict the constant offset to be slightly less
7566 than what the instruction format permits. We do this because for
7567 quad mode moves, we will actually decompose them into two separate
7568 double-mode reads or writes. INDEX must therefore be a valid
7569 (double-mode) offset and so should INDEX+8. */
7570 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7571 return (code == CONST_INT
7572 && INTVAL (index) < 1016
7573 && INTVAL (index) > -1024
7574 && (INTVAL (index) & 3) == 0);
7576 /* We have no such constraint on double mode offsets, so we permit the
7577 full range of the instruction format. */
7578 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7579 return (code == CONST_INT
7580 && INTVAL (index) < 1024
7581 && INTVAL (index) > -1024
7582 && (INTVAL (index) & 3) == 0);
7584 if (arm_address_register_rtx_p (index, strict_p)
7585 && (GET_MODE_SIZE (mode) <= 4))
7586 return 1;
7588 if (mode == DImode || mode == DFmode)
7590 if (code == CONST_INT)
7592 HOST_WIDE_INT val = INTVAL (index);
7593 /* ??? Can we assume ldrd for thumb2? */
7594 /* Thumb-2 ldrd only has reg+const addressing modes. */
7595 /* ldrd supports offsets of +-1020.
7596 However the ldr fallback does not. */
7597 return val > -256 && val < 256 && (val & 3) == 0;
7599 else
7600 return 0;
7603 if (code == MULT)
7605 rtx xiop0 = XEXP (index, 0);
7606 rtx xiop1 = XEXP (index, 1);
7608 return ((arm_address_register_rtx_p (xiop0, strict_p)
7609 && thumb2_index_mul_operand (xiop1))
7610 || (arm_address_register_rtx_p (xiop1, strict_p)
7611 && thumb2_index_mul_operand (xiop0)));
7613 else if (code == ASHIFT)
7615 rtx op = XEXP (index, 1);
7617 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7618 && CONST_INT_P (op)
7619 && INTVAL (op) > 0
7620 && INTVAL (op) <= 3);
7623 return (code == CONST_INT
7624 && INTVAL (index) < 4096
7625 && INTVAL (index) > -256);
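  /* For example, with a word-sized MODE the tests above accept an immediate
     offset from -255 to 4095, a plain register index [Rn, Rm], a register
     scaled by 1, 2, 4 or 8, or [Rn, Rm, LSL #1..3].  (Illustrative summary
     of the cases handled in this function.)  */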
7628 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7629 static int
7630 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7632 int regno;
7634 if (!REG_P (x))
7635 return 0;
7637 regno = REGNO (x);
7639 if (strict_p)
7640 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7642 return (regno <= LAST_LO_REGNUM
7643 || regno > LAST_VIRTUAL_REGISTER
7644 || regno == FRAME_POINTER_REGNUM
7645 || (GET_MODE_SIZE (mode) >= 4
7646 && (regno == STACK_POINTER_REGNUM
7647 || regno >= FIRST_PSEUDO_REGISTER
7648 || x == hard_frame_pointer_rtx
7649 || x == arg_pointer_rtx)));
7652 /* Return nonzero if x is a legitimate index register. This is the case
7653 for any base register that can access a QImode object. */
7654 inline static int
7655 thumb1_index_register_rtx_p (rtx x, int strict_p)
7657 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7660 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7662 The AP may be eliminated to either the SP or the FP, so we use the
7663 least common denominator, e.g. SImode, and offsets from 0 to 64.
7665 ??? Verify whether the above is the right approach.
7667 ??? Also, the FP may be eliminated to the SP, so perhaps that
7668 needs special handling also.
7670 ??? Look at how the mips16 port solves this problem. It probably uses
7671 better ways to solve some of these problems.
7673 Although it is not incorrect, we don't accept QImode and HImode
7674 addresses based on the frame pointer or arg pointer until the
7675 reload pass starts. This is so that eliminating such addresses
7676 into stack based ones won't produce impossible code. */
7678 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7680 /* ??? Not clear if this is right. Experiment. */
7681 if (GET_MODE_SIZE (mode) < 4
7682 && !(reload_in_progress || reload_completed)
7683 && (reg_mentioned_p (frame_pointer_rtx, x)
7684 || reg_mentioned_p (arg_pointer_rtx, x)
7685 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7686 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7687 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7688 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7689 return 0;
7691 /* Accept any base register. SP only in SImode or larger. */
7692 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7693 return 1;
7695 /* This is PC relative data before arm_reorg runs. */
7696 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7697 && GET_CODE (x) == SYMBOL_REF
7698 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7699 return 1;
7701 /* This is PC relative data after arm_reorg runs. */
7702 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7703 && reload_completed
7704 && (GET_CODE (x) == LABEL_REF
7705 || (GET_CODE (x) == CONST
7706 && GET_CODE (XEXP (x, 0)) == PLUS
7707 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7708 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7709 return 1;
7711 /* Post-inc indexing only supported for SImode and larger. */
7712 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7713 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7714 return 1;
7716 else if (GET_CODE (x) == PLUS)
7718 /* REG+REG address can be any two index registers. */
7719 /* We disallow FRAME+REG addressing since we know that FRAME
7720 will be replaced with STACK, and SP relative addressing only
7721 permits SP+OFFSET. */
7722 if (GET_MODE_SIZE (mode) <= 4
7723 && XEXP (x, 0) != frame_pointer_rtx
7724 && XEXP (x, 1) != frame_pointer_rtx
7725 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7726 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7727 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7728 return 1;
7730 /* REG+const has 5-7 bit offset for non-SP registers. */
7731 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7732 || XEXP (x, 0) == arg_pointer_rtx)
7733 && CONST_INT_P (XEXP (x, 1))
7734 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7735 return 1;
7737       /* REG+const has a 10-bit offset for SP, but only SImode and
7738 	 larger are supported.  */
7739 /* ??? Should probably check for DI/DFmode overflow here
7740 just like GO_IF_LEGITIMATE_OFFSET does. */
7741 else if (REG_P (XEXP (x, 0))
7742 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7743 && GET_MODE_SIZE (mode) >= 4
7744 && CONST_INT_P (XEXP (x, 1))
7745 && INTVAL (XEXP (x, 1)) >= 0
7746 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7747 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7748 return 1;
7750 else if (REG_P (XEXP (x, 0))
7751 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7752 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7753 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7754 && REGNO (XEXP (x, 0))
7755 <= LAST_VIRTUAL_POINTER_REGISTER))
7756 && GET_MODE_SIZE (mode) >= 4
7757 && CONST_INT_P (XEXP (x, 1))
7758 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7759 return 1;
7762 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7763 && GET_MODE_SIZE (mode) == 4
7764 && GET_CODE (x) == SYMBOL_REF
7765 && CONSTANT_POOL_ADDRESS_P (x)
7766 && ! (flag_pic
7767 && symbol_mentioned_p (get_pool_constant (x))
7768 && ! pcrel_constant_p (get_pool_constant (x))))
7769 return 1;
7771 return 0;
7774 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7775 instruction of mode MODE. */
7777 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7779 switch (GET_MODE_SIZE (mode))
7781 case 1:
7782 return val >= 0 && val < 32;
7784 case 2:
7785 return val >= 0 && val < 64 && (val & 1) == 0;
7787 default:
7788 return (val >= 0
7789 && (val + GET_MODE_SIZE (mode)) <= 128
7790 && (val & 3) == 0);
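  /* So, for instance, QImode accepts offsets 0..31, HImode accepts even
     offsets 0..62, and SImode accepts word-aligned offsets 0..124
     (since 124 + 4 <= 128); thumb_legitimate_offset_p (SImode, 128) is
     rejected.  (Worked examples of the ranges above.)  */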
7794 bool
7795 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7797 if (TARGET_ARM)
7798 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7799 else if (TARGET_THUMB2)
7800 return thumb2_legitimate_address_p (mode, x, strict_p);
7801 else /* if (TARGET_THUMB1) */
7802 return thumb1_legitimate_address_p (mode, x, strict_p);
7805 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7807 Given an rtx X being reloaded into a reg required to be
7808 in class CLASS, return the class of reg to actually use.
7809 In general this is just CLASS, but for the Thumb core registers and
7810 immediate constants we prefer a LO_REGS class or a subset. */
7812 static reg_class_t
7813 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7815 if (TARGET_32BIT)
7816 return rclass;
7817 else
7819 if (rclass == GENERAL_REGS)
7820 return LO_REGS;
7821 else
7822 return rclass;
7826 /* Build the SYMBOL_REF for __tls_get_addr. */
7828 static GTY(()) rtx tls_get_addr_libfunc;
7830 static rtx
7831 get_tls_get_addr (void)
7833 if (!tls_get_addr_libfunc)
7834 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7835 return tls_get_addr_libfunc;
7839 arm_load_tp (rtx target)
7841 if (!target)
7842 target = gen_reg_rtx (SImode);
7844 if (TARGET_HARD_TP)
7846 /* Can return in any reg. */
7847 emit_insn (gen_load_tp_hard (target));
7849 else
7851 /* Always returned in r0. Immediately copy the result into a pseudo,
7852 otherwise other uses of r0 (e.g. setting up function arguments) may
7853 clobber the value. */
7855 rtx tmp;
7857 emit_insn (gen_load_tp_soft ());
7859 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7860 emit_move_insn (target, tmp);
7862 return target;
7865 static rtx
7866 load_tls_operand (rtx x, rtx reg)
7868 rtx tmp;
7870 if (reg == NULL_RTX)
7871 reg = gen_reg_rtx (SImode);
7873 tmp = gen_rtx_CONST (SImode, x);
7875 emit_move_insn (reg, tmp);
7877 return reg;
7880 static rtx
7881 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7883 rtx insns, label, labelno, sum;
7885 gcc_assert (reloc != TLS_DESCSEQ);
7886 start_sequence ();
7888 labelno = GEN_INT (pic_labelno++);
7889 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7890 label = gen_rtx_CONST (VOIDmode, label);
7892 sum = gen_rtx_UNSPEC (Pmode,
7893 gen_rtvec (4, x, GEN_INT (reloc), label,
7894 GEN_INT (TARGET_ARM ? 8 : 4)),
7895 UNSPEC_TLS);
7896 reg = load_tls_operand (sum, reg);
7898 if (TARGET_ARM)
7899 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7900 else
7901 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7903 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7904 LCT_PURE, /* LCT_CONST? */
7905 Pmode, 1, reg, Pmode);
7907 insns = get_insns ();
7908 end_sequence ();
7910 return insns;
7913 static rtx
7914 arm_tls_descseq_addr (rtx x, rtx reg)
7916 rtx labelno = GEN_INT (pic_labelno++);
7917 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7918 rtx sum = gen_rtx_UNSPEC (Pmode,
7919 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7920 gen_rtx_CONST (VOIDmode, label),
7921 GEN_INT (!TARGET_ARM)),
7922 UNSPEC_TLS);
7923 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7925 emit_insn (gen_tlscall (x, labelno));
7926 if (!reg)
7927 reg = gen_reg_rtx (SImode);
7928 else
7929 gcc_assert (REGNO (reg) != R0_REGNUM);
7931 emit_move_insn (reg, reg0);
7933 return reg;
7937 legitimize_tls_address (rtx x, rtx reg)
7939 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7940 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7942 switch (model)
7944 case TLS_MODEL_GLOBAL_DYNAMIC:
7945 if (TARGET_GNU2_TLS)
7947 reg = arm_tls_descseq_addr (x, reg);
7949 tp = arm_load_tp (NULL_RTX);
7951 dest = gen_rtx_PLUS (Pmode, tp, reg);
7953 else
7955 /* Original scheme */
7956 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7957 dest = gen_reg_rtx (Pmode);
7958 emit_libcall_block (insns, dest, ret, x);
7960 return dest;
7962 case TLS_MODEL_LOCAL_DYNAMIC:
7963 if (TARGET_GNU2_TLS)
7965 reg = arm_tls_descseq_addr (x, reg);
7967 tp = arm_load_tp (NULL_RTX);
7969 dest = gen_rtx_PLUS (Pmode, tp, reg);
7971 else
7973 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7975 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7976 share the LDM result with other LD model accesses. */
7977 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7978 UNSPEC_TLS);
7979 dest = gen_reg_rtx (Pmode);
7980 emit_libcall_block (insns, dest, ret, eqv);
7982 /* Load the addend. */
7983 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7984 GEN_INT (TLS_LDO32)),
7985 UNSPEC_TLS);
7986 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7987 dest = gen_rtx_PLUS (Pmode, dest, addend);
7989 return dest;
7991 case TLS_MODEL_INITIAL_EXEC:
7992 labelno = GEN_INT (pic_labelno++);
7993 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7994 label = gen_rtx_CONST (VOIDmode, label);
7995 sum = gen_rtx_UNSPEC (Pmode,
7996 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7997 GEN_INT (TARGET_ARM ? 8 : 4)),
7998 UNSPEC_TLS);
7999 reg = load_tls_operand (sum, reg);
8001 if (TARGET_ARM)
8002 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8003 else if (TARGET_THUMB2)
8004 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8005 else
8007 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8008 emit_move_insn (reg, gen_const_mem (SImode, reg));
8011 tp = arm_load_tp (NULL_RTX);
8013 return gen_rtx_PLUS (Pmode, tp, reg);
8015 case TLS_MODEL_LOCAL_EXEC:
8016 tp = arm_load_tp (NULL_RTX);
8018 reg = gen_rtx_UNSPEC (Pmode,
8019 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8020 UNSPEC_TLS);
8021 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8023 return gen_rtx_PLUS (Pmode, tp, reg);
8025 default:
8026 abort ();
8030 /* Try machine-dependent ways of modifying an illegitimate address
8031 to be legitimate. If we find one, return the new, valid address. */
8033 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8035 if (arm_tls_referenced_p (x))
8037 rtx addend = NULL;
8039 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8041 addend = XEXP (XEXP (x, 0), 1);
8042 x = XEXP (XEXP (x, 0), 0);
8045 if (GET_CODE (x) != SYMBOL_REF)
8046 return x;
8048 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8050 x = legitimize_tls_address (x, NULL_RTX);
8052 if (addend)
8054 x = gen_rtx_PLUS (SImode, x, addend);
8055 orig_x = x;
8057 else
8058 return x;
8061 if (!TARGET_ARM)
8063 /* TODO: legitimize_address for Thumb2. */
8064 if (TARGET_THUMB2)
8065 return x;
8066 return thumb_legitimize_address (x, orig_x, mode);
8069 if (GET_CODE (x) == PLUS)
8071 rtx xop0 = XEXP (x, 0);
8072 rtx xop1 = XEXP (x, 1);
8074 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8075 xop0 = force_reg (SImode, xop0);
8077 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8078 && !symbol_mentioned_p (xop1))
8079 xop1 = force_reg (SImode, xop1);
8081 if (ARM_BASE_REGISTER_RTX_P (xop0)
8082 && CONST_INT_P (xop1))
8084 HOST_WIDE_INT n, low_n;
8085 rtx base_reg, val;
8086 n = INTVAL (xop1);
8088 /* VFP addressing modes actually allow greater offsets, but for
8089 now we just stick with the lowest common denominator. */
8090 if (mode == DImode
8091 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
8093 low_n = n & 0x0f;
8094 n &= ~0x0f;
8095 if (low_n > 4)
8097 n += 16;
8098 low_n -= 16;
8101 else
8103 low_n = ((mode) == TImode ? 0
8104 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8105 n -= low_n;
8108 base_reg = gen_reg_rtx (SImode);
8109 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8110 emit_move_insn (base_reg, val);
8111 x = plus_constant (Pmode, base_reg, low_n);
8113 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8114 x = gen_rtx_PLUS (SImode, xop0, xop1);
8117 /* XXX We don't allow MINUS any more -- see comment in
8118 arm_legitimate_address_outer_p (). */
8119 else if (GET_CODE (x) == MINUS)
8121 rtx xop0 = XEXP (x, 0);
8122 rtx xop1 = XEXP (x, 1);
8124 if (CONSTANT_P (xop0))
8125 xop0 = force_reg (SImode, xop0);
8127 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8128 xop1 = force_reg (SImode, xop1);
8130 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8131 x = gen_rtx_MINUS (SImode, xop0, xop1);
8134   /* Make sure to take full advantage of the pre-indexed addressing mode
8135      with absolute addresses, which often allows the base register to be
8136      factored out across multiple adjacent memory references, and might
8137      even allow the minipool to be avoided entirely.  */
8138 else if (CONST_INT_P (x) && optimize > 0)
8140 unsigned int bits;
8141 HOST_WIDE_INT mask, base, index;
8142 rtx base_reg;
8144       /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
8145          use an 8-bit index.  So let's use a 12-bit index for SImode only and
8146          hope that arm_gen_constant will enable ldrb to use more bits.
8147 bits = (mode == SImode) ? 12 : 8;
8148 mask = (1 << bits) - 1;
8149 base = INTVAL (x) & ~mask;
8150 index = INTVAL (x) & mask;
8151 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8153 /* It'll most probably be more efficient to generate the base
8154 with more bits set and use a negative index instead. */
8155 base |= mask;
8156 index -= mask;
8158 base_reg = force_reg (SImode, GEN_INT (base));
8159 x = plus_constant (Pmode, base_reg, index);
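      /* As a rough example, with MODE == SImode an absolute address of
	 0x3A7F4 is split as base = 0x3A000, index = 0x7F4: the base is
	 loaded into a register once and the 12-bit index is folded into the
	 ldr/str offset, so neighbouring accesses can share the base.  When
	 the base would need more than (32 - bits)/2 set bits, the code above
	 switches to base | mask with a negative index, on the expectation
	 that the "mostly ones" constant is cheaper to build.  */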
8162 if (flag_pic)
8164 /* We need to find and carefully transform any SYMBOL and LABEL
8165 references; so go back to the original address expression. */
8166 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8168 if (new_x != orig_x)
8169 x = new_x;
8172 return x;
8176 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8177 to be legitimate. If we find one, return the new, valid address. */
8179 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8181 if (GET_CODE (x) == PLUS
8182 && CONST_INT_P (XEXP (x, 1))
8183 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8184 || INTVAL (XEXP (x, 1)) < 0))
8186 rtx xop0 = XEXP (x, 0);
8187 rtx xop1 = XEXP (x, 1);
8188 HOST_WIDE_INT offset = INTVAL (xop1);
8190 /* Try and fold the offset into a biasing of the base register and
8191 then offsetting that. Don't do this when optimizing for space
8192 since it can cause too many CSEs. */
8193 if (optimize_size && offset >= 0
8194 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8196 HOST_WIDE_INT delta;
8198 if (offset >= 256)
8199 delta = offset - (256 - GET_MODE_SIZE (mode));
8200 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8201 delta = 31 * GET_MODE_SIZE (mode);
8202 else
8203 delta = offset & (~31 * GET_MODE_SIZE (mode));
8205 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8206 NULL_RTX);
8207 x = plus_constant (Pmode, xop0, delta);
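	  /* E.g. when this path is taken for SImode (directly addressable
	     limit 32 * 4 == 128), an offset of 260 gives
	     delta = 260 - 252 = 8: the base is biased by 252 and the final
	     address is (base + 252) + 8, whose remaining offset of 8 is
	     directly encodable.  (Worked example of the computation above.)  */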
8209 else if (offset < 0 && offset > -256)
8210 	/* Small negative offsets are best done with a subtract before the
8211 	   dereference, since forcing these into a register normally takes two
8212 	   instructions.  */
8213 x = force_operand (x, NULL_RTX);
8214 else
8216 /* For the remaining cases, force the constant into a register. */
8217 xop1 = force_reg (SImode, xop1);
8218 x = gen_rtx_PLUS (SImode, xop0, xop1);
8221 else if (GET_CODE (x) == PLUS
8222 && s_register_operand (XEXP (x, 1), SImode)
8223 && !s_register_operand (XEXP (x, 0), SImode))
8225 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8227 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8230 if (flag_pic)
8232 /* We need to find and carefully transform any SYMBOL and LABEL
8233 references; so go back to the original address expression. */
8234 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8236 if (new_x != orig_x)
8237 x = new_x;
8240 return x;
8243 /* Return TRUE if X contains any TLS symbol references. */
8245 bool
8246 arm_tls_referenced_p (rtx x)
8248 if (! TARGET_HAVE_TLS)
8249 return false;
8251 subrtx_iterator::array_type array;
8252 FOR_EACH_SUBRTX (iter, array, x, ALL)
8254 const_rtx x = *iter;
8255 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8256 return true;
8258 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8259 TLS offsets, not real symbol references. */
8260 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8261 iter.skip_subrtxes ();
8263 return false;
8266 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8268 On the ARM, allow any integer (invalid ones are removed later by insn
8269 patterns), nice doubles and symbol_refs which refer to the function's
8270 constant pool XXX.
8272 When generating pic allow anything. */
8274 static bool
8275 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8277 return flag_pic || !label_mentioned_p (x);
8280 static bool
8281 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8283 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8284 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8285 for ARMv8-M Baseline or later the result is valid. */
8286 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8287 x = XEXP (x, 0);
8289 return (CONST_INT_P (x)
8290 || CONST_DOUBLE_P (x)
8291 || CONSTANT_ADDRESS_P (x)
8292 || flag_pic);
8295 static bool
8296 arm_legitimate_constant_p (machine_mode mode, rtx x)
8298 return (!arm_cannot_force_const_mem (mode, x)
8299 && (TARGET_32BIT
8300 ? arm_legitimate_constant_p_1 (mode, x)
8301 : thumb_legitimate_constant_p (mode, x)));
8304 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8306 static bool
8307 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8309 rtx base, offset;
8311 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8313 split_const (x, &base, &offset);
8314 if (GET_CODE (base) == SYMBOL_REF
8315 && !offset_within_block_p (base, INTVAL (offset)))
8316 return true;
8318 return arm_tls_referenced_p (x);
8321 #define REG_OR_SUBREG_REG(X) \
8322 (REG_P (X) \
8323 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8325 #define REG_OR_SUBREG_RTX(X) \
8326 (REG_P (X) ? (X) : SUBREG_REG (X))
8328 static inline int
8329 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8331 machine_mode mode = GET_MODE (x);
8332 int total, words;
8334 switch (code)
8336 case ASHIFT:
8337 case ASHIFTRT:
8338 case LSHIFTRT:
8339 case ROTATERT:
8340 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8342 case PLUS:
8343 case MINUS:
8344 case COMPARE:
8345 case NEG:
8346 case NOT:
8347 return COSTS_N_INSNS (1);
8349 case MULT:
8350 if (CONST_INT_P (XEXP (x, 1)))
8352 int cycles = 0;
8353 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8355 while (i)
8357 i >>= 2;
8358 cycles++;
8360 return COSTS_N_INSNS (2) + cycles;
8362 return COSTS_N_INSNS (1) + 16;
8364 case SET:
8365 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8366 the mode. */
8367 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8368 return (COSTS_N_INSNS (words)
8369 + 4 * ((MEM_P (SET_SRC (x)))
8370 + MEM_P (SET_DEST (x))));
8372 case CONST_INT:
8373 if (outer == SET)
8375 if (UINTVAL (x) < 256
8376 /* 16-bit constant. */
8377 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8378 return 0;
8379 if (thumb_shiftable_const (INTVAL (x)))
8380 return COSTS_N_INSNS (2);
8381 return COSTS_N_INSNS (3);
8383 else if ((outer == PLUS || outer == COMPARE)
8384 && INTVAL (x) < 256 && INTVAL (x) > -256)
8385 return 0;
8386 else if ((outer == IOR || outer == XOR || outer == AND)
8387 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8388 return COSTS_N_INSNS (1);
8389 else if (outer == AND)
8391 int i;
8392 /* This duplicates the tests in the andsi3 expander. */
8393 for (i = 9; i <= 31; i++)
8394 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8395 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8396 return COSTS_N_INSNS (2);
8398 else if (outer == ASHIFT || outer == ASHIFTRT
8399 || outer == LSHIFTRT)
8400 return 0;
8401 return COSTS_N_INSNS (2);
8403 case CONST:
8404 case CONST_DOUBLE:
8405 case LABEL_REF:
8406 case SYMBOL_REF:
8407 return COSTS_N_INSNS (3);
8409 case UDIV:
8410 case UMOD:
8411 case DIV:
8412 case MOD:
8413 return 100;
8415 case TRUNCATE:
8416 return 99;
8418 case AND:
8419 case XOR:
8420 case IOR:
8421 /* XXX guess. */
8422 return 8;
8424 case MEM:
8425 /* XXX another guess. */
8426 /* Memory costs quite a lot for the first word, but subsequent words
8427 load at the equivalent of a single insn each. */
8428 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8429 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8430 ? 4 : 0));
8432 case IF_THEN_ELSE:
8433 /* XXX a guess. */
8434 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8435 return 14;
8436 return 2;
8438 case SIGN_EXTEND:
8439 case ZERO_EXTEND:
8440 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8441 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8443 if (mode == SImode)
8444 return total;
8446 if (arm_arch6)
8447 return total + COSTS_N_INSNS (1);
8449 /* Assume a two-shift sequence. Increase the cost slightly so
8450 we prefer actual shifts over an extend operation. */
8451 return total + 1 + COSTS_N_INSNS (2);
8453 default:
8454 return 99;
8458 static inline bool
8459 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8461 machine_mode mode = GET_MODE (x);
8462 enum rtx_code subcode;
8463 rtx operand;
8464 enum rtx_code code = GET_CODE (x);
8465 *total = 0;
8467 switch (code)
8469 case MEM:
8470 /* Memory costs quite a lot for the first word, but subsequent words
8471 load at the equivalent of a single insn each. */
8472 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8473 return true;
8475 case DIV:
8476 case MOD:
8477 case UDIV:
8478 case UMOD:
8479 if (TARGET_HARD_FLOAT && mode == SFmode)
8480 *total = COSTS_N_INSNS (2);
8481 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8482 *total = COSTS_N_INSNS (4);
8483 else
8484 *total = COSTS_N_INSNS (20);
8485 return false;
8487 case ROTATE:
8488 if (REG_P (XEXP (x, 1)))
8489 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8490 else if (!CONST_INT_P (XEXP (x, 1)))
8491 *total = rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8493 /* Fall through */
8494 case ROTATERT:
8495 if (mode != SImode)
8497 *total += COSTS_N_INSNS (4);
8498 return true;
8501 /* Fall through */
8502 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8503 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8504 if (mode == DImode)
8506 *total += COSTS_N_INSNS (3);
8507 return true;
8510 *total += COSTS_N_INSNS (1);
8511       /* Increase the cost of complex shifts because they aren't any faster
8512 	 and they reduce dual-issue opportunities.  */
8513 if (arm_tune_cortex_a9
8514 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8515 ++*total;
8517 return true;
8519 case MINUS:
8520 if (mode == DImode)
8522 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8523 if (CONST_INT_P (XEXP (x, 0))
8524 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8526 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8527 return true;
8530 if (CONST_INT_P (XEXP (x, 1))
8531 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8533 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8534 return true;
8537 return false;
8540 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8542 if (TARGET_HARD_FLOAT
8543 && (mode == SFmode
8544 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8546 *total = COSTS_N_INSNS (1);
8547 if (CONST_DOUBLE_P (XEXP (x, 0))
8548 && arm_const_double_rtx (XEXP (x, 0)))
8550 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8551 return true;
8554 if (CONST_DOUBLE_P (XEXP (x, 1))
8555 && arm_const_double_rtx (XEXP (x, 1)))
8557 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8558 return true;
8561 return false;
8563 *total = COSTS_N_INSNS (20);
8564 return false;
8567 *total = COSTS_N_INSNS (1);
8568 if (CONST_INT_P (XEXP (x, 0))
8569 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8571 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8572 return true;
8575 subcode = GET_CODE (XEXP (x, 1));
8576 if (subcode == ASHIFT || subcode == ASHIFTRT
8577 || subcode == LSHIFTRT
8578 || subcode == ROTATE || subcode == ROTATERT)
8580 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8581 *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
8582 return true;
8585 /* A shift as a part of RSB costs no more than RSB itself. */
8586 if (GET_CODE (XEXP (x, 0)) == MULT
8587 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8589 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, speed);
8590 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8591 return true;
8594 if (subcode == MULT
8595 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8597 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8598 *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
8599 return true;
8602 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8603 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8605 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
8606 0, speed);
8607 if (REG_P (XEXP (XEXP (x, 1), 0))
8608 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8609 *total += COSTS_N_INSNS (1);
8611 return true;
8614 /* Fall through */
8616 case PLUS:
8617 if (code == PLUS && arm_arch6 && mode == SImode
8618 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8619 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8621 *total = COSTS_N_INSNS (1);
8622 *total += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
8623 GET_CODE (XEXP (x, 0)), 0, speed);
8624 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8625 return true;
8628 /* MLA: All arguments must be registers. We filter out
8629 multiplication by a power of two, so that we fall down into
8630 the code below. */
8631 if (GET_CODE (XEXP (x, 0)) == MULT
8632 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8634 /* The cost comes from the cost of the multiply. */
8635 return false;
8638 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8640 if (TARGET_HARD_FLOAT
8641 && (mode == SFmode
8642 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8644 *total = COSTS_N_INSNS (1);
8645 if (CONST_DOUBLE_P (XEXP (x, 1))
8646 && arm_const_double_rtx (XEXP (x, 1)))
8648 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8649 return true;
8652 return false;
8655 *total = COSTS_N_INSNS (20);
8656 return false;
8659 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8660 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8662 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), mode, code,
8663 1, speed);
8664 if (REG_P (XEXP (XEXP (x, 0), 0))
8665 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8666 *total += COSTS_N_INSNS (1);
8667 return true;
8670 /* Fall through */
8672 case AND: case XOR: case IOR:
8674       /* Normally the frame registers will be split into reg+const during
8675 reload, so it is a bad idea to combine them with other instructions,
8676 since then they might not be moved outside of loops. As a compromise
8677 we allow integration with ops that have a constant as their second
8678 operand. */
8679 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8680 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8681 && !CONST_INT_P (XEXP (x, 1)))
8682 *total = COSTS_N_INSNS (1);
8684 if (mode == DImode)
8686 *total += COSTS_N_INSNS (2);
8687 if (CONST_INT_P (XEXP (x, 1))
8688 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8690 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8691 return true;
8694 return false;
8697 *total += COSTS_N_INSNS (1);
8698 if (CONST_INT_P (XEXP (x, 1))
8699 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8701 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8702 return true;
8704 subcode = GET_CODE (XEXP (x, 0));
8705 if (subcode == ASHIFT || subcode == ASHIFTRT
8706 || subcode == LSHIFTRT
8707 || subcode == ROTATE || subcode == ROTATERT)
8709 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8710 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8711 return true;
8714 if (subcode == MULT
8715 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8717 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8718 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8719 return true;
8722 if (subcode == UMIN || subcode == UMAX
8723 || subcode == SMIN || subcode == SMAX)
8725 *total = COSTS_N_INSNS (3);
8726 return true;
8729 return false;
8731 case MULT:
8732 /* This should have been handled by the CPU specific routines. */
8733 gcc_unreachable ();
8735 case TRUNCATE:
8736 if (arm_arch3m && mode == SImode
8737 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8738 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8739 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8740 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8741 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8742 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8744 *total = rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, LSHIFTRT,
8745 0, speed);
8746 return true;
8748 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8749 return false;
8751 case NEG:
8752 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8754 if (TARGET_HARD_FLOAT
8755 && (mode == SFmode
8756 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8758 *total = COSTS_N_INSNS (1);
8759 return false;
8761 *total = COSTS_N_INSNS (2);
8762 return false;
8765 /* Fall through */
8766 case NOT:
8767 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8768 if (mode == SImode && code == NOT)
8770 subcode = GET_CODE (XEXP (x, 0));
8771 if (subcode == ASHIFT || subcode == ASHIFTRT
8772 || subcode == LSHIFTRT
8773 || subcode == ROTATE || subcode == ROTATERT
8774 || (subcode == MULT
8775 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8777 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode,
8778 0, speed);
8779 /* Register shifts cost an extra cycle. */
8780 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8781 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8782 mode, subcode,
8783 1, speed);
8784 return true;
8788 return false;
8790 case IF_THEN_ELSE:
8791 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8793 *total = COSTS_N_INSNS (4);
8794 return true;
8797 operand = XEXP (x, 0);
8799 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8800 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8801 && REG_P (XEXP (operand, 0))
8802 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8803 *total += COSTS_N_INSNS (1);
8804 *total += rtx_cost (XEXP (x, 1), VOIDmode, code, 1, speed);
8805 *total += rtx_cost (XEXP (x, 2), VOIDmode, code, 2, speed);
8806 return true;
8808 case NE:
8809 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8811 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
8812 0, speed);
8813 return true;
8815 goto scc_insn;
8817 case GE:
8818 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8819 && mode == SImode && XEXP (x, 1) == const0_rtx)
8821 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
8822 0, speed);
8823 return true;
8825 goto scc_insn;
8827 case LT:
8828 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8829 && mode == SImode && XEXP (x, 1) == const0_rtx)
8831 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
8832 0, speed);
8833 return true;
8835 goto scc_insn;
8837 case EQ:
8838 case GT:
8839 case LE:
8840 case GEU:
8841 case LTU:
8842 case GTU:
8843 case LEU:
8844 case UNORDERED:
8845 case ORDERED:
8846 case UNEQ:
8847 case UNGE:
8848 case UNLT:
8849 case UNGT:
8850 case UNLE:
8851 scc_insn:
8852     /* SCC insns.  If the comparison has already been performed, they
8853        cost 2 instructions.  Otherwise they need an additional comparison
8854        before them.  */
8855 *total = COSTS_N_INSNS (2);
8856 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8858 return true;
8861 /* Fall through */
8862 case COMPARE:
8863 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8865 *total = 0;
8866 return true;
8869 *total += COSTS_N_INSNS (1);
8870 if (CONST_INT_P (XEXP (x, 1))
8871 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8873 *total += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed);
8874 return true;
8877 subcode = GET_CODE (XEXP (x, 0));
8878 if (subcode == ASHIFT || subcode == ASHIFTRT
8879 || subcode == LSHIFTRT
8880 || subcode == ROTATE || subcode == ROTATERT)
8882 mode = GET_MODE (XEXP (x, 0));
8883 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8884 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8885 return true;
8888 if (subcode == MULT
8889 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8891 mode = GET_MODE (XEXP (x, 0));
8892 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8893 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8894 return true;
8897 return false;
8899 case UMIN:
8900 case UMAX:
8901 case SMIN:
8902 case SMAX:
8903 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8904 if (!CONST_INT_P (XEXP (x, 1))
8905 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8906 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8907 return true;
8909 case ABS:
8910 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8912 if (TARGET_HARD_FLOAT
8913 && (mode == SFmode
8914 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8916 *total = COSTS_N_INSNS (1);
8917 return false;
8919 *total = COSTS_N_INSNS (20);
8920 return false;
8922 *total = COSTS_N_INSNS (1);
8923 if (mode == DImode)
8924 *total += COSTS_N_INSNS (3);
8925 return false;
8927 case SIGN_EXTEND:
8928 case ZERO_EXTEND:
8929 *total = 0;
8930 if (GET_MODE_CLASS (mode) == MODE_INT)
8932 rtx op = XEXP (x, 0);
8933 machine_mode opmode = GET_MODE (op);
8935 if (mode == DImode)
8936 *total += COSTS_N_INSNS (1);
8938 if (opmode != SImode)
8940 if (MEM_P (op))
8942 /* If !arm_arch4, we use one of the extendhisi2_mem
8943 or movhi_bytes patterns for HImode. For a QImode
8944 sign extension, we first zero-extend from memory
8945 and then perform a shift sequence. */
8946 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8947 *total += COSTS_N_INSNS (2);
8949 else if (arm_arch6)
8950 *total += COSTS_N_INSNS (1);
8952 /* We don't have the necessary insn, so we need to perform some
8953 other operation. */
8954 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8955 /* An and with constant 255. */
8956 *total += COSTS_N_INSNS (1);
8957 else
8958 /* A shift sequence. Increase costs slightly to avoid
8959 combining two shifts into an extend operation. */
8960 *total += COSTS_N_INSNS (2) + 1;
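	      /* E.g. a pre-ARMv6 QImode sign extension from a register is
		 performed with something like "mov rd, rn, lsl #24" followed
		 by "mov rd, rd, asr #24", which is why two insns (plus a
		 small bias) are counted here.  */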
8963 return false;
8966 switch (GET_MODE (XEXP (x, 0)))
8968 case V8QImode:
8969 case V4HImode:
8970 case V2SImode:
8971 case V4QImode:
8972 case V2HImode:
8973 *total = COSTS_N_INSNS (1);
8974 return false;
8976 default:
8977 gcc_unreachable ();
8979 gcc_unreachable ();
8981 case ZERO_EXTRACT:
8982 case SIGN_EXTRACT:
8983 mode = GET_MODE (XEXP (x, 0));
8984 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8985 return true;
8987 case CONST_INT:
8988 if (const_ok_for_arm (INTVAL (x))
8989 || const_ok_for_arm (~INTVAL (x)))
8990 *total = COSTS_N_INSNS (1);
8991 else
8992 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8993 INTVAL (x), NULL_RTX,
8994 NULL_RTX, 0, 0));
8995 return true;
8997 case CONST:
8998 case LABEL_REF:
8999 case SYMBOL_REF:
9000 *total = COSTS_N_INSNS (3);
9001 return true;
9003 case HIGH:
9004 *total = COSTS_N_INSNS (1);
9005 return true;
9007 case LO_SUM:
9008 *total = COSTS_N_INSNS (1);
9009 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
9010 return true;
9012 case CONST_DOUBLE:
9013 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
9014 && (mode == SFmode || !TARGET_VFP_SINGLE))
9015 *total = COSTS_N_INSNS (1);
9016 else
9017 *total = COSTS_N_INSNS (4);
9018 return true;
9020 case SET:
9021 /* The vec_extract patterns accept memory operands that require an
9022 address reload. Account for the cost of that reload to give the
9023 auto-inc-dec pass an incentive to try to replace them. */
9024 if (TARGET_NEON && MEM_P (SET_DEST (x))
9025 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
9027 mode = GET_MODE (SET_DEST (x));
9028 *total = rtx_cost (SET_DEST (x), mode, code, 0, speed);
9029 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
9030 *total += COSTS_N_INSNS (1);
9031 return true;
9033 /* Likewise for the vec_set patterns. */
9034 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
9035 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
9036 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
9038 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
9039 mode = GET_MODE (SET_DEST (x));
9040 *total = rtx_cost (mem, mode, code, 0, speed);
9041 if (!neon_vector_mem_operand (mem, 2, true))
9042 *total += COSTS_N_INSNS (1);
9043 return true;
9045 return false;
9047 case UNSPEC:
9048 /* We cost this as high as our memory costs to allow this to
9049 be hoisted from loops. */
9050 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
9052 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
9054 return true;
9056 case CONST_VECTOR:
9057 if (TARGET_NEON
9058 && TARGET_HARD_FLOAT
9059 && outer == SET
9060 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9061 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9062 *total = COSTS_N_INSNS (1);
9063 else
9064 *total = COSTS_N_INSNS (4);
9065 return true;
9067 default:
9068 *total = COSTS_N_INSNS (4);
9069 return false;
9073 /* Estimate the size cost of thumb1 instructions.
9074    For now most of the code is copied from thumb1_rtx_costs.  We need more
9075    fine-grained tuning when we have more related test cases.  */
9076 static inline int
9077 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9079 machine_mode mode = GET_MODE (x);
9080 int words, cost;
9082 switch (code)
9084 case ASHIFT:
9085 case ASHIFTRT:
9086 case LSHIFTRT:
9087 case ROTATERT:
9088 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9090 case PLUS:
9091 case MINUS:
9092       /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9093 	 patterns generated by RTL expansion, especially for the expansion of
9094 	 multiplication.  */
9095 if ((GET_CODE (XEXP (x, 0)) == MULT
9096 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9097 || (GET_CODE (XEXP (x, 1)) == MULT
9098 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9099 return COSTS_N_INSNS (2);
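      /* E.g. (plus (mult r1 (const_int 4)) r0) becomes something like
	 "lsls r3, r1, #2" followed by "adds r0, r0, r3" on Thumb-1,
	 hence the two-instruction cost.  */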
9100       /* Deliberately fall through for normal RTX.  */
9101 case COMPARE:
9102 case NEG:
9103 case NOT:
9104 return COSTS_N_INSNS (1);
9106 case MULT:
9107 if (CONST_INT_P (XEXP (x, 1)))
9109 	  /* The Thumb1 mul instruction can't operate on a constant.  We must load it
9110 	     into a register first.  */
9111 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9112 /* For the targets which have a very small and high-latency multiply
9113 unit, we prefer to synthesize the mult with up to 5 instructions,
9114 giving a good balance between size and performance. */
9115 if (arm_arch6m && arm_m_profile_small_mul)
9116 return COSTS_N_INSNS (5);
9117 else
9118 return COSTS_N_INSNS (1) + const_size;
9120 return COSTS_N_INSNS (1);
9122 case SET:
9123 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9124 the mode. */
9125 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9126 cost = COSTS_N_INSNS (words);
9127 if (satisfies_constraint_J (SET_SRC (x))
9128 || satisfies_constraint_K (SET_SRC (x))
9129 	  /* Too big an immediate for a 2-byte mov, so MOVT is used.  */
9130 || (UINTVAL (SET_SRC (x)) >= 256
9131 && TARGET_HAVE_MOVT
9132 && satisfies_constraint_j (SET_SRC (x)))
9133 /* thumb1_movdi_insn. */
9134 || ((words > 1) && MEM_P (SET_SRC (x))))
9135 cost += COSTS_N_INSNS (1);
9136 return cost;
9138 case CONST_INT:
9139 if (outer == SET)
9141 if (UINTVAL (x) < 256)
9142 return COSTS_N_INSNS (1);
9143 	  /* movw is 4 bytes long.  */
9144 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9145 return COSTS_N_INSNS (2);
9146 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9147 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9148 return COSTS_N_INSNS (2);
9149 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9150 if (thumb_shiftable_const (INTVAL (x)))
9151 return COSTS_N_INSNS (2);
9152 return COSTS_N_INSNS (3);
9154 else if ((outer == PLUS || outer == COMPARE)
9155 && INTVAL (x) < 256 && INTVAL (x) > -256)
9156 return 0;
9157 else if ((outer == IOR || outer == XOR || outer == AND)
9158 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9159 return COSTS_N_INSNS (1);
9160 else if (outer == AND)
9162 int i;
9163 /* This duplicates the tests in the andsi3 expander. */
9164 for (i = 9; i <= 31; i++)
9165 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9166 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9167 return COSTS_N_INSNS (2);
9169 else if (outer == ASHIFT || outer == ASHIFTRT
9170 || outer == LSHIFTRT)
9171 return 0;
9172 return COSTS_N_INSNS (2);
9174 case CONST:
9175 case CONST_DOUBLE:
9176 case LABEL_REF:
9177 case SYMBOL_REF:
9178 return COSTS_N_INSNS (3);
9180 case UDIV:
9181 case UMOD:
9182 case DIV:
9183 case MOD:
9184 return 100;
9186 case TRUNCATE:
9187 return 99;
9189 case AND:
9190 case XOR:
9191 case IOR:
9192 return COSTS_N_INSNS (1);
9194 case MEM:
9195 return (COSTS_N_INSNS (1)
9196 + COSTS_N_INSNS (1)
9197 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9198 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9199 ? COSTS_N_INSNS (1) : 0));
9201 case IF_THEN_ELSE:
9202 /* XXX a guess. */
9203 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9204 return 14;
9205 return 2;
9207 case ZERO_EXTEND:
9208 /* XXX still guessing. */
9209 switch (GET_MODE (XEXP (x, 0)))
9211 case QImode:
9212 return (1 + (mode == DImode ? 4 : 0)
9213 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9215 case HImode:
9216 return (4 + (mode == DImode ? 4 : 0)
9217 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9219 case SImode:
9220 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9222 default:
9223 return 99;
9226 default:
9227 return 99;
9231 /* RTX costs when optimizing for size. */
9232 static bool
9233 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9234 int *total)
9236 machine_mode mode = GET_MODE (x);
9237 if (TARGET_THUMB1)
9239 *total = thumb1_size_rtx_costs (x, code, outer_code);
9240 return true;
9243 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9244 switch (code)
9246 case MEM:
9247 /* A memory access costs 1 insn if the mode is small, or the address is
9248 a single register, otherwise it costs one insn per word. */
9249 if (REG_P (XEXP (x, 0)))
9250 *total = COSTS_N_INSNS (1);
9251 else if (flag_pic
9252 && GET_CODE (XEXP (x, 0)) == PLUS
9253 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9254 /* This will be split into two instructions.
9255 See arm.md:calculate_pic_address. */
9256 *total = COSTS_N_INSNS (2);
9257 else
9258 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9259 return true;
9261 case DIV:
9262 case MOD:
9263 case UDIV:
9264 case UMOD:
9265 /* Needs a libcall, so it costs about this. */
9266 *total = COSTS_N_INSNS (2);
9267 return false;
9269 case ROTATE:
9270 if (mode == SImode && REG_P (XEXP (x, 1)))
9272 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
9273 0, false);
9274 return true;
9276 /* Fall through */
9277 case ROTATERT:
9278 case ASHIFT:
9279 case LSHIFTRT:
9280 case ASHIFTRT:
9281 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9283 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), mode, code,
9284 0, false);
9285 return true;
9287 else if (mode == SImode)
9289 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
9290 0, false);
9291 /* Slightly disparage register shifts, but not by much. */
9292 if (!CONST_INT_P (XEXP (x, 1)))
9293 *total += 1 + rtx_cost (XEXP (x, 1), mode, code, 1, false);
9294 return true;
9297 /* Needs a libcall. */
9298 *total = COSTS_N_INSNS (2);
9299 return false;
9301 case MINUS:
9302 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9303 && (mode == SFmode || !TARGET_VFP_SINGLE))
9305 *total = COSTS_N_INSNS (1);
9306 return false;
9309 if (mode == SImode)
9311 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9312 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9314 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9315 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9316 || subcode1 == ROTATE || subcode1 == ROTATERT
9317 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9318 || subcode1 == ASHIFTRT)
9320 /* It's just the cost of the two operands. */
9321 *total = 0;
9322 return false;
9325 *total = COSTS_N_INSNS (1);
9326 return false;
9329 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9330 return false;
9332 case PLUS:
9333 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9334 && (mode == SFmode || !TARGET_VFP_SINGLE))
9336 *total = COSTS_N_INSNS (1);
9337 return false;
9340 /* A shift as a part of ADD costs nothing. */
9341 if (GET_CODE (XEXP (x, 0)) == MULT
9342 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9344 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9345 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, false);
9346 *total += rtx_cost (XEXP (x, 1), mode, code, 1, false);
9347 return true;
9350 /* Fall through */
9351 case AND: case XOR: case IOR:
9352 if (mode == SImode)
9354 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9356 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9357 || subcode == LSHIFTRT || subcode == ASHIFTRT
9358 || (code == AND && subcode == NOT))
9360 /* It's just the cost of the two operands. */
9361 *total = 0;
9362 return false;
9366 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9367 return false;
9369 case MULT:
9370 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9371 return false;
9373 case NEG:
9374 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9375 && (mode == SFmode || !TARGET_VFP_SINGLE))
9377 *total = COSTS_N_INSNS (1);
9378 return false;
9381 /* Fall through */
9382 case NOT:
9383 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9385 return false;
9387 case IF_THEN_ELSE:
9388 *total = 0;
9389 return false;
9391 case COMPARE:
9392 if (cc_register (XEXP (x, 0), VOIDmode))
9393 *total = 0;
9394 else
9395 *total = COSTS_N_INSNS (1);
9396 return false;
9398 case ABS:
9399 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9400 && (mode == SFmode || !TARGET_VFP_SINGLE))
9401 *total = COSTS_N_INSNS (1);
9402 else
9403 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9404 return false;
9406 case SIGN_EXTEND:
9407 case ZERO_EXTEND:
9408 return arm_rtx_costs_1 (x, outer_code, total, 0);
9410 case CONST_INT:
9411 if (const_ok_for_arm (INTVAL (x)))
9412 /* A multiplication by a constant requires another instruction
9413 to load the constant to a register. */
9414 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9415 ? 1 : 0);
9416 else if (const_ok_for_arm (~INTVAL (x)))
9417 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9418 else if (const_ok_for_arm (-INTVAL (x)))
9420 if (outer_code == COMPARE || outer_code == PLUS
9421 || outer_code == MINUS)
9422 *total = 0;
9423 else
9424 *total = COSTS_N_INSNS (1);
9426 else
9427 *total = COSTS_N_INSNS (2);
9428 return true;
9430 case CONST:
9431 case LABEL_REF:
9432 case SYMBOL_REF:
9433 *total = COSTS_N_INSNS (2);
9434 return true;
9436 case CONST_DOUBLE:
9437 *total = COSTS_N_INSNS (4);
9438 return true;
9440 case CONST_VECTOR:
9441 if (TARGET_NEON
9442 && TARGET_HARD_FLOAT
9443 && outer_code == SET
9444 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9445 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9446 *total = COSTS_N_INSNS (1);
9447 else
9448 *total = COSTS_N_INSNS (4);
9449 return true;
9451 case HIGH:
9452 case LO_SUM:
9453 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9454 cost of these slightly. */
9455 *total = COSTS_N_INSNS (1) + 1;
9456 return true;
9458 case SET:
9459 return false;
9461 default:
9462 if (mode != VOIDmode)
9463 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9464 else
9465 *total = COSTS_N_INSNS (4); /* Who knows? */
9466 return false;
9470 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9471 operand, then return the operand that is being shifted. If the shift
9472 is not by a constant, then set SHIFT_REG to point to the operand.
9473 Return NULL if OP is not a shifter operand. */
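/* Illustrative RTL sketches (register names are placeholders):
     (mult (reg A) (const_int 4))   -> returns (reg A); a multiply by a
                                       power of two acts as a left shift,
                                       and SHIFT_REG is left untouched.
     (ashift (reg A) (const_int 3)) -> returns (reg A), SHIFT_REG untouched.
     (ashift (reg A) (reg B))       -> returns (reg A), *SHIFT_REG = (reg B).
     (plus (reg A) (reg B))         -> returns NULL (not a shifter operand).  */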
9474 static rtx
9475 shifter_op_p (rtx op, rtx *shift_reg)
9477 enum rtx_code code = GET_CODE (op);
9479 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9480 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9481 return XEXP (op, 0);
9482 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9483 return XEXP (op, 0);
9484 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9485 || code == ASHIFTRT)
9487 if (!CONST_INT_P (XEXP (op, 1)))
9488 *shift_reg = XEXP (op, 1);
9489 return XEXP (op, 0);
9492 return NULL;
9495 static bool
9496 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9498 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9499 rtx_code code = GET_CODE (x);
9500 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9502 switch (XINT (x, 1))
9504 case UNSPEC_UNALIGNED_LOAD:
9505 /* We can only do unaligned loads into the integer unit, and we can't
9506 use LDM or LDRD. */
9507 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9508 if (speed_p)
9509 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9510 + extra_cost->ldst.load_unaligned);
9512 #ifdef NOT_YET
9513 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9514 ADDR_SPACE_GENERIC, speed_p);
9515 #endif
9516 return true;
9518 case UNSPEC_UNALIGNED_STORE:
9519 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9520 if (speed_p)
9521 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9522 + extra_cost->ldst.store_unaligned);
9524 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9525 #ifdef NOT_YET
9526 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9527 ADDR_SPACE_GENERIC, speed_p);
9528 #endif
9529 return true;
9531 case UNSPEC_VRINTZ:
9532 case UNSPEC_VRINTP:
9533 case UNSPEC_VRINTM:
9534 case UNSPEC_VRINTR:
9535 case UNSPEC_VRINTX:
9536 case UNSPEC_VRINTA:
9537 if (speed_p)
9538 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9540 return true;
9541 default:
9542 *cost = COSTS_N_INSNS (2);
9543 break;
9545 return true;
9548 /* Cost of a libcall. We assume one insn per argument, an amount for the
9549 call (one insn for -Os) and then one for processing the result. */
9550 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
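/* Worked example, following directly from the definition above: with two
   arguments, LIBCALL_COST (2) expands to COSTS_N_INSNS (2 + 2), i.e. four
   insns, when optimizing for size, and to COSTS_N_INSNS (2 + 18), i.e.
   twenty insns, when optimizing for speed.  */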
9552 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9553 do \
9555 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9556 if (shift_op != NULL \
9557 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9559 if (shift_reg) \
9561 if (speed_p) \
9562 *cost += extra_cost->alu.arith_shift_reg; \
9563 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9564 ASHIFT, 1, speed_p); \
9566 else if (speed_p) \
9567 *cost += extra_cost->alu.arith_shift; \
9569 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9570 ASHIFT, 0, speed_p) \
9571 + rtx_cost (XEXP (x, 1 - IDX), \
9572 GET_MODE (shift_op), \
9573 OP, 1, speed_p)); \
9574 return true; \
9577 while (0);
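/* Usage note: the macro above is only invoked from the narrow-mode
   (sub-word PLUS and MINUS) cases below.  It expects shift_op, shift_reg,
   x, cost, extra_cost and speed_p to be in scope at the point of use, and
   simply falls through when operand IDX is not a left-shift-like operand.  */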
9579 /* RTX costs. Make an estimate of the cost of executing the operation
9580 X, which is contained with an operation with code OUTER_CODE.
9581 SPEED_P indicates whether the cost desired is the performance cost,
9582 or the size cost. The estimate is stored in COST and the return
9583 value is TRUE if the cost calculation is final, or FALSE if the
9584 caller should recurse through the operands of X to add additional
9585 costs.
9587 We currently make no attempt to model the size savings of Thumb-2
9588 16-bit instructions. At the normal points in compilation where
9589 this code is called we have no measure of whether the condition
9590 flags are live or not, and thus no realistic way to determine what
9591 the size will eventually be. */
9592 static bool
9593 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9594 const struct cpu_cost_table *extra_cost,
9595 int *cost, bool speed_p)
9597 machine_mode mode = GET_MODE (x);
9599 *cost = COSTS_N_INSNS (1);
9601 if (TARGET_THUMB1)
9603 if (speed_p)
9604 *cost = thumb1_rtx_costs (x, code, outer_code);
9605 else
9606 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9607 return true;
9610 switch (code)
9612 case SET:
9613 *cost = 0;
9614 /* SET RTXs don't have a mode so we get it from the destination. */
9615 mode = GET_MODE (SET_DEST (x));
9617 if (REG_P (SET_SRC (x))
9618 && REG_P (SET_DEST (x)))
9620 /* Assume that most copies can be done with a single insn,
9621 unless we don't have HW FP, in which case everything
9622 larger than word mode will require two insns. */
9623 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9624 && GET_MODE_SIZE (mode) > 4)
9625 || mode == DImode)
9626 ? 2 : 1);
9627 /* Conditional register moves can be encoded
9628 in 16 bits in Thumb mode. */
9629 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9630 *cost >>= 1;
9632 return true;
9635 if (CONST_INT_P (SET_SRC (x)))
9637 /* Handle CONST_INT here, since the value doesn't have a mode
9638 and we would otherwise be unable to work out the true cost. */
9639 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9640 0, speed_p);
9641 outer_code = SET;
9642 /* Slightly lower the cost of setting a core reg to a constant.
9643 This helps break up chains and allows for better scheduling. */
9644 if (REG_P (SET_DEST (x))
9645 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9646 *cost -= 1;
9647 x = SET_SRC (x);
9648 /* Immediate moves with an immediate in the range [0, 255] can be
9649 encoded in 16 bits in Thumb mode. */
9650 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9651 && INTVAL (x) >= 0 && INTVAL (x) <= 255
9652 *cost >>= 1;
9653 goto const_int_cost;
9656 return false;
9658 case MEM:
9659 /* A memory access costs 1 insn if the mode is small, or the address is
9660 a single register, otherwise it costs one insn per word. */
9661 if (REG_P (XEXP (x, 0)))
9662 *cost = COSTS_N_INSNS (1);
9663 else if (flag_pic
9664 && GET_CODE (XEXP (x, 0)) == PLUS
9665 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9666 /* This will be split into two instructions.
9667 See arm.md:calculate_pic_address. */
9668 *cost = COSTS_N_INSNS (2);
9669 else
9670 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9672 /* For speed optimizations, add the costs of the address and
9673 accessing memory. */
9674 if (speed_p)
9675 #ifdef NOT_YET
9676 *cost += (extra_cost->ldst.load
9677 + arm_address_cost (XEXP (x, 0), mode,
9678 ADDR_SPACE_GENERIC, speed_p));
9679 #else
9680 *cost += extra_cost->ldst.load;
9681 #endif
9682 return true;
9684 case PARALLEL:
9686 /* Calculations of LDM costs are complex. We assume an initial cost
9687 (ldm_1st) which will load the number of registers mentioned in
9688 ldm_regs_per_insn_1st registers; then each additional
9689 ldm_regs_per_insn_subsequent registers cost one more insn. The
9690 formula for N regs is thus:
9692 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9693 + ldm_regs_per_insn_subsequent - 1)
9694 / ldm_regs_per_insn_subsequent).
9696 Additional costs may also be added for addressing. A similar
9697 formula is used for STM. */
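/* Worked example with hypothetical tuning values: if
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2, a
   five-register LDM costs
     ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
       = ldm_1st + COSTS_N_INSNS (2).  */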
9699 bool is_ldm = load_multiple_operation (x, SImode);
9700 bool is_stm = store_multiple_operation (x, SImode);
9702 if (is_ldm || is_stm)
9704 if (speed_p)
9706 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9707 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9708 ? extra_cost->ldst.ldm_regs_per_insn_1st
9709 : extra_cost->ldst.stm_regs_per_insn_1st;
9710 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9711 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9712 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9714 *cost += regs_per_insn_1st
9715 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9716 + regs_per_insn_sub - 1)
9717 / regs_per_insn_sub);
9718 return true;
9722 return false;
9724 case DIV:
9725 case UDIV:
9726 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9727 && (mode == SFmode || !TARGET_VFP_SINGLE))
9728 *cost += COSTS_N_INSNS (speed_p
9729 ? extra_cost->fp[mode != SFmode].div : 0);
9730 else if (mode == SImode && TARGET_IDIV)
9731 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9732 else
9733 *cost = LIBCALL_COST (2);
9734 return false; /* All arguments must be in registers. */
9736 case MOD:
9737 /* MOD by a power of 2 can be expanded as:
9738 rsbs r1, r0, #0
9739 and r0, r0, #(n - 1)
9740 and r1, r1, #(n - 1)
9741 rsbpl r0, r1, #0. */
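/* The expansion above is four instructions in total, which matches the
   base cost of one insn plus the COSTS_N_INSNS (3) added below.  */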
9742 if (CONST_INT_P (XEXP (x, 1))
9743 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9744 && mode == SImode)
9746 *cost += COSTS_N_INSNS (3);
9748 if (speed_p)
9749 *cost += 2 * extra_cost->alu.logical
9750 + extra_cost->alu.arith;
9751 return true;
9754 /* Fall-through. */
9755 case UMOD:
9756 *cost = LIBCALL_COST (2);
9757 return false; /* All arguments must be in registers. */
9759 case ROTATE:
9760 if (mode == SImode && REG_P (XEXP (x, 1)))
9762 *cost += (COSTS_N_INSNS (1)
9763 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9764 if (speed_p)
9765 *cost += extra_cost->alu.shift_reg;
9766 return true;
9768 /* Fall through */
9769 case ROTATERT:
9770 case ASHIFT:
9771 case LSHIFTRT:
9772 case ASHIFTRT:
9773 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9775 *cost += (COSTS_N_INSNS (2)
9776 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9777 if (speed_p)
9778 *cost += 2 * extra_cost->alu.shift;
9779 return true;
9781 else if (mode == SImode)
9783 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9784 /* Slightly disparage register shifts at -Os, but not by much. */
9785 if (!CONST_INT_P (XEXP (x, 1)))
9786 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9787 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9788 return true;
9790 else if (GET_MODE_CLASS (mode) == MODE_INT
9791 && GET_MODE_SIZE (mode) < 4)
9793 if (code == ASHIFT)
9795 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9796 /* Slightly disparage register shifts at -Os, but not by
9797 much. */
9798 if (!CONST_INT_P (XEXP (x, 1)))
9799 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9800 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9802 else if (code == LSHIFTRT || code == ASHIFTRT)
9804 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9806 /* Can use SBFX/UBFX. */
9807 if (speed_p)
9808 *cost += extra_cost->alu.bfx;
9809 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9811 else
9813 *cost += COSTS_N_INSNS (1);
9814 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9815 if (speed_p)
9817 if (CONST_INT_P (XEXP (x, 1)))
9818 *cost += 2 * extra_cost->alu.shift;
9819 else
9820 *cost += (extra_cost->alu.shift
9821 + extra_cost->alu.shift_reg);
9823 else
9824 /* Slightly disparage register shifts. */
9825 *cost += !CONST_INT_P (XEXP (x, 1));
9828 else /* Rotates. */
9830 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9831 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9832 if (speed_p)
9834 if (CONST_INT_P (XEXP (x, 1)))
9835 *cost += (2 * extra_cost->alu.shift
9836 + extra_cost->alu.log_shift);
9837 else
9838 *cost += (extra_cost->alu.shift
9839 + extra_cost->alu.shift_reg
9840 + extra_cost->alu.log_shift_reg);
9843 return true;
9846 *cost = LIBCALL_COST (2);
9847 return false;
9849 case BSWAP:
9850 if (arm_arch6)
9852 if (mode == SImode)
9854 if (speed_p)
9855 *cost += extra_cost->alu.rev;
9857 return false;
9860 else
9862 /* No rev instruction available. Look at arm_legacy_rev
9863 and thumb_legacy_rev for the form of RTL used then. */
9864 if (TARGET_THUMB)
9866 *cost += COSTS_N_INSNS (9);
9868 if (speed_p)
9870 *cost += 6 * extra_cost->alu.shift;
9871 *cost += 3 * extra_cost->alu.logical;
9874 else
9876 *cost += COSTS_N_INSNS (4);
9878 if (speed_p)
9880 *cost += 2 * extra_cost->alu.shift;
9881 *cost += extra_cost->alu.arith_shift;
9882 *cost += 2 * extra_cost->alu.logical;
9885 return true;
9887 return false;
9889 case MINUS:
9890 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9891 && (mode == SFmode || !TARGET_VFP_SINGLE))
9893 if (GET_CODE (XEXP (x, 0)) == MULT
9894 || GET_CODE (XEXP (x, 1)) == MULT)
9896 rtx mul_op0, mul_op1, sub_op;
9898 if (speed_p)
9899 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9901 if (GET_CODE (XEXP (x, 0)) == MULT)
9903 mul_op0 = XEXP (XEXP (x, 0), 0);
9904 mul_op1 = XEXP (XEXP (x, 0), 1);
9905 sub_op = XEXP (x, 1);
9907 else
9909 mul_op0 = XEXP (XEXP (x, 1), 0);
9910 mul_op1 = XEXP (XEXP (x, 1), 1);
9911 sub_op = XEXP (x, 0);
9914 /* The first operand of the multiply may be optionally
9915 negated. */
9916 if (GET_CODE (mul_op0) == NEG)
9917 mul_op0 = XEXP (mul_op0, 0);
9919 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9920 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9921 + rtx_cost (sub_op, mode, code, 0, speed_p));
9923 return true;
9926 if (speed_p)
9927 *cost += extra_cost->fp[mode != SFmode].addsub;
9928 return false;
9931 if (mode == SImode)
9933 rtx shift_by_reg = NULL;
9934 rtx shift_op;
9935 rtx non_shift_op;
9937 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9938 if (shift_op == NULL)
9940 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9941 non_shift_op = XEXP (x, 0);
9943 else
9944 non_shift_op = XEXP (x, 1);
9946 if (shift_op != NULL)
9948 if (shift_by_reg != NULL)
9950 if (speed_p)
9951 *cost += extra_cost->alu.arith_shift_reg;
9952 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9954 else if (speed_p)
9955 *cost += extra_cost->alu.arith_shift;
9957 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9958 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9959 return true;
9962 if (arm_arch_thumb2
9963 && GET_CODE (XEXP (x, 1)) == MULT)
9965 /* MLS. */
9966 if (speed_p)
9967 *cost += extra_cost->mult[0].add;
9968 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9969 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9970 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9971 return true;
9974 if (CONST_INT_P (XEXP (x, 0)))
9976 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9977 INTVAL (XEXP (x, 0)), NULL_RTX,
9978 NULL_RTX, 1, 0);
9979 *cost = COSTS_N_INSNS (insns);
9980 if (speed_p)
9981 *cost += insns * extra_cost->alu.arith;
9982 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9983 return true;
9985 else if (speed_p)
9986 *cost += extra_cost->alu.arith;
9988 return false;
9991 if (GET_MODE_CLASS (mode) == MODE_INT
9992 && GET_MODE_SIZE (mode) < 4)
9994 rtx shift_op, shift_reg;
9995 shift_reg = NULL;
9997 /* We check both sides of the MINUS for shifter operands since,
9998 unlike PLUS, it's not commutative. */
10000 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
10001 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
10003 /* Slightly disparage, as we might need to widen the result. */
10004 *cost += 1;
10005 if (speed_p)
10006 *cost += extra_cost->alu.arith;
10008 if (CONST_INT_P (XEXP (x, 0)))
10010 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10011 return true;
10014 return false;
10017 if (mode == DImode)
10019 *cost += COSTS_N_INSNS (1);
10021 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10023 rtx op1 = XEXP (x, 1);
10025 if (speed_p)
10026 *cost += 2 * extra_cost->alu.arith;
10028 if (GET_CODE (op1) == ZERO_EXTEND)
10029 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10030 0, speed_p);
10031 else
10032 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10033 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10034 0, speed_p);
10035 return true;
10037 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10039 if (speed_p)
10040 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10041 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10042 0, speed_p)
10043 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10044 return true;
10046 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10047 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10049 if (speed_p)
10050 *cost += (extra_cost->alu.arith
10051 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10052 ? extra_cost->alu.arith
10053 : extra_cost->alu.arith_shift));
10054 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10055 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10056 GET_CODE (XEXP (x, 1)), 0, speed_p));
10057 return true;
10060 if (speed_p)
10061 *cost += 2 * extra_cost->alu.arith;
10062 return false;
10065 /* Vector mode? */
10067 *cost = LIBCALL_COST (2);
10068 return false;
10070 case PLUS:
10071 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10072 && (mode == SFmode || !TARGET_VFP_SINGLE))
10074 if (GET_CODE (XEXP (x, 0)) == MULT)
10076 rtx mul_op0, mul_op1, add_op;
10078 if (speed_p)
10079 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10081 mul_op0 = XEXP (XEXP (x, 0), 0);
10082 mul_op1 = XEXP (XEXP (x, 0), 1);
10083 add_op = XEXP (x, 1);
10085 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10086 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10087 + rtx_cost (add_op, mode, code, 0, speed_p));
10089 return true;
10092 if (speed_p)
10093 *cost += extra_cost->fp[mode != SFmode].addsub;
10094 return false;
10096 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10098 *cost = LIBCALL_COST (2);
10099 return false;
10102 /* Narrow modes can be synthesized in SImode, but the range
10103 of useful sub-operations is limited. Check for shift operations
10104 on one of the operands. Only left shifts can be used in the
10105 narrow modes. */
10106 if (GET_MODE_CLASS (mode) == MODE_INT
10107 && GET_MODE_SIZE (mode) < 4)
10109 rtx shift_op, shift_reg;
10110 shift_reg = NULL;
10112 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
10114 if (CONST_INT_P (XEXP (x, 1)))
10116 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10117 INTVAL (XEXP (x, 1)), NULL_RTX,
10118 NULL_RTX, 1, 0);
10119 *cost = COSTS_N_INSNS (insns);
10120 if (speed_p)
10121 *cost += insns * extra_cost->alu.arith;
10122 /* Slightly penalize a narrow operation as the result may
10123 need widening. */
10124 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10125 return true;
10128 /* Slightly penalize a narrow operation as the result may
10129 need widening. */
10130 *cost += 1;
10131 if (speed_p)
10132 *cost += extra_cost->alu.arith;
10134 return false;
10137 if (mode == SImode)
10139 rtx shift_op, shift_reg;
10141 if (TARGET_INT_SIMD
10142 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10143 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10145 /* UXTA[BH] or SXTA[BH]. */
10146 if (speed_p)
10147 *cost += extra_cost->alu.extend_arith;
10148 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10149 0, speed_p)
10150 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10151 return true;
10154 shift_reg = NULL;
10155 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10156 if (shift_op != NULL)
10158 if (shift_reg)
10160 if (speed_p)
10161 *cost += extra_cost->alu.arith_shift_reg;
10162 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10164 else if (speed_p)
10165 *cost += extra_cost->alu.arith_shift;
10167 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10168 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10169 return true;
10171 if (GET_CODE (XEXP (x, 0)) == MULT)
10173 rtx mul_op = XEXP (x, 0);
10175 if (TARGET_DSP_MULTIPLY
10176 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10177 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10178 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10179 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10180 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10181 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10182 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10183 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10184 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10185 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10186 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10187 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10188 == 16))))))
10190 /* SMLA[BT][BT]. */
10191 if (speed_p)
10192 *cost += extra_cost->mult[0].extend_add;
10193 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10194 SIGN_EXTEND, 0, speed_p)
10195 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10196 SIGN_EXTEND, 0, speed_p)
10197 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10198 return true;
10201 if (speed_p)
10202 *cost += extra_cost->mult[0].add;
10203 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10204 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10205 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10206 return true;
10208 if (CONST_INT_P (XEXP (x, 1)))
10210 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10211 INTVAL (XEXP (x, 1)), NULL_RTX,
10212 NULL_RTX, 1, 0);
10213 *cost = COSTS_N_INSNS (insns);
10214 if (speed_p)
10215 *cost += insns * extra_cost->alu.arith;
10216 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10217 return true;
10219 else if (speed_p)
10220 *cost += extra_cost->alu.arith;
10222 return false;
10225 if (mode == DImode)
10227 if (arm_arch3m
10228 && GET_CODE (XEXP (x, 0)) == MULT
10229 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10230 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10231 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10232 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10234 if (speed_p)
10235 *cost += extra_cost->mult[1].extend_add;
10236 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10237 ZERO_EXTEND, 0, speed_p)
10238 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10239 ZERO_EXTEND, 0, speed_p)
10240 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10241 return true;
10244 *cost += COSTS_N_INSNS (1);
10246 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10247 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10249 if (speed_p)
10250 *cost += (extra_cost->alu.arith
10251 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10252 ? extra_cost->alu.arith
10253 : extra_cost->alu.arith_shift));
10255 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10256 0, speed_p)
10257 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10258 return true;
10261 if (speed_p)
10262 *cost += 2 * extra_cost->alu.arith;
10263 return false;
10266 /* Vector mode? */
10267 *cost = LIBCALL_COST (2);
10268 return false;
10269 case IOR:
10270 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10272 if (speed_p)
10273 *cost += extra_cost->alu.rev;
10275 return true;
10277 /* Fall through. */
10278 case AND: case XOR:
10279 if (mode == SImode)
10281 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10282 rtx op0 = XEXP (x, 0);
10283 rtx shift_op, shift_reg;
10285 if (subcode == NOT
10286 && (code == AND
10287 || (code == IOR && TARGET_THUMB2)))
10288 op0 = XEXP (op0, 0);
10290 shift_reg = NULL;
10291 shift_op = shifter_op_p (op0, &shift_reg);
10292 if (shift_op != NULL)
10294 if (shift_reg)
10296 if (speed_p)
10297 *cost += extra_cost->alu.log_shift_reg;
10298 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10300 else if (speed_p)
10301 *cost += extra_cost->alu.log_shift;
10303 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10304 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10305 return true;
10308 if (CONST_INT_P (XEXP (x, 1)))
10310 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10311 INTVAL (XEXP (x, 1)), NULL_RTX,
10312 NULL_RTX, 1, 0);
10314 *cost = COSTS_N_INSNS (insns);
10315 if (speed_p)
10316 *cost += insns * extra_cost->alu.logical;
10317 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10318 return true;
10321 if (speed_p)
10322 *cost += extra_cost->alu.logical;
10323 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10324 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10325 return true;
10328 if (mode == DImode)
10330 rtx op0 = XEXP (x, 0);
10331 enum rtx_code subcode = GET_CODE (op0);
10333 *cost += COSTS_N_INSNS (1);
10335 if (subcode == NOT
10336 && (code == AND
10337 || (code == IOR && TARGET_THUMB2)))
10338 op0 = XEXP (op0, 0);
10340 if (GET_CODE (op0) == ZERO_EXTEND)
10342 if (speed_p)
10343 *cost += 2 * extra_cost->alu.logical;
10345 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10346 0, speed_p)
10347 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10348 return true;
10350 else if (GET_CODE (op0) == SIGN_EXTEND)
10352 if (speed_p)
10353 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10355 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10356 0, speed_p)
10357 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10358 return true;
10361 if (speed_p)
10362 *cost += 2 * extra_cost->alu.logical;
10364 return true;
10366 /* Vector mode? */
10368 *cost = LIBCALL_COST (2);
10369 return false;
10371 case MULT:
10372 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10373 && (mode == SFmode || !TARGET_VFP_SINGLE))
10375 rtx op0 = XEXP (x, 0);
10377 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10378 op0 = XEXP (op0, 0);
10380 if (speed_p)
10381 *cost += extra_cost->fp[mode != SFmode].mult;
10383 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10384 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10385 return true;
10387 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10389 *cost = LIBCALL_COST (2);
10390 return false;
10393 if (mode == SImode)
10395 if (TARGET_DSP_MULTIPLY
10396 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10397 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10398 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10399 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10400 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10401 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10402 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10403 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10404 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10405 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10406 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10407 && (INTVAL (XEXP (XEXP (x, 1), 1))
10408 == 16))))))
10410 /* SMUL[TB][TB]. */
10411 if (speed_p)
10412 *cost += extra_cost->mult[0].extend;
10413 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10414 SIGN_EXTEND, 0, speed_p);
10415 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10416 SIGN_EXTEND, 1, speed_p);
10417 return true;
10419 if (speed_p)
10420 *cost += extra_cost->mult[0].simple;
10421 return false;
10424 if (mode == DImode)
10426 if (arm_arch3m
10427 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10428 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10429 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10430 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10432 if (speed_p)
10433 *cost += extra_cost->mult[1].extend;
10434 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10435 ZERO_EXTEND, 0, speed_p)
10436 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10437 ZERO_EXTEND, 0, speed_p));
10438 return true;
10441 *cost = LIBCALL_COST (2);
10442 return false;
10445 /* Vector mode? */
10446 *cost = LIBCALL_COST (2);
10447 return false;
10449 case NEG:
10450 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10451 && (mode == SFmode || !TARGET_VFP_SINGLE))
10453 if (GET_CODE (XEXP (x, 0)) == MULT)
10455 /* VNMUL. */
10456 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10457 return true;
10460 if (speed_p)
10461 *cost += extra_cost->fp[mode != SFmode].neg;
10463 return false;
10465 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10467 *cost = LIBCALL_COST (1);
10468 return false;
10471 if (mode == SImode)
10473 if (GET_CODE (XEXP (x, 0)) == ABS)
10475 *cost += COSTS_N_INSNS (1);
10476 /* Assume the non-flag-changing variant. */
10477 if (speed_p)
10478 *cost += (extra_cost->alu.log_shift
10479 + extra_cost->alu.arith_shift);
10480 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10481 return true;
10484 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10485 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10487 *cost += COSTS_N_INSNS (1);
10488 /* No extra cost for MOV imm and MVN imm. */
10489 /* If the comparison op is using the flags, there's no further
10490 cost, otherwise we need to add the cost of the comparison. */
10491 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10492 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10493 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10495 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10496 *cost += (COSTS_N_INSNS (1)
10497 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10498 0, speed_p)
10499 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10500 1, speed_p));
10501 if (speed_p)
10502 *cost += extra_cost->alu.arith;
10504 return true;
10507 if (speed_p)
10508 *cost += extra_cost->alu.arith;
10509 return false;
10512 if (GET_MODE_CLASS (mode) == MODE_INT
10513 && GET_MODE_SIZE (mode) < 4)
10515 /* Slightly disparage, as we might need an extend operation. */
10516 *cost += 1;
10517 if (speed_p)
10518 *cost += extra_cost->alu.arith;
10519 return false;
10522 if (mode == DImode)
10524 *cost += COSTS_N_INSNS (1);
10525 if (speed_p)
10526 *cost += 2 * extra_cost->alu.arith;
10527 return false;
10530 /* Vector mode? */
10531 *cost = LIBCALL_COST (1);
10532 return false;
10534 case NOT:
10535 if (mode == SImode)
10537 rtx shift_op;
10538 rtx shift_reg = NULL;
10540 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10542 if (shift_op)
10544 if (shift_reg != NULL)
10546 if (speed_p)
10547 *cost += extra_cost->alu.log_shift_reg;
10548 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10550 else if (speed_p)
10551 *cost += extra_cost->alu.log_shift;
10552 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10553 return true;
10556 if (speed_p)
10557 *cost += extra_cost->alu.logical;
10558 return false;
10560 if (mode == DImode)
10562 *cost += COSTS_N_INSNS (1);
10563 return false;
10566 /* Vector mode? */
10568 *cost += LIBCALL_COST (1);
10569 return false;
10571 case IF_THEN_ELSE:
10573 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10575 *cost += COSTS_N_INSNS (3);
10576 return true;
10578 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10579 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10581 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10582 /* Assume that if one arm of the if_then_else is a register,
10583 that it will be tied with the result and eliminate the
10584 conditional insn. */
10585 if (REG_P (XEXP (x, 1)))
10586 *cost += op2cost;
10587 else if (REG_P (XEXP (x, 2)))
10588 *cost += op1cost;
10589 else
10591 if (speed_p)
10593 if (extra_cost->alu.non_exec_costs_exec)
10594 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10595 else
10596 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10598 else
10599 *cost += op1cost + op2cost;
10602 return true;
10604 case COMPARE:
10605 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10606 *cost = 0;
10607 else
10609 machine_mode op0mode;
10610 /* We'll mostly assume that the cost of a compare is the cost of the
10611 LHS. However, there are some notable exceptions. */
10613 /* Floating point compares are never done as side-effects. */
10614 op0mode = GET_MODE (XEXP (x, 0));
10615 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10616 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10618 if (speed_p)
10619 *cost += extra_cost->fp[op0mode != SFmode].compare;
10621 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10623 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10624 return true;
10627 return false;
10629 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10631 *cost = LIBCALL_COST (2);
10632 return false;
10635 /* DImode compares normally take two insns. */
10636 if (op0mode == DImode)
10638 *cost += COSTS_N_INSNS (1);
10639 if (speed_p)
10640 *cost += 2 * extra_cost->alu.arith;
10641 return false;
10644 if (op0mode == SImode)
10646 rtx shift_op;
10647 rtx shift_reg;
10649 if (XEXP (x, 1) == const0_rtx
10650 && !(REG_P (XEXP (x, 0))
10651 || (GET_CODE (XEXP (x, 0)) == SUBREG
10652 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10654 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10656 /* Multiply operations that set the flags are often
10657 significantly more expensive. */
10658 if (speed_p
10659 && GET_CODE (XEXP (x, 0)) == MULT
10660 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10661 *cost += extra_cost->mult[0].flag_setting;
10663 if (speed_p
10664 && GET_CODE (XEXP (x, 0)) == PLUS
10665 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10666 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10667 0), 1), mode))
10668 *cost += extra_cost->mult[0].flag_setting;
10669 return true;
10672 shift_reg = NULL;
10673 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10674 if (shift_op != NULL)
10676 if (shift_reg != NULL)
10678 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10679 1, speed_p);
10680 if (speed_p)
10681 *cost += extra_cost->alu.arith_shift_reg;
10683 else if (speed_p)
10684 *cost += extra_cost->alu.arith_shift;
10685 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10686 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10687 return true;
10690 if (speed_p)
10691 *cost += extra_cost->alu.arith;
10692 if (CONST_INT_P (XEXP (x, 1))
10693 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10695 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10696 return true;
10698 return false;
10701 /* Vector mode? */
10703 *cost = LIBCALL_COST (2);
10704 return false;
10706 return true;
10708 case EQ:
10709 case NE:
10710 case LT:
10711 case LE:
10712 case GT:
10713 case GE:
10714 case LTU:
10715 case LEU:
10716 case GEU:
10717 case GTU:
10718 case ORDERED:
10719 case UNORDERED:
10720 case UNEQ:
10721 case UNLE:
10722 case UNLT:
10723 case UNGE:
10724 case UNGT:
10725 case LTGT:
10726 if (outer_code == SET)
10728 /* Is it a store-flag operation? */
10729 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10730 && XEXP (x, 1) == const0_rtx)
10732 /* Thumb also needs an IT insn. */
10733 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10734 return true;
10736 if (XEXP (x, 1) == const0_rtx)
10738 switch (code)
10740 case LT:
10741 /* LSR Rd, Rn, #31. */
10742 if (speed_p)
10743 *cost += extra_cost->alu.shift;
10744 break;
10746 case EQ:
10747 /* RSBS T1, Rn, #0
10748 ADC Rd, Rn, T1. */
10750 case NE:
10751 /* SUBS T1, Rn, #1
10752 SBC Rd, Rn, T1. */
10753 *cost += COSTS_N_INSNS (1);
10754 break;
10756 case LE:
10757 /* RSBS T1, Rn, Rn, LSR #31
10758 ADC Rd, Rn, T1. */
10759 *cost += COSTS_N_INSNS (1);
10760 if (speed_p)
10761 *cost += extra_cost->alu.arith_shift;
10762 break;
10764 case GT:
10765 /* RSB Rd, Rn, Rn, ASR #1
10766 LSR Rd, Rd, #31. */
10767 *cost += COSTS_N_INSNS (1);
10768 if (speed_p)
10769 *cost += (extra_cost->alu.arith_shift
10770 + extra_cost->alu.shift);
10771 break;
10773 case GE:
10774 /* ASR Rd, Rn, #31
10775 ADD Rd, Rn, #1. */
10776 *cost += COSTS_N_INSNS (1);
10777 if (speed_p)
10778 *cost += extra_cost->alu.shift;
10779 break;
10781 default:
10782 /* Remaining cases are either meaningless or would take
10783 three insns anyway. */
10784 *cost = COSTS_N_INSNS (3);
10785 break;
10787 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10788 return true;
10790 else
10792 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10793 if (CONST_INT_P (XEXP (x, 1))
10794 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10796 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10797 return true;
10800 return false;
10803 /* Not directly inside a set. If it involves the condition code
10804 register it must be the condition for a branch, cond_exec or
10805 I_T_E operation. Since the comparison is performed elsewhere
10806 this is just the control part which has no additional
10807 cost. */
10808 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10809 && XEXP (x, 1) == const0_rtx)
10811 *cost = 0;
10812 return true;
10814 return false;
10816 case ABS:
10817 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10818 && (mode == SFmode || !TARGET_VFP_SINGLE))
10820 if (speed_p)
10821 *cost += extra_cost->fp[mode != SFmode].neg;
10823 return false;
10825 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10827 *cost = LIBCALL_COST (1);
10828 return false;
10831 if (mode == SImode)
10833 if (speed_p)
10834 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10835 return false;
10837 /* Vector mode? */
10838 *cost = LIBCALL_COST (1);
10839 return false;
10841 case SIGN_EXTEND:
10842 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10843 && MEM_P (XEXP (x, 0)))
10845 if (mode == DImode)
10846 *cost += COSTS_N_INSNS (1);
10848 if (!speed_p)
10849 return true;
10851 if (GET_MODE (XEXP (x, 0)) == SImode)
10852 *cost += extra_cost->ldst.load;
10853 else
10854 *cost += extra_cost->ldst.load_sign_extend;
10856 if (mode == DImode)
10857 *cost += extra_cost->alu.shift;
10859 return true;
10862 /* Widening from less than 32-bits requires an extend operation. */
10863 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10865 /* We have SXTB/SXTH. */
10866 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10867 if (speed_p)
10868 *cost += extra_cost->alu.extend;
10870 else if (GET_MODE (XEXP (x, 0)) != SImode)
10872 /* Needs two shifts. */
10873 *cost += COSTS_N_INSNS (1);
10874 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10875 if (speed_p)
10876 *cost += 2 * extra_cost->alu.shift;
10879 /* Widening beyond 32-bits requires one more insn. */
10880 if (mode == DImode)
10882 *cost += COSTS_N_INSNS (1);
10883 if (speed_p)
10884 *cost += extra_cost->alu.shift;
10887 return true;
10889 case ZERO_EXTEND:
10890 if ((arm_arch4
10891 || GET_MODE (XEXP (x, 0)) == SImode
10892 || GET_MODE (XEXP (x, 0)) == QImode)
10893 && MEM_P (XEXP (x, 0)))
10895 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10897 if (mode == DImode)
10898 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10900 return true;
10903 /* Widening from less than 32-bits requires an extend operation. */
10904 if (GET_MODE (XEXP (x, 0)) == QImode)
10906 /* UXTB can be a shorter instruction in Thumb2, but it might
10907 be slower than the AND Rd, Rn, #255 alternative. When
10908 optimizing for speed it should never be slower to use
10909 AND, and we don't really model 16-bit vs 32-bit insns
10910 here. */
10911 if (speed_p)
10912 *cost += extra_cost->alu.logical;
10914 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10916 /* We have UXTB/UXTH. */
10917 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10918 if (speed_p)
10919 *cost += extra_cost->alu.extend;
10921 else if (GET_MODE (XEXP (x, 0)) != SImode)
10923 /* Needs two shifts. It's marginally preferable to use
10924 shifts rather than two BIC instructions as the second
10925 shift may merge with a subsequent insn as a shifter
10926 op. */
10927 *cost = COSTS_N_INSNS (2);
10928 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10929 if (speed_p)
10930 *cost += 2 * extra_cost->alu.shift;
10933 /* Widening beyond 32-bits requires one more insn. */
10934 if (mode == DImode)
10936 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10939 return true;
10941 case CONST_INT:
10942 *cost = 0;
10943 /* CONST_INT has no mode, so we cannot tell for sure how many
10944 insns are really going to be needed. The best we can do is
10945 look at the value passed. If it fits in SImode, then assume
10946 that's the mode it will be used for. Otherwise assume it
10947 will be used in DImode. */
10948 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10949 mode = SImode;
10950 else
10951 mode = DImode;
10953 /* Avoid blowing up in arm_gen_constant (). */
10954 if (!(outer_code == PLUS
10955 || outer_code == AND
10956 || outer_code == IOR
10957 || outer_code == XOR
10958 || outer_code == MINUS))
10959 outer_code = SET;
10961 const_int_cost:
10962 if (mode == SImode)
10964 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10965 INTVAL (x), NULL, NULL,
10966 0, 0));
10967 /* Extra costs? */
10969 else
10971 *cost += COSTS_N_INSNS (arm_gen_constant
10972 (outer_code, SImode, NULL,
10973 trunc_int_for_mode (INTVAL (x), SImode),
10974 NULL, NULL, 0, 0)
10975 + arm_gen_constant (outer_code, SImode, NULL,
10976 INTVAL (x) >> 32, NULL,
10977 NULL, 0, 0));
10978 /* Extra costs? */
10981 return true;
10983 case CONST:
10984 case LABEL_REF:
10985 case SYMBOL_REF:
10986 if (speed_p)
10988 if (arm_arch_thumb2 && !flag_pic)
10989 *cost += COSTS_N_INSNS (1);
10990 else
10991 *cost += extra_cost->ldst.load;
10993 else
10994 *cost += COSTS_N_INSNS (1);
10996 if (flag_pic)
10998 *cost += COSTS_N_INSNS (1);
10999 if (speed_p)
11000 *cost += extra_cost->alu.arith;
11003 return true;
11005 case CONST_FIXED:
11006 *cost = COSTS_N_INSNS (4);
11007 /* Fixme. */
11008 return true;
11010 case CONST_DOUBLE:
11011 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11012 && (mode == SFmode || !TARGET_VFP_SINGLE))
11014 if (vfp3_const_double_rtx (x))
11016 if (speed_p)
11017 *cost += extra_cost->fp[mode == DFmode].fpconst;
11018 return true;
11021 if (speed_p)
11023 if (mode == DFmode)
11024 *cost += extra_cost->ldst.loadd;
11025 else
11026 *cost += extra_cost->ldst.loadf;
11028 else
11029 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11031 return true;
11033 *cost = COSTS_N_INSNS (4);
11034 return true;
11036 case CONST_VECTOR:
11037 /* Fixme. */
11038 if (TARGET_NEON
11039 && TARGET_HARD_FLOAT
11040 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11041 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
11042 *cost = COSTS_N_INSNS (1);
11043 else
11044 *cost = COSTS_N_INSNS (4);
11045 return true;
11047 case HIGH:
11048 case LO_SUM:
11049 /* When optimizing for size, we prefer constant pool entries to
11050 MOVW/MOVT pairs, so bump the cost of these slightly. */
11051 if (!speed_p)
11052 *cost += 1;
11053 return true;
11055 case CLZ:
11056 if (speed_p)
11057 *cost += extra_cost->alu.clz;
11058 return false;
11060 case SMIN:
11061 if (XEXP (x, 1) == const0_rtx)
11063 if (speed_p)
11064 *cost += extra_cost->alu.log_shift;
11065 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11066 return true;
11068 /* Fall through. */
11069 case SMAX:
11070 case UMIN:
11071 case UMAX:
11072 *cost += COSTS_N_INSNS (1);
11073 return false;
11075 case TRUNCATE:
11076 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11077 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11078 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11079 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11080 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11081 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11082 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11083 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11084 == ZERO_EXTEND))))
11086 if (speed_p)
11087 *cost += extra_cost->mult[1].extend;
11088 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11089 ZERO_EXTEND, 0, speed_p)
11090 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11091 ZERO_EXTEND, 0, speed_p));
11092 return true;
11094 *cost = LIBCALL_COST (1);
11095 return false;
11097 case UNSPEC_VOLATILE:
11098 case UNSPEC:
11099 return arm_unspec_cost (x, outer_code, speed_p, cost);
11101 case PC:
11102 /* Reading the PC is like reading any other register. Writing it
11103 is more expensive, but we take that into account elsewhere. */
11104 *cost = 0;
11105 return true;
11107 case ZERO_EXTRACT:
11108 /* TODO: Simple zero_extract of bottom bits using AND. */
11109 /* Fall through. */
11110 case SIGN_EXTRACT:
11111 if (arm_arch6
11112 && mode == SImode
11113 && CONST_INT_P (XEXP (x, 1))
11114 && CONST_INT_P (XEXP (x, 2)))
11116 if (speed_p)
11117 *cost += extra_cost->alu.bfx;
11118 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11119 return true;
11121 /* Without UBFX/SBFX, need to resort to shift operations. */
11122 *cost += COSTS_N_INSNS (1);
11123 if (speed_p)
11124 *cost += 2 * extra_cost->alu.shift;
11125 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11126 return true;
11128 case FLOAT_EXTEND:
11129 if (TARGET_HARD_FLOAT)
11131 if (speed_p)
11132 *cost += extra_cost->fp[mode == DFmode].widen;
11133 if (!TARGET_FPU_ARMV8
11134 && GET_MODE (XEXP (x, 0)) == HFmode)
11136 /* Pre v8, widening HF->DF is a two-step process, first
11137 widening to SFmode. */
11138 *cost += COSTS_N_INSNS (1);
11139 if (speed_p)
11140 *cost += extra_cost->fp[0].widen;
11142 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11143 return true;
11146 *cost = LIBCALL_COST (1);
11147 return false;
11149 case FLOAT_TRUNCATE:
11150 if (TARGET_HARD_FLOAT)
11152 if (speed_p)
11153 *cost += extra_cost->fp[mode == DFmode].narrow;
11154 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11155 return true;
11156 /* Vector modes? */
11158 *cost = LIBCALL_COST (1);
11159 return false;
11161 case FMA:
11162 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11164 rtx op0 = XEXP (x, 0);
11165 rtx op1 = XEXP (x, 1);
11166 rtx op2 = XEXP (x, 2);
11169 /* vfms or vfnma. */
11170 if (GET_CODE (op0) == NEG)
11171 op0 = XEXP (op0, 0);
11173 /* vfnms or vfnma. */
11174 if (GET_CODE (op2) == NEG)
11175 op2 = XEXP (op2, 0);
11177 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11178 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11179 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11181 if (speed_p)
11182 *cost += extra_cost->fp[mode == DFmode].fma;
11184 return true;
11187 *cost = LIBCALL_COST (3);
11188 return false;
11190 case FIX:
11191 case UNSIGNED_FIX:
11192 if (TARGET_HARD_FLOAT)
11194 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11195 a vcvt fixed-point conversion. */
11196 if (code == FIX && mode == SImode
11197 && GET_CODE (XEXP (x, 0)) == FIX
11198 && GET_MODE (XEXP (x, 0)) == SFmode
11199 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11200 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11201 > 0)
11203 if (speed_p)
11204 *cost += extra_cost->fp[0].toint;
11206 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11207 code, 0, speed_p);
11208 return true;
11211 if (GET_MODE_CLASS (mode) == MODE_INT)
11213 mode = GET_MODE (XEXP (x, 0));
11214 if (speed_p)
11215 *cost += extra_cost->fp[mode == DFmode].toint;
11216 /* Strip off the 'cost' of rounding towards zero. */
11217 if (GET_CODE (XEXP (x, 0)) == FIX)
11218 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11219 0, speed_p);
11220 else
11221 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11222 /* ??? Increase the cost to deal with transferring from
11223 FP -> CORE registers? */
11224 return true;
11226 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11227 && TARGET_FPU_ARMV8)
11229 if (speed_p)
11230 *cost += extra_cost->fp[mode == DFmode].roundint;
11231 return false;
11233 /* Vector costs? */
11235 *cost = LIBCALL_COST (1);
11236 return false;
11238 case FLOAT:
11239 case UNSIGNED_FLOAT:
11240 if (TARGET_HARD_FLOAT)
11242 /* ??? Increase the cost to deal with transferring from CORE
11243 -> FP registers? */
11244 if (speed_p)
11245 *cost += extra_cost->fp[mode == DFmode].fromint;
11246 return false;
11248 *cost = LIBCALL_COST (1);
11249 return false;
11251 case CALL:
11252 return true;
11254 case ASM_OPERANDS:
11256 /* Just a guess: the number of instructions in the asm template
11257 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11258 though (see PR60663). */
11259 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11260 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11262 *cost = COSTS_N_INSNS (asm_length + num_operands);
11263 return true;
11265 default:
11266 if (mode != VOIDmode)
11267 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11268 else
11269 *cost = COSTS_N_INSNS (4); /* Who knows? */
11270 return false;
11274 #undef HANDLE_NARROW_SHIFT_ARITH
11276 /* RTX costs: dispatch to the appropriate size or speed cost routine. */
11277 static bool
11278 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11279 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11281 bool result;
11282 int code = GET_CODE (x);
11284 if (TARGET_OLD_RTX_COSTS
11285 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11287 /* Old way. (Deprecated.) */
11288 if (!speed)
11289 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11290 (enum rtx_code) outer_code, total);
11291 else
11292 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11293 (enum rtx_code) outer_code, total,
11294 speed);
11296 else
11298 /* New way. */
11299 if (current_tune->insn_extra_cost)
11300 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11301 (enum rtx_code) outer_code,
11302 current_tune->insn_extra_cost,
11303 total, speed);
11304 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11305 && current_tune->insn_extra_cost == NULL */
11306 else
11307 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11308 (enum rtx_code) outer_code,
11309 &generic_extra_costs, total, speed);
11312 if (dump_file && (dump_flags & TDF_DETAILS))
11314 print_rtl_single (dump_file, x);
11315 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11316 *total, result ? "final" : "partial");
11318 return result;
11321 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11322 supported on any "slowmul" cores, so it can be ignored. */
11324 static bool
11325 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11326 int *total, bool speed)
11328 machine_mode mode = GET_MODE (x);
11330 if (TARGET_THUMB)
11332 *total = thumb1_rtx_costs (x, code, outer_code);
11333 return true;
11336 switch (code)
11338 case MULT:
11339 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11340 || mode == DImode)
11342 *total = COSTS_N_INSNS (20);
11343 return false;
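/* Rough sketch of the Booth-style model used below: with a 2-bit Booth
   unit, a constant whose highest set bit is bit B needs about
   ceil ((B + 1) / 2) loop iterations.  E.g. for the (arbitrary) constant
   0x55 that is four iterations, so the cost becomes 4 (const_ok) + 4,
   i.e. COSTS_N_INSNS (8), plus the cost of operand 0.  */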
11346 if (CONST_INT_P (XEXP (x, 1)))
11348 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11349 & (unsigned HOST_WIDE_INT) 0xffffffff);
11350 int cost, const_ok = const_ok_for_arm (i);
11351 int j, booth_unit_size;
11353 /* Tune as appropriate. */
11354 cost = const_ok ? 4 : 8;
11355 booth_unit_size = 2;
11356 for (j = 0; i && j < 32; j += booth_unit_size)
11358 i >>= booth_unit_size;
11359 cost++;
11362 *total = COSTS_N_INSNS (cost);
11363 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
11364 return true;
11367 *total = COSTS_N_INSNS (20);
11368 return false;
11370 default:
11371 return arm_rtx_costs_1 (x, outer_code, total, speed);
11376 /* RTX cost for cores with a fast multiply unit (M variants). */
11378 static bool
11379 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11380 int *total, bool speed)
11382 machine_mode mode = GET_MODE (x);
11384 if (TARGET_THUMB1)
11386 *total = thumb1_rtx_costs (x, code, outer_code);
11387 return true;
11390 /* ??? Should Thumb-2 use different costs? */
11391 switch (code)
11393 case MULT:
11394 /* There is no point basing this on the tuning, since it is always the
11395 fast variant if it exists at all. */
11396 if (mode == DImode
11397 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11398 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11399 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11401 *total = COSTS_N_INSNS (2);
11402 return false;
11406 if (mode == DImode)
11408 *total = COSTS_N_INSNS (5);
11409 return false;
11412 if (CONST_INT_P (XEXP (x, 1)))
11414 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11415 & (unsigned HOST_WIDE_INT) 0xffffffff);
11416 int cost, const_ok = const_ok_for_arm (i);
11417 int j, booth_unit_size;
11419 /* Tune as appropriate. */
11420 cost = const_ok ? 4 : 8;
11421 booth_unit_size = 8;
11422 for (j = 0; i && j < 32; j += booth_unit_size)
11424 i >>= booth_unit_size;
11425 cost++;
11428 *total = COSTS_N_INSNS (cost);
11429 return false;
11432 if (mode == SImode)
11434 *total = COSTS_N_INSNS (4);
11435 return false;
11438 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11440 if (TARGET_HARD_FLOAT
11441 && (mode == SFmode
11442 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11444 *total = COSTS_N_INSNS (1);
11445 return false;
11449 /* Requires a libcall. */
11450 *total = COSTS_N_INSNS (20);
11451 return false;
11453 default:
11454 return arm_rtx_costs_1 (x, outer_code, total, speed);
11459 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11460 so it can be ignored. */
11462 static bool
11463 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11464 int *total, bool speed)
11466 machine_mode mode = GET_MODE (x);
11468 if (TARGET_THUMB)
11470 *total = thumb1_rtx_costs (x, code, outer_code);
11471 return true;
11474 switch (code)
11476 case COMPARE:
11477 if (GET_CODE (XEXP (x, 0)) != MULT)
11478 return arm_rtx_costs_1 (x, outer_code, total, speed);
11480 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11481 will stall until the multiplication is complete. */
11482 *total = COSTS_N_INSNS (3);
11483 return false;
11485 case MULT:
11486 /* There is no point basing this on the tuning, since it is always the
11487 fast variant if it exists at all. */
11488 if (mode == DImode
11489 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11490 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11491 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11493 *total = COSTS_N_INSNS (2);
11494 return false;
11498 if (mode == DImode)
11500 *total = COSTS_N_INSNS (5);
11501 return false;
11504 if (CONST_INT_P (XEXP (x, 1)))
11506 /* If operand 1 is a constant we can more accurately
11507 calculate the cost of the multiply. The multiplier can
11508 retire 15 bits on the first cycle and a further 12 on the
11509 second. We do, of course, have to load the constant into
11510 a register first. */
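/* Illustrative example with an arbitrary constant: for i == 0x12345 the
   value does not fit in 15 bits, so i & 0xffff8000 is nonzero and one
   extra cycle is added; i & 0xf8000000 is zero, so no second extra cycle
   is added, giving *total = COSTS_N_INSNS (2).  */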
11511 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11512 /* There's a general overhead of one cycle. */
11513 int cost = 1;
11514 unsigned HOST_WIDE_INT masked_const;
11516 if (i & 0x80000000)
11517 i = ~i;
11519 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11521 masked_const = i & 0xffff8000;
11522 if (masked_const != 0)
11524 cost++;
11525 masked_const = i & 0xf8000000;
11526 if (masked_const != 0)
11527 cost++;
11529 *total = COSTS_N_INSNS (cost);
11530 return false;
11533 if (mode == SImode)
11535 *total = COSTS_N_INSNS (3);
11536 return false;
11539 /* Requires a libcall. */
11540 *total = COSTS_N_INSNS (20);
11541 return false;
11543 default:
11544 return arm_rtx_costs_1 (x, outer_code, total, speed);
11549 /* RTX costs for 9e (and later) cores. */
11551 static bool
11552 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11553 int *total, bool speed)
11555 machine_mode mode = GET_MODE (x);
11557 if (TARGET_THUMB1)
11559 switch (code)
11561 case MULT:
11562 /* Small multiply: 32 cycles for an integer multiply inst. */
11563 if (arm_arch6m && arm_m_profile_small_mul)
11564 *total = COSTS_N_INSNS (32);
11565 else
11566 *total = COSTS_N_INSNS (3);
11567 return true;
11569 default:
11570 *total = thumb1_rtx_costs (x, code, outer_code);
11571 return true;
11575 switch (code)
11577 case MULT:
11578 /* There is no point basing this on the tuning, since it is always the
11579 fast variant if it exists at all. */
11580 if (mode == DImode
11581 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11582 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11583 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11585 *total = COSTS_N_INSNS (2);
11586 return false;
11590 if (mode == DImode)
11592 *total = COSTS_N_INSNS (5);
11593 return false;
11596 if (mode == SImode)
11598 *total = COSTS_N_INSNS (2);
11599 return false;
11602 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11604 if (TARGET_HARD_FLOAT
11605 && (mode == SFmode
11606 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11608 *total = COSTS_N_INSNS (1);
11609 return false;
11613 *total = COSTS_N_INSNS (20);
11614 return false;
11616 default:
11617 return arm_rtx_costs_1 (x, outer_code, total, speed);
11620 /* All address computations that can be done are free, but rtx cost returns
11621 the same for practically all of them. So we weight the different types
11622 of address here in the order (most pref first):
11623 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
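/* Roughly: an auto-increment address costs 0, reg+constant 2,
   reg+shifted-reg 3, reg+reg 4, a plain register 6, and a constant address
   (label or symbol) 10, matching the returns below.  */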
11624 static inline int
11625 arm_arm_address_cost (rtx x)
11627 enum rtx_code c = GET_CODE (x);
11629 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11630 return 0;
11631 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11632 return 10;
11634 if (c == PLUS)
11636 if (CONST_INT_P (XEXP (x, 1)))
11637 return 2;
11639 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11640 return 3;
11642 return 4;
11645 return 6;
11648 static inline int
11649 arm_thumb_address_cost (rtx x)
11651 enum rtx_code c = GET_CODE (x);
11653 if (c == REG)
11654 return 1;
11655 if (c == PLUS
11656 && REG_P (XEXP (x, 0))
11657 && CONST_INT_P (XEXP (x, 1)))
11658 return 1;
11660 return 2;
11663 static int
11664 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11665 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11667 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11670 /* Adjust cost hook for XScale. */
11671 static bool
11672 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11674 /* Some true dependencies can have a higher cost depending
11675 on precisely how certain input operands are used. */
11676 if (REG_NOTE_KIND(link) == 0
11677 && recog_memoized (insn) >= 0
11678 && recog_memoized (dep) >= 0)
11680 int shift_opnum = get_attr_shift (insn);
11681 enum attr_type attr_type = get_attr_type (dep);
11683 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11684 operand for INSN. If we have a shifted input operand and the
11685 instruction we depend on is another ALU instruction, then we may
11686 have to account for an additional stall. */
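/* A hypothetical sequence that hits this case:

       mov  r3, r1, lsl #2
       add  r0, r2, r3, asr #1

   The second instruction shifts r3, which is written by the first, so the
   dependency cost is raised below.  */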
11687 if (shift_opnum != 0
11688 && (attr_type == TYPE_ALU_SHIFT_IMM
11689 || attr_type == TYPE_ALUS_SHIFT_IMM
11690 || attr_type == TYPE_LOGIC_SHIFT_IMM
11691 || attr_type == TYPE_LOGICS_SHIFT_IMM
11692 || attr_type == TYPE_ALU_SHIFT_REG
11693 || attr_type == TYPE_ALUS_SHIFT_REG
11694 || attr_type == TYPE_LOGIC_SHIFT_REG
11695 || attr_type == TYPE_LOGICS_SHIFT_REG
11696 || attr_type == TYPE_MOV_SHIFT
11697 || attr_type == TYPE_MVN_SHIFT
11698 || attr_type == TYPE_MOV_SHIFT_REG
11699 || attr_type == TYPE_MVN_SHIFT_REG))
11701 rtx shifted_operand;
11702 int opno;
11704 /* Get the shifted operand. */
11705 extract_insn (insn);
11706 shifted_operand = recog_data.operand[shift_opnum];
11708 /* Iterate over all the operands in DEP. If we write an operand
11709 that overlaps with SHIFTED_OPERAND, then we have to increase the
11710 cost of this dependency. */
11711 extract_insn (dep);
11712 preprocess_constraints (dep);
11713 for (opno = 0; opno < recog_data.n_operands; opno++)
11715 /* We can ignore strict inputs. */
11716 if (recog_data.operand_type[opno] == OP_IN)
11717 continue;
11719 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11720 shifted_operand))
11722 *cost = 2;
11723 return false;
11728 return true;
11731 /* Adjust cost hook for Cortex A9. */
11732 static bool
11733 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11735 switch (REG_NOTE_KIND (link))
11737 case REG_DEP_ANTI:
11738 *cost = 0;
11739 return false;
11741 case REG_DEP_TRUE:
11742 case REG_DEP_OUTPUT:
11743 if (recog_memoized (insn) >= 0
11744 && recog_memoized (dep) >= 0)
11746 if (GET_CODE (PATTERN (insn)) == SET)
11748 if (GET_MODE_CLASS
11749 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11750 || GET_MODE_CLASS
11751 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11753 enum attr_type attr_type_insn = get_attr_type (insn);
11754 enum attr_type attr_type_dep = get_attr_type (dep);
11756 /* By default all dependencies of the form
11757 s0 = s0 <op> s1
11758 s0 = s0 <op> s2
11759 have an extra latency of 1 cycle because
11760 of the input and output dependency in this
11761 case. However, this gets modeled as a true
11762 dependency and hence all these checks. */
11763 if (REG_P (SET_DEST (PATTERN (insn)))
11764 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11766 /* FMACS is a special case where the dependent
11767 instruction can be issued 3 cycles before
11768 the normal latency in case of an output
11769 dependency. */
11770 if ((attr_type_insn == TYPE_FMACS
11771 || attr_type_insn == TYPE_FMACD)
11772 && (attr_type_dep == TYPE_FMACS
11773 || attr_type_dep == TYPE_FMACD))
11775 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11776 *cost = insn_default_latency (dep) - 3;
11777 else
11778 *cost = insn_default_latency (dep);
11779 return false;
11781 else
11783 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11784 *cost = insn_default_latency (dep) + 1;
11785 else
11786 *cost = insn_default_latency (dep);
11788 return false;
11793 break;
11795 default:
11796 gcc_unreachable ();
11799 return true;
11802 /* Adjust cost hook for FA726TE. */
11803 static bool
11804 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11806 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction followed
11807 by a predicated one) has a penalty of 3. */
11808 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11809 && recog_memoized (insn) >= 0
11810 && recog_memoized (dep) >= 0
11811 && get_attr_conds (dep) == CONDS_SET)
11813 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11814 if (get_attr_conds (insn) == CONDS_USE
11815 && get_attr_type (insn) != TYPE_BRANCH)
11817 *cost = 3;
11818 return false;
11821 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11822 || get_attr_conds (insn) == CONDS_USE)
11824 *cost = 0;
11825 return false;
11829 return true;
11832 /* Implement TARGET_REGISTER_MOVE_COST.
11834 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11835 such a move is typically more expensive than a single memory access. We set
11836 the cost to less than two memory accesses so that floating
11837 point to integer conversion does not go through memory. */
11840 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11841 reg_class_t from, reg_class_t to)
11843 if (TARGET_32BIT)
11845 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11846 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11847 return 15;
11848 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11849 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11850 return 4;
11851 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11852 return 20;
11853 else
11854 return 2;
11856 else
11858 if (from == HI_REGS || to == HI_REGS)
11859 return 4;
11860 else
11861 return 2;
11865 /* Implement TARGET_MEMORY_MOVE_COST. */
11868 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11869 bool in ATTRIBUTE_UNUSED)
11871 if (TARGET_32BIT)
11872 return 10;
11873 else
11875 if (GET_MODE_SIZE (mode) < 4)
11876 return 8;
11877 else
11878 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11882 /* Vectorizer cost model implementation. */
11884 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11885 static int
11886 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11887 tree vectype,
11888 int misalign ATTRIBUTE_UNUSED)
11890 unsigned elements;
11892 switch (type_of_cost)
11894 case scalar_stmt:
11895 return current_tune->vec_costs->scalar_stmt_cost;
11897 case scalar_load:
11898 return current_tune->vec_costs->scalar_load_cost;
11900 case scalar_store:
11901 return current_tune->vec_costs->scalar_store_cost;
11903 case vector_stmt:
11904 return current_tune->vec_costs->vec_stmt_cost;
11906 case vector_load:
11907 return current_tune->vec_costs->vec_align_load_cost;
11909 case vector_store:
11910 return current_tune->vec_costs->vec_store_cost;
11912 case vec_to_scalar:
11913 return current_tune->vec_costs->vec_to_scalar_cost;
11915 case scalar_to_vec:
11916 return current_tune->vec_costs->scalar_to_vec_cost;
11918 case unaligned_load:
11919 return current_tune->vec_costs->vec_unalign_load_cost;
11921 case unaligned_store:
11922 return current_tune->vec_costs->vec_unalign_store_cost;
11924 case cond_branch_taken:
11925 return current_tune->vec_costs->cond_taken_branch_cost;
11927 case cond_branch_not_taken:
11928 return current_tune->vec_costs->cond_not_taken_branch_cost;
11930 case vec_perm:
11931 case vec_promote_demote:
11932 return current_tune->vec_costs->vec_stmt_cost;
11934 case vec_construct:
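/* Building an N-element vector from scalars is costed at N/2 + 1
   statements, e.g. 3 for a four-element vector.  */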
11935 elements = TYPE_VECTOR_SUBPARTS (vectype);
11936 return elements / 2 + 1;
11938 default:
11939 gcc_unreachable ();
11943 /* Implement targetm.vectorize.add_stmt_cost. */
11945 static unsigned
11946 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11947 struct _stmt_vec_info *stmt_info, int misalign,
11948 enum vect_cost_model_location where)
11950 unsigned *cost = (unsigned *) data;
11951 unsigned retval = 0;
11953 if (flag_vect_cost_model)
11955 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11956 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11958 /* Statements in an inner loop relative to the loop being
11959 vectorized are weighted more heavily. The value here is
11960 arbitrary and could potentially be improved with analysis. */
11961 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11962 count *= 50; /* FIXME. */
11964 retval = (unsigned) (count * stmt_cost);
11965 cost[where] += retval;
11968 return retval;
11971 /* Return true if and only if this insn can dual-issue only as older. */
11972 static bool
11973 cortexa7_older_only (rtx_insn *insn)
11975 if (recog_memoized (insn) < 0)
11976 return false;
11978 switch (get_attr_type (insn))
11980 case TYPE_ALU_DSP_REG:
11981 case TYPE_ALU_SREG:
11982 case TYPE_ALUS_SREG:
11983 case TYPE_LOGIC_REG:
11984 case TYPE_LOGICS_REG:
11985 case TYPE_ADC_REG:
11986 case TYPE_ADCS_REG:
11987 case TYPE_ADR:
11988 case TYPE_BFM:
11989 case TYPE_REV:
11990 case TYPE_MVN_REG:
11991 case TYPE_SHIFT_IMM:
11992 case TYPE_SHIFT_REG:
11993 case TYPE_LOAD_BYTE:
11994 case TYPE_LOAD1:
11995 case TYPE_STORE1:
11996 case TYPE_FFARITHS:
11997 case TYPE_FADDS:
11998 case TYPE_FFARITHD:
11999 case TYPE_FADDD:
12000 case TYPE_FMOV:
12001 case TYPE_F_CVT:
12002 case TYPE_FCMPS:
12003 case TYPE_FCMPD:
12004 case TYPE_FCONSTS:
12005 case TYPE_FCONSTD:
12006 case TYPE_FMULS:
12007 case TYPE_FMACS:
12008 case TYPE_FMULD:
12009 case TYPE_FMACD:
12010 case TYPE_FDIVS:
12011 case TYPE_FDIVD:
12012 case TYPE_F_MRC:
12013 case TYPE_F_MRRC:
12014 case TYPE_F_FLAG:
12015 case TYPE_F_LOADS:
12016 case TYPE_F_STORES:
12017 return true;
12018 default:
12019 return false;
12023 /* Return true if and only if this insn can dual-issue as younger. */
12024 static bool
12025 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12027 if (recog_memoized (insn) < 0)
12029 if (verbose > 5)
12030 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12031 return false;
12034 switch (get_attr_type (insn))
12036 case TYPE_ALU_IMM:
12037 case TYPE_ALUS_IMM:
12038 case TYPE_LOGIC_IMM:
12039 case TYPE_LOGICS_IMM:
12040 case TYPE_EXTEND:
12041 case TYPE_MVN_IMM:
12042 case TYPE_MOV_IMM:
12043 case TYPE_MOV_REG:
12044 case TYPE_MOV_SHIFT:
12045 case TYPE_MOV_SHIFT_REG:
12046 case TYPE_BRANCH:
12047 case TYPE_CALL:
12048 return true;
12049 default:
12050 return false;
12055 /* Look for an instruction that can dual issue only as an older
12056 instruction, and move it in front of any instructions that can
12057 dual-issue as younger, while preserving the relative order of all
12058 other instructions in the ready list. This is a heuristic to help
12059 dual-issue in later cycles, by postponing issue of more flexible
12060 instructions. This heuristic may affect dual issue opportunities
12061 in the current cycle. */
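/* For example (hypothetical), if the ready list holds an ALU-immediate add
   (younger) ahead of a load (older-only), the load is moved in front of the
   add, leaving the more flexible add available for pairing on a later
   cycle.  */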
12062 static void
12063 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12064 int *n_readyp, int clock)
12066 int i;
12067 int first_older_only = -1, first_younger = -1;
12069 if (verbose > 5)
12070 fprintf (file,
12071 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12072 clock,
12073 *n_readyp);
12075 /* Traverse the ready list from the head (the instruction to issue
12076 first), looking for the first instruction that can issue as
12077 younger and the first instruction that can dual-issue only as
12078 older. */
12079 for (i = *n_readyp - 1; i >= 0; i--)
12081 rtx_insn *insn = ready[i];
12082 if (cortexa7_older_only (insn))
12084 first_older_only = i;
12085 if (verbose > 5)
12086 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12087 break;
12089 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12090 first_younger = i;
12093 /* Nothing to reorder because either no younger insn found or insn
12094 that can dual-issue only as older appears before any insn that
12095 can dual-issue as younger. */
12096 if (first_younger == -1)
12098 if (verbose > 5)
12099 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12100 return;
12103 /* Nothing to reorder because no older-only insn in the ready list. */
12104 if (first_older_only == -1)
12106 if (verbose > 5)
12107 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12108 return;
12111 /* Move first_older_only insn before first_younger. */
12112 if (verbose > 5)
12113 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12114 INSN_UID(ready [first_older_only]),
12115 INSN_UID(ready [first_younger]));
12116 rtx_insn *first_older_only_insn = ready [first_older_only];
12117 for (i = first_older_only; i < first_younger; i++)
12119 ready[i] = ready[i+1];
12122 ready[i] = first_older_only_insn;
12123 return;
12126 /* Implement TARGET_SCHED_REORDER. */
12127 static int
12128 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12129 int clock)
12131 switch (arm_tune)
12133 case cortexa7:
12134 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12135 break;
12136 default:
12137 /* Do nothing for other cores. */
12138 break;
12141 return arm_issue_rate ();
12144 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12145 It corrects the value of COST based on the relationship between
12146 INSN and DEP through the dependence LINK. It returns the new
12147 value. There is a per-core adjust_cost hook to adjust scheduler costs
12148 and the per-core hook can choose to completely override the generic
12149 adjust_cost function. Only put bits of code into arm_adjust_cost that
12150 are common across all cores. */
12151 static int
12152 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
12154 rtx i_pat, d_pat;
12156 /* When generating Thumb-1 code, we want to place flag-setting operations
12157 close to a conditional branch which depends on them, so that we can
12158 omit the comparison. */
12159 if (TARGET_THUMB1
12160 && REG_NOTE_KIND (link) == 0
12161 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12162 && recog_memoized (dep) >= 0
12163 && get_attr_conds (dep) == CONDS_SET)
12164 return 0;
12166 if (current_tune->sched_adjust_cost != NULL)
12168 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
12169 return cost;
12172 /* XXX Is this strictly true? */
12173 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
12174 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
12175 return 0;
12177 /* Call insns don't incur a stall, even if they follow a load. */
12178 if (REG_NOTE_KIND (link) == 0
12179 && CALL_P (insn))
12180 return 1;
12182 if ((i_pat = single_set (insn)) != NULL
12183 && MEM_P (SET_SRC (i_pat))
12184 && (d_pat = single_set (dep)) != NULL
12185 && MEM_P (SET_DEST (d_pat)))
12187 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12188 /* This is a load after a store; there is no conflict if the load reads
12189 from a cached area. Assume that loads from the stack, and from the
12190 constant pool are cached, and that others will miss. This is a
12191 hack. */
12193 if ((GET_CODE (src_mem) == SYMBOL_REF
12194 && CONSTANT_POOL_ADDRESS_P (src_mem))
12195 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12196 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12197 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12198 return 1;
12201 return cost;
12205 arm_max_conditional_execute (void)
12207 return max_insns_skipped;
12210 static int
12211 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12213 if (TARGET_32BIT)
12214 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12215 else
12216 return (optimize > 0) ? 2 : 0;
12219 static int
12220 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12222 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12225 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12226 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12227 sequences of non-executed instructions in IT blocks probably take the same
12228 amount of time as executed instructions (and the IT instruction itself takes
12229 space in icache). This function was experimentally determined to give good
12230 results on a popular embedded benchmark. */
12232 static int
12233 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12235 return (TARGET_32BIT && speed_p) ? 1
12236 : arm_default_branch_cost (speed_p, predictable_p);
12239 static int
12240 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12242 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12245 static bool fp_consts_inited = false;
12247 static REAL_VALUE_TYPE value_fp0;
12249 static void
12250 init_fp_table (void)
12252 REAL_VALUE_TYPE r;
12254 r = REAL_VALUE_ATOF ("0", DFmode);
12255 value_fp0 = r;
12256 fp_consts_inited = true;
12259 /* Return TRUE if rtx X is a valid immediate FP constant. */
12261 arm_const_double_rtx (rtx x)
12263 const REAL_VALUE_TYPE *r;
12265 if (!fp_consts_inited)
12266 init_fp_table ();
12268 r = CONST_DOUBLE_REAL_VALUE (x);
12269 if (REAL_VALUE_MINUS_ZERO (*r))
12270 return 0;
12272 if (real_equal (r, &value_fp0))
12273 return 1;
12275 return 0;
12278 /* VFPv3 has a fairly wide range of representable immediates, formed from
12279 "quarter-precision" floating-point values. These can be evaluated using this
12280 formula (with ^ for exponentiation):
12282 -1^s * n * 2^-r
12284 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12285 16 <= n <= 31 and 0 <= r <= 7.
12287 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12289 - A (most-significant) is the sign bit.
12290 - BCD are the exponent (encoded as r XOR 3).
12291 - EFGH are the mantissa (encoded as n - 16).
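   For example, 1.0 can be written as +16 * 2^-4 (s = 0, n = 16, r = 4),
   giving the encoding ABCDEFGH = 0 111 0000, i.e. 0x70.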
12294 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12295 fconst[sd] instruction, or -1 if X isn't suitable. */
12296 static int
12297 vfp3_const_double_index (rtx x)
12299 REAL_VALUE_TYPE r, m;
12300 int sign, exponent;
12301 unsigned HOST_WIDE_INT mantissa, mant_hi;
12302 unsigned HOST_WIDE_INT mask;
12303 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12304 bool fail;
12306 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12307 return -1;
12309 r = *CONST_DOUBLE_REAL_VALUE (x);
12311 /* We can't represent these things, so detect them first. */
12312 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12313 return -1;
12315 /* Extract sign, exponent and mantissa. */
12316 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12317 r = real_value_abs (&r);
12318 exponent = REAL_EXP (&r);
12319 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12320 highest (sign) bit, with a fixed binary point at bit point_pos.
12321 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12322 bits for the mantissa, this may fail (low bits would be lost). */
12323 real_ldexp (&m, &r, point_pos - exponent);
12324 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12325 mantissa = w.elt (0);
12326 mant_hi = w.elt (1);
12328 /* If there are bits set in the low part of the mantissa, we can't
12329 represent this value. */
12330 if (mantissa != 0)
12331 return -1;
12333 /* Now make it so that mantissa contains the most-significant bits, and move
12334 the point_pos to indicate that the least-significant bits have been
12335 discarded. */
12336 point_pos -= HOST_BITS_PER_WIDE_INT;
12337 mantissa = mant_hi;
12339 /* We can permit four significant bits of mantissa only, plus a high bit
12340 which is always 1. */
12341 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12342 if ((mantissa & mask) != 0)
12343 return -1;
12345 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12346 mantissa >>= point_pos - 5;
12348 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12349 floating-point immediate zero with Neon using an integer-zero load, but
12350 that case is handled elsewhere.) */
12351 if (mantissa == 0)
12352 return -1;
12354 gcc_assert (mantissa >= 16 && mantissa <= 31);
12356 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12357 normalized significands are in the range [1, 2). (Our mantissa is shifted
12358 left 4 places at this point relative to normalized IEEE754 values). GCC
12359 internally uses [0.5, 1) (see real.c), so the exponent returned from
12360 REAL_EXP must be altered. */
12361 exponent = 5 - exponent;
12363 if (exponent < 0 || exponent > 7)
12364 return -1;
12366 /* Sign, mantissa and exponent are now in the correct form to plug into the
12367 formula described in the comment above. */
12368 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12371 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12373 vfp3_const_double_rtx (rtx x)
12375 if (!TARGET_VFP3)
12376 return 0;
12378 return vfp3_const_double_index (x) != -1;
12381 /* Recognize immediates which can be used in various Neon instructions. Legal
12382 immediates are described by the following table (for VMVN variants, the
12383 bitwise inverse of the constant shown is recognized. In either case, VMOV
12384 is output and the correct instruction to use for a given constant is chosen
12385 by the assembler). The constant shown is replicated across all elements of
12386 the destination vector.
12388 insn elems variant constant (binary)
12389 ---- ----- ------- -----------------
12390 vmov i32 0 00000000 00000000 00000000 abcdefgh
12391 vmov i32 1 00000000 00000000 abcdefgh 00000000
12392 vmov i32 2 00000000 abcdefgh 00000000 00000000
12393 vmov i32 3 abcdefgh 00000000 00000000 00000000
12394 vmov i16 4 00000000 abcdefgh
12395 vmov i16 5 abcdefgh 00000000
12396 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12397 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12398 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12399 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12400 vmvn i16 10 00000000 abcdefgh
12401 vmvn i16 11 abcdefgh 00000000
12402 vmov i32 12 00000000 00000000 abcdefgh 11111111
12403 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12404 vmov i32 14 00000000 abcdefgh 11111111 11111111
12405 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12406 vmov i8 16 abcdefgh
12407 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12408 eeeeeeee ffffffff gggggggg hhhhhhhh
12409 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12410 vmov f32 19 00000000 00000000 00000000 00000000
12412 For case 18, B = !b. Representable values are exactly those accepted by
12413 vfp3_const_double_index, but are output as floating-point numbers rather
12414 than indices.
12416 For case 19, we will change it to vmov.i32 when assembling.
12418 Variants 0-5 (inclusive) may also be used as immediates for the second
12419 operand of VORR/VBIC instructions.
12421 The INVERSE argument causes the bitwise inverse of the given operand to be
12422 recognized instead (used for recognizing legal immediates for the VAND/VORN
12423 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12424 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12425 output, rather than the real insns vbic/vorr).
12427 INVERSE makes no difference to the recognition of float vectors.
12429 The return value is the variant of immediate as shown in the above table, or
12430 -1 if the given value doesn't match any of the listed patterns.
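   For example, a V4SImode vector whose elements are all 0x00ab0000 matches
   variant 2 above, with abcdefgh = 0xab.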
12432 static int
12433 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12434 rtx *modconst, int *elementwidth)
12436 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12437 matches = 1; \
12438 for (i = 0; i < idx; i += (STRIDE)) \
12439 if (!(TEST)) \
12440 matches = 0; \
12441 if (matches) \
12443 immtype = (CLASS); \
12444 elsize = (ELSIZE); \
12445 break; \
12448 unsigned int i, elsize = 0, idx = 0, n_elts;
12449 unsigned int innersize;
12450 unsigned char bytes[16];
12451 int immtype = -1, matches;
12452 unsigned int invmask = inverse ? 0xff : 0;
12453 bool vector = GET_CODE (op) == CONST_VECTOR;
12455 if (vector)
12456 n_elts = CONST_VECTOR_NUNITS (op);
12457 else
12459 n_elts = 1;
12460 if (mode == VOIDmode)
12461 mode = DImode;
12464 innersize = GET_MODE_UNIT_SIZE (mode);
12466 /* Vectors of float constants. */
12467 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12469 rtx el0 = CONST_VECTOR_ELT (op, 0);
12470 const REAL_VALUE_TYPE *r0;
12472 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12473 return -1;
12475 /* FP16 vectors cannot be represented. */
12476 if (GET_MODE_INNER (mode) == HFmode)
12477 return -1;
12479 r0 = CONST_DOUBLE_REAL_VALUE (el0);
12481 for (i = 1; i < n_elts; i++)
12483 rtx elt = CONST_VECTOR_ELT (op, i);
12484 if (!real_equal (r0, CONST_DOUBLE_REAL_VALUE (elt)))
12485 return -1;
12488 if (modconst)
12489 *modconst = CONST_VECTOR_ELT (op, 0);
12491 if (elementwidth)
12492 *elementwidth = 0;
12494 if (el0 == CONST0_RTX (GET_MODE (el0)))
12495 return 19;
12496 else
12497 return 18;
12500 /* Splat vector constant out into a byte vector. */
12501 for (i = 0; i < n_elts; i++)
12503 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12504 unsigned HOST_WIDE_INT elpart;
12506 gcc_assert (CONST_INT_P (el));
12507 elpart = INTVAL (el);
12509 for (unsigned int byte = 0; byte < innersize; byte++)
12511 bytes[idx++] = (elpart & 0xff) ^ invmask;
12512 elpart >>= BITS_PER_UNIT;
12516 /* Sanity check. */
12517 gcc_assert (idx == GET_MODE_SIZE (mode));
12521 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12522 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12524 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12525 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12527 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12528 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12530 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12531 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12533 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12535 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12537 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12538 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12540 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12541 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12543 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12544 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12546 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12547 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12549 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12551 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12553 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12554 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12556 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12557 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12559 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12560 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12562 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12563 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12565 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12567 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12568 && bytes[i] == bytes[(i + 8) % idx]);
12570 while (0);
12572 if (immtype == -1)
12573 return -1;
12575 if (elementwidth)
12576 *elementwidth = elsize;
12578 if (modconst)
12580 unsigned HOST_WIDE_INT imm = 0;
12582 /* Un-invert bytes of recognized vector, if necessary. */
12583 if (invmask != 0)
12584 for (i = 0; i < idx; i++)
12585 bytes[i] ^= invmask;
12587 if (immtype == 17)
12589 /* FIXME: Broken on 32-bit H_W_I hosts. */
12590 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12592 for (i = 0; i < 8; i++)
12593 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12594 << (i * BITS_PER_UNIT);
12596 *modconst = GEN_INT (imm);
12598 else
12600 unsigned HOST_WIDE_INT imm = 0;
12602 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12603 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12605 *modconst = GEN_INT (imm);
12609 return immtype;
12610 #undef CHECK
12613 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12614 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12615 float elements), and a modified constant (whatever should be output for a
12616 VMOV) in *MODCONST. */
12619 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12620 rtx *modconst, int *elementwidth)
12622 rtx tmpconst;
12623 int tmpwidth;
12624 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12626 if (retval == -1)
12627 return 0;
12629 if (modconst)
12630 *modconst = tmpconst;
12632 if (elementwidth)
12633 *elementwidth = tmpwidth;
12635 return 1;
12638 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12639 the immediate is valid, write a constant suitable for using as an operand
12640 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12641 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12644 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12645 rtx *modconst, int *elementwidth)
12647 rtx tmpconst;
12648 int tmpwidth;
12649 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12651 if (retval < 0 || retval > 5)
12652 return 0;
12654 if (modconst)
12655 *modconst = tmpconst;
12657 if (elementwidth)
12658 *elementwidth = tmpwidth;
12660 return 1;
12663 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12664 the immediate is valid, write a constant suitable for using as an operand
12665 to VSHR/VSHL to *MODCONST and the corresponding element width to
12666 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12667 which have different limitations. */
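/* For a 32-bit element size, for instance, the valid left-shift counts are
   0-31 and the valid right-shift counts are 1-32, as enforced below.  */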
12670 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12671 rtx *modconst, int *elementwidth,
12672 bool isleftshift)
12674 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12675 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12676 unsigned HOST_WIDE_INT last_elt = 0;
12677 unsigned HOST_WIDE_INT maxshift;
12679 /* All elements of the shift-count vector must be the same constant. */
12680 for (i = 0; i < n_elts; i++)
12682 rtx el = CONST_VECTOR_ELT (op, i);
12683 unsigned HOST_WIDE_INT elpart;
12685 if (CONST_INT_P (el))
12686 elpart = INTVAL (el);
12687 else if (CONST_DOUBLE_P (el))
12688 return 0;
12689 else
12690 gcc_unreachable ();
12692 if (i != 0 && elpart != last_elt)
12693 return 0;
12695 last_elt = elpart;
12698 /* Shift less than element size. */
12699 maxshift = innersize * 8;
12701 if (isleftshift)
12703 /* Left shift immediate value can be from 0 to <size>-1. */
12704 if (last_elt >= maxshift)
12705 return 0;
12707 else
12709 /* Right shift immediate value can be from 1 to <size>. */
12710 if (last_elt == 0 || last_elt > maxshift)
12711 return 0;
12714 if (elementwidth)
12715 *elementwidth = innersize * 8;
12717 if (modconst)
12718 *modconst = CONST_VECTOR_ELT (op, 0);
12720 return 1;
12723 /* Return a string suitable for output of Neon immediate logic operation
12724 MNEM. */
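/* For example, with MNEM "vorr", a 32-bit element width and QUAD set, the
   template returned is "vorr.i32\t%q0, %2".  */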
12726 char *
12727 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12728 int inverse, int quad)
12730 int width, is_valid;
12731 static char templ[40];
12733 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12735 gcc_assert (is_valid != 0);
12737 if (quad)
12738 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12739 else
12740 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12742 return templ;
12745 /* Return a string suitable for output of Neon immediate shift operation
12746 (VSHR or VSHL) MNEM. */
12748 char *
12749 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12750 machine_mode mode, int quad,
12751 bool isleftshift)
12753 int width, is_valid;
12754 static char templ[40];
12756 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12757 gcc_assert (is_valid != 0);
12759 if (quad)
12760 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12761 else
12762 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12764 return templ;
12767 /* Output a sequence of pairwise operations to implement a reduction.
12768 NOTE: We do "too much work" here, because pairwise operations work on two
12769 registers-worth of operands in one go. Unfortunately I don't think we can
12770 exploit those extra calculations to do the full operation in fewer steps.
12771 Although all vector elements of the result but the first are ignored, we
12772 actually calculate the same result in each of the elements. An alternative
12773 such as initially loading a vector with zero to use as each of the second
12774 operands would use up an additional register and take an extra instruction,
12775 for no particular gain. */
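/* For a pairwise-add reduction of a four-element vector, for example, two
   steps are emitted: {a,b,c,d} -> {a+b,c+d,a+b,c+d} -> {a+b+c+d,...}, and
   only element 0 of the result is subsequently used.  */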
12777 void
12778 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12779 rtx (*reduc) (rtx, rtx, rtx))
12781 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12782 rtx tmpsum = op1;
12784 for (i = parts / 2; i >= 1; i /= 2)
12786 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12787 emit_insn (reduc (dest, tmpsum, tmpsum));
12788 tmpsum = dest;
12792 /* If VALS is a vector constant that can be loaded into a register
12793 using VDUP, generate instructions to do so and return an RTX to
12794 assign to the register. Otherwise return NULL_RTX. */
12796 static rtx
12797 neon_vdup_constant (rtx vals)
12799 machine_mode mode = GET_MODE (vals);
12800 machine_mode inner_mode = GET_MODE_INNER (mode);
12801 rtx x;
12803 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12804 return NULL_RTX;
12806 if (!const_vec_duplicate_p (vals, &x))
12807 /* The elements are not all the same. We could handle repeating
12808 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12809 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12810 vdup.i16). */
12811 return NULL_RTX;
12813 /* We can load this constant by using VDUP and a constant in a
12814 single ARM register. This will be cheaper than a vector
12815 load. */
12817 x = copy_to_mode_reg (inner_mode, x);
12818 return gen_rtx_VEC_DUPLICATE (mode, x);
12821 /* Generate code to load VALS, which is a PARALLEL containing only
12822 constants (for vec_init) or CONST_VECTOR, efficiently into a
12823 register. Returns an RTX to copy into the register, or NULL_RTX
12824 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12827 neon_make_constant (rtx vals)
12829 machine_mode mode = GET_MODE (vals);
12830 rtx target;
12831 rtx const_vec = NULL_RTX;
12832 int n_elts = GET_MODE_NUNITS (mode);
12833 int n_const = 0;
12834 int i;
12836 if (GET_CODE (vals) == CONST_VECTOR)
12837 const_vec = vals;
12838 else if (GET_CODE (vals) == PARALLEL)
12840 /* A CONST_VECTOR must contain only CONST_INTs and
12841 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12842 Only store valid constants in a CONST_VECTOR. */
12843 for (i = 0; i < n_elts; ++i)
12845 rtx x = XVECEXP (vals, 0, i);
12846 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12847 n_const++;
12849 if (n_const == n_elts)
12850 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12852 else
12853 gcc_unreachable ();
12855 if (const_vec != NULL
12856 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12857 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12858 return const_vec;
12859 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12860 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12861 pipeline cycle; creating the constant takes one or two ARM
12862 pipeline cycles. */
12863 return target;
12864 else if (const_vec != NULL_RTX)
12865 /* Load from constant pool. On Cortex-A8 this takes two cycles
12866 (for either double or quad vectors). We can not take advantage
12867 of single-cycle VLD1 because we need a PC-relative addressing
12868 mode. */
12869 return const_vec;
12870 else
12871 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12872 We can not construct an initializer. */
12873 return NULL_RTX;
12876 /* Initialize vector TARGET to VALS. */
12878 void
12879 neon_expand_vector_init (rtx target, rtx vals)
12881 machine_mode mode = GET_MODE (target);
12882 machine_mode inner_mode = GET_MODE_INNER (mode);
12883 int n_elts = GET_MODE_NUNITS (mode);
12884 int n_var = 0, one_var = -1;
12885 bool all_same = true;
12886 rtx x, mem;
12887 int i;
12889 for (i = 0; i < n_elts; ++i)
12891 x = XVECEXP (vals, 0, i);
12892 if (!CONSTANT_P (x))
12893 ++n_var, one_var = i;
12895 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12896 all_same = false;
12899 if (n_var == 0)
12901 rtx constant = neon_make_constant (vals);
12902 if (constant != NULL_RTX)
12904 emit_move_insn (target, constant);
12905 return;
12909 /* Splat a single non-constant element if we can. */
12910 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12912 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12913 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12914 return;
12917 /* One field is non-constant. Load constant then overwrite varying
12918 field. This is more efficient than using the stack. */
12919 if (n_var == 1)
12921 rtx copy = copy_rtx (vals);
12922 rtx index = GEN_INT (one_var);
12924 /* Load constant part of vector, substitute neighboring value for
12925 varying element. */
12926 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12927 neon_expand_vector_init (target, copy);
12929 /* Insert variable. */
12930 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12931 switch (mode)
12933 case V8QImode:
12934 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12935 break;
12936 case V16QImode:
12937 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12938 break;
12939 case V4HImode:
12940 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12941 break;
12942 case V8HImode:
12943 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12944 break;
12945 case V2SImode:
12946 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12947 break;
12948 case V4SImode:
12949 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12950 break;
12951 case V2SFmode:
12952 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12953 break;
12954 case V4SFmode:
12955 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12956 break;
12957 case V2DImode:
12958 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12959 break;
12960 default:
12961 gcc_unreachable ();
12963 return;
12966 /* Construct the vector in memory one field at a time
12967 and load the whole vector. */
12968 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12969 for (i = 0; i < n_elts; i++)
12970 emit_move_insn (adjust_address_nv (mem, inner_mode,
12971 i * GET_MODE_SIZE (inner_mode)),
12972 XVECEXP (vals, 0, i));
12973 emit_move_insn (target, mem);
12976 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12977 an error (using DESC) if it doesn't. EXP indicates the source location, which includes the
12978 inlining history for intrinsics. */
12980 static void
12981 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12982 const_tree exp, const char *desc)
12984 HOST_WIDE_INT lane;
12986 gcc_assert (CONST_INT_P (operand));
12988 lane = INTVAL (operand);
12990 if (lane < low || lane >= high)
12992 if (exp)
12993 error ("%K%s %wd out of range %wd - %wd",
12994 exp, desc, lane, low, high - 1);
12995 else
12996 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13000 /* Bounds-check lanes. */
13002 void
13003 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13004 const_tree exp)
13006 bounds_check (operand, low, high, exp, "lane");
13009 /* Bounds-check constants. */
13011 void
13012 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13014 bounds_check (operand, low, high, NULL_TREE, "constant");
13017 HOST_WIDE_INT
13018 neon_element_bits (machine_mode mode)
13020 return GET_MODE_UNIT_BITSIZE (mode);
13024 /* Predicates for `match_operand' and `match_operator'. */
13026 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13027 WB is true if full writeback address modes are allowed and is false
13028 if limited writeback address modes (POST_INC and PRE_DEC) are
13029 allowed. */
13032 arm_coproc_mem_operand (rtx op, bool wb)
13034 rtx ind;
13036 /* Reject eliminable registers. */
13037 if (! (reload_in_progress || reload_completed || lra_in_progress)
13038 && ( reg_mentioned_p (frame_pointer_rtx, op)
13039 || reg_mentioned_p (arg_pointer_rtx, op)
13040 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13041 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13042 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13043 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13044 return FALSE;
13046 /* Constants are converted into offsets from labels. */
13047 if (!MEM_P (op))
13048 return FALSE;
13050 ind = XEXP (op, 0);
13052 if (reload_completed
13053 && (GET_CODE (ind) == LABEL_REF
13054 || (GET_CODE (ind) == CONST
13055 && GET_CODE (XEXP (ind, 0)) == PLUS
13056 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13057 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13058 return TRUE;
13060 /* Match: (mem (reg)). */
13061 if (REG_P (ind))
13062 return arm_address_register_rtx_p (ind, 0);
13064 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13065 acceptable in any case (subject to verification by
13066 arm_address_register_rtx_p). We need WB to be true to accept
13067 PRE_INC and POST_DEC. */
13068 if (GET_CODE (ind) == POST_INC
13069 || GET_CODE (ind) == PRE_DEC
13070 || (wb
13071 && (GET_CODE (ind) == PRE_INC
13072 || GET_CODE (ind) == POST_DEC)))
13073 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13075 if (wb
13076 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13077 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13078 && GET_CODE (XEXP (ind, 1)) == PLUS
13079 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13080 ind = XEXP (ind, 1);
13082 /* Match:
13083 (plus (reg)
13084 (const)). */
13085 if (GET_CODE (ind) == PLUS
13086 && REG_P (XEXP (ind, 0))
13087 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13088 && CONST_INT_P (XEXP (ind, 1))
13089 && INTVAL (XEXP (ind, 1)) > -1024
13090 && INTVAL (XEXP (ind, 1)) < 1024
13091 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13092 return TRUE;
13094 return FALSE;
13097 /* Return TRUE if OP is a memory operand which we can load or store a vector
13098 to/from. TYPE is one of the following values:
13099 0 - Vector load/store (vldr)
13100 1 - Core registers (ldm)
13101 2 - Element/structure loads (vld1)
13104 neon_vector_mem_operand (rtx op, int type, bool strict)
13106 rtx ind;
13108 /* Reject eliminable registers. */
13109 if (strict && ! (reload_in_progress || reload_completed)
13110 && (reg_mentioned_p (frame_pointer_rtx, op)
13111 || reg_mentioned_p (arg_pointer_rtx, op)
13112 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13113 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13114 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13115 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13116 return FALSE;
13118 /* Constants are converted into offsets from labels. */
13119 if (!MEM_P (op))
13120 return FALSE;
13122 ind = XEXP (op, 0);
13124 if (reload_completed
13125 && (GET_CODE (ind) == LABEL_REF
13126 || (GET_CODE (ind) == CONST
13127 && GET_CODE (XEXP (ind, 0)) == PLUS
13128 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13129 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13130 return TRUE;
13132 /* Match: (mem (reg)). */
13133 if (REG_P (ind))
13134 return arm_address_register_rtx_p (ind, 0);
13136 /* Allow post-increment with Neon registers. */
13137 if ((type != 1 && GET_CODE (ind) == POST_INC)
13138 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13139 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13141 /* Allow post-increment by register for VLDn */
13142 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13143 && GET_CODE (XEXP (ind, 1)) == PLUS
13144 && REG_P (XEXP (XEXP (ind, 1), 1)))
13145 return true;
13147 /* Match:
13148 (plus (reg)
13149 (const)). */
13150 if (type == 0
13151 && GET_CODE (ind) == PLUS
13152 && REG_P (XEXP (ind, 0))
13153 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13154 && CONST_INT_P (XEXP (ind, 1))
13155 && INTVAL (XEXP (ind, 1)) > -1024
13156 /* For quad modes, we restrict the constant offset to be slightly less
13157 than what the instruction format permits. We have no such constraint
13158 on double mode offsets. (This must match arm_legitimate_index_p.) */
13159 && (INTVAL (XEXP (ind, 1))
13160 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13161 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13162 return TRUE;
13164 return FALSE;
13167 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13168 type. */
13170 neon_struct_mem_operand (rtx op)
13172 rtx ind;
13174 /* Reject eliminable registers. */
13175 if (! (reload_in_progress || reload_completed)
13176 && ( reg_mentioned_p (frame_pointer_rtx, op)
13177 || reg_mentioned_p (arg_pointer_rtx, op)
13178 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13179 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13180 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13181 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13182 return FALSE;
13184 /* Constants are converted into offsets from labels. */
13185 if (!MEM_P (op))
13186 return FALSE;
13188 ind = XEXP (op, 0);
13190 if (reload_completed
13191 && (GET_CODE (ind) == LABEL_REF
13192 || (GET_CODE (ind) == CONST
13193 && GET_CODE (XEXP (ind, 0)) == PLUS
13194 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13195 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13196 return TRUE;
13198 /* Match: (mem (reg)). */
13199 if (REG_P (ind))
13200 return arm_address_register_rtx_p (ind, 0);
13202 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13203 if (GET_CODE (ind) == POST_INC
13204 || GET_CODE (ind) == PRE_DEC)
13205 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13207 return FALSE;
13210 /* Return true if X is a register that will be eliminated later on. */
13212 arm_eliminable_register (rtx x)
13214 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13215 || REGNO (x) == ARG_POINTER_REGNUM
13216 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13217 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13220 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13221 coprocessor registers. Otherwise return NO_REGS. */
13223 enum reg_class
13224 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13226 if (mode == HFmode)
13228 if (!TARGET_NEON_FP16)
13229 return GENERAL_REGS;
13230 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13231 return NO_REGS;
13232 return GENERAL_REGS;
13235 /* The neon move patterns handle all legitimate vector and struct
13236 addresses. */
13237 if (TARGET_NEON
13238 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13239 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13240 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13241 || VALID_NEON_STRUCT_MODE (mode)))
13242 return NO_REGS;
13244 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13245 return NO_REGS;
13247 return GENERAL_REGS;
13250 /* Values which must be returned in the most-significant end of the return
13251 register. */
13253 static bool
13254 arm_return_in_msb (const_tree valtype)
13256 return (TARGET_AAPCS_BASED
13257 && BYTES_BIG_ENDIAN
13258 && (AGGREGATE_TYPE_P (valtype)
13259 || TREE_CODE (valtype) == COMPLEX_TYPE
13260 || FIXED_POINT_TYPE_P (valtype)));
13263 /* Return TRUE if X references a SYMBOL_REF. */
13265 symbol_mentioned_p (rtx x)
13267 const char * fmt;
13268 int i;
13270 if (GET_CODE (x) == SYMBOL_REF)
13271 return 1;
13273 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13274 are constant offsets, not symbols. */
13275 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13276 return 0;
13278 fmt = GET_RTX_FORMAT (GET_CODE (x));
13280 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13282 if (fmt[i] == 'E')
13284 int j;
13286 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13287 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13288 return 1;
13290 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13291 return 1;
13294 return 0;
13297 /* Return TRUE if X references a LABEL_REF. */
13299 label_mentioned_p (rtx x)
13301 const char * fmt;
13302 int i;
13304 if (GET_CODE (x) == LABEL_REF)
13305 return 1;
13307 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13308 instruction, but they are constant offsets, not symbols. */
13309 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13310 return 0;
13312 fmt = GET_RTX_FORMAT (GET_CODE (x));
13313 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13315 if (fmt[i] == 'E')
13317 int j;
13319 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13320 if (label_mentioned_p (XVECEXP (x, i, j)))
13321 return 1;
13323 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13324 return 1;
13327 return 0;
13331 tls_mentioned_p (rtx x)
13333 switch (GET_CODE (x))
13335 case CONST:
13336 return tls_mentioned_p (XEXP (x, 0));
13338 case UNSPEC:
13339 if (XINT (x, 1) == UNSPEC_TLS)
13340 return 1;
13342 default:
13343 return 0;
13347 /* Must not copy any rtx that uses a pc-relative address.
13348 Also, disallow copying of load-exclusive instructions that
13349 may appear after splitting of compare-and-swap-style operations
13350 so as to prevent those loops from being transformed away from their
13351 canonical forms (see PR 69904). */
13353 static bool
13354 arm_cannot_copy_insn_p (rtx_insn *insn)
13356 /* The tls call insn cannot be copied, as it is paired with a data
13357 word. */
13358 if (recog_memoized (insn) == CODE_FOR_tlscall)
13359 return true;
13361 subrtx_iterator::array_type array;
13362 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13364 const_rtx x = *iter;
13365 if (GET_CODE (x) == UNSPEC
13366 && (XINT (x, 1) == UNSPEC_PIC_BASE
13367 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13368 return true;
13371 rtx set = single_set (insn);
13372 if (set)
13374 rtx src = SET_SRC (set);
13375 if (GET_CODE (src) == ZERO_EXTEND)
13376 src = XEXP (src, 0);
13378 /* Catch the load-exclusive and load-acquire operations. */
13379 if (GET_CODE (src) == UNSPEC_VOLATILE
13380 && (XINT (src, 1) == VUNSPEC_LL
13381 || XINT (src, 1) == VUNSPEC_LAX))
13382 return true;
13384 return false;
13387 enum rtx_code
13388 minmax_code (rtx x)
13390 enum rtx_code code = GET_CODE (x);
13392 switch (code)
13394 case SMAX:
13395 return GE;
13396 case SMIN:
13397 return LE;
13398 case UMIN:
13399 return LEU;
13400 case UMAX:
13401 return GEU;
13402 default:
13403 gcc_unreachable ();
13407 /* Match pair of min/max operators that can be implemented via usat/ssat. */
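/* For example, bounds [0, 255] match the unsigned case with *MASK = 8 and
   *SIGNED_SAT = false, while bounds [-128, 127] match the signed case with
   *MASK = 8 and *SIGNED_SAT = true.  */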
13409 bool
13410 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13411 int *mask, bool *signed_sat)
13413 /* The high bound must be a power of two minus one. */
13414 int log = exact_log2 (INTVAL (hi_bound) + 1);
13415 if (log == -1)
13416 return false;
13418 /* The low bound is either zero (for usat) or one less than the
13419 negation of the high bound (for ssat). */
13420 if (INTVAL (lo_bound) == 0)
13422 if (mask)
13423 *mask = log;
13424 if (signed_sat)
13425 *signed_sat = false;
13427 return true;
13430 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13432 if (mask)
13433 *mask = log + 1;
13434 if (signed_sat)
13435 *signed_sat = true;
13437 return true;
13440 return false;
13443 /* Return 1 if memory locations are adjacent. */
13445 adjacent_mem_locations (rtx a, rtx b)
13447 /* We don't guarantee to preserve the order of these memory refs. */
13448 if (volatile_refs_p (a) || volatile_refs_p (b))
13449 return 0;
13451 if ((REG_P (XEXP (a, 0))
13452 || (GET_CODE (XEXP (a, 0)) == PLUS
13453 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13454 && (REG_P (XEXP (b, 0))
13455 || (GET_CODE (XEXP (b, 0)) == PLUS
13456 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13458 HOST_WIDE_INT val0 = 0, val1 = 0;
13459 rtx reg0, reg1;
13460 int val_diff;
13462 if (GET_CODE (XEXP (a, 0)) == PLUS)
13464 reg0 = XEXP (XEXP (a, 0), 0);
13465 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13467 else
13468 reg0 = XEXP (a, 0);
13470 if (GET_CODE (XEXP (b, 0)) == PLUS)
13472 reg1 = XEXP (XEXP (b, 0), 0);
13473 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13475 else
13476 reg1 = XEXP (b, 0);
13478 /* Don't accept any offset that will require multiple
13479 instructions to handle, since this would cause the
13480 arith_adjacentmem pattern to output an overlong sequence. */
13481 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13482 return 0;
13484 /* Don't allow an eliminable register: register elimination can make
13485 the offset too large. */
13486 if (arm_eliminable_register (reg0))
13487 return 0;
13489 val_diff = val1 - val0;
13491 if (arm_ld_sched)
13493 /* If the target has load delay slots, then there's no benefit
13494 to using an ldm instruction unless the offset is zero and
13495 we are optimizing for size. */
13496 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13497 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13498 && (val_diff == 4 || val_diff == -4));
13501 return ((REGNO (reg0) == REGNO (reg1))
13502 && (val_diff == 4 || val_diff == -4));
13505 return 0;
13508 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13509 for load operations, false for store operations. CONSECUTIVE is true
13510 if the register numbers in the operation must be consecutive in the register
13511 bank. RETURN_PC is true if the value is to be loaded into the PC.
13512 The pattern we are trying to match for load is:
13513 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13514 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13517 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13519 where
13520 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13521 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13522 3. If consecutive is TRUE, then for kth register being loaded,
13523 REGNO (R_dk) = REGNO (R_d0) + k.
13524 The pattern for store is similar. */
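/* For example, "ldm r0, {r4, r5, r6}" is represented as a PARALLEL of three
   such SETs, loading r4, r5 and r6 from offsets 0, 4 and 8 off r0.  */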
13525 bool
13526 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13527 bool consecutive, bool return_pc)
13529 HOST_WIDE_INT count = XVECLEN (op, 0);
13530 rtx reg, mem, addr;
13531 unsigned regno;
13532 unsigned first_regno;
13533 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13534 rtx elt;
13535 bool addr_reg_in_reglist = false;
13536 bool update = false;
13537 int reg_increment;
13538 int offset_adj;
13539 int regs_per_val;
13541 /* If not in SImode, then registers must be consecutive
13542 (e.g., VLDM instructions for DFmode). */
13543 gcc_assert ((mode == SImode) || consecutive);
13544 /* Setting return_pc for stores is illegal. */
13545 gcc_assert (!return_pc || load);
13547 /* Set up the increments and the regs per val based on the mode. */
13548 reg_increment = GET_MODE_SIZE (mode);
13549 regs_per_val = reg_increment / 4;
13550 offset_adj = return_pc ? 1 : 0;
13552 if (count <= 1
13553 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13554 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13555 return false;
13557 /* Check if this is a write-back. */
13558 elt = XVECEXP (op, 0, offset_adj);
13559 if (GET_CODE (SET_SRC (elt)) == PLUS)
13561 i++;
13562 base = 1;
13563 update = true;
13565 /* The offset adjustment must be the number of registers being
13566 popped times the size of a single register. */
13567 if (!REG_P (SET_DEST (elt))
13568 || !REG_P (XEXP (SET_SRC (elt), 0))
13569 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13570 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13571 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13572 ((count - 1 - offset_adj) * reg_increment))
13573 return false;
13576 i = i + offset_adj;
13577 base = base + offset_adj;
13578 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13579 success depends on the type: VLDM can do just one reg,
13580 LDM must do at least two. */
13581 if ((count <= i) && (mode == SImode))
13582 return false;
13584 elt = XVECEXP (op, 0, i - 1);
13585 if (GET_CODE (elt) != SET)
13586 return false;
13588 if (load)
13590 reg = SET_DEST (elt);
13591 mem = SET_SRC (elt);
13593 else
13595 reg = SET_SRC (elt);
13596 mem = SET_DEST (elt);
13599 if (!REG_P (reg) || !MEM_P (mem))
13600 return false;
13602 regno = REGNO (reg);
13603 first_regno = regno;
13604 addr = XEXP (mem, 0);
13605 if (GET_CODE (addr) == PLUS)
13607 if (!CONST_INT_P (XEXP (addr, 1)))
13608 return false;
13610 offset = INTVAL (XEXP (addr, 1));
13611 addr = XEXP (addr, 0);
13614 if (!REG_P (addr))
13615 return false;
13617 /* Don't allow SP to be loaded unless it is also the base register. It
13618 guarantees that SP is reset correctly when an LDM instruction
13619 is interrupted. Otherwise, we might end up with a corrupt stack. */
13620 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13621 return false;
13623 for (; i < count; i++)
13625 elt = XVECEXP (op, 0, i);
13626 if (GET_CODE (elt) != SET)
13627 return false;
13629 if (load)
13631 reg = SET_DEST (elt);
13632 mem = SET_SRC (elt);
13634 else
13636 reg = SET_SRC (elt);
13637 mem = SET_DEST (elt);
13640 if (!REG_P (reg)
13641 || GET_MODE (reg) != mode
13642 || REGNO (reg) <= regno
13643 || (consecutive
13644 && (REGNO (reg) !=
13645 (unsigned int) (first_regno + regs_per_val * (i - base))))
13646 /* Don't allow SP to be loaded unless it is also the base register. It
13647 guarantees that SP is reset correctly when an LDM instruction
13648 is interrupted. Otherwise, we might end up with a corrupt stack. */
13649 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13650 || !MEM_P (mem)
13651 || GET_MODE (mem) != mode
13652 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13653 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13654 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13655 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13656 offset + (i - base) * reg_increment))
13657 && (!REG_P (XEXP (mem, 0))
13658 || offset + (i - base) * reg_increment != 0)))
13659 return false;
13661 regno = REGNO (reg);
13662 if (regno == REGNO (addr))
13663 addr_reg_in_reglist = true;
13666 if (load)
13668 if (update && addr_reg_in_reglist)
13669 return false;
13671 /* For Thumb-1, address register is always modified - either by write-back
13672 or by explicit load. If the pattern does not describe an update,
13673 then the address register must be in the list of loaded registers. */
13674 if (TARGET_THUMB1)
13675 return update || addr_reg_in_reglist;
13678 return true;
13681 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13682 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13683 instruction. ADD_OFFSET is nonzero if the base address register needs
13684 to be modified with an add instruction before we can use it. */
13686 static bool
13687 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13688 int nops, HOST_WIDE_INT add_offset)
13690 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13691 if the offset isn't small enough. The reason 2 ldrs are faster
13692 is because these ARMs are able to do more than one cache access
13693 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13694 whilst the ARM8 has a double bandwidth cache. This means that
13695 these cores can do both an instruction fetch and a data fetch in
13696 a single cycle, so the trick of calculating the address into a
13697 scratch register (one of the result regs) and then doing a load
13698 multiple actually becomes slower (and no smaller in code size).
13699 That is the transformation
13701 ldr rd1, [rbase + offset]
13702 ldr rd2, [rbase + offset + 4]
13706 add rd1, rbase, offset
13707 ldmia rd1, {rd1, rd2}
13709 produces worse code -- '3 cycles + any stalls on rd2' instead of
13710 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13711 access per cycle, the first sequence could never complete in less
13712 than 6 cycles, whereas the ldm sequence would only take 5 and
13713 would make better use of sequential accesses if not hitting the
13714 cache.
13716 We cheat here and test 'arm_ld_sched' which we currently know to
13717 only be true for the ARM8, ARM9 and StrongARM. If this ever
13718 changes, then the test below needs to be reworked. */
13719 if (nops == 2 && arm_ld_sched && add_offset != 0)
13720 return false;
13722 /* XScale has load-store double instructions, but they have stricter
13723 alignment requirements than load-store multiple, so we cannot
13724 use them.
13726 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13727 the pipeline until completion.
13729 NREGS CYCLES
13730 1 3
13731 2 4
13732 3 5
13733 4 6
13735 An ldr instruction takes 1-3 cycles, but does not block the
13736 pipeline.
13738 NREGS CYCLES
13739 1 1-3
13740 2 2-6
13741 3 3-9
13742 4 4-12
13744 Best case ldr will always win. However, the more ldr instructions
13745 we issue, the less likely we are to be able to schedule them well.
13746 Using ldr instructions also increases code size.
13748 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13749 for counts of 3 or 4 regs. */
13750 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13751 return false;
13752 return true;
13755 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13756 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13757 an array ORDER which describes the sequence to use when accessing the
13758 offsets that produces an ascending order. In this sequence, each
13759 offset must be larger by exactly 4 than the previous one. ORDER[0]
13760 must have been filled in with the lowest offset by the caller.
13761 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13762 we use to verify that ORDER produces an ascending order of registers.
13763 Return true if it was possible to construct such an order, false if
13764 not. */
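/* A worked example (illustrative only): with UNSORTED_OFFSETS = {8, 0,
   4, 12} the caller sets ORDER[0] = 1 (the index of offset 0) and this
   function fills in ORDER = {1, 2, 0, 3}, i.e. offsets 0, 4, 8, 12.
   If an offset is missing or duplicated (say {0, 4, 4, 12}) there is no
   unique successor and the function returns false.  */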
13766 static bool
13767 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13768 int *unsorted_regs)
13770 int i;
13771 for (i = 1; i < nops; i++)
13773 int j;
13775 order[i] = order[i - 1];
13776 for (j = 0; j < nops; j++)
13777 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13779 /* We must find exactly one offset that is higher than the
13780 previous one by 4. */
13781 if (order[i] != order[i - 1])
13782 return false;
13783 order[i] = j;
13785 if (order[i] == order[i - 1])
13786 return false;
13787 /* The register numbers must be ascending. */
13788 if (unsorted_regs != NULL
13789 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13790 return false;
13792 return true;
13795 /* Used to determine in a peephole whether a sequence of load
13796 instructions can be changed into a load-multiple instruction.
13797 NOPS is the number of separate load instructions we are examining. The
13798 first NOPS entries in OPERANDS are the destination registers, the
13799 next NOPS entries are memory operands. If this function is
13800 successful, *BASE is set to the common base register of the memory
13801 accesses; *LOAD_OFFSET is set to the first memory location's offset
13802 from that base register.
13803 REGS is an array filled in with the destination register numbers.
13804 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13805 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13806 the sequence of registers in REGS matches the loads from ascending memory
13807 locations, and the function verifies that the register numbers are
13808 themselves ascending. If CHECK_REGS is false, the register numbers
13809 are stored in the order they are found in the operands. */
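/* An assumed example of what this matches (a sketch, not from the
   original source): the peephole sequence

       ldr r4, [r2]
       ldr r5, [r2, #4]

   yields regs = {4, 5}, *base = 2, *load_offset = 0 and returns
   ldm_case 1, which the caller turns into "ldmia r2, {r4, r5}".  */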
13810 static int
13811 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13812 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13814 int unsorted_regs[MAX_LDM_STM_OPS];
13815 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13816 int order[MAX_LDM_STM_OPS];
13817 rtx base_reg_rtx = NULL;
13818 int base_reg = -1;
13819 int i, ldm_case;
13821 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13822 easily extended if required. */
13823 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13825 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13827 /* Loop over the operands and check that the memory references are
13828 suitable (i.e. immediate offsets from the same base register). At
13829 the same time, extract the target register, and the memory
13830 offsets. */
13831 for (i = 0; i < nops; i++)
13833 rtx reg;
13834 rtx offset;
13836 /* Convert a subreg of a mem into the mem itself. */
13837 if (GET_CODE (operands[nops + i]) == SUBREG)
13838 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13840 gcc_assert (MEM_P (operands[nops + i]));
13842 /* Don't reorder volatile memory references; it doesn't seem worth
13843 looking for the case where the order is ok anyway. */
13844 if (MEM_VOLATILE_P (operands[nops + i]))
13845 return 0;
13847 offset = const0_rtx;
13849 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13850 || (GET_CODE (reg) == SUBREG
13851 && REG_P (reg = SUBREG_REG (reg))))
13852 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13853 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13854 || (GET_CODE (reg) == SUBREG
13855 && REG_P (reg = SUBREG_REG (reg))))
13856 && (CONST_INT_P (offset
13857 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13859 if (i == 0)
13861 base_reg = REGNO (reg);
13862 base_reg_rtx = reg;
13863 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13864 return 0;
13866 else if (base_reg != (int) REGNO (reg))
13867 /* Not addressed from the same base register. */
13868 return 0;
13870 unsorted_regs[i] = (REG_P (operands[i])
13871 ? REGNO (operands[i])
13872 : REGNO (SUBREG_REG (operands[i])));
13874 /* If it isn't an integer register, or if it overwrites the
13875 base register but isn't the last insn in the list, then
13876 we can't do this. */
13877 if (unsorted_regs[i] < 0
13878 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13879 || unsorted_regs[i] > 14
13880 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13881 return 0;
13883 /* Don't allow SP to be loaded unless it is also the base
13884 register. It guarantees that SP is reset correctly when
13885 an LDM instruction is interrupted. Otherwise, we might
13886 end up with a corrupt stack. */
13887 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13888 return 0;
13890 unsorted_offsets[i] = INTVAL (offset);
13891 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13892 order[0] = i;
13894 else
13895 /* Not a suitable memory address. */
13896 return 0;
13899 /* All the useful information has now been extracted from the
13900 operands into unsorted_regs and unsorted_offsets; additionally,
13901 order[0] has been set to the lowest offset in the list. Sort
13902 the offsets into order, verifying that they are adjacent, and
13903 check that the register numbers are ascending. */
13904 if (!compute_offset_order (nops, unsorted_offsets, order,
13905 check_regs ? unsorted_regs : NULL))
13906 return 0;
13908 if (saved_order)
13909 memcpy (saved_order, order, sizeof order);
13911 if (base)
13913 *base = base_reg;
13915 for (i = 0; i < nops; i++)
13916 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13918 *load_offset = unsorted_offsets[order[0]];
13921 if (TARGET_THUMB1
13922 && !peep2_reg_dead_p (nops, base_reg_rtx))
13923 return 0;
13925 if (unsorted_offsets[order[0]] == 0)
13926 ldm_case = 1; /* ldmia */
13927 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13928 ldm_case = 2; /* ldmib */
13929 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13930 ldm_case = 3; /* ldmda */
13931 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13932 ldm_case = 4; /* ldmdb */
13933 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13934 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13935 ldm_case = 5;
13936 else
13937 return 0;
13939 if (!multiple_operation_profitable_p (false, nops,
13940 ldm_case == 5
13941 ? unsorted_offsets[order[0]] : 0))
13942 return 0;
13944 return ldm_case;
13947 /* Used to determine in a peephole whether a sequence of store instructions can
13948 be changed into a store-multiple instruction.
13949 NOPS is the number of separate store instructions we are examining.
13950 NOPS_TOTAL is the total number of instructions recognized by the peephole
13951 pattern.
13952 The first NOPS entries in OPERANDS are the source registers, the next
13953 NOPS entries are memory operands. If this function is successful, *BASE is
13954 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13955 to the first memory location's offset from that base register. REGS is an
13956 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13957 likewise filled with the corresponding rtx's.
13958 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13959 numbers to an ascending order of stores.
13960 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13961 from ascending memory locations, and the function verifies that the register
13962 numbers are themselves ascending. If CHECK_REGS is false, the register
13963 numbers are stored in the order they are found in the operands. */
13964 static int
13965 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13966 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13967 HOST_WIDE_INT *load_offset, bool check_regs)
13969 int unsorted_regs[MAX_LDM_STM_OPS];
13970 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13971 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13972 int order[MAX_LDM_STM_OPS];
13973 int base_reg = -1;
13974 rtx base_reg_rtx = NULL;
13975 int i, stm_case;
13977 /* Write back of base register is currently only supported for Thumb 1. */
13978 int base_writeback = TARGET_THUMB1;
13980 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13981 easily extended if required. */
13982 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13984 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13986 /* Loop over the operands and check that the memory references are
13987 suitable (i.e. immediate offsets from the same base register). At
13988 the same time, extract the target register, and the memory
13989 offsets. */
13990 for (i = 0; i < nops; i++)
13992 rtx reg;
13993 rtx offset;
13995 /* Convert a subreg of a mem into the mem itself. */
13996 if (GET_CODE (operands[nops + i]) == SUBREG)
13997 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13999 gcc_assert (MEM_P (operands[nops + i]));
14001 /* Don't reorder volatile memory references; it doesn't seem worth
14002 looking for the case where the order is ok anyway. */
14003 if (MEM_VOLATILE_P (operands[nops + i]))
14004 return 0;
14006 offset = const0_rtx;
14008 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14009 || (GET_CODE (reg) == SUBREG
14010 && REG_P (reg = SUBREG_REG (reg))))
14011 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14012 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14013 || (GET_CODE (reg) == SUBREG
14014 && REG_P (reg = SUBREG_REG (reg))))
14015 && (CONST_INT_P (offset
14016 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14018 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14019 ? operands[i] : SUBREG_REG (operands[i]));
14020 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14022 if (i == 0)
14024 base_reg = REGNO (reg);
14025 base_reg_rtx = reg;
14026 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14027 return 0;
14029 else if (base_reg != (int) REGNO (reg))
14030 /* Not addressed from the same base register. */
14031 return 0;
14033 /* If it isn't an integer register, then we can't do this. */
14034 if (unsorted_regs[i] < 0
14035 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14036 /* The effects are unpredictable if the base register is
14037 both updated and stored. */
14038 || (base_writeback && unsorted_regs[i] == base_reg)
14039 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14040 || unsorted_regs[i] > 14)
14041 return 0;
14043 unsorted_offsets[i] = INTVAL (offset);
14044 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14045 order[0] = i;
14047 else
14048 /* Not a suitable memory address. */
14049 return 0;
14052 /* All the useful information has now been extracted from the
14053 operands into unsorted_regs and unsorted_offsets; additionally,
14054 order[0] has been set to the lowest offset in the list. Sort
14055 the offsets into order, verifying that they are adjacent, and
14056 check that the register numbers are ascending. */
14057 if (!compute_offset_order (nops, unsorted_offsets, order,
14058 check_regs ? unsorted_regs : NULL))
14059 return 0;
14061 if (saved_order)
14062 memcpy (saved_order, order, sizeof order);
14064 if (base)
14066 *base = base_reg;
14068 for (i = 0; i < nops; i++)
14070 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14071 if (reg_rtxs)
14072 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14075 *load_offset = unsorted_offsets[order[0]];
14078 if (TARGET_THUMB1
14079 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14080 return 0;
14082 if (unsorted_offsets[order[0]] == 0)
14083 stm_case = 1; /* stmia */
14084 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14085 stm_case = 2; /* stmib */
14086 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14087 stm_case = 3; /* stmda */
14088 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14089 stm_case = 4; /* stmdb */
14090 else
14091 return 0;
14093 if (!multiple_operation_profitable_p (false, nops, 0))
14094 return 0;
14096 return stm_case;
14099 /* Routines for use in generating RTL. */
14101 /* Generate a load-multiple instruction. COUNT is the number of loads in
14102 the instruction; REGS and MEMS are arrays containing the operands.
14103 BASEREG is the base register to be used in addressing the memory operands.
14104 WBACK_OFFSET is nonzero if the instruction should update the base
14105 register. */
14107 static rtx
14108 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14109 HOST_WIDE_INT wback_offset)
14111 int i = 0, j;
14112 rtx result;
14114 if (!multiple_operation_profitable_p (false, count, 0))
14116 rtx seq;
14118 start_sequence ();
14120 for (i = 0; i < count; i++)
14121 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14123 if (wback_offset != 0)
14124 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14126 seq = get_insns ();
14127 end_sequence ();
14129 return seq;
14132 result = gen_rtx_PARALLEL (VOIDmode,
14133 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14134 if (wback_offset != 0)
14136 XVECEXP (result, 0, 0)
14137 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14138 i = 1;
14139 count++;
14142 for (j = 0; i < count; i++, j++)
14143 XVECEXP (result, 0, i)
14144 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14146 return result;
14149 /* Generate a store-multiple instruction. COUNT is the number of stores in
14150 the instruction; REGS and MEMS are arrays containing the operands.
14151 BASEREG is the base register to be used in addressing the memory operands.
14152 WBACK_OFFSET is nonzero if the instruction should update the base
14153 register. */
14155 static rtx
14156 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14157 HOST_WIDE_INT wback_offset)
14159 int i = 0, j;
14160 rtx result;
14162 if (GET_CODE (basereg) == PLUS)
14163 basereg = XEXP (basereg, 0);
14165 if (!multiple_operation_profitable_p (false, count, 0))
14167 rtx seq;
14169 start_sequence ();
14171 for (i = 0; i < count; i++)
14172 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14174 if (wback_offset != 0)
14175 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14177 seq = get_insns ();
14178 end_sequence ();
14180 return seq;
14183 result = gen_rtx_PARALLEL (VOIDmode,
14184 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14185 if (wback_offset != 0)
14187 XVECEXP (result, 0, 0)
14188 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14189 i = 1;
14190 count++;
14193 for (j = 0; i < count; i++, j++)
14194 XVECEXP (result, 0, i)
14195 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14197 return result;
14200 /* Generate either a load-multiple or a store-multiple instruction. This
14201 function can be used in situations where we can start with a single MEM
14202 rtx and adjust its address upwards.
14203 COUNT is the number of operations in the instruction, not counting a
14204 possible update of the base register. REGS is an array containing the
14205 register operands.
14206 BASEREG is the base register to be used in addressing the memory operands,
14207 which are constructed from BASEMEM.
14208 WRITE_BACK specifies whether the generated instruction should include an
14209 update of the base register.
14210 OFFSETP is used to pass an offset to and from this function; this offset
14211 is not used when constructing the address (instead BASEMEM should have an
14212 appropriate offset in its address), it is used only for setting
14213 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
14215 static rtx
14216 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14217 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14219 rtx mems[MAX_LDM_STM_OPS];
14220 HOST_WIDE_INT offset = *offsetp;
14221 int i;
14223 gcc_assert (count <= MAX_LDM_STM_OPS);
14225 if (GET_CODE (basereg) == PLUS)
14226 basereg = XEXP (basereg, 0);
14228 for (i = 0; i < count; i++)
14230 rtx addr = plus_constant (Pmode, basereg, i * 4);
14231 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14232 offset += 4;
14235 if (write_back)
14236 *offsetp = offset;
14238 if (is_load)
14239 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14240 write_back ? 4 * count : 0);
14241 else
14242 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14243 write_back ? 4 * count : 0);
14246 rtx
14247 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14248 rtx basemem, HOST_WIDE_INT *offsetp)
14250 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14251 offsetp);
14254 rtx
14255 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14256 rtx basemem, HOST_WIDE_INT *offsetp)
14258 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14259 offsetp);
14262 /* Called from a peephole2 expander to turn a sequence of loads into an
14263 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14264 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14265 is true if we can reorder the registers because they are used commutatively
14266 subsequently.
14267 Returns true iff we could generate a new instruction. */
14269 bool
14270 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14272 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14273 rtx mems[MAX_LDM_STM_OPS];
14274 int i, j, base_reg;
14275 rtx base_reg_rtx;
14276 HOST_WIDE_INT offset;
14277 int write_back = FALSE;
14278 int ldm_case;
14279 rtx addr;
14281 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14282 &base_reg, &offset, !sort_regs);
14284 if (ldm_case == 0)
14285 return false;
14287 if (sort_regs)
14288 for (i = 0; i < nops - 1; i++)
14289 for (j = i + 1; j < nops; j++)
14290 if (regs[i] > regs[j])
14292 int t = regs[i];
14293 regs[i] = regs[j];
14294 regs[j] = t;
14296 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14298 if (TARGET_THUMB1)
14300 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14301 gcc_assert (ldm_case == 1 || ldm_case == 5);
14302 write_back = TRUE;
14305 if (ldm_case == 5)
14307 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14308 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14309 offset = 0;
14310 if (!TARGET_THUMB1)
14312 base_reg = regs[0];
14313 base_reg_rtx = newbase;
14317 for (i = 0; i < nops; i++)
14319 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14320 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14321 SImode, addr, 0);
14323 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14324 write_back ? offset + i * 4 : 0));
14325 return true;
14328 /* Called from a peephole2 expander to turn a sequence of stores into an
14329 STM instruction. OPERANDS are the operands found by the peephole matcher;
14330 NOPS indicates how many separate stores we are trying to combine.
14331 Returns true iff we could generate a new instruction. */
14333 bool
14334 gen_stm_seq (rtx *operands, int nops)
14336 int i;
14337 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14338 rtx mems[MAX_LDM_STM_OPS];
14339 int base_reg;
14340 rtx base_reg_rtx;
14341 HOST_WIDE_INT offset;
14342 int write_back = FALSE;
14343 int stm_case;
14344 rtx addr;
14345 bool base_reg_dies;
14347 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14348 mem_order, &base_reg, &offset, true);
14350 if (stm_case == 0)
14351 return false;
14353 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14355 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14356 if (TARGET_THUMB1)
14358 gcc_assert (base_reg_dies);
14359 write_back = TRUE;
14362 if (stm_case == 5)
14364 gcc_assert (base_reg_dies);
14365 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14366 offset = 0;
14369 addr = plus_constant (Pmode, base_reg_rtx, offset);
14371 for (i = 0; i < nops; i++)
14373 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14374 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14375 SImode, addr, 0);
14377 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14378 write_back ? offset + i * 4 : 0));
14379 return true;
14382 /* Called from a peephole2 expander to turn a sequence of stores that are
14383 preceded by constant loads into an STM instruction. OPERANDS are the
14384 operands found by the peephole matcher; NOPS indicates how many
14385 separate stores we are trying to combine; there are 2 * NOPS
14386 instructions in the peephole.
14387 Returns true iff we could generate a new instruction. */
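/* A hedged illustration (not in the original source): a matched
   peephole of the form

       mov r4, #10
       mov r5, #20
       str r4, [r0]
       str r5, [r0, #4]

   re-emits the two constant loads and then a single store-multiple,
   e.g. "stmia r0, {r4, r5}", renaming a destination register first if
   the same register was reused for both constants.  */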
14389 bool
14390 gen_const_stm_seq (rtx *operands, int nops)
14392 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14393 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14394 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14395 rtx mems[MAX_LDM_STM_OPS];
14396 int base_reg;
14397 rtx base_reg_rtx;
14398 HOST_WIDE_INT offset;
14399 int write_back = FALSE;
14400 int stm_case;
14401 rtx addr;
14402 bool base_reg_dies;
14403 int i, j;
14404 HARD_REG_SET allocated;
14406 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14407 mem_order, &base_reg, &offset, false);
14409 if (stm_case == 0)
14410 return false;
14412 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14414 /* If the same register is used more than once, try to find a free
14415 register. */
14416 CLEAR_HARD_REG_SET (allocated);
14417 for (i = 0; i < nops; i++)
14419 for (j = i + 1; j < nops; j++)
14420 if (regs[i] == regs[j])
14422 rtx t = peep2_find_free_register (0, nops * 2,
14423 TARGET_THUMB1 ? "l" : "r",
14424 SImode, &allocated);
14425 if (t == NULL_RTX)
14426 return false;
14427 reg_rtxs[i] = t;
14428 regs[i] = REGNO (t);
14432 /* Compute an ordering that maps the register numbers to an ascending
14433 sequence. */
14434 reg_order[0] = 0;
14435 for (i = 0; i < nops; i++)
14436 if (regs[i] < regs[reg_order[0]])
14437 reg_order[0] = i;
14439 for (i = 1; i < nops; i++)
14441 int this_order = reg_order[i - 1];
14442 for (j = 0; j < nops; j++)
14443 if (regs[j] > regs[reg_order[i - 1]]
14444 && (this_order == reg_order[i - 1]
14445 || regs[j] < regs[this_order]))
14446 this_order = j;
14447 reg_order[i] = this_order;
14450 /* Ensure that registers that must be live after the instruction end
14451 up with the correct value. */
14452 for (i = 0; i < nops; i++)
14454 int this_order = reg_order[i];
14455 if ((this_order != mem_order[i]
14456 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14457 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14458 return false;
14461 /* Load the constants. */
14462 for (i = 0; i < nops; i++)
14464 rtx op = operands[2 * nops + mem_order[i]];
14465 sorted_regs[i] = regs[reg_order[i]];
14466 emit_move_insn (reg_rtxs[reg_order[i]], op);
14469 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14471 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14472 if (TARGET_THUMB1)
14474 gcc_assert (base_reg_dies);
14475 write_back = TRUE;
14478 if (stm_case == 5)
14480 gcc_assert (base_reg_dies);
14481 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14482 offset = 0;
14485 addr = plus_constant (Pmode, base_reg_rtx, offset);
14487 for (i = 0; i < nops; i++)
14489 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14490 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14491 SImode, addr, 0);
14493 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14494 write_back ? offset + i * 4 : 0));
14495 return true;
14498 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14499 unaligned copies on processors which support unaligned semantics for those
14500 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14501 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14502 An interleave factor of 1 (the minimum) will perform no interleaving.
14503 Load/store multiple are used for aligned addresses where possible. */
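/* For example (a sketch, assuming an unaligned source and destination),
   INTERLEAVE_FACTOR == 2 copies each 8-byte block as load, load, store,
   store rather than load/store, load/store, so the second load can
   issue while the first is still completing.  */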
14505 static void
14506 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14507 HOST_WIDE_INT length,
14508 unsigned int interleave_factor)
14510 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14511 int *regnos = XALLOCAVEC (int, interleave_factor);
14512 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14513 HOST_WIDE_INT i, j;
14514 HOST_WIDE_INT remaining = length, words;
14515 rtx halfword_tmp = NULL, byte_tmp = NULL;
14516 rtx dst, src;
14517 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14518 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14519 HOST_WIDE_INT srcoffset, dstoffset;
14520 HOST_WIDE_INT src_autoinc, dst_autoinc;
14521 rtx mem, addr;
14523 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14525 /* Use hard registers if we have aligned source or destination so we can use
14526 load/store multiple with contiguous registers. */
14527 if (dst_aligned || src_aligned)
14528 for (i = 0; i < interleave_factor; i++)
14529 regs[i] = gen_rtx_REG (SImode, i);
14530 else
14531 for (i = 0; i < interleave_factor; i++)
14532 regs[i] = gen_reg_rtx (SImode);
14534 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14535 src = copy_addr_to_reg (XEXP (srcbase, 0));
14537 srcoffset = dstoffset = 0;
14539 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14540 For copying the last bytes we want to subtract this offset again. */
14541 src_autoinc = dst_autoinc = 0;
14543 for (i = 0; i < interleave_factor; i++)
14544 regnos[i] = i;
14546 /* Copy BLOCK_SIZE_BYTES chunks. */
14548 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14550 /* Load words. */
14551 if (src_aligned && interleave_factor > 1)
14553 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14554 TRUE, srcbase, &srcoffset));
14555 src_autoinc += UNITS_PER_WORD * interleave_factor;
14557 else
14559 for (j = 0; j < interleave_factor; j++)
14561 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14562 - src_autoinc));
14563 mem = adjust_automodify_address (srcbase, SImode, addr,
14564 srcoffset + j * UNITS_PER_WORD);
14565 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14567 srcoffset += block_size_bytes;
14570 /* Store words. */
14571 if (dst_aligned && interleave_factor > 1)
14573 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14574 TRUE, dstbase, &dstoffset));
14575 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14577 else
14579 for (j = 0; j < interleave_factor; j++)
14581 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14582 - dst_autoinc));
14583 mem = adjust_automodify_address (dstbase, SImode, addr,
14584 dstoffset + j * UNITS_PER_WORD);
14585 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14587 dstoffset += block_size_bytes;
14590 remaining -= block_size_bytes;
14593 /* Copy any whole words left (note these aren't interleaved with any
14594 subsequent halfword/byte load/stores in the interests of simplicity). */
14596 words = remaining / UNITS_PER_WORD;
14598 gcc_assert (words < interleave_factor);
14600 if (src_aligned && words > 1)
14602 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14603 &srcoffset));
14604 src_autoinc += UNITS_PER_WORD * words;
14606 else
14608 for (j = 0; j < words; j++)
14610 addr = plus_constant (Pmode, src,
14611 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14612 mem = adjust_automodify_address (srcbase, SImode, addr,
14613 srcoffset + j * UNITS_PER_WORD);
14614 if (src_aligned)
14615 emit_move_insn (regs[j], mem);
14616 else
14617 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14619 srcoffset += words * UNITS_PER_WORD;
14622 if (dst_aligned && words > 1)
14624 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14625 &dstoffset));
14626 dst_autoinc += words * UNITS_PER_WORD;
14628 else
14630 for (j = 0; j < words; j++)
14632 addr = plus_constant (Pmode, dst,
14633 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14634 mem = adjust_automodify_address (dstbase, SImode, addr,
14635 dstoffset + j * UNITS_PER_WORD);
14636 if (dst_aligned)
14637 emit_move_insn (mem, regs[j]);
14638 else
14639 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14641 dstoffset += words * UNITS_PER_WORD;
14644 remaining -= words * UNITS_PER_WORD;
14646 gcc_assert (remaining < 4);
14648 /* Copy a halfword if necessary. */
14650 if (remaining >= 2)
14652 halfword_tmp = gen_reg_rtx (SImode);
14654 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14655 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14656 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14658 /* Either write out immediately, or delay until we've loaded the last
14659 byte, depending on interleave factor. */
14660 if (interleave_factor == 1)
14662 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14663 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14664 emit_insn (gen_unaligned_storehi (mem,
14665 gen_lowpart (HImode, halfword_tmp)));
14666 halfword_tmp = NULL;
14667 dstoffset += 2;
14670 remaining -= 2;
14671 srcoffset += 2;
14674 gcc_assert (remaining < 2);
14676 /* Copy last byte. */
14678 if ((remaining & 1) != 0)
14680 byte_tmp = gen_reg_rtx (SImode);
14682 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14683 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14684 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14686 if (interleave_factor == 1)
14688 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14689 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14690 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14691 byte_tmp = NULL;
14692 dstoffset++;
14695 remaining--;
14696 srcoffset++;
14699 /* Store last halfword if we haven't done so already. */
14701 if (halfword_tmp)
14703 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14704 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14705 emit_insn (gen_unaligned_storehi (mem,
14706 gen_lowpart (HImode, halfword_tmp)));
14707 dstoffset += 2;
14710 /* Likewise for last byte. */
14712 if (byte_tmp)
14714 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14715 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14716 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14717 dstoffset++;
14720 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14723 /* From mips_adjust_block_mem:
14725 Helper function for doing a loop-based block operation on memory
14726 reference MEM. Each iteration of the loop will operate on LENGTH
14727 bytes of MEM.
14729 Create a new base register for use within the loop and point it to
14730 the start of MEM. Create a new memory reference that uses this
14731 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14733 static void
14734 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14735 rtx *loop_mem)
14737 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14739 /* Although the new mem does not refer to a known location,
14740 it does keep up to LENGTH bytes of alignment. */
14741 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14742 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14745 /* From mips_block_move_loop:
14747 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14748 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14749 the memory regions do not overlap. */
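/* A worked example (illustrative only): LENGTH = 100 with
   BYTES_PER_ITER = 16 copies 96 bytes in six iterations of the emitted
   loop and then calls arm_block_move_unaligned_straight once more for
   the 4 leftover bytes.  */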
14751 static void
14752 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14753 unsigned int interleave_factor,
14754 HOST_WIDE_INT bytes_per_iter)
14756 rtx src_reg, dest_reg, final_src, test;
14757 HOST_WIDE_INT leftover;
14759 leftover = length % bytes_per_iter;
14760 length -= leftover;
14762 /* Create registers and memory references for use within the loop. */
14763 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14764 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14766 /* Calculate the value that SRC_REG should have after the last iteration of
14767 the loop. */
14768 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14769 0, 0, OPTAB_WIDEN);
14771 /* Emit the start of the loop. */
14772 rtx_code_label *label = gen_label_rtx ();
14773 emit_label (label);
14775 /* Emit the loop body. */
14776 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14777 interleave_factor);
14779 /* Move on to the next block. */
14780 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14781 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14783 /* Emit the loop condition. */
14784 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14785 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14787 /* Mop up any left-over bytes. */
14788 if (leftover)
14789 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14792 /* Emit a block move when either the source or destination is unaligned (not
14793 aligned to a four-byte boundary). This may need further tuning depending on
14794 core type, optimize_size setting, etc. */
14796 static int
14797 arm_movmemqi_unaligned (rtx *operands)
14799 HOST_WIDE_INT length = INTVAL (operands[2]);
14801 if (optimize_size)
14803 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14804 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14805 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14806 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14807 or dst_aligned though: allow more interleaving in those cases since the
14808 resulting code can be smaller. */
14809 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14810 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14812 if (length > 12)
14813 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14814 interleave_factor, bytes_per_iter);
14815 else
14816 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14817 interleave_factor);
14819 else
14821 /* Note that the loop created by arm_block_move_unaligned_loop may be
14822 subject to loop unrolling, which makes tuning this condition a little
14823 redundant. */
14824 if (length > 32)
14825 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14826 else
14827 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14830 return 1;
14833 int
14834 arm_gen_movmemqi (rtx *operands)
14836 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14837 HOST_WIDE_INT srcoffset, dstoffset;
14838 int i;
14839 rtx src, dst, srcbase, dstbase;
14840 rtx part_bytes_reg = NULL;
14841 rtx mem;
14843 if (!CONST_INT_P (operands[2])
14844 || !CONST_INT_P (operands[3])
14845 || INTVAL (operands[2]) > 64)
14846 return 0;
14848 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14849 return arm_movmemqi_unaligned (operands);
14851 if (INTVAL (operands[3]) & 3)
14852 return 0;
14854 dstbase = operands[0];
14855 srcbase = operands[1];
14857 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14858 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14860 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14861 out_words_to_go = INTVAL (operands[2]) / 4;
14862 last_bytes = INTVAL (operands[2]) & 3;
14863 dstoffset = srcoffset = 0;
14865 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14866 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14868 for (i = 0; in_words_to_go >= 2; i+=4)
14870 if (in_words_to_go > 4)
14871 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14872 TRUE, srcbase, &srcoffset));
14873 else
14874 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14875 src, FALSE, srcbase,
14876 &srcoffset));
14878 if (out_words_to_go)
14880 if (out_words_to_go > 4)
14881 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14882 TRUE, dstbase, &dstoffset));
14883 else if (out_words_to_go != 1)
14884 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14885 out_words_to_go, dst,
14886 (last_bytes == 0
14887 ? FALSE : TRUE),
14888 dstbase, &dstoffset));
14889 else
14891 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14892 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14893 if (last_bytes != 0)
14895 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14896 dstoffset += 4;
14901 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14902 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14905 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14906 if (out_words_to_go)
14908 rtx sreg;
14910 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14911 sreg = copy_to_reg (mem);
14913 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14914 emit_move_insn (mem, sreg);
14915 in_words_to_go--;
14917 gcc_assert (!in_words_to_go); /* Sanity check */
14920 if (in_words_to_go)
14922 gcc_assert (in_words_to_go > 0);
14924 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14925 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14928 gcc_assert (!last_bytes || part_bytes_reg);
14930 if (BYTES_BIG_ENDIAN && last_bytes)
14932 rtx tmp = gen_reg_rtx (SImode);
14934 /* The bytes we want are in the top end of the word. */
14935 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14936 GEN_INT (8 * (4 - last_bytes))));
14937 part_bytes_reg = tmp;
14939 while (last_bytes)
14941 mem = adjust_automodify_address (dstbase, QImode,
14942 plus_constant (Pmode, dst,
14943 last_bytes - 1),
14944 dstoffset + last_bytes - 1);
14945 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14947 if (--last_bytes)
14949 tmp = gen_reg_rtx (SImode);
14950 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14951 part_bytes_reg = tmp;
14956 else
14958 if (last_bytes > 1)
14960 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14961 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14962 last_bytes -= 2;
14963 if (last_bytes)
14965 rtx tmp = gen_reg_rtx (SImode);
14966 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14967 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14968 part_bytes_reg = tmp;
14969 dstoffset += 2;
14973 if (last_bytes)
14975 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14976 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14980 return 1;
14983 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14984 by mode size. */
14985 inline static rtx
14986 next_consecutive_mem (rtx mem)
14988 machine_mode mode = GET_MODE (mem);
14989 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14990 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14992 return adjust_automodify_address (mem, mode, addr, offset);
14995 /* Copy using LDRD/STRD instructions whenever possible.
14996 Returns true upon success. */
14997 bool
14998 gen_movmem_ldrd_strd (rtx *operands)
15000 unsigned HOST_WIDE_INT len;
15001 HOST_WIDE_INT align;
15002 rtx src, dst, base;
15003 rtx reg0;
15004 bool src_aligned, dst_aligned;
15005 bool src_volatile, dst_volatile;
15007 gcc_assert (CONST_INT_P (operands[2]));
15008 gcc_assert (CONST_INT_P (operands[3]));
15010 len = UINTVAL (operands[2]);
15011 if (len > 64)
15012 return false;
15014 /* Maximum alignment we can assume for both src and dst buffers. */
15015 align = INTVAL (operands[3]);
15017 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15018 return false;
15020 /* Place src and dst addresses in registers
15021 and update the corresponding mem rtx. */
15022 dst = operands[0];
15023 dst_volatile = MEM_VOLATILE_P (dst);
15024 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15025 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15026 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15028 src = operands[1];
15029 src_volatile = MEM_VOLATILE_P (src);
15030 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15031 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15032 src = adjust_automodify_address (src, VOIDmode, base, 0);
15034 if (!unaligned_access && !(src_aligned && dst_aligned))
15035 return false;
15037 if (src_volatile || dst_volatile)
15038 return false;
15040 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15041 if (!(dst_aligned || src_aligned))
15042 return arm_gen_movmemqi (operands);
15044 /* If either src or dst is unaligned we'll be accessing it as pairs
15045 of unaligned SImode accesses. Otherwise we can generate DImode
15046 ldrd/strd instructions. */
15047 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15048 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15050 while (len >= 8)
15052 len -= 8;
15053 reg0 = gen_reg_rtx (DImode);
15054 rtx low_reg = NULL_RTX;
15055 rtx hi_reg = NULL_RTX;
15057 if (!src_aligned || !dst_aligned)
15059 low_reg = gen_lowpart (SImode, reg0);
15060 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
15062 if (src_aligned)
15063 emit_move_insn (reg0, src);
15064 else
15066 emit_insn (gen_unaligned_loadsi (low_reg, src));
15067 src = next_consecutive_mem (src);
15068 emit_insn (gen_unaligned_loadsi (hi_reg, src));
15071 if (dst_aligned)
15072 emit_move_insn (dst, reg0);
15073 else
15075 emit_insn (gen_unaligned_storesi (dst, low_reg));
15076 dst = next_consecutive_mem (dst);
15077 emit_insn (gen_unaligned_storesi (dst, hi_reg));
15080 src = next_consecutive_mem (src);
15081 dst = next_consecutive_mem (dst);
15084 gcc_assert (len < 8);
15085 if (len >= 4)
15087 /* More than a word but less than a double-word to copy. Copy a word. */
15088 reg0 = gen_reg_rtx (SImode);
15089 src = adjust_address (src, SImode, 0);
15090 dst = adjust_address (dst, SImode, 0);
15091 if (src_aligned)
15092 emit_move_insn (reg0, src);
15093 else
15094 emit_insn (gen_unaligned_loadsi (reg0, src));
15096 if (dst_aligned)
15097 emit_move_insn (dst, reg0);
15098 else
15099 emit_insn (gen_unaligned_storesi (dst, reg0));
15101 src = next_consecutive_mem (src);
15102 dst = next_consecutive_mem (dst);
15103 len -= 4;
15106 if (len == 0)
15107 return true;
15109 /* Copy the remaining bytes. */
15110 if (len >= 2)
15112 dst = adjust_address (dst, HImode, 0);
15113 src = adjust_address (src, HImode, 0);
15114 reg0 = gen_reg_rtx (SImode);
15115 if (src_aligned)
15116 emit_insn (gen_zero_extendhisi2 (reg0, src));
15117 else
15118 emit_insn (gen_unaligned_loadhiu (reg0, src));
15120 if (dst_aligned)
15121 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15122 else
15123 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15125 src = next_consecutive_mem (src);
15126 dst = next_consecutive_mem (dst);
15127 if (len == 2)
15128 return true;
15131 dst = adjust_address (dst, QImode, 0);
15132 src = adjust_address (src, QImode, 0);
15133 reg0 = gen_reg_rtx (QImode);
15134 emit_move_insn (reg0, src);
15135 emit_move_insn (dst, reg0);
15136 return true;
15139 /* Select a dominance comparison mode if possible for a test of the general
15140 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15141 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15142 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15143 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15144 In all cases OP will be either EQ or NE, but we don't need to know which
15145 here. If we are unable to support a dominance comparison we return
15146 CC mode. This will then fail to match for the RTL expressions that
15147 generate this call. */
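/* Two hedged examples (not from the original source): if X is
   (lt a b) and Y is (le c d) with COND_OR == DOM_CC_X_OR_Y, then LT
   dominates LE and the result is CC_DLEmode; if both X and Y are
   (eq ...) comparisons with COND_OR == DOM_CC_X_AND_Y, the result is
   CC_DEQmode.  */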
15148 machine_mode
15149 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15151 enum rtx_code cond1, cond2;
15152 int swapped = 0;
15154 /* Currently we will probably get the wrong result if the individual
15155 comparisons are not simple. This also ensures that it is safe to
15156 reverse a comparison if necessary. */
15157 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15158 != CCmode)
15159 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15160 != CCmode))
15161 return CCmode;
15163 /* The if_then_else variant of this tests the second condition if the
15164 first passes, but is true if the first fails. Reverse the first
15165 condition to get a true "inclusive-or" expression. */
15166 if (cond_or == DOM_CC_NX_OR_Y)
15167 cond1 = reverse_condition (cond1);
15169 /* If the comparisons are not equal, and one doesn't dominate the other,
15170 then we can't do this. */
15171 if (cond1 != cond2
15172 && !comparison_dominates_p (cond1, cond2)
15173 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15174 return CCmode;
15176 if (swapped)
15177 std::swap (cond1, cond2);
15179 switch (cond1)
15181 case EQ:
15182 if (cond_or == DOM_CC_X_AND_Y)
15183 return CC_DEQmode;
15185 switch (cond2)
15187 case EQ: return CC_DEQmode;
15188 case LE: return CC_DLEmode;
15189 case LEU: return CC_DLEUmode;
15190 case GE: return CC_DGEmode;
15191 case GEU: return CC_DGEUmode;
15192 default: gcc_unreachable ();
15195 case LT:
15196 if (cond_or == DOM_CC_X_AND_Y)
15197 return CC_DLTmode;
15199 switch (cond2)
15201 case LT:
15202 return CC_DLTmode;
15203 case LE:
15204 return CC_DLEmode;
15205 case NE:
15206 return CC_DNEmode;
15207 default:
15208 gcc_unreachable ();
15211 case GT:
15212 if (cond_or == DOM_CC_X_AND_Y)
15213 return CC_DGTmode;
15215 switch (cond2)
15217 case GT:
15218 return CC_DGTmode;
15219 case GE:
15220 return CC_DGEmode;
15221 case NE:
15222 return CC_DNEmode;
15223 default:
15224 gcc_unreachable ();
15227 case LTU:
15228 if (cond_or == DOM_CC_X_AND_Y)
15229 return CC_DLTUmode;
15231 switch (cond2)
15233 case LTU:
15234 return CC_DLTUmode;
15235 case LEU:
15236 return CC_DLEUmode;
15237 case NE:
15238 return CC_DNEmode;
15239 default:
15240 gcc_unreachable ();
15243 case GTU:
15244 if (cond_or == DOM_CC_X_AND_Y)
15245 return CC_DGTUmode;
15247 switch (cond2)
15249 case GTU:
15250 return CC_DGTUmode;
15251 case GEU:
15252 return CC_DGEUmode;
15253 case NE:
15254 return CC_DNEmode;
15255 default:
15256 gcc_unreachable ();
15259 /* The remaining cases only occur when both comparisons are the
15260 same. */
15261 case NE:
15262 gcc_assert (cond1 == cond2);
15263 return CC_DNEmode;
15265 case LE:
15266 gcc_assert (cond1 == cond2);
15267 return CC_DLEmode;
15269 case GE:
15270 gcc_assert (cond1 == cond2);
15271 return CC_DGEmode;
15273 case LEU:
15274 gcc_assert (cond1 == cond2);
15275 return CC_DLEUmode;
15277 case GEU:
15278 gcc_assert (cond1 == cond2);
15279 return CC_DGEUmode;
15281 default:
15282 gcc_unreachable ();
15286 machine_mode
15287 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15289 /* All floating point compares return CCFP if it is an equality
15290 comparison, and CCFPE otherwise. */
15291 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15293 switch (op)
15295 case EQ:
15296 case NE:
15297 case UNORDERED:
15298 case ORDERED:
15299 case UNLT:
15300 case UNLE:
15301 case UNGT:
15302 case UNGE:
15303 case UNEQ:
15304 case LTGT:
15305 return CCFPmode;
15307 case LT:
15308 case LE:
15309 case GT:
15310 case GE:
15311 return CCFPEmode;
15313 default:
15314 gcc_unreachable ();
15318 /* A compare with a shifted operand. Because of canonicalization, the
15319 comparison will have to be swapped when we emit the assembler. */
15320 if (GET_MODE (y) == SImode
15321 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15322 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15323 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15324 || GET_CODE (x) == ROTATERT))
15325 return CC_SWPmode;
15327 /* This operation is performed swapped, but since we only rely on the Z
15328 flag we don't need an additional mode. */
15329 if (GET_MODE (y) == SImode
15330 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15331 && GET_CODE (x) == NEG
15332 && (op == EQ || op == NE))
15333 return CC_Zmode;
15335 /* This is a special case that is used by combine to allow a
15336 comparison of a shifted byte load to be split into a zero-extend
15337 followed by a comparison of the shifted integer (only valid for
15338 equalities and unsigned inequalities). */
15339 if (GET_MODE (x) == SImode
15340 && GET_CODE (x) == ASHIFT
15341 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15342 && GET_CODE (XEXP (x, 0)) == SUBREG
15343 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15344 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15345 && (op == EQ || op == NE
15346 || op == GEU || op == GTU || op == LTU || op == LEU)
15347 && CONST_INT_P (y))
15348 return CC_Zmode;
15350 /* A construct for a conditional compare, if the false arm contains
15351 0, then both conditions must be true, otherwise either condition
15352 must be true. Not all conditions are possible, so CCmode is
15353 returned if it can't be done. */
15354 if (GET_CODE (x) == IF_THEN_ELSE
15355 && (XEXP (x, 2) == const0_rtx
15356 || XEXP (x, 2) == const1_rtx)
15357 && COMPARISON_P (XEXP (x, 0))
15358 && COMPARISON_P (XEXP (x, 1)))
15359 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15360 INTVAL (XEXP (x, 2)));
15362 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15363 if (GET_CODE (x) == AND
15364 && (op == EQ || op == NE)
15365 && COMPARISON_P (XEXP (x, 0))
15366 && COMPARISON_P (XEXP (x, 1)))
15367 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15368 DOM_CC_X_AND_Y);
15370 if (GET_CODE (x) == IOR
15371 && (op == EQ || op == NE)
15372 && COMPARISON_P (XEXP (x, 0))
15373 && COMPARISON_P (XEXP (x, 1)))
15374 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15375 DOM_CC_X_OR_Y);
15377 /* An operation (on Thumb) where we want to test for a single bit.
15378 This is done by shifting that bit up into the top bit of a
15379 scratch register; we can then branch on the sign bit. */
15380 if (TARGET_THUMB1
15381 && GET_MODE (x) == SImode
15382 && (op == EQ || op == NE)
15383 && GET_CODE (x) == ZERO_EXTRACT
15384 && XEXP (x, 1) == const1_rtx)
15385 return CC_Nmode;
15387 /* For an operation that sets the condition codes as a side-effect,
15388 the V flag is not set correctly, so we can only use comparisons where
15389 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15390 instead.) */
15391 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15392 if (GET_MODE (x) == SImode
15393 && y == const0_rtx
15394 && (op == EQ || op == NE || op == LT || op == GE)
15395 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15396 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15397 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15398 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15399 || GET_CODE (x) == LSHIFTRT
15400 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15401 || GET_CODE (x) == ROTATERT
15402 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15403 return CC_NOOVmode;
15405 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15406 return CC_Zmode;
15408 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15409 && GET_CODE (x) == PLUS
15410 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15411 return CC_Cmode;
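/* Editorial example (not part of the original source): the test above
   matches the unsigned-overflow idiom "if (a + b < a)"; only the carry
   flag is meaningful for that test, hence CC_Cmode.  */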
15413 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15415 switch (op)
15417 case EQ:
15418 case NE:
15419 /* A DImode comparison against zero can be implemented by
15420 or'ing the two halves together. */
15421 if (y == const0_rtx)
15422 return CC_Zmode;
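/* Editorial sketch: "or'ing the two halves" amounts to a sequence such as
   "orrs rT, rLO, rHI", which sets the Z flag exactly when the whole DImode
   value is zero, so CC_Zmode suffices.  */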
15424 /* We can do an equality test in three Thumb instructions. */
15425 if (!TARGET_32BIT)
15426 return CC_Zmode;
15428 /* FALLTHROUGH */
15430 case LTU:
15431 case LEU:
15432 case GTU:
15433 case GEU:
15434 /* DImode unsigned comparisons can be implemented by cmp +
15435 cmpeq without a scratch register. Not worth doing in
15436 Thumb-2. */
15437 if (TARGET_32BIT)
15438 return CC_CZmode;
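/* Editorial sketch (assumed expansion, not taken from this file): the
   cmp + cmpeq sequence compares the high words first and only compares the
   low words when they are equal, e.g. "cmp xHI, yHI; cmpeq xLO, yLO",
   which needs no scratch register.  */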
15440 /* FALLTHROUGH */
15442 case LT:
15443 case LE:
15444 case GT:
15445 case GE:
15446 /* DImode signed and unsigned comparisons can be implemented
15447 by cmp + sbcs with a scratch register, but that does not
15448 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15449 gcc_assert (op != EQ && op != NE);
15450 return CC_NCVmode;
15452 default:
15453 gcc_unreachable ();
15457 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15458 return GET_MODE (x);
15460 return CCmode;
15463 /* X and Y are two things to compare using CODE.  Emit the compare insn and
15464 return the rtx for the CC register in the proper mode.  SCRATCH, if nonnull,
15465 may be used as a scratch register for DImode comparisons. */
15467 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15469 machine_mode mode;
15470 rtx cc_reg;
15471 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15473 /* We might have X as a constant, Y as a register because of the predicates
15474 used for cmpdi. If so, force X to a register here. */
15475 if (dimode_comparison && !REG_P (x))
15476 x = force_reg (DImode, x);
15478 mode = SELECT_CC_MODE (code, x, y);
15479 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15481 if (dimode_comparison
15482 && mode != CC_CZmode)
15484 rtx clobber, set;
15486 /* To compare two non-zero values for equality, XOR them and
15487 then compare against zero. Not used for ARM mode; there
15488 CC_CZmode is cheaper. */
15489 if (mode == CC_Zmode && y != const0_rtx)
15491 gcc_assert (!reload_completed);
15492 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15493 y = const0_rtx;
15496 /* A scratch register is required. */
15497 if (reload_completed)
15498 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15499 else
15500 scratch = gen_rtx_SCRATCH (SImode);
15502 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15503 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15504 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15506 else
15507 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15509 return cc_reg;
15512 /* Generate a sequence of insns that will generate the correct return
15513 address mask depending on the physical architecture that the program
15514 is running on. */
15516 arm_gen_return_addr_mask (void)
15518 rtx reg = gen_reg_rtx (Pmode);
15520 emit_insn (gen_return_addr_mask (reg));
15521 return reg;
15524 void
15525 arm_reload_in_hi (rtx *operands)
15527 rtx ref = operands[1];
15528 rtx base, scratch;
15529 HOST_WIDE_INT offset = 0;
15531 if (GET_CODE (ref) == SUBREG)
15533 offset = SUBREG_BYTE (ref);
15534 ref = SUBREG_REG (ref);
15537 if (REG_P (ref))
15539 /* We have a pseudo which has been spilt onto the stack; there
15540 are two cases here: the first where there is a simple
15541 stack-slot replacement and a second where the stack-slot is
15542 out of range, or is used as a subreg. */
15543 if (reg_equiv_mem (REGNO (ref)))
15545 ref = reg_equiv_mem (REGNO (ref));
15546 base = find_replacement (&XEXP (ref, 0));
15548 else
15549 /* The slot is out of range, or was dressed up in a SUBREG. */
15550 base = reg_equiv_address (REGNO (ref));
15552 /* PR 62554: If there is no equivalent memory location then just move
15553 the value as an SImode register move. This happens when the target
15554 architecture variant does not have an HImode register move. */
15555 if (base == NULL)
15557 gcc_assert (REG_P (operands[0]));
15558 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15559 gen_rtx_SUBREG (SImode, ref, 0)));
15560 return;
15563 else
15564 base = find_replacement (&XEXP (ref, 0));
15566 /* Handle the case where the address is too complex to be offset by 1. */
15567 if (GET_CODE (base) == MINUS
15568 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15570 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15572 emit_set_insn (base_plus, base);
15573 base = base_plus;
15575 else if (GET_CODE (base) == PLUS)
15577 /* The addend must be CONST_INT, or we would have dealt with it above. */
15578 HOST_WIDE_INT hi, lo;
15580 offset += INTVAL (XEXP (base, 1));
15581 base = XEXP (base, 0);
15583 /* Rework the address into a legal sequence of insns. */
15584 /* Valid range for lo is -4095 -> 4095 */
15585 lo = (offset >= 0
15586 ? (offset & 0xfff)
15587 : -((-offset) & 0xfff));
15589 /* Corner case: if lo is the max offset then we would be out of range
15590 once we have added the additional 1 below, so bump the msb into the
15591 pre-loading insn(s). */
15592 if (lo == 4095)
15593 lo &= 0x7ff;
15595 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15596 ^ (HOST_WIDE_INT) 0x80000000)
15597 - (HOST_WIDE_INT) 0x80000000);
15599 gcc_assert (hi + lo == offset);
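/* Editorial worked example: for offset = 0x1234 the code above yields
   lo = 0x234 and hi = 0x1000; the base is advanced by hi below and the two
   byte accesses then use offsets 0x234 and 0x235, both within +/-4095.  */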
15601 if (hi != 0)
15603 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15605 /* Get the base address; addsi3 knows how to handle constants
15606 that require more than one insn. */
15607 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15608 base = base_plus;
15609 offset = lo;
15613 /* Operands[2] may overlap operands[0] (though it won't overlap
15614 operands[1]); that's why we asked for a DImode reg -- so we can
15615 use the half that does not overlap. */
15616 if (REGNO (operands[2]) == REGNO (operands[0]))
15617 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15618 else
15619 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15621 emit_insn (gen_zero_extendqisi2 (scratch,
15622 gen_rtx_MEM (QImode,
15623 plus_constant (Pmode, base,
15624 offset))));
15625 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15626 gen_rtx_MEM (QImode,
15627 plus_constant (Pmode, base,
15628 offset + 1))));
15629 if (!BYTES_BIG_ENDIAN)
15630 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15631 gen_rtx_IOR (SImode,
15632 gen_rtx_ASHIFT
15633 (SImode,
15634 gen_rtx_SUBREG (SImode, operands[0], 0),
15635 GEN_INT (8)),
15636 scratch));
15637 else
15638 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15639 gen_rtx_IOR (SImode,
15640 gen_rtx_ASHIFT (SImode, scratch,
15641 GEN_INT (8)),
15642 gen_rtx_SUBREG (SImode, operands[0], 0)));
15645 /* Handle storing a half-word to memory during reload by synthesizing as two
15646 byte stores. Take care not to clobber the input values until after we
15647 have moved them somewhere safe. This code assumes that if the DImode
15648 scratch in operands[2] overlaps either the input value or output address
15649 in some way, then that value must die in this insn (we absolutely need
15650 two scratch registers for some corner cases). */
15651 void
15652 arm_reload_out_hi (rtx *operands)
15654 rtx ref = operands[0];
15655 rtx outval = operands[1];
15656 rtx base, scratch;
15657 HOST_WIDE_INT offset = 0;
15659 if (GET_CODE (ref) == SUBREG)
15661 offset = SUBREG_BYTE (ref);
15662 ref = SUBREG_REG (ref);
15665 if (REG_P (ref))
15667 /* We have a pseudo which has been spilt onto the stack; there
15668 are two cases here: the first where there is a simple
15669 stack-slot replacement and a second where the stack-slot is
15670 out of range, or is used as a subreg. */
15671 if (reg_equiv_mem (REGNO (ref)))
15673 ref = reg_equiv_mem (REGNO (ref));
15674 base = find_replacement (&XEXP (ref, 0));
15676 else
15677 /* The slot is out of range, or was dressed up in a SUBREG. */
15678 base = reg_equiv_address (REGNO (ref));
15680 /* PR 62254: If there is no equivalent memory location then just move
15681 the value as an SImode register move. This happens when the target
15682 architecture variant does not have an HImode register move. */
15683 if (base == NULL)
15685 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15687 if (REG_P (outval))
15689 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15690 gen_rtx_SUBREG (SImode, outval, 0)));
15692 else /* SUBREG_P (outval) */
15694 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15695 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15696 SUBREG_REG (outval)));
15697 else
15698 /* FIXME: Handle other cases ? */
15699 gcc_unreachable ();
15701 return;
15704 else
15705 base = find_replacement (&XEXP (ref, 0));
15707 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15709 /* Handle the case where the address is too complex to be offset by 1. */
15710 if (GET_CODE (base) == MINUS
15711 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15713 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15715 /* Be careful not to destroy OUTVAL. */
15716 if (reg_overlap_mentioned_p (base_plus, outval))
15718 /* Updating base_plus might destroy outval; see if we can
15719 swap the scratch and base_plus. */
15720 if (!reg_overlap_mentioned_p (scratch, outval))
15721 std::swap (scratch, base_plus);
15722 else
15724 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15726 /* Be conservative and copy OUTVAL into the scratch now;
15727 this should only be necessary if outval is a subreg
15728 of something larger than a word. */
15729 /* XXX Might this clobber base? I can't see how it can,
15730 since scratch is known to overlap with OUTVAL, and
15731 must be wider than a word. */
15732 emit_insn (gen_movhi (scratch_hi, outval));
15733 outval = scratch_hi;
15737 emit_set_insn (base_plus, base);
15738 base = base_plus;
15740 else if (GET_CODE (base) == PLUS)
15742 /* The addend must be CONST_INT, or we would have dealt with it above. */
15743 HOST_WIDE_INT hi, lo;
15745 offset += INTVAL (XEXP (base, 1));
15746 base = XEXP (base, 0);
15748 /* Rework the address into a legal sequence of insns. */
15749 /* Valid range for lo is -4095 -> 4095 */
15750 lo = (offset >= 0
15751 ? (offset & 0xfff)
15752 : -((-offset) & 0xfff));
15754 /* Corner case: if lo is the max offset then we would be out of range
15755 once we have added the additional 1 below, so bump the msb into the
15756 pre-loading insn(s). */
15757 if (lo == 4095)
15758 lo &= 0x7ff;
15760 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15761 ^ (HOST_WIDE_INT) 0x80000000)
15762 - (HOST_WIDE_INT) 0x80000000);
15764 gcc_assert (hi + lo == offset);
15766 if (hi != 0)
15768 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15770 /* Be careful not to destroy OUTVAL. */
15771 if (reg_overlap_mentioned_p (base_plus, outval))
15773 /* Updating base_plus might destroy outval; see if we
15774 can swap the scratch and base_plus. */
15775 if (!reg_overlap_mentioned_p (scratch, outval))
15776 std::swap (scratch, base_plus);
15777 else
15779 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15781 /* Be conservative and copy outval into scratch now;
15782 this should only be necessary if outval is a
15783 subreg of something larger than a word. */
15784 /* XXX Might this clobber base? I can't see how it
15785 can, since scratch is known to overlap with
15786 outval. */
15787 emit_insn (gen_movhi (scratch_hi, outval));
15788 outval = scratch_hi;
15792 /* Get the base address; addsi3 knows how to handle constants
15793 that require more than one insn. */
15794 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15795 base = base_plus;
15796 offset = lo;
15800 if (BYTES_BIG_ENDIAN)
15802 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15803 plus_constant (Pmode, base,
15804 offset + 1)),
15805 gen_lowpart (QImode, outval)));
15806 emit_insn (gen_lshrsi3 (scratch,
15807 gen_rtx_SUBREG (SImode, outval, 0),
15808 GEN_INT (8)));
15809 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15810 offset)),
15811 gen_lowpart (QImode, scratch)));
15813 else
15815 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15816 offset)),
15817 gen_lowpart (QImode, outval)));
15818 emit_insn (gen_lshrsi3 (scratch,
15819 gen_rtx_SUBREG (SImode, outval, 0),
15820 GEN_INT (8)));
15821 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15822 plus_constant (Pmode, base,
15823 offset + 1)),
15824 gen_lowpart (QImode, scratch)));
15828 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15829 (padded to the size of a word) should be passed in a register. */
15831 static bool
15832 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15834 if (TARGET_AAPCS_BASED)
15835 return must_pass_in_stack_var_size (mode, type);
15836 else
15837 return must_pass_in_stack_var_size_or_pad (mode, type);
15841 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15842 Return true if an argument passed on the stack should be padded upwards,
15843 i.e. if the least-significant byte has useful data.
15844 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15845 aggregate types are placed at the lowest memory address. */
15847 bool
15848 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15850 if (!TARGET_AAPCS_BASED)
15851 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15853 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15854 return false;
15856 return true;
15860 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15861 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15862 register has useful data, and return the opposite if the most
15863 significant byte does. */
15865 bool
15866 arm_pad_reg_upward (machine_mode mode,
15867 tree type, int first ATTRIBUTE_UNUSED)
15869 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15871 /* For AAPCS, small aggregates, small fixed-point types,
15872 and small complex types are always padded upwards. */
15873 if (type)
15875 if ((AGGREGATE_TYPE_P (type)
15876 || TREE_CODE (type) == COMPLEX_TYPE
15877 || FIXED_POINT_TYPE_P (type))
15878 && int_size_in_bytes (type) <= 4)
15879 return true;
15881 else
15883 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15884 && GET_MODE_SIZE (mode) <= 4)
15885 return true;
15889 /* Otherwise, use default padding. */
15890 return !BYTES_BIG_ENDIAN;
15893 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15894 assuming that the address in the base register is word aligned. */
15895 bool
15896 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15898 HOST_WIDE_INT max_offset;
15900 /* Offset must be a multiple of 4 in Thumb mode. */
15901 if (TARGET_THUMB2 && ((offset & 3) != 0))
15902 return false;
15904 if (TARGET_THUMB2)
15905 max_offset = 1020;
15906 else if (TARGET_ARM)
15907 max_offset = 255;
15908 else
15909 return false;
15911 return ((offset <= max_offset) && (offset >= -max_offset));
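/* Editorial note on the limits above: ARM-state LDRD/STRD encode an 8-bit
   byte offset (0..255), while Thumb-2 LDRD/STRD encode an 8-bit offset
   scaled by 4 (0..1020), which is also why Thumb-2 offsets must be a
   multiple of 4.  */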
15914 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15915 Assumes that RT, RT2, and RN are REG; this is guaranteed by the patterns.
15916 Assumes that the address in the base register RN is word aligned. The pattern
15917 guarantees that both memory accesses use the same base register, that the
15918 offsets are constants within range, and that the gap between the offsets is 4.
15919 If reload is complete, check that the registers are legal. WBACK indicates
15920 whether the address is updated. LOAD indicates whether the access is a load or a store. */
15921 bool
15922 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15923 bool wback, bool load)
15925 unsigned int t, t2, n;
15927 if (!reload_completed)
15928 return true;
15930 if (!offset_ok_for_ldrd_strd (offset))
15931 return false;
15933 t = REGNO (rt);
15934 t2 = REGNO (rt2);
15935 n = REGNO (rn);
15937 if ((TARGET_THUMB2)
15938 && ((wback && (n == t || n == t2))
15939 || (t == SP_REGNUM)
15940 || (t == PC_REGNUM)
15941 || (t2 == SP_REGNUM)
15942 || (t2 == PC_REGNUM)
15943 || (!load && (n == PC_REGNUM))
15944 || (load && (t == t2))
15945 /* Triggers Cortex-M3 LDRD errata. */
15946 || (!wback && load && fix_cm3_ldrd && (n == t))))
15947 return false;
15949 if ((TARGET_ARM)
15950 && ((wback && (n == t || n == t2))
15951 || (t2 == PC_REGNUM)
15952 || (t % 2 != 0) /* First destination register is not even. */
15953 || (t2 != t + 1)
15954 /* PC can be used as base register (for offset addressing only),
15955 but it is deprecated. */
15956 || (n == PC_REGNUM)))
15957 return false;
15959 return true;
15962 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15963 operand MEM's address contains an immediate offset from the base
15964 register and has no side effects, in which case it sets BASE and
15965 OFFSET accordingly. */
15966 static bool
15967 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15969 rtx addr;
15971 gcc_assert (base != NULL && offset != NULL);
15973 /* TODO: Handle more general memory operand patterns, such as
15974 PRE_DEC and PRE_INC. */
15976 if (side_effects_p (mem))
15977 return false;
15979 /* Can't deal with subregs. */
15980 if (GET_CODE (mem) == SUBREG)
15981 return false;
15983 gcc_assert (MEM_P (mem));
15985 *offset = const0_rtx;
15987 addr = XEXP (mem, 0);
15989 /* If addr isn't valid for DImode, then we can't handle it. */
15990 if (!arm_legitimate_address_p (DImode, addr,
15991 reload_in_progress || reload_completed))
15992 return false;
15994 if (REG_P (addr))
15996 *base = addr;
15997 return true;
15999 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
16001 *base = XEXP (addr, 0);
16002 *offset = XEXP (addr, 1);
16003 return (REG_P (*base) && CONST_INT_P (*offset));
16006 return false;
16009 /* Called from a peephole2 to replace two word-size accesses with a
16010 single LDRD/STRD instruction. Returns true iff we can generate a
16011 new instruction sequence. That is, both accesses use the same base
16012 register and the gap between constant offsets is 4. This function
16013 may reorder its operands to match ldrd/strd RTL templates.
16014 OPERANDS are the operands found by the peephole matcher;
16015 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16016 corresponding memory operands. LOAD indicates whether the access
16017 is a load or a store. CONST_STORE indicates a store of constant
16018 integer values held in OPERANDS[4,5], and assumes that the pattern
16019 is 4 insns long, for the purpose of checking dead registers.
16020 COMMUTE indicates that register operands may be reordered. */
16021 bool
16022 gen_operands_ldrd_strd (rtx *operands, bool load,
16023 bool const_store, bool commute)
16025 int nops = 2;
16026 HOST_WIDE_INT offsets[2], offset;
16027 rtx base = NULL_RTX;
16028 rtx cur_base, cur_offset, tmp;
16029 int i, gap;
16030 HARD_REG_SET regset;
16032 gcc_assert (!const_store || !load);
16033 /* Check that the memory references are immediate offsets from the
16034 same base register. Extract the base register, the destination
16035 registers, and the corresponding memory offsets. */
16036 for (i = 0; i < nops; i++)
16038 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
16039 return false;
16041 if (i == 0)
16042 base = cur_base;
16043 else if (REGNO (base) != REGNO (cur_base))
16044 return false;
16046 offsets[i] = INTVAL (cur_offset);
16047 if (GET_CODE (operands[i]) == SUBREG)
16049 tmp = SUBREG_REG (operands[i]);
16050 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16051 operands[i] = tmp;
16055 /* Make sure there is no dependency between the individual loads. */
16056 if (load && REGNO (operands[0]) == REGNO (base))
16057 return false; /* RAW */
16059 if (load && REGNO (operands[0]) == REGNO (operands[1]))
16060 return false; /* WAW */
16062 /* If the same input register is used in both stores
16063 when storing different constants, try to find a free register.
16064 For example, the code
16065 mov r0, 0
16066 str r0, [r2]
16067 mov r0, 1
16068 str r0, [r2, #4]
16069 can be transformed into
16070 mov r1, 0
16071 mov r0, 1
16072 strd r1, r0, [r2]
16073 in Thumb mode assuming that r1 is free.
16074 For ARM mode do the same but only if the starting register
16075 can be made to be even. */
16076 if (const_store
16077 && REGNO (operands[0]) == REGNO (operands[1])
16078 && INTVAL (operands[4]) != INTVAL (operands[5]))
16080 if (TARGET_THUMB2)
16082 CLEAR_HARD_REG_SET (regset);
16083 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16084 if (tmp == NULL_RTX)
16085 return false;
16087 /* Use the new register in the first load to ensure that
16088 if the original input register is not dead after peephole,
16089 then it will have the correct constant value. */
16090 operands[0] = tmp;
16092 else if (TARGET_ARM)
16094 int regno = REGNO (operands[0]);
16095 if (!peep2_reg_dead_p (4, operands[0]))
16097 /* When the input register is even and is not dead after the
16098 pattern, it has to hold the second constant but we cannot
16099 form a legal STRD in ARM mode with this register as the second
16100 register. */
16101 if (regno % 2 == 0)
16102 return false;
16104 /* Is regno-1 free? */
16105 SET_HARD_REG_SET (regset);
16106 CLEAR_HARD_REG_BIT(regset, regno - 1);
16107 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16108 if (tmp == NULL_RTX)
16109 return false;
16111 operands[0] = tmp;
16113 else
16115 /* Find a DImode register. */
16116 CLEAR_HARD_REG_SET (regset);
16117 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16118 if (tmp != NULL_RTX)
16120 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16121 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16123 else
16125 /* Can we use the input register to form a DI register? */
16126 SET_HARD_REG_SET (regset);
16127 CLEAR_HARD_REG_BIT(regset,
16128 regno % 2 == 0 ? regno + 1 : regno - 1);
16129 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16130 if (tmp == NULL_RTX)
16131 return false;
16132 operands[regno % 2 == 1 ? 0 : 1] = tmp;
16136 gcc_assert (operands[0] != NULL_RTX);
16137 gcc_assert (operands[1] != NULL_RTX);
16138 gcc_assert (REGNO (operands[0]) % 2 == 0);
16139 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
16143 /* Make sure the instructions are ordered with lower memory access first. */
16144 if (offsets[0] > offsets[1])
16146 gap = offsets[0] - offsets[1];
16147 offset = offsets[1];
16149 /* Swap the instructions such that lower memory is accessed first. */
16150 std::swap (operands[0], operands[1]);
16151 std::swap (operands[2], operands[3]);
16152 if (const_store)
16153 std::swap (operands[4], operands[5]);
16155 else
16157 gap = offsets[1] - offsets[0];
16158 offset = offsets[0];
16161 /* Make sure accesses are to consecutive memory locations. */
16162 if (gap != 4)
16163 return false;
16165 /* Make sure we generate legal instructions. */
16166 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16167 false, load))
16168 return true;
16170 /* In Thumb state the registers are almost unconstrained, so if the
16171 check above failed there is little hope of fixing it here. */
16172 if (TARGET_THUMB2)
16173 return false;
16175 if (load && commute)
16177 /* Try reordering registers. */
16178 std::swap (operands[0], operands[1]);
16179 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16180 false, load))
16181 return true;
16184 if (const_store)
16186 /* If input registers are dead after this pattern, they can be
16187 reordered or replaced by other registers that are free in the
16188 current pattern. */
16189 if (!peep2_reg_dead_p (4, operands[0])
16190 || !peep2_reg_dead_p (4, operands[1]))
16191 return false;
16193 /* Try to reorder the input registers. */
16194 /* For example, the code
16195 mov r0, 0
16196 mov r1, 1
16197 str r1, [r2]
16198 str r0, [r2, #4]
16199 can be transformed into
16200 mov r1, 0
16201 mov r0, 1
16202 strd r0, [r2]
16204 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16205 false, false))
16207 std::swap (operands[0], operands[1]);
16208 return true;
16211 /* Try to find a free DI register. */
16212 CLEAR_HARD_REG_SET (regset);
16213 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16214 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16215 while (true)
16217 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16218 if (tmp == NULL_RTX)
16219 return false;
16221 /* DREG must be an even-numbered register in DImode.
16222 Split it into SI registers. */
16223 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16224 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16225 gcc_assert (operands[0] != NULL_RTX);
16226 gcc_assert (operands[1] != NULL_RTX);
16227 gcc_assert (REGNO (operands[0]) % 2 == 0);
16228 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16230 return (operands_ok_ldrd_strd (operands[0], operands[1],
16231 base, offset,
16232 false, load));
16236 return false;
16242 /* Print a symbolic form of X to the debug file, F. */
16243 static void
16244 arm_print_value (FILE *f, rtx x)
16246 switch (GET_CODE (x))
16248 case CONST_INT:
16249 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16250 return;
16252 case CONST_DOUBLE:
16253 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16254 return;
16256 case CONST_VECTOR:
16258 int i;
16260 fprintf (f, "<");
16261 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16263 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16264 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16265 fputc (',', f);
16267 fprintf (f, ">");
16269 return;
16271 case CONST_STRING:
16272 fprintf (f, "\"%s\"", XSTR (x, 0));
16273 return;
16275 case SYMBOL_REF:
16276 fprintf (f, "`%s'", XSTR (x, 0));
16277 return;
16279 case LABEL_REF:
16280 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16281 return;
16283 case CONST:
16284 arm_print_value (f, XEXP (x, 0));
16285 return;
16287 case PLUS:
16288 arm_print_value (f, XEXP (x, 0));
16289 fprintf (f, "+");
16290 arm_print_value (f, XEXP (x, 1));
16291 return;
16293 case PC:
16294 fprintf (f, "pc");
16295 return;
16297 default:
16298 fprintf (f, "????");
16299 return;
16303 /* Routines for manipulation of the constant pool. */
16305 /* Arm instructions cannot load a large constant directly into a
16306 register; they have to come from a pc relative load. The constant
16307 must therefore be placed in the addressable range of the pc
16308 relative load. Depending on the precise pc relative load
16309 instruction the range is somewhere between 256 bytes and 4k. This
16310 means that we often have to dump a constant inside a function, and
16311 generate code to branch around it.
16313 It is important to minimize this, since the branches will slow
16314 things down and make the code larger.
16316 Normally we can hide the table after an existing unconditional
16317 branch so that there is no interruption of the flow, but in the
16318 worst case the code looks like this:
16320 ldr rn, L1
16322 b L2
16323 align
16324 L1: .long value
16328 ldr rn, L3
16330 b L4
16331 align
16332 L3: .long value
16336 We fix this by performing a scan after scheduling, which notices
16337 which instructions need to have their operands fetched from the
16338 constant table and builds the table.
16340 The algorithm starts by building a table of all the constants that
16341 need fixing up and all the natural barriers in the function (places
16342 where a constant table can be dropped without breaking the flow).
16343 For each fixup we note how far the pc-relative replacement will be
16344 able to reach and the offset of the instruction into the function.
16346 Having built the table we then group the fixes together to form
16347 tables that are as large as possible (subject to addressing
16348 constraints) and emit each table of constants after the last
16349 barrier that is within range of all the instructions in the group.
16350 If a group does not contain a barrier, then we forcibly create one
16351 by inserting a jump instruction into the flow. Once the table has
16352 been inserted, the insns are then modified to reference the
16353 relevant entry in the pool.
16355 Possible enhancements to the algorithm (not implemented) are:
16357 1) For some processors and object formats, there may be benefit in
16358 aligning the pools to the start of cache lines; this alignment
16359 would need to be taken into account when calculating addressability
16360 of a pool. */
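/* Editorial illustration of the scheme above: a fix at address A whose
   pc-relative load can reach FORWARDS bytes ahead and BACKWARDS bytes behind
   can only use a pool placed roughly within [A - BACKWARDS, A + FORWARDS];
   the grouping code below keeps every entry of a pool within the window of
   every fix that references it.  */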
16362 /* These typedefs are located at the start of this file, so that
16363 they can be used in the prototypes there. This comment is to
16364 remind readers of that fact so that the following structures
16365 can be understood more easily.
16367 typedef struct minipool_node Mnode;
16368 typedef struct minipool_fixup Mfix; */
16370 struct minipool_node
16372 /* Doubly linked chain of entries. */
16373 Mnode * next;
16374 Mnode * prev;
16375 /* The maximum offset into the code that this entry can be placed. While
16376 pushing fixes for forward references, all entries are sorted in order
16377 of increasing max_address. */
16378 HOST_WIDE_INT max_address;
16379 /* Similarly for an entry inserted for a backwards ref. */
16380 HOST_WIDE_INT min_address;
16381 /* The number of fixes referencing this entry. This can become zero
16382 if we "unpush" an entry. In this case we ignore the entry when we
16383 come to emit the code. */
16384 int refcount;
16385 /* The offset from the start of the minipool. */
16386 HOST_WIDE_INT offset;
16387 /* The value in table. */
16388 rtx value;
16389 /* The mode of value. */
16390 machine_mode mode;
16391 /* The size of the value. With iWMMXt enabled
16392 sizes > 4 also imply an alignment of 8 bytes. */
16393 int fix_size;
16396 struct minipool_fixup
16398 Mfix * next;
16399 rtx_insn * insn;
16400 HOST_WIDE_INT address;
16401 rtx * loc;
16402 machine_mode mode;
16403 int fix_size;
16404 rtx value;
16405 Mnode * minipool;
16406 HOST_WIDE_INT forwards;
16407 HOST_WIDE_INT backwards;
16410 /* Fixes less than a word need padding out to a word boundary. */
16411 #define MINIPOOL_FIX_SIZE(mode) \
16412 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
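/* Editorial example: MINIPOOL_FIX_SIZE (HImode) is 4 (padded up to a word),
   while MINIPOOL_FIX_SIZE (DFmode) is 8, since GET_MODE_SIZE is already >= 4.  */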
16414 static Mnode * minipool_vector_head;
16415 static Mnode * minipool_vector_tail;
16416 static rtx_code_label *minipool_vector_label;
16417 static int minipool_pad;
16419 /* The linked list of all minipool fixes required for this function. */
16420 Mfix * minipool_fix_head;
16421 Mfix * minipool_fix_tail;
16422 /* The fix entry for the current minipool, once it has been placed. */
16423 Mfix * minipool_barrier;
16425 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16426 #define JUMP_TABLES_IN_TEXT_SECTION 0
16427 #endif
16429 static HOST_WIDE_INT
16430 get_jump_table_size (rtx_jump_table_data *insn)
16432 /* ADDR_VECs only take room if read-only data goes into the text
16433 section. */
16434 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16436 rtx body = PATTERN (insn);
16437 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16438 HOST_WIDE_INT size;
16439 HOST_WIDE_INT modesize;
16441 modesize = GET_MODE_SIZE (GET_MODE (body));
16442 size = modesize * XVECLEN (body, elt);
16443 switch (modesize)
16445 case 1:
16446 /* Round up size of TBB table to a halfword boundary. */
16447 size = (size + 1) & ~HOST_WIDE_INT_1;
16448 break;
16449 case 2:
16450 /* No padding necessary for TBH. */
16451 break;
16452 case 4:
16453 /* Add two bytes for alignment on Thumb. */
16454 if (TARGET_THUMB)
16455 size += 2;
16456 break;
16457 default:
16458 gcc_unreachable ();
16460 return size;
16463 return 0;
16466 /* Return the maximum amount of padding that will be inserted before
16467 label LABEL. */
16469 static HOST_WIDE_INT
16470 get_label_padding (rtx label)
16472 HOST_WIDE_INT align, min_insn_size;
16474 align = 1 << label_to_alignment (label);
16475 min_insn_size = TARGET_THUMB ? 2 : 4;
16476 return align > min_insn_size ? align - min_insn_size : 0;
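/* Editorial example: for a label aligned to 8 bytes in Thumb code
   (min_insn_size == 2), the function above reports a worst-case padding of
   6 bytes.  */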
16479 /* Move a minipool fix MP from its current location to before MAX_MP.
16480 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16481 constraints may need updating. */
16482 static Mnode *
16483 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16484 HOST_WIDE_INT max_address)
16486 /* The code below assumes these are different. */
16487 gcc_assert (mp != max_mp);
16489 if (max_mp == NULL)
16491 if (max_address < mp->max_address)
16492 mp->max_address = max_address;
16494 else
16496 if (max_address > max_mp->max_address - mp->fix_size)
16497 mp->max_address = max_mp->max_address - mp->fix_size;
16498 else
16499 mp->max_address = max_address;
16501 /* Unlink MP from its current position. Since max_mp is non-null,
16502 mp->prev must be non-null. */
16503 mp->prev->next = mp->next;
16504 if (mp->next != NULL)
16505 mp->next->prev = mp->prev;
16506 else
16507 minipool_vector_tail = mp->prev;
16509 /* Re-insert it before MAX_MP. */
16510 mp->next = max_mp;
16511 mp->prev = max_mp->prev;
16512 max_mp->prev = mp;
16514 if (mp->prev != NULL)
16515 mp->prev->next = mp;
16516 else
16517 minipool_vector_head = mp;
16520 /* Save the new entry. */
16521 max_mp = mp;
16523 /* Scan over the preceding entries and adjust their addresses as
16524 required. */
16525 while (mp->prev != NULL
16526 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16528 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16529 mp = mp->prev;
16532 return max_mp;
16535 /* Add a constant to the minipool for a forward reference. Returns the
16536 node added or NULL if the constant will not fit in this pool. */
16537 static Mnode *
16538 add_minipool_forward_ref (Mfix *fix)
16540 /* If set, max_mp is the first pool_entry that has a lower
16541 constraint than the one we are trying to add. */
16542 Mnode * max_mp = NULL;
16543 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16544 Mnode * mp;
16546 /* If the minipool starts before the end of FIX->INSN then this FIX
16547 can not be placed into the current pool. Furthermore, adding the
16548 new constant pool entry may cause the pool to start FIX_SIZE bytes
16549 earlier. */
16550 if (minipool_vector_head &&
16551 (fix->address + get_attr_length (fix->insn)
16552 >= minipool_vector_head->max_address - fix->fix_size))
16553 return NULL;
16555 /* Scan the pool to see if a constant with the same value has
16556 already been added. While we are doing this, also note the
16557 location where we must insert the constant if it doesn't already
16558 exist. */
16559 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16561 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16562 && fix->mode == mp->mode
16563 && (!LABEL_P (fix->value)
16564 || (CODE_LABEL_NUMBER (fix->value)
16565 == CODE_LABEL_NUMBER (mp->value)))
16566 && rtx_equal_p (fix->value, mp->value))
16568 /* More than one fix references this entry. */
16569 mp->refcount++;
16570 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16573 /* Note the insertion point if necessary. */
16574 if (max_mp == NULL
16575 && mp->max_address > max_address)
16576 max_mp = mp;
16578 /* If we are inserting an 8-byte aligned quantity and
16579 we have not already found an insertion point, then
16580 make sure that all such 8-byte aligned quantities are
16581 placed at the start of the pool. */
16582 if (ARM_DOUBLEWORD_ALIGN
16583 && max_mp == NULL
16584 && fix->fix_size >= 8
16585 && mp->fix_size < 8)
16587 max_mp = mp;
16588 max_address = mp->max_address;
16592 /* The value is not currently in the minipool, so we need to create
16593 a new entry for it. If MAX_MP is NULL, the entry will be put on
16594 the end of the list since the placement is less constrained than
16595 any existing entry. Otherwise, we insert the new fix before
16596 MAX_MP and, if necessary, adjust the constraints on the other
16597 entries. */
16598 mp = XNEW (Mnode);
16599 mp->fix_size = fix->fix_size;
16600 mp->mode = fix->mode;
16601 mp->value = fix->value;
16602 mp->refcount = 1;
16603 /* Not yet required for a backwards ref. */
16604 mp->min_address = -65536;
16606 if (max_mp == NULL)
16608 mp->max_address = max_address;
16609 mp->next = NULL;
16610 mp->prev = minipool_vector_tail;
16612 if (mp->prev == NULL)
16614 minipool_vector_head = mp;
16615 minipool_vector_label = gen_label_rtx ();
16617 else
16618 mp->prev->next = mp;
16620 minipool_vector_tail = mp;
16622 else
16624 if (max_address > max_mp->max_address - mp->fix_size)
16625 mp->max_address = max_mp->max_address - mp->fix_size;
16626 else
16627 mp->max_address = max_address;
16629 mp->next = max_mp;
16630 mp->prev = max_mp->prev;
16631 max_mp->prev = mp;
16632 if (mp->prev != NULL)
16633 mp->prev->next = mp;
16634 else
16635 minipool_vector_head = mp;
16638 /* Save the new entry. */
16639 max_mp = mp;
16641 /* Scan over the preceding entries and adjust their addresses as
16642 required. */
16643 while (mp->prev != NULL
16644 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16646 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16647 mp = mp->prev;
16650 return max_mp;
16653 static Mnode *
16654 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16655 HOST_WIDE_INT min_address)
16657 HOST_WIDE_INT offset;
16659 /* The code below assumes these are different. */
16660 gcc_assert (mp != min_mp);
16662 if (min_mp == NULL)
16664 if (min_address > mp->min_address)
16665 mp->min_address = min_address;
16667 else
16669 /* We will adjust this below if it is too loose. */
16670 mp->min_address = min_address;
16672 /* Unlink MP from its current position. Since min_mp is non-null,
16673 mp->next must be non-null. */
16674 mp->next->prev = mp->prev;
16675 if (mp->prev != NULL)
16676 mp->prev->next = mp->next;
16677 else
16678 minipool_vector_head = mp->next;
16680 /* Reinsert it after MIN_MP. */
16681 mp->prev = min_mp;
16682 mp->next = min_mp->next;
16683 min_mp->next = mp;
16684 if (mp->next != NULL)
16685 mp->next->prev = mp;
16686 else
16687 minipool_vector_tail = mp;
16690 min_mp = mp;
16692 offset = 0;
16693 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16695 mp->offset = offset;
16696 if (mp->refcount > 0)
16697 offset += mp->fix_size;
16699 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16700 mp->next->min_address = mp->min_address + mp->fix_size;
16703 return min_mp;
16706 /* Add a constant to the minipool for a backward reference. Returns the
16707 node added or NULL if the constant will not fit in this pool.
16709 Note that the code for insertion for a backwards reference can be
16710 somewhat confusing because the calculated offsets for each fix do
16711 not take into account the size of the pool (which is still under
16712 construction). */
16713 static Mnode *
16714 add_minipool_backward_ref (Mfix *fix)
16716 /* If set, min_mp is the last pool_entry that has a lower constraint
16717 than the one we are trying to add. */
16718 Mnode *min_mp = NULL;
16719 /* This can be negative, since it is only a constraint. */
16720 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16721 Mnode *mp;
16723 /* If we can't reach the current pool from this insn, or if we can't
16724 insert this entry at the end of the pool without pushing other
16725 fixes out of range, then we don't try. This ensures that we
16726 can't fail later on. */
16727 if (min_address >= minipool_barrier->address
16728 || (minipool_vector_tail->min_address + fix->fix_size
16729 >= minipool_barrier->address))
16730 return NULL;
16732 /* Scan the pool to see if a constant with the same value has
16733 already been added. While we are doing this, also note the
16734 location where we must insert the constant if it doesn't already
16735 exist. */
16736 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16738 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16739 && fix->mode == mp->mode
16740 && (!LABEL_P (fix->value)
16741 || (CODE_LABEL_NUMBER (fix->value)
16742 == CODE_LABEL_NUMBER (mp->value)))
16743 && rtx_equal_p (fix->value, mp->value)
16744 /* Check that there is enough slack to move this entry to the
16745 end of the table (this is conservative). */
16746 && (mp->max_address
16747 > (minipool_barrier->address
16748 + minipool_vector_tail->offset
16749 + minipool_vector_tail->fix_size)))
16751 mp->refcount++;
16752 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16755 if (min_mp != NULL)
16756 mp->min_address += fix->fix_size;
16757 else
16759 /* Note the insertion point if necessary. */
16760 if (mp->min_address < min_address)
16762 /* For now, we do not allow the insertion of nodes that require
16763 8-byte alignment anywhere but at the start of the pool. */
16764 if (ARM_DOUBLEWORD_ALIGN
16765 && fix->fix_size >= 8 && mp->fix_size < 8)
16766 return NULL;
16767 else
16768 min_mp = mp;
16770 else if (mp->max_address
16771 < minipool_barrier->address + mp->offset + fix->fix_size)
16773 /* Inserting before this entry would push the fix beyond
16774 its maximum address (which can happen if we have
16775 re-located a forwards fix); force the new fix to come
16776 after it. */
16777 if (ARM_DOUBLEWORD_ALIGN
16778 && fix->fix_size >= 8 && mp->fix_size < 8)
16779 return NULL;
16780 else
16782 min_mp = mp;
16783 min_address = mp->min_address + fix->fix_size;
16786 /* Do not insert a non-8-byte aligned quantity before 8-byte
16787 aligned quantities. */
16788 else if (ARM_DOUBLEWORD_ALIGN
16789 && fix->fix_size < 8
16790 && mp->fix_size >= 8)
16792 min_mp = mp;
16793 min_address = mp->min_address + fix->fix_size;
16798 /* We need to create a new entry. */
16799 mp = XNEW (Mnode);
16800 mp->fix_size = fix->fix_size;
16801 mp->mode = fix->mode;
16802 mp->value = fix->value;
16803 mp->refcount = 1;
16804 mp->max_address = minipool_barrier->address + 65536;
16806 mp->min_address = min_address;
16808 if (min_mp == NULL)
16810 mp->prev = NULL;
16811 mp->next = minipool_vector_head;
16813 if (mp->next == NULL)
16815 minipool_vector_tail = mp;
16816 minipool_vector_label = gen_label_rtx ();
16818 else
16819 mp->next->prev = mp;
16821 minipool_vector_head = mp;
16823 else
16825 mp->next = min_mp->next;
16826 mp->prev = min_mp;
16827 min_mp->next = mp;
16829 if (mp->next != NULL)
16830 mp->next->prev = mp;
16831 else
16832 minipool_vector_tail = mp;
16835 /* Save the new entry. */
16836 min_mp = mp;
16838 if (mp->prev)
16839 mp = mp->prev;
16840 else
16841 mp->offset = 0;
16843 /* Scan over the following entries and adjust their offsets. */
16844 while (mp->next != NULL)
16846 if (mp->next->min_address < mp->min_address + mp->fix_size)
16847 mp->next->min_address = mp->min_address + mp->fix_size;
16849 if (mp->refcount)
16850 mp->next->offset = mp->offset + mp->fix_size;
16851 else
16852 mp->next->offset = mp->offset;
16854 mp = mp->next;
16857 return min_mp;
16860 static void
16861 assign_minipool_offsets (Mfix *barrier)
16863 HOST_WIDE_INT offset = 0;
16864 Mnode *mp;
16866 minipool_barrier = barrier;
16868 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16870 mp->offset = offset;
16872 if (mp->refcount > 0)
16873 offset += mp->fix_size;
16877 /* Output the literal table */
16878 static void
16879 dump_minipool (rtx_insn *scan)
16881 Mnode * mp;
16882 Mnode * nmp;
16883 int align64 = 0;
16885 if (ARM_DOUBLEWORD_ALIGN)
16886 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16887 if (mp->refcount > 0 && mp->fix_size >= 8)
16889 align64 = 1;
16890 break;
16893 if (dump_file)
16894 fprintf (dump_file,
16895 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16896 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16898 scan = emit_label_after (gen_label_rtx (), scan);
16899 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16900 scan = emit_label_after (minipool_vector_label, scan);
16902 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16904 if (mp->refcount > 0)
16906 if (dump_file)
16908 fprintf (dump_file,
16909 ";; Offset %u, min %ld, max %ld ",
16910 (unsigned) mp->offset, (unsigned long) mp->min_address,
16911 (unsigned long) mp->max_address);
16912 arm_print_value (dump_file, mp->value);
16913 fputc ('\n', dump_file);
16916 switch (GET_MODE_SIZE (mp->mode))
16918 #ifdef HAVE_consttable_1
16919 case 1:
16920 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16921 break;
16923 #endif
16924 #ifdef HAVE_consttable_2
16925 case 2:
16926 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16927 break;
16929 #endif
16930 #ifdef HAVE_consttable_4
16931 case 4:
16932 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16933 break;
16935 #endif
16936 #ifdef HAVE_consttable_8
16937 case 8:
16938 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16939 break;
16941 #endif
16942 #ifdef HAVE_consttable_16
16943 case 16:
16944 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16945 break;
16947 #endif
16948 default:
16949 gcc_unreachable ();
16953 nmp = mp->next;
16954 free (mp);
16957 minipool_vector_head = minipool_vector_tail = NULL;
16958 scan = emit_insn_after (gen_consttable_end (), scan);
16959 scan = emit_barrier_after (scan);
16962 /* Return the cost of forcibly inserting a barrier after INSN. */
16963 static int
16964 arm_barrier_cost (rtx_insn *insn)
16966 /* Basing the location of the pool on the loop depth is preferable,
16967 but at the moment, the basic block information seems to be
16968 corrupt by this stage of the compilation. */
16969 int base_cost = 50;
16970 rtx_insn *next = next_nonnote_insn (insn);
16972 if (next != NULL && LABEL_P (next))
16973 base_cost -= 20;
16975 switch (GET_CODE (insn))
16977 case CODE_LABEL:
16978 /* It will always be better to place the table before the label, rather
16979 than after it. */
16980 return 50;
16982 case INSN:
16983 case CALL_INSN:
16984 return base_cost;
16986 case JUMP_INSN:
16987 return base_cost - 10;
16989 default:
16990 return base_cost + 10;
16994 /* Find the best place in the insn stream in the range
16995 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16996 Create the barrier by inserting a jump and add a new fix entry for
16997 it. */
16998 static Mfix *
16999 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
17001 HOST_WIDE_INT count = 0;
17002 rtx_barrier *barrier;
17003 rtx_insn *from = fix->insn;
17004 /* The instruction after which we will insert the jump. */
17005 rtx_insn *selected = NULL;
17006 int selected_cost;
17007 /* The address at which the jump instruction will be placed. */
17008 HOST_WIDE_INT selected_address;
17009 Mfix * new_fix;
17010 HOST_WIDE_INT max_count = max_address - fix->address;
17011 rtx_code_label *label = gen_label_rtx ();
17013 selected_cost = arm_barrier_cost (from);
17014 selected_address = fix->address;
17016 while (from && count < max_count)
17018 rtx_jump_table_data *tmp;
17019 int new_cost;
17021 /* This code shouldn't have been called if there was a natural barrier
17022 within range. */
17023 gcc_assert (!BARRIER_P (from));
17025 /* Count the length of this insn. This must stay in sync with the
17026 code that pushes minipool fixes. */
17027 if (LABEL_P (from))
17028 count += get_label_padding (from);
17029 else
17030 count += get_attr_length (from);
17032 /* If there is a jump table, add its length. */
17033 if (tablejump_p (from, NULL, &tmp))
17035 count += get_jump_table_size (tmp);
17037 /* Jump tables aren't in a basic block, so base the cost on
17038 the dispatch insn. If we select this location, we will
17039 still put the pool after the table. */
17040 new_cost = arm_barrier_cost (from);
17042 if (count < max_count
17043 && (!selected || new_cost <= selected_cost))
17045 selected = tmp;
17046 selected_cost = new_cost;
17047 selected_address = fix->address + count;
17050 /* Continue after the dispatch table. */
17051 from = NEXT_INSN (tmp);
17052 continue;
17055 new_cost = arm_barrier_cost (from);
17057 if (count < max_count
17058 && (!selected || new_cost <= selected_cost))
17060 selected = from;
17061 selected_cost = new_cost;
17062 selected_address = fix->address + count;
17065 from = NEXT_INSN (from);
17068 /* Make sure that we found a place to insert the jump. */
17069 gcc_assert (selected);
17071 /* Make sure we do not split a call and its corresponding
17072 CALL_ARG_LOCATION note. */
17073 if (CALL_P (selected))
17075 rtx_insn *next = NEXT_INSN (selected);
17076 if (next && NOTE_P (next)
17077 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
17078 selected = next;
17081 /* Create a new JUMP_INSN that branches around a barrier. */
17082 from = emit_jump_insn_after (gen_jump (label), selected);
17083 JUMP_LABEL (from) = label;
17084 barrier = emit_barrier_after (from);
17085 emit_label_after (label, barrier);
17087 /* Create a minipool barrier entry for the new barrier. */
17088 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
17089 new_fix->insn = barrier;
17090 new_fix->address = selected_address;
17091 new_fix->next = fix->next;
17092 fix->next = new_fix;
17094 return new_fix;
17097 /* Record that there is a natural barrier in the insn stream at
17098 ADDRESS. */
17099 static void
17100 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
17102 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17104 fix->insn = insn;
17105 fix->address = address;
17107 fix->next = NULL;
17108 if (minipool_fix_head != NULL)
17109 minipool_fix_tail->next = fix;
17110 else
17111 minipool_fix_head = fix;
17113 minipool_fix_tail = fix;
17116 /* Record INSN, which will need fixing up to load a value from the
17117 minipool. ADDRESS is the offset of the insn since the start of the
17118 function; LOC is a pointer to the part of the insn which requires
17119 fixing; VALUE is the constant that must be loaded, which is of type
17120 MODE. */
17121 static void
17122 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
17123 machine_mode mode, rtx value)
17125 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17127 fix->insn = insn;
17128 fix->address = address;
17129 fix->loc = loc;
17130 fix->mode = mode;
17131 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
17132 fix->value = value;
17133 fix->forwards = get_attr_pool_range (insn);
17134 fix->backwards = get_attr_neg_pool_range (insn);
17135 fix->minipool = NULL;
17137 /* If an insn doesn't have a range defined for it, then it isn't
17138 expecting to be reworked by this code. Better to stop now than
17139 to generate duff assembly code. */
17140 gcc_assert (fix->forwards || fix->backwards);
17142 /* If an entry requires 8-byte alignment then assume all constant pools
17143 require 4 bytes of padding. Trying to do this later on a per-pool
17144 basis is awkward because existing pool entries have to be modified. */
17145 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
17146 minipool_pad = 4;
17148 if (dump_file)
17150 fprintf (dump_file,
17151 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17152 GET_MODE_NAME (mode),
17153 INSN_UID (insn), (unsigned long) address,
17154 -1 * (long)fix->backwards, (long)fix->forwards);
17155 arm_print_value (dump_file, fix->value);
17156 fprintf (dump_file, "\n");
17159 /* Add it to the chain of fixes. */
17160 fix->next = NULL;
17162 if (minipool_fix_head != NULL)
17163 minipool_fix_tail->next = fix;
17164 else
17165 minipool_fix_head = fix;
17167 minipool_fix_tail = fix;
17170 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
17171 Returns the number of insns needed, or 99 if we always want to synthesize
17172 the value. */
17174 arm_max_const_double_inline_cost ()
17176 /* Let the value get synthesized to avoid the use of literal pools. */
17177 if (arm_disable_literal_pool)
17178 return 99;
17180 return ((optimize_size || arm_ld_sched) ? 3 : 4);
17183 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17184 Returns the number of insns needed, or 99 if we don't know how to
17185 do it. */
17187 arm_const_double_inline_cost (rtx val)
17189 rtx lowpart, highpart;
17190 machine_mode mode;
17192 mode = GET_MODE (val);
17194 if (mode == VOIDmode)
17195 mode = DImode;
17197 gcc_assert (GET_MODE_SIZE (mode) == 8);
17199 lowpart = gen_lowpart (SImode, val);
17200 highpart = gen_highpart_mode (SImode, mode, val);
17202 gcc_assert (CONST_INT_P (lowpart));
17203 gcc_assert (CONST_INT_P (highpart));
17205 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17206 NULL_RTX, NULL_RTX, 0, 0)
17207 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17208 NULL_RTX, NULL_RTX, 0, 0));
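/* Editorial example: for the DImode constant 0x0000000100000001 both halves
   are 1 and each can be synthesized with a single MOV, so the cost computed
   above is 2; a caller would compare this against
   arm_max_const_double_inline_cost () when deciding whether to split.  */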
17211 /* Cost of loading a SImode constant. */
17212 static inline int
17213 arm_const_inline_cost (enum rtx_code code, rtx val)
17215 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17216 NULL_RTX, NULL_RTX, 1, 0);
17219 /* Return true if it is worthwhile to split a 64-bit constant into two
17220 32-bit operations. This is the case if optimizing for size, or
17221 if we have load delay slots, or if one 32-bit part can be done with
17222 a single data operation. */
17223 bool
17224 arm_const_double_by_parts (rtx val)
17226 machine_mode mode = GET_MODE (val);
17227 rtx part;
17229 if (optimize_size || arm_ld_sched)
17230 return true;
17232 if (mode == VOIDmode)
17233 mode = DImode;
17235 part = gen_highpart_mode (SImode, mode, val);
17237 gcc_assert (CONST_INT_P (part));
17239 if (const_ok_for_arm (INTVAL (part))
17240 || const_ok_for_arm (~INTVAL (part)))
17241 return true;
17243 part = gen_lowpart (SImode, val);
17245 gcc_assert (CONST_INT_P (part));
17247 if (const_ok_for_arm (INTVAL (part))
17248 || const_ok_for_arm (~INTVAL (part)))
17249 return true;
17251 return false;
17254 /* Return true if it is possible to inline both the high and low parts
17255 of a 64-bit constant into 32-bit data processing instructions. */
17256 bool
17257 arm_const_double_by_immediates (rtx val)
17259 machine_mode mode = GET_MODE (val);
17260 rtx part;
17262 if (mode == VOIDmode)
17263 mode = DImode;
17265 part = gen_highpart_mode (SImode, mode, val);
17267 gcc_assert (CONST_INT_P (part));
17269 if (!const_ok_for_arm (INTVAL (part)))
17270 return false;
17272 part = gen_lowpart (SImode, val);
17274 gcc_assert (CONST_INT_P (part));
17276 if (!const_ok_for_arm (INTVAL (part)))
17277 return false;
17279 return true;
17282 /* Scan INSN and note any of its operands that need fixing.
17283 If DO_PUSHES is false we do not actually push any of the fixups
17284 needed. */
17285 static void
17286 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17288 int opno;
17290 extract_constrain_insn (insn);
17292 if (recog_data.n_alternatives == 0)
17293 return;
17295 /* Fill in recog_op_alt with information about the constraints of
17296 this insn. */
17297 preprocess_constraints (insn);
17299 const operand_alternative *op_alt = which_op_alt ();
17300 for (opno = 0; opno < recog_data.n_operands; opno++)
17302 /* Things we need to fix can only occur in inputs. */
17303 if (recog_data.operand_type[opno] != OP_IN)
17304 continue;
17306 /* If this alternative is a memory reference, then any mention
17307 of constants in this alternative is really to fool reload
17308 into allowing us to accept one there. We need to fix them up
17309 now so that we output the right code. */
17310 if (op_alt[opno].memory_ok)
17312 rtx op = recog_data.operand[opno];
17314 if (CONSTANT_P (op))
17316 if (do_pushes)
17317 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17318 recog_data.operand_mode[opno], op);
17320 else if (MEM_P (op)
17321 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17322 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17324 if (do_pushes)
17326 rtx cop = avoid_constant_pool_reference (op);
17328 /* Casting the address of something to a mode narrower
17329 than a word can cause avoid_constant_pool_reference()
17330 to return the pool reference itself. That's no good to
17331 us here. Let's just hope that we can use the
17332 constant pool value directly. */
17333 if (op == cop)
17334 cop = get_pool_constant (XEXP (op, 0));
17336 push_minipool_fix (insn, address,
17337 recog_data.operand_loc[opno],
17338 recog_data.operand_mode[opno], cop);
17345 return;
17348 /* Rewrite move insn into subtract of 0 if the condition codes will
17349 be useful in next conditional jump insn. */
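/* Roughly, for a Thumb-1 block that ends in

     mov    rX, rY
     ...
     cmp    rX, #0
     b<cond> <label>

   the move is rewritten as a subtract of zero ("subs rX, rY, #0") and the
   register tested by the cbranch is pointed at the move's destination, so
   the flag-setting form can make the explicit comparison redundant.
   Register names here are purely illustrative.  */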
17351 static void
17352 thumb1_reorg (void)
17354 basic_block bb;
17356 FOR_EACH_BB_FN (bb, cfun)
17358 rtx dest, src;
17359 rtx cmp, op0, op1, set = NULL;
17360 rtx_insn *prev, *insn = BB_END (bb);
17361 bool insn_clobbered = false;
17363 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17364 insn = PREV_INSN (insn);
17366 /* Find the last cbranchsi4_insn in basic block BB. */
17367 if (insn == BB_HEAD (bb)
17368 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17369 continue;
17371 /* Get the register with which we are comparing. */
17372 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17373 op0 = XEXP (cmp, 0);
17374 op1 = XEXP (cmp, 1);
17376 /* Check that comparison is against ZERO. */
17377 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17378 continue;
17380 /* Find the first flag setting insn before INSN in basic block BB. */
17381 gcc_assert (insn != BB_HEAD (bb));
17382 for (prev = PREV_INSN (insn);
17383 (!insn_clobbered
17384 && prev != BB_HEAD (bb)
17385 && (NOTE_P (prev)
17386 || DEBUG_INSN_P (prev)
17387 || ((set = single_set (prev)) != NULL
17388 && get_attr_conds (prev) == CONDS_NOCOND)));
17389 prev = PREV_INSN (prev))
17391 if (reg_set_p (op0, prev))
17392 insn_clobbered = true;
17395 /* Skip if op0 is clobbered by insn other than prev. */
17396 if (insn_clobbered)
17397 continue;
17399 if (!set)
17400 continue;
17402 dest = SET_DEST (set);
17403 src = SET_SRC (set);
17404 if (!low_register_operand (dest, SImode)
17405 || !low_register_operand (src, SImode))
17406 continue;
17408 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17409 in INSN. Both src and dest of the move insn are checked. */
17410 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17412 dest = copy_rtx (dest);
17413 src = copy_rtx (src);
17414 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17415 PATTERN (prev) = gen_rtx_SET (dest, src);
17416 INSN_CODE (prev) = -1;
17417 /* Set test register in INSN to dest. */
17418 XEXP (cmp, 0) = copy_rtx (dest);
17419 INSN_CODE (insn) = -1;
17424 /* Convert instructions to their cc-clobbering variant if possible, since
17425 that allows us to use smaller encodings. */
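/* For example, a three-register add such as "add r0, r1, r2" only has a
   32-bit Thumb-2 encoding, while the flag-setting "adds r0, r1, r2" has a
   16-bit one; when the condition flags are provably dead we can therefore
   shrink the insn by rewriting its SET as a PARALLEL with a CC clobber.
   Register numbers are illustrative only.  */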
17427 static void
17428 thumb2_reorg (void)
17430 basic_block bb;
17431 regset_head live;
17433 INIT_REG_SET (&live);
17435 /* We are freeing block_for_insn in the toplev to keep compatibility
17436 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17437 compute_bb_for_insn ();
17438 df_analyze ();
17440 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17442 FOR_EACH_BB_FN (bb, cfun)
17444 if ((current_tune->disparage_flag_setting_t16_encodings
17445 == tune_params::DISPARAGE_FLAGS_ALL)
17446 && optimize_bb_for_speed_p (bb))
17447 continue;
17449 rtx_insn *insn;
17450 Convert_Action action = SKIP;
17451 Convert_Action action_for_partial_flag_setting
17452 = ((current_tune->disparage_flag_setting_t16_encodings
17453 != tune_params::DISPARAGE_FLAGS_NEITHER)
17454 && optimize_bb_for_speed_p (bb))
17455 ? SKIP : CONV;
17457 COPY_REG_SET (&live, DF_LR_OUT (bb));
17458 df_simulate_initialize_backwards (bb, &live);
17459 FOR_BB_INSNS_REVERSE (bb, insn)
17461 if (NONJUMP_INSN_P (insn)
17462 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17463 && GET_CODE (PATTERN (insn)) == SET)
17465 action = SKIP;
17466 rtx pat = PATTERN (insn);
17467 rtx dst = XEXP (pat, 0);
17468 rtx src = XEXP (pat, 1);
17469 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17471 if (UNARY_P (src) || BINARY_P (src))
17472 op0 = XEXP (src, 0);
17474 if (BINARY_P (src))
17475 op1 = XEXP (src, 1);
17477 if (low_register_operand (dst, SImode))
17479 switch (GET_CODE (src))
17481 case PLUS:
17482 /* Adding two registers and storing the result
17483 in the first source is already a 16-bit
17484 operation. */
17485 if (rtx_equal_p (dst, op0)
17486 && register_operand (op1, SImode))
17487 break;
17489 if (low_register_operand (op0, SImode))
17491 /* ADDS <Rd>,<Rn>,<Rm> */
17492 if (low_register_operand (op1, SImode))
17493 action = CONV;
17494 /* ADDS <Rdn>,#<imm8> */
17495 /* SUBS <Rdn>,#<imm8> */
17496 else if (rtx_equal_p (dst, op0)
17497 && CONST_INT_P (op1)
17498 && IN_RANGE (INTVAL (op1), -255, 255))
17499 action = CONV;
17500 /* ADDS <Rd>,<Rn>,#<imm3> */
17501 /* SUBS <Rd>,<Rn>,#<imm3> */
17502 else if (CONST_INT_P (op1)
17503 && IN_RANGE (INTVAL (op1), -7, 7))
17504 action = CONV;
17506 /* ADCS <Rd>, <Rn> */
17507 else if (GET_CODE (XEXP (src, 0)) == PLUS
17508 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17509 && low_register_operand (XEXP (XEXP (src, 0), 1),
17510 SImode)
17511 && COMPARISON_P (op1)
17512 && cc_register (XEXP (op1, 0), VOIDmode)
17513 && maybe_get_arm_condition_code (op1) == ARM_CS
17514 && XEXP (op1, 1) == const0_rtx)
17515 action = CONV;
17516 break;
17518 case MINUS:
17519 /* RSBS <Rd>,<Rn>,#0
17520 Not handled here: see NEG below. */
17521 /* SUBS <Rd>,<Rn>,#<imm3>
17522 SUBS <Rdn>,#<imm8>
17523 Not handled here: see PLUS above. */
17524 /* SUBS <Rd>,<Rn>,<Rm> */
17525 if (low_register_operand (op0, SImode)
17526 && low_register_operand (op1, SImode))
17527 action = CONV;
17528 break;
17530 case MULT:
17531 /* MULS <Rdm>,<Rn>,<Rdm>
17532 As an exception to the rule, this is only used
17533 when optimizing for size since MULS is slow on all
17534 known implementations. We do not even want to use
17535 MULS in cold code, if optimizing for speed, so we
17536 test the global flag here. */
17537 if (!optimize_size)
17538 break;
17539 /* else fall through. */
17540 case AND:
17541 case IOR:
17542 case XOR:
17543 /* ANDS <Rdn>,<Rm> */
17544 if (rtx_equal_p (dst, op0)
17545 && low_register_operand (op1, SImode))
17546 action = action_for_partial_flag_setting;
17547 else if (rtx_equal_p (dst, op1)
17548 && low_register_operand (op0, SImode))
17549 action = action_for_partial_flag_setting == SKIP
17550 ? SKIP : SWAP_CONV;
17551 break;
17553 case ASHIFTRT:
17554 case ASHIFT:
17555 case LSHIFTRT:
17556 /* ASRS <Rdn>,<Rm> */
17557 /* LSRS <Rdn>,<Rm> */
17558 /* LSLS <Rdn>,<Rm> */
17559 if (rtx_equal_p (dst, op0)
17560 && low_register_operand (op1, SImode))
17561 action = action_for_partial_flag_setting;
17562 /* ASRS <Rd>,<Rm>,#<imm5> */
17563 /* LSRS <Rd>,<Rm>,#<imm5> */
17564 /* LSLS <Rd>,<Rm>,#<imm5> */
17565 else if (low_register_operand (op0, SImode)
17566 && CONST_INT_P (op1)
17567 && IN_RANGE (INTVAL (op1), 0, 31))
17568 action = action_for_partial_flag_setting;
17569 break;
17571 case ROTATERT:
17572 /* RORS <Rdn>,<Rm> */
17573 if (rtx_equal_p (dst, op0)
17574 && low_register_operand (op1, SImode))
17575 action = action_for_partial_flag_setting;
17576 break;
17578 case NOT:
17579 /* MVNS <Rd>,<Rm> */
17580 if (low_register_operand (op0, SImode))
17581 action = action_for_partial_flag_setting;
17582 break;
17584 case NEG:
17585 /* NEGS <Rd>,<Rm> (a.k.a. RSBS) */
17586 if (low_register_operand (op0, SImode))
17587 action = CONV;
17588 break;
17590 case CONST_INT:
17591 /* MOVS <Rd>,#<imm8> */
17592 if (CONST_INT_P (src)
17593 && IN_RANGE (INTVAL (src), 0, 255))
17594 action = action_for_partial_flag_setting;
17595 break;
17597 case REG:
17598 /* MOVS and MOV<c> with registers have different
17599 encodings, so are not relevant here. */
17600 break;
17602 default:
17603 break;
17607 if (action != SKIP)
17609 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17610 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17611 rtvec vec;
17613 if (action == SWAP_CONV)
17615 src = copy_rtx (src);
17616 XEXP (src, 0) = op1;
17617 XEXP (src, 1) = op0;
17618 pat = gen_rtx_SET (dst, src);
17619 vec = gen_rtvec (2, pat, clobber);
17621 else /* action == CONV */
17622 vec = gen_rtvec (2, pat, clobber);
17624 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17625 INSN_CODE (insn) = -1;
17629 if (NONDEBUG_INSN_P (insn))
17630 df_simulate_one_insn_backwards (bb, insn, &live);
17634 CLEAR_REG_SET (&live);
17637 /* GCC puts the pool in the wrong place for ARM, since we can only
17638 load addresses a limited distance around the pc. We do some
17639 special munging to move the constant pool values to the correct
17640 point in the code. */
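/* Rough background: a pc-relative load such as "ldr rD, [pc, #offset]"
   can only reach a limited window around the instruction (on the order
   of 4KB in ARM state, and much less for Thumb-1 or coprocessor loads),
   so constants are gathered into "minipools" that are dumped into the
   instruction stream close to the instructions that use them.  */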
17641 static void
17642 arm_reorg (void)
17644 rtx_insn *insn;
17645 HOST_WIDE_INT address = 0;
17646 Mfix * fix;
17648 if (TARGET_THUMB1)
17649 thumb1_reorg ();
17650 else if (TARGET_THUMB2)
17651 thumb2_reorg ();
17653 /* Ensure all insns that must be split have been split at this point.
17654 Otherwise, the pool placement code below may compute incorrect
17655 insn lengths. Note that when optimizing, all insns have already
17656 been split at this point. */
17657 if (!optimize)
17658 split_all_insns_noflow ();
17660 minipool_fix_head = minipool_fix_tail = NULL;
17662 /* The first insn must always be a note, or the code below won't
17663 scan it properly. */
17664 insn = get_insns ();
17665 gcc_assert (NOTE_P (insn));
17666 minipool_pad = 0;
17668 /* Scan all the insns and record the operands that will need fixing. */
17669 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17671 if (BARRIER_P (insn))
17672 push_minipool_barrier (insn, address);
17673 else if (INSN_P (insn))
17675 rtx_jump_table_data *table;
17677 note_invalid_constants (insn, address, true);
17678 address += get_attr_length (insn);
17680 /* If the insn is a vector jump, add the size of the table
17681 and skip the table. */
17682 if (tablejump_p (insn, NULL, &table))
17684 address += get_jump_table_size (table);
17685 insn = table;
17688 else if (LABEL_P (insn))
17689 /* Add the worst-case padding due to alignment. We don't add
17690 the _current_ padding because the minipool insertions
17691 themselves might change it. */
17692 address += get_label_padding (insn);
17695 fix = minipool_fix_head;
17697 /* Now scan the fixups and perform the required changes. */
17698 while (fix)
17700 Mfix * ftmp;
17701 Mfix * fdel;
17702 Mfix * last_added_fix;
17703 Mfix * last_barrier = NULL;
17704 Mfix * this_fix;
17706 /* Skip any further barriers before the next fix. */
17707 while (fix && BARRIER_P (fix->insn))
17708 fix = fix->next;
17710 /* No more fixes. */
17711 if (fix == NULL)
17712 break;
17714 last_added_fix = NULL;
17716 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17718 if (BARRIER_P (ftmp->insn))
17720 if (ftmp->address >= minipool_vector_head->max_address)
17721 break;
17723 last_barrier = ftmp;
17725 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17726 break;
17728 last_added_fix = ftmp; /* Keep track of the last fix added. */
17731 /* If we found a barrier, drop back to that; any fixes that we
17732 could have reached but come after the barrier will now go in
17733 the next mini-pool. */
17734 if (last_barrier != NULL)
17736 /* Reduce the refcount for those fixes that won't go into this
17737 pool after all. */
17738 for (fdel = last_barrier->next;
17739 fdel && fdel != ftmp;
17740 fdel = fdel->next)
17742 fdel->minipool->refcount--;
17743 fdel->minipool = NULL;
17746 ftmp = last_barrier;
17748 else
17750 /* ftmp is the first fix that we can't fit into this pool and
17751 there are no natural barriers that we could use. Insert a
17752 new barrier in the code somewhere between the previous
17753 fix and this one, and arrange to jump around it. */
17754 HOST_WIDE_INT max_address;
17756 /* The last item on the list of fixes must be a barrier, so
17757 we can never run off the end of the list of fixes without
17758 last_barrier being set. */
17759 gcc_assert (ftmp);
17761 max_address = minipool_vector_head->max_address;
17762 /* Check that there isn't another fix that is in range that
17763 we couldn't fit into this pool because the pool was
17764 already too large: we need to put the pool before such an
17765 instruction. The pool itself may come just after the
17766 fix because create_fix_barrier also allows space for a
17767 jump instruction. */
17768 if (ftmp->address < max_address)
17769 max_address = ftmp->address + 1;
17771 last_barrier = create_fix_barrier (last_added_fix, max_address);
17774 assign_minipool_offsets (last_barrier);
17776 while (ftmp)
17778 if (!BARRIER_P (ftmp->insn)
17779 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17780 == NULL))
17781 break;
17783 ftmp = ftmp->next;
17786 /* Scan over the fixes we have identified for this pool, fixing them
17787 up and adding the constants to the pool itself. */
17788 for (this_fix = fix; this_fix && ftmp != this_fix;
17789 this_fix = this_fix->next)
17790 if (!BARRIER_P (this_fix->insn))
17792 rtx addr
17793 = plus_constant (Pmode,
17794 gen_rtx_LABEL_REF (VOIDmode,
17795 minipool_vector_label),
17796 this_fix->minipool->offset);
17797 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17800 dump_minipool (last_barrier->insn);
17801 fix = ftmp;
17804 /* From now on we must synthesize any constants that we can't handle
17805 directly. This can happen if the RTL gets split during final
17806 instruction generation. */
17807 cfun->machine->after_arm_reorg = 1;
17809 /* Free the minipool memory. */
17810 obstack_free (&minipool_obstack, minipool_startobj);
17813 /* Routines to output assembly language. */
17815 /* Return string representation of passed in real value. */
17816 static const char *
17817 fp_const_from_val (REAL_VALUE_TYPE *r)
17819 if (!fp_consts_inited)
17820 init_fp_table ();
17822 gcc_assert (real_equal (r, &value_fp0));
17823 return "0";
17826 /* OPERANDS[0] is the entire list of insns that constitute pop,
17827 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17828 is in the list, UPDATE is true iff the list contains explicit
17829 update of base register. */
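/* Examples of the strings this can produce (condition suffixes and
   register choices are illustrative):

     pop     {r4, r5, pc}          SP base with writeback, normal return
     ldmfd   sp!, {r4, r5, pc}^    SP base with writeback, interrupt return
     ldmia   r6!, {r4, r5}         non-SP base with writeback
     ldm     r6, {r4, r5}          non-SP base, no writeback  */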
17830 void
17831 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17832 bool update)
17834 int i;
17835 char pattern[100];
17836 int offset;
17837 const char *conditional;
17838 int num_saves = XVECLEN (operands[0], 0);
17839 unsigned int regno;
17840 unsigned int regno_base = REGNO (operands[1]);
17841 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17843 offset = 0;
17844 offset += update ? 1 : 0;
17845 offset += return_pc ? 1 : 0;
17847 /* Is the base register in the list? */
17848 for (i = offset; i < num_saves; i++)
17850 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17851 /* If SP is in the list, then the base register must be SP. */
17852 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17853 /* If base register is in the list, there must be no explicit update. */
17854 if (regno == regno_base)
17855 gcc_assert (!update);
17858 conditional = reverse ? "%?%D0" : "%?%d0";
17859 /* Can't use POP if returning from an interrupt. */
17860 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17861 sprintf (pattern, "pop%s\t{", conditional);
17862 else
17864 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17865 It's just a convention; their semantics are identical.
17866 if (regno_base == SP_REGNUM)
17867 sprintf (pattern, "ldmfd%s\t", conditional);
17868 else if (update)
17869 sprintf (pattern, "ldmia%s\t", conditional);
17870 else
17871 sprintf (pattern, "ldm%s\t", conditional);
17873 strcat (pattern, reg_names[regno_base]);
17874 if (update)
17875 strcat (pattern, "!, {");
17876 else
17877 strcat (pattern, ", {");
17880 /* Output the first destination register. */
17881 strcat (pattern,
17882 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17884 /* Output the rest of the destination registers. */
17885 for (i = offset + 1; i < num_saves; i++)
17887 strcat (pattern, ", ");
17888 strcat (pattern,
17889 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17892 strcat (pattern, "}");
17894 if (interrupt_p && return_pc)
17895 strcat (pattern, "^");
17897 output_asm_insn (pattern, &cond);
17901 /* Output the assembly for a store multiple. */
17903 const char *
17904 vfp_output_vstmd (rtx * operands)
17906 char pattern[100];
17907 int p;
17908 int base;
17909 int i;
17910 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17911 ? XEXP (operands[0], 0)
17912 : XEXP (XEXP (operands[0], 0), 0);
17913 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17915 if (push_p)
17916 strcpy (pattern, "vpush%?.64\t{%P1");
17917 else
17918 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17920 p = strlen (pattern);
17922 gcc_assert (REG_P (operands[1]));
17924 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17925 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17927 p += sprintf (&pattern[p], ", d%d", base + i);
17929 strcpy (&pattern[p], "}");
17931 output_asm_insn (pattern, operands);
17932 return "";
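/* Illustrative output: a store of three double registers starting at d8
   gives "vpush.64 {d8, d9, d10}" when the address register is SP, and
   "vstmdb.64 r4!, {d8, d9, d10}" otherwise (register numbers are
   arbitrary here).  */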
17936 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17937 number of bytes pushed. */
17939 static int
17940 vfp_emit_fstmd (int base_reg, int count)
17942 rtx par;
17943 rtx dwarf;
17944 rtx tmp, reg;
17945 int i;
17947 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17948 register pairs are stored by a store multiple insn. We avoid this
17949 by pushing an extra pair. */
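/* E.g. a request to store exactly two pairs becomes a store of three
   pairs; if the two pairs were the topmost D registers, the block is
   started one register lower so that a third pair is available.  */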
17950 if (count == 2 && !arm_arch6)
17952 if (base_reg == LAST_VFP_REGNUM - 3)
17953 base_reg -= 2;
17954 count++;
17957 /* FSTMD may not store more than 16 doubleword registers at once. Split
17958 larger stores into multiple parts (up to a maximum of two, in
17959 practice). */
17960 if (count > 16)
17962 int saved;
17963 /* NOTE: base_reg is an internal register number, so each D register
17964 counts as 2. */
17965 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17966 saved += vfp_emit_fstmd (base_reg, 16);
17967 return saved;
17970 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17971 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17973 reg = gen_rtx_REG (DFmode, base_reg);
17974 base_reg += 2;
17976 XVECEXP (par, 0, 0)
17977 = gen_rtx_SET (gen_frame_mem
17978 (BLKmode,
17979 gen_rtx_PRE_MODIFY (Pmode,
17980 stack_pointer_rtx,
17981 plus_constant
17982 (Pmode, stack_pointer_rtx,
17983 - (count * 8)))
17984 ),
17985 gen_rtx_UNSPEC (BLKmode,
17986 gen_rtvec (1, reg),
17987 UNSPEC_PUSH_MULT));
17989 tmp = gen_rtx_SET (stack_pointer_rtx,
17990 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17991 RTX_FRAME_RELATED_P (tmp) = 1;
17992 XVECEXP (dwarf, 0, 0) = tmp;
17994 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17995 RTX_FRAME_RELATED_P (tmp) = 1;
17996 XVECEXP (dwarf, 0, 1) = tmp;
17998 for (i = 1; i < count; i++)
18000 reg = gen_rtx_REG (DFmode, base_reg);
18001 base_reg += 2;
18002 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18004 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18005 plus_constant (Pmode,
18006 stack_pointer_rtx,
18007 i * 8)),
18008 reg);
18009 RTX_FRAME_RELATED_P (tmp) = 1;
18010 XVECEXP (dwarf, 0, i + 1) = tmp;
18013 par = emit_insn (par);
18014 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18015 RTX_FRAME_RELATED_P (par) = 1;
18017 return count * 8;
18020 /* Emit a call instruction with pattern PAT. ADDR is the address of
18021 the call target. */
18023 void
18024 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18026 rtx insn;
18028 insn = emit_call_insn (pat);
18030 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18031 If the call might use such an entry, add a use of the PIC register
18032 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18033 if (TARGET_VXWORKS_RTP
18034 && flag_pic
18035 && !sibcall
18036 && GET_CODE (addr) == SYMBOL_REF
18037 && (SYMBOL_REF_DECL (addr)
18038 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18039 : !SYMBOL_REF_LOCAL_P (addr)))
18041 require_pic_register ();
18042 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18045 if (TARGET_AAPCS_BASED)
18047 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18048 linker. We need to add an IP clobber to allow setting
18049 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18050 is not needed since it's a fixed register. */
18051 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18052 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18056 /* Output a 'call' insn. */
18057 const char *
18058 output_call (rtx *operands)
18060 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
18062 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18063 if (REGNO (operands[0]) == LR_REGNUM)
18065 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18066 output_asm_insn ("mov%?\t%0, %|lr", operands);
18069 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18071 if (TARGET_INTERWORK || arm_arch4t)
18072 output_asm_insn ("bx%?\t%0", operands);
18073 else
18074 output_asm_insn ("mov%?\t%|pc, %0", operands);
18076 return "";
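/* Illustrative sequence for an indirect call through r3 on a target
   without blx:

     mov  lr, pc
     bx   r3          @ or "mov pc, r3" without interworking/ARMv4T  */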
18079 /* Output a move from arm registers to arm registers of a long double
18080 OPERANDS[0] is the destination.
18081 OPERANDS[1] is the source. */
18082 const char *
18083 output_mov_long_double_arm_from_arm (rtx *operands)
18085 /* We have to be careful here because the two might overlap. */
18086 int dest_start = REGNO (operands[0]);
18087 int src_start = REGNO (operands[1]);
18088 rtx ops[2];
18089 int i;
18091 if (dest_start < src_start)
18093 for (i = 0; i < 3; i++)
18095 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18096 ops[1] = gen_rtx_REG (SImode, src_start + i);
18097 output_asm_insn ("mov%?\t%0, %1", ops);
18100 else
18102 for (i = 2; i >= 0; i--)
18104 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18105 ops[1] = gen_rtx_REG (SImode, src_start + i);
18106 output_asm_insn ("mov%?\t%0, %1", ops);
18110 return "";
18113 void
18114 arm_emit_movpair (rtx dest, rtx src)
18116 rtx insn;
18118 /* If the src is an immediate, simplify it. */
18119 if (CONST_INT_P (src))
18121 HOST_WIDE_INT val = INTVAL (src);
18122 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18123 if ((val >> 16) & 0x0000ffff)
18125 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18126 GEN_INT (16)),
18127 GEN_INT ((val >> 16) & 0x0000ffff));
18128 insn = get_last_insn ();
18129 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18131 return;
18133 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18134 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18135 insn = get_last_insn ();
18136 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
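/* Illustration: for a constant such as 0x12345678 this emits a set of the
   low 16 bits followed by a zero_extract of the high 16 bits, which
   normally assembles to a movw/movt pair:

     movw rD, #0x5678
     movt rD, #0x1234

   If the upper half is zero, only the first set is emitted.  */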
18139 /* Output a move between double words. It must be REG<-MEM
18140 or MEM<-REG. */
18141 const char *
18142 output_move_double (rtx *operands, bool emit, int *count)
18144 enum rtx_code code0 = GET_CODE (operands[0]);
18145 enum rtx_code code1 = GET_CODE (operands[1]);
18146 rtx otherops[3];
18147 if (count)
18148 *count = 1;
18150 /* The only case when this might happen is when
18151 you are looking at the length of a DImode instruction
18152 that has an invalid constant in it. */
18153 if (code0 == REG && code1 != MEM)
18155 gcc_assert (!emit);
18156 *count = 2;
18157 return "";
18160 if (code0 == REG)
18162 unsigned int reg0 = REGNO (operands[0]);
18164 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18166 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18168 switch (GET_CODE (XEXP (operands[1], 0)))
18170 case REG:
18172 if (emit)
18174 if (TARGET_LDRD
18175 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18176 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18177 else
18178 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18180 break;
18182 case PRE_INC:
18183 gcc_assert (TARGET_LDRD);
18184 if (emit)
18185 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18186 break;
18188 case PRE_DEC:
18189 if (emit)
18191 if (TARGET_LDRD)
18192 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18193 else
18194 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18196 break;
18198 case POST_INC:
18199 if (emit)
18201 if (TARGET_LDRD)
18202 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18203 else
18204 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18206 break;
18208 case POST_DEC:
18209 gcc_assert (TARGET_LDRD);
18210 if (emit)
18211 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18212 break;
18214 case PRE_MODIFY:
18215 case POST_MODIFY:
18216 /* Autoincrement addressing modes should never have overlapping
18217 base and destination registers, and overlapping index registers
18218 are already prohibited, so this doesn't need to worry about
18219 fix_cm3_ldrd. */
18220 otherops[0] = operands[0];
18221 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18222 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18224 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18226 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18228 /* Registers overlap so split out the increment. */
18229 if (emit)
18231 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18232 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18234 if (count)
18235 *count = 2;
18237 else
18239 /* Use a single insn if we can.
18240 FIXME: IWMMXT allows offsets larger than ldrd can
18241 handle; fix these up with a pair of ldr. */
18242 if (TARGET_THUMB2
18243 || !CONST_INT_P (otherops[2])
18244 || (INTVAL (otherops[2]) > -256
18245 && INTVAL (otherops[2]) < 256))
18247 if (emit)
18248 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18250 else
18252 if (emit)
18254 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18255 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18257 if (count)
18258 *count = 2;
18263 else
18265 /* Use a single insn if we can.
18266 FIXME: IWMMXT allows offsets larger than ldrd can handle;
18267 fix these up with a pair of ldr. */
18268 if (TARGET_THUMB2
18269 || !CONST_INT_P (otherops[2])
18270 || (INTVAL (otherops[2]) > -256
18271 && INTVAL (otherops[2]) < 256))
18273 if (emit)
18274 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18276 else
18278 if (emit)
18280 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18281 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18283 if (count)
18284 *count = 2;
18287 break;
18289 case LABEL_REF:
18290 case CONST:
18291 /* We might be able to use ldrd %0, %1 here. However the range is
18292 different to ldr/adr, and it is broken on some ARMv7-M
18293 implementations. */
18294 /* Use the second register of the pair to avoid problematic
18295 overlap. */
18296 otherops[1] = operands[1];
18297 if (emit)
18298 output_asm_insn ("adr%?\t%0, %1", otherops);
18299 operands[1] = otherops[0];
18300 if (emit)
18302 if (TARGET_LDRD)
18303 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18304 else
18305 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18308 if (count)
18309 *count = 2;
18310 break;
18312 /* ??? This needs checking for thumb2. */
18313 default:
18314 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18315 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18317 otherops[0] = operands[0];
18318 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18319 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18321 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18323 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18325 switch ((int) INTVAL (otherops[2]))
18327 case -8:
18328 if (emit)
18329 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18330 return "";
18331 case -4:
18332 if (TARGET_THUMB2)
18333 break;
18334 if (emit)
18335 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18336 return "";
18337 case 4:
18338 if (TARGET_THUMB2)
18339 break;
18340 if (emit)
18341 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18342 return "";
18345 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18346 operands[1] = otherops[0];
18347 if (TARGET_LDRD
18348 && (REG_P (otherops[2])
18349 || TARGET_THUMB2
18350 || (CONST_INT_P (otherops[2])
18351 && INTVAL (otherops[2]) > -256
18352 && INTVAL (otherops[2]) < 256)))
18354 if (reg_overlap_mentioned_p (operands[0],
18355 otherops[2]))
18357 /* Swap base and index registers over to
18358 avoid a conflict. */
18359 std::swap (otherops[1], otherops[2]);
18361 /* If both registers conflict, it will usually
18362 have been fixed by a splitter. */
18363 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18364 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18366 if (emit)
18368 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18369 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18371 if (count)
18372 *count = 2;
18374 else
18376 otherops[0] = operands[0];
18377 if (emit)
18378 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18380 return "";
18383 if (CONST_INT_P (otherops[2]))
18385 if (emit)
18387 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18388 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18389 else
18390 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18393 else
18395 if (emit)
18396 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18399 else
18401 if (emit)
18402 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18405 if (count)
18406 *count = 2;
18408 if (TARGET_LDRD)
18409 return "ldrd%?\t%0, [%1]";
18411 return "ldmia%?\t%1, %M0";
18413 else
18415 otherops[1] = adjust_address (operands[1], SImode, 4);
18416 /* Take care of overlapping base/data reg. */
18417 if (reg_mentioned_p (operands[0], operands[1]))
18419 if (emit)
18421 output_asm_insn ("ldr%?\t%0, %1", otherops);
18422 output_asm_insn ("ldr%?\t%0, %1", operands);
18424 if (count)
18425 *count = 2;
18428 else
18430 if (emit)
18432 output_asm_insn ("ldr%?\t%0, %1", operands);
18433 output_asm_insn ("ldr%?\t%0, %1", otherops);
18435 if (count)
18436 *count = 2;
18441 else
18443 /* Constraints should ensure this. */
18444 gcc_assert (code0 == MEM && code1 == REG);
18445 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18446 || (TARGET_ARM && TARGET_LDRD));
18448 switch (GET_CODE (XEXP (operands[0], 0)))
18450 case REG:
18451 if (emit)
18453 if (TARGET_LDRD)
18454 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18455 else
18456 output_asm_insn ("stm%?\t%m0, %M1", operands);
18458 break;
18460 case PRE_INC:
18461 gcc_assert (TARGET_LDRD);
18462 if (emit)
18463 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18464 break;
18466 case PRE_DEC:
18467 if (emit)
18469 if (TARGET_LDRD)
18470 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18471 else
18472 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18474 break;
18476 case POST_INC:
18477 if (emit)
18479 if (TARGET_LDRD)
18480 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18481 else
18482 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18484 break;
18486 case POST_DEC:
18487 gcc_assert (TARGET_LDRD);
18488 if (emit)
18489 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18490 break;
18492 case PRE_MODIFY:
18493 case POST_MODIFY:
18494 otherops[0] = operands[1];
18495 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18496 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18498 /* IWMMXT allows offsets larger than strd can handle; fix these up
18499 with a pair of str. */
18500 if (!TARGET_THUMB2
18501 && CONST_INT_P (otherops[2])
18502 && (INTVAL(otherops[2]) <= -256
18503 || INTVAL(otherops[2]) >= 256))
18505 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18507 if (emit)
18509 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18510 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18512 if (count)
18513 *count = 2;
18515 else
18517 if (emit)
18519 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18520 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18522 if (count)
18523 *count = 2;
18526 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18528 if (emit)
18529 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18531 else
18533 if (emit)
18534 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18536 break;
18538 case PLUS:
18539 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18540 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18542 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18544 case -8:
18545 if (emit)
18546 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18547 return "";
18549 case -4:
18550 if (TARGET_THUMB2)
18551 break;
18552 if (emit)
18553 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18554 return "";
18556 case 4:
18557 if (TARGET_THUMB2)
18558 break;
18559 if (emit)
18560 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18561 return "";
18564 if (TARGET_LDRD
18565 && (REG_P (otherops[2])
18566 || TARGET_THUMB2
18567 || (CONST_INT_P (otherops[2])
18568 && INTVAL (otherops[2]) > -256
18569 && INTVAL (otherops[2]) < 256)))
18571 otherops[0] = operands[1];
18572 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18573 if (emit)
18574 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18575 return "";
18577 /* Fall through */
18579 default:
18580 otherops[0] = adjust_address (operands[0], SImode, 4);
18581 otherops[1] = operands[1];
18582 if (emit)
18584 output_asm_insn ("str%?\t%1, %0", operands);
18585 output_asm_insn ("str%?\t%H1, %0", otherops);
18587 if (count)
18588 *count = 2;
18592 return "";
18595 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18596 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18598 const char *
18599 output_move_quad (rtx *operands)
18601 if (REG_P (operands[0]))
18603 /* Load, or reg->reg move. */
18605 if (MEM_P (operands[1]))
18607 switch (GET_CODE (XEXP (operands[1], 0)))
18609 case REG:
18610 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18611 break;
18613 case LABEL_REF:
18614 case CONST:
18615 output_asm_insn ("adr%?\t%0, %1", operands);
18616 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18617 break;
18619 default:
18620 gcc_unreachable ();
18623 else
18625 rtx ops[2];
18626 int dest, src, i;
18628 gcc_assert (REG_P (operands[1]));
18630 dest = REGNO (operands[0]);
18631 src = REGNO (operands[1]);
18633 /* This seems pretty dumb, but hopefully GCC won't try to do it
18634 very often. */
18635 if (dest < src)
18636 for (i = 0; i < 4; i++)
18638 ops[0] = gen_rtx_REG (SImode, dest + i);
18639 ops[1] = gen_rtx_REG (SImode, src + i);
18640 output_asm_insn ("mov%?\t%0, %1", ops);
18642 else
18643 for (i = 3; i >= 0; i--)
18645 ops[0] = gen_rtx_REG (SImode, dest + i);
18646 ops[1] = gen_rtx_REG (SImode, src + i);
18647 output_asm_insn ("mov%?\t%0, %1", ops);
18651 else
18653 gcc_assert (MEM_P (operands[0]));
18654 gcc_assert (REG_P (operands[1]));
18655 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18657 switch (GET_CODE (XEXP (operands[0], 0)))
18659 case REG:
18660 output_asm_insn ("stm%?\t%m0, %M1", operands);
18661 break;
18663 default:
18664 gcc_unreachable ();
18668 return "";
18671 /* Output a VFP load or store instruction. */
18673 const char *
18674 output_move_vfp (rtx *operands)
18676 rtx reg, mem, addr, ops[2];
18677 int load = REG_P (operands[0]);
18678 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18679 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18680 const char *templ;
18681 char buff[50];
18682 machine_mode mode;
18684 reg = operands[!load];
18685 mem = operands[load];
18687 mode = GET_MODE (reg);
18689 gcc_assert (REG_P (reg));
18690 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18691 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT && TARGET_VFP)
18692 || mode == SFmode
18693 || mode == DFmode
18694 || mode == SImode
18695 || mode == DImode
18696 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18697 gcc_assert (MEM_P (mem));
18699 addr = XEXP (mem, 0);
18701 switch (GET_CODE (addr))
18703 case PRE_DEC:
18704 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18705 ops[0] = XEXP (addr, 0);
18706 ops[1] = reg;
18707 break;
18709 case POST_INC:
18710 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18711 ops[0] = XEXP (addr, 0);
18712 ops[1] = reg;
18713 break;
18715 default:
18716 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18717 ops[0] = reg;
18718 ops[1] = mem;
18719 break;
18722 sprintf (buff, templ,
18723 load ? "ld" : "st",
18724 dp ? "64" : "32",
18725 dp ? "P" : "",
18726 integer_p ? "\t%@ int" : "");
18727 output_asm_insn (buff, ops);
18729 return "";
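/* Illustrative outputs (condition suffixes omitted, registers arbitrary):

     vldr.64   d1, [r0, #8]      DFmode load, plain addressing
     vstr.32   s2, [r3]          SFmode store
     vldmia.64 r0!, {d4}         POST_INC load  */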
18732 /* Output a Neon double-word or quad-word load or store, or a load
18733 or store for larger structure modes.
18735 WARNING: The ordering of elements is weird in big-endian mode,
18736 because the EABI requires that vectors stored in memory appear
18737 as though they were stored by a VSTM instruction.
18738 GCC RTL defines element ordering based on in-memory order.
18739 This can be different from the architectural ordering of elements
18740 within a NEON register. The intrinsics defined in arm_neon.h use the
18741 NEON register element ordering, not the GCC RTL element ordering.
18743 For example, the in-memory ordering of a big-endian quadword
18744 vector with 16-bit elements when stored from register pair {d0,d1}
18745 will be (lowest address first, d0[N] is NEON register element N):
18747 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18749 When necessary, quadword registers (dN, dN+1) are moved to ARM
18750 registers from rN in the order:
18752 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18754 So that STM/LDM can be used on vectors in ARM registers, and the
18755 same memory layout will result as if VSTM/VLDM were used.
18757 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18758 possible, which allows use of appropriate alignment tags.
18759 Note that the choice of "64" is independent of the actual vector
18760 element size; this size simply ensures that the behavior is
18761 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18763 Due to limitations of those instructions, use of VST1.64/VLD1.64
18764 is not possible if:
18765 - the address contains PRE_DEC, or
18766 - the mode refers to more than 4 double-word registers
18768 In those cases, it would be possible to replace VSTM/VLDM by a
18769 sequence of instructions; this is not currently implemented since
18770 this is not certain to actually improve performance. */
18772 const char *
18773 output_move_neon (rtx *operands)
18775 rtx reg, mem, addr, ops[2];
18776 int regno, nregs, load = REG_P (operands[0]);
18777 const char *templ;
18778 char buff[50];
18779 machine_mode mode;
18781 reg = operands[!load];
18782 mem = operands[load];
18784 mode = GET_MODE (reg);
18786 gcc_assert (REG_P (reg));
18787 regno = REGNO (reg);
18788 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18789 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18790 || NEON_REGNO_OK_FOR_QUAD (regno));
18791 gcc_assert (VALID_NEON_DREG_MODE (mode)
18792 || VALID_NEON_QREG_MODE (mode)
18793 || VALID_NEON_STRUCT_MODE (mode));
18794 gcc_assert (MEM_P (mem));
18796 addr = XEXP (mem, 0);
18798 /* Strip off const from addresses like (const (plus (...))). */
18799 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18800 addr = XEXP (addr, 0);
18802 switch (GET_CODE (addr))
18804 case POST_INC:
18805 /* We have to use vldm / vstm for too-large modes. */
18806 if (nregs > 4)
18808 templ = "v%smia%%?\t%%0!, %%h1";
18809 ops[0] = XEXP (addr, 0);
18811 else
18813 templ = "v%s1.64\t%%h1, %%A0";
18814 ops[0] = mem;
18816 ops[1] = reg;
18817 break;
18819 case PRE_DEC:
18820 /* We have to use vldm / vstm in this case, since there is no
18821 pre-decrement form of the vld1 / vst1 instructions. */
18822 templ = "v%smdb%%?\t%%0!, %%h1";
18823 ops[0] = XEXP (addr, 0);
18824 ops[1] = reg;
18825 break;
18827 case POST_MODIFY:
18828 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18829 gcc_unreachable ();
18831 case REG:
18832 /* We have to use vldm / vstm for too-large modes. */
18833 if (nregs > 1)
18835 if (nregs > 4)
18836 templ = "v%smia%%?\t%%m0, %%h1";
18837 else
18838 templ = "v%s1.64\t%%h1, %%A0";
18840 ops[0] = mem;
18841 ops[1] = reg;
18842 break;
18844 /* Fall through. */
18845 case LABEL_REF:
18846 case PLUS:
18848 int i;
18849 int overlap = -1;
18850 for (i = 0; i < nregs; i++)
18852 /* We're only using DImode here because it's a convenient size. */
18853 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18854 ops[1] = adjust_address (mem, DImode, 8 * i);
18855 if (reg_overlap_mentioned_p (ops[0], mem))
18857 gcc_assert (overlap == -1);
18858 overlap = i;
18860 else
18862 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18863 output_asm_insn (buff, ops);
18866 if (overlap != -1)
18868 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18869 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18870 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18871 output_asm_insn (buff, ops);
18874 return "";
18877 default:
18878 gcc_unreachable ();
18881 sprintf (buff, templ, load ? "ld" : "st");
18882 output_asm_insn (buff, ops);
18884 return "";
18887 /* Compute and return the length of neon_mov<mode>, where <mode> is
18888 one of VSTRUCT modes: EI, OI, CI or XI. */
18889 int
18890 arm_attr_length_move_neon (rtx_insn *insn)
18892 rtx reg, mem, addr;
18893 int load;
18894 machine_mode mode;
18896 extract_insn_cached (insn);
18898 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18900 mode = GET_MODE (recog_data.operand[0]);
18901 switch (mode)
18903 case EImode:
18904 case OImode:
18905 return 8;
18906 case CImode:
18907 return 12;
18908 case XImode:
18909 return 16;
18910 default:
18911 gcc_unreachable ();
18915 load = REG_P (recog_data.operand[0]);
18916 reg = recog_data.operand[!load];
18917 mem = recog_data.operand[load];
18919 gcc_assert (MEM_P (mem));
18921 mode = GET_MODE (reg);
18922 addr = XEXP (mem, 0);
18924 /* Strip off const from addresses like (const (plus (...))). */
18925 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18926 addr = XEXP (addr, 0);
18928 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18930 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18931 return insns * 4;
18933 else
18934 return 4;
18937 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18938 return zero. */
18940 int
18941 arm_address_offset_is_imm (rtx_insn *insn)
18943 rtx mem, addr;
18945 extract_insn_cached (insn);
18947 if (REG_P (recog_data.operand[0]))
18948 return 0;
18950 mem = recog_data.operand[0];
18952 gcc_assert (MEM_P (mem));
18954 addr = XEXP (mem, 0);
18956 if (REG_P (addr)
18957 || (GET_CODE (addr) == PLUS
18958 && REG_P (XEXP (addr, 0))
18959 && CONST_INT_P (XEXP (addr, 1))))
18960 return 1;
18961 else
18962 return 0;
18965 /* Output an ADD r, s, #n where n may be too big for one instruction.
18966 If adding zero to one register, output nothing. */
18967 const char *
18968 output_add_immediate (rtx *operands)
18970 HOST_WIDE_INT n = INTVAL (operands[2]);
18972 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18974 if (n < 0)
18975 output_multi_immediate (operands,
18976 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18977 -n);
18978 else
18979 output_multi_immediate (operands,
18980 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18984 return "";
18987 /* Output a multiple immediate operation.
18988 OPERANDS is the vector of operands referred to in the output patterns.
18989 INSTR1 is the output pattern to use for the first constant.
18990 INSTR2 is the output pattern to use for subsequent constants.
18991 IMMED_OP is the index of the constant slot in OPERANDS.
18992 N is the constant value. */
18993 static const char *
18994 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18995 int immed_op, HOST_WIDE_INT n)
18997 #if HOST_BITS_PER_WIDE_INT > 32
18998 n &= 0xffffffff;
18999 #endif
19001 if (n == 0)
19003 /* Quick and easy output. */
19004 operands[immed_op] = const0_rtx;
19005 output_asm_insn (instr1, operands);
19007 else
19009 int i;
19010 const char * instr = instr1;
19012 /* Note that n is never zero here (which would give no output). */
19013 for (i = 0; i < 32; i += 2)
19015 if (n & (3 << i))
19017 operands[immed_op] = GEN_INT (n & (255 << i));
19018 output_asm_insn (instr, operands);
19019 instr = instr2;
19020 i += 6;
19025 return "";
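/* Worked example: output_add_immediate uses this to add the out-of-range
   constant 0x123456 by splitting it into 8-bit chunks at even bit
   positions, each a legal ARM immediate:

     add r0, r1, #0x56
     add r0, r0, #0x23400
     add r0, r0, #0x100000

   (0x56 + 0x23400 + 0x100000 == 0x123456; register numbers are
   illustrative.)  */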
19028 /* Return the name of a shifter operation. */
19029 static const char *
19030 arm_shift_nmem(enum rtx_code code)
19032 switch (code)
19034 case ASHIFT:
19035 return ARM_LSL_NAME;
19037 case ASHIFTRT:
19038 return "asr";
19040 case LSHIFTRT:
19041 return "lsr";
19043 case ROTATERT:
19044 return "ror";
19046 default:
19047 abort();
19051 /* Return the appropriate ARM instruction for the operation code.
19052 The returned result should not be overwritten. OP is the rtx of the
19053 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19054 was shifted. */
19055 const char *
19056 arithmetic_instr (rtx op, int shift_first_arg)
19058 switch (GET_CODE (op))
19060 case PLUS:
19061 return "add";
19063 case MINUS:
19064 return shift_first_arg ? "rsb" : "sub";
19066 case IOR:
19067 return "orr";
19069 case XOR:
19070 return "eor";
19072 case AND:
19073 return "and";
19075 case ASHIFT:
19076 case ASHIFTRT:
19077 case LSHIFTRT:
19078 case ROTATERT:
19079 return arm_shift_nmem(GET_CODE(op));
19081 default:
19082 gcc_unreachable ();
19086 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19087 for the operation code. The returned result should not be overwritten.
19088 OP is the rtx code of the shift.
19089 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
19090 shift. */
19091 static const char *
19092 shift_op (rtx op, HOST_WIDE_INT *amountp)
19094 const char * mnem;
19095 enum rtx_code code = GET_CODE (op);
19097 switch (code)
19099 case ROTATE:
19100 if (!CONST_INT_P (XEXP (op, 1)))
19102 output_operand_lossage ("invalid shift operand");
19103 return NULL;
19106 code = ROTATERT;
19107 *amountp = 32 - INTVAL (XEXP (op, 1));
19108 mnem = "ror";
19109 break;
19111 case ASHIFT:
19112 case ASHIFTRT:
19113 case LSHIFTRT:
19114 case ROTATERT:
19115 mnem = arm_shift_nmem(code);
19116 if (CONST_INT_P (XEXP (op, 1)))
19118 *amountp = INTVAL (XEXP (op, 1));
19120 else if (REG_P (XEXP (op, 1)))
19122 *amountp = -1;
19123 return mnem;
19125 else
19127 output_operand_lossage ("invalid shift operand");
19128 return NULL;
19130 break;
19132 case MULT:
19133 /* We never have to worry about the amount being other than a
19134 power of 2, since this case can never be reloaded from a reg. */
19135 if (!CONST_INT_P (XEXP (op, 1)))
19137 output_operand_lossage ("invalid shift operand");
19138 return NULL;
19141 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19143 /* Amount must be a power of two. */
19144 if (*amountp & (*amountp - 1))
19146 output_operand_lossage ("invalid shift operand");
19147 return NULL;
19150 *amountp = exact_log2 (*amountp);
19151 gcc_assert (IN_RANGE (*amountp, 0, 31));
19152 return ARM_LSL_NAME;
19154 default:
19155 output_operand_lossage ("invalid shift operand");
19156 return NULL;
19159 /* This is not 100% correct, but follows from the desire to merge
19160 multiplication by a power of 2 with the recognizer for a
19161 shift. >=32 is not a valid shift for "lsl", so we must try and
19162 output a shift that produces the correct arithmetical result.
19163 Using lsr #32 is identical except for the fact that the carry bit
19164 is not set correctly if we set the flags; but we never use the
19165 carry bit from such an operation, so we can ignore that. */
19166 if (code == ROTATERT)
19167 /* Rotate is just modulo 32. */
19168 *amountp &= 31;
19169 else if (*amountp != (*amountp & 31))
19171 if (code == ASHIFT)
19172 mnem = "lsr";
19173 *amountp = 32;
19176 /* Shifts of 0 are no-ops. */
19177 if (*amountp == 0)
19178 return NULL;
19180 return mnem;
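/* Illustration: a (mult x 8) operand reaches this routine because the
   combiner merges power-of-two multiplies with the shift patterns; it is
   emitted as "lsl" with *amountp == 3.  A requested amount of zero makes
   the routine return NULL, since such a shift is a no-op.  */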
19183 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19184 because /bin/as is horribly restrictive. The judgement about
19185 whether or not each character is 'printable' (and can be output as
19186 is) or not (and must be printed with an octal escape) must be made
19187 with reference to the *host* character set -- the situation is
19188 similar to that discussed in the comments above pp_c_char in
19189 c-pretty-print.c. */
19191 #define MAX_ASCII_LEN 51
19193 void
19194 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19196 int i;
19197 int len_so_far = 0;
19199 fputs ("\t.ascii\t\"", stream);
19201 for (i = 0; i < len; i++)
19203 int c = p[i];
19205 if (len_so_far >= MAX_ASCII_LEN)
19207 fputs ("\"\n\t.ascii\t\"", stream);
19208 len_so_far = 0;
19211 if (ISPRINT (c))
19213 if (c == '\\' || c == '\"')
19215 putc ('\\', stream);
19216 len_so_far++;
19218 putc (c, stream);
19219 len_so_far++;
19221 else
19223 fprintf (stream, "\\%03o", c);
19224 len_so_far += 4;
19228 fputs ("\"\n", stream);
19231 /* Whether a register is callee saved or not. This is necessary because,
19232 when optimizing for size on Thumb-1 targets, high registers are marked as
19233 caller saved even though they are callee saved, in order to avoid using them. */
19234 #define callee_saved_reg_p(reg) \
19235 (!call_used_regs[reg] \
19236 || (TARGET_THUMB1 && optimize_size \
19237 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19239 /* Compute the register save mask for registers 0 through 12
19240 inclusive. This code is used by arm_compute_save_reg_mask. */
19242 static unsigned long
19243 arm_compute_save_reg0_reg12_mask (void)
19245 unsigned long func_type = arm_current_func_type ();
19246 unsigned long save_reg_mask = 0;
19247 unsigned int reg;
19249 if (IS_INTERRUPT (func_type))
19251 unsigned int max_reg;
19252 /* Interrupt functions must not corrupt any registers,
19253 even call clobbered ones. If this is a leaf function
19254 we can just examine the registers used by the RTL, but
19255 otherwise we have to assume that whatever function is
19256 called might clobber anything, and so we have to save
19257 all the call-clobbered registers as well. */
19258 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19259 /* FIQ handlers have registers r8 - r12 banked, so
19260 we only need to check r0 - r7. Normal ISRs only
19261 bank r14 and r15, so we must check up to r12.
19262 r13 is the stack pointer which is always preserved,
19263 so we do not need to consider it here. */
19264 max_reg = 7;
19265 else
19266 max_reg = 12;
19268 for (reg = 0; reg <= max_reg; reg++)
19269 if (df_regs_ever_live_p (reg)
19270 || (! crtl->is_leaf && call_used_regs[reg]))
19271 save_reg_mask |= (1 << reg);
19273 /* Also save the pic base register if necessary. */
19274 if (flag_pic
19275 && !TARGET_SINGLE_PIC_BASE
19276 && arm_pic_register != INVALID_REGNUM
19277 && crtl->uses_pic_offset_table)
19278 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19280 else if (IS_VOLATILE(func_type))
19282 /* For noreturn functions we historically omitted register saves
19283 altogether. However this really messes up debugging. As a
19284 compromise save just the frame pointers. Combined with the link
19285 register saved elsewhere this should be sufficient to get
19286 a backtrace. */
19287 if (frame_pointer_needed)
19288 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19289 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19290 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19291 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19292 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19294 else
19296 /* In the normal case we only need to save those registers
19297 which are call saved and which are used by this function. */
19298 for (reg = 0; reg <= 11; reg++)
19299 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19300 save_reg_mask |= (1 << reg);
19302 /* Handle the frame pointer as a special case. */
19303 if (frame_pointer_needed)
19304 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19306 /* If we aren't loading the PIC register,
19307 don't stack it even though it may be live. */
19308 if (flag_pic
19309 && !TARGET_SINGLE_PIC_BASE
19310 && arm_pic_register != INVALID_REGNUM
19311 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19312 || crtl->uses_pic_offset_table))
19313 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19315 /* The prologue will copy SP into R0, so save it. */
19316 if (IS_STACKALIGN (func_type))
19317 save_reg_mask |= 1;
19320 /* Save registers so the exception handler can modify them. */
19321 if (crtl->calls_eh_return)
19323 unsigned int i;
19325 for (i = 0; ; i++)
19327 reg = EH_RETURN_DATA_REGNO (i);
19328 if (reg == INVALID_REGNUM)
19329 break;
19330 save_reg_mask |= 1 << reg;
19334 return save_reg_mask;
19337 /* Return true if r3 is live at the start of the function. */
19339 static bool
19340 arm_r3_live_at_start_p (void)
19342 /* Just look at cfg info, which is still close enough to correct at this
19343 point. This gives false positives for broken functions that might use
19344 uninitialized data that happens to be allocated in r3, but who cares? */
19345 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19348 /* Compute the number of bytes used to store the static chain register on the
19349 stack, above the stack frame. We need to know this accurately to get the
19350 alignment of the rest of the stack frame correct. */
19352 static int
19353 arm_compute_static_chain_stack_bytes (void)
19355 /* See the defining assertion in arm_expand_prologue. */
19356 if (IS_NESTED (arm_current_func_type ())
19357 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19358 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19359 && !df_regs_ever_live_p (LR_REGNUM)))
19360 && arm_r3_live_at_start_p ()
19361 && crtl->args.pretend_args_size == 0)
19362 return 4;
19364 return 0;
19367 /* Compute a bit mask of which registers need to be
19368 saved on the stack for the current function.
19369 This is used by arm_get_frame_offsets, which may add extra registers. */
19371 static unsigned long
19372 arm_compute_save_reg_mask (void)
19374 unsigned int save_reg_mask = 0;
19375 unsigned long func_type = arm_current_func_type ();
19376 unsigned int reg;
19378 if (IS_NAKED (func_type))
19379 /* This should never really happen. */
19380 return 0;
19382 /* If we are creating a stack frame, then we must save the frame pointer,
19383 IP (which will hold the old stack pointer), LR and the PC. */
19384 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19385 save_reg_mask |=
19386 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19387 | (1 << IP_REGNUM)
19388 | (1 << LR_REGNUM)
19389 | (1 << PC_REGNUM);
19391 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19393 /* Decide if we need to save the link register.
19394 Interrupt routines have their own banked link register,
19395 so they never need to save it.
19396 Otherwise if we do not use the link register we do not need to save
19397 it. If we are pushing other registers onto the stack however, we
19398 can save an instruction in the epilogue by pushing the link register
19399 now and then popping it back into the PC. This incurs extra memory
19400 accesses though, so we only do it when optimizing for size, and only
19401 if we know that we will not need a fancy return sequence. */
19402 if (df_regs_ever_live_p (LR_REGNUM)
19403 || (save_reg_mask
19404 && optimize_size
19405 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19406 && !crtl->tail_call_emit
19407 && !crtl->calls_eh_return))
19408 save_reg_mask |= 1 << LR_REGNUM;
19410 if (cfun->machine->lr_save_eliminated)
19411 save_reg_mask &= ~ (1 << LR_REGNUM);
19413 if (TARGET_REALLY_IWMMXT
19414 && ((bit_count (save_reg_mask)
19415 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19416 arm_compute_static_chain_stack_bytes())
19417 ) % 2) != 0)
19419 /* The total number of registers that are going to be pushed
19420 onto the stack is odd. We need to ensure that the stack
19421 is 64-bit aligned before we start to save iWMMXt registers,
19422 and also before we start to create locals. (A local variable
19423 might be a double or long long which we will load/store using
19424 an iWMMXt instruction). Therefore we need to push another
19425 ARM register, so that the stack will be 64-bit aligned. We
19426 try to avoid using the arg registers (r0 - r3) as they might be
19427 used to pass values in a tail call. */
19428 for (reg = 4; reg <= 12; reg++)
19429 if ((save_reg_mask & (1 << reg)) == 0)
19430 break;
19432 if (reg <= 12)
19433 save_reg_mask |= (1 << reg);
19434 else
19436 cfun->machine->sibcall_blocked = 1;
19437 save_reg_mask |= (1 << 3);
19441 /* We may need to push an additional register for use initializing the
19442 PIC base register. */
19443 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19444 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19446 reg = thumb_find_work_register (1 << 4);
19447 if (!call_used_regs[reg])
19448 save_reg_mask |= (1 << reg);
19451 return save_reg_mask;
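/* Illustrative sketch (not part of GCC): the iWMMXt alignment rule above in
   isolation.  If the number of 32-bit words about to be pushed (saved core
   registers plus pretend args and static chain) is odd, one more register is
   pushed so the iWMMXt save area and the locals stay 64-bit aligned; r4-r12
   are preferred so that the argument registers stay free for a tail call.
   The helper below is hypothetical.  */

static int
sketch_alignment_padding_reg (unsigned long save_mask, int extra_bytes)
{
  int words = 0, reg;

  for (reg = 0; reg < 16; reg++)      /* Registers already in the mask.  */
    if (save_mask & (1UL << reg))
      words++;

  words += (extra_bytes + 3) / 4;     /* Pretend args + static chain, in words.  */

  if ((words & 1) == 0)
    return -1;                        /* Already 64-bit aligned: no padding.  */

  for (reg = 4; reg <= 12; reg++)     /* Prefer a register outside r0-r3.  */
    if ((save_mask & (1UL << reg)) == 0)
      return reg;

  return 3;                           /* Last resort: r3 (this blocks sibcalls).  */
}

/* E.g. a mask of {r4, r5, lr} with no extra bytes is 3 words, so r6 is
   returned; add 4 bytes of pretend args and no padding is needed.  */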
19454 /* Compute a bit mask of which registers need to be
19455 saved on the stack for the current function. */
19456 static unsigned long
19457 thumb1_compute_save_reg_mask (void)
19459 unsigned long mask;
19460 unsigned reg;
19462 mask = 0;
19463 for (reg = 0; reg < 12; reg ++)
19464 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19465 mask |= 1 << reg;
19467 if (flag_pic
19468 && !TARGET_SINGLE_PIC_BASE
19469 && arm_pic_register != INVALID_REGNUM
19470 && crtl->uses_pic_offset_table)
19471 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19473 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19474 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19475 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19477 /* LR will also be pushed if any lo regs are pushed. */
19478 if (mask & 0xff || thumb_force_lr_save ())
19479 mask |= (1 << LR_REGNUM);
19481 /* Make sure we have a low work register if we need one.
19482 We will need one if we are going to push a high register,
19483 but we are not currently intending to push a low register. */
19484 if ((mask & 0xff) == 0
19485 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19487 /* Use thumb_find_work_register to choose which register
19488 we will use. If the register is live then we will
19489 have to push it. Use LAST_LO_REGNUM as our fallback
19490 choice for the register to select. */
19491 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19492 /* Make sure the register returned by thumb_find_work_register is
19493 not part of the return value. */
19494 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19495 reg = LAST_LO_REGNUM;
19497 if (callee_saved_reg_p (reg))
19498 mask |= 1 << reg;
19501 /* The 504 below is 8 bytes less than 512 because there are two possible
19502 alignment words. We can't tell here if they will be present or not so we
19503 have to play it safe and assume that they are. */
19504 if ((CALLER_INTERWORKING_SLOT_SIZE +
19505 ROUND_UP_WORD (get_frame_size ()) +
19506 crtl->outgoing_args_size) >= 504)
19508 /* This is the same as the code in thumb1_expand_prologue() which
19509 determines which register to use for stack decrement. */
19510 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19511 if (mask & (1 << reg))
19512 break;
19514 if (reg > LAST_LO_REGNUM)
19516 /* Make sure we have a register available for stack decrement. */
19517 mask |= 1 << LAST_LO_REGNUM;
19521 return mask;
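/* Illustrative sketch (not part of GCC): the 504-byte check above in
   isolation.  A single Thumb-1 SP-adjust instruction only takes a small
   immediate (on the order of 512 bytes; 504 allows for the two possible
   alignment words), so for larger frames the prologue needs a spare low
   register to hold the decrement.  If none of r4-r7 is already being
   saved, one is forced into the mask.  Names below are hypothetical.  */

static unsigned long
sketch_reserve_decrement_reg (unsigned long mask, int frame_bytes)
{
  int reg;

  if (frame_bytes < 504)
    return mask;                    /* A single SP-adjust will do.  */

  for (reg = 4; reg <= 7; reg++)    /* Look for an already-saved low reg.  */
    if (mask & (1UL << reg))
      return mask;

  return mask | (1UL << 7);         /* Otherwise force r7 into the save set.  */
}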
19525 /* Return the number of bytes required to save VFP registers. */
19526 static int
19527 arm_get_vfp_saved_size (void)
19529 unsigned int regno;
19530 int count;
19531 int saved;
19533 saved = 0;
19534 /* Space for saved VFP registers. */
19535 if (TARGET_HARD_FLOAT && TARGET_VFP)
19537 count = 0;
19538 for (regno = FIRST_VFP_REGNUM;
19539 regno < LAST_VFP_REGNUM;
19540 regno += 2)
19542 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19543 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19545 if (count > 0)
19547 /* Workaround ARM10 VFPr1 bug. */
19548 if (count == 2 && !arm_arch6)
19549 count++;
19550 saved += count * 8;
19552 count = 0;
19554 else
19555 count++;
19557 if (count > 0)
19559 if (count == 2 && !arm_arch6)
19560 count++;
19561 saved += count * 8;
19564 return saved;
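/* Illustrative sketch (not part of GCC): the run-length accounting used
   above.  D registers are saved in contiguous blocks; each block of COUNT
   registers takes 8 * COUNT bytes, and on pre-v6 cores a block of exactly
   two registers is padded to three to work around the ARM10 VFPr1 erratum.
   The array-based interface below is hypothetical.  */

static int
sketch_vfp_saved_bytes (const int *dreg_live, int num_dregs, int arch6)
{
  int i, count = 0, saved = 0;

  for (i = 0; i <= num_dregs; i++)
    {
      /* Treat the end of the array as a dead register to flush the last run.  */
      if (i < num_dregs && dreg_live[i])
        count++;
      else if (count > 0)
        {
          if (count == 2 && !arch6)   /* ARM10 VFPr1 workaround.  */
            count++;
          saved += count * 8;
          count = 0;
        }
    }
  return saved;
}

/* E.g. live = {1,1,0,1} on a pre-v6 core gives (2 -> 3)*8 + 1*8 = 32 bytes.  */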
19568 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19569 everything bar the final return instruction. If simple_return is true,
19570 then do not output epilogue, because it has already been emitted in RTL. */
19571 const char *
19572 output_return_instruction (rtx operand, bool really_return, bool reverse,
19573 bool simple_return)
19575 char conditional[10];
19576 char instr[100];
19577 unsigned reg;
19578 unsigned long live_regs_mask;
19579 unsigned long func_type;
19580 arm_stack_offsets *offsets;
19582 func_type = arm_current_func_type ();
19584 if (IS_NAKED (func_type))
19585 return "";
19587 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19589 /* If this function was declared non-returning, and we have
19590 found a tail call, then we have to trust that the called
19591 function won't return. */
19592 if (really_return)
19594 rtx ops[2];
19596 /* Otherwise, trap an attempted return by aborting. */
19597 ops[0] = operand;
19598 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19599 : "abort");
19600 assemble_external_libcall (ops[1]);
19601 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19604 return "";
19607 gcc_assert (!cfun->calls_alloca || really_return);
19609 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19611 cfun->machine->return_used_this_function = 1;
19613 offsets = arm_get_frame_offsets ();
19614 live_regs_mask = offsets->saved_regs_mask;
19616 if (!simple_return && live_regs_mask)
19618 const char * return_reg;
19620 /* If we do not have any special requirements for function exit
19621 (e.g. interworking) then we can load the return address
19622 directly into the PC. Otherwise we must load it into LR. */
19623 if (really_return
19624 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19625 return_reg = reg_names[PC_REGNUM];
19626 else
19627 return_reg = reg_names[LR_REGNUM];
19629 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19631 /* There are three possible reasons for the IP register
19632 being saved. 1) a stack frame was created, in which case
19633 IP contains the old stack pointer, or 2) an ISR routine
19634 corrupted it, or 3) it was saved to align the stack on
19635 iWMMXt. In case 1, restore IP into SP, otherwise just
19636 restore IP. */
19637 if (frame_pointer_needed)
19639 live_regs_mask &= ~ (1 << IP_REGNUM);
19640 live_regs_mask |= (1 << SP_REGNUM);
19642 else
19643 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19646 /* On some ARM architectures it is faster to use LDR rather than
19647 LDM to load a single register. On other architectures, the
19648 cost is the same. In 26 bit mode, or for exception handlers,
19649 we have to use LDM to load the PC so that the CPSR is also
19650 restored. */
19651 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19652 if (live_regs_mask == (1U << reg))
19653 break;
19655 if (reg <= LAST_ARM_REGNUM
19656 && (reg != LR_REGNUM
19657 || ! really_return
19658 || ! IS_INTERRUPT (func_type)))
19660 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19661 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19663 else
19665 char *p;
19666 int first = 1;
19668 /* Generate the load multiple instruction to restore the
19669 registers. Note we can get here, even if
19670 frame_pointer_needed is true, but only if sp already
19671 points to the base of the saved core registers. */
19672 if (live_regs_mask & (1 << SP_REGNUM))
19674 unsigned HOST_WIDE_INT stack_adjust;
19676 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19677 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19679 if (stack_adjust && arm_arch5 && TARGET_ARM)
19680 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19681 else
19683 /* If we can't use ldmib (SA110 bug),
19684 then try to pop r3 instead. */
19685 if (stack_adjust)
19686 live_regs_mask |= 1 << 3;
19688 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19691 /* For interrupt returns we have to use an LDM rather than
19692 a POP so that we can use the exception return variant. */
19693 else if (IS_INTERRUPT (func_type))
19694 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19695 else
19696 sprintf (instr, "pop%s\t{", conditional);
19698 p = instr + strlen (instr);
19700 for (reg = 0; reg <= SP_REGNUM; reg++)
19701 if (live_regs_mask & (1 << reg))
19703 int l = strlen (reg_names[reg]);
19705 if (first)
19706 first = 0;
19707 else
19709 memcpy (p, ", ", 2);
19710 p += 2;
19713 memcpy (p, "%|", 2);
19714 memcpy (p + 2, reg_names[reg], l);
19715 p += l + 2;
19718 if (live_regs_mask & (1 << LR_REGNUM))
19720 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19721 /* If returning from an interrupt, restore the CPSR. */
19722 if (IS_INTERRUPT (func_type))
19723 strcat (p, "^");
19725 else
19726 strcpy (p, "}");
19729 output_asm_insn (instr, & operand);
19731 /* See if we need to generate an extra instruction to
19732 perform the actual function return. */
19733 if (really_return
19734 && func_type != ARM_FT_INTERWORKED
19735 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19737 /* The return has already been handled
19738 by loading the LR into the PC. */
19739 return "";
19743 if (really_return)
19745 switch ((int) ARM_FUNC_TYPE (func_type))
19747 case ARM_FT_ISR:
19748 case ARM_FT_FIQ:
19749 /* ??? This is wrong for unified assembly syntax. */
19750 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19751 break;
19753 case ARM_FT_INTERWORKED:
19754 gcc_assert (arm_arch5 || arm_arch4t);
19755 sprintf (instr, "bx%s\t%%|lr", conditional);
19756 break;
19758 case ARM_FT_EXCEPTION:
19759 /* ??? This is wrong for unified assembly syntax. */
19760 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19761 break;
19763 default:
19764 /* Use bx if it's available. */
19765 if (arm_arch5 || arm_arch4t)
19766 sprintf (instr, "bx%s\t%%|lr", conditional);
19767 else
19768 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19769 break;
19772 output_asm_insn (instr, & operand);
19775 return "";
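/* Illustrative sketch (not part of GCC): building a register-list string
   the way the loop above does, without the %-escapes GCC's asm templates
   use.  Purely hypothetical helper; fixed-size buffer for brevity.  */

#include <stdio.h>
#include <string.h>

static void
sketch_format_pop (unsigned long mask, char *buf, size_t len)
{
  static const char *const names[16] =
    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
      "r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc" };
  int reg, first = 1;

  snprintf (buf, len, "pop\t{");
  for (reg = 0; reg < 16; reg++)
    if (mask & (1UL << reg))
      {
        strncat (buf, first ? "" : ", ", len - strlen (buf) - 1);
        strncat (buf, names[reg], len - strlen (buf) - 1);
        first = 0;
      }
  strncat (buf, "}", len - strlen (buf) - 1);
}

/* E.g. mask (1<<4)|(1<<5)|(1<<15) yields "pop\t{r4, r5, pc}".  */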
19778 /* Write the function name into the code section, directly preceding
19779 the function prologue.
19781 Code will be output similar to this:
19783 .ascii "arm_poke_function_name", 0
19784 .align
19786 .word 0xff000000 + (t1 - t0)
19787 arm_poke_function_name
19788 mov ip, sp
19789 stmfd sp!, {fp, ip, lr, pc}
19790 sub fp, ip, #4
19792 When performing a stack backtrace, code can inspect the value
19793 of 'pc' stored at 'fp' + 0. If the trace function then looks
19794 at location pc - 12 and the top 8 bits are set, then we know
19795 that there is a function name embedded immediately preceding this
19796 location, whose length is (pc[-3] & ~0xff000000).
19798 We assume that pc is declared as a pointer to an unsigned long.
19800 It is of no benefit to output the function name if we are assembling
19801 a leaf function. These function types will not contain a stack
19802 backtrace structure, therefore it is not possible to determine the
19803 function name. */
19804 void
19805 arm_poke_function_name (FILE *stream, const char *name)
19807 unsigned long alignlength;
19808 unsigned long length;
19809 rtx x;
19811 length = strlen (name) + 1;
19812 alignlength = ROUND_UP_WORD (length);
19814 ASM_OUTPUT_ASCII (stream, name, length);
19815 ASM_OUTPUT_ALIGN (stream, 2);
19816 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19817 assemble_aligned_integer (UNITS_PER_WORD, x);
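/* Illustrative sketch (not part of GCC, and only meaningful on a target
   that uses the APCS backtrace layout emitted above): given the 'pc'
   value saved in a frame, recover the poked function name.  The marker
   word lives at pc - 12 (pc[-3] when pc is an unsigned long pointer);
   its top byte is 0xff and its low 24 bits give the padded length of the
   string that precedes it.  The helper name is hypothetical.  */

#include <stddef.h>

static const char *
sketch_read_poked_name (const unsigned long *pc)
{
  unsigned long marker = pc[-3];

  if ((marker & 0xff000000UL) != 0xff000000UL)
    return NULL;                                  /* No embedded name.  */

  /* The NUL-terminated name sits immediately before the marker word,
     padded to a word boundary; back up by its padded length.  */
  return (const char *) (pc - 3) - (marker & 0x00ffffffUL);
}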
19820 /* Place some comments into the assembler stream
19821 describing the current function. */
19822 static void
19823 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19825 unsigned long func_type;
19827 /* ??? Do we want to print some of the below anyway? */
19828 if (TARGET_THUMB1)
19829 return;
19831 /* Sanity check. */
19832 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19834 func_type = arm_current_func_type ();
19836 switch ((int) ARM_FUNC_TYPE (func_type))
19838 default:
19839 case ARM_FT_NORMAL:
19840 break;
19841 case ARM_FT_INTERWORKED:
19842 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19843 break;
19844 case ARM_FT_ISR:
19845 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19846 break;
19847 case ARM_FT_FIQ:
19848 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19849 break;
19850 case ARM_FT_EXCEPTION:
19851 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19852 break;
19855 if (IS_NAKED (func_type))
19856 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19858 if (IS_VOLATILE (func_type))
19859 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19861 if (IS_NESTED (func_type))
19862 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19863 if (IS_STACKALIGN (func_type))
19864 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19866 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19867 crtl->args.size,
19868 crtl->args.pretend_args_size, frame_size);
19870 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19871 frame_pointer_needed,
19872 cfun->machine->uses_anonymous_args);
19874 if (cfun->machine->lr_save_eliminated)
19875 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19877 if (crtl->calls_eh_return)
19878 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19882 static void
19883 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19884 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19886 arm_stack_offsets *offsets;
19888 if (TARGET_THUMB1)
19890 int regno;
19892 /* Emit any call-via-reg trampolines that are needed for v4t support
19893 of call_reg and call_value_reg type insns. */
19894 for (regno = 0; regno < LR_REGNUM; regno++)
19896 rtx label = cfun->machine->call_via[regno];
19898 if (label != NULL)
19900 switch_to_section (function_section (current_function_decl));
19901 targetm.asm_out.internal_label (asm_out_file, "L",
19902 CODE_LABEL_NUMBER (label));
19903 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19907 /* ??? Probably not safe to set this here, since it assumes that a
19908 function will be emitted as assembly immediately after we generate
19909 RTL for it. This does not happen for inline functions. */
19910 cfun->machine->return_used_this_function = 0;
19912 else /* TARGET_32BIT */
19914 /* We need to take into account any stack-frame rounding. */
19915 offsets = arm_get_frame_offsets ();
19917 gcc_assert (!use_return_insn (FALSE, NULL)
19918 || (cfun->machine->return_used_this_function != 0)
19919 || offsets->saved_regs == offsets->outgoing_args
19920 || frame_pointer_needed);
19924 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19925 STR and STRD. If an even number of registers are being pushed, one
19926 or more STRD patterns are created for each register pair. If an
19927 odd number of registers are pushed, emit an initial STR followed by
19928 as many STRD instructions as are needed. This works best when the
19929 stack is initially 64-bit aligned (the normal case), since it
19930 ensures that each STRD is also 64-bit aligned. */
19931 static void
19932 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19934 int num_regs = 0;
19935 int i;
19936 int regno;
19937 rtx par = NULL_RTX;
19938 rtx dwarf = NULL_RTX;
19939 rtx tmp;
19940 bool first = true;
19942 num_regs = bit_count (saved_regs_mask);
19944 /* Must be at least one register to save, and can't save SP or PC. */
19945 gcc_assert (num_regs > 0 && num_regs <= 14);
19946 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19947 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19949 /* Create sequence for DWARF info. All the frame-related data for
19950 debugging is held in this wrapper. */
19951 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19953 /* Describe the stack adjustment. */
19954 tmp = gen_rtx_SET (stack_pointer_rtx,
19955 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19956 RTX_FRAME_RELATED_P (tmp) = 1;
19957 XVECEXP (dwarf, 0, 0) = tmp;
19959 /* Find the first register. */
19960 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19963 i = 0;
19965 /* If there's an odd number of registers to push, start off by
19966 pushing a single register. This ensures that subsequent strd
19967 operations are dword aligned (assuming that SP was originally
19968 64-bit aligned). */
19969 if ((num_regs & 1) != 0)
19971 rtx reg, mem, insn;
19973 reg = gen_rtx_REG (SImode, regno);
19974 if (num_regs == 1)
19975 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19976 stack_pointer_rtx));
19977 else
19978 mem = gen_frame_mem (Pmode,
19979 gen_rtx_PRE_MODIFY
19980 (Pmode, stack_pointer_rtx,
19981 plus_constant (Pmode, stack_pointer_rtx,
19982 -4 * num_regs)));
19984 tmp = gen_rtx_SET (mem, reg);
19985 RTX_FRAME_RELATED_P (tmp) = 1;
19986 insn = emit_insn (tmp);
19987 RTX_FRAME_RELATED_P (insn) = 1;
19988 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19989 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19990 RTX_FRAME_RELATED_P (tmp) = 1;
19991 i++;
19992 regno++;
19993 XVECEXP (dwarf, 0, i) = tmp;
19994 first = false;
19997 while (i < num_regs)
19998 if (saved_regs_mask & (1 << regno))
20000 rtx reg1, reg2, mem1, mem2;
20001 rtx tmp0, tmp1, tmp2;
20002 int regno2;
20004 /* Find the register to pair with this one. */
20005 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20006 regno2++)
20009 reg1 = gen_rtx_REG (SImode, regno);
20010 reg2 = gen_rtx_REG (SImode, regno2);
20012 if (first)
20014 rtx insn;
20016 first = false;
20017 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20018 stack_pointer_rtx,
20019 -4 * num_regs));
20020 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20021 stack_pointer_rtx,
20022 -4 * (num_regs - 1)));
20023 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20024 plus_constant (Pmode, stack_pointer_rtx,
20025 -4 * (num_regs)));
20026 tmp1 = gen_rtx_SET (mem1, reg1);
20027 tmp2 = gen_rtx_SET (mem2, reg2);
20028 RTX_FRAME_RELATED_P (tmp0) = 1;
20029 RTX_FRAME_RELATED_P (tmp1) = 1;
20030 RTX_FRAME_RELATED_P (tmp2) = 1;
20031 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20032 XVECEXP (par, 0, 0) = tmp0;
20033 XVECEXP (par, 0, 1) = tmp1;
20034 XVECEXP (par, 0, 2) = tmp2;
20035 insn = emit_insn (par);
20036 RTX_FRAME_RELATED_P (insn) = 1;
20037 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20039 else
20041 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20042 stack_pointer_rtx,
20043 4 * i));
20044 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20045 stack_pointer_rtx,
20046 4 * (i + 1)));
20047 tmp1 = gen_rtx_SET (mem1, reg1);
20048 tmp2 = gen_rtx_SET (mem2, reg2);
20049 RTX_FRAME_RELATED_P (tmp1) = 1;
20050 RTX_FRAME_RELATED_P (tmp2) = 1;
20051 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20052 XVECEXP (par, 0, 0) = tmp1;
20053 XVECEXP (par, 0, 1) = tmp2;
20054 emit_insn (par);
20057 /* Create unwind information. This is an approximation. */
20058 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20059 plus_constant (Pmode,
20060 stack_pointer_rtx,
20061 4 * i)),
20062 reg1);
20063 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20064 plus_constant (Pmode,
20065 stack_pointer_rtx,
20066 4 * (i + 1))),
20067 reg2);
20069 RTX_FRAME_RELATED_P (tmp1) = 1;
20070 RTX_FRAME_RELATED_P (tmp2) = 1;
20071 XVECEXP (dwarf, 0, i + 1) = tmp1;
20072 XVECEXP (dwarf, 0, i + 2) = tmp2;
20073 i += 2;
20074 regno = regno2 + 1;
20076 else
20077 regno++;
20079 return;
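/* Illustrative sketch (not part of GCC): the grouping rule used above.
   With an odd number of registers, one STR goes first so that every
   following STRD lands on a 64-bit aligned address; with an even number,
   the whole push is done with STRDs.  The first store's writeback
   allocates the entire block.  The struct and helper below are
   hypothetical, purely to show the arithmetic.  */

struct sketch_push_plan
{
  int num_regs;        /* Total registers pushed.  */
  int leading_str;     /* 1 if a single STR is emitted first.  */
  int num_strd;        /* Number of STRD (register pair) stores.  */
  int sp_decrement;    /* Bytes the first store's writeback allocates.  */
};

static struct sketch_push_plan
sketch_plan_strd_push (unsigned long mask)
{
  struct sketch_push_plan plan = { 0, 0, 0, 0 };
  int reg;

  for (reg = 0; reg < 15; reg++)          /* SP and PC are never in the mask.  */
    if (mask & (1UL << reg))
      plan.num_regs++;

  plan.leading_str = plan.num_regs & 1;
  plan.num_strd = plan.num_regs / 2;
  plan.sp_decrement = 4 * plan.num_regs;  /* Allocated once, by the first store.  */
  return plan;
}

/* E.g. pushing {r4, r5, r6, r7, lr} gives one STR plus two STRDs,
   with the first store writing back SP - 20.  */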
20082 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20083 whenever possible, otherwise it emits single-word stores. The first store
20084 also allocates stack space for all saved registers, using writeback with
20085 post-addressing mode. All other stores use offset addressing. If no STRD
20086 can be emitted, this function emits a sequence of single-word stores,
20087 and not an STM as before, because single-word stores provide more scheduling
20088 freedom and can be turned into an STM by peephole optimizations. */
20089 static void
20090 arm_emit_strd_push (unsigned long saved_regs_mask)
20092 int num_regs = 0;
20093 int i, j, dwarf_index = 0;
20094 int offset = 0;
20095 rtx dwarf = NULL_RTX;
20096 rtx insn = NULL_RTX;
20097 rtx tmp, mem;
20099 /* TODO: More efficient code can be emitted by changing the
20100 layout, e.g., first push all pairs that can use STRD to keep the
20101 stack aligned, and then push all other registers. */
20102 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20103 if (saved_regs_mask & (1 << i))
20104 num_regs++;
20106 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20107 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20108 gcc_assert (num_regs > 0);
20110 /* Create sequence for DWARF info. */
20111 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20113 /* For dwarf info, we generate explicit stack update. */
20114 tmp = gen_rtx_SET (stack_pointer_rtx,
20115 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20116 RTX_FRAME_RELATED_P (tmp) = 1;
20117 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20119 /* Save registers. */
20120 offset = - 4 * num_regs;
20121 j = 0;
20122 while (j <= LAST_ARM_REGNUM)
20123 if (saved_regs_mask & (1 << j))
20125 if ((j % 2 == 0)
20126 && (saved_regs_mask & (1 << (j + 1))))
20128 /* Current register and previous register form register pair for
20129 which STRD can be generated. */
20130 if (offset < 0)
20132 /* Allocate stack space for all saved registers. */
20133 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20134 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20135 mem = gen_frame_mem (DImode, tmp);
20136 offset = 0;
20138 else if (offset > 0)
20139 mem = gen_frame_mem (DImode,
20140 plus_constant (Pmode,
20141 stack_pointer_rtx,
20142 offset));
20143 else
20144 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20146 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20147 RTX_FRAME_RELATED_P (tmp) = 1;
20148 tmp = emit_insn (tmp);
20150 /* Record the first store insn. */
20151 if (dwarf_index == 1)
20152 insn = tmp;
20154 /* Generate dwarf info. */
20155 mem = gen_frame_mem (SImode,
20156 plus_constant (Pmode,
20157 stack_pointer_rtx,
20158 offset));
20159 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20160 RTX_FRAME_RELATED_P (tmp) = 1;
20161 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20163 mem = gen_frame_mem (SImode,
20164 plus_constant (Pmode,
20165 stack_pointer_rtx,
20166 offset + 4));
20167 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20168 RTX_FRAME_RELATED_P (tmp) = 1;
20169 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20171 offset += 8;
20172 j += 2;
20174 else
20176 /* Emit a single word store. */
20177 if (offset < 0)
20179 /* Allocate stack space for all saved registers. */
20180 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20181 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20182 mem = gen_frame_mem (SImode, tmp);
20183 offset = 0;
20185 else if (offset > 0)
20186 mem = gen_frame_mem (SImode,
20187 plus_constant (Pmode,
20188 stack_pointer_rtx,
20189 offset));
20190 else
20191 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20193 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20194 RTX_FRAME_RELATED_P (tmp) = 1;
20195 tmp = emit_insn (tmp);
20197 /* Record the first store insn. */
20198 if (dwarf_index == 1)
20199 insn = tmp;
20201 /* Generate dwarf info. */
20202 mem = gen_frame_mem (SImode,
20203 plus_constant(Pmode,
20204 stack_pointer_rtx,
20205 offset));
20206 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20207 RTX_FRAME_RELATED_P (tmp) = 1;
20208 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20210 offset += 4;
20211 j += 1;
20214 else
20215 j++;
20217 /* Attach dwarf info to the first insn we generate. */
20218 gcc_assert (insn != NULL_RTX);
20219 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20220 RTX_FRAME_RELATED_P (insn) = 1;
20223 /* Generate and emit an insn that we will recognize as a push_multi.
20224 Unfortunately, since this insn does not reflect very well the actual
20225 semantics of the operation, we need to annotate the insn for the benefit
20226 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20227 MASK for registers that should be annotated for DWARF2 frame unwind
20228 information. */
20229 static rtx
20230 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20232 int num_regs = 0;
20233 int num_dwarf_regs = 0;
20234 int i, j;
20235 rtx par;
20236 rtx dwarf;
20237 int dwarf_par_index;
20238 rtx tmp, reg;
20240 /* We don't record the PC in the dwarf frame information. */
20241 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20243 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20245 if (mask & (1 << i))
20246 num_regs++;
20247 if (dwarf_regs_mask & (1 << i))
20248 num_dwarf_regs++;
20251 gcc_assert (num_regs && num_regs <= 16);
20252 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20254 /* For the body of the insn we are going to generate an UNSPEC in
20255 parallel with several USEs. This allows the insn to be recognized
20256 by the push_multi pattern in the arm.md file.
20258 The body of the insn looks something like this:
20260 (parallel [
20261 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20262 (const_int:SI <num>)))
20263 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20264 (use (reg:SI XX))
20265 (use (reg:SI YY))
20269 For the frame note however, we try to be more explicit and actually
20270 show each register being stored into the stack frame, plus a (single)
20271 decrement of the stack pointer. We do it this way in order to be
20272 friendly to the stack unwinding code, which only wants to see a single
20273 stack decrement per instruction. The RTL we generate for the note looks
20274 something like this:
20276 (sequence [
20277 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20278 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20279 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20280 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20284 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20285 instead we'd have a parallel expression detailing all
20286 the stores to the various memory addresses so that debug
20287 information is more up-to-date. Remember however while writing
20288 this to take care of the constraints with the push instruction.
20290 Note also that this has to be taken care of for the VFP registers.
20292 For more see PR43399. */
20294 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20295 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20296 dwarf_par_index = 1;
20298 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20300 if (mask & (1 << i))
20302 reg = gen_rtx_REG (SImode, i);
20304 XVECEXP (par, 0, 0)
20305 = gen_rtx_SET (gen_frame_mem
20306 (BLKmode,
20307 gen_rtx_PRE_MODIFY (Pmode,
20308 stack_pointer_rtx,
20309 plus_constant
20310 (Pmode, stack_pointer_rtx,
20311 -4 * num_regs))),
20313 gen_rtx_UNSPEC (BLKmode,
20314 gen_rtvec (1, reg),
20315 UNSPEC_PUSH_MULT));
20317 if (dwarf_regs_mask & (1 << i))
20319 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20320 reg);
20321 RTX_FRAME_RELATED_P (tmp) = 1;
20322 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20325 break;
20329 for (j = 1, i++; j < num_regs; i++)
20331 if (mask & (1 << i))
20333 reg = gen_rtx_REG (SImode, i);
20335 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20337 if (dwarf_regs_mask & (1 << i))
20340 tmp = gen_rtx_SET (gen_frame_mem
20341 (SImode,
20342 plus_constant (Pmode, stack_pointer_rtx,
20343 4 * j)),
20344 reg);
20345 RTX_FRAME_RELATED_P (tmp) = 1;
20346 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20349 j++;
20353 par = emit_insn (par);
20355 tmp = gen_rtx_SET (stack_pointer_rtx,
20356 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20357 RTX_FRAME_RELATED_P (tmp) = 1;
20358 XVECEXP (dwarf, 0, 0) = tmp;
20360 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20362 return par;
20365 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20366 SIZE is the offset to be adjusted.
20367 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20368 static void
20369 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20371 rtx dwarf;
20373 RTX_FRAME_RELATED_P (insn) = 1;
20374 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20375 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20378 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20379 SAVED_REGS_MASK shows which registers need to be restored.
20381 Unfortunately, since this insn does not reflect very well the actual
20382 semantics of the operation, we need to annotate the insn for the benefit
20383 of DWARF2 frame unwind information. */
20384 static void
20385 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20387 int num_regs = 0;
20388 int i, j;
20389 rtx par;
20390 rtx dwarf = NULL_RTX;
20391 rtx tmp, reg;
20392 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20393 int offset_adj;
20394 int emit_update;
20396 offset_adj = return_in_pc ? 1 : 0;
20397 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20398 if (saved_regs_mask & (1 << i))
20399 num_regs++;
20401 gcc_assert (num_regs && num_regs <= 16);
20403 /* If SP is in reglist, then we don't emit SP update insn. */
20404 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20406 /* The parallel needs to hold num_regs SETs
20407 and one SET for the stack update. */
20408 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20410 if (return_in_pc)
20411 XVECEXP (par, 0, 0) = ret_rtx;
20413 if (emit_update)
20415 /* Increment the stack pointer, based on there being
20416 num_regs 4-byte registers to restore. */
20417 tmp = gen_rtx_SET (stack_pointer_rtx,
20418 plus_constant (Pmode,
20419 stack_pointer_rtx,
20420 4 * num_regs));
20421 RTX_FRAME_RELATED_P (tmp) = 1;
20422 XVECEXP (par, 0, offset_adj) = tmp;
20425 /* Now restore every reg, which may include PC. */
20426 for (j = 0, i = 0; j < num_regs; i++)
20427 if (saved_regs_mask & (1 << i))
20429 reg = gen_rtx_REG (SImode, i);
20430 if ((num_regs == 1) && emit_update && !return_in_pc)
20432 /* Emit single load with writeback. */
20433 tmp = gen_frame_mem (SImode,
20434 gen_rtx_POST_INC (Pmode,
20435 stack_pointer_rtx));
20436 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20437 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20438 return;
20441 tmp = gen_rtx_SET (reg,
20442 gen_frame_mem
20443 (SImode,
20444 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20445 RTX_FRAME_RELATED_P (tmp) = 1;
20446 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20448 /* We need to maintain a sequence for DWARF info too. As dwarf info
20449 should not have PC, skip PC. */
20450 if (i != PC_REGNUM)
20451 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20453 j++;
20456 if (return_in_pc)
20457 par = emit_jump_insn (par);
20458 else
20459 par = emit_insn (par);
20461 REG_NOTES (par) = dwarf;
20462 if (!return_in_pc)
20463 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20464 stack_pointer_rtx, stack_pointer_rtx);
20467 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20468 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20470 Unfortunately, since this insn does not reflect very well the actual
20471 semantics of the operation, we need to annotate the insn for the benefit
20472 of DWARF2 frame unwind information. */
20473 static void
20474 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20476 int i, j;
20477 rtx par;
20478 rtx dwarf = NULL_RTX;
20479 rtx tmp, reg;
20481 gcc_assert (num_regs && num_regs <= 32);
20483 /* Workaround ARM10 VFPr1 bug. */
20484 if (num_regs == 2 && !arm_arch6)
20486 if (first_reg == 15)
20487 first_reg--;
20489 num_regs++;
20492 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20493 there could be up to 32 D-registers to restore.
20494 If there are more than 16 D-registers, make two recursive calls,
20495 each of which emits one pop_multi instruction. */
20496 if (num_regs > 16)
20498 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20499 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20500 return;
20503 /* The parallel needs to hold num_regs SETs
20504 and one SET for the stack update. */
20505 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20507 /* Increment the stack pointer, based on there being
20508 num_regs 8-byte registers to restore. */
20509 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20510 RTX_FRAME_RELATED_P (tmp) = 1;
20511 XVECEXP (par, 0, 0) = tmp;
20513 /* Now show every reg that will be restored, using a SET for each. */
20514 for (j = 0, i=first_reg; j < num_regs; i += 2)
20516 reg = gen_rtx_REG (DFmode, i);
20518 tmp = gen_rtx_SET (reg,
20519 gen_frame_mem
20520 (DFmode,
20521 plus_constant (Pmode, base_reg, 8 * j)));
20522 RTX_FRAME_RELATED_P (tmp) = 1;
20523 XVECEXP (par, 0, j + 1) = tmp;
20525 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20527 j++;
20530 par = emit_insn (par);
20531 REG_NOTES (par) = dwarf;
20533 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20534 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20536 RTX_FRAME_RELATED_P (par) = 1;
20537 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20539 else
20540 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20541 base_reg, base_reg);
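/* Illustrative sketch (not part of GCC): the splitting rule used above.
   A single VFP pop_multi can restore at most 16 D registers, so larger
   blocks are handled by two calls, each emitting one pop_multi.  The
   callback-based interface below is hypothetical; GCC emits RTL directly
   instead.  */

static void
sketch_split_vfp_pop (int first_reg, int num_regs,
                      void (*emit_pop) (int first, int count))
{
  if (num_regs > 16)
    {
      sketch_split_vfp_pop (first_reg, 16, emit_pop);
      sketch_split_vfp_pop (first_reg + 16, num_regs - 16, emit_pop);
      return;
    }
  emit_pop (first_reg, num_regs);   /* One multi-register load of NUM_REGS doublewords.  */
}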
20544 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20545 even number of registers is being popped, multiple LDRD patterns are created
20546 for all register pairs. If an odd number of registers is popped, the last
20547 register is loaded using an LDR pattern. */
20548 static void
20549 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20551 int num_regs = 0;
20552 int i, j;
20553 rtx par = NULL_RTX;
20554 rtx dwarf = NULL_RTX;
20555 rtx tmp, reg, tmp1;
20556 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20558 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20559 if (saved_regs_mask & (1 << i))
20560 num_regs++;
20562 gcc_assert (num_regs && num_regs <= 16);
20564 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20565 to be popped. So, if num_regs is even, now it will become odd,
20566 and we can generate pop with PC. If num_regs is odd, it will be
20567 even now, and ldr with return can be generated for PC. */
20568 if (return_in_pc)
20569 num_regs--;
20571 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20573 /* Var j iterates over all the registers to gather all the registers in
20574 saved_regs_mask. Var i gives index of saved registers in stack frame.
20575 A PARALLEL RTX of register-pair is created here, so that pattern for
20576 LDRD can be matched. As PC is always last register to be popped, and
20577 we have already decremented num_regs if PC, we don't have to worry
20578 about PC in this loop. */
20579 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20580 if (saved_regs_mask & (1 << j))
20582 /* Create RTX for memory load. */
20583 reg = gen_rtx_REG (SImode, j);
20584 tmp = gen_rtx_SET (reg,
20585 gen_frame_mem (SImode,
20586 plus_constant (Pmode,
20587 stack_pointer_rtx, 4 * i)));
20588 RTX_FRAME_RELATED_P (tmp) = 1;
20590 if (i % 2 == 0)
20592 /* When saved-register index (i) is even, the RTX to be emitted is
20593 yet to be created. Hence create it first. The LDRD pattern we
20594 are generating is :
20595 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20596 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20597 where target registers need not be consecutive. */
20598 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20599 dwarf = NULL_RTX;
20602 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20603 added as 0th element and if i is odd, reg_i is added as 1st element
20604 of LDRD pattern shown above. */
20605 XVECEXP (par, 0, (i % 2)) = tmp;
20606 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20608 if ((i % 2) == 1)
20610 /* When saved-register index (i) is odd, RTXs for both the registers
20611 to be loaded are generated in above given LDRD pattern, and the
20612 pattern can be emitted now. */
20613 par = emit_insn (par);
20614 REG_NOTES (par) = dwarf;
20615 RTX_FRAME_RELATED_P (par) = 1;
20618 i++;
20621 /* If the number of registers pushed is odd and return_in_pc is false, or the
20622 number of registers is even and return_in_pc is true, the last register is
20623 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20624 then use LDR with post-increment. */
20626 /* Increment the stack pointer, based on there being
20627 num_regs 4-byte registers to restore. */
20628 tmp = gen_rtx_SET (stack_pointer_rtx,
20629 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20630 RTX_FRAME_RELATED_P (tmp) = 1;
20631 tmp = emit_insn (tmp);
20632 if (!return_in_pc)
20634 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20635 stack_pointer_rtx, stack_pointer_rtx);
20638 dwarf = NULL_RTX;
20640 if (((num_regs % 2) == 1 && !return_in_pc)
20641 || ((num_regs % 2) == 0 && return_in_pc))
20643 /* Scan for the single register to be popped. Skip until the saved
20644 register is found. */
20645 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20647 /* Gen LDR with post increment here. */
20648 tmp1 = gen_rtx_MEM (SImode,
20649 gen_rtx_POST_INC (SImode,
20650 stack_pointer_rtx));
20651 set_mem_alias_set (tmp1, get_frame_alias_set ());
20653 reg = gen_rtx_REG (SImode, j);
20654 tmp = gen_rtx_SET (reg, tmp1);
20655 RTX_FRAME_RELATED_P (tmp) = 1;
20656 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20658 if (return_in_pc)
20660 /* If return_in_pc, j must be PC_REGNUM. */
20661 gcc_assert (j == PC_REGNUM);
20662 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20663 XVECEXP (par, 0, 0) = ret_rtx;
20664 XVECEXP (par, 0, 1) = tmp;
20665 par = emit_jump_insn (par);
20667 else
20669 par = emit_insn (tmp);
20670 REG_NOTES (par) = dwarf;
20671 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20672 stack_pointer_rtx, stack_pointer_rtx);
20676 else if ((num_regs % 2) == 1 && return_in_pc)
20678 /* There are 2 registers to be popped. So, generate the pattern
20679 pop_multiple_with_stack_update_and_return to pop in PC. */
20680 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20683 return;
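/* Illustrative sketch (not part of GCC): the parity bookkeeping used above.
   PC, if present, is excluded from the LDRD pairs; the remaining registers
   are popped two at a time, and depending on the final parity the last one
   (or PC itself) is handled by a single LDR or by a pop_multi-with-return.
   Hypothetical helper, for illustration only.  */

struct sketch_pop_plan
{
  int num_ldrd;            /* LDRD (register pair) loads.  */
  int final_single_ldr;    /* 1 if one trailing LDR is emitted.  */
  int final_multi_pop;     /* 1 if a pop_multi-with-return finishes the job.  */
};

static struct sketch_pop_plan
sketch_plan_ldrd_pop (int num_regs, int return_in_pc)
{
  struct sketch_pop_plan plan = { 0, 0, 0 };

  if (return_in_pc)
    num_regs--;                         /* PC is never part of an LDRD pair.  */

  plan.num_ldrd = num_regs / 2;
  if ((num_regs & 1) ? !return_in_pc : return_in_pc)
    plan.final_single_ldr = 1;          /* Last register (possibly PC) via LDR.  */
  else if ((num_regs & 1) && return_in_pc)
    plan.final_multi_pop = 1;           /* Remaining reg + PC via pop_multi.  */

  return plan;
}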
20686 /* LDRD in ARM mode needs consecutive registers as operands. This function
20687 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20688 offset addressing and then generates one separate stack update. This provides
20689 more scheduling freedom, compared to writeback on every load. However,
20690 if the function returns using load into PC directly
20691 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20692 before the last load. TODO: Add a peephole optimization to recognize
20693 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20694 peephole optimization to merge the load at stack-offset zero
20695 with the stack update instruction using load with writeback
20696 in post-index addressing mode. */
20697 static void
20698 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20700 int j = 0;
20701 int offset = 0;
20702 rtx par = NULL_RTX;
20703 rtx dwarf = NULL_RTX;
20704 rtx tmp, mem;
20706 /* Restore saved registers. */
20707 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20708 j = 0;
20709 while (j <= LAST_ARM_REGNUM)
20710 if (saved_regs_mask & (1 << j))
20712 if ((j % 2) == 0
20713 && (saved_regs_mask & (1 << (j + 1)))
20714 && (j + 1) != PC_REGNUM)
20716 /* Current register and next register form register pair for which
20717 LDRD can be generated. PC is always the last register popped, and
20718 we handle it separately. */
20719 if (offset > 0)
20720 mem = gen_frame_mem (DImode,
20721 plus_constant (Pmode,
20722 stack_pointer_rtx,
20723 offset));
20724 else
20725 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20727 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20728 tmp = emit_insn (tmp);
20729 RTX_FRAME_RELATED_P (tmp) = 1;
20731 /* Generate dwarf info. */
20733 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20734 gen_rtx_REG (SImode, j),
20735 NULL_RTX);
20736 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20737 gen_rtx_REG (SImode, j + 1),
20738 dwarf);
20740 REG_NOTES (tmp) = dwarf;
20742 offset += 8;
20743 j += 2;
20745 else if (j != PC_REGNUM)
20747 /* Emit a single word load. */
20748 if (offset > 0)
20749 mem = gen_frame_mem (SImode,
20750 plus_constant (Pmode,
20751 stack_pointer_rtx,
20752 offset));
20753 else
20754 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20756 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20757 tmp = emit_insn (tmp);
20758 RTX_FRAME_RELATED_P (tmp) = 1;
20760 /* Generate dwarf info. */
20761 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20762 gen_rtx_REG (SImode, j),
20763 NULL_RTX);
20765 offset += 4;
20766 j += 1;
20768 else /* j == PC_REGNUM */
20769 j++;
20771 else
20772 j++;
20774 /* Update the stack. */
20775 if (offset > 0)
20777 tmp = gen_rtx_SET (stack_pointer_rtx,
20778 plus_constant (Pmode,
20779 stack_pointer_rtx,
20780 offset));
20781 tmp = emit_insn (tmp);
20782 arm_add_cfa_adjust_cfa_note (tmp, offset,
20783 stack_pointer_rtx, stack_pointer_rtx);
20784 offset = 0;
20787 if (saved_regs_mask & (1 << PC_REGNUM))
20789 /* Only PC is to be popped. */
20790 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20791 XVECEXP (par, 0, 0) = ret_rtx;
20792 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20793 gen_frame_mem (SImode,
20794 gen_rtx_POST_INC (SImode,
20795 stack_pointer_rtx)));
20796 RTX_FRAME_RELATED_P (tmp) = 1;
20797 XVECEXP (par, 0, 1) = tmp;
20798 par = emit_jump_insn (par);
20800 /* Generate dwarf info. */
20801 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20802 gen_rtx_REG (SImode, PC_REGNUM),
20803 NULL_RTX);
20804 REG_NOTES (par) = dwarf;
20805 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20806 stack_pointer_rtx, stack_pointer_rtx);
20810 /* Calculate the size of the return value that is passed in registers. */
20811 static unsigned
20812 arm_size_return_regs (void)
20814 machine_mode mode;
20816 if (crtl->return_rtx != 0)
20817 mode = GET_MODE (crtl->return_rtx);
20818 else
20819 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20821 return GET_MODE_SIZE (mode);
20824 /* Return true if the current function needs to save/restore LR. */
20825 static bool
20826 thumb_force_lr_save (void)
20828 return !cfun->machine->lr_save_eliminated
20829 && (!leaf_function_p ()
20830 || thumb_far_jump_used_p ()
20831 || df_regs_ever_live_p (LR_REGNUM));
20834 /* We do not know if r3 will be available, because
20835 there is an indirect tail call happening in this
20836 particular case. */
20837 static bool
20838 is_indirect_tailcall_p (rtx call)
20840 rtx pat = PATTERN (call);
20842 /* Indirect tail call. */
20843 pat = XVECEXP (pat, 0, 0);
20844 if (GET_CODE (pat) == SET)
20845 pat = SET_SRC (pat);
20847 pat = XEXP (XEXP (pat, 0), 0);
20848 return REG_P (pat);
20851 /* Return true if r3 is used by any of the tail call insns in the
20852 current function. */
20853 static bool
20854 any_sibcall_could_use_r3 (void)
20856 edge_iterator ei;
20857 edge e;
20859 if (!crtl->tail_call_emit)
20860 return false;
20861 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20862 if (e->flags & EDGE_SIBCALL)
20864 rtx call = BB_END (e->src);
20865 if (!CALL_P (call))
20866 call = prev_nonnote_nondebug_insn (call);
20867 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20868 if (find_regno_fusage (call, USE, 3)
20869 || is_indirect_tailcall_p (call))
20870 return true;
20872 return false;
20876 /* Compute the distance from register FROM to register TO.
20877 These can be the arg pointer (26), the soft frame pointer (25),
20878 the stack pointer (13) or the hard frame pointer (11).
20879 In thumb mode r7 is used as the soft frame pointer, if needed.
20880 Typical stack layout looks like this:
20882 old stack pointer -> | |
20883 ----
20884 | | \
20885 | | saved arguments for
20886 | | vararg functions
20887 | | /
20889 hard FP & arg pointer -> | | \
20890 | | stack
20891 | | frame
20892 | | /
20894 | | \
20895 | | call saved
20896 | | registers
20897 soft frame pointer -> | | /
20899 | | \
20900 | | local
20901 | | variables
20902 locals base pointer -> | | /
20904 | | \
20905 | | outgoing
20906 | | arguments
20907 current stack pointer -> | | /
20910 For a given function some or all of these stack components
20911 may not be needed, giving rise to the possibility of
20912 eliminating some of the registers.
20914 The values returned by this function must reflect the behavior
20915 of arm_expand_prologue() and arm_compute_save_reg_mask().
20917 The sign of the number returned reflects the direction of stack
20918 growth, so the values are positive for all eliminations except
20919 from the soft frame pointer to the hard frame pointer.
20921 SFP may point just inside the local variables block to ensure correct
20922 alignment. */
20925 /* Calculate stack offsets. These are used to calculate register elimination
20926 offsets and in prologue/epilogue code. Also calculates which registers
20927 should be saved. */
20929 static arm_stack_offsets *
20930 arm_get_frame_offsets (void)
20932 struct arm_stack_offsets *offsets;
20933 unsigned long func_type;
20934 int leaf;
20935 int saved;
20936 int core_saved;
20937 HOST_WIDE_INT frame_size;
20938 int i;
20940 offsets = &cfun->machine->stack_offsets;
20942 /* We need to know if we are a leaf function. Unfortunately, it
20943 is possible to be called after start_sequence has been called,
20944 which causes get_insns to return the insns for the sequence,
20945 not the function, which will cause leaf_function_p to return
20946 the incorrect result.
20948 However, we only need to know about leaf functions once reload has
20949 completed, and the frame size cannot be changed after that time, so we
20950 can safely use the cached value. */
20952 if (reload_completed)
20953 return offsets;
20955 /* Initially this is the size of the local variables. It will be translated
20956 into an offset once we have determined the size of preceding data. */
20957 frame_size = ROUND_UP_WORD (get_frame_size ());
20959 leaf = leaf_function_p ();
20961 /* Space for variadic functions. */
20962 offsets->saved_args = crtl->args.pretend_args_size;
20964 /* In Thumb mode this is incorrect, but never used. */
20965 offsets->frame
20966 = (offsets->saved_args
20967 + arm_compute_static_chain_stack_bytes ()
20968 + (frame_pointer_needed ? 4 : 0));
20970 if (TARGET_32BIT)
20972 unsigned int regno;
20974 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20975 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20976 saved = core_saved;
20978 /* We know that SP will be doubleword aligned on entry, and we must
20979 preserve that condition at any subroutine call. We also require the
20980 soft frame pointer to be doubleword aligned. */
20982 if (TARGET_REALLY_IWMMXT)
20984 /* Check for the call-saved iWMMXt registers. */
20985 for (regno = FIRST_IWMMXT_REGNUM;
20986 regno <= LAST_IWMMXT_REGNUM;
20987 regno++)
20988 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20989 saved += 8;
20992 func_type = arm_current_func_type ();
20993 /* Space for saved VFP registers. */
20994 if (! IS_VOLATILE (func_type)
20995 && TARGET_HARD_FLOAT && TARGET_VFP)
20996 saved += arm_get_vfp_saved_size ();
20998 else /* TARGET_THUMB1 */
21000 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
21001 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21002 saved = core_saved;
21003 if (TARGET_BACKTRACE)
21004 saved += 16;
21007 /* Saved registers include the stack frame. */
21008 offsets->saved_regs
21009 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21010 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21012 /* A leaf function does not need any stack alignment if it has nothing
21013 on the stack. */
21014 if (leaf && frame_size == 0
21015 /* However if it calls alloca(), we have a dynamically allocated
21016 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21017 && ! cfun->calls_alloca)
21019 offsets->outgoing_args = offsets->soft_frame;
21020 offsets->locals_base = offsets->soft_frame;
21021 return offsets;
21024 /* Ensure SFP has the correct alignment. */
21025 if (ARM_DOUBLEWORD_ALIGN
21026 && (offsets->soft_frame & 7))
21028 offsets->soft_frame += 4;
21029 /* Try to align stack by pushing an extra reg. Don't bother doing this
21030 when there is a stack frame as the alignment will be rolled into
21031 the normal stack adjustment. */
21032 if (frame_size + crtl->outgoing_args_size == 0)
21034 int reg = -1;
21036 /* Register r3 is caller-saved. Normally it does not need to be
21037 saved on entry by the prologue. However if we choose to save
21038 it for padding then we may confuse the compiler into thinking
21039 a prologue sequence is required when in fact it is not. This
21040 will occur when shrink-wrapping if r3 is used as a scratch
21041 register and there are no other callee-saved writes.
21043 This situation can be avoided when other callee-saved registers
21044 are available and r3 is not mandatory if we choose a callee-saved
21045 register for padding. */
21046 bool prefer_callee_reg_p = false;
21048 /* If it is safe to use r3, then do so. This sometimes
21049 generates better code on Thumb-2 by avoiding the need to
21050 use 32-bit push/pop instructions. */
21051 if (! any_sibcall_could_use_r3 ()
21052 && arm_size_return_regs () <= 12
21053 && (offsets->saved_regs_mask & (1 << 3)) == 0
21054 && (TARGET_THUMB2
21055 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21057 reg = 3;
21058 if (!TARGET_THUMB2)
21059 prefer_callee_reg_p = true;
21061 if (reg == -1
21062 || prefer_callee_reg_p)
21064 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21066 /* Avoid fixed registers; they may be changed at
21067 arbitrary times so it's unsafe to restore them
21068 during the epilogue. */
21069 if (!fixed_regs[i]
21070 && (offsets->saved_regs_mask & (1 << i)) == 0)
21072 reg = i;
21073 break;
21078 if (reg != -1)
21080 offsets->saved_regs += 4;
21081 offsets->saved_regs_mask |= (1 << reg);
21086 offsets->locals_base = offsets->soft_frame + frame_size;
21087 offsets->outgoing_args = (offsets->locals_base
21088 + crtl->outgoing_args_size);
21090 if (ARM_DOUBLEWORD_ALIGN)
21092 /* Ensure SP remains doubleword aligned. */
21093 if (offsets->outgoing_args & 7)
21094 offsets->outgoing_args += 4;
21095 gcc_assert (!(offsets->outgoing_args & 7));
21098 return offsets;
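/* Illustrative sketch (not part of GCC): the layering and rounding steps
   performed above, reduced to plain arithmetic.  Given the sizes of the
   individual pieces, it returns the offsets of each stack component and the
   total SP decrement, keeping both the soft frame pointer and the final SP
   doubleword aligned as the function above does.  The struct and helper are
   hypothetical; the caller-interworking slot is ignored for brevity.  */

struct sketch_offsets
{
  int saved_args;      /* Pretend args for variadic functions.  */
  int saved_regs;      /* End of the core/VFP register save area.  */
  int soft_frame;      /* Base of the local variable block.  */
  int locals_base;
  int outgoing_args;   /* Total SP decrement for the frame.  */
};

static struct sketch_offsets
sketch_frame_offsets (int pretend_bytes, int static_chain_bytes,
                      int saved_reg_bytes, int frame_size, int outgoing_bytes)
{
  struct sketch_offsets o;

  o.saved_args = pretend_bytes;
  o.saved_regs = o.saved_args + static_chain_bytes + saved_reg_bytes;
  o.soft_frame = o.saved_regs;

  if (o.soft_frame & 7)                 /* Keep the soft frame 8-byte aligned.  */
    o.soft_frame += 4;                  /* (GCC may widen the save mask instead.)  */

  o.locals_base = o.soft_frame + frame_size;
  o.outgoing_args = o.locals_base + outgoing_bytes;

  if (o.outgoing_args & 7)              /* Keep SP 8-byte aligned at calls.  */
    o.outgoing_args += 4;

  return o;
}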
21102 /* Calculate the relative offsets for the different stack pointers. Positive
21103 offsets are in the direction of stack growth. */
21105 HOST_WIDE_INT
21106 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21108 arm_stack_offsets *offsets;
21110 offsets = arm_get_frame_offsets ();
21112 /* OK, now we have enough information to compute the distances.
21113 There must be an entry in these switch tables for each pair
21114 of registers in ELIMINABLE_REGS, even if some of the entries
21115 seem to be redundant or useless. */
21116 switch (from)
21118 case ARG_POINTER_REGNUM:
21119 switch (to)
21121 case THUMB_HARD_FRAME_POINTER_REGNUM:
21122 return 0;
21124 case FRAME_POINTER_REGNUM:
21125 /* This is the reverse of the soft frame pointer
21126 to hard frame pointer elimination below. */
21127 return offsets->soft_frame - offsets->saved_args;
21129 case ARM_HARD_FRAME_POINTER_REGNUM:
21130 /* This is only non-zero in the case where the static chain register
21131 is stored above the frame. */
21132 return offsets->frame - offsets->saved_args - 4;
21134 case STACK_POINTER_REGNUM:
21135 /* If nothing has been pushed on the stack at all
21136 then this will return -4. This *is* correct! */
21137 return offsets->outgoing_args - (offsets->saved_args + 4);
21139 default:
21140 gcc_unreachable ();
21142 gcc_unreachable ();
21144 case FRAME_POINTER_REGNUM:
21145 switch (to)
21147 case THUMB_HARD_FRAME_POINTER_REGNUM:
21148 return 0;
21150 case ARM_HARD_FRAME_POINTER_REGNUM:
21151 /* The hard frame pointer points to the top entry in the
21152 stack frame. The soft frame pointer to the bottom entry
21153 in the stack frame. If there is no stack frame at all,
21154 then they are identical. */
21156 return offsets->frame - offsets->soft_frame;
21158 case STACK_POINTER_REGNUM:
21159 return offsets->outgoing_args - offsets->soft_frame;
21161 default:
21162 gcc_unreachable ();
21164 gcc_unreachable ();
21166 default:
21167 /* You cannot eliminate from the stack pointer.
21168 In theory you could eliminate from the hard frame
21169 pointer to the stack pointer, but this will never
21170 happen, since if a stack frame is not needed the
21171 hard frame pointer will never be used. */
21172 gcc_unreachable ();
21176 /* Given FROM and TO register numbers, say whether this elimination is
21177 allowed. Frame pointer elimination is automatically handled.
21179 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21180 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21181 pointer, we must eliminate FRAME_POINTER_REGNUM into
21182 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21183 ARG_POINTER_REGNUM. */
21185 bool
21186 arm_can_eliminate (const int from, const int to)
21188 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21189 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21190 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21191 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21192 true);
21195 /* Emit RTL to save coprocessor registers on function entry. Returns the
21196 number of bytes pushed. */
21198 static int
21199 arm_save_coproc_regs(void)
21201 int saved_size = 0;
21202 unsigned reg;
21203 unsigned start_reg;
21204 rtx insn;
21206 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21207 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21209 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21210 insn = gen_rtx_MEM (V2SImode, insn);
21211 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21212 RTX_FRAME_RELATED_P (insn) = 1;
21213 saved_size += 8;
21216 if (TARGET_HARD_FLOAT && TARGET_VFP)
21218 start_reg = FIRST_VFP_REGNUM;
21220 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21222 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21223 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21225 if (start_reg != reg)
21226 saved_size += vfp_emit_fstmd (start_reg,
21227 (reg - start_reg) / 2);
21228 start_reg = reg + 2;
21231 if (start_reg != reg)
21232 saved_size += vfp_emit_fstmd (start_reg,
21233 (reg - start_reg) / 2);
21235 return saved_size;
21239 /* Set the Thumb frame pointer from the stack pointer. */
21241 static void
21242 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21244 HOST_WIDE_INT amount;
21245 rtx insn, dwarf;
21247 amount = offsets->outgoing_args - offsets->locals_base;
21248 if (amount < 1024)
21249 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21250 stack_pointer_rtx, GEN_INT (amount)));
21251 else
21253 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21254 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21255 expects the first two operands to be the same. */
21256 if (TARGET_THUMB2)
21258 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21259 stack_pointer_rtx,
21260 hard_frame_pointer_rtx));
21262 else
21264 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21265 hard_frame_pointer_rtx,
21266 stack_pointer_rtx));
21268 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21269 plus_constant (Pmode, stack_pointer_rtx, amount));
21270 RTX_FRAME_RELATED_P (dwarf) = 1;
21271 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21274 RTX_FRAME_RELATED_P (insn) = 1;
21277 struct scratch_reg {
21278 rtx reg;
21279 bool saved;
21282 /* Return a short-lived scratch register for use as a 2nd scratch register on
21283 function entry after the registers are saved in the prologue. This register
21284 must be released by means of release_scratch_register_on_entry. IP is not
21285 considered since it is always used as the 1st scratch register if available.
21287 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21288 mask of live registers. */
21290 static void
21291 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21292 unsigned long live_regs)
21294 int regno = -1;
21296 sr->saved = false;
21298 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21299 regno = LR_REGNUM;
21300 else
21302 unsigned int i;
21304 for (i = 4; i < 11; i++)
21305 if (regno1 != i && (live_regs & (1 << i)) != 0)
21307 regno = i;
21308 break;
21311 if (regno < 0)
21313 /* If IP is used as the 1st scratch register for a nested function,
21314 then either r3 wasn't available or it is used to preserve IP. */
21315 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21316 regno1 = 3;
21317 regno = (regno1 == 3 ? 2 : 3);
21318 sr->saved
21319 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21320 regno);
21324 sr->reg = gen_rtx_REG (SImode, regno);
21325 if (sr->saved)
21327 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21328 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21329 rtx x = gen_rtx_SET (stack_pointer_rtx,
21330 plus_constant (Pmode, stack_pointer_rtx, -4));
21331 RTX_FRAME_RELATED_P (insn) = 1;
21332 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21336 /* Release a scratch register obtained from the preceding function. */
21338 static void
21339 release_scratch_register_on_entry (struct scratch_reg *sr)
21341 if (sr->saved)
21343 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21344 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21345 rtx x = gen_rtx_SET (stack_pointer_rtx,
21346 plus_constant (Pmode, stack_pointer_rtx, 4));
21347 RTX_FRAME_RELATED_P (insn) = 1;
21348 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21352 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21354 #if PROBE_INTERVAL > 4096
21355 #error Cannot use indexed addressing mode for stack probing
21356 #endif
21358 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21359 inclusive. These are offsets from the current stack pointer. REGNO1
21360 is the index number of the 1st scratch register and LIVE_REGS is the
21361 mask of live registers. */
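/* A worked example (illustrative, assuming PROBE_INTERVAL == 4096): with
   FIRST == 4096 and SIZE == 4096 a single probe is emitted at SP - 8192,
   i.e. at SP - (FIRST + SIZE); with FIRST == 4096 and SIZE == 12288 probes
   are emitted at SP - 8192, SP - 12288 and SP - 16384.  */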
21363 static void
21364 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21365 unsigned int regno1, unsigned long live_regs)
21367 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21369 /* See if we have a constant small number of probes to generate. If so,
21370 that's the easy case. */
21371 if (size <= PROBE_INTERVAL)
21373 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21374 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21375 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21378 /* The run-time loop is made up of 10 insns in the generic case while the
21379 compile-time loop is made up of 4+2*(n-2) insns, where n is the number of intervals. */
21380 else if (size <= 5 * PROBE_INTERVAL)
21382 HOST_WIDE_INT i, rem;
21384 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21385 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21386 emit_stack_probe (reg1);
21388 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21389 it exceeds SIZE. If only two probes are needed, this will not
21390 generate any code. Then probe at FIRST + SIZE. */
21391 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21393 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21394 emit_stack_probe (reg1);
21397 rem = size - (i - PROBE_INTERVAL);
21398 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21400 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21401 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21403 else
21404 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21407 /* Otherwise, do the same as above, but in a loop. Note that we must be
21408 extra careful with variables wrapping around because we might be at
21409 the very top (or the very bottom) of the address space and we have
21410 to be able to handle this case properly; in particular, we use an
21411 equality test for the loop condition. */
21412 else
21414 HOST_WIDE_INT rounded_size;
21415 struct scratch_reg sr;
21417 get_scratch_register_on_entry (&sr, regno1, live_regs);
21419 emit_move_insn (reg1, GEN_INT (first));
21422 /* Step 1: round SIZE to the previous multiple of the interval. */
21424 rounded_size = size & -PROBE_INTERVAL;
21425 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21428 /* Step 2: compute initial and final value of the loop counter. */
21430 /* TEST_ADDR = SP + FIRST. */
21431 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21433 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21434 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21437 /* Step 3: the loop
21441 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21442 probe at TEST_ADDR
21444 while (TEST_ADDR != LAST_ADDR)
21446 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21447 until it is equal to ROUNDED_SIZE. */
21449 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21452 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21453 that SIZE is equal to ROUNDED_SIZE. */
21455 if (size != rounded_size)
21457 HOST_WIDE_INT rem = size - rounded_size;
21459 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21461 emit_set_insn (sr.reg,
21462 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21463 emit_stack_probe (plus_constant (Pmode, sr.reg,
21464 PROBE_INTERVAL - rem));
21466 else
21467 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21470 release_scratch_register_on_entry (&sr);
21473 /* Make sure nothing is scheduled before we are done. */
21474 emit_insn (gen_blockage ());
21477 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21478 absolute addresses. */
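/* The emitted loop looks like this (illustrative; the scratch register names
   depend on the callers above, and #4096 stands for PROBE_INTERVAL):
       .LPSRL0:
               sub     r4, r4, #4096
               str     r0, [r4, #0]
               cmp     r4, r5
               bne     .LPSRL0
 */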
21480 const char *
21481 output_probe_stack_range (rtx reg1, rtx reg2)
21483 static int labelno = 0;
21484 char loop_lab[32];
21485 rtx xops[2];
21487 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21489 /* Loop. */
21490 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21492 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21493 xops[0] = reg1;
21494 xops[1] = GEN_INT (PROBE_INTERVAL);
21495 output_asm_insn ("sub\t%0, %0, %1", xops);
21497 /* Probe at TEST_ADDR. */
21498 output_asm_insn ("str\tr0, [%0, #0]", xops);
21500 /* Test if TEST_ADDR == LAST_ADDR. */
21501 xops[1] = reg2;
21502 output_asm_insn ("cmp\t%0, %1", xops);
21504 /* Branch. */
21505 fputs ("\tbne\t", asm_out_file);
21506 assemble_name_raw (asm_out_file, loop_lab);
21507 fputc ('\n', asm_out_file);
21509 return "";
21512 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21513 function. */
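/* As a rough illustration (not exhaustive), a classic APCS ARM-mode prologue
   produced by this function looks like:
       mov     ip, sp
       stmfd   sp!, {fp, ip, lr, pc}
       sub     fp, ip, #4
       sub     sp, sp, #<locals>
   while non-APCS and Thumb-2 prologues use simpler push/sub sequences.  */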
21514 void
21515 arm_expand_prologue (void)
21517 rtx amount;
21518 rtx insn;
21519 rtx ip_rtx;
21520 unsigned long live_regs_mask;
21521 unsigned long func_type;
21522 int fp_offset = 0;
21523 int saved_pretend_args = 0;
21524 int saved_regs = 0;
21525 unsigned HOST_WIDE_INT args_to_push;
21526 HOST_WIDE_INT size;
21527 arm_stack_offsets *offsets;
21528 bool clobber_ip;
21530 func_type = arm_current_func_type ();
21532 /* Naked functions don't have prologues. */
21533 if (IS_NAKED (func_type))
21535 if (flag_stack_usage_info)
21536 current_function_static_stack_size = 0;
21537 return;
21540 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21541 args_to_push = crtl->args.pretend_args_size;
21543 /* Compute which register we will have to save onto the stack. */
21544 offsets = arm_get_frame_offsets ();
21545 live_regs_mask = offsets->saved_regs_mask;
21547 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21549 if (IS_STACKALIGN (func_type))
21551 rtx r0, r1;
21553 /* Handle a word-aligned stack pointer. We generate the following:
21555 mov r0, sp
21556 bic r1, r0, #7
21557 mov sp, r1
21558 <save and restore r0 in normal prologue/epilogue>
21559 mov sp, r0
21560 bx lr
21562 The unwinder doesn't need to know about the stack realignment.
21563 Just tell it we saved SP in r0. */
21564 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21566 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21567 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21569 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21570 RTX_FRAME_RELATED_P (insn) = 1;
21571 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21573 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21575 /* ??? The CFA changes here, which may cause GDB to conclude that it
21576 has entered a different function. That said, the unwind info is
21577 correct, individually, before and after this instruction because
21578 we've described the save of SP, which will override the default
21579 handling of SP as restoring from the CFA. */
21580 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21583 /* The static chain register is the same as the IP register. If it is
21584 clobbered when creating the frame, we need to save and restore it. */
21585 clobber_ip = IS_NESTED (func_type)
21586 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21587 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21588 && !df_regs_ever_live_p (LR_REGNUM)
21589 && arm_r3_live_at_start_p ()));
21591 /* Find somewhere to store IP whilst the frame is being created.
21592 We try the following places in order:
21594 1. The last argument register r3 if it is available.
21595 2. A slot on the stack above the frame if there are no
21596 arguments to push onto the stack.
21597 3. Register r3 again, after pushing the argument registers
21598 onto the stack, if this is a varargs function.
21599 4. The last slot on the stack created for the arguments to
21600 push, if this isn't a varargs function.
21602 Note - we only need to tell the dwarf2 backend about the SP
21603 adjustment in the second variant; the static chain register
21604 doesn't need to be unwound, as it doesn't contain a value
21605 inherited from the caller. */
21606 if (clobber_ip)
21608 if (!arm_r3_live_at_start_p ())
21609 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21610 else if (args_to_push == 0)
21612 rtx addr, dwarf;
21614 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21615 saved_regs += 4;
21617 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21618 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21619 fp_offset = 4;
21621 /* Just tell the dwarf backend that we adjusted SP. */
21622 dwarf = gen_rtx_SET (stack_pointer_rtx,
21623 plus_constant (Pmode, stack_pointer_rtx,
21624 -fp_offset));
21625 RTX_FRAME_RELATED_P (insn) = 1;
21626 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21628 else
21630 /* Store the args on the stack. */
21631 if (cfun->machine->uses_anonymous_args)
21633 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21634 (0xf0 >> (args_to_push / 4)) & 0xf);
21635 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21636 saved_pretend_args = 1;
21638 else
21640 rtx addr, dwarf;
21642 if (args_to_push == 4)
21643 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21644 else
21645 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21646 plus_constant (Pmode,
21647 stack_pointer_rtx,
21648 -args_to_push));
21650 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21652 /* Just tell the dwarf backend that we adjusted SP. */
21653 dwarf = gen_rtx_SET (stack_pointer_rtx,
21654 plus_constant (Pmode, stack_pointer_rtx,
21655 -args_to_push));
21656 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21659 RTX_FRAME_RELATED_P (insn) = 1;
21660 fp_offset = args_to_push;
21661 args_to_push = 0;
21665 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21667 if (IS_INTERRUPT (func_type))
21669 /* Interrupt functions must not corrupt any registers.
21670 Creating a frame pointer, however, corrupts the IP
21671 register, so we must push it first. */
21672 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21674 /* Do not set RTX_FRAME_RELATED_P on this insn.
21675 The dwarf stack unwinding code only wants to see one
21676 stack decrement per function, and this is not it. If
21677 this instruction is labeled as being part of the frame
21678 creation sequence then dwarf2out_frame_debug_expr will
21679 die when it encounters the assignment of IP to FP
21680 later on, since the use of SP here establishes SP as
21681 the CFA register and not IP.
21683 Anyway this instruction is not really part of the stack
21684 frame creation although it is part of the prologue. */
21687 insn = emit_set_insn (ip_rtx,
21688 plus_constant (Pmode, stack_pointer_rtx,
21689 fp_offset));
21690 RTX_FRAME_RELATED_P (insn) = 1;
21693 if (args_to_push)
21695 /* Push the argument registers, or reserve space for them. */
21696 if (cfun->machine->uses_anonymous_args)
21697 insn = emit_multi_reg_push
21698 ((0xf0 >> (args_to_push / 4)) & 0xf,
21699 (0xf0 >> (args_to_push / 4)) & 0xf);
21700 else
21701 insn = emit_insn
21702 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21703 GEN_INT (- args_to_push)));
21704 RTX_FRAME_RELATED_P (insn) = 1;
21707 /* If this is an interrupt service routine, and the link register
21708 is going to be pushed, and we're not generating the extra
21709 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21710 subtracting four from LR now will mean that the function return
21711 can be done with a single instruction. */
21712 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21713 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21714 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21715 && TARGET_ARM)
21717 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21719 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21722 if (live_regs_mask)
21724 unsigned long dwarf_regs_mask = live_regs_mask;
21726 saved_regs += bit_count (live_regs_mask) * 4;
21727 if (optimize_size && !frame_pointer_needed
21728 && saved_regs == offsets->saved_regs - offsets->saved_args)
21730 /* If no coprocessor registers are being pushed and we don't have
21731 to worry about a frame pointer then push extra registers to
21732 create the stack frame. This is done in a way that does not
21733 alter the frame layout, so it is independent of the epilogue. */
21734 int n;
21735 int frame;
21736 n = 0;
21737 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21738 n++;
21739 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21740 if (frame && n * 4 >= frame)
21742 n = frame / 4;
21743 live_regs_mask |= (1 << n) - 1;
21744 saved_regs += frame;
21748 if (TARGET_LDRD
21749 && current_tune->prefer_ldrd_strd
21750 && !optimize_function_for_size_p (cfun))
21752 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21753 if (TARGET_THUMB2)
21754 thumb2_emit_strd_push (live_regs_mask);
21755 else if (TARGET_ARM
21756 && !TARGET_APCS_FRAME
21757 && !IS_INTERRUPT (func_type))
21758 arm_emit_strd_push (live_regs_mask);
21759 else
21761 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21762 RTX_FRAME_RELATED_P (insn) = 1;
21765 else
21767 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21768 RTX_FRAME_RELATED_P (insn) = 1;
21772 if (! IS_VOLATILE (func_type))
21773 saved_regs += arm_save_coproc_regs ();
21775 if (frame_pointer_needed && TARGET_ARM)
21777 /* Create the new frame pointer. */
21778 if (TARGET_APCS_FRAME)
21780 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21781 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21782 RTX_FRAME_RELATED_P (insn) = 1;
21784 else
21786 insn = GEN_INT (saved_regs - (4 + fp_offset));
21787 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21788 stack_pointer_rtx, insn));
21789 RTX_FRAME_RELATED_P (insn) = 1;
21793 size = offsets->outgoing_args - offsets->saved_args;
21794 if (flag_stack_usage_info)
21795 current_function_static_stack_size = size;
21797 /* If this isn't an interrupt service routine and we have a frame, then do
21798 stack checking. We use IP as the first scratch register, except for
21799 non-APCS nested functions when LR or r3 is available (see clobber_ip). */
21800 if (!IS_INTERRUPT (func_type)
21801 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21803 unsigned int regno;
21805 if (!IS_NESTED (func_type) || clobber_ip)
21806 regno = IP_REGNUM;
21807 else if (df_regs_ever_live_p (LR_REGNUM))
21808 regno = LR_REGNUM;
21809 else
21810 regno = 3;
21812 if (crtl->is_leaf && !cfun->calls_alloca)
21814 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21815 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21816 size - STACK_CHECK_PROTECT,
21817 regno, live_regs_mask);
21819 else if (size > 0)
21820 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21821 regno, live_regs_mask);
21824 /* Recover the static chain register. */
21825 if (clobber_ip)
21827 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21828 insn = gen_rtx_REG (SImode, 3);
21829 else
21831 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21832 insn = gen_frame_mem (SImode, insn);
21834 emit_set_insn (ip_rtx, insn);
21835 emit_insn (gen_force_register_use (ip_rtx));
21838 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21840 /* This add can produce multiple insns for a large constant, so we
21841 need to get tricky. */
21842 rtx_insn *last = get_last_insn ();
21844 amount = GEN_INT (offsets->saved_args + saved_regs
21845 - offsets->outgoing_args);
21847 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21848 amount));
21851 last = last ? NEXT_INSN (last) : get_insns ();
21852 RTX_FRAME_RELATED_P (last) = 1;
21854 while (last != insn);
21856 /* If the frame pointer is needed, emit a special barrier that
21857 will prevent the scheduler from moving stores to the frame
21858 before the stack adjustment. */
21859 if (frame_pointer_needed)
21860 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21861 hard_frame_pointer_rtx));
21865 if (frame_pointer_needed && TARGET_THUMB2)
21866 thumb_set_frame_pointer (offsets);
21868 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21870 unsigned long mask;
21872 mask = live_regs_mask;
21873 mask &= THUMB2_WORK_REGS;
21874 if (!IS_NESTED (func_type))
21875 mask |= (1 << IP_REGNUM);
21876 arm_load_pic_register (mask);
21879 /* If we are profiling, make sure no instructions are scheduled before
21880 the call to mcount. Similarly if the user has requested no
21881 scheduling in the prolog. Similarly if we want non-call exceptions
21882 using the EABI unwinder, to prevent faulting instructions from being
21883 swapped with a stack adjustment. */
21884 if (crtl->profile || !TARGET_SCHED_PROLOG
21885 || (arm_except_unwind_info (&global_options) == UI_TARGET
21886 && cfun->can_throw_non_call_exceptions))
21887 emit_insn (gen_blockage ());
21889 /* If the link register is being kept alive, with the return address in it,
21890 then make sure that it does not get reused by the ce2 pass. */
21891 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21892 cfun->machine->lr_save_eliminated = 1;
21895 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21896 static void
21897 arm_print_condition (FILE *stream)
21899 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21901 /* Branch conversion is not implemented for Thumb-2. */
21902 if (TARGET_THUMB)
21904 output_operand_lossage ("predicated Thumb instruction");
21905 return;
21907 if (current_insn_predicate != NULL)
21909 output_operand_lossage
21910 ("predicated instruction in conditional sequence");
21911 return;
21914 fputs (arm_condition_codes[arm_current_cc], stream);
21916 else if (current_insn_predicate)
21918 enum arm_cond_code code;
21920 if (TARGET_THUMB1)
21922 output_operand_lossage ("predicated Thumb instruction");
21923 return;
21926 code = get_arm_condition_code (current_insn_predicate);
21927 fputs (arm_condition_codes[code], stream);
21932 /* Globally reserved letters: acln
21933 Punctuation letters currently used: @_|?().!#
21934 Lower case letters currently used: bcdefhimpqtvwxyz
21935 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21936 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21938 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21940 If CODE is 'd', then the X is a condition operand and the instruction
21941 should only be executed if the condition is true.
21942 If CODE is 'D', then the X is a condition operand and the instruction
21943 should only be executed if the condition is false: however, if the mode
21944 of the comparison is CCFPEmode, then always execute the instruction -- we
21945 do this because in these circumstances !GE does not necessarily imply LT;
21946 in these cases the instruction pattern will take care to make sure that
21947 an instruction containing %d will follow, thereby undoing the effects of
21948 doing this instruction unconditionally.
21949 If CODE is 'N' then X is a floating point operand that must be negated
21950 before output.
21951 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21952 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
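/* A couple of illustrative cases: '%B' applied to (const_int 5) prints -6
   (the bitwise inverse), and '%M' applied to a DImode value held in r0
   prints {r0-r1}.  */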
21953 static void
21954 arm_print_operand (FILE *stream, rtx x, int code)
21956 switch (code)
21958 case '@':
21959 fputs (ASM_COMMENT_START, stream);
21960 return;
21962 case '_':
21963 fputs (user_label_prefix, stream);
21964 return;
21966 case '|':
21967 fputs (REGISTER_PREFIX, stream);
21968 return;
21970 case '?':
21971 arm_print_condition (stream);
21972 return;
21974 case '.':
21975 /* The current condition code for a condition code setting instruction.
21976 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21977 fputc('s', stream);
21978 arm_print_condition (stream);
21979 return;
21981 case '!':
21982 /* If the instruction is conditionally executed then print
21983 the current condition code, otherwise print 's'. */
21984 gcc_assert (TARGET_THUMB2);
21985 if (current_insn_predicate)
21986 arm_print_condition (stream);
21987 else
21988 fputc('s', stream);
21989 break;
21991 /* %# is a "break" sequence. It doesn't output anything, but is used to
21992 separate e.g. operand numbers from following text, if that text consists
21993 of further digits which we don't want to be part of the operand
21994 number. */
21995 case '#':
21996 return;
21998 case 'N':
22000 REAL_VALUE_TYPE r;
22001 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22002 fprintf (stream, "%s", fp_const_from_val (&r));
22004 return;
22006 /* An integer or symbol address without a preceding # sign. */
22007 case 'c':
22008 switch (GET_CODE (x))
22010 case CONST_INT:
22011 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22012 break;
22014 case SYMBOL_REF:
22015 output_addr_const (stream, x);
22016 break;
22018 case CONST:
22019 if (GET_CODE (XEXP (x, 0)) == PLUS
22020 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22022 output_addr_const (stream, x);
22023 break;
22025 /* Fall through. */
22027 default:
22028 output_operand_lossage ("Unsupported operand for code '%c'", code);
22030 return;
22032 /* An integer that we want to print in HEX. */
22033 case 'x':
22034 switch (GET_CODE (x))
22036 case CONST_INT:
22037 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22038 break;
22040 default:
22041 output_operand_lossage ("Unsupported operand for code '%c'", code);
22043 return;
22045 case 'B':
22046 if (CONST_INT_P (x))
22048 HOST_WIDE_INT val;
22049 val = ARM_SIGN_EXTEND (~INTVAL (x));
22050 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22052 else
22054 putc ('~', stream);
22055 output_addr_const (stream, x);
22057 return;
22059 case 'b':
22060 /* Print the log2 of a CONST_INT. */
22062 HOST_WIDE_INT val;
22064 if (!CONST_INT_P (x)
22065 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22066 output_operand_lossage ("Unsupported operand for code '%c'", code);
22067 else
22068 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22070 return;
22072 case 'L':
22073 /* The low 16 bits of an immediate constant. */
22074 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22075 return;
22077 case 'i':
22078 fprintf (stream, "%s", arithmetic_instr (x, 1));
22079 return;
22081 case 'I':
22082 fprintf (stream, "%s", arithmetic_instr (x, 0));
22083 return;
22085 case 'S':
22087 HOST_WIDE_INT val;
22088 const char *shift;
22090 shift = shift_op (x, &val);
22092 if (shift)
22094 fprintf (stream, ", %s ", shift);
22095 if (val == -1)
22096 arm_print_operand (stream, XEXP (x, 1), 0);
22097 else
22098 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22101 return;
22103 /* An explanation of the 'Q', 'R' and 'H' register operands:
22105 In a pair of registers containing a DI or DF value the 'Q'
22106 operand returns the register number of the register containing
22107 the least significant part of the value. The 'R' operand returns
22108 the register number of the register containing the most
22109 significant part of the value.
22111 The 'H' operand returns the higher of the two register numbers.
22112 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22113 same as the 'Q' operand, since the most significant part of the
22114 value is held in the lower number register. The reverse is true
22115 on systems where WORDS_BIG_ENDIAN is false.
22117 The purpose of these operands is to distinguish between cases
22118 where the endian-ness of the values is important (for example
22119 when they are added together), and cases where the endian-ness
22120 is irrelevant, but the order of register operations is important.
22121 For example when loading a value from memory into a register
22122 pair, the endian-ness does not matter. Provided that the value
22123 from the lower memory address is put into the lower numbered
22124 register, and the value from the higher address is put into the
22125 higher numbered register, the load will work regardless of whether
22126 the value being loaded is big-wordian or little-wordian. The
22127 order of the two register loads can matter however, if the address
22128 of the memory location is actually held in one of the registers
22129 being overwritten by the load.
22131 The 'Q' and 'R' constraints are also available for 64-bit
22132 constants. */
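/* For example, for a DImode value in the register pair {r0, r1} on a
   little-endian target (WORDS_BIG_ENDIAN false), '%Q' prints r0, '%R'
   prints r1 and '%H' also prints r1 (illustrative).  */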
22133 case 'Q':
22134 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22136 rtx part = gen_lowpart (SImode, x);
22137 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22138 return;
22141 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22143 output_operand_lossage ("invalid operand for code '%c'", code);
22144 return;
22147 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22148 return;
22150 case 'R':
22151 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22153 machine_mode mode = GET_MODE (x);
22154 rtx part;
22156 if (mode == VOIDmode)
22157 mode = DImode;
22158 part = gen_highpart_mode (SImode, mode, x);
22159 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22160 return;
22163 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22165 output_operand_lossage ("invalid operand for code '%c'", code);
22166 return;
22169 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22170 return;
22172 case 'H':
22173 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22175 output_operand_lossage ("invalid operand for code '%c'", code);
22176 return;
22179 asm_fprintf (stream, "%r", REGNO (x) + 1);
22180 return;
22182 case 'J':
22183 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22185 output_operand_lossage ("invalid operand for code '%c'", code);
22186 return;
22189 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22190 return;
22192 case 'K':
22193 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22195 output_operand_lossage ("invalid operand for code '%c'", code);
22196 return;
22199 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22200 return;
22202 case 'm':
22203 asm_fprintf (stream, "%r",
22204 REG_P (XEXP (x, 0))
22205 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22206 return;
22208 case 'M':
22209 asm_fprintf (stream, "{%r-%r}",
22210 REGNO (x),
22211 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22212 return;
22214 /* Like 'M', but writing doubleword vector registers, for use by Neon
22215 insns. */
22216 case 'h':
22218 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22219 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22220 if (numregs == 1)
22221 asm_fprintf (stream, "{d%d}", regno);
22222 else
22223 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22225 return;
22227 case 'd':
22228 /* CONST_TRUE_RTX means always -- that's the default. */
22229 if (x == const_true_rtx)
22230 return;
22232 if (!COMPARISON_P (x))
22234 output_operand_lossage ("invalid operand for code '%c'", code);
22235 return;
22238 fputs (arm_condition_codes[get_arm_condition_code (x)],
22239 stream);
22240 return;
22242 case 'D':
22243 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22244 want to do that. */
22245 if (x == const_true_rtx)
22247 output_operand_lossage ("instruction never executed");
22248 return;
22250 if (!COMPARISON_P (x))
22252 output_operand_lossage ("invalid operand for code '%c'", code);
22253 return;
22256 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22257 (get_arm_condition_code (x))],
22258 stream);
22259 return;
22261 case 's':
22262 case 'V':
22263 case 'W':
22264 case 'X':
22265 case 'Y':
22266 case 'Z':
22267 /* Former Maverick support, removed after GCC-4.7. */
22268 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22269 return;
22271 case 'U':
22272 if (!REG_P (x)
22273 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22274 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22275 /* Bad value for wCG register number. */
22277 output_operand_lossage ("invalid operand for code '%c'", code);
22278 return;
22281 else
22282 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22283 return;
22285 /* Print an iWMMXt control register name. */
22286 case 'w':
22287 if (!CONST_INT_P (x)
22288 || INTVAL (x) < 0
22289 || INTVAL (x) >= 16)
22290 /* Bad value for wC register number. */
22292 output_operand_lossage ("invalid operand for code '%c'", code);
22293 return;
22296 else
22298 static const char * wc_reg_names [16] =
22300 "wCID", "wCon", "wCSSF", "wCASF",
22301 "wC4", "wC5", "wC6", "wC7",
22302 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22303 "wC12", "wC13", "wC14", "wC15"
22306 fputs (wc_reg_names [INTVAL (x)], stream);
22308 return;
22310 /* Print the high single-precision register of a VFP double-precision
22311 register. */
22312 case 'p':
22314 machine_mode mode = GET_MODE (x);
22315 int regno;
22317 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22319 output_operand_lossage ("invalid operand for code '%c'", code);
22320 return;
22323 regno = REGNO (x);
22324 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22326 output_operand_lossage ("invalid operand for code '%c'", code);
22327 return;
22330 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22332 return;
22334 /* Print a VFP/Neon double precision or quad precision register name. */
22335 case 'P':
22336 case 'q':
22338 machine_mode mode = GET_MODE (x);
22339 int is_quad = (code == 'q');
22340 int regno;
22342 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22344 output_operand_lossage ("invalid operand for code '%c'", code);
22345 return;
22348 if (!REG_P (x)
22349 || !IS_VFP_REGNUM (REGNO (x)))
22351 output_operand_lossage ("invalid operand for code '%c'", code);
22352 return;
22355 regno = REGNO (x);
22356 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22357 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22359 output_operand_lossage ("invalid operand for code '%c'", code);
22360 return;
22363 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22364 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22366 return;
22368 /* These two codes print the low/high doubleword register of a Neon quad
22369 register, respectively. For pair-structure types, can also print
22370 low/high quadword registers. */
22371 case 'e':
22372 case 'f':
22374 machine_mode mode = GET_MODE (x);
22375 int regno;
22377 if ((GET_MODE_SIZE (mode) != 16
22378 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22380 output_operand_lossage ("invalid operand for code '%c'", code);
22381 return;
22384 regno = REGNO (x);
22385 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22387 output_operand_lossage ("invalid operand for code '%c'", code);
22388 return;
22391 if (GET_MODE_SIZE (mode) == 16)
22392 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22393 + (code == 'f' ? 1 : 0));
22394 else
22395 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22396 + (code == 'f' ? 1 : 0));
22398 return;
22400 /* Print a VFPv3 floating-point constant, represented as an integer
22401 index. */
22402 case 'G':
22404 int index = vfp3_const_double_index (x);
22405 gcc_assert (index != -1);
22406 fprintf (stream, "%d", index);
22408 return;
22410 /* Print bits representing opcode features for Neon.
22412 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22413 and polynomials as unsigned.
22415 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22417 Bit 2 is 1 for rounding functions, 0 otherwise. */
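/* For instance (illustrative), an operand value of 5 (signed, with the
   rounding bit set) makes '%T' below print 's' and '%O' print 'r'.  */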
22419 /* Identify the type as 's', 'u', 'p' or 'f'. */
22420 case 'T':
22422 HOST_WIDE_INT bits = INTVAL (x);
22423 fputc ("uspf"[bits & 3], stream);
22425 return;
22427 /* Likewise, but signed and unsigned integers are both 'i'. */
22428 case 'F':
22430 HOST_WIDE_INT bits = INTVAL (x);
22431 fputc ("iipf"[bits & 3], stream);
22433 return;
22435 /* As for 'T', but emit 'u' instead of 'p'. */
22436 case 't':
22438 HOST_WIDE_INT bits = INTVAL (x);
22439 fputc ("usuf"[bits & 3], stream);
22441 return;
22443 /* Bit 2: rounding (vs none). */
22444 case 'O':
22446 HOST_WIDE_INT bits = INTVAL (x);
22447 fputs ((bits & 4) != 0 ? "r" : "", stream);
22449 return;
22451 /* Memory operand for vld1/vst1 instruction. */
22452 case 'A':
22454 rtx addr;
22455 bool postinc = FALSE;
22456 rtx postinc_reg = NULL;
22457 unsigned align, memsize, align_bits;
22459 gcc_assert (MEM_P (x));
22460 addr = XEXP (x, 0);
22461 if (GET_CODE (addr) == POST_INC)
22463 postinc = 1;
22464 addr = XEXP (addr, 0);
22466 if (GET_CODE (addr) == POST_MODIFY)
22468 postinc_reg = XEXP( XEXP (addr, 1), 1);
22469 addr = XEXP (addr, 0);
22471 asm_fprintf (stream, "[%r", REGNO (addr));
22473 /* We know the alignment of this access, so we can emit a hint in the
22474 instruction (for some alignments) as an aid to the memory subsystem
22475 of the target. */
22476 align = MEM_ALIGN (x) >> 3;
22477 memsize = MEM_SIZE (x);
22479 /* Only certain alignment specifiers are supported by the hardware. */
22480 if (memsize == 32 && (align % 32) == 0)
22481 align_bits = 256;
22482 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22483 align_bits = 128;
22484 else if (memsize >= 8 && (align % 8) == 0)
22485 align_bits = 64;
22486 else
22487 align_bits = 0;
22489 if (align_bits != 0)
22490 asm_fprintf (stream, ":%d", align_bits);
22492 asm_fprintf (stream, "]");
22494 if (postinc)
22495 fputs("!", stream);
22496 if (postinc_reg)
22497 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22499 return;
22501 case 'C':
22503 rtx addr;
22505 gcc_assert (MEM_P (x));
22506 addr = XEXP (x, 0);
22507 gcc_assert (REG_P (addr));
22508 asm_fprintf (stream, "[%r]", REGNO (addr));
22510 return;
22512 /* Translate an S register number into a D register number and element index. */
22513 case 'y':
22515 machine_mode mode = GET_MODE (x);
22516 int regno;
22518 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22520 output_operand_lossage ("invalid operand for code '%c'", code);
22521 return;
22524 regno = REGNO (x);
22525 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22527 output_operand_lossage ("invalid operand for code '%c'", code);
22528 return;
22531 regno = regno - FIRST_VFP_REGNUM;
22532 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22534 return;
22536 case 'v':
22537 gcc_assert (CONST_DOUBLE_P (x));
22538 int result;
22539 result = vfp3_const_double_for_fract_bits (x);
22540 if (result == 0)
22541 result = vfp3_const_double_for_bits (x);
22542 fprintf (stream, "#%d", result);
22543 return;
22545 /* Register specifier for vld1.16/vst1.16. Translate the S register
22546 number into a D register number and element index. */
22547 case 'z':
22549 machine_mode mode = GET_MODE (x);
22550 int regno;
22552 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22554 output_operand_lossage ("invalid operand for code '%c'", code);
22555 return;
22558 regno = REGNO (x);
22559 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22561 output_operand_lossage ("invalid operand for code '%c'", code);
22562 return;
22565 regno = regno - FIRST_VFP_REGNUM;
22566 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22568 return;
22570 default:
22571 if (x == 0)
22573 output_operand_lossage ("missing operand");
22574 return;
22577 switch (GET_CODE (x))
22579 case REG:
22580 asm_fprintf (stream, "%r", REGNO (x));
22581 break;
22583 case MEM:
22584 output_address (GET_MODE (x), XEXP (x, 0));
22585 break;
22587 case CONST_DOUBLE:
22589 char fpstr[20];
22590 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22591 sizeof (fpstr), 0, 1);
22592 fprintf (stream, "#%s", fpstr);
22594 break;
22596 default:
22597 gcc_assert (GET_CODE (x) != NEG);
22598 fputc ('#', stream);
22599 if (GET_CODE (x) == HIGH)
22601 fputs (":lower16:", stream);
22602 x = XEXP (x, 0);
22605 output_addr_const (stream, x);
22606 break;
22611 /* Target hook for printing a memory address. */
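/* Illustrative 32-bit outputs: (plus (reg r0) (const_int 4)) prints as
   [r0, #4], (post_inc (reg r0)) in SImode prints as [r0], #4, and a
   register plus a scaled index prints as e.g. [r0, r1, lsl #2].  */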
22612 static void
22613 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22615 if (TARGET_32BIT)
22617 int is_minus = GET_CODE (x) == MINUS;
22619 if (REG_P (x))
22620 asm_fprintf (stream, "[%r]", REGNO (x));
22621 else if (GET_CODE (x) == PLUS || is_minus)
22623 rtx base = XEXP (x, 0);
22624 rtx index = XEXP (x, 1);
22625 HOST_WIDE_INT offset = 0;
22626 if (!REG_P (base)
22627 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22629 /* Ensure that BASE is a register
22630 (one of them must be). Also ensure
22631 that SP is not used as an index register. */
22632 std::swap (base, index);
22634 switch (GET_CODE (index))
22636 case CONST_INT:
22637 offset = INTVAL (index);
22638 if (is_minus)
22639 offset = -offset;
22640 asm_fprintf (stream, "[%r, #%wd]",
22641 REGNO (base), offset);
22642 break;
22644 case REG:
22645 asm_fprintf (stream, "[%r, %s%r]",
22646 REGNO (base), is_minus ? "-" : "",
22647 REGNO (index));
22648 break;
22650 case MULT:
22651 case ASHIFTRT:
22652 case LSHIFTRT:
22653 case ASHIFT:
22654 case ROTATERT:
22656 asm_fprintf (stream, "[%r, %s%r",
22657 REGNO (base), is_minus ? "-" : "",
22658 REGNO (XEXP (index, 0)));
22659 arm_print_operand (stream, index, 'S');
22660 fputs ("]", stream);
22661 break;
22664 default:
22665 gcc_unreachable ();
22668 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22669 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22671 gcc_assert (REG_P (XEXP (x, 0)));
22673 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22674 asm_fprintf (stream, "[%r, #%s%d]!",
22675 REGNO (XEXP (x, 0)),
22676 GET_CODE (x) == PRE_DEC ? "-" : "",
22677 GET_MODE_SIZE (mode));
22678 else
22679 asm_fprintf (stream, "[%r], #%s%d",
22680 REGNO (XEXP (x, 0)),
22681 GET_CODE (x) == POST_DEC ? "-" : "",
22682 GET_MODE_SIZE (mode));
22684 else if (GET_CODE (x) == PRE_MODIFY)
22686 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22687 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22688 asm_fprintf (stream, "#%wd]!",
22689 INTVAL (XEXP (XEXP (x, 1), 1)));
22690 else
22691 asm_fprintf (stream, "%r]!",
22692 REGNO (XEXP (XEXP (x, 1), 1)));
22694 else if (GET_CODE (x) == POST_MODIFY)
22696 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22697 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22698 asm_fprintf (stream, "#%wd",
22699 INTVAL (XEXP (XEXP (x, 1), 1)));
22700 else
22701 asm_fprintf (stream, "%r",
22702 REGNO (XEXP (XEXP (x, 1), 1)));
22704 else output_addr_const (stream, x);
22706 else
22708 if (REG_P (x))
22709 asm_fprintf (stream, "[%r]", REGNO (x));
22710 else if (GET_CODE (x) == POST_INC)
22711 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22712 else if (GET_CODE (x) == PLUS)
22714 gcc_assert (REG_P (XEXP (x, 0)));
22715 if (CONST_INT_P (XEXP (x, 1)))
22716 asm_fprintf (stream, "[%r, #%wd]",
22717 REGNO (XEXP (x, 0)),
22718 INTVAL (XEXP (x, 1)));
22719 else
22720 asm_fprintf (stream, "[%r, %r]",
22721 REGNO (XEXP (x, 0)),
22722 REGNO (XEXP (x, 1)));
22724 else
22725 output_addr_const (stream, x);
22729 /* Target hook for indicating whether a punctuation character for
22730 TARGET_PRINT_OPERAND is valid. */
22731 static bool
22732 arm_print_operand_punct_valid_p (unsigned char code)
22734 return (code == '@' || code == '|' || code == '.'
22735 || code == '(' || code == ')' || code == '#'
22736 || (TARGET_32BIT && (code == '?'))
22737 || (TARGET_THUMB2 && (code == '!'))
22738 || (TARGET_THUMB && (code == '_')));
22741 /* Target hook for assembling integer objects. The ARM version needs to
22742 handle word-sized values specially. */
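/* For example (illustrative), a word-sized reference to a local symbol in a
   PIC constant table where data is text-relative is emitted as
       .word   sym(GOTOFF)
   whereas a non-local symbol gets the (GOT) suffix instead.  */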
22743 static bool
22744 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22746 machine_mode mode;
22748 if (size == UNITS_PER_WORD && aligned_p)
22750 fputs ("\t.word\t", asm_out_file);
22751 output_addr_const (asm_out_file, x);
22753 /* Mark symbols as position independent. We only do this in the
22754 .text segment, not in the .data segment. */
22755 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22756 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22758 /* See legitimize_pic_address for an explanation of the
22759 TARGET_VXWORKS_RTP check. */
22760 if (!arm_pic_data_is_text_relative
22761 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22762 fputs ("(GOT)", asm_out_file);
22763 else
22764 fputs ("(GOTOFF)", asm_out_file);
22766 fputc ('\n', asm_out_file);
22767 return true;
22770 mode = GET_MODE (x);
22772 if (arm_vector_mode_supported_p (mode))
22774 int i, units;
22776 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22778 units = CONST_VECTOR_NUNITS (x);
22779 size = GET_MODE_UNIT_SIZE (mode);
22781 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22782 for (i = 0; i < units; i++)
22784 rtx elt = CONST_VECTOR_ELT (x, i);
22785 assemble_integer
22786 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22788 else
22789 for (i = 0; i < units; i++)
22791 rtx elt = CONST_VECTOR_ELT (x, i);
22792 assemble_real
22793 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22794 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22797 return true;
22800 return default_assemble_integer (x, size, aligned_p);
22803 static void
22804 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22806 section *s;
22808 if (!TARGET_AAPCS_BASED)
22810 (is_ctor ?
22811 default_named_section_asm_out_constructor
22812 : default_named_section_asm_out_destructor) (symbol, priority);
22813 return;
22816 /* Put these in the .init_array section, using a special relocation. */
22817 if (priority != DEFAULT_INIT_PRIORITY)
22819 char buf[18];
22820 sprintf (buf, "%s.%.5u",
22821 is_ctor ? ".init_array" : ".fini_array",
22822 priority);
22823 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22825 else if (is_ctor)
22826 s = ctors_section;
22827 else
22828 s = dtors_section;
22830 switch_to_section (s);
22831 assemble_align (POINTER_SIZE);
22832 fputs ("\t.word\t", asm_out_file);
22833 output_addr_const (asm_out_file, symbol);
22834 fputs ("(target1)\n", asm_out_file);
22837 /* Add a function to the list of static constructors. */
22839 static void
22840 arm_elf_asm_constructor (rtx symbol, int priority)
22842 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22845 /* Add a function to the list of static destructors. */
22847 static void
22848 arm_elf_asm_destructor (rtx symbol, int priority)
22850 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22853 /* A finite state machine takes care of noticing whether or not instructions
22854 can be conditionally executed, thus decreasing execution time and code
22855 size by deleting branch instructions. The fsm is controlled by
22856 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22858 /* The states of the fsm controlling condition codes are:
22859 0: normal, do nothing special
22860 1: make ASM_OUTPUT_OPCODE not output this instruction
22861 2: make ASM_OUTPUT_OPCODE not output this instruction
22862 3: make instructions conditional
22863 4: make instructions conditional
22865 State transitions (state->state by whom under condition):
22866 0 -> 1 final_prescan_insn if the `target' is a label
22867 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22868 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22869 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22870 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22871 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22872 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22873 (the target insn is arm_target_insn).
22875 If the jump clobbers the conditions then we use states 2 and 4.
22877 A similar thing can be done with conditional return insns.
22879 XXX In case the `target' is an unconditional branch, this conditionalising
22880 of the instructions always reduces code size, but not always execution
22881 time. But then, I want to reduce the code size to somewhere near what
22882 /bin/cc produces. */
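/* An illustrative ARM-mode transformation performed by this machinery:
       cmp     r0, #0
       beq     .L1
       add     r1, r1, #1
   .L1:
   becomes
       cmp     r0, #0
       addne   r1, r1, #1
   i.e. the branch is deleted and the skipped insn is made conditional.  */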
22884 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22885 instructions. When a COND_EXEC instruction is seen the subsequent
22886 instructions are scanned so that multiple conditional instructions can be
22887 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22888 specify the length and true/false mask for the IT block. These will be
22889 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
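/* For example (illustrative), two adjacent COND_EXEC insns with opposite
   conditions are emitted as a single IT block such as
       ite     eq
       moveq   r0, #1
       movne   r0, #0
 */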
22891 /* Returns the index of the ARM condition code string in
22892 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22893 COMPARISON should be an rtx like `(eq (...) (...))'. */
22895 enum arm_cond_code
22896 maybe_get_arm_condition_code (rtx comparison)
22898 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22899 enum arm_cond_code code;
22900 enum rtx_code comp_code = GET_CODE (comparison);
22902 if (GET_MODE_CLASS (mode) != MODE_CC)
22903 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22904 XEXP (comparison, 1));
22906 switch (mode)
22908 case CC_DNEmode: code = ARM_NE; goto dominance;
22909 case CC_DEQmode: code = ARM_EQ; goto dominance;
22910 case CC_DGEmode: code = ARM_GE; goto dominance;
22911 case CC_DGTmode: code = ARM_GT; goto dominance;
22912 case CC_DLEmode: code = ARM_LE; goto dominance;
22913 case CC_DLTmode: code = ARM_LT; goto dominance;
22914 case CC_DGEUmode: code = ARM_CS; goto dominance;
22915 case CC_DGTUmode: code = ARM_HI; goto dominance;
22916 case CC_DLEUmode: code = ARM_LS; goto dominance;
22917 case CC_DLTUmode: code = ARM_CC;
22919 dominance:
22920 if (comp_code == EQ)
22921 return ARM_INVERSE_CONDITION_CODE (code);
22922 if (comp_code == NE)
22923 return code;
22924 return ARM_NV;
22926 case CC_NOOVmode:
22927 switch (comp_code)
22929 case NE: return ARM_NE;
22930 case EQ: return ARM_EQ;
22931 case GE: return ARM_PL;
22932 case LT: return ARM_MI;
22933 default: return ARM_NV;
22936 case CC_Zmode:
22937 switch (comp_code)
22939 case NE: return ARM_NE;
22940 case EQ: return ARM_EQ;
22941 default: return ARM_NV;
22944 case CC_Nmode:
22945 switch (comp_code)
22947 case NE: return ARM_MI;
22948 case EQ: return ARM_PL;
22949 default: return ARM_NV;
22952 case CCFPEmode:
22953 case CCFPmode:
22954 /* We can handle all cases except UNEQ and LTGT. */
22955 switch (comp_code)
22957 case GE: return ARM_GE;
22958 case GT: return ARM_GT;
22959 case LE: return ARM_LS;
22960 case LT: return ARM_MI;
22961 case NE: return ARM_NE;
22962 case EQ: return ARM_EQ;
22963 case ORDERED: return ARM_VC;
22964 case UNORDERED: return ARM_VS;
22965 case UNLT: return ARM_LT;
22966 case UNLE: return ARM_LE;
22967 case UNGT: return ARM_HI;
22968 case UNGE: return ARM_PL;
22969 /* UNEQ and LTGT do not have a representation. */
22970 case UNEQ: /* Fall through. */
22971 case LTGT: /* Fall through. */
22972 default: return ARM_NV;
22975 case CC_SWPmode:
22976 switch (comp_code)
22978 case NE: return ARM_NE;
22979 case EQ: return ARM_EQ;
22980 case GE: return ARM_LE;
22981 case GT: return ARM_LT;
22982 case LE: return ARM_GE;
22983 case LT: return ARM_GT;
22984 case GEU: return ARM_LS;
22985 case GTU: return ARM_CC;
22986 case LEU: return ARM_CS;
22987 case LTU: return ARM_HI;
22988 default: return ARM_NV;
22991 case CC_Cmode:
22992 switch (comp_code)
22994 case LTU: return ARM_CS;
22995 case GEU: return ARM_CC;
22996 default: return ARM_NV;
22999 case CC_CZmode:
23000 switch (comp_code)
23002 case NE: return ARM_NE;
23003 case EQ: return ARM_EQ;
23004 case GEU: return ARM_CS;
23005 case GTU: return ARM_HI;
23006 case LEU: return ARM_LS;
23007 case LTU: return ARM_CC;
23008 default: return ARM_NV;
23011 case CC_NCVmode:
23012 switch (comp_code)
23014 case GE: return ARM_GE;
23015 case LT: return ARM_LT;
23016 case GEU: return ARM_CS;
23017 case LTU: return ARM_CC;
23018 default: return ARM_NV;
23021 case CCmode:
23022 switch (comp_code)
23024 case NE: return ARM_NE;
23025 case EQ: return ARM_EQ;
23026 case GE: return ARM_GE;
23027 case GT: return ARM_GT;
23028 case LE: return ARM_LE;
23029 case LT: return ARM_LT;
23030 case GEU: return ARM_CS;
23031 case GTU: return ARM_HI;
23032 case LEU: return ARM_LS;
23033 case LTU: return ARM_CC;
23034 default: return ARM_NV;
23037 default: gcc_unreachable ();
23041 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23042 static enum arm_cond_code
23043 get_arm_condition_code (rtx comparison)
23045 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23046 gcc_assert (code != ARM_NV);
23047 return code;
23050 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23051 instructions. */
23052 void
23053 thumb2_final_prescan_insn (rtx_insn *insn)
23055 rtx_insn *first_insn = insn;
23056 rtx body = PATTERN (insn);
23057 rtx predicate;
23058 enum arm_cond_code code;
23059 int n;
23060 int mask;
23061 int max;
23063 /* max_insns_skipped in the tune was already taken into account in the
23064 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
23065 we just emit the IT blocks as they are. It does not make sense to split
23066 the IT blocks. */
23067 max = MAX_INSN_PER_IT_BLOCK;
23069 /* Remove the previous insn from the count of insns to be output. */
23070 if (arm_condexec_count)
23071 arm_condexec_count--;
23073 /* Nothing to do if we are already inside a conditional block. */
23074 if (arm_condexec_count)
23075 return;
23077 if (GET_CODE (body) != COND_EXEC)
23078 return;
23080 /* Conditional jumps are implemented directly. */
23081 if (JUMP_P (insn))
23082 return;
23084 predicate = COND_EXEC_TEST (body);
23085 arm_current_cc = get_arm_condition_code (predicate);
23087 n = get_attr_ce_count (insn);
23088 arm_condexec_count = 1;
23089 arm_condexec_mask = (1 << n) - 1;
23090 arm_condexec_masklen = n;
23091 /* See if subsequent instructions can be combined into the same block. */
23092 for (;;)
23094 insn = next_nonnote_insn (insn);
23096 /* Jumping into the middle of an IT block is illegal, so a label or
23097 barrier terminates the block. */
23098 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23099 break;
23101 body = PATTERN (insn);
23102 /* USE and CLOBBER aren't really insns, so just skip them. */
23103 if (GET_CODE (body) == USE
23104 || GET_CODE (body) == CLOBBER)
23105 continue;
23107 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23108 if (GET_CODE (body) != COND_EXEC)
23109 break;
23110 /* Maximum number of conditionally executed instructions in a block. */
23111 n = get_attr_ce_count (insn);
23112 if (arm_condexec_masklen + n > max)
23113 break;
23115 predicate = COND_EXEC_TEST (body);
23116 code = get_arm_condition_code (predicate);
23117 mask = (1 << n) - 1;
23118 if (arm_current_cc == code)
23119 arm_condexec_mask |= (mask << arm_condexec_masklen);
23120 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23121 break;
23123 arm_condexec_count++;
23124 arm_condexec_masklen += n;
23126 /* A jump must be the last instruction in a conditional block. */
23127 if (JUMP_P (insn))
23128 break;
23130 /* Restore recog_data (getting the attributes of other insns can
23131 destroy this array, but final.c assumes that it remains intact
23132 across this call). */
23133 extract_constrain_insn_cached (first_insn);
23136 void
23137 arm_final_prescan_insn (rtx_insn *insn)
23139 /* BODY will hold the body of INSN. */
23140 rtx body = PATTERN (insn);
23142 /* This will be 1 if trying to repeat the trick, and things need to be
23143 reversed if it appears to fail. */
23144 int reverse = 0;
23146 /* If we start with a return insn, we only succeed if we find another one. */
23147 int seeking_return = 0;
23148 enum rtx_code return_code = UNKNOWN;
23150 /* START_INSN will hold the insn from where we start looking. This is the
23151 first insn after the following code_label if REVERSE is true. */
23152 rtx_insn *start_insn = insn;
23154 /* If in state 4, check if the target branch is reached, in order to
23155 change back to state 0. */
23156 if (arm_ccfsm_state == 4)
23158 if (insn == arm_target_insn)
23160 arm_target_insn = NULL;
23161 arm_ccfsm_state = 0;
23163 return;
23166 /* If in state 3, it is possible to repeat the trick if this insn is an
23167 unconditional branch to a label, the insn immediately following the branch
23168 is the previous target label (which is only used once), and the label this
23169 branch jumps to is not too far off. */
23170 if (arm_ccfsm_state == 3)
23172 if (simplejump_p (insn))
23174 start_insn = next_nonnote_insn (start_insn);
23175 if (BARRIER_P (start_insn))
23177 /* XXX Isn't this always a barrier? */
23178 start_insn = next_nonnote_insn (start_insn);
23180 if (LABEL_P (start_insn)
23181 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23182 && LABEL_NUSES (start_insn) == 1)
23183 reverse = TRUE;
23184 else
23185 return;
23187 else if (ANY_RETURN_P (body))
23189 start_insn = next_nonnote_insn (start_insn);
23190 if (BARRIER_P (start_insn))
23191 start_insn = next_nonnote_insn (start_insn);
23192 if (LABEL_P (start_insn)
23193 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23194 && LABEL_NUSES (start_insn) == 1)
23196 reverse = TRUE;
23197 seeking_return = 1;
23198 return_code = GET_CODE (body);
23200 else
23201 return;
23203 else
23204 return;
23207 gcc_assert (!arm_ccfsm_state || reverse);
23208 if (!JUMP_P (insn))
23209 return;
23211 /* This jump might be paralleled with a clobber of the condition codes;
23212 the jump should always come first. */
23213 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23214 body = XVECEXP (body, 0, 0);
23216 if (reverse
23217 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23218 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23220 int insns_skipped;
23221 int fail = FALSE, succeed = FALSE;
23222 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23223 int then_not_else = TRUE;
23224 rtx_insn *this_insn = start_insn;
23225 rtx label = 0;
23227 /* Register the insn jumped to. */
23228 if (reverse)
23230 if (!seeking_return)
23231 label = XEXP (SET_SRC (body), 0);
23233 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23234 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23235 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23237 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23238 then_not_else = FALSE;
23240 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23242 seeking_return = 1;
23243 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23245 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23247 seeking_return = 1;
23248 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23249 then_not_else = FALSE;
23251 else
23252 gcc_unreachable ();
23254 /* See how many insns this branch skips, and what kind of insns. If all
23255 insns are okay, and the label or unconditional branch to the same
23256 label is not too far away, succeed. */
23257 for (insns_skipped = 0;
23258 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23260 rtx scanbody;
23262 this_insn = next_nonnote_insn (this_insn);
23263 if (!this_insn)
23264 break;
23266 switch (GET_CODE (this_insn))
23268 case CODE_LABEL:
23269 /* Succeed if it is the target label, otherwise fail since
23270 control falls in from somewhere else. */
23271 if (this_insn == label)
23273 arm_ccfsm_state = 1;
23274 succeed = TRUE;
23276 else
23277 fail = TRUE;
23278 break;
23280 case BARRIER:
23281 /* Succeed if the following insn is the target label.
23282 Otherwise fail.
23283 If return insns are used then the last insn in a function
23284 will be a barrier. */
23285 this_insn = next_nonnote_insn (this_insn);
23286 if (this_insn && this_insn == label)
23288 arm_ccfsm_state = 1;
23289 succeed = TRUE;
23291 else
23292 fail = TRUE;
23293 break;
23295 case CALL_INSN:
23296 /* The AAPCS says that conditional calls should not be
23297 used since they make interworking inefficient (the
23298 linker can't transform BL<cond> into BLX). That's
23299 only a problem if the machine has BLX. */
23300 if (arm_arch5)
23302 fail = TRUE;
23303 break;
23306 /* Succeed if the following insn is the target label, or
23307 if the following two insns are a barrier and the
23308 target label. */
23309 this_insn = next_nonnote_insn (this_insn);
23310 if (this_insn && BARRIER_P (this_insn))
23311 this_insn = next_nonnote_insn (this_insn);
23313 if (this_insn && this_insn == label
23314 && insns_skipped < max_insns_skipped)
23316 arm_ccfsm_state = 1;
23317 succeed = TRUE;
23319 else
23320 fail = TRUE;
23321 break;
23323 case JUMP_INSN:
23324 /* If this is an unconditional branch to the same label, succeed.
23325 If it is to another label, do nothing. If it is conditional,
23326 fail. */
23327 /* XXX Probably, the tests for SET and the PC are
23328 unnecessary. */
23330 scanbody = PATTERN (this_insn);
23331 if (GET_CODE (scanbody) == SET
23332 && GET_CODE (SET_DEST (scanbody)) == PC)
23334 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23335 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23337 arm_ccfsm_state = 2;
23338 succeed = TRUE;
23340 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23341 fail = TRUE;
23343 /* Fail if a conditional return is undesirable (e.g. on a
23344 StrongARM), but still allow this if optimizing for size. */
23345 else if (GET_CODE (scanbody) == return_code
23346 && !use_return_insn (TRUE, NULL)
23347 && !optimize_size)
23348 fail = TRUE;
23349 else if (GET_CODE (scanbody) == return_code)
23351 arm_ccfsm_state = 2;
23352 succeed = TRUE;
23354 else if (GET_CODE (scanbody) == PARALLEL)
23356 switch (get_attr_conds (this_insn))
23358 case CONDS_NOCOND:
23359 break;
23360 default:
23361 fail = TRUE;
23362 break;
23365 else
23366 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23368 break;
23370 case INSN:
23371 /* Instructions using or affecting the condition codes make it
23372 fail. */
23373 scanbody = PATTERN (this_insn);
23374 if (!(GET_CODE (scanbody) == SET
23375 || GET_CODE (scanbody) == PARALLEL)
23376 || get_attr_conds (this_insn) != CONDS_NOCOND)
23377 fail = TRUE;
23378 break;
23380 default:
23381 break;
23384 if (succeed)
23386 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23387 arm_target_label = CODE_LABEL_NUMBER (label);
23388 else
23390 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23392 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23394 this_insn = next_nonnote_insn (this_insn);
23395 gcc_assert (!this_insn
23396 || (!BARRIER_P (this_insn)
23397 && !LABEL_P (this_insn)));
23399 if (!this_insn)
23401 /* Oh dear!  We ran off the end; give up.  */
23402 extract_constrain_insn_cached (insn);
23403 arm_ccfsm_state = 0;
23404 arm_target_insn = NULL;
23405 return;
23407 arm_target_insn = this_insn;
23410 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23411 what it was. */
23412 if (!reverse)
23413 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23415 if (reverse || then_not_else)
23416 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23419 /* Restore recog_data (getting the attributes of other insns can
23420 destroy this array), but final.c assumes that it remains intact
23421 across this call. */
23422 extract_constrain_insn_cached (insn);
23426 /* Output IT instructions. */
23427 void
23428 thumb2_asm_output_opcode (FILE * stream)
23430 char buff[5];
23431 int n;
23433 if (arm_condexec_mask)
23435 for (n = 0; n < arm_condexec_masklen; n++)
23436 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23437 buff[n] = 0;
23438 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23439 arm_condition_codes[arm_current_cc]);
23440 arm_condexec_mask = 0;
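/* Illustration of the loop above: with arm_condexec_masklen == 3 and mask
   bits 0 and 2 set, buff becomes "tet" and the output is "itet" followed by
   the current condition, i.e. an IT block covering then/else/then.  */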
23444 /* Returns true if REGNO is a valid register
23445 for holding a quantity of type MODE. */
23447 int arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23449 if (GET_MODE_CLASS (mode) == MODE_CC)
23450 return (regno == CC_REGNUM
23451 || (TARGET_HARD_FLOAT && TARGET_VFP
23452 && regno == VFPCC_REGNUM));
23454 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23455 return false;
23457 if (TARGET_THUMB1)
23458 /* For the Thumb we only allow values bigger than SImode in
23459 registers 0 - 6, so that there is always a second low
23460 register available to hold the upper part of the value.
23461 We probably ought to ensure that the register is the
23462 start of an even numbered register pair. */
23463 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23465 if (TARGET_HARD_FLOAT && TARGET_VFP
23466 && IS_VFP_REGNUM (regno))
23468 if (mode == SFmode || mode == SImode)
23469 return VFP_REGNO_OK_FOR_SINGLE (regno);
23471 if (mode == DFmode)
23472 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23474 if (mode == HFmode)
23475 return VFP_REGNO_OK_FOR_SINGLE (regno);
23477 if (TARGET_NEON)
23478 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23479 || (VALID_NEON_QREG_MODE (mode)
23480 && NEON_REGNO_OK_FOR_QUAD (regno))
23481 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23482 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23483 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23484 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23485 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23487 return FALSE;
23490 if (TARGET_REALLY_IWMMXT)
23492 if (IS_IWMMXT_GR_REGNUM (regno))
23493 return mode == SImode;
23495 if (IS_IWMMXT_REGNUM (regno))
23496 return VALID_IWMMXT_REG_MODE (mode);
23499 /* We allow almost any value to be stored in the general registers.
23500 Restrict doubleword quantities to even register pairs in ARM state
23501 so that we can use ldrd. Do not allow very large Neon structure
23502 opaque modes in general registers; they would use too many. */
23503 if (regno <= LAST_ARM_REGNUM)
23505 if (ARM_NUM_REGS (mode) > 4)
23506 return FALSE;
23508 if (TARGET_THUMB2)
23509 return TRUE;
23511 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23514 if (regno == FRAME_POINTER_REGNUM
23515 || regno == ARG_POINTER_REGNUM)
23516 /* We only allow integers in the fake hard registers. */
23517 return GET_MODE_CLASS (mode) == MODE_INT;
23519 return FALSE;
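/* For example, in ARM state with TARGET_LDRD a DImode value is rejected in
   an odd-numbered core register such as r1 (so that LDRD/STRD, which need an
   even base register, remain usable), while Thumb-2 accepts any core
   register here.  */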
23522 /* Implement MODES_TIEABLE_P. */
23524 bool
23525 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23527 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23528 return true;
23530 /* We specifically want to allow elements of "structure" modes to
23531 be tieable to the structure. This more general condition allows
23532 other rarer situations too. */
23533 if (TARGET_NEON
23534 && (VALID_NEON_DREG_MODE (mode1)
23535 || VALID_NEON_QREG_MODE (mode1)
23536 || VALID_NEON_STRUCT_MODE (mode1))
23537 && (VALID_NEON_DREG_MODE (mode2)
23538 || VALID_NEON_QREG_MODE (mode2)
23539 || VALID_NEON_STRUCT_MODE (mode2)))
23540 return true;
23542 return false;
23545 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23546 not used in ARM mode.  */
23548 enum reg_class
23549 arm_regno_class (int regno)
23551 if (regno == PC_REGNUM)
23552 return NO_REGS;
23554 if (TARGET_THUMB1)
23556 if (regno == STACK_POINTER_REGNUM)
23557 return STACK_REG;
23558 if (regno == CC_REGNUM)
23559 return CC_REG;
23560 if (regno < 8)
23561 return LO_REGS;
23562 return HI_REGS;
23565 if (TARGET_THUMB2 && regno < 8)
23566 return LO_REGS;
23568 if ( regno <= LAST_ARM_REGNUM
23569 || regno == FRAME_POINTER_REGNUM
23570 || regno == ARG_POINTER_REGNUM)
23571 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23573 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23574 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23576 if (IS_VFP_REGNUM (regno))
23578 if (regno <= D7_VFP_REGNUM)
23579 return VFP_D0_D7_REGS;
23580 else if (regno <= LAST_LO_VFP_REGNUM)
23581 return VFP_LO_REGS;
23582 else
23583 return VFP_HI_REGS;
23586 if (IS_IWMMXT_REGNUM (regno))
23587 return IWMMXT_REGS;
23589 if (IS_IWMMXT_GR_REGNUM (regno))
23590 return IWMMXT_GR_REGS;
23592 return NO_REGS;
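/* For example, r3 is LO_REGS in both Thumb modes, while r9 is HI_REGS when
   compiling for Thumb-1 or Thumb-2 but GENERAL_REGS in ARM state.  */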
23595 /* Handle a special case when computing the offset
23596 of an argument from the frame pointer. */
23598 int arm_debugger_arg_offset (int value, rtx addr)
23600 rtx_insn *insn;
23602 /* We are only interested if dbxout_parms() failed to compute the offset. */
23603 if (value != 0)
23604 return 0;
23606 /* We can only cope with the case where the address is held in a register. */
23607 if (!REG_P (addr))
23608 return 0;
23610 /* If we are using the frame pointer to point at the argument, then
23611 an offset of 0 is correct. */
23612 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23613 return 0;
23615 /* If we are using the stack pointer to point at the
23616 argument, then an offset of 0 is correct. */
23617 /* ??? Check this is consistent with thumb2 frame layout. */
23618 if ((TARGET_THUMB || !frame_pointer_needed)
23619 && REGNO (addr) == SP_REGNUM)
23620 return 0;
23622 /* Oh dear. The argument is pointed to by a register rather
23623 than being held in a register, or being stored at a known
23624 offset from the frame pointer. Since GDB only understands
23625 those two kinds of argument we must translate the address
23626 held in the register into an offset from the frame pointer.
23627 We do this by searching through the insns for the function
23628 looking to see where this register gets its value. If the
23629 register is initialized from the frame pointer plus an offset
23630 then we are in luck and we can continue, otherwise we give up.
23632 This code is exercised by producing debugging information
23633 for a function with arguments like this:
23635 double func (double a, double b, int c, double d) {return d;}
23637 Without this code the stab for parameter 'd' will be set to
23638 an offset of 0 from the frame pointer, rather than 8. */
23640 /* The if() statement says:
23642 If the insn is a normal instruction
23643 and if the insn is setting the value in a register
23644 and if the register being set is the register holding the address of the argument
23645 and if the address is computed by an addition
23646 that involves adding to a register
23647 which is the frame pointer
23648 a constant integer
23650 then... */
23652 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23654 if ( NONJUMP_INSN_P (insn)
23655 && GET_CODE (PATTERN (insn)) == SET
23656 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23657 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23658 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23659 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23660 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23663 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23665 break;
23669 if (value == 0)
23671 debug_rtx (addr);
23672 warning (0, "unable to compute real location of stacked parameter");
23673 value = 8; /* XXX magic hack */
23676 return value;
23679 /* Implement TARGET_PROMOTED_TYPE. */
23681 static tree
23682 arm_promoted_type (const_tree t)
23684 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23685 return float_type_node;
23686 return NULL_TREE;
23689 /* Implement TARGET_CONVERT_TO_TYPE.
23690 Specifically, this hook implements the peculiarity of the ARM
23691 half-precision floating-point C semantics that requires conversions between
23692 __fp16 to or from double to do an intermediate conversion to float. */
23694 static tree
23695 arm_convert_to_type (tree type, tree expr)
23697 tree fromtype = TREE_TYPE (expr);
23698 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23699 return NULL_TREE;
23700 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23701 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23702 return convert (type, convert (float_type_node, expr));
23703 return NULL_TREE;
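/* For example, a conversion from __fp16 to double (precision 16 to 64) is
   rewritten here as (double)(float)x, matching the two-step semantics
   described above; conversions between __fp16 and float are left to the
   default handling.  */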
23706 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23707 This simply adds HFmode as a supported mode; even though we don't
23708 implement arithmetic on this type directly, it's supported by
23709 optabs conversions, much the way the double-word arithmetic is
23710 special-cased in the default hook. */
23712 static bool
23713 arm_scalar_mode_supported_p (machine_mode mode)
23715 if (mode == HFmode)
23716 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23717 else if (ALL_FIXED_POINT_MODE_P (mode))
23718 return true;
23719 else
23720 return default_scalar_mode_supported_p (mode);
23723 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23724 not to early-clobber SRC registers in the process.
23726 We assume that the operands described by SRC and DEST represent a
23727 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23728 number of components into which the copy has been decomposed. */
23729 void
23730 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23732 unsigned int i;
23734 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23735 || REGNO (operands[0]) < REGNO (operands[1]))
23737 for (i = 0; i < count; i++)
23739 operands[2 * i] = dest[i];
23740 operands[2 * i + 1] = src[i];
23743 else
23745 for (i = 0; i < count; i++)
23747 operands[2 * i] = dest[count - i - 1];
23748 operands[2 * i + 1] = src[count - i - 1];
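/* For example, copying {d1,d2} into {d0,d1} is ordered low-to-high
   (d0 <- d1, then d1 <- d2), whereas copying {d1,d2} into {d2,d3} is
   ordered high-to-low (d3 <- d2, then d2 <- d1), so that no source
   register is overwritten before it has been read.  */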
23753 /* Split operands into moves from op[1] + op[2] into op[0]. */
23755 void
23756 neon_split_vcombine (rtx operands[3])
23758 unsigned int dest = REGNO (operands[0]);
23759 unsigned int src1 = REGNO (operands[1]);
23760 unsigned int src2 = REGNO (operands[2]);
23761 machine_mode halfmode = GET_MODE (operands[1]);
23762 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23763 rtx destlo, desthi;
23765 if (src1 == dest && src2 == dest + halfregs)
23767 /* No-op move. Can't split to nothing; emit something. */
23768 emit_note (NOTE_INSN_DELETED);
23769 return;
23772 /* Preserve register attributes for variable tracking. */
23773 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23774 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23775 GET_MODE_SIZE (halfmode));
23777 /* Special case of reversed high/low parts. Use VSWP. */
23778 if (src2 == dest && src1 == dest + halfregs)
23780 rtx x = gen_rtx_SET (destlo, operands[1]);
23781 rtx y = gen_rtx_SET (desthi, operands[2]);
23782 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23783 return;
23786 if (!reg_overlap_mentioned_p (operands[2], destlo))
23788 /* Try to avoid unnecessary moves if part of the result
23789 is in the right place already. */
23790 if (src1 != dest)
23791 emit_move_insn (destlo, operands[1]);
23792 if (src2 != dest + halfregs)
23793 emit_move_insn (desthi, operands[2]);
23795 else
23797 if (src2 != dest + halfregs)
23798 emit_move_insn (desthi, operands[2]);
23799 if (src1 != dest)
23800 emit_move_insn (destlo, operands[1]);
23804 /* Return the number (counting from 0) of
23805 the least significant set bit in MASK. */
23807 inline static int
23808 number_of_first_bit_set (unsigned mask)
23810 return ctz_hwi (mask);
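/* For example, number_of_first_bit_set (0x28) is 3, since bit 3 is the
   lowest bit set in 0b101000.  */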
23813 /* Like emit_multi_reg_push, but allowing for a different set of
23814 registers to be described as saved. MASK is the set of registers
23815 to be saved; REAL_REGS is the set of registers to be described as
23816 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23818 static rtx_insn *
23819 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23821 unsigned long regno;
23822 rtx par[10], tmp, reg;
23823 rtx_insn *insn;
23824 int i, j;
23826 /* Build the parallel of the registers actually being stored. */
23827 for (i = 0; mask; ++i, mask &= mask - 1)
23829 regno = ctz_hwi (mask);
23830 reg = gen_rtx_REG (SImode, regno);
23832 if (i == 0)
23833 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23834 else
23835 tmp = gen_rtx_USE (VOIDmode, reg);
23837 par[i] = tmp;
23840 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23841 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23842 tmp = gen_frame_mem (BLKmode, tmp);
23843 tmp = gen_rtx_SET (tmp, par[0]);
23844 par[0] = tmp;
23846 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23847 insn = emit_insn (tmp);
23849 /* Always build the stack adjustment note for unwind info. */
23850 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23851 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23852 par[0] = tmp;
23854 /* Build the parallel of the registers recorded as saved for unwind. */
23855 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23857 regno = ctz_hwi (real_regs);
23858 reg = gen_rtx_REG (SImode, regno);
23860 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23861 tmp = gen_frame_mem (SImode, tmp);
23862 tmp = gen_rtx_SET (tmp, reg);
23863 RTX_FRAME_RELATED_P (tmp) = 1;
23864 par[j + 1] = tmp;
23867 if (j == 0)
23868 tmp = par[0];
23869 else
23871 RTX_FRAME_RELATED_P (par[0]) = 1;
23872 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23875 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23877 return insn;
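/* For example, MASK = {r4, r5, lr} with REAL_REGS equal to MASK emits a
   single push that pre-decrements SP by 12 and stores r4, r5 and lr at
   offsets 0, 4 and 8; the REG_FRAME_RELATED_EXPR note added above records
   the SP adjustment and the three stores for the unwinder.  */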
23880 /* Emit code to push or pop registers to or from the stack. F is the
23881 assembly file. MASK is the registers to pop. */
23882 static void
23883 thumb_pop (FILE *f, unsigned long mask)
23885 int regno;
23886 int lo_mask = mask & 0xFF;
23887 int pushed_words = 0;
23889 gcc_assert (mask);
23891 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23893 /* Special case. Do not generate a POP PC statement here, do it in
23894 thumb_exit() */
23895 thumb_exit (f, -1);
23896 return;
23899 fprintf (f, "\tpop\t{");
23901 /* Look at the low registers first. */
23902 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23904 if (lo_mask & 1)
23906 asm_fprintf (f, "%r", regno);
23908 if ((lo_mask & ~1) != 0)
23909 fprintf (f, ", ");
23911 pushed_words++;
23915 if (mask & (1 << PC_REGNUM))
23917 /* Catch popping the PC. */
23918 if (TARGET_INTERWORK || TARGET_BACKTRACE
23919 || crtl->calls_eh_return)
23921 /* The PC is never popped directly; instead
23922 it is popped into r3 and then BX is used. */
23923 fprintf (f, "}\n");
23925 thumb_exit (f, -1);
23927 return;
23929 else
23931 if (mask & 0xFF)
23932 fprintf (f, ", ");
23934 asm_fprintf (f, "%r", PC_REGNUM);
23938 fprintf (f, "}\n");
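/* For example, MASK = {r4, r5, r7} produces "pop {r4, r5, r7}".  If the PC
   is in MASK but interworking or a backtrace structure is in use, the pop is
   closed early and thumb_exit emits the pop-into-work-register/BX sequence
   instead of popping the PC directly.  */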
23941 /* Generate code to return from a thumb function.
23942 If 'reg_containing_return_addr' is -1, then the return address is
23943 actually on the stack, at the stack pointer. */
23944 static void
23945 thumb_exit (FILE *f, int reg_containing_return_addr)
23947 unsigned regs_available_for_popping;
23948 unsigned regs_to_pop;
23949 int pops_needed;
23950 unsigned available;
23951 unsigned required;
23952 machine_mode mode;
23953 int size;
23954 int restore_a4 = FALSE;
23956 /* Compute the registers we need to pop. */
23957 regs_to_pop = 0;
23958 pops_needed = 0;
23960 if (reg_containing_return_addr == -1)
23962 regs_to_pop |= 1 << LR_REGNUM;
23963 ++pops_needed;
23966 if (TARGET_BACKTRACE)
23968 /* Restore the (ARM) frame pointer and stack pointer. */
23969 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23970 pops_needed += 2;
23973 /* If there is nothing to pop then just emit the BX instruction and
23974 return. */
23975 if (pops_needed == 0)
23977 if (crtl->calls_eh_return)
23978 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23980 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23981 return;
23983 /* Otherwise if we are not supporting interworking and we have not created
23984 a backtrace structure and the function was not entered in ARM mode then
23985 just pop the return address straight into the PC. */
23986 else if (!TARGET_INTERWORK
23987 && !TARGET_BACKTRACE
23988 && !is_called_in_ARM_mode (current_function_decl)
23989 && !crtl->calls_eh_return)
23991 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23992 return;
23995 /* Find out how many of the (return) argument registers we can corrupt. */
23996 regs_available_for_popping = 0;
23998 /* If returning via __builtin_eh_return, the bottom three registers
23999 all contain information needed for the return. */
24000 if (crtl->calls_eh_return)
24001 size = 12;
24002 else
24004 /* We can deduce the registers used from the function's
24005 return value.  This is more reliable than examining
24006 df_regs_ever_live_p () because that will be set if the register is
24007 ever used in the function, not just if the register is used
24008 to hold a return value. */
24010 if (crtl->return_rtx != 0)
24011 mode = GET_MODE (crtl->return_rtx);
24012 else
24013 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24015 size = GET_MODE_SIZE (mode);
24017 if (size == 0)
24019 /* In a void function we can use any argument register.
24020 In a function that returns a structure on the stack
24021 we can use the second and third argument registers. */
24022 if (mode == VOIDmode)
24023 regs_available_for_popping =
24024 (1 << ARG_REGISTER (1))
24025 | (1 << ARG_REGISTER (2))
24026 | (1 << ARG_REGISTER (3));
24027 else
24028 regs_available_for_popping =
24029 (1 << ARG_REGISTER (2))
24030 | (1 << ARG_REGISTER (3));
24032 else if (size <= 4)
24033 regs_available_for_popping =
24034 (1 << ARG_REGISTER (2))
24035 | (1 << ARG_REGISTER (3));
24036 else if (size <= 8)
24037 regs_available_for_popping =
24038 (1 << ARG_REGISTER (3));
24041 /* Match registers to be popped with registers into which we pop them. */
24042 for (available = regs_available_for_popping,
24043 required = regs_to_pop;
24044 required != 0 && available != 0;
24045 available &= ~(available & - available),
24046 required &= ~(required & - required))
24047 -- pops_needed;
24049 /* If we have any popping registers left over, remove them. */
24050 if (available > 0)
24051 regs_available_for_popping &= ~available;
24053 /* Otherwise if we need another popping register we can use
24054 the fourth argument register. */
24055 else if (pops_needed)
24057 /* If we have not found any free argument registers and
24058 reg a4 contains the return address, we must move it. */
24059 if (regs_available_for_popping == 0
24060 && reg_containing_return_addr == LAST_ARG_REGNUM)
24062 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24063 reg_containing_return_addr = LR_REGNUM;
24065 else if (size > 12)
24067 /* Register a4 is being used to hold part of the return value,
24068 but we have dire need of a free, low register. */
24069 restore_a4 = TRUE;
24071 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24074 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24076 /* The fourth argument register is available. */
24077 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24079 --pops_needed;
24083 /* Pop as many registers as we can. */
24084 thumb_pop (f, regs_available_for_popping);
24086 /* Process the registers we popped. */
24087 if (reg_containing_return_addr == -1)
24089 /* The return address was popped into the lowest numbered register. */
24090 regs_to_pop &= ~(1 << LR_REGNUM);
24092 reg_containing_return_addr =
24093 number_of_first_bit_set (regs_available_for_popping);
24095 /* Remove this register from the mask of available registers, so that
24096 the return address will not be corrupted by further pops. */
24097 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24100 /* If we popped other registers then handle them here. */
24101 if (regs_available_for_popping)
24103 int frame_pointer;
24105 /* Work out which register currently contains the frame pointer. */
24106 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24108 /* Move it into the correct place. */
24109 asm_fprintf (f, "\tmov\t%r, %r\n",
24110 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24112 /* (Temporarily) remove it from the mask of popped registers. */
24113 regs_available_for_popping &= ~(1 << frame_pointer);
24114 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24116 if (regs_available_for_popping)
24118 int stack_pointer;
24120 /* We popped the stack pointer as well,
24121 find the register that contains it. */
24122 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24124 /* Move it into the stack register. */
24125 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24127 /* At this point we have popped all necessary registers, so
24128 do not worry about restoring regs_available_for_popping
24129 to its correct value:
24131 assert (pops_needed == 0)
24132 assert (regs_available_for_popping == (1 << frame_pointer))
24133 assert (regs_to_pop == (1 << STACK_POINTER)) */
24135 else
24137 /* Since we have just moved the popped value into the frame
24138 pointer, the popping register is available for reuse, and
24139 we know that we still have the stack pointer left to pop. */
24140 regs_available_for_popping |= (1 << frame_pointer);
24144 /* If we still have registers left on the stack, but we no longer have
24145 any registers into which we can pop them, then we must move the return
24146 address into the link register and make available the register that
24147 contained it. */
24148 if (regs_available_for_popping == 0 && pops_needed > 0)
24150 regs_available_for_popping |= 1 << reg_containing_return_addr;
24152 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24153 reg_containing_return_addr);
24155 reg_containing_return_addr = LR_REGNUM;
24158 /* If we have registers left on the stack then pop some more.
24159 We know that at most we will want to pop FP and SP. */
24160 if (pops_needed > 0)
24162 int popped_into;
24163 int move_to;
24165 thumb_pop (f, regs_available_for_popping);
24167 /* We have popped either FP or SP.
24168 Move whichever one it is into the correct register. */
24169 popped_into = number_of_first_bit_set (regs_available_for_popping);
24170 move_to = number_of_first_bit_set (regs_to_pop);
24172 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24174 regs_to_pop &= ~(1 << move_to);
24176 --pops_needed;
24179 /* If we still have not popped everything then we must have only
24180 had one register available to us and we are now popping the SP. */
24181 if (pops_needed > 0)
24183 int popped_into;
24185 thumb_pop (f, regs_available_for_popping);
24187 popped_into = number_of_first_bit_set (regs_available_for_popping);
24189 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24191 /* assert (regs_to_pop == (1 << STACK_POINTER))
24192 assert (pops_needed == 1) */
24196 /* If necessary restore the a4 register. */
24197 if (restore_a4)
24199 if (reg_containing_return_addr != LR_REGNUM)
24201 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24202 reg_containing_return_addr = LR_REGNUM;
24205 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24208 if (crtl->calls_eh_return)
24209 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24211 /* Return to caller. */
24212 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24215 /* Scan INSN just before assembler is output for it.
24216 For Thumb-1, we track the status of the condition codes; this
24217 information is used in the cbranchsi4_insn pattern. */
24218 void
24219 thumb1_final_prescan_insn (rtx_insn *insn)
24221 if (flag_print_asm_name)
24222 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24223 INSN_ADDRESSES (INSN_UID (insn)));
24224 /* Don't overwrite the previous setter when we get to a cbranch. */
24225 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24227 enum attr_conds conds;
24229 if (cfun->machine->thumb1_cc_insn)
24231 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24232 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24233 CC_STATUS_INIT;
24235 conds = get_attr_conds (insn);
24236 if (conds == CONDS_SET)
24238 rtx set = single_set (insn);
24239 cfun->machine->thumb1_cc_insn = insn;
24240 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24241 cfun->machine->thumb1_cc_op1 = const0_rtx;
24242 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24243 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24245 rtx src1 = XEXP (SET_SRC (set), 1);
24246 if (src1 == const0_rtx)
24247 cfun->machine->thumb1_cc_mode = CCmode;
24249 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24251 /* Record the src register operand instead of dest because
24252 cprop_hardreg pass propagates src. */
24253 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24256 else if (conds != CONDS_NOCOND)
24257 cfun->machine->thumb1_cc_insn = NULL_RTX;
24260 /* Check if unexpected far jump is used. */
24261 if (cfun->machine->lr_save_eliminated
24262 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24263 internal_error("Unexpected thumb1 far jump");
24267 int thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24269 unsigned HOST_WIDE_INT mask = 0xff;
24270 int i;
24272 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24273 if (val == 0) /* XXX */
24274 return 0;
24276 for (i = 0; i < 25; i++)
24277 if ((val & (mask << i)) == val)
24278 return 1;
24280 return 0;
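/* For example, 0xFF00 (0xFF << 8) is accepted, while 0x101 is rejected
   because its set bits do not fit within any single 8-bit window.  */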
24283 /* Returns nonzero if the current function contains,
24284 or might contain a far jump. */
24285 static int
24286 thumb_far_jump_used_p (void)
24288 rtx_insn *insn;
24289 bool far_jump = false;
24290 unsigned int func_size = 0;
24292 /* This test is only important for leaf functions. */
24293 /* assert (!leaf_function_p ()); */
24295 /* If we have already decided that far jumps may be used,
24296 do not bother checking again, and always return true even if
24297 it turns out that they are not being used. Once we have made
24298 the decision that far jumps are present (and that hence the link
24299 register will be pushed onto the stack) we cannot go back on it. */
24300 if (cfun->machine->far_jump_used)
24301 return 1;
24303 /* If this function is not being called from the prologue/epilogue
24304 generation code then it must be being called from the
24305 INITIAL_ELIMINATION_OFFSET macro. */
24306 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24308 /* In this case we know that we are being asked about the elimination
24309 of the arg pointer register. If that register is not being used,
24310 then there are no arguments on the stack, and we do not have to
24311 worry that a far jump might force the prologue to push the link
24312 register, changing the stack offsets. In this case we can just
24313 return false, since the presence of far jumps in the function will
24314 not affect stack offsets.
24316 If the arg pointer is live (or if it was live, but has now been
24317 eliminated and so set to dead) then we do have to test to see if
24318 the function might contain a far jump. This test can lead to some
24319 false negatives, since before reload is completed, the length of
24320 branch instructions is not known, so gcc defaults to returning their
24321 longest length, which in turn sets the far jump attribute to true.
24323 A false negative will not result in bad code being generated, but it
24324 will result in a needless push and pop of the link register. We
24325 hope that this does not occur too often.
24327 If we need doubleword stack alignment this could affect the other
24328 elimination offsets so we can't risk getting it wrong. */
24329 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24330 cfun->machine->arg_pointer_live = 1;
24331 else if (!cfun->machine->arg_pointer_live)
24332 return 0;
24335 /* We should not change far_jump_used during or after reload, as there is
24336 no chance to change stack frame layout. */
24337 if (reload_in_progress || reload_completed)
24338 return 0;
24340 /* Check to see if the function contains a branch
24341 insn with the far jump attribute set. */
24342 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24344 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24346 far_jump = true;
24348 func_size += get_attr_length (insn);
24351 /* The far_jump attribute is always true for thumb1 before the
24352 shorten_branches pass, so checking it before that pass is of
24353 little use.
24355 The following heuristic tries to estimate more accurately whether a
24356 far jump will actually be needed.  It is deliberately conservative,
24357 since there is no way to roll back a decision not to use a far jump.
24359 The Thumb-1 long branch offset range is -2048 to 2046.  In the worst
24360 case each 2-byte insn is associated with a 4-byte constant pool entry,
24361 so using function size 2048/3 as the threshold is conservative enough. */
24362 if (far_jump)
24364 if ((func_size * 3) >= 2048)
24366 /* Record the fact that we have decided that
24367 the function does use far jumps. */
24368 cfun->machine->far_jump_used = 1;
24369 return 1;
24373 return 0;
24376 /* Return nonzero if FUNC must be entered in ARM mode. */
24377 static bool
24378 is_called_in_ARM_mode (tree func)
24380 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24382 /* Ignore the problem about functions whose address is taken. */
24383 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24384 return true;
24386 #ifdef ARM_PE
24387 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24388 #else
24389 return false;
24390 #endif
24393 /* Given the stack offsets and register mask in OFFSETS, decide how
24394 many additional registers to push instead of subtracting a constant
24395 from SP. For epilogues the principle is the same except we use pop.
24396 FOR_PROLOGUE indicates which we're generating. */
24397 static int
24398 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24400 HOST_WIDE_INT amount;
24401 unsigned long live_regs_mask = offsets->saved_regs_mask;
24402 /* Extract a mask of the ones we can give to the Thumb's push/pop
24403 instruction. */
24404 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24405 /* Then count how many other high registers will need to be pushed. */
24406 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24407 int n_free, reg_base, size;
24409 if (!for_prologue && frame_pointer_needed)
24410 amount = offsets->locals_base - offsets->saved_regs;
24411 else
24412 amount = offsets->outgoing_args - offsets->saved_regs;
24414 /* If the stack frame size is 512 exactly, we can save one load
24415 instruction, which should make this a win even when optimizing
24416 for speed. */
24417 if (!optimize_size && amount != 512)
24418 return 0;
24420 /* Can't do this if there are high registers to push. */
24421 if (high_regs_pushed != 0)
24422 return 0;
24424 /* Shouldn't do it in the prologue if no registers would normally
24425 be pushed at all. In the epilogue, also allow it if we'll have
24426 a pop insn for the PC. */
24427 if (l_mask == 0
24428 && (for_prologue
24429 || TARGET_BACKTRACE
24430 || (live_regs_mask & 1 << LR_REGNUM) == 0
24431 || TARGET_INTERWORK
24432 || crtl->args.pretend_args_size != 0))
24433 return 0;
24435 /* Don't do this if thumb_expand_prologue wants to emit instructions
24436 between the push and the stack frame allocation. */
24437 if (for_prologue
24438 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24439 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24440 return 0;
24442 reg_base = 0;
24443 n_free = 0;
24444 if (!for_prologue)
24446 size = arm_size_return_regs ();
24447 reg_base = ARM_NUM_INTS (size);
24448 live_regs_mask >>= reg_base;
24451 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24452 && (for_prologue || call_used_regs[reg_base + n_free]))
24454 live_regs_mask >>= 1;
24455 n_free++;
24458 if (n_free == 0)
24459 return 0;
24460 gcc_assert (amount / 4 * 4 == amount);
24462 if (amount >= 512 && (amount - n_free * 4) < 512)
24463 return (amount - 508) / 4;
24464 if (amount <= n_free * 4)
24465 return amount / 4;
24466 return 0;
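/* For example, with a 512-byte stack adjustment and one free low register
   this returns 1: pushing one extra register shrinks the remaining
   adjustment to 508 bytes, which fits in a single Thumb-1 SP
   add/subtract immediate.  */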
24469 /* The bits which aren't usefully expanded as rtl. */
24470 const char *
24471 thumb1_unexpanded_epilogue (void)
24473 arm_stack_offsets *offsets;
24474 int regno;
24475 unsigned long live_regs_mask = 0;
24476 int high_regs_pushed = 0;
24477 int extra_pop;
24478 int had_to_push_lr;
24479 int size;
24481 if (cfun->machine->return_used_this_function != 0)
24482 return "";
24484 if (IS_NAKED (arm_current_func_type ()))
24485 return "";
24487 offsets = arm_get_frame_offsets ();
24488 live_regs_mask = offsets->saved_regs_mask;
24489 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24491 /* We can deduce the registers used from the function's return value.
24492 This is more reliable than examining df_regs_ever_live_p () because that
24493 will be set if the register is ever used in the function, not just if
24494 the register is used to hold a return value. */
24495 size = arm_size_return_regs ();
24497 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24498 if (extra_pop > 0)
24500 unsigned long extra_mask = (1 << extra_pop) - 1;
24501 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24504 /* The prolog may have pushed some high registers to use as
24505 work registers. e.g. the testsuite file:
24506 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24507 compiles to produce:
24508 push {r4, r5, r6, r7, lr}
24509 mov r7, r9
24510 mov r6, r8
24511 push {r6, r7}
24512 as part of the prolog. We have to undo that pushing here. */
24514 if (high_regs_pushed)
24516 unsigned long mask = live_regs_mask & 0xff;
24517 int next_hi_reg;
24519 /* The available low registers depend on the size of the value we are
24520 returning. */
24521 if (size <= 12)
24522 mask |= 1 << 3;
24523 if (size <= 8)
24524 mask |= 1 << 2;
24526 if (mask == 0)
24527 /* Oh dear! We have no low registers into which we can pop
24528 high registers! */
24529 internal_error
24530 ("no low registers available for popping high registers");
24532 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24533 if (live_regs_mask & (1 << next_hi_reg))
24534 break;
24536 while (high_regs_pushed)
24538 /* Find lo register(s) into which the high register(s) can
24539 be popped. */
24540 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24542 if (mask & (1 << regno))
24543 high_regs_pushed--;
24544 if (high_regs_pushed == 0)
24545 break;
24548 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24550 /* Pop the values into the low register(s). */
24551 thumb_pop (asm_out_file, mask);
24553 /* Move the value(s) into the high registers. */
24554 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24556 if (mask & (1 << regno))
24558 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24559 regno);
24561 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24562 if (live_regs_mask & (1 << next_hi_reg))
24563 break;
24567 live_regs_mask &= ~0x0f00;
24570 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24571 live_regs_mask &= 0xff;
24573 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24575 /* Pop the return address into the PC. */
24576 if (had_to_push_lr)
24577 live_regs_mask |= 1 << PC_REGNUM;
24579 /* Either no argument registers were pushed or a backtrace
24580 structure was created which includes an adjusted stack
24581 pointer, so just pop everything. */
24582 if (live_regs_mask)
24583 thumb_pop (asm_out_file, live_regs_mask);
24585 /* We have either just popped the return address into the
24586 PC or it was kept in LR for the entire function.
24587 Note that thumb_pop has already called thumb_exit if the
24588 PC was in the list. */
24589 if (!had_to_push_lr)
24590 thumb_exit (asm_out_file, LR_REGNUM);
24592 else
24594 /* Pop everything but the return address. */
24595 if (live_regs_mask)
24596 thumb_pop (asm_out_file, live_regs_mask);
24598 if (had_to_push_lr)
24600 if (size > 12)
24602 /* We have no free low regs, so save one. */
24603 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24604 LAST_ARG_REGNUM);
24607 /* Get the return address into a temporary register. */
24608 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24610 if (size > 12)
24612 /* Move the return address to lr. */
24613 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24614 LAST_ARG_REGNUM);
24615 /* Restore the low register. */
24616 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24617 IP_REGNUM);
24618 regno = LR_REGNUM;
24620 else
24621 regno = LAST_ARG_REGNUM;
24623 else
24624 regno = LR_REGNUM;
24626 /* Remove the argument registers that were pushed onto the stack. */
24627 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24628 SP_REGNUM, SP_REGNUM,
24629 crtl->args.pretend_args_size);
24631 thumb_exit (asm_out_file, regno);
24634 return "";
24637 /* Functions to save and restore machine-specific function data. */
24638 static struct machine_function *
24639 arm_init_machine_status (void)
24641 struct machine_function *machine;
24642 machine = ggc_cleared_alloc<machine_function> ();
24644 #if ARM_FT_UNKNOWN != 0
24645 machine->func_type = ARM_FT_UNKNOWN;
24646 #endif
24647 return machine;
24650 /* Return an RTX indicating where the return address to the
24651 calling function can be found. */
24653 rtx arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24655 if (count != 0)
24656 return NULL_RTX;
24658 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24661 /* Do anything needed before RTL is emitted for each function. */
24662 void
24663 arm_init_expanders (void)
24665 /* Arrange to initialize and mark the machine per-function status. */
24666 init_machine_status = arm_init_machine_status;
24668 /* This is to stop the combine pass optimizing away the alignment
24669 adjustment of va_arg. */
24670 /* ??? It is claimed that this should not be necessary. */
24671 if (cfun)
24672 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24675 /* Check that FUNC is called with a different mode. */
24677 bool
24678 arm_change_mode_p (tree func)
24680 if (TREE_CODE (func) != FUNCTION_DECL)
24681 return false;
24683 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24685 if (!callee_tree)
24686 callee_tree = target_option_default_node;
24688 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24689 int flags = callee_opts->x_target_flags;
24691 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24694 /* Like arm_compute_initial_elimination offset. Simpler because there
24695 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24696 to point at the base of the local variables after static stack
24697 space for a function has been allocated. */
24699 HOST_WIDE_INT
24700 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24702 arm_stack_offsets *offsets;
24704 offsets = arm_get_frame_offsets ();
24706 switch (from)
24708 case ARG_POINTER_REGNUM:
24709 switch (to)
24711 case STACK_POINTER_REGNUM:
24712 return offsets->outgoing_args - offsets->saved_args;
24714 case FRAME_POINTER_REGNUM:
24715 return offsets->soft_frame - offsets->saved_args;
24717 case ARM_HARD_FRAME_POINTER_REGNUM:
24718 return offsets->saved_regs - offsets->saved_args;
24720 case THUMB_HARD_FRAME_POINTER_REGNUM:
24721 return offsets->locals_base - offsets->saved_args;
24723 default:
24724 gcc_unreachable ();
24726 break;
24728 case FRAME_POINTER_REGNUM:
24729 switch (to)
24731 case STACK_POINTER_REGNUM:
24732 return offsets->outgoing_args - offsets->soft_frame;
24734 case ARM_HARD_FRAME_POINTER_REGNUM:
24735 return offsets->saved_regs - offsets->soft_frame;
24737 case THUMB_HARD_FRAME_POINTER_REGNUM:
24738 return offsets->locals_base - offsets->soft_frame;
24740 default:
24741 gcc_unreachable ();
24743 break;
24745 default:
24746 gcc_unreachable ();
24750 /* Generate the function's prologue. */
24752 void
24753 thumb1_expand_prologue (void)
24755 rtx_insn *insn;
24757 HOST_WIDE_INT amount;
24758 HOST_WIDE_INT size;
24759 arm_stack_offsets *offsets;
24760 unsigned long func_type;
24761 int regno;
24762 unsigned long live_regs_mask;
24763 unsigned long l_mask;
24764 unsigned high_regs_pushed = 0;
24766 func_type = arm_current_func_type ();
24768 /* Naked functions don't have prologues. */
24769 if (IS_NAKED (func_type))
24771 if (flag_stack_usage_info)
24772 current_function_static_stack_size = 0;
24773 return;
24776 if (IS_INTERRUPT (func_type))
24778 error ("interrupt Service Routines cannot be coded in Thumb mode");
24779 return;
24782 if (is_called_in_ARM_mode (current_function_decl))
24783 emit_insn (gen_prologue_thumb1_interwork ());
24785 offsets = arm_get_frame_offsets ();
24786 live_regs_mask = offsets->saved_regs_mask;
24788 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24789 l_mask = live_regs_mask & 0x40ff;
24790 /* Then count how many other high registers will need to be pushed. */
24791 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24793 if (crtl->args.pretend_args_size)
24795 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24797 if (cfun->machine->uses_anonymous_args)
24799 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24800 unsigned long mask;
24802 mask = 1ul << (LAST_ARG_REGNUM + 1);
24803 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24805 insn = thumb1_emit_multi_reg_push (mask, 0);
24807 else
24809 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24810 stack_pointer_rtx, x));
24812 RTX_FRAME_RELATED_P (insn) = 1;
24815 if (TARGET_BACKTRACE)
24817 HOST_WIDE_INT offset = 0;
24818 unsigned work_register;
24819 rtx work_reg, x, arm_hfp_rtx;
24821 /* We have been asked to create a stack backtrace structure.
24822 The code looks like this:
24824 0 .align 2
24825 0 func:
24826 0 sub SP, #16 Reserve space for 4 registers.
24827 2 push {R7} Push low registers.
24828 4 add R7, SP, #20 Get the stack pointer before the push.
24829 6 str R7, [SP, #8] Store the stack pointer
24830 (before reserving the space).
24831 8 mov R7, PC Get hold of the start of this code + 12.
24832 10 str R7, [SP, #16] Store it.
24833 12 mov R7, FP Get hold of the current frame pointer.
24834 14 str R7, [SP, #4] Store it.
24835 16 mov R7, LR Get hold of the current return address.
24836 18 str R7, [SP, #12] Store it.
24837 20 add R7, SP, #16 Point at the start of the
24838 backtrace structure.
24839 22 mov FP, R7 Put this value into the frame pointer. */
24841 work_register = thumb_find_work_register (live_regs_mask);
24842 work_reg = gen_rtx_REG (SImode, work_register);
24843 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24845 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24846 stack_pointer_rtx, GEN_INT (-16)));
24847 RTX_FRAME_RELATED_P (insn) = 1;
24849 if (l_mask)
24851 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24852 RTX_FRAME_RELATED_P (insn) = 1;
24854 offset = bit_count (l_mask) * UNITS_PER_WORD;
24857 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24858 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24860 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24861 x = gen_frame_mem (SImode, x);
24862 emit_move_insn (x, work_reg);
24864 /* Make sure that the instruction fetching the PC is in the right place
24865 to calculate "start of backtrace creation code + 12". */
24866 /* ??? The stores using the common WORK_REG ought to be enough to
24867 prevent the scheduler from doing anything weird. Failing that
24868 we could always move all of the following into an UNSPEC_VOLATILE. */
24869 if (l_mask)
24871 x = gen_rtx_REG (SImode, PC_REGNUM);
24872 emit_move_insn (work_reg, x);
24874 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24875 x = gen_frame_mem (SImode, x);
24876 emit_move_insn (x, work_reg);
24878 emit_move_insn (work_reg, arm_hfp_rtx);
24880 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24881 x = gen_frame_mem (SImode, x);
24882 emit_move_insn (x, work_reg);
24884 else
24886 emit_move_insn (work_reg, arm_hfp_rtx);
24888 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24889 x = gen_frame_mem (SImode, x);
24890 emit_move_insn (x, work_reg);
24892 x = gen_rtx_REG (SImode, PC_REGNUM);
24893 emit_move_insn (work_reg, x);
24895 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24896 x = gen_frame_mem (SImode, x);
24897 emit_move_insn (x, work_reg);
24900 x = gen_rtx_REG (SImode, LR_REGNUM);
24901 emit_move_insn (work_reg, x);
24903 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24904 x = gen_frame_mem (SImode, x);
24905 emit_move_insn (x, work_reg);
24907 x = GEN_INT (offset + 12);
24908 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24910 emit_move_insn (arm_hfp_rtx, work_reg);
24912 /* Optimization: If we are not pushing any low registers but we are going
24913 to push some high registers then delay our first push. This will just
24914 be a push of LR and we can combine it with the push of the first high
24915 register. */
24916 else if ((l_mask & 0xff) != 0
24917 || (high_regs_pushed == 0 && l_mask))
24919 unsigned long mask = l_mask;
24920 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24921 insn = thumb1_emit_multi_reg_push (mask, mask);
24922 RTX_FRAME_RELATED_P (insn) = 1;
24925 if (high_regs_pushed)
24927 unsigned pushable_regs;
24928 unsigned next_hi_reg;
24929 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24930 : crtl->args.info.nregs;
24931 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24933 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24934 if (live_regs_mask & (1 << next_hi_reg))
24935 break;
24937 /* Here we need to mask out registers used for passing arguments
24938 even if they can be pushed.  This is to avoid using them to stash the high
24939 registers, since doing so would clobber the argument values.  */
24940 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24942 if (pushable_regs == 0)
24943 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24945 while (high_regs_pushed > 0)
24947 unsigned long real_regs_mask = 0;
24949 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24951 if (pushable_regs & (1 << regno))
24953 emit_move_insn (gen_rtx_REG (SImode, regno),
24954 gen_rtx_REG (SImode, next_hi_reg));
24956 high_regs_pushed --;
24957 real_regs_mask |= (1 << next_hi_reg);
24959 if (high_regs_pushed)
24961 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24962 next_hi_reg --)
24963 if (live_regs_mask & (1 << next_hi_reg))
24964 break;
24966 else
24968 pushable_regs &= ~((1 << regno) - 1);
24969 break;
24974 /* If we had to find a work register and we have not yet
24975 saved the LR then add it to the list of regs to push. */
24976 if (l_mask == (1 << LR_REGNUM))
24978 pushable_regs |= l_mask;
24979 real_regs_mask |= l_mask;
24980 l_mask = 0;
24983 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24984 RTX_FRAME_RELATED_P (insn) = 1;
24988 /* Load the pic register before setting the frame pointer,
24989 so we can use r7 as a temporary work register. */
24990 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24991 arm_load_pic_register (live_regs_mask);
24993 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24994 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24995 stack_pointer_rtx);
24997 size = offsets->outgoing_args - offsets->saved_args;
24998 if (flag_stack_usage_info)
24999 current_function_static_stack_size = size;
25001 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25002 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
25003 sorry ("-fstack-check=specific for Thumb-1");
25005 amount = offsets->outgoing_args - offsets->saved_regs;
25006 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25007 if (amount)
25009 if (amount < 512)
25011 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25012 GEN_INT (- amount)));
25013 RTX_FRAME_RELATED_P (insn) = 1;
25015 else
25017 rtx reg, dwarf;
25019 /* The stack decrement is too big for an immediate value in a single
25020 insn. In theory we could issue multiple subtracts, but after
25021 three of them it becomes more space efficient to place the full
25022 value in the constant pool and load into a register. (Also the
25023 ARM debugger really likes to see only one stack decrement per
25024 function). So instead we look for a scratch register into which
25025 we can load the decrement, and then we subtract this from the
25026 stack pointer. Unfortunately on the thumb the only available
25027 scratch registers are the argument registers, and we cannot use
25028 these as they may hold arguments to the function. Instead we
25029 attempt to locate a call preserved register which is used by this
25030 function. If we can find one, then we know that it will have
25031 been pushed at the start of the prologue and so we can corrupt
25032 it now. */
25033 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25034 if (live_regs_mask & (1 << regno))
25035 break;
25037 gcc_assert(regno <= LAST_LO_REGNUM);
25039 reg = gen_rtx_REG (SImode, regno);
25041 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25043 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25044 stack_pointer_rtx, reg));
25046 dwarf = gen_rtx_SET (stack_pointer_rtx,
25047 plus_constant (Pmode, stack_pointer_rtx,
25048 -amount));
25049 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25050 RTX_FRAME_RELATED_P (insn) = 1;
25054 if (frame_pointer_needed)
25055 thumb_set_frame_pointer (offsets);
25057 /* If we are profiling, make sure no instructions are scheduled before
25058 the call to mcount. Similarly if the user has requested no
25059 scheduling in the prolog. Similarly if we want non-call exceptions
25060 using the EABI unwinder, to prevent faulting instructions from being
25061 swapped with a stack adjustment. */
25062 if (crtl->profile || !TARGET_SCHED_PROLOG
25063 || (arm_except_unwind_info (&global_options) == UI_TARGET
25064 && cfun->can_throw_non_call_exceptions))
25065 emit_insn (gen_blockage ());
25067 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25068 if (live_regs_mask & 0xff)
25069 cfun->machine->lr_save_eliminated = 0;
25072 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25073 single POP instruction can be generated.  LR should be replaced by PC.
25074 All the checks required are already done by USE_RETURN_INSN ().  Hence,
25075 all we really need to check here is whether a single register or
25076 multiple registers need to be popped.  */
25077 void
25078 thumb2_expand_return (bool simple_return)
25080 int i, num_regs;
25081 unsigned long saved_regs_mask;
25082 arm_stack_offsets *offsets;
25084 offsets = arm_get_frame_offsets ();
25085 saved_regs_mask = offsets->saved_regs_mask;
25087 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25088 if (saved_regs_mask & (1 << i))
25089 num_regs++;
25091 if (!simple_return && saved_regs_mask)
25093 if (num_regs == 1)
25095 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25096 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25097 rtx addr = gen_rtx_MEM (SImode,
25098 gen_rtx_POST_INC (SImode,
25099 stack_pointer_rtx));
25100 set_mem_alias_set (addr, get_frame_alias_set ());
25101 XVECEXP (par, 0, 0) = ret_rtx;
25102 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25103 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25104 emit_jump_insn (par);
25106 else
25108 saved_regs_mask &= ~ (1 << LR_REGNUM);
25109 saved_regs_mask |= (1 << PC_REGNUM);
25110 arm_emit_multi_reg_pop (saved_regs_mask);
25113 else
25115 emit_jump_insn (simple_return_rtx);
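/* For example, if only LR was saved, the single-register path above pops
   the return address straight into the PC with a post-increment SP update;
   if {r4, r5, lr} were saved, LR is replaced by PC in the mask and a
   multi-register pop of {r4, r5, pc} is emitted instead.  */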
25119 void
25120 thumb1_expand_epilogue (void)
25122 HOST_WIDE_INT amount;
25123 arm_stack_offsets *offsets;
25124 int regno;
25126 /* Naked functions don't have epilogues.  */
25127 if (IS_NAKED (arm_current_func_type ()))
25128 return;
25130 offsets = arm_get_frame_offsets ();
25131 amount = offsets->outgoing_args - offsets->saved_regs;
25133 if (frame_pointer_needed)
25135 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25136 amount = offsets->locals_base - offsets->saved_regs;
25138 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25140 gcc_assert (amount >= 0);
25141 if (amount)
25143 emit_insn (gen_blockage ());
25145 if (amount < 512)
25146 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25147 GEN_INT (amount)));
25148 else
25150 /* r3 is always free in the epilogue. */
25151 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25153 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25154 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25158 /* Emit a USE (stack_pointer_rtx), so that
25159 the stack adjustment will not be deleted. */
25160 emit_insn (gen_force_register_use (stack_pointer_rtx));
25162 if (crtl->profile || !TARGET_SCHED_PROLOG)
25163 emit_insn (gen_blockage ());
25165 /* Emit a clobber for each register that will be restored in the epilogue,
25166 so that flow2 will get register lifetimes correct. */
25167 for (regno = 0; regno < 13; regno++)
25168 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25169 emit_clobber (gen_rtx_REG (SImode, regno));
25171 if (! df_regs_ever_live_p (LR_REGNUM))
25172 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25175 /* Epilogue code for APCS frame. */
25176 static void
25177 arm_expand_epilogue_apcs_frame (bool really_return)
25179 unsigned long func_type;
25180 unsigned long saved_regs_mask;
25181 int num_regs = 0;
25182 int i;
25183 int floats_from_frame = 0;
25184 arm_stack_offsets *offsets;
25186 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25187 func_type = arm_current_func_type ();
25189 /* Get frame offsets for ARM. */
25190 offsets = arm_get_frame_offsets ();
25191 saved_regs_mask = offsets->saved_regs_mask;
25193 /* Find the offset of the floating-point save area in the frame. */
25194 floats_from_frame
25195 = (offsets->saved_args
25196 + arm_compute_static_chain_stack_bytes ()
25197 - offsets->frame);
25199 /* Compute how many core registers are saved and how far away the floats are. */
25200 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25201 if (saved_regs_mask & (1 << i))
25203 num_regs++;
25204 floats_from_frame += 4;
25207 if (TARGET_HARD_FLOAT && TARGET_VFP)
25209 int start_reg;
25210 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25212 /* The offset is from IP_REGNUM. */
25213 int saved_size = arm_get_vfp_saved_size ();
25214 if (saved_size > 0)
25216 rtx_insn *insn;
25217 floats_from_frame += saved_size;
25218 insn = emit_insn (gen_addsi3 (ip_rtx,
25219 hard_frame_pointer_rtx,
25220 GEN_INT (-floats_from_frame)));
25221 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25222 ip_rtx, hard_frame_pointer_rtx);
25225 /* Generate VFP register multi-pop. */
25226 start_reg = FIRST_VFP_REGNUM;
25228 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25229 /* Look for a case where a reg does not need restoring. */
25230 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25231 && (!df_regs_ever_live_p (i + 1)
25232 || call_used_regs[i + 1]))
25234 if (start_reg != i)
25235 arm_emit_vfp_multi_reg_pop (start_reg,
25236 (i - start_reg) / 2,
25237 gen_rtx_REG (SImode,
25238 IP_REGNUM));
25239 start_reg = i + 2;
25242 /* Restore the remaining regs that we have discovered (or possibly
25243 even all of them, if the conditional in the for loop never
25244 fired). */
25245 if (start_reg != i)
25246 arm_emit_vfp_multi_reg_pop (start_reg,
25247 (i - start_reg) / 2,
25248 gen_rtx_REG (SImode, IP_REGNUM));
25251 if (TARGET_IWMMXT)
25253 /* The frame pointer is guaranteed to be non-double-word aligned, as
25254 it is set to (double-word-aligned old_stack_pointer) - 4. */
25255 rtx_insn *insn;
25256 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25258 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25259 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25261 rtx addr = gen_frame_mem (V2SImode,
25262 plus_constant (Pmode, hard_frame_pointer_rtx,
25263 - lrm_count * 4));
25264 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25265 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25266 gen_rtx_REG (V2SImode, i),
25267 NULL_RTX);
25268 lrm_count += 2;
25272 /* saved_regs_mask should contain IP, which holds the old stack pointer
25273 saved at the time the activation record was created. Since SP and IP are
25274 adjacent registers, we can restore the value directly into SP. */
25275 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25276 saved_regs_mask &= ~(1 << IP_REGNUM);
25277 saved_regs_mask |= (1 << SP_REGNUM);
25279 /* There are two registers left in saved_regs_mask - LR and PC. We
25280 only need to restore LR (the return address), but to
25281 save time we can load it directly into PC, unless we need a
25282 special function exit sequence, or we are not really returning. */
25283 if (really_return
25284 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25285 && !crtl->calls_eh_return)
25286 /* Delete LR from the register mask, so that LR on
25287 the stack is loaded into the PC in the register mask. */
25288 saved_regs_mask &= ~(1 << LR_REGNUM);
25289 else
25290 saved_regs_mask &= ~(1 << PC_REGNUM);
25292 num_regs = bit_count (saved_regs_mask);
25293 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25295 rtx_insn *insn;
25296 emit_insn (gen_blockage ());
25297 /* Unwind the stack to just below the saved registers. */
25298 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25299 hard_frame_pointer_rtx,
25300 GEN_INT (- 4 * num_regs)));
25302 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25303 stack_pointer_rtx, hard_frame_pointer_rtx);
25306 arm_emit_multi_reg_pop (saved_regs_mask);
25308 if (IS_INTERRUPT (func_type))
25310 /* Interrupt handlers will have pushed the
25311 IP onto the stack, so restore it now. */
25312 rtx_insn *insn;
25313 rtx addr = gen_rtx_MEM (SImode,
25314 gen_rtx_POST_INC (SImode,
25315 stack_pointer_rtx));
25316 set_mem_alias_set (addr, get_frame_alias_set ());
25317 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25318 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25319 gen_rtx_REG (SImode, IP_REGNUM),
25320 NULL_RTX);
25323 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25324 return;
25326 if (crtl->calls_eh_return)
25327 emit_insn (gen_addsi3 (stack_pointer_rtx,
25328 stack_pointer_rtx,
25329 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25331 if (IS_STACKALIGN (func_type))
25332 /* Restore the original stack pointer. Before prologue, the stack was
25333 realigned and the original stack pointer saved in r0. For details,
25334 see comment in arm_expand_prologue. */
25335 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25337 emit_jump_insn (simple_return_rtx);
25340 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25341 function is not a sibcall. */
25342 void
25343 arm_expand_epilogue (bool really_return)
25345 unsigned long func_type;
25346 unsigned long saved_regs_mask;
25347 int num_regs = 0;
25348 int i;
25349 int amount;
25350 arm_stack_offsets *offsets;
25352 func_type = arm_current_func_type ();
25354 /* Naked functions don't have an epilogue. Hence, generate a return pattern
25355 and let output_return_instruction take care of any instruction emission. */
25356 if (IS_NAKED (func_type)
25357 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25359 if (really_return)
25360 emit_jump_insn (simple_return_rtx);
25361 return;
25364 /* If we are throwing an exception, then we really must be doing a
25365 return, so we can't tail-call. */
25366 gcc_assert (!crtl->calls_eh_return || really_return);
25368 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25370 arm_expand_epilogue_apcs_frame (really_return);
25371 return;
25374 /* Get frame offsets for ARM. */
25375 offsets = arm_get_frame_offsets ();
25376 saved_regs_mask = offsets->saved_regs_mask;
25377 num_regs = bit_count (saved_regs_mask);
25379 if (frame_pointer_needed)
25381 rtx_insn *insn;
25382 /* Restore stack pointer if necessary. */
25383 if (TARGET_ARM)
25385 /* In ARM mode, frame pointer points to first saved register.
25386 Restore stack pointer to last saved register. */
25387 amount = offsets->frame - offsets->saved_regs;
25389 /* Force out any pending memory operations that reference stacked data
25390 before stack de-allocation occurs. */
25391 emit_insn (gen_blockage ());
25392 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25393 hard_frame_pointer_rtx,
25394 GEN_INT (amount)));
25395 arm_add_cfa_adjust_cfa_note (insn, amount,
25396 stack_pointer_rtx,
25397 hard_frame_pointer_rtx);
25399 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25400 deleted. */
25401 emit_insn (gen_force_register_use (stack_pointer_rtx));
25403 else
25405 /* In Thumb-2 mode, the frame pointer points to the last saved
25406 register. */
25407 amount = offsets->locals_base - offsets->saved_regs;
25408 if (amount)
25410 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25411 hard_frame_pointer_rtx,
25412 GEN_INT (amount)));
25413 arm_add_cfa_adjust_cfa_note (insn, amount,
25414 hard_frame_pointer_rtx,
25415 hard_frame_pointer_rtx);
25418 /* Force out any pending memory operations that reference stacked data
25419 before stack de-allocation occurs. */
25420 emit_insn (gen_blockage ());
25421 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25422 hard_frame_pointer_rtx));
25423 arm_add_cfa_adjust_cfa_note (insn, 0,
25424 stack_pointer_rtx,
25425 hard_frame_pointer_rtx);
25426 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25427 deleted. */
25428 emit_insn (gen_force_register_use (stack_pointer_rtx));
25431 else
25433 /* Pop off outgoing args and local frame to adjust stack pointer to
25434 last saved register. */
25435 amount = offsets->outgoing_args - offsets->saved_regs;
25436 if (amount)
25438 rtx_insn *tmp;
25439 /* Force out any pending memory operations that reference stacked data
25440 before stack de-allocation occurs. */
25441 emit_insn (gen_blockage ());
25442 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25443 stack_pointer_rtx,
25444 GEN_INT (amount)));
25445 arm_add_cfa_adjust_cfa_note (tmp, amount,
25446 stack_pointer_rtx, stack_pointer_rtx);
25447 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25448 not deleted. */
25449 emit_insn (gen_force_register_use (stack_pointer_rtx));
25453 if (TARGET_HARD_FLOAT && TARGET_VFP)
25455 /* Generate VFP register multi-pop. */
25456 int end_reg = LAST_VFP_REGNUM + 1;
25458 /* Scan the registers in reverse order. We need to match
25459 any groupings made in the prologue and generate matching
25460 vldm operations. We need to match the groups because,
25461 unlike pop, vldm can only restore consecutive registers. */
25462 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25463 /* Look for a case where a reg does not need restoring. */
25464 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25465 && (!df_regs_ever_live_p (i + 1)
25466 || call_used_regs[i + 1]))
25468 /* Restore the regs discovered so far (from reg+2 to
25469 end_reg). */
25470 if (end_reg > i + 2)
25471 arm_emit_vfp_multi_reg_pop (i + 2,
25472 (end_reg - (i + 2)) / 2,
25473 stack_pointer_rtx);
25474 end_reg = i;
25477 /* Restore the remaining regs that we have discovered (or possibly
25478 even all of them, if the conditional in the for loop never
25479 fired). */
25480 if (end_reg > i + 2)
25481 arm_emit_vfp_multi_reg_pop (i + 2,
25482 (end_reg - (i + 2)) / 2,
25483 stack_pointer_rtx);
25486 if (TARGET_IWMMXT)
25487 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25488 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25490 rtx_insn *insn;
25491 rtx addr = gen_rtx_MEM (V2SImode,
25492 gen_rtx_POST_INC (SImode,
25493 stack_pointer_rtx));
25494 set_mem_alias_set (addr, get_frame_alias_set ());
25495 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25496 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25497 gen_rtx_REG (V2SImode, i),
25498 NULL_RTX);
25499 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25500 stack_pointer_rtx, stack_pointer_rtx);
25503 if (saved_regs_mask)
25505 rtx insn;
25506 bool return_in_pc = false;
25508 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25509 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25510 && !IS_STACKALIGN (func_type)
25511 && really_return
25512 && crtl->args.pretend_args_size == 0
25513 && saved_regs_mask & (1 << LR_REGNUM)
25514 && !crtl->calls_eh_return)
25516 saved_regs_mask &= ~(1 << LR_REGNUM);
25517 saved_regs_mask |= (1 << PC_REGNUM);
25518 return_in_pc = true;
25521 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25523 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25524 if (saved_regs_mask & (1 << i))
25526 rtx addr = gen_rtx_MEM (SImode,
25527 gen_rtx_POST_INC (SImode,
25528 stack_pointer_rtx));
25529 set_mem_alias_set (addr, get_frame_alias_set ());
25531 if (i == PC_REGNUM)
25533 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25534 XVECEXP (insn, 0, 0) = ret_rtx;
25535 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25536 addr);
25537 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25538 insn = emit_jump_insn (insn);
25540 else
25542 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25543 addr));
25544 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25545 gen_rtx_REG (SImode, i),
25546 NULL_RTX);
25547 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25548 stack_pointer_rtx,
25549 stack_pointer_rtx);
25553 else
25555 if (TARGET_LDRD
25556 && current_tune->prefer_ldrd_strd
25557 && !optimize_function_for_size_p (cfun))
25559 if (TARGET_THUMB2)
25560 thumb2_emit_ldrd_pop (saved_regs_mask);
25561 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25562 arm_emit_ldrd_pop (saved_regs_mask);
25563 else
25564 arm_emit_multi_reg_pop (saved_regs_mask);
25566 else
25567 arm_emit_multi_reg_pop (saved_regs_mask);
25570 if (return_in_pc)
25571 return;
25574 amount
25575 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25576 if (amount)
25578 int i, j;
25579 rtx dwarf = NULL_RTX;
25580 rtx_insn *tmp =
25581 emit_insn (gen_addsi3 (stack_pointer_rtx,
25582 stack_pointer_rtx,
25583 GEN_INT (amount)));
25585 RTX_FRAME_RELATED_P (tmp) = 1;
25587 if (cfun->machine->uses_anonymous_args)
25589 /* Restore pretend args. Refer to arm_expand_prologue for how
25590 pretend_args are saved on the stack. */
25591 int num_regs = crtl->args.pretend_args_size / 4;
25592 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25593 for (j = 0, i = 0; j < num_regs; i++)
25594 if (saved_regs_mask & (1 << i))
25596 rtx reg = gen_rtx_REG (SImode, i);
25597 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25598 j++;
25600 REG_NOTES (tmp) = dwarf;
25602 arm_add_cfa_adjust_cfa_note (tmp, amount,
25603 stack_pointer_rtx, stack_pointer_rtx);
25606 if (!really_return)
25607 return;
25609 if (crtl->calls_eh_return)
25610 emit_insn (gen_addsi3 (stack_pointer_rtx,
25611 stack_pointer_rtx,
25612 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25614 if (IS_STACKALIGN (func_type))
25615 /* Restore the original stack pointer. Before prologue, the stack was
25616 realigned and the original stack pointer saved in r0. For details,
25617 see comment in arm_expand_prologue. */
25618 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25620 emit_jump_insn (simple_return_rtx);
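/* Rough illustration of the expansion above (registers and sizes
   hypothetical): an ARM-mode function with a 16-byte local frame that saved
   r4-r6 and LR unwinds as approximately

	add	sp, sp, #16
	pop	{r4, r5, r6, pc}

   with any vldmia for callee-saved VFP registers emitted before the final
   pop.  */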
25623 /* Implementation of insn prologue_thumb1_interwork. This is the first
25624 "instruction" of a function called in ARM mode. Switch to Thumb mode. */
25626 const char *
25627 thumb1_output_interwork (void)
25629 const char * name;
25630 FILE *f = asm_out_file;
25632 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25633 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25634 == SYMBOL_REF);
25635 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25637 /* Generate code sequence to switch us into Thumb mode. */
25638 /* The .code 32 directive has already been emitted by
25639 ASM_DECLARE_FUNCTION_NAME. */
25640 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25641 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25643 /* Generate a label, so that the debugger will notice the
25644 change in instruction sets. This label is also used by
25645 the assembler to bypass the ARM code when this function
25646 is called from a Thumb encoded function elsewhere in the
25647 same file. Hence the definition of STUB_NAME here must
25648 agree with the definition in gas/config/tc-arm.c. */
25650 #define STUB_NAME ".real_start_of"
25652 fprintf (f, "\t.code\t16\n");
25653 #ifdef ARM_PE
25654 if (arm_dllexport_name_p (name))
25655 name = arm_strip_name_encoding (name);
25656 #endif
25657 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25658 fprintf (f, "\t.thumb_func\n");
25659 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25661 return "";
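/* For a hypothetical function `foo' (user-label prefix omitted) the code
   above emits roughly

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_offoo
	.thumb_func
   .real_start_offoo:

   i.e. the ARM-mode entry point simply branches to the Thumb-mode body with
   the Thumb bit set.  */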
25664 /* Handle the case of a double word load into a low register from
25665 a computed memory address. The computed address may involve a
25666 register which is overwritten by the load. */
25667 const char *
25668 thumb_load_double_from_address (rtx *operands)
25670 rtx addr;
25671 rtx base;
25672 rtx offset;
25673 rtx arg1;
25674 rtx arg2;
25676 gcc_assert (REG_P (operands[0]));
25677 gcc_assert (MEM_P (operands[1]));
25679 /* Get the memory address. */
25680 addr = XEXP (operands[1], 0);
25682 /* Work out how the memory address is computed. */
25683 switch (GET_CODE (addr))
25685 case REG:
25686 operands[2] = adjust_address (operands[1], SImode, 4);
25688 if (REGNO (operands[0]) == REGNO (addr))
25690 output_asm_insn ("ldr\t%H0, %2", operands);
25691 output_asm_insn ("ldr\t%0, %1", operands);
25693 else
25695 output_asm_insn ("ldr\t%0, %1", operands);
25696 output_asm_insn ("ldr\t%H0, %2", operands);
25698 break;
25700 case CONST:
25701 /* Compute <address> + 4 for the high order load. */
25702 operands[2] = adjust_address (operands[1], SImode, 4);
25704 output_asm_insn ("ldr\t%0, %1", operands);
25705 output_asm_insn ("ldr\t%H0, %2", operands);
25706 break;
25708 case PLUS:
25709 arg1 = XEXP (addr, 0);
25710 arg2 = XEXP (addr, 1);
25712 if (CONSTANT_P (arg1))
25713 base = arg2, offset = arg1;
25714 else
25715 base = arg1, offset = arg2;
25717 gcc_assert (REG_P (base));
25719 /* Catch the case of <address> = <reg> + <reg> */
25720 if (REG_P (offset))
25722 int reg_offset = REGNO (offset);
25723 int reg_base = REGNO (base);
25724 int reg_dest = REGNO (operands[0]);
25726 /* Add the base and offset registers together into the
25727 higher destination register. */
25728 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25729 reg_dest + 1, reg_base, reg_offset);
25731 /* Load the lower destination register from the address in
25732 the higher destination register. */
25733 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25734 reg_dest, reg_dest + 1);
25736 /* Load the higher destination register from its own address
25737 plus 4. */
25738 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25739 reg_dest + 1, reg_dest + 1);
25741 else
25743 /* Compute <address> + 4 for the high order load. */
25744 operands[2] = adjust_address (operands[1], SImode, 4);
25746 /* If the computed address is held in the low order register
25747 then load the high order register first, otherwise always
25748 load the low order register first. */
25749 if (REGNO (operands[0]) == REGNO (base))
25751 output_asm_insn ("ldr\t%H0, %2", operands);
25752 output_asm_insn ("ldr\t%0, %1", operands);
25754 else
25756 output_asm_insn ("ldr\t%0, %1", operands);
25757 output_asm_insn ("ldr\t%H0, %2", operands);
25760 break;
25762 case LABEL_REF:
25763 /* With no registers to worry about we can just load the value
25764 directly. */
25765 operands[2] = adjust_address (operands[1], SImode, 4);
25767 output_asm_insn ("ldr\t%H0, %2", operands);
25768 output_asm_insn ("ldr\t%0, %1", operands);
25769 break;
25771 default:
25772 gcc_unreachable ();
25775 return "";
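/* For instance (hypothetical registers), a double-word load from r0 + r1
   into r2/r3 goes through the REG + REG case above and produces

	add	r3, r0, r1
	ldr	r2, [r3, #0]
	ldr	r3, [r3, #4]

   so the register holding the computed address is always the high half of
   the destination, which is overwritten last.  */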
25778 const char *
25779 thumb_output_move_mem_multiple (int n, rtx *operands)
25781 switch (n)
25783 case 2:
25784 if (REGNO (operands[4]) > REGNO (operands[5]))
25785 std::swap (operands[4], operands[5]);
25787 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25788 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25789 break;
25791 case 3:
25792 if (REGNO (operands[4]) > REGNO (operands[5]))
25793 std::swap (operands[4], operands[5]);
25794 if (REGNO (operands[5]) > REGNO (operands[6]))
25795 std::swap (operands[5], operands[6]);
25796 if (REGNO (operands[4]) > REGNO (operands[5]))
25797 std::swap (operands[4], operands[5]);
25799 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25800 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25801 break;
25803 default:
25804 gcc_unreachable ();
25807 return "";
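/* E.g. with n == 2, a source pointer in r1, a destination pointer in r0 and
   scratch registers r4/r5 (all hypothetical) the output is simply

	ldmia	r1!, {r4, r5}
	stmia	r0!, {r4, r5}

   after the scratch registers have been sorted into ascending order, as
   ldm/stm require.  */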
25810 /* Output a call-via instruction for thumb state. */
25811 const char *
25812 thumb_call_via_reg (rtx reg)
25814 int regno = REGNO (reg);
25815 rtx *labelp;
25817 gcc_assert (regno < LR_REGNUM);
25819 /* If we are in the normal text section we can use a single instance
25820 per compilation unit. If we are doing function sections, then we need
25821 an entry per section, since we can't rely on reachability. */
25822 if (in_section == text_section)
25824 thumb_call_reg_needed = 1;
25826 if (thumb_call_via_label[regno] == NULL)
25827 thumb_call_via_label[regno] = gen_label_rtx ();
25828 labelp = thumb_call_via_label + regno;
25830 else
25832 if (cfun->machine->call_via[regno] == NULL)
25833 cfun->machine->call_via[regno] = gen_label_rtx ();
25834 labelp = cfun->machine->call_via + regno;
25837 output_asm_insn ("bl\t%a0", labelp);
25838 return "";
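/* A call through, say, r4 therefore assembles to "bl <label>"; the matching
   "<label>: bx r4" stub is emitted separately, e.g. by arm_file_end below
   for labels that live in the normal text section.  */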
25841 /* Routines for generating rtl. */
25842 void
25843 thumb_expand_movmemqi (rtx *operands)
25845 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25846 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25847 HOST_WIDE_INT len = INTVAL (operands[2]);
25848 HOST_WIDE_INT offset = 0;
25850 while (len >= 12)
25852 emit_insn (gen_movmem12b (out, in, out, in));
25853 len -= 12;
25856 if (len >= 8)
25858 emit_insn (gen_movmem8b (out, in, out, in));
25859 len -= 8;
25862 if (len >= 4)
25864 rtx reg = gen_reg_rtx (SImode);
25865 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25866 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25867 len -= 4;
25868 offset += 4;
25871 if (len >= 2)
25873 rtx reg = gen_reg_rtx (HImode);
25874 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25875 plus_constant (Pmode, in,
25876 offset))));
25877 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25878 offset)),
25879 reg));
25880 len -= 2;
25881 offset += 2;
25884 if (len)
25886 rtx reg = gen_reg_rtx (QImode);
25887 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25888 plus_constant (Pmode, in,
25889 offset))));
25890 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25891 offset)),
25892 reg));
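/* Worked example (sizes only, pointers hypothetical): a 23-byte copy becomes
   one 12-byte block move and one 8-byte block move (both of which
   post-increment the pointers), followed by a halfword copy at offset 0 and
   a byte copy at offset 2 from the updated pointers.  */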
25896 void
25897 thumb_reload_out_hi (rtx *operands)
25899 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25902 /* Return the length of a function name prefix
25903 that starts with the character 'c'. */
25904 static int
25905 arm_get_strip_length (int c)
25907 switch (c)
25909 ARM_NAME_ENCODING_LENGTHS
25910 default: return 0;
25914 /* Return a pointer to a function's name with any
25915 and all prefix encodings stripped from it. */
25916 const char *
25917 arm_strip_name_encoding (const char *name)
25919 int skip;
25921 while ((skip = arm_get_strip_length (* name)))
25922 name += skip;
25924 return name;
25927 /* If there is a '*' anywhere in the name's prefix, then
25928 emit the stripped name verbatim, otherwise prepend an
25929 underscore if leading underscores are being used. */
25930 void
25931 arm_asm_output_labelref (FILE *stream, const char *name)
25933 int skip;
25934 int verbatim = 0;
25936 while ((skip = arm_get_strip_length (* name)))
25938 verbatim |= (*name == '*');
25939 name += skip;
25942 if (verbatim)
25943 fputs (name, stream);
25944 else
25945 asm_fprintf (stream, "%U%s", name);
25948 /* This function is used to emit an EABI tag and its associated value.
25949 We emit the numerical value of the tag in case the assembler does not
25950 support textual tags (e.g. gas prior to 2.20). If requested we include
25951 the tag name in a comment so that anyone reading the assembler output
25952 will know which tag is being set.
25954 This function is not static because arm-c.c needs it too. */
25956 void
25957 arm_emit_eabi_attribute (const char *name, int num, int val)
25959 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25960 if (flag_verbose_asm || flag_debug_asm)
25961 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25962 asm_fprintf (asm_out_file, "\n");
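/* For example, with -fverbose-asm this emits

	.eabi_attribute 30, 2	@ Tag_ABI_optimization_goals

   The numeric form is what older assemblers understand; the tag name is
   only a comment.  */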
25965 /* This function is used to print CPU tuning information as a comment
25966 in the assembler file. Pointers are not printed for now. */
25968 void
25969 arm_print_tune_info (void)
25971 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25972 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25973 current_tune->constant_limit);
25974 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25975 current_tune->max_insns_skipped);
25976 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
25977 current_tune->prefetch.num_slots);
25978 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
25979 current_tune->prefetch.l1_cache_size);
25980 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25981 current_tune->prefetch.l1_cache_line_size);
25982 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25983 (int) current_tune->prefer_constant_pool);
25984 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25985 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25986 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25987 current_tune->branch_cost (false, false));
25988 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25989 current_tune->branch_cost (false, true));
25990 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25991 current_tune->branch_cost (true, false));
25992 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25993 current_tune->branch_cost (true, true));
25994 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25995 (int) current_tune->prefer_ldrd_strd);
25996 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25997 (int) current_tune->logical_op_non_short_circuit_thumb,
25998 (int) current_tune->logical_op_non_short_circuit_arm);
25999 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
26000 (int) current_tune->prefer_neon_for_64bits);
26001 asm_fprintf (asm_out_file,
26002 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
26003 (int) current_tune->disparage_flag_setting_t16_encodings);
26004 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
26005 (int) current_tune->string_ops_prefer_neon);
26006 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
26007 current_tune->max_insns_inline_memset);
26008 asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
26009 current_tune->fusible_ops);
26010 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
26011 (int) current_tune->sched_autopref);
26014 static void
26015 arm_file_start (void)
26017 int val;
26019 if (TARGET_BPABI)
26021 if (arm_selected_arch)
26023 /* armv7ve doesn't support any extensions. */
26024 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
26026 /* Keep backward compatibility for assemblers
26027 which don't support armv7ve. */
26028 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26029 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26030 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26031 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26032 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26034 else
26036 const char* pos = strchr (arm_selected_arch->name, '+');
26037 if (pos)
26039 char buf[32];
26040 gcc_assert (strlen (arm_selected_arch->name)
26041 <= sizeof (buf) / sizeof (*pos));
26042 strncpy (buf, arm_selected_arch->name,
26043 (pos - arm_selected_arch->name) * sizeof (*pos));
26044 buf[pos - arm_selected_arch->name] = '\0';
26045 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
26046 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
26048 else
26049 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
26052 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
26053 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
26054 else
26056 const char* truncated_name
26057 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
26058 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26061 if (print_tune_info)
26062 arm_print_tune_info ();
26064 if (! TARGET_SOFT_FLOAT && TARGET_VFP)
26066 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26067 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26069 if (TARGET_HARD_FLOAT_ABI)
26070 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26073 /* Some of these attributes only apply when the corresponding features
26074 are used. However we don't have any easy way of figuring this out.
26075 Conservatively record the setting that would have been used. */
26077 if (flag_rounding_math)
26078 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26080 if (!flag_unsafe_math_optimizations)
26082 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26083 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26085 if (flag_signaling_nans)
26086 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26088 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26089 flag_finite_math_only ? 1 : 3);
26091 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26092 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26093 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26094 flag_short_enums ? 1 : 2);
26096 /* Tag_ABI_optimization_goals. */
26097 if (optimize_size)
26098 val = 4;
26099 else if (optimize >= 2)
26100 val = 2;
26101 else if (optimize)
26102 val = 1;
26103 else
26104 val = 6;
26105 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26107 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26108 unaligned_access);
26110 if (arm_fp16_format)
26111 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26112 (int) arm_fp16_format);
26114 if (arm_lang_output_object_attributes_hook)
26115 arm_lang_output_object_attributes_hook();
26118 default_file_start ();
26121 static void
26122 arm_file_end (void)
26124 int regno;
26126 if (NEED_INDICATE_EXEC_STACK)
26127 /* Add .note.GNU-stack. */
26128 file_end_indicate_exec_stack ();
26130 if (! thumb_call_reg_needed)
26131 return;
26133 switch_to_section (text_section);
26134 asm_fprintf (asm_out_file, "\t.code 16\n");
26135 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26137 for (regno = 0; regno < LR_REGNUM; regno++)
26139 rtx label = thumb_call_via_label[regno];
26141 if (label != 0)
26143 targetm.asm_out.internal_label (asm_out_file, "L",
26144 CODE_LABEL_NUMBER (label));
26145 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26150 #ifndef ARM_PE
26151 /* Symbols in the text segment can be accessed without indirecting via the
26152 constant pool; it may take an extra binary operation, but this is still
26153 faster than indirecting via memory. Don't do this when not optimizing,
26154 since we won't be calculating all of the offsets necessary to do this
26155 simplification. */
26157 static void
26158 arm_encode_section_info (tree decl, rtx rtl, int first)
26160 if (optimize > 0 && TREE_CONSTANT (decl))
26161 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26163 default_encode_section_info (decl, rtl, first);
26165 #endif /* !ARM_PE */
26167 static void
26168 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26170 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26171 && !strcmp (prefix, "L"))
26173 arm_ccfsm_state = 0;
26174 arm_target_insn = NULL;
26176 default_internal_label (stream, prefix, labelno);
26179 /* Output code to add DELTA to the first argument, and then jump
26180 to FUNCTION. Used for C++ multiple inheritance. */
26182 static void
26183 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26184 HOST_WIDE_INT, tree function)
26186 static int thunk_label = 0;
26187 char label[256];
26188 char labelpc[256];
26189 int mi_delta = delta;
26190 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26191 int shift = 0;
26192 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26193 ? 1 : 0);
26194 if (mi_delta < 0)
26195 mi_delta = - mi_delta;
26197 final_start_function (emit_barrier (), file, 1);
26199 if (TARGET_THUMB1)
26201 int labelno = thunk_label++;
26202 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26203 /* Thunks are entered in ARM mode when available. */
26204 if (TARGET_THUMB1_ONLY)
26206 /* push r3 so we can use it as a temporary. */
26207 /* TODO: Omit this save if r3 is not used. */
26208 fputs ("\tpush {r3}\n", file);
26209 fputs ("\tldr\tr3, ", file);
26211 else
26213 fputs ("\tldr\tr12, ", file);
26215 assemble_name (file, label);
26216 fputc ('\n', file);
26217 if (flag_pic)
26219 /* If we are generating PIC, the ldr instruction below loads
26220 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26221 the address of the add + 8, so we have:
26223 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26224 = target + 1.
26226 Note that we have "+ 1" because some versions of GNU ld
26227 don't set the low bit of the result for R_ARM_REL32
26228 relocations against thumb function symbols.
26229 On ARMv6M this is +4, not +8. */
26230 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26231 assemble_name (file, labelpc);
26232 fputs (":\n", file);
26233 if (TARGET_THUMB1_ONLY)
26235 /* This is 2 insns after the start of the thunk, so we know it
26236 is 4-byte aligned. */
26237 fputs ("\tadd\tr3, pc, r3\n", file);
26238 fputs ("\tmov r12, r3\n", file);
26240 else
26241 fputs ("\tadd\tr12, pc, r12\n", file);
26243 else if (TARGET_THUMB1_ONLY)
26244 fputs ("\tmov r12, r3\n", file);
26246 if (TARGET_THUMB1_ONLY)
26248 if (mi_delta > 255)
26250 fputs ("\tldr\tr3, ", file);
26251 assemble_name (file, label);
26252 fputs ("+4\n", file);
26253 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26254 mi_op, this_regno, this_regno);
26256 else if (mi_delta != 0)
26258 /* Thumb1 unified syntax requires s suffix in instruction name when
26259 one of the operands is immediate. */
26260 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26261 mi_op, this_regno, this_regno,
26262 mi_delta);
26265 else
26267 /* TODO: Use movw/movt for large constants when available. */
26268 while (mi_delta != 0)
26270 if ((mi_delta & (3 << shift)) == 0)
26271 shift += 2;
26272 else
26274 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26275 mi_op, this_regno, this_regno,
26276 mi_delta & (0xff << shift));
26277 mi_delta &= ~(0xff << shift);
26278 shift += 8;
26282 if (TARGET_THUMB1)
26284 if (TARGET_THUMB1_ONLY)
26285 fputs ("\tpop\t{r3}\n", file);
26287 fprintf (file, "\tbx\tr12\n");
26288 ASM_OUTPUT_ALIGN (file, 2);
26289 assemble_name (file, label);
26290 fputs (":\n", file);
26291 if (flag_pic)
26293 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26294 rtx tem = XEXP (DECL_RTL (function), 0);
26295 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26296 pipeline offset is four rather than eight. Adjust the offset
26297 accordingly. */
26298 tem = plus_constant (GET_MODE (tem), tem,
26299 TARGET_THUMB1_ONLY ? -3 : -7);
26300 tem = gen_rtx_MINUS (GET_MODE (tem),
26301 tem,
26302 gen_rtx_SYMBOL_REF (Pmode,
26303 ggc_strdup (labelpc)));
26304 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26306 else
26307 /* Output ".word .LTHUNKn". */
26308 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26310 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26311 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26313 else
26315 fputs ("\tb\t", file);
26316 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26317 if (NEED_PLT_RELOC)
26318 fputs ("(PLT)", file);
26319 fputc ('\n', file);
26322 final_end_function ();
26325 /* MI thunk handling for TARGET_32BIT. */
26327 static void
26328 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26329 HOST_WIDE_INT vcall_offset, tree function)
26331 /* On ARM, this_regno is R0 or R1 depending on
26332 whether the function returns an aggregate or not.
26334 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26335 function)
26336 ? R1_REGNUM : R0_REGNUM);
26338 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26339 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26340 reload_completed = 1;
26341 emit_note (NOTE_INSN_PROLOGUE_END);
26343 /* Add DELTA to THIS_RTX. */
26344 if (delta != 0)
26345 arm_split_constant (PLUS, Pmode, NULL_RTX,
26346 delta, this_rtx, this_rtx, false);
26348 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26349 if (vcall_offset != 0)
26351 /* Load *THIS_RTX. */
26352 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26353 /* Compute *THIS_RTX + VCALL_OFFSET. */
26354 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26355 false);
26356 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26357 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26358 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26361 /* Generate a tail call to the target function. */
26362 if (!TREE_USED (function))
26364 assemble_external (function);
26365 TREE_USED (function) = 1;
26367 rtx funexp = XEXP (DECL_RTL (function), 0);
26368 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26369 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26370 SIBLING_CALL_P (insn) = 1;
26372 insn = get_insns ();
26373 shorten_branches (insn);
26374 final_start_function (insn, file, 1);
26375 final (insn, file, 1);
26376 final_end_function ();
26378 /* Stop pretending this is a post-reload pass. */
26379 reload_completed = 0;
26382 /* Output code to add DELTA to the first argument, and then jump
26383 to FUNCTION. Used for C++ multiple inheritance. */
26385 static void
26386 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26387 HOST_WIDE_INT vcall_offset, tree function)
26389 if (TARGET_32BIT)
26390 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26391 else
26392 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26396 arm_emit_vector_const (FILE *file, rtx x)
26398 int i;
26399 const char * pattern;
26401 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26403 switch (GET_MODE (x))
26405 case V2SImode: pattern = "%08x"; break;
26406 case V4HImode: pattern = "%04x"; break;
26407 case V8QImode: pattern = "%02x"; break;
26408 default: gcc_unreachable ();
26411 fprintf (file, "0x");
26412 for (i = CONST_VECTOR_NUNITS (x); i--;)
26414 rtx element;
26416 element = CONST_VECTOR_ELT (x, i);
26417 fprintf (file, pattern, INTVAL (element));
26420 return 1;
26423 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26424 HFmode constant pool entries are actually loaded with ldr. */
26425 void
26426 arm_emit_fp16_const (rtx c)
26428 long bits;
26430 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26431 if (WORDS_BIG_ENDIAN)
26432 assemble_zeros (2);
26433 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26434 if (!WORDS_BIG_ENDIAN)
26435 assemble_zeros (2);
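/* E.g. the HFmode constant 1.0 (bit pattern 0x3c00) is emitted on a
   little-endian target as the two-byte value 0x3c00 followed by two bytes
   of padding, so the pool entry still fills the whole word that the ldr
   loads.  */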
26438 const char *
26439 arm_output_load_gr (rtx *operands)
26441 rtx reg;
26442 rtx offset;
26443 rtx wcgr;
26444 rtx sum;
26446 if (!MEM_P (operands [1])
26447 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26448 || !REG_P (reg = XEXP (sum, 0))
26449 || !CONST_INT_P (offset = XEXP (sum, 1))
26450 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26451 return "wldrw%?\t%0, %1";
26453 /* Fix up an out-of-range load of a GR register. */
26454 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26455 wcgr = operands[0];
26456 operands[0] = reg;
26457 output_asm_insn ("ldr%?\t%0, %1", operands);
26459 operands[0] = wcgr;
26460 operands[1] = reg;
26461 output_asm_insn ("tmcr%?\t%0, %1", operands);
26462 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26464 return "";
26467 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26469 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26470 named arg and all anonymous args onto the stack.
26471 XXX I know the prologue shouldn't be pushing registers, but it is faster
26472 that way. */
26474 static void
26475 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26476 machine_mode mode,
26477 tree type,
26478 int *pretend_size,
26479 int second_time ATTRIBUTE_UNUSED)
26481 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26482 int nregs;
26484 cfun->machine->uses_anonymous_args = 1;
26485 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26487 nregs = pcum->aapcs_ncrn;
26488 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26489 nregs++;
26491 else
26492 nregs = pcum->nregs;
26494 if (nregs < NUM_ARG_REGS)
26495 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
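/* For instance, for a hypothetical "int f (int a, ...)" under AAPCS the one
   named argument lives in r0, so nregs is 1 and *pretend_size becomes
   (4 - 1) * 4 = 12; the prologue then pushes r1-r3 so that the anonymous
   arguments are contiguous with any passed on the stack.  */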
26498 /* We can't rely on the caller doing the proper promotion when
26499 using APCS or ATPCS. */
26501 static bool
26502 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26504 return !TARGET_AAPCS_BASED;
26507 static machine_mode
26508 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26509 machine_mode mode,
26510 int *punsignedp ATTRIBUTE_UNUSED,
26511 const_tree fntype ATTRIBUTE_UNUSED,
26512 int for_return ATTRIBUTE_UNUSED)
26514 if (GET_MODE_CLASS (mode) == MODE_INT
26515 && GET_MODE_SIZE (mode) < 4)
26516 return SImode;
26518 return mode;
26521 /* AAPCS based ABIs use short enums by default. */
26523 static bool
26524 arm_default_short_enums (void)
26526 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26530 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26532 static bool
26533 arm_align_anon_bitfield (void)
26535 return TARGET_AAPCS_BASED;
26539 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26541 static tree
26542 arm_cxx_guard_type (void)
26544 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26548 /* The EABI says test the least significant bit of a guard variable. */
26550 static bool
26551 arm_cxx_guard_mask_bit (void)
26553 return TARGET_AAPCS_BASED;
26557 /* The EABI specifies that all array cookies are 8 bytes long. */
26559 static tree
26560 arm_get_cookie_size (tree type)
26562 tree size;
26564 if (!TARGET_AAPCS_BASED)
26565 return default_cxx_get_cookie_size (type);
26567 size = build_int_cst (sizetype, 8);
26568 return size;
26572 /* The EABI says that array cookies should also contain the element size. */
26574 static bool
26575 arm_cookie_has_size (void)
26577 return TARGET_AAPCS_BASED;
26581 /* The EABI says constructors and destructors should return a pointer to
26582 the object constructed/destroyed. */
26584 static bool
26585 arm_cxx_cdtor_returns_this (void)
26587 return TARGET_AAPCS_BASED;
26590 /* The EABI says that an inline function may never be the key
26591 method. */
26593 static bool
26594 arm_cxx_key_method_may_be_inline (void)
26596 return !TARGET_AAPCS_BASED;
26599 static void
26600 arm_cxx_determine_class_data_visibility (tree decl)
26602 if (!TARGET_AAPCS_BASED
26603 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26604 return;
26606 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26607 is exported. However, on systems without dynamic vague linkage,
26608 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26609 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26610 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26611 else
26612 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26613 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26616 static bool
26617 arm_cxx_class_data_always_comdat (void)
26619 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26620 vague linkage if the class has no key function. */
26621 return !TARGET_AAPCS_BASED;
26625 /* The EABI says __aeabi_atexit should be used to register static
26626 destructors. */
26628 static bool
26629 arm_cxx_use_aeabi_atexit (void)
26631 return TARGET_AAPCS_BASED;
26635 void
26636 arm_set_return_address (rtx source, rtx scratch)
26638 arm_stack_offsets *offsets;
26639 HOST_WIDE_INT delta;
26640 rtx addr;
26641 unsigned long saved_regs;
26643 offsets = arm_get_frame_offsets ();
26644 saved_regs = offsets->saved_regs_mask;
26646 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26647 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26648 else
26650 if (frame_pointer_needed)
26651 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26652 else
26654 /* LR will be the first saved register. */
26655 delta = offsets->outgoing_args - (offsets->frame + 4);
26658 if (delta >= 4096)
26660 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26661 GEN_INT (delta & ~4095)));
26662 addr = scratch;
26663 delta &= 4095;
26665 else
26666 addr = stack_pointer_rtx;
26668 addr = plus_constant (Pmode, addr, delta);
26670 /* The store needs to be marked as frame related in order to prevent
26671 DSE from deleting it as dead if it is based on fp. */
26672 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26673 RTX_FRAME_RELATED_P (insn) = 1;
26674 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26679 void
26680 thumb_set_return_address (rtx source, rtx scratch)
26682 arm_stack_offsets *offsets;
26683 HOST_WIDE_INT delta;
26684 HOST_WIDE_INT limit;
26685 int reg;
26686 rtx addr;
26687 unsigned long mask;
26689 emit_use (source);
26691 offsets = arm_get_frame_offsets ();
26692 mask = offsets->saved_regs_mask;
26693 if (mask & (1 << LR_REGNUM))
26695 limit = 1024;
26696 /* Find the saved regs. */
26697 if (frame_pointer_needed)
26699 delta = offsets->soft_frame - offsets->saved_args;
26700 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26701 if (TARGET_THUMB1)
26702 limit = 128;
26704 else
26706 delta = offsets->outgoing_args - offsets->saved_args;
26707 reg = SP_REGNUM;
26709 /* Allow for the stack frame. */
26710 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26711 delta -= 16;
26712 /* The link register is always the first saved register. */
26713 delta -= 4;
26715 /* Construct the address. */
26716 addr = gen_rtx_REG (SImode, reg);
26717 if (delta > limit)
26719 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26720 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26721 addr = scratch;
26723 else
26724 addr = plus_constant (Pmode, addr, delta);
26726 /* The store needs to be marked as frame related in order to prevent
26727 DSE from deleting it as dead if it is based on fp. */
26728 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26729 RTX_FRAME_RELATED_P (insn) = 1;
26730 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26732 else
26733 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26736 /* Implements target hook vector_mode_supported_p. */
26737 bool
26738 arm_vector_mode_supported_p (machine_mode mode)
26740 /* Neon also supports V2SImode, etc. listed in the clause below. */
26741 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26742 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26743 || mode == V2DImode || mode == V8HFmode))
26744 return true;
26746 if ((TARGET_NEON || TARGET_IWMMXT)
26747 && ((mode == V2SImode)
26748 || (mode == V4HImode)
26749 || (mode == V8QImode)))
26750 return true;
26752 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26753 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26754 || mode == V2HAmode))
26755 return true;
26757 return false;
26760 /* Implements target hook array_mode_supported_p. */
26762 static bool
26763 arm_array_mode_supported_p (machine_mode mode,
26764 unsigned HOST_WIDE_INT nelems)
26766 if (TARGET_NEON
26767 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26768 && (nelems >= 2 && nelems <= 4))
26769 return true;
26771 return false;
26774 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26775 registers when autovectorizing for Neon, at least until multiple vector
26776 widths are supported properly by the middle-end. */
26778 static machine_mode
26779 arm_preferred_simd_mode (machine_mode mode)
26781 if (TARGET_NEON)
26782 switch (mode)
26784 case SFmode:
26785 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26786 case SImode:
26787 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26788 case HImode:
26789 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26790 case QImode:
26791 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26792 case DImode:
26793 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26794 return V2DImode;
26795 break;
26797 default:;
26800 if (TARGET_REALLY_IWMMXT)
26801 switch (mode)
26803 case SImode:
26804 return V2SImode;
26805 case HImode:
26806 return V4HImode;
26807 case QImode:
26808 return V8QImode;
26810 default:;
26813 return word_mode;
26816 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26818 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26819 using r0-r4 for function arguments, r7 for the stack frame, and then not
26820 having enough left over to do doubleword arithmetic. For Thumb-2 all the
26821 potentially problematic instructions accept high registers so this is not
26822 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26823 that require many low registers. */
26824 static bool
26825 arm_class_likely_spilled_p (reg_class_t rclass)
26827 if ((TARGET_THUMB1 && rclass == LO_REGS)
26828 || rclass == CC_REG)
26829 return true;
26831 return false;
26834 /* Implements target hook small_register_classes_for_mode_p. */
26835 bool
26836 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26838 return TARGET_THUMB1;
26841 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26842 ARM insns and therefore guarantee that the shift count is modulo 256.
26843 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26844 guarantee no particular behavior for out-of-range counts. */
26846 static unsigned HOST_WIDE_INT
26847 arm_shift_truncation_mask (machine_mode mode)
26849 return mode == SImode ? 255 : 0;
26853 /* Map internal gcc register numbers to DWARF2 register numbers. */
26855 unsigned int
26856 arm_dbx_register_number (unsigned int regno)
26858 if (regno < 16)
26859 return regno;
26861 if (IS_VFP_REGNUM (regno))
26863 /* See comment in arm_dwarf_register_span. */
26864 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26865 return 64 + regno - FIRST_VFP_REGNUM;
26866 else
26867 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26870 if (IS_IWMMXT_GR_REGNUM (regno))
26871 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26873 if (IS_IWMMXT_REGNUM (regno))
26874 return 112 + regno - FIRST_IWMMXT_REGNUM;
26876 return DWARF_FRAME_REGISTERS;
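/* Illustrative values: core registers r0-r15 map to themselves, the first
   VFP register maps to 64 (the legacy S-register numbering), the first
   IWMMXT control register to 104 and the first IWMMXT data register to 112;
   anything else is reported as DWARF_FRAME_REGISTERS.  */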
26879 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26880 GCC models them as 64 32-bit registers, so we need to describe this to
26881 the DWARF generation code. Other registers can use the default. */
26882 static rtx
26883 arm_dwarf_register_span (rtx rtl)
26885 machine_mode mode;
26886 unsigned regno;
26887 rtx parts[16];
26888 int nregs;
26889 int i;
26891 regno = REGNO (rtl);
26892 if (!IS_VFP_REGNUM (regno))
26893 return NULL_RTX;
26895 /* XXX FIXME: The EABI defines two VFP register ranges:
26896 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26897 256-287: D0-D31
26898 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26899 corresponding D register. Until GDB supports this, we shall use the
26900 legacy encodings. We also use these encodings for D0-D15 for
26901 compatibility with older debuggers. */
26902 mode = GET_MODE (rtl);
26903 if (GET_MODE_SIZE (mode) < 8)
26904 return NULL_RTX;
26906 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26908 nregs = GET_MODE_SIZE (mode) / 4;
26909 for (i = 0; i < nregs; i += 2)
26910 if (TARGET_BIG_END)
26912 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26913 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26915 else
26917 parts[i] = gen_rtx_REG (SImode, regno + i);
26918 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26921 else
26923 nregs = GET_MODE_SIZE (mode) / 8;
26924 for (i = 0; i < nregs; i++)
26925 parts[i] = gen_rtx_REG (DImode, regno + i);
26928 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26931 #if ARM_UNWIND_INFO
26932 /* Emit unwind directives for a store-multiple instruction or stack pointer
26933 push during alignment.
26934 These should only ever be generated by the function prologue code, so
26935 expect them to have a particular form.
26936 The store-multiple instruction sometimes pushes pc as the last register,
26937 although it should not be tracked into unwind information, or for -Os
26938 sometimes pushes some dummy registers before the first register that needs
26939 to be tracked in unwind information; such dummy registers are there just
26940 to avoid separate stack adjustment, and will not be restored in the
26941 epilogue. */
26943 static void
26944 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26946 int i;
26947 HOST_WIDE_INT offset;
26948 HOST_WIDE_INT nregs;
26949 int reg_size;
26950 unsigned reg;
26951 unsigned lastreg;
26952 unsigned padfirst = 0, padlast = 0;
26953 rtx e;
26955 e = XVECEXP (p, 0, 0);
26956 gcc_assert (GET_CODE (e) == SET);
26958 /* First insn will adjust the stack pointer. */
26959 gcc_assert (GET_CODE (e) == SET
26960 && REG_P (SET_DEST (e))
26961 && REGNO (SET_DEST (e)) == SP_REGNUM
26962 && GET_CODE (SET_SRC (e)) == PLUS);
26964 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26965 nregs = XVECLEN (p, 0) - 1;
26966 gcc_assert (nregs);
26968 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26969 if (reg < 16)
26971 /* For -Os dummy registers can be pushed at the beginning to
26972 avoid separate stack pointer adjustment. */
26973 e = XVECEXP (p, 0, 1);
26974 e = XEXP (SET_DEST (e), 0);
26975 if (GET_CODE (e) == PLUS)
26976 padfirst = INTVAL (XEXP (e, 1));
26977 gcc_assert (padfirst == 0 || optimize_size);
26978 /* The function prologue may also push pc, but not annotate it as it is
26979 never restored. We turn this into a stack pointer adjustment. */
26980 e = XVECEXP (p, 0, nregs);
26981 e = XEXP (SET_DEST (e), 0);
26982 if (GET_CODE (e) == PLUS)
26983 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26984 else
26985 padlast = offset - 4;
26986 gcc_assert (padlast == 0 || padlast == 4);
26987 if (padlast == 4)
26988 fprintf (asm_out_file, "\t.pad #4\n");
26989 reg_size = 4;
26990 fprintf (asm_out_file, "\t.save {");
26992 else if (IS_VFP_REGNUM (reg))
26994 reg_size = 8;
26995 fprintf (asm_out_file, "\t.vsave {");
26997 else
26998 /* Unknown register type. */
26999 gcc_unreachable ();
27001 /* If the stack increment doesn't match the size of the saved registers,
27002 something has gone horribly wrong. */
27003 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27005 offset = padfirst;
27006 lastreg = 0;
27007 /* The remaining insns will describe the stores. */
27008 for (i = 1; i <= nregs; i++)
27010 /* Expect (set (mem <addr>) (reg)).
27011 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27012 e = XVECEXP (p, 0, i);
27013 gcc_assert (GET_CODE (e) == SET
27014 && MEM_P (SET_DEST (e))
27015 && REG_P (SET_SRC (e)));
27017 reg = REGNO (SET_SRC (e));
27018 gcc_assert (reg >= lastreg);
27020 if (i != 1)
27021 fprintf (asm_out_file, ", ");
27022 /* We can't use %r for vfp because we need to use the
27023 double precision register names. */
27024 if (IS_VFP_REGNUM (reg))
27025 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27026 else
27027 asm_fprintf (asm_out_file, "%r", reg);
27029 if (flag_checking)
27031 /* Check that the addresses are consecutive. */
27032 e = XEXP (SET_DEST (e), 0);
27033 if (GET_CODE (e) == PLUS)
27034 gcc_assert (REG_P (XEXP (e, 0))
27035 && REGNO (XEXP (e, 0)) == SP_REGNUM
27036 && CONST_INT_P (XEXP (e, 1))
27037 && offset == INTVAL (XEXP (e, 1)));
27038 else
27039 gcc_assert (i == 1
27040 && REG_P (e)
27041 && REGNO (e) == SP_REGNUM);
27042 offset += reg_size;
27045 fprintf (asm_out_file, "}\n");
27046 if (padfirst)
27047 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
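/* For a prologue that pushed, say, r4, r5 and lr (hypothetical), the
   SEQUENCE handled above becomes ".save {r4, r5, lr}"; an extra ".pad #4"
   is emitted when the store-multiple also pushed a dummy word (e.g. pc)
   that will never be restored.  */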
27050 /* Emit unwind directives for a SET. */
27052 static void
27053 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27055 rtx e0;
27056 rtx e1;
27057 unsigned reg;
27059 e0 = XEXP (p, 0);
27060 e1 = XEXP (p, 1);
27061 switch (GET_CODE (e0))
27063 case MEM:
27064 /* Pushing a single register. */
27065 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27066 || !REG_P (XEXP (XEXP (e0, 0), 0))
27067 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27068 abort ();
27070 asm_fprintf (asm_out_file, "\t.save ");
27071 if (IS_VFP_REGNUM (REGNO (e1)))
27072 asm_fprintf(asm_out_file, "{d%d}\n",
27073 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27074 else
27075 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27076 break;
27078 case REG:
27079 if (REGNO (e0) == SP_REGNUM)
27081 /* A stack increment. */
27082 if (GET_CODE (e1) != PLUS
27083 || !REG_P (XEXP (e1, 0))
27084 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27085 || !CONST_INT_P (XEXP (e1, 1)))
27086 abort ();
27088 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27089 -INTVAL (XEXP (e1, 1)));
27091 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27093 HOST_WIDE_INT offset;
27095 if (GET_CODE (e1) == PLUS)
27097 if (!REG_P (XEXP (e1, 0))
27098 || !CONST_INT_P (XEXP (e1, 1)))
27099 abort ();
27100 reg = REGNO (XEXP (e1, 0));
27101 offset = INTVAL (XEXP (e1, 1));
27102 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27103 HARD_FRAME_POINTER_REGNUM, reg,
27104 offset);
27106 else if (REG_P (e1))
27108 reg = REGNO (e1);
27109 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27110 HARD_FRAME_POINTER_REGNUM, reg);
27112 else
27113 abort ();
27115 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27117 /* Move from sp to reg. */
27118 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27120 else if (GET_CODE (e1) == PLUS
27121 && REG_P (XEXP (e1, 0))
27122 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27123 && CONST_INT_P (XEXP (e1, 1)))
27125 /* Set reg to offset from sp. */
27126 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27127 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27129 else
27130 abort ();
27131 break;
27133 default:
27134 abort ();
27139 /* Emit unwind directives for the given insn. */
27141 static void
27142 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27144 rtx note, pat;
27145 bool handled_one = false;
27147 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27148 return;
27150 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27151 && (TREE_NOTHROW (current_function_decl)
27152 || crtl->all_throwers_are_sibcalls))
27153 return;
27155 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27156 return;
27158 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27160 switch (REG_NOTE_KIND (note))
27162 case REG_FRAME_RELATED_EXPR:
27163 pat = XEXP (note, 0);
27164 goto found;
27166 case REG_CFA_REGISTER:
27167 pat = XEXP (note, 0);
27168 if (pat == NULL)
27170 pat = PATTERN (insn);
27171 if (GET_CODE (pat) == PARALLEL)
27172 pat = XVECEXP (pat, 0, 0);
27175 /* Only emitted for IS_STACKALIGN re-alignment. */
27177 rtx dest, src;
27178 unsigned reg;
27180 src = SET_SRC (pat);
27181 dest = SET_DEST (pat);
27183 gcc_assert (src == stack_pointer_rtx);
27184 reg = REGNO (dest);
27185 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27186 reg + 0x90, reg);
27188 handled_one = true;
27189 break;
27191 /* The INSN is generated in the epilogue. It is marked RTX_FRAME_RELATED_P
27192 to get correct dwarf information for shrink-wrapping. We should not
27193 emit unwind information for it because these notes are used either for
27194 pretend arguments or to adjust sp and restore registers from the
27195 stack. */
27196 case REG_CFA_DEF_CFA:
27197 case REG_CFA_ADJUST_CFA:
27198 case REG_CFA_RESTORE:
27199 return;
27201 case REG_CFA_EXPRESSION:
27202 case REG_CFA_OFFSET:
27203 /* ??? Only handling here what we actually emit. */
27204 gcc_unreachable ();
27206 default:
27207 break;
27210 if (handled_one)
27211 return;
27212 pat = PATTERN (insn);
27213 found:
27215 switch (GET_CODE (pat))
27217 case SET:
27218 arm_unwind_emit_set (asm_out_file, pat);
27219 break;
27221 case SEQUENCE:
27222 /* Store multiple. */
27223 arm_unwind_emit_sequence (asm_out_file, pat);
27224 break;
27226 default:
27227 abort();
27232 /* Output a reference from a function exception table to the type_info
27233 object X. The EABI specifies that the symbol should be relocated by
27234 an R_ARM_TARGET2 relocation. */
27236 static bool
27237 arm_output_ttype (rtx x)
27239 fputs ("\t.word\t", asm_out_file);
27240 output_addr_const (asm_out_file, x);
27241 /* Use special relocations for symbol references. */
27242 if (!CONST_INT_P (x))
27243 fputs ("(TARGET2)", asm_out_file);
27244 fputc ('\n', asm_out_file);
27246 return TRUE;
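/* Illustrative example: for a typeinfo symbol such as _ZTIi (name chosen
   only for illustration), the directive emitted above would look like
       .word   _ZTIi(TARGET2)
   whereas a CONST_INT filter value is emitted as a plain .word.  */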
27249 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27251 static void
27252 arm_asm_emit_except_personality (rtx personality)
27254 fputs ("\t.personality\t", asm_out_file);
27255 output_addr_const (asm_out_file, personality);
27256 fputc ('\n', asm_out_file);
27259 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27261 static void
27262 arm_asm_init_sections (void)
27264 exception_section = get_unnamed_section (0, output_section_asm_op,
27265 "\t.handlerdata");
27267 #endif /* ARM_UNWIND_INFO */
27269 /* Output unwind directives for the start/end of a function. */
27271 void
27272 arm_output_fn_unwind (FILE * f, bool prologue)
27274 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27275 return;
27277 if (prologue)
27278 fputs ("\t.fnstart\n", f);
27279 else
27281 /* If this function will never be unwound, then mark it as such.
27282 The same condition is used in arm_unwind_emit to suppress
27283 the frame annotations. */
27284 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27285 && (TREE_NOTHROW (current_function_decl)
27286 || crtl->all_throwers_are_sibcalls))
27287 fputs("\t.cantunwind\n", f);
27289 fputs ("\t.fnend\n", f);
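/* Illustrative example (register list and pad size hypothetical): a typical
   function ends up bracketed by EABI unwind directives of the shape
       .fnstart
       .save   {r4, lr}        @ from arm_unwind_emit on the prologue insns
       .pad    #8
       ...
       .fnend
   or simply .fnstart/.cantunwind/.fnend when the function can never be
   unwound.  */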
27293 static bool
27294 arm_emit_tls_decoration (FILE *fp, rtx x)
27296 enum tls_reloc reloc;
27297 rtx val;
27299 val = XVECEXP (x, 0, 0);
27300 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27302 output_addr_const (fp, val);
27304 switch (reloc)
27306 case TLS_GD32:
27307 fputs ("(tlsgd)", fp);
27308 break;
27309 case TLS_LDM32:
27310 fputs ("(tlsldm)", fp);
27311 break;
27312 case TLS_LDO32:
27313 fputs ("(tlsldo)", fp);
27314 break;
27315 case TLS_IE32:
27316 fputs ("(gottpoff)", fp);
27317 break;
27318 case TLS_LE32:
27319 fputs ("(tpoff)", fp);
27320 break;
27321 case TLS_DESCSEQ:
27322 fputs ("(tlsdesc)", fp);
27323 break;
27324 default:
27325 gcc_unreachable ();
27328 switch (reloc)
27330 case TLS_GD32:
27331 case TLS_LDM32:
27332 case TLS_IE32:
27333 case TLS_DESCSEQ:
27334 fputs (" + (. - ", fp);
27335 output_addr_const (fp, XVECEXP (x, 0, 2));
27336 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27337 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27338 output_addr_const (fp, XVECEXP (x, 0, 3));
27339 fputc (')', fp);
27340 break;
27341 default:
27342 break;
27345 return TRUE;
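/* Illustrative example (label and constant hypothetical): a TLS_GD32
   reference to a symbol "x" is printed by the code above roughly as
       x(tlsgd) + (. - .LPIC4 - 8)
   where the label and the final constant come from operands 2 and 3 of
   the UNSPEC.  */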
27348 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27350 static void
27351 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27353 gcc_assert (size == 4);
27354 fputs ("\t.word\t", file);
27355 output_addr_const (file, x);
27356 fputs ("(tlsldo)", file);
27359 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27361 static bool
27362 arm_output_addr_const_extra (FILE *fp, rtx x)
27364 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27365 return arm_emit_tls_decoration (fp, x);
27366 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27368 char label[256];
27369 int labelno = INTVAL (XVECEXP (x, 0, 0));
27371 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27372 assemble_name_raw (fp, label);
27374 return TRUE;
27376 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27378 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27379 if (GOT_PCREL)
27380 fputs ("+.", fp);
27381 fputs ("-(", fp);
27382 output_addr_const (fp, XVECEXP (x, 0, 0));
27383 fputc (')', fp);
27384 return TRUE;
27386 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27388 output_addr_const (fp, XVECEXP (x, 0, 0));
27389 if (GOT_PCREL)
27390 fputs ("+.", fp);
27391 fputs ("-(", fp);
27392 output_addr_const (fp, XVECEXP (x, 0, 1));
27393 fputc (')', fp);
27394 return TRUE;
27396 else if (GET_CODE (x) == CONST_VECTOR)
27397 return arm_emit_vector_const (fp, x);
27399 return FALSE;
27402 /* Output assembly for a shift instruction.
27403 SET_FLAGS determines how the instruction modifies the condition codes.
27404 0 - Do not set condition codes.
27405 1 - Set condition codes.
27406 2 - Use smallest instruction. */
27407 const char *
27408 arm_output_shift(rtx * operands, int set_flags)
27410 char pattern[100];
27411 static const char flag_chars[3] = {'?', '.', '!'};
27412 const char *shift;
27413 HOST_WIDE_INT val;
27414 char c;
27416 c = flag_chars[set_flags];
27417 shift = shift_op(operands[3], &val);
27418 if (shift)
27420 if (val != -1)
27421 operands[2] = GEN_INT(val);
27422 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27424 else
27425 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27427 output_asm_insn (pattern, operands);
27428 return "";
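/* Illustrative example: assuming shift_op returns "lsl" for the shift in
   operand 3 and SET_FLAGS is 0 (flag character '?'), the template built
   above is "lsl%?\t%0, %1, %2", which output_asm_insn then expands with
   the actual operands.  The "lsl" mnemonic is only an assumed example of
   a shift_op result.  */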
27431 /* Output assembly for a WMMX immediate shift instruction. */
27432 const char *
27433 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27435 int shift = INTVAL (operands[2]);
27436 char templ[50];
27437 machine_mode opmode = GET_MODE (operands[0]);
27439 gcc_assert (shift >= 0);
27441 /* If the shift value exceeds the maximum for the mode (63 for the D qualifier,
27442 31 for W, 15 for H): emit WROR/WSRA with a shift of 32 (twice for DImode), otherwise zero the destination. */
27443 if (((opmode == V4HImode) && (shift > 15))
27444 || ((opmode == V2SImode) && (shift > 31))
27445 || ((opmode == DImode) && (shift > 63)))
27447 if (wror_or_wsra)
27449 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27450 output_asm_insn (templ, operands);
27451 if (opmode == DImode)
27453 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27454 output_asm_insn (templ, operands);
27457 else
27459 /* The destination register will contain all zeros. */
27460 sprintf (templ, "wzero\t%%0");
27461 output_asm_insn (templ, operands);
27463 return "";
27466 if ((opmode == DImode) && (shift > 32))
27468 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27469 output_asm_insn (templ, operands);
27470 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27471 output_asm_insn (templ, operands);
27473 else
27475 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27476 output_asm_insn (templ, operands);
27478 return "";
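/* Illustrative example (mnemonic hypothetical): a DImode shift by 40 is
   split by the code above into two instructions, e.g.
       wsrad   wR0, wR1, #32
       wsrad   wR0, wR0, #8
   and an out-of-range shift (e.g. > 63 for DImode) either clamps WROR/WSRA
   or emits wzero on the destination.  */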
27481 /* Output assembly for a WMMX tinsr instruction. */
27482 const char *
27483 arm_output_iwmmxt_tinsr (rtx *operands)
27485 int mask = INTVAL (operands[3]);
27486 int i;
27487 char templ[50];
27488 int units = mode_nunits[GET_MODE (operands[0])];
27489 gcc_assert ((mask & (mask - 1)) == 0);
27490 for (i = 0; i < units; ++i)
27492 if ((mask & 0x01) == 1)
27494 break;
27496 mask >>= 1;
27498 gcc_assert (i < units);
27500 switch (GET_MODE (operands[0]))
27502 case V8QImode:
27503 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27504 break;
27505 case V4HImode:
27506 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27507 break;
27508 case V2SImode:
27509 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27510 break;
27511 default:
27512 gcc_unreachable ();
27513 break;
27515 output_asm_insn (templ, operands);
27517 return "";
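/* Illustrative example: for a V8QImode destination with a mask operand of
   0x04, the loop above selects lane 2 and the template becomes
   "tinsrb%?\t%0, %2, #2".  */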
27520 /* Output a Thumb-1 casesi dispatch sequence. */
27521 const char *
27522 thumb1_output_casesi (rtx *operands)
27524 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27526 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27528 switch (GET_MODE(diff_vec))
27530 case QImode:
27531 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27532 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27533 case HImode:
27534 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27535 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27536 case SImode:
27537 return "bl\t%___gnu_thumb1_case_si";
27538 default:
27539 gcc_unreachable ();
27543 /* Output a Thumb-2 casesi instruction. */
27544 const char *
27545 thumb2_output_casesi (rtx *operands)
27547 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27549 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27551 output_asm_insn ("cmp\t%0, %1", operands);
27552 output_asm_insn ("bhi\t%l3", operands);
27553 switch (GET_MODE(diff_vec))
27555 case QImode:
27556 return "tbb\t[%|pc, %0]";
27557 case HImode:
27558 return "tbh\t[%|pc, %0, lsl #1]";
27559 case SImode:
27560 if (flag_pic)
27562 output_asm_insn ("adr\t%4, %l2", operands);
27563 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27564 output_asm_insn ("add\t%4, %4, %5", operands);
27565 return "bx\t%4";
27567 else
27569 output_asm_insn ("adr\t%4, %l2", operands);
27570 return "ldr\t%|pc, [%4, %0, lsl #2]";
27572 default:
27573 gcc_unreachable ();
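/* Illustrative example (registers and label hypothetical): for an HImode
   dispatch table the sequence emitted above is, schematically,
       cmp     r0, r1          @ index against upper bound
       bhi     .Ldefault
       tbh     [pc, r0, lsl #1]
   with QImode using tbb and SImode using an adr/ldr sequence instead.  */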
27577 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27578 per-core tuning structs. */
27579 static int
27580 arm_issue_rate (void)
27582 return current_tune->issue_rate;
27585 /* Return how many instructions the scheduler should look ahead to choose
27586 the best one. */
27587 static int
27588 arm_first_cycle_multipass_dfa_lookahead (void)
27590 int issue_rate = arm_issue_rate ();
27592 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27595 /* Enable modeling of L2 auto-prefetcher. */
27596 static int
27597 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27599 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27602 const char *
27603 arm_mangle_type (const_tree type)
27605 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27606 has to be mangled as if it is in the "std" namespace. */
27607 if (TARGET_AAPCS_BASED
27608 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27609 return "St9__va_list";
27611 /* Half-precision float. */
27612 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27613 return "Dh";
27615 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27616 builtin type. */
27617 if (TYPE_NAME (type) != NULL)
27618 return arm_mangle_builtin_type (type);
27620 /* Use the default mangling. */
27621 return NULL;
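/* Illustrative examples: under AAPCS, __va_list mangles to "St9__va_list"
   and a half-precision float type mangles to "Dh"; other builtin vector
   types go through arm_mangle_builtin_type, and everything else uses the
   default mangling.  */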
27624 /* Order of allocation of core registers for Thumb: this allocation is
27625 written over the corresponding initial entries of the array
27626 initialized with REG_ALLOC_ORDER. We allocate all low registers
27627 first. Saving and restoring a low register is usually cheaper than
27628 using a call-clobbered high register. */
27630 static const int thumb_core_reg_alloc_order[] =
27632 3, 2, 1, 0, 4, 5, 6, 7,
27633 14, 12, 8, 9, 10, 11
27636 /* Adjust register allocation order when compiling for Thumb. */
27638 void
27639 arm_order_regs_for_local_alloc (void)
27641 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27642 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27643 if (TARGET_THUMB)
27644 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27645 sizeof (thumb_core_reg_alloc_order));
27648 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27650 bool
27651 arm_frame_pointer_required (void)
27653 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27654 return true;
27656 /* If the function receives nonlocal gotos, it needs to save the frame
27657 pointer in the nonlocal_goto_save_area object. */
27658 if (cfun->has_nonlocal_label)
27659 return true;
27661 /* The frame pointer is required for non-leaf APCS frames. */
27662 if (TARGET_ARM && TARGET_APCS_FRAME && !leaf_function_p ())
27663 return true;
27665 /* If we are probing the stack in the prologue, we will have a faulting
27666 instruction prior to the stack adjustment and this requires a frame
27667 pointer if we want to catch the exception using the EABI unwinder. */
27668 if (!IS_INTERRUPT (arm_current_func_type ())
27669 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27670 && arm_except_unwind_info (&global_options) == UI_TARGET
27671 && cfun->can_throw_non_call_exceptions)
27673 HOST_WIDE_INT size = get_frame_size ();
27675 /* That's irrelevant if there is no stack adjustment. */
27676 if (size <= 0)
27677 return false;
27679 /* That's relevant only if there is a stack probe. */
27680 if (crtl->is_leaf && !cfun->calls_alloca)
27682 /* We don't have the final size of the frame so adjust. */
27683 size += 32 * UNITS_PER_WORD;
27684 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27685 return true;
27687 else
27688 return true;
27691 return false;
27694 /* Only thumb1 can't support conditional execution, so return true if
27695 the target is not thumb1. */
27696 static bool
27697 arm_have_conditional_execution (void)
27699 return !TARGET_THUMB1;
27702 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27703 static HOST_WIDE_INT
27704 arm_vector_alignment (const_tree type)
27706 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27708 if (TARGET_AAPCS_BASED)
27709 align = MIN (align, 64);
27711 return align;
27714 static unsigned int
27715 arm_autovectorize_vector_sizes (void)
27717 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27720 static bool
27721 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27723 /* Vectors which aren't in packed structures will not be less aligned than
27724 the natural alignment of their element type, so this is safe. */
27725 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27726 return !is_packed;
27728 return default_builtin_vector_alignment_reachable (type, is_packed);
27731 static bool
27732 arm_builtin_support_vector_misalignment (machine_mode mode,
27733 const_tree type, int misalignment,
27734 bool is_packed)
27736 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27738 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27740 if (is_packed)
27741 return align == 1;
27743 /* If the misalignment is unknown, we should be able to handle the access
27744 so long as it is not to a member of a packed data structure. */
27745 if (misalignment == -1)
27746 return true;
27748 /* Return true if the misalignment is a multiple of the natural alignment
27749 of the vector's element type. This is probably always going to be
27750 true in practice, since we've already established that this isn't a
27751 packed access. */
27752 return ((misalignment % align) == 0);
27755 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27756 is_packed);
27759 static void
27760 arm_conditional_register_usage (void)
27762 int regno;
27764 if (TARGET_THUMB1 && optimize_size)
27766 /* When optimizing for size on Thumb-1, it's better not
27767 to use the HI regs, because of the overhead of
27768 stacking them. */
27769 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27770 fixed_regs[regno] = call_used_regs[regno] = 1;
27773 /* The link register can be clobbered by any branch insn,
27774 but we have no way to track that at present, so mark
27775 it as unavailable. */
27776 if (TARGET_THUMB1)
27777 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27779 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27781 /* VFPv3 registers are disabled when earlier VFP
27782 versions are selected due to the definition of
27783 LAST_VFP_REGNUM. */
27784 for (regno = FIRST_VFP_REGNUM;
27785 regno <= LAST_VFP_REGNUM; ++ regno)
27787 fixed_regs[regno] = 0;
27788 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27789 || regno >= FIRST_VFP_REGNUM + 32;
27793 if (TARGET_REALLY_IWMMXT)
27795 regno = FIRST_IWMMXT_GR_REGNUM;
27796 /* The 2002/10/09 revision of the XScale ABI has wCG0
27797 and wCG1 as call-preserved registers. The 2002/11/21
27798 revision changed this so that all wCG registers are
27799 scratch registers. */
27800 for (regno = FIRST_IWMMXT_GR_REGNUM;
27801 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27802 fixed_regs[regno] = 0;
27803 /* The XScale ABI has wR0 - wR9 as scratch registers,
27804 the rest as call-preserved registers. */
27805 for (regno = FIRST_IWMMXT_REGNUM;
27806 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27808 fixed_regs[regno] = 0;
27809 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27813 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27815 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27816 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27818 else if (TARGET_APCS_STACK)
27820 fixed_regs[10] = 1;
27821 call_used_regs[10] = 1;
27823 /* -mcaller-super-interworking reserves r11 for calls to
27824 _interwork_r11_call_via_rN(). Making the register global
27825 is an easy way of ensuring that it remains valid for all
27826 calls. */
27827 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27828 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27830 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27831 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27832 if (TARGET_CALLER_INTERWORKING)
27833 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27835 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27838 static reg_class_t
27839 arm_preferred_rename_class (reg_class_t rclass)
27841 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27842 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS
27843 so that code size can be reduced. */
27844 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27845 return LO_REGS;
27846 else
27847 return NO_REGS;
27850 /* Compute the attribute "length" of insn "*push_multi".
27851 So this function MUST be kept in sync with that insn pattern. */
27852 int
27853 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27855 int i, regno, hi_reg;
27856 int num_saves = XVECLEN (parallel_op, 0);
27858 /* ARM mode. */
27859 if (TARGET_ARM)
27860 return 4;
27861 /* Thumb1 mode. */
27862 if (TARGET_THUMB1)
27863 return 2;
27865 /* Thumb2 mode. */
27866 regno = REGNO (first_op);
27867 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the register
27868 list fits in 8 bits. Normally this means all registers in the list must be
27869 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use 32-bit
27870 encodings. There is one exception: for PUSH, LR (a HI_REG) can still be used
27871 with the 16-bit encoding. */
27872 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27873 for (i = 1; i < num_saves && !hi_reg; i++)
27875 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27876 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27879 if (!hi_reg)
27880 return 2;
27881 return 4;
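/* Illustrative examples: under Thumb-2, "push {r0-r7, lr}" uses only low
   registers plus LR and so has length 2, whereas "push {r4, r8}" contains
   a high register other than LR and needs the 32-bit encoding, length 4.  */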
27884 /* Compute the attribute "length" of insn. Currently, this function is used
27885 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27886 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27887 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
27888 true if OPERANDS contains an insn which explicitly updates the base register. */
27890 int
27891 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
27893 /* ARM mode. */
27894 if (TARGET_ARM)
27895 return 4;
27896 /* Thumb1 mode. */
27897 if (TARGET_THUMB1)
27898 return 2;
27900 rtx parallel_op = operands[0];
27901 /* Initialize to the index of the last element of the PARALLEL. */
27902 unsigned indx = XVECLEN (parallel_op, 0) - 1;
27903 /* Initialize to the number of the base register. */
27904 unsigned regno = REGNO (operands[1]);
27905 /* Skip return and write back pattern.
27906 We only need register pop pattern for later analysis. */
27907 unsigned first_indx = 0;
27908 first_indx += return_pc ? 1 : 0;
27909 first_indx += write_back_p ? 1 : 0;
27911 /* A pop operation can be done through LDM or POP. If the base register is SP
27912 and write back is used, then an LDM is an alias of POP. */
27913 bool pop_p = (regno == SP_REGNUM && write_back_p);
27914 bool ldm_p = !pop_p;
27916 /* Check base register for LDM. */
27917 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
27918 return 4;
27920 /* Check each register in the list. */
27921 for (; indx >= first_indx; indx--)
27923 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
27924 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
27925 comment in arm_attr_length_push_multi. */
27926 if (REGNO_REG_CLASS (regno) == HI_REGS
27927 && (regno != PC_REGNUM || ldm_p))
27928 return 4;
27931 return 2;
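/* Illustrative examples: under Thumb-2, "pop {r4-r7, pc}" (SP base with
   write back) has length 2, while an LDM with a high-register base, or a
   register list containing high registers other than PC, has length 4.  */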
27934 /* Compute the number of instructions emitted by output_move_double. */
27935 int
27936 arm_count_output_move_double_insns (rtx *operands)
27938 int count;
27939 rtx ops[2];
27940 /* output_move_double may modify the operands array, so call it
27941 here on a copy of the array. */
27942 ops[0] = operands[0];
27943 ops[1] = operands[1];
27944 output_move_double (ops, false, &count);
27945 return count;
27948 int
27949 vfp3_const_double_for_fract_bits (rtx operand)
27951 REAL_VALUE_TYPE r0;
27953 if (!CONST_DOUBLE_P (operand))
27954 return 0;
27956 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
27957 if (exact_real_inverse (DFmode, &r0)
27958 && !REAL_VALUE_NEGATIVE (r0))
27960 if (exact_real_truncate (DFmode, &r0))
27962 HOST_WIDE_INT value = real_to_integer (&r0);
27963 value = value & 0xffffffff;
27964 if ((value != 0) && ( (value & (value - 1)) == 0))
27966 int ret = exact_log2 (value);
27967 gcc_assert (IN_RANGE (ret, 0, 31));
27968 return ret;
27972 return 0;
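/* Illustrative examples: 0.25 has the exact inverse 4.0 = 2^2, so the
   function above returns 2; 0.3 has no exact power-of-two inverse and
   yields 0.  */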
27975 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
27976 log2 is in [1, 32], return that log2. Otherwise return -1.
27977 This is used in the patterns for vcvt.s32.f32 floating-point to
27978 fixed-point conversions. */
27980 int
27981 vfp3_const_double_for_bits (rtx x)
27983 const REAL_VALUE_TYPE *r;
27985 if (!CONST_DOUBLE_P (x))
27986 return -1;
27988 r = CONST_DOUBLE_REAL_VALUE (x);
27990 if (REAL_VALUE_NEGATIVE (*r)
27991 || REAL_VALUE_ISNAN (*r)
27992 || REAL_VALUE_ISINF (*r)
27993 || !real_isinteger (r, SFmode))
27994 return -1;
27996 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
27998 /* The exact_log2 above will have returned -1 if this is
27999 not an exact log2. */
28000 if (!IN_RANGE (hwint, 1, 32))
28001 return -1;
28003 return hwint;
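/* Illustrative examples: 8.0 yields exact_log2 (8) = 3, which lies in
   [1, 32] and is returned; 1.0 yields log2 0 and 3.5 is not an integer,
   so both return -1.  */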
28007 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28009 static void
28010 arm_pre_atomic_barrier (enum memmodel model)
28012 if (need_atomic_barrier_p (model, true))
28013 emit_insn (gen_memory_barrier ());
28016 static void
28017 arm_post_atomic_barrier (enum memmodel model)
28019 if (need_atomic_barrier_p (model, false))
28020 emit_insn (gen_memory_barrier ());
28023 /* Emit the load-exclusive and store-exclusive instructions.
28024 Use acquire and release versions if necessary. */
28026 static void
28027 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28029 rtx (*gen) (rtx, rtx);
28031 if (acq)
28033 switch (mode)
28035 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28036 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28037 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28038 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28039 default:
28040 gcc_unreachable ();
28043 else
28045 switch (mode)
28047 case QImode: gen = gen_arm_load_exclusiveqi; break;
28048 case HImode: gen = gen_arm_load_exclusivehi; break;
28049 case SImode: gen = gen_arm_load_exclusivesi; break;
28050 case DImode: gen = gen_arm_load_exclusivedi; break;
28051 default:
28052 gcc_unreachable ();
28056 emit_insn (gen (rval, mem));
28059 static void
28060 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28061 rtx mem, bool rel)
28063 rtx (*gen) (rtx, rtx, rtx);
28065 if (rel)
28067 switch (mode)
28069 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
28070 case HImode: gen = gen_arm_store_release_exclusivehi; break;
28071 case SImode: gen = gen_arm_store_release_exclusivesi; break;
28072 case DImode: gen = gen_arm_store_release_exclusivedi; break;
28073 default:
28074 gcc_unreachable ();
28077 else
28079 switch (mode)
28081 case QImode: gen = gen_arm_store_exclusiveqi; break;
28082 case HImode: gen = gen_arm_store_exclusivehi; break;
28083 case SImode: gen = gen_arm_store_exclusivesi; break;
28084 case DImode: gen = gen_arm_store_exclusivedi; break;
28085 default:
28086 gcc_unreachable ();
28090 emit_insn (gen (bval, rval, mem));
28093 /* Mark the previous jump instruction as unlikely. */
28095 static void
28096 emit_unlikely_jump (rtx insn)
28098 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
28100 insn = emit_jump_insn (insn);
28101 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
28104 /* Expand a compare and swap pattern. */
28106 void
28107 arm_expand_compare_and_swap (rtx operands[])
28109 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28110 machine_mode mode;
28111 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28113 bval = operands[0];
28114 rval = operands[1];
28115 mem = operands[2];
28116 oldval = operands[3];
28117 newval = operands[4];
28118 is_weak = operands[5];
28119 mod_s = operands[6];
28120 mod_f = operands[7];
28121 mode = GET_MODE (mem);
28123 /* Normally the succ memory model must be stronger than fail, but in the
28124 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28125 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28127 if (TARGET_HAVE_LDACQ
28128 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28129 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28130 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28132 switch (mode)
28134 case QImode:
28135 case HImode:
28136 /* For narrow modes, we're going to perform the comparison in SImode,
28137 so do the zero-extension now. */
28138 rval = gen_reg_rtx (SImode);
28139 oldval = convert_modes (SImode, mode, oldval, true);
28140 /* FALLTHRU */
28142 case SImode:
28143 /* Force the value into a register if needed. We waited until after
28144 the zero-extension above to do this properly. */
28145 if (!arm_add_operand (oldval, SImode))
28146 oldval = force_reg (SImode, oldval);
28147 break;
28149 case DImode:
28150 if (!cmpdi_operand (oldval, mode))
28151 oldval = force_reg (mode, oldval);
28152 break;
28154 default:
28155 gcc_unreachable ();
28158 switch (mode)
28160 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
28161 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
28162 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
28163 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
28164 default:
28165 gcc_unreachable ();
28168 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28170 if (mode == QImode || mode == HImode)
28171 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28173 /* In all cases, we arrange for success to be signaled by Z set.
28174 This arrangement allows for the boolean result to be used directly
28175 in a subsequent branch, post optimization. */
28176 x = gen_rtx_REG (CCmode, CC_REGNUM);
28177 x = gen_rtx_EQ (SImode, x, const0_rtx);
28178 emit_insn (gen_rtx_SET (bval, x));
28181 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28182 another memory store between the load-exclusive and store-exclusive can
28183 reset the monitor from Exclusive to Open state. This means we must wait
28184 until after reload to split the pattern, lest we get a register spill in
28185 the middle of the atomic sequence. */
28187 void
28188 arm_split_compare_and_swap (rtx operands[])
28190 rtx rval, mem, oldval, newval, scratch;
28191 machine_mode mode;
28192 enum memmodel mod_s, mod_f;
28193 bool is_weak;
28194 rtx_code_label *label1, *label2;
28195 rtx x, cond;
28197 rval = operands[0];
28198 mem = operands[1];
28199 oldval = operands[2];
28200 newval = operands[3];
28201 is_weak = (operands[4] != const0_rtx);
28202 mod_s = memmodel_from_int (INTVAL (operands[5]));
28203 mod_f = memmodel_from_int (INTVAL (operands[6]));
28204 scratch = operands[7];
28205 mode = GET_MODE (mem);
28207 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28209 bool use_acquire = TARGET_HAVE_LDACQ
28210 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28211 || is_mm_release (mod_s));
28213 bool use_release = TARGET_HAVE_LDACQ
28214 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28215 || is_mm_acquire (mod_s));
28217 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28218 a full barrier is emitted after the store-release. */
28219 if (is_armv8_sync)
28220 use_acquire = false;
28222 /* Checks whether a barrier is needed and emits one accordingly. */
28223 if (!(use_acquire || use_release))
28224 arm_pre_atomic_barrier (mod_s);
28226 label1 = NULL;
28227 if (!is_weak)
28229 label1 = gen_label_rtx ();
28230 emit_label (label1);
28232 label2 = gen_label_rtx ();
28234 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28236 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
28237 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28238 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28239 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28240 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28242 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
28244 /* Weak or strong, we want EQ to be true for success, so that we
28245 match the flags that we got from the compare above. */
28246 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28247 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
28248 emit_insn (gen_rtx_SET (cond, x));
28250 if (!is_weak)
28252 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28253 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28254 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
28255 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28258 if (!is_mm_relaxed (mod_f))
28259 emit_label (label2);
28261 /* Checks whether a barrier is needed and emits one accordingly. */
28262 if (is_armv8_sync
28263 || !(use_acquire || use_release))
28264 arm_post_atomic_barrier (mod_s);
28266 if (is_mm_relaxed (mod_f))
28267 emit_label (label2);
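/* Illustrative sketch (registers and labels hypothetical): a strong SImode
   compare-and-swap splits into a retry loop of roughly this shape:
       .Lretry:
       ldrex   r0, [r2]
       cmp     r0, r3          @ expected value
       bne     .Ldone
       strex   r1, r4, [r2]    @ r4 holds the desired value
       cmp     r1, #0
       bne     .Lretry
       .Ldone:
   with barriers or acquire/release variants chosen as above.  */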
28270 void
28271 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28272 rtx value, rtx model_rtx, rtx cond)
28274 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28275 machine_mode mode = GET_MODE (mem);
28276 machine_mode wmode = (mode == DImode ? DImode : SImode);
28277 rtx_code_label *label;
28278 rtx x;
28280 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28282 bool use_acquire = TARGET_HAVE_LDACQ
28283 && !(is_mm_relaxed (model) || is_mm_consume (model)
28284 || is_mm_release (model));
28286 bool use_release = TARGET_HAVE_LDACQ
28287 && !(is_mm_relaxed (model) || is_mm_consume (model)
28288 || is_mm_acquire (model));
28290 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28291 a full barrier is emitted after the store-release. */
28292 if (is_armv8_sync)
28293 use_acquire = false;
28295 /* Checks whether a barrier is needed and emits one accordingly. */
28296 if (!(use_acquire || use_release))
28297 arm_pre_atomic_barrier (model);
28299 label = gen_label_rtx ();
28300 emit_label (label);
28302 if (new_out)
28303 new_out = gen_lowpart (wmode, new_out);
28304 if (old_out)
28305 old_out = gen_lowpart (wmode, old_out);
28306 else
28307 old_out = new_out;
28308 value = simplify_gen_subreg (wmode, value, mode, 0);
28310 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28312 switch (code)
28314 case SET:
28315 new_out = value;
28316 break;
28318 case NOT:
28319 x = gen_rtx_AND (wmode, old_out, value);
28320 emit_insn (gen_rtx_SET (new_out, x));
28321 x = gen_rtx_NOT (wmode, new_out);
28322 emit_insn (gen_rtx_SET (new_out, x));
28323 break;
28325 case MINUS:
28326 if (CONST_INT_P (value))
28328 value = GEN_INT (-INTVAL (value));
28329 code = PLUS;
28331 /* FALLTHRU */
28333 case PLUS:
28334 if (mode == DImode)
28336 /* DImode plus/minus need to clobber flags. */
28337 /* The adddi3 and subdi3 patterns are incorrectly written so that
28338 they require matching operands, even when we could easily support
28339 three operands. Thankfully, this can be fixed up post-splitting,
28340 as the individual add+adc patterns do accept three operands and
28341 post-reload cprop can make these moves go away. */
28342 emit_move_insn (new_out, old_out);
28343 if (code == PLUS)
28344 x = gen_adddi3 (new_out, new_out, value);
28345 else
28346 x = gen_subdi3 (new_out, new_out, value);
28347 emit_insn (x);
28348 break;
28350 /* FALLTHRU */
28352 default:
28353 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28354 emit_insn (gen_rtx_SET (new_out, x));
28355 break;
28358 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28359 use_release);
28361 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28362 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28364 /* Checks whether a barrier is needed and emits one accordingly. */
28365 if (is_armv8_sync
28366 || !(use_acquire || use_release))
28367 arm_post_atomic_barrier (model);
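/* Illustrative sketch (registers and label hypothetical): an SImode atomic
   add splits into a load-exclusive / operate / store-exclusive retry loop:
       .Lretry:
       ldrex   r0, [r2]
       add     r1, r0, r3
       strex   ip, r1, [r2]
       cmp     ip, #0
       bne     .Lretry
   wrapped in barriers or acquire/release forms as selected above.  */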
28370 #define MAX_VECT_LEN 16
28372 struct expand_vec_perm_d
28374 rtx target, op0, op1;
28375 unsigned char perm[MAX_VECT_LEN];
28376 machine_mode vmode;
28377 unsigned char nelt;
28378 bool one_vector_p;
28379 bool testing_p;
28382 /* Generate a variable permutation. */
28384 static void
28385 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28387 machine_mode vmode = GET_MODE (target);
28388 bool one_vector_p = rtx_equal_p (op0, op1);
28390 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28391 gcc_checking_assert (GET_MODE (op0) == vmode);
28392 gcc_checking_assert (GET_MODE (op1) == vmode);
28393 gcc_checking_assert (GET_MODE (sel) == vmode);
28394 gcc_checking_assert (TARGET_NEON);
28396 if (one_vector_p)
28398 if (vmode == V8QImode)
28399 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28400 else
28401 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28403 else
28405 rtx pair;
28407 if (vmode == V8QImode)
28409 pair = gen_reg_rtx (V16QImode);
28410 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28411 pair = gen_lowpart (TImode, pair);
28412 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28414 else
28416 pair = gen_reg_rtx (OImode);
28417 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28418 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28423 void
28424 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28426 machine_mode vmode = GET_MODE (target);
28427 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28428 bool one_vector_p = rtx_equal_p (op0, op1);
28429 rtx rmask[MAX_VECT_LEN], mask;
28431 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28432 numbering of elements for big-endian, we must reverse the order. */
28433 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28435 /* The VTBL instruction does not use a modulo index, so we must take care
28436 of that ourselves. */
28437 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28438 for (i = 0; i < nelt; ++i)
28439 rmask[i] = mask;
28440 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28441 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28443 arm_expand_vec_perm_1 (target, op0, op1, sel);
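/* Illustrative note: for a two-operand V8QImode permutation the mask built
   above is 2 * 8 - 1 = 15, so every selector byte is reduced modulo 16
   before the VTBL-based expansion runs.  */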
28446 /* Map between the architectural lane order and GCC's lane order,
28447 taking the ABI into account. See comment above output_move_neon for details. */
28449 static int
28450 neon_endian_lane_map (machine_mode mode, int lane)
28452 if (BYTES_BIG_ENDIAN)
28454 int nelems = GET_MODE_NUNITS (mode);
28455 /* Reverse lane order. */
28456 lane = (nelems - 1 - lane);
28457 /* Reverse D register order, to match ABI. */
28458 if (GET_MODE_SIZE (mode) == 16)
28459 lane = lane ^ (nelems / 2);
28461 return lane;
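/* Illustrative examples: on little-endian targets the lane is returned
   unchanged; on big-endian, for V4SImode (a 16-byte mode with four lanes)
   lane 0 maps to (4 - 1 - 0) ^ 2 = 1 and lane 3 maps to 0 ^ 2 = 2.  */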
28464 /* Some permutations index into pairs of vectors; this is a helper function
28465 to map indexes into those pairs of vectors. */
28467 static int
28468 neon_pair_endian_lane_map (machine_mode mode, int lane)
28470 int nelem = GET_MODE_NUNITS (mode);
28471 if (BYTES_BIG_ENDIAN)
28472 lane =
28473 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28474 return lane;
28477 /* Generate or test for an insn that supports a constant permutation. */
28479 /* Recognize patterns for the VUZP insns. */
28481 static bool
28482 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28484 unsigned int i, odd, mask, nelt = d->nelt;
28485 rtx out0, out1, in0, in1;
28486 rtx (*gen)(rtx, rtx, rtx, rtx);
28487 int first_elem;
28488 int swap_nelt;
28490 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28491 return false;
28493 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28494 big endian pattern on 64 bit vectors, so we correct for that. */
28495 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28496 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28498 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28500 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28501 odd = 0;
28502 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28503 odd = 1;
28504 else
28505 return false;
28506 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28508 for (i = 0; i < nelt; i++)
28510 unsigned elt =
28511 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28512 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28513 return false;
28516 /* Success! */
28517 if (d->testing_p)
28518 return true;
28520 switch (d->vmode)
28522 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28523 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28524 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28525 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28526 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28527 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28528 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28529 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28530 default:
28531 gcc_unreachable ();
28534 in0 = d->op0;
28535 in1 = d->op1;
28536 if (swap_nelt != 0)
28537 std::swap (in0, in1);
28539 out0 = d->target;
28540 out1 = gen_reg_rtx (d->vmode);
28541 if (odd)
28542 std::swap (out0, out1);
28544 emit_insn (gen (out0, in0, in1, out1));
28545 return true;
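/* Illustrative example: on little-endian, a two-operand V8HImode selector
   {0, 2, 4, 6, 8, 10, 12, 14} picks the even lanes of the operand pair, so
   the checks above succeed with odd == 0 and a VUZP is emitted whose first
   output register is d->target.  */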
28548 /* Recognize patterns for the VZIP insns. */
28550 static bool
28551 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28553 unsigned int i, high, mask, nelt = d->nelt;
28554 rtx out0, out1, in0, in1;
28555 rtx (*gen)(rtx, rtx, rtx, rtx);
28556 int first_elem;
28557 bool is_swapped;
28559 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28560 return false;
28562 is_swapped = BYTES_BIG_ENDIAN;
28564 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28566 high = nelt / 2;
28567 if (first_elem == neon_endian_lane_map (d->vmode, high))
28568 ;
28569 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28570 high = 0;
28571 else
28572 return false;
28573 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28575 for (i = 0; i < nelt / 2; i++)
28577 unsigned elt =
28578 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28579 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28580 != elt)
28581 return false;
28582 elt =
28583 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28584 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28585 != elt)
28586 return false;
28589 /* Success! */
28590 if (d->testing_p)
28591 return true;
28593 switch (d->vmode)
28595 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28596 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28597 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28598 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28599 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28600 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28601 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28602 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28603 default:
28604 gcc_unreachable ();
28607 in0 = d->op0;
28608 in1 = d->op1;
28609 if (is_swapped)
28610 std::swap (in0, in1);
28612 out0 = d->target;
28613 out1 = gen_reg_rtx (d->vmode);
28614 if (high)
28615 std::swap (out0, out1);
28617 emit_insn (gen (out0, in0, in1, out1));
28618 return true;
28621 /* Recognize patterns for the VREV insns. */
28623 static bool
28624 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28626 unsigned int i, j, diff, nelt = d->nelt;
28627 rtx (*gen)(rtx, rtx);
28629 if (!d->one_vector_p)
28630 return false;
28632 diff = d->perm[0];
28633 switch (diff)
28635 case 7:
28636 switch (d->vmode)
28638 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28639 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28640 default:
28641 return false;
28643 break;
28644 case 3:
28645 switch (d->vmode)
28647 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28648 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28649 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28650 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28651 default:
28652 return false;
28654 break;
28655 case 1:
28656 switch (d->vmode)
28658 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28659 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28660 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28661 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28662 case V4SImode: gen = gen_neon_vrev64v4si; break;
28663 case V2SImode: gen = gen_neon_vrev64v2si; break;
28664 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28665 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28666 default:
28667 return false;
28669 break;
28670 default:
28671 return false;
28674 for (i = 0; i < nelt ; i += diff + 1)
28675 for (j = 0; j <= diff; j += 1)
28677 /* This is guaranteed to be true as the value of diff
28678 is 7, 3 or 1 and we should have enough elements in the
28679 queue to generate this. Getting a vector mask with a
28680 value of diff other than these values implies that
28681 something is wrong by the time we get here. */
28682 gcc_assert (i + j < nelt);
28683 if (d->perm[i + j] != i + diff - j)
28684 return false;
28687 /* Success! */
28688 if (d->testing_p)
28689 return true;
28691 emit_insn (gen (d->target, d->op0));
28692 return true;
28695 /* Recognize patterns for the VTRN insns. */
28697 static bool
28698 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28700 unsigned int i, odd, mask, nelt = d->nelt;
28701 rtx out0, out1, in0, in1;
28702 rtx (*gen)(rtx, rtx, rtx, rtx);
28704 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28705 return false;
28707 /* Note that these are little-endian tests. Adjust for big-endian later. */
28708 if (d->perm[0] == 0)
28709 odd = 0;
28710 else if (d->perm[0] == 1)
28711 odd = 1;
28712 else
28713 return false;
28714 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28716 for (i = 0; i < nelt; i += 2)
28718 if (d->perm[i] != i + odd)
28719 return false;
28720 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28721 return false;
28724 /* Success! */
28725 if (d->testing_p)
28726 return true;
28728 switch (d->vmode)
28730 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28731 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28732 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28733 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28734 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28735 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28736 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28737 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28738 default:
28739 gcc_unreachable ();
28742 in0 = d->op0;
28743 in1 = d->op1;
28744 if (BYTES_BIG_ENDIAN)
28746 std::swap (in0, in1);
28747 odd = !odd;
28750 out0 = d->target;
28751 out1 = gen_reg_rtx (d->vmode);
28752 if (odd)
28753 std::swap (out0, out1);
28755 emit_insn (gen (out0, in0, in1, out1));
28756 return true;
28759 /* Recognize patterns for the VEXT insns. */
28761 static bool
28762 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28764 unsigned int i, nelt = d->nelt;
28765 rtx (*gen) (rtx, rtx, rtx, rtx);
28766 rtx offset;
28768 unsigned int location;
28770 unsigned int next = d->perm[0] + 1;
28772 /* TODO: Handle GCC's numbering of elements for big-endian. */
28773 if (BYTES_BIG_ENDIAN)
28774 return false;
28776 /* Check if the extracted indexes are increasing by one. */
28777 for (i = 1; i < nelt; next++, i++)
28779 /* If we hit the most significant element of the 2nd vector in
28780 the previous iteration, no need to test further. */
28781 if (next == 2 * nelt)
28782 return false;
28784 /* If we are operating on only one vector: it could be a
28785 rotation. If there are only two elements of size < 64, let
28786 arm_evpc_neon_vrev catch it. */
28787 if (d->one_vector_p && (next == nelt))
28789 if ((nelt == 2) && (d->vmode != V2DImode))
28790 return false;
28791 else
28792 next = 0;
28795 if (d->perm[i] != next)
28796 return false;
28799 location = d->perm[0];
28801 switch (d->vmode)
28803 case V16QImode: gen = gen_neon_vextv16qi; break;
28804 case V8QImode: gen = gen_neon_vextv8qi; break;
28805 case V4HImode: gen = gen_neon_vextv4hi; break;
28806 case V8HImode: gen = gen_neon_vextv8hi; break;
28807 case V2SImode: gen = gen_neon_vextv2si; break;
28808 case V4SImode: gen = gen_neon_vextv4si; break;
28809 case V2SFmode: gen = gen_neon_vextv2sf; break;
28810 case V4SFmode: gen = gen_neon_vextv4sf; break;
28811 case V2DImode: gen = gen_neon_vextv2di; break;
28812 default:
28813 return false;
28816 /* Success! */
28817 if (d->testing_p)
28818 return true;
28820 offset = GEN_INT (location);
28821 emit_insn (gen (d->target, d->op0, d->op1, offset));
28822 return true;
28825 /* The NEON VTBL instruction is a fully variable permutation that's even
28826 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28827 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28828 can do slightly better by expanding this as a constant where we don't
28829 have to apply a mask. */
28831 static bool
28832 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28834 rtx rperm[MAX_VECT_LEN], sel;
28835 machine_mode vmode = d->vmode;
28836 unsigned int i, nelt = d->nelt;
28838 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28839 numbering of elements for big-endian, we must reverse the order. */
28840 if (BYTES_BIG_ENDIAN)
28841 return false;
28843 if (d->testing_p)
28844 return true;
28846 /* Generic code will try constant permutation twice. Once with the
28847 original mode and again with the elements lowered to QImode.
28848 So wait and don't do the selector expansion ourselves. */
28849 if (vmode != V8QImode && vmode != V16QImode)
28850 return false;
28852 for (i = 0; i < nelt; ++i)
28853 rperm[i] = GEN_INT (d->perm[i]);
28854 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28855 sel = force_reg (vmode, sel);
28857 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28858 return true;
28861 static bool
28862 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28864 /* Check if the input mask matches vext before reordering the
28865 operands. */
28866 if (TARGET_NEON)
28867 if (arm_evpc_neon_vext (d))
28868 return true;
28870 /* The pattern matching functions above are written to look for a small
28871 number to begin the sequence (0, 1, N/2). If we begin with an index
28872 from the second operand, we can swap the operands. */
28873 if (d->perm[0] >= d->nelt)
28875 unsigned i, nelt = d->nelt;
28877 for (i = 0; i < nelt; ++i)
28878 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28880 std::swap (d->op0, d->op1);
28883 if (TARGET_NEON)
28885 if (arm_evpc_neon_vuzp (d))
28886 return true;
28887 if (arm_evpc_neon_vzip (d))
28888 return true;
28889 if (arm_evpc_neon_vrev (d))
28890 return true;
28891 if (arm_evpc_neon_vtrn (d))
28892 return true;
28893 return arm_evpc_neon_vtbl (d);
28895 return false;
28898 /* Expand a vec_perm_const pattern. */
28900 bool
28901 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28903 struct expand_vec_perm_d d;
28904 int i, nelt, which;
28906 d.target = target;
28907 d.op0 = op0;
28908 d.op1 = op1;
28910 d.vmode = GET_MODE (target);
28911 gcc_assert (VECTOR_MODE_P (d.vmode));
28912 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28913 d.testing_p = false;
28915 for (i = which = 0; i < nelt; ++i)
28917 rtx e = XVECEXP (sel, 0, i);
28918 int ei = INTVAL (e) & (2 * nelt - 1);
28919 which |= (ei < nelt ? 1 : 2);
28920 d.perm[i] = ei;
28923 switch (which)
28925 default:
28926 gcc_unreachable();
28928 case 3:
28929 d.one_vector_p = false;
28930 if (!rtx_equal_p (op0, op1))
28931 break;
28933 /* The elements of PERM do not suggest that only the first operand
28934 is used, but both operands are identical. Allow easier matching
28935 of the permutation by folding the permutation into the single
28936 input vector. */
28937 /* FALLTHRU */
28938 case 2:
28939 for (i = 0; i < nelt; ++i)
28940 d.perm[i] &= nelt - 1;
28941 d.op0 = op1;
28942 d.one_vector_p = true;
28943 break;
28945 case 1:
28946 d.op1 = op0;
28947 d.one_vector_p = true;
28948 break;
28951 return arm_expand_vec_perm_const_1 (&d);
28954 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28956 static bool
28957 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28958 const unsigned char *sel)
28960 struct expand_vec_perm_d d;
28961 unsigned int i, nelt, which;
28962 bool ret;
28964 d.vmode = vmode;
28965 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28966 d.testing_p = true;
28967 memcpy (d.perm, sel, nelt);
28969 /* Categorize the set of elements in the selector. */
28970 for (i = which = 0; i < nelt; ++i)
28972 unsigned char e = d.perm[i];
28973 gcc_assert (e < 2 * nelt);
28974 which |= (e < nelt ? 1 : 2);
28977 /* For all elements from second vector, fold the elements to first. */
28978 if (which == 2)
28979 for (i = 0; i < nelt; ++i)
28980 d.perm[i] -= nelt;
28982 /* Check whether the mask can be applied to the vector type. */
28983 d.one_vector_p = (which != 3);
28985 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28986 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28987 if (!d.one_vector_p)
28988 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28990 start_sequence ();
28991 ret = arm_expand_vec_perm_const_1 (&d);
28992 end_sequence ();
28994 return ret;
28997 bool
28998 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29000 /* If we are soft float and either have LDRD or the mode is no wider
29001 than a word, then all auto increment forms are ok. */
29002 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29003 return true;
29005 switch (code)
29007 /* Post increment and Pre Decrement are supported for all
29008 instruction forms except for vector forms. */
29009 case ARM_POST_INC:
29010 case ARM_PRE_DEC:
29011 if (VECTOR_MODE_P (mode))
29013 if (code != ARM_PRE_DEC)
29014 return true;
29015 else
29016 return false;
29019 return true;
29021 case ARM_POST_DEC:
29022 case ARM_PRE_INC:
29023 /* Without LDRD and mode size greater than
29024 word size, there is no point in auto-incrementing
29025 because ldm and stm will not have these forms. */
29026 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29027 return false;
29029 /* Vector and floating point modes do not support
29030 these auto increment forms. */
29031 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29032 return false;
29034 return true;
29036 default:
29037 return false;
29041 return false;
29044 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29045 on ARM, since we know that shifts by negative amounts are no-ops.
29046 Additionally, the default expansion code is not available or suitable
29047 for post-reload insn splits (this can occur when the register allocator
29048 chooses not to do a shift in NEON).
29050 This function is used in both initial expand and post-reload splits, and
29051 handles all kinds of 64-bit shifts.
29053 Input requirements:
29054 - It is safe for the input and output to be the same register, but
29055 early-clobber rules apply for the shift amount and scratch registers.
29056 - Shift by register requires both scratch registers. In all other cases
29057 the scratch registers may be NULL.
29058 - Ashiftrt by a register also clobbers the CC register. */
29059 void
29060 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29061 rtx amount, rtx scratch1, rtx scratch2)
29063 rtx out_high = gen_highpart (SImode, out);
29064 rtx out_low = gen_lowpart (SImode, out);
29065 rtx in_high = gen_highpart (SImode, in);
29066 rtx in_low = gen_lowpart (SImode, in);
29068 /* Terminology:
29069 in = the register pair containing the input value.
29070 out = the destination register pair.
29071 up = the high- or low-part of each pair.
29072 down = the opposite part to "up".
29073 In a shift, we can consider bits to shift from "up"-stream to
29074 "down"-stream, so in a left-shift "up" is the low-part and "down"
29075 is the high-part of each register pair. */
29077 rtx out_up = code == ASHIFT ? out_low : out_high;
29078 rtx out_down = code == ASHIFT ? out_high : out_low;
29079 rtx in_up = code == ASHIFT ? in_low : in_high;
29080 rtx in_down = code == ASHIFT ? in_high : in_low;
29082 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29083 gcc_assert (out
29084 && (REG_P (out) || GET_CODE (out) == SUBREG)
29085 && GET_MODE (out) == DImode);
29086 gcc_assert (in
29087 && (REG_P (in) || GET_CODE (in) == SUBREG)
29088 && GET_MODE (in) == DImode);
29089 gcc_assert (amount
29090 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29091 && GET_MODE (amount) == SImode)
29092 || CONST_INT_P (amount)));
29093 gcc_assert (scratch1 == NULL
29094 || (GET_CODE (scratch1) == SCRATCH)
29095 || (GET_MODE (scratch1) == SImode
29096 && REG_P (scratch1)));
29097 gcc_assert (scratch2 == NULL
29098 || (GET_CODE (scratch2) == SCRATCH)
29099 || (GET_MODE (scratch2) == SImode
29100 && REG_P (scratch2)));
29101 gcc_assert (!REG_P (out) || !REG_P (amount)
29102 || !HARD_REGISTER_P (out)
29103 || (REGNO (out) != REGNO (amount)
29104 && REGNO (out) + 1 != REGNO (amount)));
29106 /* Macros to make following code more readable. */
29107 #define SUB_32(DEST,SRC) \
29108 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29109 #define RSB_32(DEST,SRC) \
29110 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29111 #define SUB_S_32(DEST,SRC) \
29112 gen_addsi3_compare0 ((DEST), (SRC), \
29113 GEN_INT (-32))
29114 #define SET(DEST,SRC) \
29115 gen_rtx_SET ((DEST), (SRC))
29116 #define SHIFT(CODE,SRC,AMOUNT) \
29117 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29118 #define LSHIFT(CODE,SRC,AMOUNT) \
29119 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29120 SImode, (SRC), (AMOUNT))
29121 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29122 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29123 SImode, (SRC), (AMOUNT))
29124 #define ORR(A,B) \
29125 gen_rtx_IOR (SImode, (A), (B))
29126 #define BRANCH(COND,LABEL) \
29127 gen_arm_cond_branch ((LABEL), \
29128 gen_rtx_ ## COND (CCmode, cc_reg, \
29129 const0_rtx), \
29130 cc_reg)
29132 /* Shifts by register and shifts by constant are handled separately. */
29133 if (CONST_INT_P (amount))
29135 /* We have a shift-by-constant. */
29137 /* First, handle out-of-range shift amounts.
29138 In both cases we try to match the result an ARM instruction in a
29139 shift-by-register would give. This helps reduce execution
29140 differences between optimization levels, but it won't stop other
29141 parts of the compiler doing different things. This is "undefined"
29142 behavior, in any case. */
29143 if (INTVAL (amount) <= 0)
29144 emit_insn (gen_movdi (out, in));
29145 else if (INTVAL (amount) >= 64)
29147 if (code == ASHIFTRT)
29149 rtx const31_rtx = GEN_INT (31);
29150 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29151 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29153 else
29154 emit_insn (gen_movdi (out, const0_rtx));
29157 /* Now handle valid shifts. */
29158 else if (INTVAL (amount) < 32)
29160 /* Shifts by a constant less than 32. */
29161 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29163 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29164 emit_insn (SET (out_down,
29165 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29166 out_down)));
29167 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29169 else
29171 /* Shifts by a constant greater than 31. */
29172 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29174 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29175 if (code == ASHIFTRT)
29176 emit_insn (gen_ashrsi3 (out_up, in_up,
29177 GEN_INT (31)));
29178 else
29179 emit_insn (SET (out_up, const0_rtx));
29182 else
29184 /* We have a shift-by-register. */
29185 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29187 /* This alternative requires the scratch registers. */
29188 gcc_assert (scratch1 && REG_P (scratch1));
29189 gcc_assert (scratch2 && REG_P (scratch2));
29191 /* We will need the values "amount-32" and "32-amount" later.
29192 Swapping them around now allows the later code to be more general. */
29193 switch (code)
29195 case ASHIFT:
29196 emit_insn (SUB_32 (scratch1, amount));
29197 emit_insn (RSB_32 (scratch2, amount));
29198 break;
29199 case ASHIFTRT:
29200 emit_insn (RSB_32 (scratch1, amount));
29201 /* Also set CC = amount > 32. */
29202 emit_insn (SUB_S_32 (scratch2, amount));
29203 break;
29204 case LSHIFTRT:
29205 emit_insn (RSB_32 (scratch1, amount));
29206 emit_insn (SUB_32 (scratch2, amount));
29207 break;
29208 default:
29209 gcc_unreachable ();
29212 /* Emit code like this:
29214 arithmetic-left:
29215 out_down = in_down << amount;
29216 out_down = (in_up << (amount - 32)) | out_down;
29217 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29218 out_up = in_up << amount;
29220 arithmetic-right:
29221 out_down = in_down >> amount;
29222 out_down = (in_up << (32 - amount)) | out_down;
29223 if (amount < 32)
29224 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29225 out_up = in_up << amount;
29227 logical-right:
29228 out_down = in_down >> amount;
29229 out_down = (in_up << (32 - amount)) | out_down;
29230 if (amount < 32)
29231 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29232 out_up = in_up << amount;
29234 The ARM and Thumb2 variants are the same but implemented slightly
29235 differently. If this were only called during expand we could just
29236 use the Thumb2 case and let combine do the right thing, but this
29237 can also be called from post-reload splitters. */
29239 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29241 if (!TARGET_THUMB2)
29243 /* Emit code for ARM mode. */
29244 emit_insn (SET (out_down,
29245 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29246 if (code == ASHIFTRT)
29248 rtx_code_label *done_label = gen_label_rtx ();
29249 emit_jump_insn (BRANCH (LT, done_label));
29250 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29251 out_down)));
29252 emit_label (done_label);
29254 else
29255 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29256 out_down)));
29258 else
29260 /* Emit code for Thumb2 mode.
29261 Thumb2 can't do shift and or in one insn. */
29262 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29263 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29265 if (code == ASHIFTRT)
29267 rtx_code_label *done_label = gen_label_rtx ();
29268 emit_jump_insn (BRANCH (LT, done_label));
29269 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29270 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29271 emit_label (done_label);
29273 else
29275 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29276 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29280 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29283 #undef SUB_32
29284 #undef RSB_32
29285 #undef SUB_S_32
29286 #undef SET
29287 #undef SHIFT
29288 #undef LSHIFT
29289 #undef REV_LSHIFT
29290 #undef ORR
29291 #undef BRANCH
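/* As a worked illustration of the shift-by-constant path above: a 64-bit
   left shift by the constant 40 falls into the "greater than 31" branch,
   so with code == ASHIFT we get adj_amount == 8, out_down (the high word
   of the result) is set to in_up (the low input word) shifted left by 8,
   and out_up (the low word of the result) is cleared to zero.  For
   ASHIFTRT by 40 the low result word instead becomes the high input word
   shifted right arithmetically by 8, and the high result word is filled
   with the sign via in_up >> 31.  */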
29294 /* Returns true if the pattern is a valid symbolic address, which is either a
29295 symbol_ref or (symbol_ref + addend).
29297 According to the ARM ELF ABI, the initial addend of REL-type relocations
29298 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29299 literal field of the instruction as a 16-bit signed value in the range
29300 -32768 <= A < 32768. */
29302 bool
29303 arm_valid_symbolic_address_p (rtx addr)
29305 rtx xop0, xop1 = NULL_RTX;
29306 rtx tmp = addr;
29308 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29309 return true;
29311 /* (const (plus: symbol_ref const_int)) */
29312 if (GET_CODE (addr) == CONST)
29313 tmp = XEXP (addr, 0);
29315 if (GET_CODE (tmp) == PLUS)
29317 xop0 = XEXP (tmp, 0);
29318 xop1 = XEXP (tmp, 1);
29320 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29321 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29324 return false;
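/* A few illustrative cases for the check above (hypothetical RTL, for
   exposition only):
     (symbol_ref "sym")                                  -> true
     (const (plus (symbol_ref "sym") (const_int 12)))    -> true
     (const (plus (symbol_ref "sym") (const_int 32768))) -> false, since
       the addend falls outside the signed 16-bit range -32768..32767.  */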
29327 /* Return TRUE if *COMPARISON is a valid comparison operation, and put
29328 the operands into a form that is valid for it. */
29329 bool
29330 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29332 enum rtx_code code = GET_CODE (*comparison);
29333 int code_int;
29334 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29335 ? GET_MODE (*op2) : GET_MODE (*op1);
29337 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29339 if (code == UNEQ || code == LTGT)
29340 return false;
29342 code_int = (int)code;
29343 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29344 PUT_CODE (*comparison, (enum rtx_code)code_int);
29346 switch (mode)
29348 case SImode:
29349 if (!arm_add_operand (*op1, mode))
29350 *op1 = force_reg (mode, *op1);
29351 if (!arm_add_operand (*op2, mode))
29352 *op2 = force_reg (mode, *op2);
29353 return true;
29355 case DImode:
29356 if (!cmpdi_operand (*op1, mode))
29357 *op1 = force_reg (mode, *op1);
29358 if (!cmpdi_operand (*op2, mode))
29359 *op2 = force_reg (mode, *op2);
29360 return true;
29362 case SFmode:
29363 case DFmode:
29364 if (!arm_float_compare_operand (*op1, mode))
29365 *op1 = force_reg (mode, *op1);
29366 if (!arm_float_compare_operand (*op2, mode))
29367 *op2 = force_reg (mode, *op2);
29368 return true;
29369 default:
29370 break;
29373 return false;
29377 /* Maximum number of instructions to set a block of memory. */
29378 static int
29379 arm_block_set_max_insns (void)
29381 if (optimize_function_for_size_p (cfun))
29382 return 4;
29383 else
29384 return current_tune->max_insns_inline_memset;
29387 /* Return TRUE if it's profitable to set a block of memory for
29388 the non-vectorized case. VAL is the value to set the memory
29389 with. LENGTH is the number of bytes to set. ALIGN is the
29390 alignment of the destination memory in bytes. UNALIGNED_P
29391 is TRUE if we can only set the memory with instructions
29392 meeting alignment requirements. USE_STRD_P is TRUE if we
29393 can use strd to set the memory. */
29394 static bool
29395 arm_block_set_non_vect_profit_p (rtx val,
29396 unsigned HOST_WIDE_INT length,
29397 unsigned HOST_WIDE_INT align,
29398 bool unaligned_p, bool use_strd_p)
29400 int num = 0;
29401 /* For a leftover of 0-7 bytes, we can set the memory block using
29402 strb/strh/str with the minimum number of instructions. */
29403 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
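     /* For instance, a 3-byte leftover needs strh + strb (leftover[3] == 2),
	a 4-byte leftover needs a single str (leftover[4] == 1), and a 7-byte
	leftover needs str + strh + strb (leftover[7] == 3).  */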
29405 if (unaligned_p)
29407 num = arm_const_inline_cost (SET, val);
29408 num += length / align + length % align;
29410 else if (use_strd_p)
29412 num = arm_const_double_inline_cost (val);
29413 num += (length >> 3) + leftover[length & 7];
29415 else
29417 num = arm_const_inline_cost (SET, val);
29418 num += (length >> 2) + leftover[length & 3];
29421 /* We may be able to combine last pair STRH/STRB into a single STR
29422 by shifting one byte back. */
29423 if (unaligned_access && length > 3 && (length & 3) == 3)
29424 num--;
29426 return (num <= arm_block_set_max_insns ());
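/* Rough worked example (assuming arm_const_inline_cost returns 1 for the
   value): a 12-byte, word-aligned memset without strd costs
   1 + (12 >> 2) + leftover[0] = 4 instructions, which is still within the
   size-optimized limit of 4 returned by arm_block_set_max_insns.  */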
29429 /* Return TRUE if it's profitable to set a block of memory for
29430 the vectorized case. LENGTH is the number of bytes to set.
29431 ALIGN is the alignment of destination memory in bytes.
29432 MODE is the vector mode used to set the memory. */
29433 static bool
29434 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29435 unsigned HOST_WIDE_INT align,
29436 machine_mode mode)
29438 int num;
29439 bool unaligned_p = ((align & 3) != 0);
29440 unsigned int nelt = GET_MODE_NUNITS (mode);
29442 /* Instruction loading the constant value. */
29443 num = 1;
29444 /* Instructions storing to memory. */
29445 num += (length + nelt - 1) / nelt;
29446 /* Instructions adjusting the address expression. We only need to
29447 adjust the address expression if it's 4-byte aligned and the
29448 leftover bytes can only be stored by a misaligned store instruction. */
29449 if (!unaligned_p && (length & 3) != 0)
29450 num++;
29452 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29453 if (!unaligned_p && mode == V16QImode)
29454 num--;
29456 return (num <= arm_block_set_max_insns ());
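/* Rough worked example: a 23-byte memset with a word-aligned destination
   and mode == V16QImode costs 1 (load the constant vector)
   + ceil (23 / 16) = 2 (stores) + 1 (address adjustment, since 23 is not
   a multiple of 4) - 1 (the first 16 bytes go through vst1) = 3
   instructions, within the limit even when optimizing for size.  */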
29459 /* Set a block of memory using vectorization instructions for the
29460 unaligned case. We fill the first LENGTH bytes of the memory
29461 area starting from DSTBASE with byte constant VALUE. ALIGN is
29462 the alignment requirement of memory. Return TRUE if succeeded. */
29463 static bool
29464 arm_block_set_unaligned_vect (rtx dstbase,
29465 unsigned HOST_WIDE_INT length,
29466 unsigned HOST_WIDE_INT value,
29467 unsigned HOST_WIDE_INT align)
29469 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29470 rtx dst, mem;
29471 rtx val_elt, val_vec, reg;
29472 rtx rval[MAX_VECT_LEN];
29473 rtx (*gen_func) (rtx, rtx);
29474 machine_mode mode;
29475 unsigned HOST_WIDE_INT v = value;
29476 unsigned int offset = 0;
29477 gcc_assert ((align & 0x3) != 0);
29478 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29479 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29480 if (length >= nelt_v16)
29482 mode = V16QImode;
29483 gen_func = gen_movmisalignv16qi;
29485 else
29487 mode = V8QImode;
29488 gen_func = gen_movmisalignv8qi;
29490 nelt_mode = GET_MODE_NUNITS (mode);
29491 gcc_assert (length >= nelt_mode);
29492 /* Skip if it isn't profitable. */
29493 if (!arm_block_set_vect_profit_p (length, align, mode))
29494 return false;
29496 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29497 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29499 v = sext_hwi (v, BITS_PER_WORD);
29500 val_elt = GEN_INT (v);
29501 for (j = 0; j < nelt_mode; j++)
29502 rval[j] = val_elt;
29504 reg = gen_reg_rtx (mode);
29505 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29506 /* Emit instruction loading the constant value. */
29507 emit_move_insn (reg, val_vec);
29509 /* Handle nelt_mode bytes in a vector. */
29510 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29512 emit_insn ((*gen_func) (mem, reg));
29513 if (i + 2 * nelt_mode <= length)
29515 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29516 offset += nelt_mode;
29517 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29521 /* If at least nelt_v8 bytes are left over, we must be in
29522 V16QImode. */
29523 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29525 /* Handle (8, 16) bytes leftover. */
29526 if (i + nelt_v8 < length)
29528 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29529 offset += length - i;
29530 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29532 /* We are shifting bytes back, set the alignment accordingly. */
29533 if ((length & 1) != 0 && align >= 2)
29534 set_mem_align (mem, BITS_PER_UNIT);
29536 emit_insn (gen_movmisalignv16qi (mem, reg));
29538 /* Handle (0, 8] bytes leftover. */
29539 else if (i < length && i + nelt_v8 >= length)
29541 if (mode == V16QImode)
29542 reg = gen_lowpart (V8QImode, reg);
29544 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29545 + (nelt_mode - nelt_v8))));
29546 offset += (length - i) + (nelt_mode - nelt_v8);
29547 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29549 /* We are shifting bytes back, set the alignment accordingly. */
29550 if ((length & 1) != 0 && align >= 2)
29551 set_mem_align (mem, BITS_PER_UNIT);
29553 emit_insn (gen_movmisalignv8qi (mem, reg));
29556 return true;
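/* Sketch of the code emitted above for length == 23, align == 1
   (illustrative): one misaligned 16-byte store at offset 0, then the
   7 leftover bytes fall into the (0, 8] case, so DST is advanced by
   (23 - 16) + (16 - 8) = 15 and a misaligned 8-byte store covers bytes
   15..22, overlapping the first store by one byte.  */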
29559 /* Set a block of memory using vectorization instructions for the
29560 aligned case. We fill the first LENGTH bytes of the memory area
29561 starting from DSTBASE with byte constant VALUE. ALIGN is the
29562 alignment requirement of memory. Return TRUE if succeeded. */
29563 static bool
29564 arm_block_set_aligned_vect (rtx dstbase,
29565 unsigned HOST_WIDE_INT length,
29566 unsigned HOST_WIDE_INT value,
29567 unsigned HOST_WIDE_INT align)
29569 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29570 rtx dst, addr, mem;
29571 rtx val_elt, val_vec, reg;
29572 rtx rval[MAX_VECT_LEN];
29573 machine_mode mode;
29574 unsigned HOST_WIDE_INT v = value;
29575 unsigned int offset = 0;
29577 gcc_assert ((align & 0x3) == 0);
29578 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29579 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29580 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29581 mode = V16QImode;
29582 else
29583 mode = V8QImode;
29585 nelt_mode = GET_MODE_NUNITS (mode);
29586 gcc_assert (length >= nelt_mode);
29587 /* Skip if it isn't profitable. */
29588 if (!arm_block_set_vect_profit_p (length, align, mode))
29589 return false;
29591 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29593 v = sext_hwi (v, BITS_PER_WORD);
29594 val_elt = GEN_INT (v);
29595 for (j = 0; j < nelt_mode; j++)
29596 rval[j] = val_elt;
29598 reg = gen_reg_rtx (mode);
29599 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29600 /* Emit instruction loading the constant value. */
29601 emit_move_insn (reg, val_vec);
29603 i = 0;
29604 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29605 if (mode == V16QImode)
29607 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29608 emit_insn (gen_movmisalignv16qi (mem, reg));
29609 i += nelt_mode;
29610 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29611 if (i + nelt_v8 < length && i + nelt_v16 > length)
29613 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29614 offset += length - nelt_mode;
29615 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29616 /* We are shifting bytes back, set the alignment accordingly. */
29617 if ((length & 0x3) == 0)
29618 set_mem_align (mem, BITS_PER_UNIT * 4);
29619 else if ((length & 0x1) == 0)
29620 set_mem_align (mem, BITS_PER_UNIT * 2);
29621 else
29622 set_mem_align (mem, BITS_PER_UNIT);
29624 emit_insn (gen_movmisalignv16qi (mem, reg));
29625 return true;
29627 /* Fall through for bytes leftover. */
29628 mode = V8QImode;
29629 nelt_mode = GET_MODE_NUNITS (mode);
29630 reg = gen_lowpart (V8QImode, reg);
29633 /* Handle 8 bytes in a vector. */
29634 for (; (i + nelt_mode <= length); i += nelt_mode)
29636 addr = plus_constant (Pmode, dst, i);
29637 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29638 emit_move_insn (mem, reg);
29641 /* Handle single word leftover by shifting 4 bytes back. We can
29642 use aligned access for this case. */
29643 if (i + UNITS_PER_WORD == length)
29645 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29646 offset += i - UNITS_PER_WORD;
29647 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29648 /* We are shifting 4 bytes back, set the alignment accordingly. */
29649 if (align > UNITS_PER_WORD)
29650 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29652 emit_move_insn (mem, reg);
29654 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29655 We have to use unaligned access for this case. */
29656 else if (i < length)
29658 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29659 offset += length - nelt_mode;
29660 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29661 /* We are shifting bytes back, set the alignment accordingly. */
29662 if ((length & 1) == 0)
29663 set_mem_align (mem, BITS_PER_UNIT * 2);
29664 else
29665 set_mem_align (mem, BITS_PER_UNIT);
29667 emit_insn (gen_movmisalignv8qi (mem, reg));
29670 return true;
29673 /* Set a block of memory using plain strh/strb instructions, only
29674 using instructions allowed by ALIGN on the processor. We fill the
29675 first LENGTH bytes of the memory area starting from DSTBASE
29676 with byte constant VALUE. ALIGN is the alignment requirement
29677 of memory. */
29678 static bool
29679 arm_block_set_unaligned_non_vect (rtx dstbase,
29680 unsigned HOST_WIDE_INT length,
29681 unsigned HOST_WIDE_INT value,
29682 unsigned HOST_WIDE_INT align)
29684 unsigned int i;
29685 rtx dst, addr, mem;
29686 rtx val_exp, val_reg, reg;
29687 machine_mode mode;
29688 HOST_WIDE_INT v = value;
29690 gcc_assert (align == 1 || align == 2);
29692 if (align == 2)
29693 v |= (value << BITS_PER_UNIT);
29695 v = sext_hwi (v, BITS_PER_WORD);
29696 val_exp = GEN_INT (v);
29697 /* Skip if it isn't profitable. */
29698 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29699 align, true, false))
29700 return false;
29702 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29703 mode = (align == 2 ? HImode : QImode);
29704 val_reg = force_reg (SImode, val_exp);
29705 reg = gen_lowpart (mode, val_reg);
29707 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29709 addr = plus_constant (Pmode, dst, i);
29710 mem = adjust_automodify_address (dstbase, mode, addr, i);
29711 emit_move_insn (mem, reg);
29714 /* Handle single byte leftover. */
29715 if (i + 1 == length)
29717 reg = gen_lowpart (QImode, val_reg);
29718 addr = plus_constant (Pmode, dst, i);
29719 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29720 emit_move_insn (mem, reg);
29721 i++;
29724 gcc_assert (i == length);
29725 return true;
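/* Sketch for length == 7, align == 2, value == 0xAB (illustrative): V
   becomes 0xABAB, the loop emits strh at offsets 0, 2 and 4, and the
   single remaining byte at offset 6 is stored with strb.  */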
29728 /* Set a block of memory using plain strd/str/strh/strb instructions,
29729 to permit unaligned copies on processors which support unaligned
29730 semantics for those instructions. We fill the first LENGTH bytes
29731 of the memory area starting from DSTBASE with byte constant VALUE.
29732 ALIGN is the alignment requirement of memory. */
29733 static bool
29734 arm_block_set_aligned_non_vect (rtx dstbase,
29735 unsigned HOST_WIDE_INT length,
29736 unsigned HOST_WIDE_INT value,
29737 unsigned HOST_WIDE_INT align)
29739 unsigned int i;
29740 rtx dst, addr, mem;
29741 rtx val_exp, val_reg, reg;
29742 unsigned HOST_WIDE_INT v;
29743 bool use_strd_p;
29745 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29746 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29748 v = (value | (value << 8) | (value << 16) | (value << 24));
29749 if (length < UNITS_PER_WORD)
29750 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29752 if (use_strd_p)
29753 v |= (v << BITS_PER_WORD);
29754 else
29755 v = sext_hwi (v, BITS_PER_WORD);
29757 val_exp = GEN_INT (v);
29758 /* Skip if it isn't profitable. */
29759 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29760 align, false, use_strd_p))
29762 if (!use_strd_p)
29763 return false;
29765 /* Try without strd. */
29766 v = (v >> BITS_PER_WORD);
29767 v = sext_hwi (v, BITS_PER_WORD);
29768 val_exp = GEN_INT (v);
29769 use_strd_p = false;
29770 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29771 align, false, use_strd_p))
29772 return false;
29775 i = 0;
29776 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29777 /* Handle double words using strd if possible. */
29778 if (use_strd_p)
29780 val_reg = force_reg (DImode, val_exp);
29781 reg = val_reg;
29782 for (; (i + 8 <= length); i += 8)
29784 addr = plus_constant (Pmode, dst, i);
29785 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29786 emit_move_insn (mem, reg);
29789 else
29790 val_reg = force_reg (SImode, val_exp);
29792 /* Handle words. */
29793 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29794 for (; (i + 4 <= length); i += 4)
29796 addr = plus_constant (Pmode, dst, i);
29797 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29798 if ((align & 3) == 0)
29799 emit_move_insn (mem, reg);
29800 else
29801 emit_insn (gen_unaligned_storesi (mem, reg));
29804 /* Merge last pair of STRH and STRB into a STR if possible. */
29805 if (unaligned_access && i > 0 && (i + 3) == length)
29807 addr = plus_constant (Pmode, dst, i - 1);
29808 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29809 /* We are shifting one byte back, set the alignment accordingly. */
29810 if ((align & 1) == 0)
29811 set_mem_align (mem, BITS_PER_UNIT);
29813 /* Most likely this is an unaligned access, and we can't tell at
29814 compilation time. */
29815 emit_insn (gen_unaligned_storesi (mem, reg));
29816 return true;
29819 /* Handle half word leftover. */
29820 if (i + 2 <= length)
29822 reg = gen_lowpart (HImode, val_reg);
29823 addr = plus_constant (Pmode, dst, i);
29824 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29825 if ((align & 1) == 0)
29826 emit_move_insn (mem, reg);
29827 else
29828 emit_insn (gen_unaligned_storehi (mem, reg));
29830 i += 2;
29833 /* Handle single byte leftover. */
29834 if (i + 1 == length)
29836 reg = gen_lowpart (QImode, val_reg);
29837 addr = plus_constant (Pmode, dst, i);
29838 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29839 emit_move_insn (mem, reg);
29842 return true;
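/* Sketch for length == 15, align == 4, without strd and assuming unaligned
   access is enabled (illustrative): three word stores at offsets 0, 4 and 8,
   then i == 12 and i + 3 == length, so the final strh/strb pair is merged
   into one unaligned str at offset 11, which rewrites byte 11 and covers
   bytes 11..14.  */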
29845 /* Set a block of memory using vectorization instructions for both
29846 aligned and unaligned cases. We fill the first LENGTH bytes of
29847 the memory area starting from DSTBASE with byte constant VALUE.
29848 ALIGN is the alignment requirement of memory. */
29849 static bool
29850 arm_block_set_vect (rtx dstbase,
29851 unsigned HOST_WIDE_INT length,
29852 unsigned HOST_WIDE_INT value,
29853 unsigned HOST_WIDE_INT align)
29855 /* Check whether we need to use unaligned store instruction. */
29856 if (((align & 3) != 0 || (length & 3) != 0)
29857 /* Check whether unaligned store instruction is available. */
29858 && (!unaligned_access || BYTES_BIG_ENDIAN))
29859 return false;
29861 if ((align & 3) == 0)
29862 return arm_block_set_aligned_vect (dstbase, length, value, align);
29863 else
29864 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29867 /* Expand a string store operation. First we try to do it using
29868 vectorization instructions, then with ARM unaligned access and
29869 double-word store if profitable. OPERANDS[0] is the destination,
29870 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29871 initialize the memory with, OPERANDS[3] is the known alignment of the
29872 destination. */
29873 bool
29874 arm_gen_setmem (rtx *operands)
29876 rtx dstbase = operands[0];
29877 unsigned HOST_WIDE_INT length;
29878 unsigned HOST_WIDE_INT value;
29879 unsigned HOST_WIDE_INT align;
29881 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29882 return false;
29884 length = UINTVAL (operands[1]);
29885 if (length > 64)
29886 return false;
29888 value = (UINTVAL (operands[2]) & 0xFF);
29889 align = UINTVAL (operands[3]);
29890 if (TARGET_NEON && length >= 8
29891 && current_tune->string_ops_prefer_neon
29892 && arm_block_set_vect (dstbase, length, value, align))
29893 return true;
29895 if (!unaligned_access && (align & 3) != 0)
29896 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29898 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29902 static bool
29903 arm_macro_fusion_p (void)
29905 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
29909 static bool
29910 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29912 rtx set_dest;
29913 rtx prev_set = single_set (prev);
29914 rtx curr_set = single_set (curr);
29916 if (!prev_set
29917 || !curr_set)
29918 return false;
29920 if (any_condjump_p (curr))
29921 return false;
29923 if (!arm_macro_fusion_p ())
29924 return false;
29926 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
29927 && aarch_crypto_can_dual_issue (prev, curr))
29928 return true;
29930 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT)
29932 /* We are trying to fuse
29933 movw imm / movt imm
29934 instructions as a group that gets scheduled together. */
29936 set_dest = SET_DEST (curr_set);
29938 if (GET_MODE (set_dest) != SImode)
29939 return false;
29941 /* We are trying to match:
29942 prev (movw) == (set (reg r0) (const_int imm16))
29943 curr (movt) == (set (zero_extract (reg r0)
29944 (const_int 16)
29945 (const_int 16))
29946 (const_int imm16_1))
29948 prev (movw) == (set (reg r1)
29949 (high (symbol_ref ("SYM"))))
29950 curr (movt) == (set (reg r0)
29951 (lo_sum (reg r1)
29952 (symbol_ref ("SYM")))) */
29953 if (GET_CODE (set_dest) == ZERO_EXTRACT)
29955 if (CONST_INT_P (SET_SRC (curr_set))
29956 && CONST_INT_P (SET_SRC (prev_set))
29957 && REG_P (XEXP (set_dest, 0))
29958 && REG_P (SET_DEST (prev_set))
29959 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29960 return true;
29962 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29963 && REG_P (SET_DEST (curr_set))
29964 && REG_P (SET_DEST (prev_set))
29965 && GET_CODE (SET_SRC (prev_set)) == HIGH
29966 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29967 return true;
29969 return false;
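/* At the assembly level the MOVW/MOVT case above corresponds to pairs
   such as
       movw  r0, #:lower16:sym
       movt  r0, #:upper16:sym
   which the scheduler is encouraged to keep adjacent when
   FUSE_MOVW_MOVT is enabled for the current tuning.  */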
29972 /* Return true iff the instruction fusion described by OP is enabled. */
29973 bool
29974 arm_fusion_enabled_p (tune_params::fuse_ops op)
29976 return current_tune->fusible_ops & op;
29979 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29981 static unsigned HOST_WIDE_INT
29982 arm_asan_shadow_offset (void)
29984 return HOST_WIDE_INT_1U << 29;
29988 /* This is a temporary fix for PR60655. Ideally we need
29989 to handle most of these cases in the generic part but
29990 currently we reject minus (..) (sym_ref). We try to
29991 ameliorate the case with minus (sym_ref1) (sym_ref2)
29992 where they are in the same section. */
29994 static bool
29995 arm_const_not_ok_for_debug_p (rtx p)
29997 tree decl_op0 = NULL;
29998 tree decl_op1 = NULL;
30000 if (GET_CODE (p) == MINUS)
30002 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30004 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30005 if (decl_op1
30006 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30007 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30009 if ((TREE_CODE (decl_op1) == VAR_DECL
30010 || TREE_CODE (decl_op1) == CONST_DECL)
30011 && (TREE_CODE (decl_op0) == VAR_DECL
30012 || TREE_CODE (decl_op0) == CONST_DECL))
30013 return (get_variable_section (decl_op1, false)
30014 != get_variable_section (decl_op0, false));
30016 if (TREE_CODE (decl_op1) == LABEL_DECL
30017 && TREE_CODE (decl_op0) == LABEL_DECL)
30018 return (DECL_CONTEXT (decl_op1)
30019 != DECL_CONTEXT (decl_op0));
30022 return true;
30026 return false;
30029 /* Return TRUE if X is a reference to a value in a constant pool. */
30030 extern bool
30031 arm_is_constant_pool_ref (rtx x)
30033 return (MEM_P (x)
30034 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30035 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30038 /* Remember the last target of arm_set_current_function. */
30039 static GTY(()) tree arm_previous_fndecl;
30041 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30043 void
30044 save_restore_target_globals (tree new_tree)
30046 /* If we have a previous state, use it. */
30047 if (TREE_TARGET_GLOBALS (new_tree))
30048 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30049 else if (new_tree == target_option_default_node)
30050 restore_target_globals (&default_target_globals);
30051 else
30053 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30054 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30057 arm_option_params_internal ();
30060 /* Invalidate arm_previous_fndecl. */
30062 void
30063 arm_reset_previous_fndecl (void)
30065 arm_previous_fndecl = NULL_TREE;
30068 /* Establish appropriate back-end context for processing the function
30069 FNDECL. The argument might be NULL to indicate processing at top
30070 level, outside of any function scope. */
30072 static void
30073 arm_set_current_function (tree fndecl)
30075 if (!fndecl || fndecl == arm_previous_fndecl)
30076 return;
30078 tree old_tree = (arm_previous_fndecl
30079 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30080 : NULL_TREE);
30082 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30084 /* If current function has no attributes but previous one did,
30085 use the default node. */
30086 if (! new_tree && old_tree)
30087 new_tree = target_option_default_node;
30089 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30090 the default have been handled by save_restore_target_globals from
30091 arm_pragma_target_parse. */
30092 if (old_tree == new_tree)
30093 return;
30095 arm_previous_fndecl = fndecl;
30097 /* First set the target options. */
30098 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30100 save_restore_target_globals (new_tree);
30103 /* Implement TARGET_OPTION_PRINT. */
30105 static void
30106 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30108 int flags = ptr->x_target_flags;
30109 const struct arm_fpu_desc *fpu_desc = &all_fpus[ptr->x_arm_fpu_index];
30111 fprintf (file, "%*sselected arch %s\n", indent, "",
30112 TARGET_THUMB2_P (flags) ? "thumb2" :
30113 TARGET_THUMB_P (flags) ? "thumb1" :
30114 "arm");
30116 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_desc->name);
30119 /* Hook to determine if one function can safely inline another. */
30121 static bool
30122 arm_can_inline_p (tree caller, tree callee)
30124 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30125 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30127 struct cl_target_option *caller_opts
30128 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30129 : target_option_default_node);
30131 struct cl_target_option *callee_opts
30132 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30133 : target_option_default_node);
30135 const struct arm_fpu_desc *caller_fpu
30136 = &all_fpus[caller_opts->x_arm_fpu_index];
30137 const struct arm_fpu_desc *callee_fpu
30138 = &all_fpus[callee_opts->x_arm_fpu_index];
30140 /* Callee's fpu features should be a subset of the caller's. */
30141 if ((caller_fpu->features & callee_fpu->features) != callee_fpu->features)
30142 return false;
30144 /* Need same model and regs. */
30145 if (callee_fpu->model != caller_fpu->model
30146 || callee_fpu->regs != caller_fpu->regs)
30147 return false;
30149 /* OK to inline between different modes.
30150 Function with mode specific instructions, e.g using asm,
30151 must be explicitly protected with noinline. */
30152 return true;
30155 /* Hook to fix function's alignment affected by target attribute. */
30157 static void
30158 arm_relayout_function (tree fndecl)
30160 if (DECL_USER_ALIGN (fndecl))
30161 return;
30163 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30165 if (!callee_tree)
30166 callee_tree = target_option_default_node;
30168 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30169 SET_DECL_ALIGN (fndecl, FUNCTION_BOUNDARY_P (opts->x_target_flags));
30172 /* Inner function to process the attribute((target(...))), take an argument and
30173 set the current options from the argument. If we have a list, recursively
30174 go over the list. */
30176 static bool
30177 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30179 if (TREE_CODE (args) == TREE_LIST)
30181 bool ret = true;
30183 for (; args; args = TREE_CHAIN (args))
30184 if (TREE_VALUE (args)
30185 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30186 ret = false;
30187 return ret;
30190 else if (TREE_CODE (args) != STRING_CST)
30192 error ("attribute %<target%> argument not a string");
30193 return false;
30196 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30197 char *q;
30199 while ((q = strtok (argstr, ",")) != NULL)
30201 while (ISSPACE (*q)) ++q;
30203 argstr = NULL;
30204 if (!strncmp (q, "thumb", 5))
30205 opts->x_target_flags |= MASK_THUMB;
30207 else if (!strncmp (q, "arm", 3))
30208 opts->x_target_flags &= ~MASK_THUMB;
30210 else if (!strncmp (q, "fpu=", 4))
30212 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30213 &opts->x_arm_fpu_index, CL_TARGET))
30215 error ("invalid fpu for attribute(target(\"%s\"))", q);
30216 return false;
30219 else
30221 error ("attribute(target(\"%s\")) is unknown", q);
30222 return false;
30225 arm_option_check_internal (opts);
30228 return true;
30231 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30233 tree
30234 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30235 struct gcc_options *opts_set)
30237 if (!arm_valid_target_attribute_rec (args, opts))
30238 return NULL_TREE;
30240 /* Do any overrides, such as global options arch=xxx. */
30241 arm_option_override_internal (opts, opts_set);
30243 return build_target_option_node (opts);
30246 static void
30247 add_attribute (const char * mode, tree *attributes)
30249 size_t len = strlen (mode);
30250 tree value = build_string (len, mode);
30252 TREE_TYPE (value) = build_array_type (char_type_node,
30253 build_index_type (size_int (len)));
30255 *attributes = tree_cons (get_identifier ("target"),
30256 build_tree_list (NULL_TREE, value),
30257 *attributes);
30260 /* For testing. Insert thumb or arm modes alternately on functions. */
30262 static void
30263 arm_insert_attributes (tree fndecl, tree * attributes)
30265 const char *mode;
30267 if (! TARGET_FLIP_THUMB)
30268 return;
30270 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30271 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30272 return;
30274 /* Nested definitions must inherit mode. */
30275 if (current_function_decl)
30277 mode = TARGET_THUMB ? "thumb" : "arm";
30278 add_attribute (mode, attributes);
30279 return;
30282 /* If there is already a setting don't change it. */
30283 if (lookup_attribute ("target", *attributes) != NULL)
30284 return;
30286 mode = thumb_flipper ? "thumb" : "arm";
30287 add_attribute (mode, attributes);
30289 thumb_flipper = !thumb_flipper;
30292 /* Hook to validate attribute((target("string"))). */
30294 static bool
30295 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30296 tree args, int ARG_UNUSED (flags))
30298 bool ret = true;
30299 struct gcc_options func_options;
30300 tree cur_tree, new_optimize;
30301 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30303 /* Get the optimization options of the current function. */
30304 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30306 /* If the function changed the optimization levels as well as setting target
30307 options, start with the optimizations specified. */
30308 if (!func_optimize)
30309 func_optimize = optimization_default_node;
30311 /* Init func_options. */
30312 memset (&func_options, 0, sizeof (func_options));
30313 init_options_struct (&func_options, NULL);
30314 lang_hooks.init_options_struct (&func_options);
30316 /* Initialize func_options to the defaults. */
30317 cl_optimization_restore (&func_options,
30318 TREE_OPTIMIZATION (func_optimize));
30320 cl_target_option_restore (&func_options,
30321 TREE_TARGET_OPTION (target_option_default_node));
30323 /* Set func_options flags with new target mode. */
30324 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30325 &global_options_set);
30327 if (cur_tree == NULL_TREE)
30328 ret = false;
30330 new_optimize = build_optimization_node (&func_options);
30332 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30334 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30336 finalize_options_struct (&func_options);
30338 return ret;
30341 void
30342 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30345 fprintf (stream, "\t.syntax unified\n");
30347 if (TARGET_THUMB)
30349 if (is_called_in_ARM_mode (decl)
30350 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30351 && cfun->is_thunk))
30352 fprintf (stream, "\t.code 32\n");
30353 else if (TARGET_THUMB1)
30354 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30355 else
30356 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30358 else
30359 fprintf (stream, "\t.arm\n");
30361 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30362 TARGET_SOFT_FLOAT ? "softvfp" : TARGET_FPU_NAME);
30364 if (TARGET_POKE_FUNCTION_NAME)
30365 arm_poke_function_name (stream, (const char *) name);
30368 /* If MEM is in the form of [base+offset], extract the two parts
30369 of the address and set them in BASE and OFFSET; otherwise return FALSE
30370 after clearing BASE and OFFSET. */
30372 static bool
30373 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30375 rtx addr;
30377 gcc_assert (MEM_P (mem));
30379 addr = XEXP (mem, 0);
30381 /* Strip off const from addresses like (const (addr)). */
30382 if (GET_CODE (addr) == CONST)
30383 addr = XEXP (addr, 0);
30385 if (GET_CODE (addr) == REG)
30387 *base = addr;
30388 *offset = const0_rtx;
30389 return true;
30392 if (GET_CODE (addr) == PLUS
30393 && GET_CODE (XEXP (addr, 0)) == REG
30394 && CONST_INT_P (XEXP (addr, 1)))
30396 *base = XEXP (addr, 0);
30397 *offset = XEXP (addr, 1);
30398 return true;
30401 *base = NULL_RTX;
30402 *offset = NULL_RTX;
30404 return false;
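/* For example (illustrative RTL): (mem (reg r4)) yields BASE == r4 and
   OFFSET == const0_rtx, (mem (plus (reg r4) (const_int 8))) yields
   BASE == r4 and OFFSET == (const_int 8), and any other address shape
   makes the function return FALSE.  */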
30407 /* If INSN is a load or store with an address in the form of [base+offset],
30408 extract the two parts and set them in BASE and OFFSET. IS_LOAD is set
30409 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30410 otherwise return FALSE. */
30412 static bool
30413 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30415 rtx x, dest, src;
30417 gcc_assert (INSN_P (insn));
30418 x = PATTERN (insn);
30419 if (GET_CODE (x) != SET)
30420 return false;
30422 src = SET_SRC (x);
30423 dest = SET_DEST (x);
30424 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30426 *is_load = false;
30427 extract_base_offset_in_addr (dest, base, offset);
30429 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30431 *is_load = true;
30432 extract_base_offset_in_addr (src, base, offset);
30434 else
30435 return false;
30437 return (*base != NULL_RTX && *offset != NULL_RTX);
30440 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30442 Currently we only support fusing ldr or str instructions, so FUSION_PRI
30443 and PRI are only calculated for these instructions. For other instructions,
30444 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
30445 of instruction fusion can be supported by returning different priorities.
30447 It's important that irrelevant instructions get the largest FUSION_PRI. */
30449 static void
30450 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30451 int *fusion_pri, int *pri)
30453 int tmp, off_val;
30454 bool is_load;
30455 rtx base, offset;
30457 gcc_assert (INSN_P (insn));
30459 tmp = max_pri - 1;
30460 if (!fusion_load_store (insn, &base, &offset, &is_load))
30462 *pri = tmp;
30463 *fusion_pri = tmp;
30464 return;
30467 /* Load goes first. */
30468 if (is_load)
30469 *fusion_pri = tmp - 1;
30470 else
30471 *fusion_pri = tmp - 2;
30473 tmp /= 2;
30475 /* INSN with smaller base register goes first. */
30476 tmp -= ((REGNO (base) & 0xff) << 20);
30478 /* INSN with smaller offset goes first. */
30479 off_val = (int)(INTVAL (offset));
30480 if (off_val >= 0)
30481 tmp -= (off_val & 0xfffff);
30482 else
30483 tmp += ((- off_val) & 0xfffff);
30485 *pri = tmp;
30486 return;
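/* For instance (illustrative): two stores through the same base register
   at offsets 4 and 8 both receive the same FUSION_PRI (loads would get a
   higher one), so the scheduler groups them; within the group the insn
   with the smaller base register number and smaller offset ends up with
   the larger PRI and is placed first.  */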
30490 /* Construct and return a PARALLEL RTX vector with elements numbering the
30491 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30492 the vector - from the perspective of the architecture. This does not
30493 line up with GCC's perspective on lane numbers, so we end up with
30494 different masks depending on our target endian-ness. The diagram
30495 below may help. We must draw the distinction when building masks
30496 which select one half of the vector. An instruction selecting
30497 architectural low-lanes for a big-endian target must be described using
30498 a mask selecting GCC high-lanes.
30500 Big-Endian Little-Endian
30502 GCC 0 1 2 3 3 2 1 0
30503 | x | x | x | x | | x | x | x | x |
30504 Architecture 3 2 1 0 3 2 1 0
30506 Low Mask: { 2, 3 } { 0, 1 }
30507 High Mask: { 0, 1 } { 2, 3 }
30511 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30513 int nunits = GET_MODE_NUNITS (mode);
30514 rtvec v = rtvec_alloc (nunits / 2);
30515 int high_base = nunits / 2;
30516 int low_base = 0;
30517 int base;
30518 rtx t1;
30519 int i;
30521 if (BYTES_BIG_ENDIAN)
30522 base = high ? low_base : high_base;
30523 else
30524 base = high ? high_base : low_base;
30526 for (i = 0; i < nunits / 2; i++)
30527 RTVEC_ELT (v, i) = GEN_INT (base + i);
30529 t1 = gen_rtx_PARALLEL (mode, v);
30530 return t1;
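/* For example, for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian and
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   "High Mask" row of the diagram above.  */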
30533 /* Check OP for validity as a PARALLEL RTX vector with elements
30534 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30535 from the perspective of the architecture. See the diagram above
30536 arm_simd_vect_par_cnst_half for more details. */
30538 bool
30539 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30540 bool high)
30542 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30543 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30544 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30545 int i = 0;
30547 if (!VECTOR_MODE_P (mode))
30548 return false;
30550 if (count_op != count_ideal)
30551 return false;
30553 for (i = 0; i < count_ideal; i++)
30555 rtx elt_op = XVECEXP (op, 0, i);
30556 rtx elt_ideal = XVECEXP (ideal, 0, i);
30558 if (!CONST_INT_P (elt_op)
30559 || INTVAL (elt_ideal) != INTVAL (elt_op))
30560 return false;
30562 return true;
30565 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30566 in Thumb1. */
30567 static bool
30568 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30569 const_tree)
30571 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30572 if (vcall_offset && TARGET_THUMB1)
30573 return false;
30575 /* Otherwise ok. */
30576 return true;
30579 #include "gt-arm.h"