1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "cfghooks.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "emit-rtl.h"
37 #include "recog.h"
38 #include "cgraph.h"
39 #include "diagnostic-core.h"
40 #include "alias.h"
41 #include "fold-const.h"
42 #include "stor-layout.h"
43 #include "calls.h"
44 #include "varasm.h"
45 #include "output.h"
46 #include "insn-attr.h"
47 #include "flags.h"
48 #include "reload.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "cfgrtl.h"
52 #include "sched-int.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
55 #include "intl.h"
56 #include "libfuncs.h"
57 #include "params.h"
58 #include "opts.h"
59 #include "dumpfile.h"
60 #include "target-globals.h"
61 #include "builtins.h"
62 #include "tm-constrs.h"
63 #include "rtl-iter.h"
65 /* This file should be included last. */
66 #include "target-def.h"
68 /* Forward definitions of types. */
69 typedef struct minipool_node Mnode;
70 typedef struct minipool_fixup Mfix;
72 void (*arm_lang_output_object_attributes_hook)(void);
74 struct four_ints
76 int i[4];
79 /* Forward function declarations. */
80 static bool arm_const_not_ok_for_debug_p (rtx);
81 static bool arm_needs_doubleword_align (machine_mode, const_tree);
82 static int arm_compute_static_chain_stack_bytes (void);
83 static arm_stack_offsets *arm_get_frame_offsets (void);
84 static void arm_add_gc_roots (void);
85 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
86 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
87 static unsigned bit_count (unsigned long);
88 static unsigned feature_count (const arm_feature_set*);
89 static int arm_address_register_rtx_p (rtx, int);
90 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
91 static bool is_called_in_ARM_mode (tree);
92 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
93 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
94 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
95 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
96 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
97 inline static int thumb1_index_register_rtx_p (rtx, int);
98 static int thumb_far_jump_used_p (void);
99 static bool thumb_force_lr_save (void);
100 static unsigned arm_size_return_regs (void);
101 static bool arm_assemble_integer (rtx, unsigned int, int);
102 static void arm_print_operand (FILE *, rtx, int);
103 static void arm_print_operand_address (FILE *, machine_mode, rtx);
104 static bool arm_print_operand_punct_valid_p (unsigned char code);
105 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
106 static arm_cc get_arm_condition_code (rtx);
107 static const char *output_multi_immediate (rtx *, const char *, const char *,
108 int, HOST_WIDE_INT);
109 static const char *shift_op (rtx, HOST_WIDE_INT *);
110 static struct machine_function *arm_init_machine_status (void);
111 static void thumb_exit (FILE *, int);
112 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
113 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
114 static Mnode *add_minipool_forward_ref (Mfix *);
115 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
116 static Mnode *add_minipool_backward_ref (Mfix *);
117 static void assign_minipool_offsets (Mfix *);
118 static void arm_print_value (FILE *, rtx);
119 static void dump_minipool (rtx_insn *);
120 static int arm_barrier_cost (rtx_insn *);
121 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
122 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
123 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
124 machine_mode, rtx);
125 static void arm_reorg (void);
126 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
127 static unsigned long arm_compute_save_reg0_reg12_mask (void);
128 static unsigned long arm_compute_save_reg_mask (void);
129 static unsigned long arm_isr_value (tree);
130 static unsigned long arm_compute_func_type (void);
131 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
132 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
133 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
134 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
135 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
136 #endif
137 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
138 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
139 static int arm_comp_type_attributes (const_tree, const_tree);
140 static void arm_set_default_type_attributes (tree);
141 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
142 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
143 static int optimal_immediate_sequence (enum rtx_code code,
144 unsigned HOST_WIDE_INT val,
145 struct four_ints *return_sequence);
146 static int optimal_immediate_sequence_1 (enum rtx_code code,
147 unsigned HOST_WIDE_INT val,
148 struct four_ints *return_sequence,
149 int i);
150 static int arm_get_strip_length (int);
151 static bool arm_function_ok_for_sibcall (tree, tree);
152 static machine_mode arm_promote_function_mode (const_tree,
153 machine_mode, int *,
154 const_tree, int);
155 static bool arm_return_in_memory (const_tree, const_tree);
156 static rtx arm_function_value (const_tree, const_tree, bool);
157 static rtx arm_libcall_value_1 (machine_mode);
158 static rtx arm_libcall_value (machine_mode, const_rtx);
159 static bool arm_function_value_regno_p (const unsigned int);
160 static void arm_internal_label (FILE *, const char *, unsigned long);
161 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
162 tree);
163 static bool arm_have_conditional_execution (void);
164 static bool arm_cannot_force_const_mem (machine_mode, rtx);
165 static bool arm_legitimate_constant_p (machine_mode, rtx);
166 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
167 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
168 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
169 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
170 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
171 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
172 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
173 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
174 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
175 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
176 static void emit_constant_insn (rtx cond, rtx pattern);
177 static rtx_insn *emit_set_insn (rtx, rtx);
178 static rtx emit_multi_reg_push (unsigned long, unsigned long);
179 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
180 tree, bool);
181 static rtx arm_function_arg (cumulative_args_t, machine_mode,
182 const_tree, bool);
183 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
184 const_tree, bool);
185 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
186 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
187 const_tree);
188 static rtx aapcs_libcall_value (machine_mode);
189 static int aapcs_select_return_coproc (const_tree, const_tree);
191 #ifdef OBJECT_FORMAT_ELF
192 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
193 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
194 #endif
195 #ifndef ARM_PE
196 static void arm_encode_section_info (tree, rtx, int);
197 #endif
199 static void arm_file_end (void);
200 static void arm_file_start (void);
201 static void arm_insert_attributes (tree, tree *);
203 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
204 tree, int *, int);
205 static bool arm_pass_by_reference (cumulative_args_t,
206 machine_mode, const_tree, bool);
207 static bool arm_promote_prototypes (const_tree);
208 static bool arm_default_short_enums (void);
209 static bool arm_align_anon_bitfield (void);
210 static bool arm_return_in_msb (const_tree);
211 static bool arm_must_pass_in_stack (machine_mode, const_tree);
212 static bool arm_return_in_memory (const_tree, const_tree);
213 #if ARM_UNWIND_INFO
214 static void arm_unwind_emit (FILE *, rtx_insn *);
215 static bool arm_output_ttype (rtx);
216 static void arm_asm_emit_except_personality (rtx);
217 static void arm_asm_init_sections (void);
218 #endif
219 static rtx arm_dwarf_register_span (rtx);
221 static tree arm_cxx_guard_type (void);
222 static bool arm_cxx_guard_mask_bit (void);
223 static tree arm_get_cookie_size (tree);
224 static bool arm_cookie_has_size (void);
225 static bool arm_cxx_cdtor_returns_this (void);
226 static bool arm_cxx_key_method_may_be_inline (void);
227 static void arm_cxx_determine_class_data_visibility (tree);
228 static bool arm_cxx_class_data_always_comdat (void);
229 static bool arm_cxx_use_aeabi_atexit (void);
230 static void arm_init_libfuncs (void);
231 static tree arm_build_builtin_va_list (void);
232 static void arm_expand_builtin_va_start (tree, rtx);
233 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
234 static void arm_option_override (void);
235 static void arm_override_options_after_change (void);
236 static void arm_option_print (FILE *, int, struct cl_target_option *);
237 static void arm_set_current_function (tree);
238 static bool arm_can_inline_p (tree, tree);
239 static void arm_relayout_function (tree);
240 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
241 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
242 static bool arm_macro_fusion_p (void);
243 static bool arm_cannot_copy_insn_p (rtx_insn *);
244 static int arm_issue_rate (void);
245 static int arm_first_cycle_multipass_dfa_lookahead (void);
246 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
247 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
248 static bool arm_output_addr_const_extra (FILE *, rtx);
249 static bool arm_allocate_stack_slots_for_args (void);
250 static bool arm_warn_func_return (tree);
251 static tree arm_promoted_type (const_tree t);
252 static tree arm_convert_to_type (tree type, tree expr);
253 static bool arm_scalar_mode_supported_p (machine_mode);
254 static bool arm_frame_pointer_required (void);
255 static bool arm_can_eliminate (const int, const int);
256 static void arm_asm_trampoline_template (FILE *);
257 static void arm_trampoline_init (rtx, tree, rtx);
258 static rtx arm_trampoline_adjust_address (rtx);
259 static rtx arm_pic_static_addr (rtx orig, rtx reg);
260 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
261 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
262 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
263 static bool arm_array_mode_supported_p (machine_mode,
264 unsigned HOST_WIDE_INT);
265 static machine_mode arm_preferred_simd_mode (machine_mode);
266 static bool arm_class_likely_spilled_p (reg_class_t);
267 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
268 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
269 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
270 const_tree type,
271 int misalignment,
272 bool is_packed);
273 static void arm_conditional_register_usage (void);
274 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
275 static unsigned int arm_autovectorize_vector_sizes (void);
276 static int arm_default_branch_cost (bool, bool);
277 static int arm_cortex_a5_branch_cost (bool, bool);
278 static int arm_cortex_m_branch_cost (bool, bool);
279 static int arm_cortex_m7_branch_cost (bool, bool);
281 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
282 const unsigned char *sel);
284 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
286 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
287 tree vectype,
288 int misalign ATTRIBUTE_UNUSED);
289 static unsigned arm_add_stmt_cost (void *data, int count,
290 enum vect_cost_for_stmt kind,
291 struct _stmt_vec_info *stmt_info,
292 int misalign,
293 enum vect_cost_model_location where);
295 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
296 bool op0_preserve_value);
297 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
299 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
300 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
301 const_tree);
304 /* Table of machine attributes. */
305 static const struct attribute_spec arm_attribute_table[] =
307 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
308 affects_type_identity } */
309 /* Function calls made to this symbol must be done indirectly, because
310 it may lie outside of the 26 bit addressing range of a normal function
311 call. */
312 { "long_call", 0, 0, false, true, true, NULL, false },
313 /* Whereas these functions are always known to reside within the 26 bit
314 addressing range. */
315 { "short_call", 0, 0, false, true, true, NULL, false },
316 /* Specify the procedure call conventions for a function. */
317 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
318 false },
319 /* Interrupt Service Routines have special prologue and epilogue requirements. */
320 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
321 false },
322 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
323 false },
324 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
325 false },
326 #ifdef ARM_PE
327 /* ARM/PE has three new attributes:
328 interfacearm - ?
329 dllexport - for exporting a function/variable that will live in a dll
330 dllimport - for importing a function/variable from a dll
332 Microsoft allows multiple declspecs in one __declspec, separating
333 them with spaces. We do NOT support this. Instead, use __declspec
334 multiple times.
336 { "dllimport", 0, 0, true, false, false, NULL, false },
337 { "dllexport", 0, 0, true, false, false, NULL, false },
338 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
339 false },
340 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
342 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
343 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
344 false },
345 #endif
346 { NULL, 0, 0, false, false, false, NULL, false }
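/* Illustrative use of the attributes above in user code (a sketch, not
   part of this file):

     void far_away (void) __attribute__ ((long_call));   (always called indirectly)
     void near_by (void) __attribute__ ((short_call));    (a plain BL is assumed safe)
     void handler (void) __attribute__ ((isr ("IRQ")));   (ISR prologue/epilogue)
     double f (double) __attribute__ ((pcs ("aapcs")));   (AAPCS core-register passing)

   On ARM/PE, apply one attribute per __declspec, for example
   __declspec (dllimport) __declspec (naked), rather than the Microsoft
   form __declspec (dllimport naked).  */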
349 /* Initialize the GCC target structure. */
350 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
351 #undef TARGET_MERGE_DECL_ATTRIBUTES
352 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
353 #endif
355 #undef TARGET_LEGITIMIZE_ADDRESS
356 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
358 #undef TARGET_LRA_P
359 #define TARGET_LRA_P hook_bool_void_true
361 #undef TARGET_ATTRIBUTE_TABLE
362 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
364 #undef TARGET_INSERT_ATTRIBUTES
365 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
367 #undef TARGET_ASM_FILE_START
368 #define TARGET_ASM_FILE_START arm_file_start
369 #undef TARGET_ASM_FILE_END
370 #define TARGET_ASM_FILE_END arm_file_end
372 #undef TARGET_ASM_ALIGNED_SI_OP
373 #define TARGET_ASM_ALIGNED_SI_OP NULL
374 #undef TARGET_ASM_INTEGER
375 #define TARGET_ASM_INTEGER arm_assemble_integer
377 #undef TARGET_PRINT_OPERAND
378 #define TARGET_PRINT_OPERAND arm_print_operand
379 #undef TARGET_PRINT_OPERAND_ADDRESS
380 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
381 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
382 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
384 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
385 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
387 #undef TARGET_ASM_FUNCTION_PROLOGUE
388 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
390 #undef TARGET_ASM_FUNCTION_EPILOGUE
391 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
393 #undef TARGET_CAN_INLINE_P
394 #define TARGET_CAN_INLINE_P arm_can_inline_p
396 #undef TARGET_RELAYOUT_FUNCTION
397 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
399 #undef TARGET_OPTION_OVERRIDE
400 #define TARGET_OPTION_OVERRIDE arm_option_override
402 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
403 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
405 #undef TARGET_OPTION_PRINT
406 #define TARGET_OPTION_PRINT arm_option_print
408 #undef TARGET_COMP_TYPE_ATTRIBUTES
409 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
411 #undef TARGET_SCHED_MACRO_FUSION_P
412 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
414 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
415 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
417 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
418 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
420 #undef TARGET_SCHED_ADJUST_COST
421 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
423 #undef TARGET_SET_CURRENT_FUNCTION
424 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
426 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
427 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
429 #undef TARGET_SCHED_REORDER
430 #define TARGET_SCHED_REORDER arm_sched_reorder
432 #undef TARGET_REGISTER_MOVE_COST
433 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
435 #undef TARGET_MEMORY_MOVE_COST
436 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
438 #undef TARGET_ENCODE_SECTION_INFO
439 #ifdef ARM_PE
440 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
441 #else
442 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
443 #endif
445 #undef TARGET_STRIP_NAME_ENCODING
446 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
448 #undef TARGET_ASM_INTERNAL_LABEL
449 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
451 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
452 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
454 #undef TARGET_FUNCTION_VALUE
455 #define TARGET_FUNCTION_VALUE arm_function_value
457 #undef TARGET_LIBCALL_VALUE
458 #define TARGET_LIBCALL_VALUE arm_libcall_value
460 #undef TARGET_FUNCTION_VALUE_REGNO_P
461 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
463 #undef TARGET_ASM_OUTPUT_MI_THUNK
464 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
465 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
466 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
468 #undef TARGET_RTX_COSTS
469 #define TARGET_RTX_COSTS arm_rtx_costs
470 #undef TARGET_ADDRESS_COST
471 #define TARGET_ADDRESS_COST arm_address_cost
473 #undef TARGET_SHIFT_TRUNCATION_MASK
474 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
475 #undef TARGET_VECTOR_MODE_SUPPORTED_P
476 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
477 #undef TARGET_ARRAY_MODE_SUPPORTED_P
478 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
479 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
480 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
481 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
482 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
483 arm_autovectorize_vector_sizes
485 #undef TARGET_MACHINE_DEPENDENT_REORG
486 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
488 #undef TARGET_INIT_BUILTINS
489 #define TARGET_INIT_BUILTINS arm_init_builtins
490 #undef TARGET_EXPAND_BUILTIN
491 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
492 #undef TARGET_BUILTIN_DECL
493 #define TARGET_BUILTIN_DECL arm_builtin_decl
495 #undef TARGET_INIT_LIBFUNCS
496 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
498 #undef TARGET_PROMOTE_FUNCTION_MODE
499 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
500 #undef TARGET_PROMOTE_PROTOTYPES
501 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
502 #undef TARGET_PASS_BY_REFERENCE
503 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
504 #undef TARGET_ARG_PARTIAL_BYTES
505 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
506 #undef TARGET_FUNCTION_ARG
507 #define TARGET_FUNCTION_ARG arm_function_arg
508 #undef TARGET_FUNCTION_ARG_ADVANCE
509 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
510 #undef TARGET_FUNCTION_ARG_BOUNDARY
511 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
513 #undef TARGET_SETUP_INCOMING_VARARGS
514 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
516 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
517 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
519 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
520 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
521 #undef TARGET_TRAMPOLINE_INIT
522 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
523 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
524 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
526 #undef TARGET_WARN_FUNC_RETURN
527 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
529 #undef TARGET_DEFAULT_SHORT_ENUMS
530 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
532 #undef TARGET_ALIGN_ANON_BITFIELD
533 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
535 #undef TARGET_NARROW_VOLATILE_BITFIELD
536 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
538 #undef TARGET_CXX_GUARD_TYPE
539 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
541 #undef TARGET_CXX_GUARD_MASK_BIT
542 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
544 #undef TARGET_CXX_GET_COOKIE_SIZE
545 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
547 #undef TARGET_CXX_COOKIE_HAS_SIZE
548 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
550 #undef TARGET_CXX_CDTOR_RETURNS_THIS
551 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
553 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
554 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
556 #undef TARGET_CXX_USE_AEABI_ATEXIT
557 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
559 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
560 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
561 arm_cxx_determine_class_data_visibility
563 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
564 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
566 #undef TARGET_RETURN_IN_MSB
567 #define TARGET_RETURN_IN_MSB arm_return_in_msb
569 #undef TARGET_RETURN_IN_MEMORY
570 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
572 #undef TARGET_MUST_PASS_IN_STACK
573 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
575 #if ARM_UNWIND_INFO
576 #undef TARGET_ASM_UNWIND_EMIT
577 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
579 /* EABI unwinding tables use a different format for the typeinfo tables. */
580 #undef TARGET_ASM_TTYPE
581 #define TARGET_ASM_TTYPE arm_output_ttype
583 #undef TARGET_ARM_EABI_UNWINDER
584 #define TARGET_ARM_EABI_UNWINDER true
586 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
587 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
589 #undef TARGET_ASM_INIT_SECTIONS
590 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
591 #endif /* ARM_UNWIND_INFO */
593 #undef TARGET_DWARF_REGISTER_SPAN
594 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
596 #undef TARGET_CANNOT_COPY_INSN_P
597 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
599 #ifdef HAVE_AS_TLS
600 #undef TARGET_HAVE_TLS
601 #define TARGET_HAVE_TLS true
602 #endif
604 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
605 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
607 #undef TARGET_LEGITIMATE_CONSTANT_P
608 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
610 #undef TARGET_CANNOT_FORCE_CONST_MEM
611 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
613 #undef TARGET_MAX_ANCHOR_OFFSET
614 #define TARGET_MAX_ANCHOR_OFFSET 4095
616 /* The minimum is set such that the total size of the block
617 for a particular anchor is -4088 + 1 + 4095 bytes, which is
618 divisible by eight, ensuring natural spacing of anchors. */
619 #undef TARGET_MIN_ANCHOR_OFFSET
620 #define TARGET_MIN_ANCHOR_OFFSET -4088
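/* Worked out, for clarity: the offsets -4088 .. 4095 cover
   4088 + 1 + 4095 = 8184 bytes per anchor, and 8184 = 8 * 1023, which is
   the eight-byte divisibility referred to above.  */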
622 #undef TARGET_SCHED_ISSUE_RATE
623 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
625 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
626 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
627 arm_first_cycle_multipass_dfa_lookahead
629 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
630 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
631 arm_first_cycle_multipass_dfa_lookahead_guard
633 #undef TARGET_MANGLE_TYPE
634 #define TARGET_MANGLE_TYPE arm_mangle_type
636 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
637 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
639 #undef TARGET_BUILD_BUILTIN_VA_LIST
640 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
641 #undef TARGET_EXPAND_BUILTIN_VA_START
642 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
643 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
644 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
646 #ifdef HAVE_AS_TLS
647 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
648 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
649 #endif
651 #undef TARGET_LEGITIMATE_ADDRESS_P
652 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
654 #undef TARGET_PREFERRED_RELOAD_CLASS
655 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
657 #undef TARGET_PROMOTED_TYPE
658 #define TARGET_PROMOTED_TYPE arm_promoted_type
660 #undef TARGET_CONVERT_TO_TYPE
661 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
663 #undef TARGET_SCALAR_MODE_SUPPORTED_P
664 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
666 #undef TARGET_FRAME_POINTER_REQUIRED
667 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
669 #undef TARGET_CAN_ELIMINATE
670 #define TARGET_CAN_ELIMINATE arm_can_eliminate
672 #undef TARGET_CONDITIONAL_REGISTER_USAGE
673 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
675 #undef TARGET_CLASS_LIKELY_SPILLED_P
676 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
678 #undef TARGET_VECTORIZE_BUILTINS
679 #define TARGET_VECTORIZE_BUILTINS
681 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
682 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
683 arm_builtin_vectorized_function
685 #undef TARGET_VECTOR_ALIGNMENT
686 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
688 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
689 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
690 arm_vector_alignment_reachable
692 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
693 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
694 arm_builtin_support_vector_misalignment
696 #undef TARGET_PREFERRED_RENAME_CLASS
697 #define TARGET_PREFERRED_RENAME_CLASS \
698 arm_preferred_rename_class
700 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
701 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
702 arm_vectorize_vec_perm_const_ok
704 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
705 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
706 arm_builtin_vectorization_cost
707 #undef TARGET_VECTORIZE_ADD_STMT_COST
708 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
710 #undef TARGET_CANONICALIZE_COMPARISON
711 #define TARGET_CANONICALIZE_COMPARISON \
712 arm_canonicalize_comparison
714 #undef TARGET_ASAN_SHADOW_OFFSET
715 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
717 #undef MAX_INSN_PER_IT_BLOCK
718 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
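/* Note (an assumption based on the option name): arm_restrict_it
   corresponds to -mrestrict-it, which follows the ARMv8 rule deprecating
   IT blocks that guard more than a single instruction; otherwise an IT
   block may guard up to four conditional instructions.  */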
720 #undef TARGET_CAN_USE_DOLOOP_P
721 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
723 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
724 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
726 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
727 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
729 #undef TARGET_SCHED_FUSION_PRIORITY
730 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
732 struct gcc_target targetm = TARGET_INITIALIZER;
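/* Sketch of how the definitions above are consumed: TARGET_INITIALIZER
   (brought in by target-def.h above) fills each hook of the gcc_target
   structure with either the override #defined here or its documented
   default, so a middle-end call such as targetm.sched.issue_rate ()
   reaches arm_issue_rate, and targetm.rtx_costs (...) reaches
   arm_rtx_costs.  */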
734 /* Obstack for minipool constant handling. */
735 static struct obstack minipool_obstack;
736 static char * minipool_startobj;
738 /* The maximum number of insns skipped which
739 will be conditionalised if possible. */
740 static int max_insns_skipped = 5;
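/* Illustrative example (not from this file): in ARM state a short guarded
   statement such as

       if (x < 0)
         x = -x;

   can be emitted as

       cmp   r0, #0
       rsblt r0, r0, #0

   rather than as a compare and branch, provided the sequence to be
   skipped is no longer than max_insns_skipped instructions.  */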
742 extern FILE * asm_out_file;
744 /* True if we are currently building a constant table. */
745 int making_const_table;
747 /* The processor for which instructions should be scheduled. */
748 enum processor_type arm_tune = arm_none;
750 /* The current tuning set. */
751 const struct tune_params *current_tune;
753 /* Which floating point hardware to schedule for. */
754 int arm_fpu_attr;
756 /* Used for Thumb call_via trampolines. */
757 rtx thumb_call_via_label[14];
758 static int thumb_call_reg_needed;
760 /* The bits in this mask specify which
761 instructions we are allowed to generate. */
762 arm_feature_set insn_flags = ARM_FSET_EMPTY;
764 /* The bits in this mask specify which instruction scheduling options should
765 be used. */
766 arm_feature_set tune_flags = ARM_FSET_EMPTY;
768 /* The highest ARM architecture version supported by the
769 target. */
770 enum base_architecture arm_base_arch = BASE_ARCH_0;
772 /* The following are used in the arm.md file as equivalents to bits
773 in the above two flag variables. */
775 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
776 int arm_arch3m = 0;
778 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
779 int arm_arch4 = 0;
781 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
782 int arm_arch4t = 0;
784 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
785 int arm_arch5 = 0;
787 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
788 int arm_arch5e = 0;
790 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
791 int arm_arch6 = 0;
793 /* Nonzero if this chip supports the ARM 6K extensions. */
794 int arm_arch6k = 0;
796 /* Nonzero if this chip supports the ARM 6KZ extensions. */
797 int arm_arch6kz = 0;
799 /* Nonzero if instructions present in ARMv6-M can be used. */
800 int arm_arch6m = 0;
802 /* Nonzero if this chip supports the ARM 7 extensions. */
803 int arm_arch7 = 0;
805 /* Nonzero if instructions not present in the 'M' profile can be used. */
806 int arm_arch_notm = 0;
808 /* Nonzero if instructions present in ARMv7E-M can be used. */
809 int arm_arch7em = 0;
811 /* Nonzero if instructions present in ARMv8 can be used. */
812 int arm_arch8 = 0;
814 /* Nonzero if this chip supports the ARMv8.1 extensions. */
815 int arm_arch8_1 = 0;
817 /* Nonzero if this chip can benefit from load scheduling. */
818 int arm_ld_sched = 0;
820 /* Nonzero if this chip is a StrongARM. */
821 int arm_tune_strongarm = 0;
823 /* Nonzero if this chip supports Intel Wireless MMX technology. */
824 int arm_arch_iwmmxt = 0;
826 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
827 int arm_arch_iwmmxt2 = 0;
829 /* Nonzero if this chip is an XScale. */
830 int arm_arch_xscale = 0;
832 /* Nonzero if tuning for XScale. */
833 int arm_tune_xscale = 0;
835 /* Nonzero if we want to tune for stores that access the write-buffer.
836 This typically means an ARM6 or ARM7 with MMU or MPU. */
837 int arm_tune_wbuf = 0;
839 /* Nonzero if tuning for Cortex-A9. */
840 int arm_tune_cortex_a9 = 0;
842 /* Nonzero if we should define __THUMB_INTERWORK__ in the
843 preprocessor.
844 XXX This is a bit of a hack, it's intended to help work around
845 problems in GLD which doesn't understand that armv5t code is
846 interworking clean. */
847 int arm_cpp_interwork = 0;
849 /* Nonzero if chip supports Thumb 1. */
850 int arm_arch_thumb1;
852 /* Nonzero if chip supports Thumb 2. */
853 int arm_arch_thumb2;
855 /* Nonzero if chip supports integer division instruction. */
856 int arm_arch_arm_hwdiv;
857 int arm_arch_thumb_hwdiv;
859 /* Nonzero if chip disallows volatile memory access in IT block. */
860 int arm_arch_no_volatile_ce;
862 /* Nonzero if we should use Neon to handle 64-bit operations rather
863 than core registers. */
864 int prefer_neon_for_64bits = 0;
866 /* Nonzero if we shouldn't use literal pools. */
867 bool arm_disable_literal_pool = false;
869 /* The register number to be used for the PIC offset register. */
870 unsigned arm_pic_register = INVALID_REGNUM;
872 enum arm_pcs arm_pcs_default;
874 /* For an explanation of these variables, see final_prescan_insn below. */
875 int arm_ccfsm_state;
876 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
877 enum arm_cond_code arm_current_cc;
879 rtx arm_target_insn;
880 int arm_target_label;
881 /* The number of conditionally executed insns, including the current insn. */
882 int arm_condexec_count = 0;
883 /* A bitmask specifying the patterns for the IT block.
884 Zero means do not output an IT block before this insn. */
885 int arm_condexec_mask = 0;
886 /* The number of bits used in arm_condexec_mask. */
887 int arm_condexec_masklen = 0;
889 /* Nonzero if chip supports the ARMv8 CRC instructions. */
890 int arm_arch_crc = 0;
892 /* Nonzero if the core has a very small, high-latency, multiply unit. */
893 int arm_m_profile_small_mul = 0;
895 /* The condition codes of the ARM, and the inverse function. */
896 static const char * const arm_condition_codes[] =
898 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
899 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
902 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
903 int arm_regs_in_sequence[] =
905 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
908 #define ARM_LSL_NAME "lsl"
909 #define streq(string1, string2) (strcmp (string1, string2) == 0)
911 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
912 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
913 | (1 << PIC_OFFSET_TABLE_REGNUM)))
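/* Worked example (assuming the usual numbering in arm.h: the Thumb hard
   frame pointer is r7, SP is r13, PC is r15 and the PIC register, when
   used, is r9): 0xff & ~((1 << 7) | (1 << 13) | (1 << 15) | (1 << 9))
   == 0x7f, i.e. r0-r6 remain available as Thumb-2 work registers; the
   SP, PC and r9 bits lie above bit 7 and are already excluded by the
   0xff mask.  */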
915 /* Initialization code. */
917 struct processors
919 const char *const name;
920 enum processor_type core;
921 const char *arch;
922 enum base_architecture base_arch;
923 const arm_feature_set flags;
924 const struct tune_params *const tune;
928 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
929 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
931 num_slots, \
932 l1_size, \
933 l1_line_size \
936 /* arm generic vectorizer costs. */
937 static const
938 struct cpu_vec_costs arm_default_vec_cost = {
939 1, /* scalar_stmt_cost. */
940 1, /* scalar load_cost. */
941 1, /* scalar_store_cost. */
942 1, /* vec_stmt_cost. */
943 1, /* vec_to_scalar_cost. */
944 1, /* scalar_to_vec_cost. */
945 1, /* vec_align_load_cost. */
946 1, /* vec_unalign_load_cost. */
947 1, /* vec_unalign_store_cost. */
948 1, /* vec_store_cost. */
949 3, /* cond_taken_branch_cost. */
950 1, /* cond_not_taken_branch_cost. */
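/* These unit costs are, roughly speaking (an assumption about the
   plumbing rather than a statement of it), what the vectorizer hooks
   declared above, arm_builtin_vectorization_cost and arm_add_stmt_cost,
   hand back to the middle end; the 3-versus-1 split between taken and
   not-taken branch costs biases the cost model against control flow
   inside vectorized loops.  */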
953 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
954 #include "aarch-cost-tables.h"
958 const struct cpu_cost_table cortexa9_extra_costs =
960 /* ALU */
962 0, /* arith. */
963 0, /* logical. */
964 0, /* shift. */
965 COSTS_N_INSNS (1), /* shift_reg. */
966 COSTS_N_INSNS (1), /* arith_shift. */
967 COSTS_N_INSNS (2), /* arith_shift_reg. */
968 0, /* log_shift. */
969 COSTS_N_INSNS (1), /* log_shift_reg. */
970 COSTS_N_INSNS (1), /* extend. */
971 COSTS_N_INSNS (2), /* extend_arith. */
972 COSTS_N_INSNS (1), /* bfi. */
973 COSTS_N_INSNS (1), /* bfx. */
974 0, /* clz. */
975 0, /* rev. */
976 0, /* non_exec. */
977 true /* non_exec_costs_exec. */
980 /* MULT SImode */
982 COSTS_N_INSNS (3), /* simple. */
983 COSTS_N_INSNS (3), /* flag_setting. */
984 COSTS_N_INSNS (2), /* extend. */
985 COSTS_N_INSNS (3), /* add. */
986 COSTS_N_INSNS (2), /* extend_add. */
987 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
989 /* MULT DImode */
991 0, /* simple (N/A). */
992 0, /* flag_setting (N/A). */
993 COSTS_N_INSNS (4), /* extend. */
994 0, /* add (N/A). */
995 COSTS_N_INSNS (4), /* extend_add. */
996 0 /* idiv (N/A). */
999 /* LD/ST */
1001 COSTS_N_INSNS (2), /* load. */
1002 COSTS_N_INSNS (2), /* load_sign_extend. */
1003 COSTS_N_INSNS (2), /* ldrd. */
1004 COSTS_N_INSNS (2), /* ldm_1st. */
1005 1, /* ldm_regs_per_insn_1st. */
1006 2, /* ldm_regs_per_insn_subsequent. */
1007 COSTS_N_INSNS (5), /* loadf. */
1008 COSTS_N_INSNS (5), /* loadd. */
1009 COSTS_N_INSNS (1), /* load_unaligned. */
1010 COSTS_N_INSNS (2), /* store. */
1011 COSTS_N_INSNS (2), /* strd. */
1012 COSTS_N_INSNS (2), /* stm_1st. */
1013 1, /* stm_regs_per_insn_1st. */
1014 2, /* stm_regs_per_insn_subsequent. */
1015 COSTS_N_INSNS (1), /* storef. */
1016 COSTS_N_INSNS (1), /* stored. */
1017 COSTS_N_INSNS (1), /* store_unaligned. */
1018 COSTS_N_INSNS (1), /* loadv. */
1019 COSTS_N_INSNS (1) /* storev. */
1022 /* FP SFmode */
1024 COSTS_N_INSNS (14), /* div. */
1025 COSTS_N_INSNS (4), /* mult. */
1026 COSTS_N_INSNS (7), /* mult_addsub. */
1027 COSTS_N_INSNS (30), /* fma. */
1028 COSTS_N_INSNS (3), /* addsub. */
1029 COSTS_N_INSNS (1), /* fpconst. */
1030 COSTS_N_INSNS (1), /* neg. */
1031 COSTS_N_INSNS (3), /* compare. */
1032 COSTS_N_INSNS (3), /* widen. */
1033 COSTS_N_INSNS (3), /* narrow. */
1034 COSTS_N_INSNS (3), /* toint. */
1035 COSTS_N_INSNS (3), /* fromint. */
1036 COSTS_N_INSNS (3) /* roundint. */
1038 /* FP DFmode */
1040 COSTS_N_INSNS (24), /* div. */
1041 COSTS_N_INSNS (5), /* mult. */
1042 COSTS_N_INSNS (8), /* mult_addsub. */
1043 COSTS_N_INSNS (30), /* fma. */
1044 COSTS_N_INSNS (3), /* addsub. */
1045 COSTS_N_INSNS (1), /* fpconst. */
1046 COSTS_N_INSNS (1), /* neg. */
1047 COSTS_N_INSNS (3), /* compare. */
1048 COSTS_N_INSNS (3), /* widen. */
1049 COSTS_N_INSNS (3), /* narrow. */
1050 COSTS_N_INSNS (3), /* toint. */
1051 COSTS_N_INSNS (3), /* fromint. */
1052 COSTS_N_INSNS (3) /* roundint. */
1055 /* Vector */
1057 COSTS_N_INSNS (1) /* alu. */
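/* A note on units for the tables in this file: COSTS_N_INSNS (N), as
   defined in rtl.h, is N times the cost of a baseline instruction
   (N * 4), so the bare 0 entries above mark operations that are no more
   expensive than the baseline, while COSTS_N_INSNS (30) for idiv
   reflects the lack of a hardware integer divider on the Cortex-A9, as
   the table's own comment says.  */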
1061 const struct cpu_cost_table cortexa8_extra_costs =
1063 /* ALU */
1065 0, /* arith. */
1066 0, /* logical. */
1067 COSTS_N_INSNS (1), /* shift. */
1068 0, /* shift_reg. */
1069 COSTS_N_INSNS (1), /* arith_shift. */
1070 0, /* arith_shift_reg. */
1071 COSTS_N_INSNS (1), /* log_shift. */
1072 0, /* log_shift_reg. */
1073 0, /* extend. */
1074 0, /* extend_arith. */
1075 0, /* bfi. */
1076 0, /* bfx. */
1077 0, /* clz. */
1078 0, /* rev. */
1079 0, /* non_exec. */
1080 true /* non_exec_costs_exec. */
1083 /* MULT SImode */
1085 COSTS_N_INSNS (1), /* simple. */
1086 COSTS_N_INSNS (1), /* flag_setting. */
1087 COSTS_N_INSNS (1), /* extend. */
1088 COSTS_N_INSNS (1), /* add. */
1089 COSTS_N_INSNS (1), /* extend_add. */
1090 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1092 /* MULT DImode */
1094 0, /* simple (N/A). */
1095 0, /* flag_setting (N/A). */
1096 COSTS_N_INSNS (2), /* extend. */
1097 0, /* add (N/A). */
1098 COSTS_N_INSNS (2), /* extend_add. */
1099 0 /* idiv (N/A). */
1102 /* LD/ST */
1104 COSTS_N_INSNS (1), /* load. */
1105 COSTS_N_INSNS (1), /* load_sign_extend. */
1106 COSTS_N_INSNS (1), /* ldrd. */
1107 COSTS_N_INSNS (1), /* ldm_1st. */
1108 1, /* ldm_regs_per_insn_1st. */
1109 2, /* ldm_regs_per_insn_subsequent. */
1110 COSTS_N_INSNS (1), /* loadf. */
1111 COSTS_N_INSNS (1), /* loadd. */
1112 COSTS_N_INSNS (1), /* load_unaligned. */
1113 COSTS_N_INSNS (1), /* store. */
1114 COSTS_N_INSNS (1), /* strd. */
1115 COSTS_N_INSNS (1), /* stm_1st. */
1116 1, /* stm_regs_per_insn_1st. */
1117 2, /* stm_regs_per_insn_subsequent. */
1118 COSTS_N_INSNS (1), /* storef. */
1119 COSTS_N_INSNS (1), /* stored. */
1120 COSTS_N_INSNS (1), /* store_unaligned. */
1121 COSTS_N_INSNS (1), /* loadv. */
1122 COSTS_N_INSNS (1) /* storev. */
1125 /* FP SFmode */
1127 COSTS_N_INSNS (36), /* div. */
1128 COSTS_N_INSNS (11), /* mult. */
1129 COSTS_N_INSNS (20), /* mult_addsub. */
1130 COSTS_N_INSNS (30), /* fma. */
1131 COSTS_N_INSNS (9), /* addsub. */
1132 COSTS_N_INSNS (3), /* fpconst. */
1133 COSTS_N_INSNS (3), /* neg. */
1134 COSTS_N_INSNS (6), /* compare. */
1135 COSTS_N_INSNS (4), /* widen. */
1136 COSTS_N_INSNS (4), /* narrow. */
1137 COSTS_N_INSNS (8), /* toint. */
1138 COSTS_N_INSNS (8), /* fromint. */
1139 COSTS_N_INSNS (8) /* roundint. */
1141 /* FP DFmode */
1143 COSTS_N_INSNS (64), /* div. */
1144 COSTS_N_INSNS (16), /* mult. */
1145 COSTS_N_INSNS (25), /* mult_addsub. */
1146 COSTS_N_INSNS (30), /* fma. */
1147 COSTS_N_INSNS (9), /* addsub. */
1148 COSTS_N_INSNS (3), /* fpconst. */
1149 COSTS_N_INSNS (3), /* neg. */
1150 COSTS_N_INSNS (6), /* compare. */
1151 COSTS_N_INSNS (6), /* widen. */
1152 COSTS_N_INSNS (6), /* narrow. */
1153 COSTS_N_INSNS (8), /* toint. */
1154 COSTS_N_INSNS (8), /* fromint. */
1155 COSTS_N_INSNS (8) /* roundint. */
1158 /* Vector */
1160 COSTS_N_INSNS (1) /* alu. */
1164 const struct cpu_cost_table cortexa5_extra_costs =
1166 /* ALU */
1168 0, /* arith. */
1169 0, /* logical. */
1170 COSTS_N_INSNS (1), /* shift. */
1171 COSTS_N_INSNS (1), /* shift_reg. */
1172 COSTS_N_INSNS (1), /* arith_shift. */
1173 COSTS_N_INSNS (1), /* arith_shift_reg. */
1174 COSTS_N_INSNS (1), /* log_shift. */
1175 COSTS_N_INSNS (1), /* log_shift_reg. */
1176 COSTS_N_INSNS (1), /* extend. */
1177 COSTS_N_INSNS (1), /* extend_arith. */
1178 COSTS_N_INSNS (1), /* bfi. */
1179 COSTS_N_INSNS (1), /* bfx. */
1180 COSTS_N_INSNS (1), /* clz. */
1181 COSTS_N_INSNS (1), /* rev. */
1182 0, /* non_exec. */
1183 true /* non_exec_costs_exec. */
1187 /* MULT SImode */
1189 0, /* simple. */
1190 COSTS_N_INSNS (1), /* flag_setting. */
1191 COSTS_N_INSNS (1), /* extend. */
1192 COSTS_N_INSNS (1), /* add. */
1193 COSTS_N_INSNS (1), /* extend_add. */
1194 COSTS_N_INSNS (7) /* idiv. */
1196 /* MULT DImode */
1198 0, /* simple (N/A). */
1199 0, /* flag_setting (N/A). */
1200 COSTS_N_INSNS (1), /* extend. */
1201 0, /* add. */
1202 COSTS_N_INSNS (2), /* extend_add. */
1203 0 /* idiv (N/A). */
1206 /* LD/ST */
1208 COSTS_N_INSNS (1), /* load. */
1209 COSTS_N_INSNS (1), /* load_sign_extend. */
1210 COSTS_N_INSNS (6), /* ldrd. */
1211 COSTS_N_INSNS (1), /* ldm_1st. */
1212 1, /* ldm_regs_per_insn_1st. */
1213 2, /* ldm_regs_per_insn_subsequent. */
1214 COSTS_N_INSNS (2), /* loadf. */
1215 COSTS_N_INSNS (4), /* loadd. */
1216 COSTS_N_INSNS (1), /* load_unaligned. */
1217 COSTS_N_INSNS (1), /* store. */
1218 COSTS_N_INSNS (3), /* strd. */
1219 COSTS_N_INSNS (1), /* stm_1st. */
1220 1, /* stm_regs_per_insn_1st. */
1221 2, /* stm_regs_per_insn_subsequent. */
1222 COSTS_N_INSNS (2), /* storef. */
1223 COSTS_N_INSNS (2), /* stored. */
1224 COSTS_N_INSNS (1), /* store_unaligned. */
1225 COSTS_N_INSNS (1), /* loadv. */
1226 COSTS_N_INSNS (1) /* storev. */
1229 /* FP SFmode */
1231 COSTS_N_INSNS (15), /* div. */
1232 COSTS_N_INSNS (3), /* mult. */
1233 COSTS_N_INSNS (7), /* mult_addsub. */
1234 COSTS_N_INSNS (7), /* fma. */
1235 COSTS_N_INSNS (3), /* addsub. */
1236 COSTS_N_INSNS (3), /* fpconst. */
1237 COSTS_N_INSNS (3), /* neg. */
1238 COSTS_N_INSNS (3), /* compare. */
1239 COSTS_N_INSNS (3), /* widen. */
1240 COSTS_N_INSNS (3), /* narrow. */
1241 COSTS_N_INSNS (3), /* toint. */
1242 COSTS_N_INSNS (3), /* fromint. */
1243 COSTS_N_INSNS (3) /* roundint. */
1245 /* FP DFmode */
1247 COSTS_N_INSNS (30), /* div. */
1248 COSTS_N_INSNS (6), /* mult. */
1249 COSTS_N_INSNS (10), /* mult_addsub. */
1250 COSTS_N_INSNS (7), /* fma. */
1251 COSTS_N_INSNS (3), /* addsub. */
1252 COSTS_N_INSNS (3), /* fpconst. */
1253 COSTS_N_INSNS (3), /* neg. */
1254 COSTS_N_INSNS (3), /* compare. */
1255 COSTS_N_INSNS (3), /* widen. */
1256 COSTS_N_INSNS (3), /* narrow. */
1257 COSTS_N_INSNS (3), /* toint. */
1258 COSTS_N_INSNS (3), /* fromint. */
1259 COSTS_N_INSNS (3) /* roundint. */
1262 /* Vector */
1264 COSTS_N_INSNS (1) /* alu. */
1269 const struct cpu_cost_table cortexa7_extra_costs =
1271 /* ALU */
1273 0, /* arith. */
1274 0, /* logical. */
1275 COSTS_N_INSNS (1), /* shift. */
1276 COSTS_N_INSNS (1), /* shift_reg. */
1277 COSTS_N_INSNS (1), /* arith_shift. */
1278 COSTS_N_INSNS (1), /* arith_shift_reg. */
1279 COSTS_N_INSNS (1), /* log_shift. */
1280 COSTS_N_INSNS (1), /* log_shift_reg. */
1281 COSTS_N_INSNS (1), /* extend. */
1282 COSTS_N_INSNS (1), /* extend_arith. */
1283 COSTS_N_INSNS (1), /* bfi. */
1284 COSTS_N_INSNS (1), /* bfx. */
1285 COSTS_N_INSNS (1), /* clz. */
1286 COSTS_N_INSNS (1), /* rev. */
1287 0, /* non_exec. */
1288 true /* non_exec_costs_exec. */
1292 /* MULT SImode */
1294 0, /* simple. */
1295 COSTS_N_INSNS (1), /* flag_setting. */
1296 COSTS_N_INSNS (1), /* extend. */
1297 COSTS_N_INSNS (1), /* add. */
1298 COSTS_N_INSNS (1), /* extend_add. */
1299 COSTS_N_INSNS (7) /* idiv. */
1301 /* MULT DImode */
1303 0, /* simple (N/A). */
1304 0, /* flag_setting (N/A). */
1305 COSTS_N_INSNS (1), /* extend. */
1306 0, /* add. */
1307 COSTS_N_INSNS (2), /* extend_add. */
1308 0 /* idiv (N/A). */
1311 /* LD/ST */
1313 COSTS_N_INSNS (1), /* load. */
1314 COSTS_N_INSNS (1), /* load_sign_extend. */
1315 COSTS_N_INSNS (3), /* ldrd. */
1316 COSTS_N_INSNS (1), /* ldm_1st. */
1317 1, /* ldm_regs_per_insn_1st. */
1318 2, /* ldm_regs_per_insn_subsequent. */
1319 COSTS_N_INSNS (2), /* loadf. */
1320 COSTS_N_INSNS (2), /* loadd. */
1321 COSTS_N_INSNS (1), /* load_unaligned. */
1322 COSTS_N_INSNS (1), /* store. */
1323 COSTS_N_INSNS (3), /* strd. */
1324 COSTS_N_INSNS (1), /* stm_1st. */
1325 1, /* stm_regs_per_insn_1st. */
1326 2, /* stm_regs_per_insn_subsequent. */
1327 COSTS_N_INSNS (2), /* storef. */
1328 COSTS_N_INSNS (2), /* stored. */
1329 COSTS_N_INSNS (1), /* store_unaligned. */
1330 COSTS_N_INSNS (1), /* loadv. */
1331 COSTS_N_INSNS (1) /* storev. */
1334 /* FP SFmode */
1336 COSTS_N_INSNS (15), /* div. */
1337 COSTS_N_INSNS (3), /* mult. */
1338 COSTS_N_INSNS (7), /* mult_addsub. */
1339 COSTS_N_INSNS (7), /* fma. */
1340 COSTS_N_INSNS (3), /* addsub. */
1341 COSTS_N_INSNS (3), /* fpconst. */
1342 COSTS_N_INSNS (3), /* neg. */
1343 COSTS_N_INSNS (3), /* compare. */
1344 COSTS_N_INSNS (3), /* widen. */
1345 COSTS_N_INSNS (3), /* narrow. */
1346 COSTS_N_INSNS (3), /* toint. */
1347 COSTS_N_INSNS (3), /* fromint. */
1348 COSTS_N_INSNS (3) /* roundint. */
1350 /* FP DFmode */
1352 COSTS_N_INSNS (30), /* div. */
1353 COSTS_N_INSNS (6), /* mult. */
1354 COSTS_N_INSNS (10), /* mult_addsub. */
1355 COSTS_N_INSNS (7), /* fma. */
1356 COSTS_N_INSNS (3), /* addsub. */
1357 COSTS_N_INSNS (3), /* fpconst. */
1358 COSTS_N_INSNS (3), /* neg. */
1359 COSTS_N_INSNS (3), /* compare. */
1360 COSTS_N_INSNS (3), /* widen. */
1361 COSTS_N_INSNS (3), /* narrow. */
1362 COSTS_N_INSNS (3), /* toint. */
1363 COSTS_N_INSNS (3), /* fromint. */
1364 COSTS_N_INSNS (3) /* roundint. */
1367 /* Vector */
1369 COSTS_N_INSNS (1) /* alu. */
1373 const struct cpu_cost_table cortexa12_extra_costs =
1375 /* ALU */
1377 0, /* arith. */
1378 0, /* logical. */
1379 0, /* shift. */
1380 COSTS_N_INSNS (1), /* shift_reg. */
1381 COSTS_N_INSNS (1), /* arith_shift. */
1382 COSTS_N_INSNS (1), /* arith_shift_reg. */
1383 COSTS_N_INSNS (1), /* log_shift. */
1384 COSTS_N_INSNS (1), /* log_shift_reg. */
1385 0, /* extend. */
1386 COSTS_N_INSNS (1), /* extend_arith. */
1387 0, /* bfi. */
1388 COSTS_N_INSNS (1), /* bfx. */
1389 COSTS_N_INSNS (1), /* clz. */
1390 COSTS_N_INSNS (1), /* rev. */
1391 0, /* non_exec. */
1392 true /* non_exec_costs_exec. */
1394 /* MULT SImode */
1397 COSTS_N_INSNS (2), /* simple. */
1398 COSTS_N_INSNS (3), /* flag_setting. */
1399 COSTS_N_INSNS (2), /* extend. */
1400 COSTS_N_INSNS (3), /* add. */
1401 COSTS_N_INSNS (2), /* extend_add. */
1402 COSTS_N_INSNS (18) /* idiv. */
1404 /* MULT DImode */
1406 0, /* simple (N/A). */
1407 0, /* flag_setting (N/A). */
1408 COSTS_N_INSNS (3), /* extend. */
1409 0, /* add (N/A). */
1410 COSTS_N_INSNS (3), /* extend_add. */
1411 0 /* idiv (N/A). */
1414 /* LD/ST */
1416 COSTS_N_INSNS (3), /* load. */
1417 COSTS_N_INSNS (3), /* load_sign_extend. */
1418 COSTS_N_INSNS (3), /* ldrd. */
1419 COSTS_N_INSNS (3), /* ldm_1st. */
1420 1, /* ldm_regs_per_insn_1st. */
1421 2, /* ldm_regs_per_insn_subsequent. */
1422 COSTS_N_INSNS (3), /* loadf. */
1423 COSTS_N_INSNS (3), /* loadd. */
1424 0, /* load_unaligned. */
1425 0, /* store. */
1426 0, /* strd. */
1427 0, /* stm_1st. */
1428 1, /* stm_regs_per_insn_1st. */
1429 2, /* stm_regs_per_insn_subsequent. */
1430 COSTS_N_INSNS (2), /* storef. */
1431 COSTS_N_INSNS (2), /* stored. */
1432 0, /* store_unaligned. */
1433 COSTS_N_INSNS (1), /* loadv. */
1434 COSTS_N_INSNS (1) /* storev. */
1437 /* FP SFmode */
1439 COSTS_N_INSNS (17), /* div. */
1440 COSTS_N_INSNS (4), /* mult. */
1441 COSTS_N_INSNS (8), /* mult_addsub. */
1442 COSTS_N_INSNS (8), /* fma. */
1443 COSTS_N_INSNS (4), /* addsub. */
1444 COSTS_N_INSNS (2), /* fpconst. */
1445 COSTS_N_INSNS (2), /* neg. */
1446 COSTS_N_INSNS (2), /* compare. */
1447 COSTS_N_INSNS (4), /* widen. */
1448 COSTS_N_INSNS (4), /* narrow. */
1449 COSTS_N_INSNS (4), /* toint. */
1450 COSTS_N_INSNS (4), /* fromint. */
1451 COSTS_N_INSNS (4) /* roundint. */
1453 /* FP DFmode */
1455 COSTS_N_INSNS (31), /* div. */
1456 COSTS_N_INSNS (4), /* mult. */
1457 COSTS_N_INSNS (8), /* mult_addsub. */
1458 COSTS_N_INSNS (8), /* fma. */
1459 COSTS_N_INSNS (4), /* addsub. */
1460 COSTS_N_INSNS (2), /* fpconst. */
1461 COSTS_N_INSNS (2), /* neg. */
1462 COSTS_N_INSNS (2), /* compare. */
1463 COSTS_N_INSNS (4), /* widen. */
1464 COSTS_N_INSNS (4), /* narrow. */
1465 COSTS_N_INSNS (4), /* toint. */
1466 COSTS_N_INSNS (4), /* fromint. */
1467 COSTS_N_INSNS (4) /* roundint. */
1470 /* Vector */
1472 COSTS_N_INSNS (1) /* alu. */
1476 const struct cpu_cost_table cortexa15_extra_costs =
1478 /* ALU */
1480 0, /* arith. */
1481 0, /* logical. */
1482 0, /* shift. */
1483 0, /* shift_reg. */
1484 COSTS_N_INSNS (1), /* arith_shift. */
1485 COSTS_N_INSNS (1), /* arith_shift_reg. */
1486 COSTS_N_INSNS (1), /* log_shift. */
1487 COSTS_N_INSNS (1), /* log_shift_reg. */
1488 0, /* extend. */
1489 COSTS_N_INSNS (1), /* extend_arith. */
1490 COSTS_N_INSNS (1), /* bfi. */
1491 0, /* bfx. */
1492 0, /* clz. */
1493 0, /* rev. */
1494 0, /* non_exec. */
1495 true /* non_exec_costs_exec. */
1497 /* MULT SImode */
1500 COSTS_N_INSNS (2), /* simple. */
1501 COSTS_N_INSNS (3), /* flag_setting. */
1502 COSTS_N_INSNS (2), /* extend. */
1503 COSTS_N_INSNS (2), /* add. */
1504 COSTS_N_INSNS (2), /* extend_add. */
1505 COSTS_N_INSNS (18) /* idiv. */
1507 /* MULT DImode */
1509 0, /* simple (N/A). */
1510 0, /* flag_setting (N/A). */
1511 COSTS_N_INSNS (3), /* extend. */
1512 0, /* add (N/A). */
1513 COSTS_N_INSNS (3), /* extend_add. */
1514 0 /* idiv (N/A). */
1517 /* LD/ST */
1519 COSTS_N_INSNS (3), /* load. */
1520 COSTS_N_INSNS (3), /* load_sign_extend. */
1521 COSTS_N_INSNS (3), /* ldrd. */
1522 COSTS_N_INSNS (4), /* ldm_1st. */
1523 1, /* ldm_regs_per_insn_1st. */
1524 2, /* ldm_regs_per_insn_subsequent. */
1525 COSTS_N_INSNS (4), /* loadf. */
1526 COSTS_N_INSNS (4), /* loadd. */
1527 0, /* load_unaligned. */
1528 0, /* store. */
1529 0, /* strd. */
1530 COSTS_N_INSNS (1), /* stm_1st. */
1531 1, /* stm_regs_per_insn_1st. */
1532 2, /* stm_regs_per_insn_subsequent. */
1533 0, /* storef. */
1534 0, /* stored. */
1535 0, /* store_unaligned. */
1536 COSTS_N_INSNS (1), /* loadv. */
1537 COSTS_N_INSNS (1) /* storev. */
1540 /* FP SFmode */
1542 COSTS_N_INSNS (17), /* div. */
1543 COSTS_N_INSNS (4), /* mult. */
1544 COSTS_N_INSNS (8), /* mult_addsub. */
1545 COSTS_N_INSNS (8), /* fma. */
1546 COSTS_N_INSNS (4), /* addsub. */
1547 COSTS_N_INSNS (2), /* fpconst. */
1548 COSTS_N_INSNS (2), /* neg. */
1549 COSTS_N_INSNS (5), /* compare. */
1550 COSTS_N_INSNS (4), /* widen. */
1551 COSTS_N_INSNS (4), /* narrow. */
1552 COSTS_N_INSNS (4), /* toint. */
1553 COSTS_N_INSNS (4), /* fromint. */
1554 COSTS_N_INSNS (4) /* roundint. */
1556 /* FP DFmode */
1558 COSTS_N_INSNS (31), /* div. */
1559 COSTS_N_INSNS (4), /* mult. */
1560 COSTS_N_INSNS (8), /* mult_addsub. */
1561 COSTS_N_INSNS (8), /* fma. */
1562 COSTS_N_INSNS (4), /* addsub. */
1563 COSTS_N_INSNS (2), /* fpconst. */
1564 COSTS_N_INSNS (2), /* neg. */
1565 COSTS_N_INSNS (2), /* compare. */
1566 COSTS_N_INSNS (4), /* widen. */
1567 COSTS_N_INSNS (4), /* narrow. */
1568 COSTS_N_INSNS (4), /* toint. */
1569 COSTS_N_INSNS (4), /* fromint. */
1570 COSTS_N_INSNS (4) /* roundint. */
1573 /* Vector */
1575 COSTS_N_INSNS (1) /* alu. */
1579 const struct cpu_cost_table v7m_extra_costs =
1581 /* ALU */
1583 0, /* arith. */
1584 0, /* logical. */
1585 0, /* shift. */
1586 0, /* shift_reg. */
1587 0, /* arith_shift. */
1588 COSTS_N_INSNS (1), /* arith_shift_reg. */
1589 0, /* log_shift. */
1590 COSTS_N_INSNS (1), /* log_shift_reg. */
1591 0, /* extend. */
1592 COSTS_N_INSNS (1), /* extend_arith. */
1593 0, /* bfi. */
1594 0, /* bfx. */
1595 0, /* clz. */
1596 0, /* rev. */
1597 COSTS_N_INSNS (1), /* non_exec. */
1598 false /* non_exec_costs_exec. */
1601 /* MULT SImode */
1603 COSTS_N_INSNS (1), /* simple. */
1604 COSTS_N_INSNS (1), /* flag_setting. */
1605 COSTS_N_INSNS (2), /* extend. */
1606 COSTS_N_INSNS (1), /* add. */
1607 COSTS_N_INSNS (3), /* extend_add. */
1608 COSTS_N_INSNS (8) /* idiv. */
1610 /* MULT DImode */
1612 0, /* simple (N/A). */
1613 0, /* flag_setting (N/A). */
1614 COSTS_N_INSNS (2), /* extend. */
1615 0, /* add (N/A). */
1616 COSTS_N_INSNS (3), /* extend_add. */
1617 0 /* idiv (N/A). */
1620 /* LD/ST */
1622 COSTS_N_INSNS (2), /* load. */
1623 0, /* load_sign_extend. */
1624 COSTS_N_INSNS (3), /* ldrd. */
1625 COSTS_N_INSNS (2), /* ldm_1st. */
1626 1, /* ldm_regs_per_insn_1st. */
1627 1, /* ldm_regs_per_insn_subsequent. */
1628 COSTS_N_INSNS (2), /* loadf. */
1629 COSTS_N_INSNS (3), /* loadd. */
1630 COSTS_N_INSNS (1), /* load_unaligned. */
1631 COSTS_N_INSNS (2), /* store. */
1632 COSTS_N_INSNS (3), /* strd. */
1633 COSTS_N_INSNS (2), /* stm_1st. */
1634 1, /* stm_regs_per_insn_1st. */
1635 1, /* stm_regs_per_insn_subsequent. */
1636 COSTS_N_INSNS (2), /* storef. */
1637 COSTS_N_INSNS (3), /* stored. */
1638 COSTS_N_INSNS (1), /* store_unaligned. */
1639 COSTS_N_INSNS (1), /* loadv. */
1640 COSTS_N_INSNS (1) /* storev. */
1643 /* FP SFmode */
1645 COSTS_N_INSNS (7), /* div. */
1646 COSTS_N_INSNS (2), /* mult. */
1647 COSTS_N_INSNS (5), /* mult_addsub. */
1648 COSTS_N_INSNS (3), /* fma. */
1649 COSTS_N_INSNS (1), /* addsub. */
1650 0, /* fpconst. */
1651 0, /* neg. */
1652 0, /* compare. */
1653 0, /* widen. */
1654 0, /* narrow. */
1655 0, /* toint. */
1656 0, /* fromint. */
1657 0 /* roundint. */
1659 /* FP DFmode */
1661 COSTS_N_INSNS (15), /* div. */
1662 COSTS_N_INSNS (5), /* mult. */
1663 COSTS_N_INSNS (7), /* mult_addsub. */
1664 COSTS_N_INSNS (7), /* fma. */
1665 COSTS_N_INSNS (3), /* addsub. */
1666 0, /* fpconst. */
1667 0, /* neg. */
1668 0, /* compare. */
1669 0, /* widen. */
1670 0, /* narrow. */
1671 0, /* toint. */
1672 0, /* fromint. */
1673 0 /* roundint. */
1676 /* Vector */
1678 COSTS_N_INSNS (1) /* alu. */
1682 const struct tune_params arm_slowmul_tune =
1684 arm_slowmul_rtx_costs,
1685 NULL, /* Insn extra costs. */
1686 NULL, /* Sched adj cost. */
1687 arm_default_branch_cost,
1688 &arm_default_vec_cost,
1689 3, /* Constant limit. */
1690 5, /* Max cond insns. */
1691 8, /* Memset max inline. */
1692 1, /* Issue rate. */
1693 ARM_PREFETCH_NOT_BENEFICIAL,
1694 tune_params::PREF_CONST_POOL_TRUE,
1695 tune_params::PREF_LDRD_FALSE,
1696 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1697 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1698 tune_params::DISPARAGE_FLAGS_NEITHER,
1699 tune_params::PREF_NEON_64_FALSE,
1700 tune_params::PREF_NEON_STRINGOPS_FALSE,
1701 tune_params::FUSE_NOTHING,
1702 tune_params::SCHED_AUTOPREF_OFF
1705 const struct tune_params arm_fastmul_tune =
1707 arm_fastmul_rtx_costs,
1708 NULL, /* Insn extra costs. */
1709 NULL, /* Sched adj cost. */
1710 arm_default_branch_cost,
1711 &arm_default_vec_cost,
1712 1, /* Constant limit. */
1713 5, /* Max cond insns. */
1714 8, /* Memset max inline. */
1715 1, /* Issue rate. */
1716 ARM_PREFETCH_NOT_BENEFICIAL,
1717 tune_params::PREF_CONST_POOL_TRUE,
1718 tune_params::PREF_LDRD_FALSE,
1719 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1720 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1721 tune_params::DISPARAGE_FLAGS_NEITHER,
1722 tune_params::PREF_NEON_64_FALSE,
1723 tune_params::PREF_NEON_STRINGOPS_FALSE,
1724 tune_params::FUSE_NOTHING,
1725 tune_params::SCHED_AUTOPREF_OFF
1728 /* StrongARM has early execution of branches, so a sequence that is worth
1729 skipping is shorter. Set max_insns_skipped to a lower value. */
1731 const struct tune_params arm_strongarm_tune =
1733 arm_fastmul_rtx_costs,
1734 NULL, /* Insn extra costs. */
1735 NULL, /* Sched adj cost. */
1736 arm_default_branch_cost,
1737 &arm_default_vec_cost,
1738 1, /* Constant limit. */
1739 3, /* Max cond insns. */
1740 8, /* Memset max inline. */
1741 1, /* Issue rate. */
1742 ARM_PREFETCH_NOT_BENEFICIAL,
1743 tune_params::PREF_CONST_POOL_TRUE,
1744 tune_params::PREF_LDRD_FALSE,
1745 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1746 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1747 tune_params::DISPARAGE_FLAGS_NEITHER,
1748 tune_params::PREF_NEON_64_FALSE,
1749 tune_params::PREF_NEON_STRINGOPS_FALSE,
1750 tune_params::FUSE_NOTHING,
1751 tune_params::SCHED_AUTOPREF_OFF
1754 const struct tune_params arm_xscale_tune =
1756 arm_xscale_rtx_costs,
1757 NULL, /* Insn extra costs. */
1758 xscale_sched_adjust_cost,
1759 arm_default_branch_cost,
1760 &arm_default_vec_cost,
1761 2, /* Constant limit. */
1762 3, /* Max cond insns. */
1763 8, /* Memset max inline. */
1764 1, /* Issue rate. */
1765 ARM_PREFETCH_NOT_BENEFICIAL,
1766 tune_params::PREF_CONST_POOL_TRUE,
1767 tune_params::PREF_LDRD_FALSE,
1768 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1769 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1770 tune_params::DISPARAGE_FLAGS_NEITHER,
1771 tune_params::PREF_NEON_64_FALSE,
1772 tune_params::PREF_NEON_STRINGOPS_FALSE,
1773 tune_params::FUSE_NOTHING,
1774 tune_params::SCHED_AUTOPREF_OFF
1777 const struct tune_params arm_9e_tune =
1779 arm_9e_rtx_costs,
1780 NULL, /* Insn extra costs. */
1781 NULL, /* Sched adj cost. */
1782 arm_default_branch_cost,
1783 &arm_default_vec_cost,
1784 1, /* Constant limit. */
1785 5, /* Max cond insns. */
1786 8, /* Memset max inline. */
1787 1, /* Issue rate. */
1788 ARM_PREFETCH_NOT_BENEFICIAL,
1789 tune_params::PREF_CONST_POOL_TRUE,
1790 tune_params::PREF_LDRD_FALSE,
1791 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1792 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1793 tune_params::DISPARAGE_FLAGS_NEITHER,
1794 tune_params::PREF_NEON_64_FALSE,
1795 tune_params::PREF_NEON_STRINGOPS_FALSE,
1796 tune_params::FUSE_NOTHING,
1797 tune_params::SCHED_AUTOPREF_OFF
1800 const struct tune_params arm_marvell_pj4_tune =
1802 arm_9e_rtx_costs,
1803 NULL, /* Insn extra costs. */
1804 NULL, /* Sched adj cost. */
1805 arm_default_branch_cost,
1806 &arm_default_vec_cost,
1807 1, /* Constant limit. */
1808 5, /* Max cond insns. */
1809 8, /* Memset max inline. */
1810 2, /* Issue rate. */
1811 ARM_PREFETCH_NOT_BENEFICIAL,
1812 tune_params::PREF_CONST_POOL_TRUE,
1813 tune_params::PREF_LDRD_FALSE,
1814 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1815 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1816 tune_params::DISPARAGE_FLAGS_NEITHER,
1817 tune_params::PREF_NEON_64_FALSE,
1818 tune_params::PREF_NEON_STRINGOPS_FALSE,
1819 tune_params::FUSE_NOTHING,
1820 tune_params::SCHED_AUTOPREF_OFF
1823 const struct tune_params arm_v6t2_tune =
1825 arm_9e_rtx_costs,
1826 NULL, /* Insn extra costs. */
1827 NULL, /* Sched adj cost. */
1828 arm_default_branch_cost,
1829 &arm_default_vec_cost,
1830 1, /* Constant limit. */
1831 5, /* Max cond insns. */
1832 8, /* Memset max inline. */
1833 1, /* Issue rate. */
1834 ARM_PREFETCH_NOT_BENEFICIAL,
1835 tune_params::PREF_CONST_POOL_FALSE,
1836 tune_params::PREF_LDRD_FALSE,
1837 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1838 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1839 tune_params::DISPARAGE_FLAGS_NEITHER,
1840 tune_params::PREF_NEON_64_FALSE,
1841 tune_params::PREF_NEON_STRINGOPS_FALSE,
1842 tune_params::FUSE_NOTHING,
1843 tune_params::SCHED_AUTOPREF_OFF
1847 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1848 const struct tune_params arm_cortex_tune =
1850 arm_9e_rtx_costs,
1851 &generic_extra_costs,
1852 NULL, /* Sched adj cost. */
1853 arm_default_branch_cost,
1854 &arm_default_vec_cost,
1855 1, /* Constant limit. */
1856 5, /* Max cond insns. */
1857 8, /* Memset max inline. */
1858 2, /* Issue rate. */
1859 ARM_PREFETCH_NOT_BENEFICIAL,
1860 tune_params::PREF_CONST_POOL_FALSE,
1861 tune_params::PREF_LDRD_FALSE,
1862 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1863 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1864 tune_params::DISPARAGE_FLAGS_NEITHER,
1865 tune_params::PREF_NEON_64_FALSE,
1866 tune_params::PREF_NEON_STRINGOPS_FALSE,
1867 tune_params::FUSE_NOTHING,
1868 tune_params::SCHED_AUTOPREF_OFF
1871 const struct tune_params arm_cortex_a8_tune =
1873 arm_9e_rtx_costs,
1874 &cortexa8_extra_costs,
1875 NULL, /* Sched adj cost. */
1876 arm_default_branch_cost,
1877 &arm_default_vec_cost,
1878 1, /* Constant limit. */
1879 5, /* Max cond insns. */
1880 8, /* Memset max inline. */
1881 2, /* Issue rate. */
1882 ARM_PREFETCH_NOT_BENEFICIAL,
1883 tune_params::PREF_CONST_POOL_FALSE,
1884 tune_params::PREF_LDRD_FALSE,
1885 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1886 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1887 tune_params::DISPARAGE_FLAGS_NEITHER,
1888 tune_params::PREF_NEON_64_FALSE,
1889 tune_params::PREF_NEON_STRINGOPS_TRUE,
1890 tune_params::FUSE_NOTHING,
1891 tune_params::SCHED_AUTOPREF_OFF
1894 const struct tune_params arm_cortex_a7_tune =
1896 arm_9e_rtx_costs,
1897 &cortexa7_extra_costs,
1898 NULL, /* Sched adj cost. */
1899 arm_default_branch_cost,
1900 &arm_default_vec_cost,
1901 1, /* Constant limit. */
1902 5, /* Max cond insns. */
1903 8, /* Memset max inline. */
1904 2, /* Issue rate. */
1905 ARM_PREFETCH_NOT_BENEFICIAL,
1906 tune_params::PREF_CONST_POOL_FALSE,
1907 tune_params::PREF_LDRD_FALSE,
1908 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1909 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1910 tune_params::DISPARAGE_FLAGS_NEITHER,
1911 tune_params::PREF_NEON_64_FALSE,
1912 tune_params::PREF_NEON_STRINGOPS_TRUE,
1913 tune_params::FUSE_NOTHING,
1914 tune_params::SCHED_AUTOPREF_OFF
1917 const struct tune_params arm_cortex_a15_tune =
1919 arm_9e_rtx_costs,
1920 &cortexa15_extra_costs,
1921 NULL, /* Sched adj cost. */
1922 arm_default_branch_cost,
1923 &arm_default_vec_cost,
1924 1, /* Constant limit. */
1925 2, /* Max cond insns. */
1926 8, /* Memset max inline. */
1927 3, /* Issue rate. */
1928 ARM_PREFETCH_NOT_BENEFICIAL,
1929 tune_params::PREF_CONST_POOL_FALSE,
1930 tune_params::PREF_LDRD_TRUE,
1931 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1932 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1933 tune_params::DISPARAGE_FLAGS_ALL,
1934 tune_params::PREF_NEON_64_FALSE,
1935 tune_params::PREF_NEON_STRINGOPS_TRUE,
1936 tune_params::FUSE_NOTHING,
1937 tune_params::SCHED_AUTOPREF_FULL
1940 const struct tune_params arm_cortex_a35_tune =
1942 arm_9e_rtx_costs,
1943 &cortexa53_extra_costs,
1944 NULL, /* Sched adj cost. */
1945 arm_default_branch_cost,
1946 &arm_default_vec_cost,
1947 1, /* Constant limit. */
1948 5, /* Max cond insns. */
1949 8, /* Memset max inline. */
1950 1, /* Issue rate. */
1951 ARM_PREFETCH_NOT_BENEFICIAL,
1952 tune_params::PREF_CONST_POOL_FALSE,
1953 tune_params::PREF_LDRD_FALSE,
1954 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1955 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1956 tune_params::DISPARAGE_FLAGS_NEITHER,
1957 tune_params::PREF_NEON_64_FALSE,
1958 tune_params::PREF_NEON_STRINGOPS_TRUE,
1959 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1960 tune_params::SCHED_AUTOPREF_OFF
1963 const struct tune_params arm_cortex_a53_tune =
1965 arm_9e_rtx_costs,
1966 &cortexa53_extra_costs,
1967 NULL, /* Sched adj cost. */
1968 arm_default_branch_cost,
1969 &arm_default_vec_cost,
1970 1, /* Constant limit. */
1971 5, /* Max cond insns. */
1972 8, /* Memset max inline. */
1973 2, /* Issue rate. */
1974 ARM_PREFETCH_NOT_BENEFICIAL,
1975 tune_params::PREF_CONST_POOL_FALSE,
1976 tune_params::PREF_LDRD_FALSE,
1977 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1978 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1979 tune_params::DISPARAGE_FLAGS_NEITHER,
1980 tune_params::PREF_NEON_64_FALSE,
1981 tune_params::PREF_NEON_STRINGOPS_TRUE,
1982 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
1983 tune_params::SCHED_AUTOPREF_OFF
1986 const struct tune_params arm_cortex_a57_tune =
1988 arm_9e_rtx_costs,
1989 &cortexa57_extra_costs,
1990 NULL, /* Sched adj cost. */
1991 arm_default_branch_cost,
1992 &arm_default_vec_cost,
1993 1, /* Constant limit. */
1994 2, /* Max cond insns. */
1995 8, /* Memset max inline. */
1996 3, /* Issue rate. */
1997 ARM_PREFETCH_NOT_BENEFICIAL,
1998 tune_params::PREF_CONST_POOL_FALSE,
1999 tune_params::PREF_LDRD_TRUE,
2000 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2002 tune_params::DISPARAGE_FLAGS_ALL,
2003 tune_params::PREF_NEON_64_FALSE,
2004 tune_params::PREF_NEON_STRINGOPS_TRUE,
2005 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2006 tune_params::SCHED_AUTOPREF_FULL
2009 const struct tune_params arm_exynosm1_tune =
2011 arm_9e_rtx_costs,
2012 &exynosm1_extra_costs,
2013 NULL, /* Sched adj cost. */
2014 arm_default_branch_cost,
2015 &arm_default_vec_cost,
2016 1, /* Constant limit. */
2017 2, /* Max cond insns. */
2018 8, /* Memset max inline. */
2019 3, /* Issue rate. */
2020 ARM_PREFETCH_NOT_BENEFICIAL,
2021 tune_params::PREF_CONST_POOL_FALSE,
2022 tune_params::PREF_LDRD_TRUE,
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2025 tune_params::DISPARAGE_FLAGS_ALL,
2026 tune_params::PREF_NEON_64_FALSE,
2027 tune_params::PREF_NEON_STRINGOPS_TRUE,
2028 tune_params::FUSE_NOTHING,
2029 tune_params::SCHED_AUTOPREF_OFF
2032 const struct tune_params arm_xgene1_tune =
2034 arm_9e_rtx_costs,
2035 &xgene1_extra_costs,
2036 NULL, /* Sched adj cost. */
2037 arm_default_branch_cost,
2038 &arm_default_vec_cost,
2039 1, /* Constant limit. */
2040 2, /* Max cond insns. */
2041 32, /* Memset max inline. */
2042 4, /* Issue rate. */
2043 ARM_PREFETCH_NOT_BENEFICIAL,
2044 tune_params::PREF_CONST_POOL_FALSE,
2045 tune_params::PREF_LDRD_TRUE,
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2047 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2048 tune_params::DISPARAGE_FLAGS_ALL,
2049 tune_params::PREF_NEON_64_FALSE,
2050 tune_params::PREF_NEON_STRINGOPS_FALSE,
2051 tune_params::FUSE_NOTHING,
2052 tune_params::SCHED_AUTOPREF_OFF
2055 const struct tune_params arm_qdf24xx_tune =
2057 arm_9e_rtx_costs,
2058 &qdf24xx_extra_costs,
2059 NULL, /* Scheduler cost adjustment. */
2060 arm_default_branch_cost,
2061 &arm_default_vec_cost, /* Vectorizer costs. */
2062 1, /* Constant limit. */
2063 2, /* Max cond insns. */
2064 8, /* Memset max inline. */
2065 4, /* Issue rate. */
2066 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2067 tune_params::PREF_CONST_POOL_FALSE,
2068 tune_params::PREF_LDRD_TRUE,
2069 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2071 tune_params::DISPARAGE_FLAGS_ALL,
2072 tune_params::PREF_NEON_64_FALSE,
2073 tune_params::PREF_NEON_STRINGOPS_TRUE,
2074 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2075 tune_params::SCHED_AUTOPREF_FULL
2078 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2079 less appealing. Set max_insns_skipped to a low value. */
2081 const struct tune_params arm_cortex_a5_tune =
2083 arm_9e_rtx_costs,
2084 &cortexa5_extra_costs,
2085 NULL, /* Sched adj cost. */
2086 arm_cortex_a5_branch_cost,
2087 &arm_default_vec_cost,
2088 1, /* Constant limit. */
2089 1, /* Max cond insns. */
2090 8, /* Memset max inline. */
2091 2, /* Issue rate. */
2092 ARM_PREFETCH_NOT_BENEFICIAL,
2093 tune_params::PREF_CONST_POOL_FALSE,
2094 tune_params::PREF_LDRD_FALSE,
2095 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2096 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2097 tune_params::DISPARAGE_FLAGS_NEITHER,
2098 tune_params::PREF_NEON_64_FALSE,
2099 tune_params::PREF_NEON_STRINGOPS_TRUE,
2100 tune_params::FUSE_NOTHING,
2101 tune_params::SCHED_AUTOPREF_OFF
2104 const struct tune_params arm_cortex_a9_tune =
2106 arm_9e_rtx_costs,
2107 &cortexa9_extra_costs,
2108 cortex_a9_sched_adjust_cost,
2109 arm_default_branch_cost,
2110 &arm_default_vec_cost,
2111 1, /* Constant limit. */
2112 5, /* Max cond insns. */
2113 8, /* Memset max inline. */
2114 2, /* Issue rate. */
2115 ARM_PREFETCH_BENEFICIAL(4,32,32),
2116 tune_params::PREF_CONST_POOL_FALSE,
2117 tune_params::PREF_LDRD_FALSE,
2118 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2119 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2120 tune_params::DISPARAGE_FLAGS_NEITHER,
2121 tune_params::PREF_NEON_64_FALSE,
2122 tune_params::PREF_NEON_STRINGOPS_FALSE,
2123 tune_params::FUSE_NOTHING,
2124 tune_params::SCHED_AUTOPREF_OFF
2127 const struct tune_params arm_cortex_a12_tune =
2129 arm_9e_rtx_costs,
2130 &cortexa12_extra_costs,
2131 NULL, /* Sched adj cost. */
2132 arm_default_branch_cost,
2133 &arm_default_vec_cost, /* Vectorizer costs. */
2134 1, /* Constant limit. */
2135 2, /* Max cond insns. */
2136 8, /* Memset max inline. */
2137 2, /* Issue rate. */
2138 ARM_PREFETCH_NOT_BENEFICIAL,
2139 tune_params::PREF_CONST_POOL_FALSE,
2140 tune_params::PREF_LDRD_TRUE,
2141 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2142 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2143 tune_params::DISPARAGE_FLAGS_ALL,
2144 tune_params::PREF_NEON_64_FALSE,
2145 tune_params::PREF_NEON_STRINGOPS_TRUE,
2146 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2147 tune_params::SCHED_AUTOPREF_OFF
2150 const struct tune_params arm_cortex_a73_tune =
2152 arm_9e_rtx_costs,
2153 &cortexa57_extra_costs,
2154 NULL, /* Sched adj cost. */
2155 arm_default_branch_cost,
2156 &arm_default_vec_cost, /* Vectorizer costs. */
2157 1, /* Constant limit. */
2158 2, /* Max cond insns. */
2159 8, /* Memset max inline. */
2160 2, /* Issue rate. */
2161 ARM_PREFETCH_NOT_BENEFICIAL,
2162 tune_params::PREF_CONST_POOL_FALSE,
2163 tune_params::PREF_LDRD_TRUE,
2164 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2165 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2166 tune_params::DISPARAGE_FLAGS_ALL,
2167 tune_params::PREF_NEON_64_FALSE,
2168 tune_params::PREF_NEON_STRINGOPS_TRUE,
2169 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2170 tune_params::SCHED_AUTOPREF_FULL
2173 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2174 single cycle to execute, so a MOVW/MOVT pair costs two cycles. An LDR from
2175 the constant pool also takes two cycles to execute, but mildly increases
2176 pipelining opportunity (consecutive loads/stores can be pipelined together,
2177 saving one cycle), and may also improve icache utilisation. Hence we prefer
2178 the constant pool for such processors. */
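/* For example, materialising the 32-bit constant 0x12345678 either takes
       movw  r0, #0x5678
       movt  r0, #0x1234
   (two single-cycle instructions), or a single two-cycle PC-relative load
       ldr   r0, [pc, #offset]   @ entry in the literal pool
   which is why the two approaches are so closely matched on these cores.  */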
2180 const struct tune_params arm_v7m_tune =
2182 arm_9e_rtx_costs,
2183 &v7m_extra_costs,
2184 NULL, /* Sched adj cost. */
2185 arm_cortex_m_branch_cost,
2186 &arm_default_vec_cost,
2187 1, /* Constant limit. */
2188 2, /* Max cond insns. */
2189 8, /* Memset max inline. */
2190 1, /* Issue rate. */
2191 ARM_PREFETCH_NOT_BENEFICIAL,
2192 tune_params::PREF_CONST_POOL_TRUE,
2193 tune_params::PREF_LDRD_FALSE,
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2195 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2196 tune_params::DISPARAGE_FLAGS_NEITHER,
2197 tune_params::PREF_NEON_64_FALSE,
2198 tune_params::PREF_NEON_STRINGOPS_FALSE,
2199 tune_params::FUSE_NOTHING,
2200 tune_params::SCHED_AUTOPREF_OFF
2203 /* Cortex-M7 tuning. */
2205 const struct tune_params arm_cortex_m7_tune =
2207 arm_9e_rtx_costs,
2208 &v7m_extra_costs,
2209 NULL, /* Sched adj cost. */
2210 arm_cortex_m7_branch_cost,
2211 &arm_default_vec_cost,
2212 0, /* Constant limit. */
2213 1, /* Max cond insns. */
2214 8, /* Memset max inline. */
2215 2, /* Issue rate. */
2216 ARM_PREFETCH_NOT_BENEFICIAL,
2217 tune_params::PREF_CONST_POOL_TRUE,
2218 tune_params::PREF_LDRD_FALSE,
2219 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2220 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2221 tune_params::DISPARAGE_FLAGS_NEITHER,
2222 tune_params::PREF_NEON_64_FALSE,
2223 tune_params::PREF_NEON_STRINGOPS_FALSE,
2224 tune_params::FUSE_NOTHING,
2225 tune_params::SCHED_AUTOPREF_OFF
2228 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2229 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2230 const struct tune_params arm_v6m_tune =
2232 arm_9e_rtx_costs,
2233 NULL, /* Insn extra costs. */
2234 NULL, /* Sched adj cost. */
2235 arm_default_branch_cost,
2236 &arm_default_vec_cost, /* Vectorizer costs. */
2237 1, /* Constant limit. */
2238 5, /* Max cond insns. */
2239 8, /* Memset max inline. */
2240 1, /* Issue rate. */
2241 ARM_PREFETCH_NOT_BENEFICIAL,
2242 tune_params::PREF_CONST_POOL_FALSE,
2243 tune_params::PREF_LDRD_FALSE,
2244 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2245 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2246 tune_params::DISPARAGE_FLAGS_NEITHER,
2247 tune_params::PREF_NEON_64_FALSE,
2248 tune_params::PREF_NEON_STRINGOPS_FALSE,
2249 tune_params::FUSE_NOTHING,
2250 tune_params::SCHED_AUTOPREF_OFF
2253 const struct tune_params arm_fa726te_tune =
2255 arm_9e_rtx_costs,
2256 NULL, /* Insn extra costs. */
2257 fa726te_sched_adjust_cost,
2258 arm_default_branch_cost,
2259 &arm_default_vec_cost,
2260 1, /* Constant limit. */
2261 5, /* Max cond insns. */
2262 8, /* Memset max inline. */
2263 2, /* Issue rate. */
2264 ARM_PREFETCH_NOT_BENEFICIAL,
2265 tune_params::PREF_CONST_POOL_TRUE,
2266 tune_params::PREF_LDRD_FALSE,
2267 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2268 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2269 tune_params::DISPARAGE_FLAGS_NEITHER,
2270 tune_params::PREF_NEON_64_FALSE,
2271 tune_params::PREF_NEON_STRINGOPS_FALSE,
2272 tune_params::FUSE_NOTHING,
2273 tune_params::SCHED_AUTOPREF_OFF
2277 /* Not all of these give usefully different compilation alternatives,
2278 but there is no simple way of generalizing them. */
2279 static const struct processors all_cores[] =
2281 /* ARM Cores */
2282 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2283 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2284 FLAGS, &arm_##COSTS##_tune},
2285 #include "arm-cores.def"
2286 #undef ARM_CORE
2287 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
2290 static const struct processors all_architectures[] =
2292 /* ARM Architectures */
2293 /* We don't specify tuning costs here as it will be figured out
2294 from the core. */
2296 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2297 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2298 #include "arm-arches.def"
2299 #undef ARM_ARCH
2300 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
2304 /* These are populated as commandline arguments are processed, or NULL
2305 if not specified. */
2306 static const struct processors *arm_selected_arch;
2307 static const struct processors *arm_selected_cpu;
2308 static const struct processors *arm_selected_tune;
2310 /* The name of the preprocessor macro to define for this architecture. PROFILE
2311 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2312 is thus chosen to be big enough to hold the longest architecture name. */
2314 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
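/* For example, on an ARMv8-A target this becomes "__ARM_ARCH_8A__".  */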
2316 /* Available values for -mfpu=. */
2318 const struct arm_fpu_desc all_fpus[] =
2320 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, FEATURES) \
2321 { NAME, MODEL, REV, VFP_REGS, FEATURES },
2322 #include "arm-fpus.def"
2323 #undef ARM_FPU
2326 /* Supported TLS relocations. */
2328 enum tls_reloc {
2329 TLS_GD32,
2330 TLS_LDM32,
2331 TLS_LDO32,
2332 TLS_IE32,
2333 TLS_LE32,
2334 TLS_DESCSEQ /* GNU scheme */
2337 /* The maximum number of insns to be used when loading a constant. */
2338 inline static int
2339 arm_constant_limit (bool size_p)
2341 return size_p ? 1 : current_tune->constant_limit;
2344 /* Emit an insn that's a simple single-set. Both the operands must be known
2345 to be valid. */
2346 inline static rtx_insn *
2347 emit_set_insn (rtx x, rtx y)
2349 return emit_insn (gen_rtx_SET (x, y));
2352 /* Return the number of bits set in VALUE. */
2353 static unsigned
2354 bit_count (unsigned long value)
2356 unsigned long count = 0;
2358 while (value)
2360 count++;
2361 value &= value - 1; /* Clear the least-significant set bit. */
2364 return count;
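/* For example, bit_count (0x29) returns 3: the loop clears bit 0, then
   bit 3, then bit 5, and terminates once the value reaches zero.  */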
2367 /* Return the number of features in feature-set SET. */
2368 static unsigned
2369 feature_count (const arm_feature_set * set)
2371 return (bit_count (ARM_FSET_CPU1 (*set))
2372 + bit_count (ARM_FSET_CPU2 (*set)));
2375 typedef struct
2377 machine_mode mode;
2378 const char *name;
2379 } arm_fixed_mode_set;
2381 /* A small helper for setting fixed-point libfuncs. */
2383 static void
2384 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2385 const char *funcname, const char *modename,
2386 int num_suffix)
2388 char buffer[50];
2390 if (num_suffix == 0)
2391 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2392 else
2393 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2395 set_optab_libfunc (optable, mode, buffer);
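/* For example, arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
   registers the libcall name "__gnu_addqq3" for QQmode addition.  */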
2398 static void
2399 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2400 machine_mode from, const char *funcname,
2401 const char *toname, const char *fromname)
2403 char buffer[50];
2404 const char *maybe_suffix_2 = "";
2406 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2407 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2408 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2409 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2410 maybe_suffix_2 = "2";
2412 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2413 maybe_suffix_2);
2415 set_conv_libfunc (optable, to, from, buffer);
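/* For example, a fract conversion from SQmode to DFmode is registered as
   "__gnu_fractsqdf" (no "2" suffix, since DFmode is not a fixed-point mode),
   whereas SQmode to DQmode becomes "__gnu_fractsqdq2".  */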
2418 /* Set up library functions unique to ARM. */
2420 static void
2421 arm_init_libfuncs (void)
2423 /* For Linux, we have access to kernel support for atomic operations. */
2424 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2425 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2427 /* There are no special library functions unless we are using the
2428 ARM BPABI. */
2429 if (!TARGET_BPABI)
2430 return;
2432 /* The functions below are described in Section 4 of the "Run-Time
2433 ABI for the ARM architecture", Version 1.0. */
2435 /* Double-precision floating-point arithmetic. Table 2. */
2436 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2437 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2438 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2439 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2440 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2442 /* Double-precision comparisons. Table 3. */
2443 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2444 set_optab_libfunc (ne_optab, DFmode, NULL);
2445 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2446 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2447 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2448 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2449 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2451 /* Single-precision floating-point arithmetic. Table 4. */
2452 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2453 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2454 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2455 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2456 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2458 /* Single-precision comparisons. Table 5. */
2459 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2460 set_optab_libfunc (ne_optab, SFmode, NULL);
2461 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2462 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2463 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2464 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2465 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2467 /* Floating-point to integer conversions. Table 6. */
2468 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2469 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2470 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2471 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2472 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2473 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2474 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2475 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2477 /* Conversions between floating types. Table 7. */
2478 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2479 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2481 /* Integer to floating-point conversions. Table 8. */
2482 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2483 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2484 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2485 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2486 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2487 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2488 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2489 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2491 /* Long long. Table 9. */
2492 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2493 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2494 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2495 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2496 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2497 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2498 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2499 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2501 /* Integer (32/32->32) division. \S 4.3.1. */
2502 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2503 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2505 /* The divmod functions are designed so that they can be used for
2506 plain division, even though they return both the quotient and the
2507 remainder. The quotient is returned in the usual location (i.e.,
2508 r0 for SImode, {r0, r1} for DImode), just as would be expected
2509 for an ordinary division routine. Because the AAPCS calling
2510 conventions specify that all of { r0, r1, r2, r3 } are
2511 call-clobbered registers, there is no need to tell the compiler
2512 explicitly that those registers are clobbered by these
2513 routines. */
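/* For example, "__aeabi_idivmod" returns the quotient in r0 and the
   remainder in r1; a caller that only wants the quotient simply ignores r1.  */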
2514 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2515 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2517 /* For SImode division the ABI provides div-without-mod routines,
2518 which are faster. */
2519 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2520 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2522 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2523 divmod libcalls instead. */
2524 set_optab_libfunc (smod_optab, DImode, NULL);
2525 set_optab_libfunc (umod_optab, DImode, NULL);
2526 set_optab_libfunc (smod_optab, SImode, NULL);
2527 set_optab_libfunc (umod_optab, SImode, NULL);
2529 /* Half-precision float operations. The compiler handles all operations
2530 with NULL libfuncs by converting to SFmode. */
2531 switch (arm_fp16_format)
2533 case ARM_FP16_FORMAT_IEEE:
2534 case ARM_FP16_FORMAT_ALTERNATIVE:
2536 /* Conversions. */
2537 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2538 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2539 ? "__gnu_f2h_ieee"
2540 : "__gnu_f2h_alternative"));
2541 set_conv_libfunc (sext_optab, SFmode, HFmode,
2542 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2543 ? "__gnu_h2f_ieee"
2544 : "__gnu_h2f_alternative"));
2546 /* Arithmetic. */
2547 set_optab_libfunc (add_optab, HFmode, NULL);
2548 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2549 set_optab_libfunc (smul_optab, HFmode, NULL);
2550 set_optab_libfunc (neg_optab, HFmode, NULL);
2551 set_optab_libfunc (sub_optab, HFmode, NULL);
2553 /* Comparisons. */
2554 set_optab_libfunc (eq_optab, HFmode, NULL);
2555 set_optab_libfunc (ne_optab, HFmode, NULL);
2556 set_optab_libfunc (lt_optab, HFmode, NULL);
2557 set_optab_libfunc (le_optab, HFmode, NULL);
2558 set_optab_libfunc (ge_optab, HFmode, NULL);
2559 set_optab_libfunc (gt_optab, HFmode, NULL);
2560 set_optab_libfunc (unord_optab, HFmode, NULL);
2561 break;
2563 default:
2564 break;
2567 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2569 const arm_fixed_mode_set fixed_arith_modes[] =
2571 { QQmode, "qq" },
2572 { UQQmode, "uqq" },
2573 { HQmode, "hq" },
2574 { UHQmode, "uhq" },
2575 { SQmode, "sq" },
2576 { USQmode, "usq" },
2577 { DQmode, "dq" },
2578 { UDQmode, "udq" },
2579 { TQmode, "tq" },
2580 { UTQmode, "utq" },
2581 { HAmode, "ha" },
2582 { UHAmode, "uha" },
2583 { SAmode, "sa" },
2584 { USAmode, "usa" },
2585 { DAmode, "da" },
2586 { UDAmode, "uda" },
2587 { TAmode, "ta" },
2588 { UTAmode, "uta" }
2590 const arm_fixed_mode_set fixed_conv_modes[] =
2592 { QQmode, "qq" },
2593 { UQQmode, "uqq" },
2594 { HQmode, "hq" },
2595 { UHQmode, "uhq" },
2596 { SQmode, "sq" },
2597 { USQmode, "usq" },
2598 { DQmode, "dq" },
2599 { UDQmode, "udq" },
2600 { TQmode, "tq" },
2601 { UTQmode, "utq" },
2602 { HAmode, "ha" },
2603 { UHAmode, "uha" },
2604 { SAmode, "sa" },
2605 { USAmode, "usa" },
2606 { DAmode, "da" },
2607 { UDAmode, "uda" },
2608 { TAmode, "ta" },
2609 { UTAmode, "uta" },
2610 { QImode, "qi" },
2611 { HImode, "hi" },
2612 { SImode, "si" },
2613 { DImode, "di" },
2614 { TImode, "ti" },
2615 { SFmode, "sf" },
2616 { DFmode, "df" }
2618 unsigned int i, j;
2620 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2622 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2623 "add", fixed_arith_modes[i].name, 3);
2624 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2625 "ssadd", fixed_arith_modes[i].name, 3);
2626 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2627 "usadd", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2629 "sub", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2631 "sssub", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2633 "ussub", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2635 "mul", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2637 "ssmul", fixed_arith_modes[i].name, 3);
2638 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2639 "usmul", fixed_arith_modes[i].name, 3);
2640 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2641 "div", fixed_arith_modes[i].name, 3);
2642 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2643 "udiv", fixed_arith_modes[i].name, 3);
2644 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2645 "ssdiv", fixed_arith_modes[i].name, 3);
2646 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2647 "usdiv", fixed_arith_modes[i].name, 3);
2648 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2649 "neg", fixed_arith_modes[i].name, 2);
2650 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2651 "ssneg", fixed_arith_modes[i].name, 2);
2652 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2653 "usneg", fixed_arith_modes[i].name, 2);
2654 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2655 "ashl", fixed_arith_modes[i].name, 3);
2656 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2657 "ashr", fixed_arith_modes[i].name, 3);
2658 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2659 "lshr", fixed_arith_modes[i].name, 3);
2660 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2661 "ssashl", fixed_arith_modes[i].name, 3);
2662 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2663 "usashl", fixed_arith_modes[i].name, 3);
2664 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2665 "cmp", fixed_arith_modes[i].name, 2);
2668 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2669 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2671 if (i == j
2672 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2673 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2674 continue;
2676 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2677 fixed_conv_modes[j].mode, "fract",
2678 fixed_conv_modes[i].name,
2679 fixed_conv_modes[j].name);
2680 arm_set_fixed_conv_libfunc (satfract_optab,
2681 fixed_conv_modes[i].mode,
2682 fixed_conv_modes[j].mode, "satfract",
2683 fixed_conv_modes[i].name,
2684 fixed_conv_modes[j].name);
2685 arm_set_fixed_conv_libfunc (fractuns_optab,
2686 fixed_conv_modes[i].mode,
2687 fixed_conv_modes[j].mode, "fractuns",
2688 fixed_conv_modes[i].name,
2689 fixed_conv_modes[j].name);
2690 arm_set_fixed_conv_libfunc (satfractuns_optab,
2691 fixed_conv_modes[i].mode,
2692 fixed_conv_modes[j].mode, "satfractuns",
2693 fixed_conv_modes[i].name,
2694 fixed_conv_modes[j].name);
2698 if (TARGET_AAPCS_BASED)
2699 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2702 /* On AAPCS systems, this is the "struct __va_list". */
2703 static GTY(()) tree va_list_type;
2705 /* Return the type to use as __builtin_va_list. */
2706 static tree
2707 arm_build_builtin_va_list (void)
2709 tree va_list_name;
2710 tree ap_field;
2712 if (!TARGET_AAPCS_BASED)
2713 return std_build_builtin_va_list ();
2715 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2716 defined as:
2718 struct __va_list
2720 void *__ap;
2723 The C Library ABI further reinforces this definition in \S
2724 4.1.
2726 We must follow this definition exactly. The structure tag
2727 name is visible in C++ mangled names, and thus forms a part
2728 of the ABI. The field name may be used by people who
2729 #include <stdarg.h>. */
2730 /* Create the type. */
2731 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2732 /* Give it the required name. */
2733 va_list_name = build_decl (BUILTINS_LOCATION,
2734 TYPE_DECL,
2735 get_identifier ("__va_list"),
2736 va_list_type);
2737 DECL_ARTIFICIAL (va_list_name) = 1;
2738 TYPE_NAME (va_list_type) = va_list_name;
2739 TYPE_STUB_DECL (va_list_type) = va_list_name;
2740 /* Create the __ap field. */
2741 ap_field = build_decl (BUILTINS_LOCATION,
2742 FIELD_DECL,
2743 get_identifier ("__ap"),
2744 ptr_type_node);
2745 DECL_ARTIFICIAL (ap_field) = 1;
2746 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2747 TYPE_FIELDS (va_list_type) = ap_field;
2748 /* Compute its layout. */
2749 layout_type (va_list_type);
2751 return va_list_type;
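/* Note that on AAPCS targets the tag above is what makes a C++ parameter of
   type va_list mangle as "St9__va_list" rather than as a plain pointer.  */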
2754 /* Return an expression of type "void *" pointing to the next
2755 available argument in a variable-argument list. VALIST is the
2756 user-level va_list object, of type __builtin_va_list. */
2757 static tree
2758 arm_extract_valist_ptr (tree valist)
2760 if (TREE_TYPE (valist) == error_mark_node)
2761 return error_mark_node;
2763 /* On an AAPCS target, the pointer is stored within "struct
2764 va_list". */
2765 if (TARGET_AAPCS_BASED)
2767 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2768 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2769 valist, ap_field, NULL_TREE);
2772 return valist;
2775 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2776 static void
2777 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2779 valist = arm_extract_valist_ptr (valist);
2780 std_expand_builtin_va_start (valist, nextarg);
2783 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2784 static tree
2785 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2786 gimple_seq *post_p)
2788 valist = arm_extract_valist_ptr (valist);
2789 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2792 /* Check any incompatible options that the user has specified. */
2793 static void
2794 arm_option_check_internal (struct gcc_options *opts)
2796 int flags = opts->x_target_flags;
2797 const struct arm_fpu_desc *fpu_desc = &all_fpus[opts->x_arm_fpu_index];
2799 /* iWMMXt and NEON are incompatible. */
2800 if (TARGET_IWMMXT && TARGET_VFP
2801 && ARM_FPU_FSET_HAS (fpu_desc->features, FPU_FL_NEON))
2802 error ("iWMMXt and NEON are incompatible");
2804 /* Make sure that the processor choice does not conflict with any of the
2805 other command line choices. */
2806 if (TARGET_ARM_P (flags) && !ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM))
2807 error ("target CPU does not support ARM mode");
2809 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2810 from here where no function is being compiled currently. */
2811 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2812 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2814 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2815 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2817 /* If this target is normally configured to use APCS frames, warn if they
2818 are turned off and debugging is turned on. */
2819 if (TARGET_ARM_P (flags)
2820 && write_symbols != NO_DEBUG
2821 && !TARGET_APCS_FRAME
2822 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2823 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2825 /* iWMMXt unsupported under Thumb mode. */
2826 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2827 error ("iWMMXt unsupported under Thumb mode");
2829 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2830 error ("can not use -mtp=cp15 with 16-bit Thumb");
2832 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2834 error ("RTP PIC is incompatible with Thumb");
2835 flag_pic = 0;
2838 /* We only support -mslow-flash-data on armv7-m targets. */
2839 if (target_slow_flash_data
2840 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2841 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2842 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2845 /* Recompute the global settings depending on target attribute options. */
2847 static void
2848 arm_option_params_internal (void)
2850 /* If we are not using the default (ARM mode) section anchor offset
2851 ranges, then set the correct ranges now. */
2852 if (TARGET_THUMB1)
2854 /* Thumb-1 LDR instructions cannot have negative offsets.
2855 Permissible positive offset ranges are 5-bit (for byte loads),
2856 6-bit (for halfword loads), or 7-bit (for word loads).
2857 Empirical results suggest a 7-bit anchor range gives the best
2858 overall code size. */
2859 targetm.min_anchor_offset = 0;
2860 targetm.max_anchor_offset = 127;
2862 else if (TARGET_THUMB2)
2864 /* The minimum is set such that the total size of the block
2865 for a particular anchor is 248 + 1 + 4095 bytes, which is
2866 divisible by eight, ensuring natural spacing of anchors. */
2867 targetm.min_anchor_offset = -248;
2868 targetm.max_anchor_offset = 4095;
2870 else
2872 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2873 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2876 if (optimize_size)
2878 /* If optimizing for size, bump the number of instructions that we
2879 are prepared to conditionally execute (even on a StrongARM). */
2880 max_insns_skipped = 6;
2882 /* For THUMB2, we limit the conditional sequence to one IT block. */
2883 if (TARGET_THUMB2)
2884 max_insns_skipped = arm_restrict_it ? 1 : 4;
2886 else
2887 /* When -mrestrict-it is in use, tone down the if-conversion. */
2888 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2889 ? 1 : current_tune->max_insns_skipped;
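/* For example, on Thumb-2 with -mrestrict-it (and not optimising for size)
   if-conversion is limited to a single conditional instruction.  */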
2892 /* True if -mflip-thumb should next add an attribute for the default
2893 mode, false if it should next add an attribute for the opposite mode. */
2894 static GTY(()) bool thumb_flipper;
2896 /* Options after initial target override. */
2897 static GTY(()) tree init_optimize;
2899 static void
2900 arm_override_options_after_change_1 (struct gcc_options *opts)
2902 if (opts->x_align_functions <= 0)
2903 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2904 && opts->x_optimize_size ? 2 : 4;
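/* For example, with -mthumb -Os and no explicit -falign-functions this picks
   a function alignment of 2 bytes; otherwise the default is 4.  */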
2907 /* Implement targetm.override_options_after_change. */
2909 static void
2910 arm_override_options_after_change (void)
2912 arm_override_options_after_change_1 (&global_options);
2915 /* Reset options between modes that the user has specified. */
2916 static void
2917 arm_option_override_internal (struct gcc_options *opts,
2918 struct gcc_options *opts_set)
2920 arm_override_options_after_change_1 (opts);
2922 if (TARGET_INTERWORK && !ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB))
2924 /* The default is to enable interworking, so this warning message would
2925 be confusing to users who have just compiled with, e.g., -march=armv3. */
2926 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2927 opts->x_target_flags &= ~MASK_INTERWORK;
2930 if (TARGET_THUMB_P (opts->x_target_flags)
2931 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB)))
2933 warning (0, "target CPU does not support THUMB instructions");
2934 opts->x_target_flags &= ~MASK_THUMB;
2937 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2939 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2940 opts->x_target_flags &= ~MASK_APCS_FRAME;
2943 /* Callee super interworking implies thumb interworking. Adding
2944 this to the flags here simplifies the logic elsewhere. */
2945 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2946 opts->x_target_flags |= MASK_INTERWORK;
2948 /* Need to remember initial values so combinations of options like
2949 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2950 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2952 if (! opts_set->x_arm_restrict_it)
2953 opts->x_arm_restrict_it = arm_arch8;
2955 /* ARM execution state and M profile don't have [restrict] IT. */
2956 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2957 opts->x_arm_restrict_it = 0;
2959 /* Enable -munaligned-access by default for
2960 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2961 i.e. Thumb2 and ARM state only.
2962 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2963 - ARMv8 architecture-based processors.
2965 Disable -munaligned-access by default for
2966 - all pre-ARMv6 architecture-based processors
2967 - ARMv6-M architecture-based processors
2968 - ARMv8-M Baseline processors. */
2970 if (! opts_set->x_unaligned_access)
2972 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2973 && arm_arch6 && (arm_arch_notm || arm_arch7));
2975 else if (opts->x_unaligned_access == 1
2976 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2978 warning (0, "target CPU does not support unaligned accesses");
2979 opts->x_unaligned_access = 0;
2982 /* Don't warn since it's on by default in -O2. */
2983 if (TARGET_THUMB1_P (opts->x_target_flags))
2984 opts->x_flag_schedule_insns = 0;
2985 else
2986 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
2988 /* Disable shrink-wrap when optimizing function for size, since it tends to
2989 generate additional returns. */
2990 if (optimize_function_for_size_p (cfun)
2991 && TARGET_THUMB2_P (opts->x_target_flags))
2992 opts->x_flag_shrink_wrap = false;
2993 else
2994 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
2996 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2997 - epilogue_insns - does not accurately model the corresponding insns
2998 emitted in the asm file. In particular, see the comment in thumb_exit
2999 'Find out how many of the (return) argument registers we can corrupt'.
3000 As a consequence, the epilogue may clobber registers without fipa-ra
3001 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3002 TODO: Accurately model clobbers for epilogue_insns and reenable
3003 fipa-ra. */
3004 if (TARGET_THUMB1_P (opts->x_target_flags))
3005 opts->x_flag_ipa_ra = 0;
3006 else
3007 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3009 /* Thumb2 inline assembly code should always use unified syntax.
3010 This will apply to ARM and Thumb1 eventually. */
3011 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3013 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3014 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3015 #endif
3018 /* Fix up any incompatible options that the user has specified. */
3019 static void
3020 arm_option_override (void)
3022 arm_selected_arch = NULL;
3023 arm_selected_cpu = NULL;
3024 arm_selected_tune = NULL;
3026 if (global_options_set.x_arm_arch_option)
3027 arm_selected_arch = &all_architectures[arm_arch_option];
3029 if (global_options_set.x_arm_cpu_option)
3031 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
3032 arm_selected_tune = &all_cores[(int) arm_cpu_option];
3035 if (global_options_set.x_arm_tune_option)
3036 arm_selected_tune = &all_cores[(int) arm_tune_option];
3038 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3039 SUBTARGET_OVERRIDE_OPTIONS;
3040 #endif
3042 if (arm_selected_arch)
3044 if (arm_selected_cpu)
3046 const arm_feature_set tuning_flags = ARM_FSET_MAKE_CPU1 (FL_TUNE);
3047 arm_feature_set selected_flags;
3048 ARM_FSET_XOR (selected_flags, arm_selected_cpu->flags,
3049 arm_selected_arch->flags);
3050 ARM_FSET_EXCLUDE (selected_flags, selected_flags, tuning_flags);
3051 /* Check for conflict between mcpu and march. */
3052 if (!ARM_FSET_IS_EMPTY (selected_flags))
3054 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3055 arm_selected_cpu->name, arm_selected_arch->name);
3056 /* -march wins for code generation.
3057 -mcpu wins for default tuning. */
3058 if (!arm_selected_tune)
3059 arm_selected_tune = arm_selected_cpu;
3061 arm_selected_cpu = arm_selected_arch;
3063 else
3064 /* -mcpu wins. */
3065 arm_selected_arch = NULL;
3067 else
3068 /* Pick a CPU based on the architecture. */
3069 arm_selected_cpu = arm_selected_arch;
3072 /* If the user did not specify a processor, choose one for them. */
3073 if (!arm_selected_cpu)
3075 const struct processors * sel;
3076 arm_feature_set sought = ARM_FSET_EMPTY;
3078 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3079 if (!arm_selected_cpu->name)
3081 #ifdef SUBTARGET_CPU_DEFAULT
3082 /* Use the subtarget default CPU if none was specified by
3083 configure. */
3084 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
3085 #endif
3086 /* Default to ARM6. */
3087 if (!arm_selected_cpu->name)
3088 arm_selected_cpu = &all_cores[arm6];
3091 sel = arm_selected_cpu;
3092 insn_flags = sel->flags;
3094 /* Now check to see if the user has specified some command line
3095 switches that require certain abilities from the CPU. */
3097 if (TARGET_INTERWORK || TARGET_THUMB)
3099 ARM_FSET_ADD_CPU1 (sought, FL_THUMB);
3100 ARM_FSET_ADD_CPU1 (sought, FL_MODE32);
3102 /* There are no ARM processors that support both APCS-26 and
3103 interworking. Therefore we force FL_MODE26 to be removed
3104 from insn_flags here (if it was set), so that the search
3105 below will always be able to find a compatible processor. */
3106 ARM_FSET_DEL_CPU1 (insn_flags, FL_MODE26);
3109 if (!ARM_FSET_IS_EMPTY (sought)
3110 && !(ARM_FSET_CPU_SUBSET (sought, insn_flags)))
3112 /* Try to locate a CPU type that supports all of the abilities
3113 of the default CPU, plus the extra abilities requested by
3114 the user. */
3115 for (sel = all_cores; sel->name != NULL; sel++)
3116 if (ARM_FSET_CPU_SUBSET (sought, sel->flags))
3117 break;
3119 if (sel->name == NULL)
3121 unsigned current_bit_count = 0;
3122 const struct processors * best_fit = NULL;
3124 /* Ideally we would like to issue an error message here
3125 saying that it was not possible to find a CPU compatible
3126 with the default CPU, but which also supports the command
3127 line options specified by the programmer, and so they
3128 ought to use the -mcpu=<name> command line option to
3129 override the default CPU type.
3131 If we cannot find a cpu that has both the
3132 characteristics of the default cpu and the given
3133 command line options, we scan the array again looking
3134 for a best match. */
3135 for (sel = all_cores; sel->name != NULL; sel++)
3137 arm_feature_set required = ARM_FSET_EMPTY;
3138 ARM_FSET_UNION (required, sought, insn_flags);
3139 if (ARM_FSET_CPU_SUBSET (required, sel->flags))
3141 unsigned count;
3142 arm_feature_set flags;
3143 ARM_FSET_INTER (flags, sel->flags, insn_flags);
3144 count = feature_count (&flags);
3146 if (count >= current_bit_count)
3148 best_fit = sel;
3149 current_bit_count = count;
3153 gcc_assert (best_fit);
3154 sel = best_fit;
3157 arm_selected_cpu = sel;
3161 gcc_assert (arm_selected_cpu);
3162 /* The selected cpu may be an architecture, so look up tuning by core ID. */
3163 if (!arm_selected_tune)
3164 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3166 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
3167 insn_flags = arm_selected_cpu->flags;
3168 arm_base_arch = arm_selected_cpu->base_arch;
3170 arm_tune = arm_selected_tune->core;
3171 tune_flags = arm_selected_tune->flags;
3172 current_tune = arm_selected_tune->tune;
3174 /* TBD: Dwarf info for apcs frame is not handled yet. */
3175 if (TARGET_APCS_FRAME)
3176 flag_shrink_wrap = false;
3178 /* BPABI targets use linker tricks to allow interworking on cores
3179 without thumb support. */
3180 if (TARGET_INTERWORK
3181 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB) || TARGET_BPABI))
3183 warning (0, "target CPU does not support interworking" );
3184 target_flags &= ~MASK_INTERWORK;
3187 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3189 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3190 target_flags |= MASK_APCS_FRAME;
3193 if (TARGET_POKE_FUNCTION_NAME)
3194 target_flags |= MASK_APCS_FRAME;
3196 if (TARGET_APCS_REENT && flag_pic)
3197 error ("-fpic and -mapcs-reent are incompatible");
3199 if (TARGET_APCS_REENT)
3200 warning (0, "APCS reentrant code not supported. Ignored");
3202 if (TARGET_APCS_FLOAT)
3203 warning (0, "passing floating point arguments in fp regs not yet supported");
3205 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3206 arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M);
3207 arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4);
3208 arm_arch4t = arm_arch4 && (ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB));
3209 arm_arch5 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5);
3210 arm_arch5e = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5E);
3211 arm_arch6 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6);
3212 arm_arch6k = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6K);
3213 arm_arch6kz = arm_arch6k && ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6KZ);
3214 arm_arch_notm = ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM);
3215 arm_arch6m = arm_arch6 && !arm_arch_notm;
3216 arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7);
3217 arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM);
3218 arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8);
3219 arm_arch8_1 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_1);
3220 arm_arch_thumb1 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB);
3221 arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2);
3222 arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE);
3224 arm_ld_sched = ARM_FSET_HAS_CPU1 (tune_flags, FL_LDSCHED);
3225 arm_tune_strongarm = ARM_FSET_HAS_CPU1 (tune_flags, FL_STRONG);
3226 arm_tune_wbuf = ARM_FSET_HAS_CPU1 (tune_flags, FL_WBUF);
3227 arm_tune_xscale = ARM_FSET_HAS_CPU1 (tune_flags, FL_XSCALE);
3228 arm_arch_iwmmxt = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT);
3229 arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2);
3230 arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV);
3231 arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV);
3232 arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
3233 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
3234 arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
3235 arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL);
3237 /* V5 code we generate is completely interworking capable, so we turn off
3238 TARGET_INTERWORK here to avoid many tests later on. */
3240 /* XXX However, we must pass the right pre-processor defines to CPP
3241 or GLD can get confused. This is a hack. */
3242 if (TARGET_INTERWORK)
3243 arm_cpp_interwork = 1;
3245 if (arm_arch5)
3246 target_flags &= ~MASK_INTERWORK;
3248 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3249 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3251 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3252 error ("iwmmxt abi requires an iwmmxt capable cpu");
3254 if (!global_options_set.x_arm_fpu_index)
3256 const char *target_fpu_name;
3257 bool ok;
3259 #ifdef FPUTYPE_DEFAULT
3260 target_fpu_name = FPUTYPE_DEFAULT;
3261 #else
3262 target_fpu_name = "vfp";
3263 #endif
3265 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
3266 CL_TARGET);
3267 gcc_assert (ok);
3270 /* If soft-float is specified then don't use FPU. */
3271 if (TARGET_SOFT_FLOAT)
3272 arm_fpu_attr = FPU_NONE;
3273 else if (TARGET_VFP)
3274 arm_fpu_attr = FPU_VFP;
3275 else
3276 gcc_unreachable();
3278 if (TARGET_AAPCS_BASED)
3280 if (TARGET_CALLER_INTERWORKING)
3281 error ("AAPCS does not support -mcaller-super-interworking");
3282 else
3283 if (TARGET_CALLEE_INTERWORKING)
3284 error ("AAPCS does not support -mcallee-super-interworking");
3287 /* __fp16 support currently assumes the core has ldrh. */
3288 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3289 sorry ("__fp16 and no ldrh");
3291 if (TARGET_AAPCS_BASED)
3293 if (arm_abi == ARM_ABI_IWMMXT)
3294 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3295 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3296 && TARGET_HARD_FLOAT
3297 && TARGET_VFP)
3298 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3299 else
3300 arm_pcs_default = ARM_PCS_AAPCS;
3302 else
3304 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
3305 sorry ("-mfloat-abi=hard and VFP");
3307 if (arm_abi == ARM_ABI_APCS)
3308 arm_pcs_default = ARM_PCS_APCS;
3309 else
3310 arm_pcs_default = ARM_PCS_ATPCS;
3313 /* For arm2/3 there is no need to do any scheduling if we are doing
3314 software floating-point. */
3315 if (TARGET_SOFT_FLOAT && !ARM_FSET_HAS_CPU1 (tune_flags, FL_MODE32))
3316 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3318 /* Use the cp15 method if it is available. */
3319 if (target_thread_pointer == TP_AUTO)
3321 if (arm_arch6k && !TARGET_THUMB1)
3322 target_thread_pointer = TP_CP15;
3323 else
3324 target_thread_pointer = TP_SOFT;
3327 /* Override the default structure alignment for AAPCS ABI. */
3328 if (!global_options_set.x_arm_structure_size_boundary)
3330 if (TARGET_AAPCS_BASED)
3331 arm_structure_size_boundary = 8;
3333 else
3335 if (arm_structure_size_boundary != 8
3336 && arm_structure_size_boundary != 32
3337 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3339 if (ARM_DOUBLEWORD_ALIGN)
3340 warning (0,
3341 "structure size boundary can only be set to 8, 32 or 64");
3342 else
3343 warning (0, "structure size boundary can only be set to 8 or 32");
3344 arm_structure_size_boundary
3345 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3349 /* If stack checking is disabled, we can use r10 as the PIC register,
3350 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3351 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3353 if (TARGET_VXWORKS_RTP)
3354 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3355 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3358 if (flag_pic && TARGET_VXWORKS_RTP)
3359 arm_pic_register = 9;
3361 if (arm_pic_register_string != NULL)
3363 int pic_register = decode_reg_name (arm_pic_register_string);
3365 if (!flag_pic)
3366 warning (0, "-mpic-register= is useless without -fpic");
3368 /* Prevent the user from choosing an obviously stupid PIC register. */
3369 else if (pic_register < 0 || call_used_regs[pic_register]
3370 || pic_register == HARD_FRAME_POINTER_REGNUM
3371 || pic_register == STACK_POINTER_REGNUM
3372 || pic_register >= PC_REGNUM
3373 || (TARGET_VXWORKS_RTP
3374 && (unsigned int) pic_register != arm_pic_register))
3375 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3376 else
3377 arm_pic_register = pic_register;
3380 if (TARGET_VXWORKS_RTP
3381 && !global_options_set.x_arm_pic_data_is_text_relative)
3382 arm_pic_data_is_text_relative = 0;
3384 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3385 if (fix_cm3_ldrd == 2)
3387 if (arm_selected_cpu->core == cortexm3)
3388 fix_cm3_ldrd = 1;
3389 else
3390 fix_cm3_ldrd = 0;
3393 /* Hot/Cold partitioning is not currently supported, since we can't
3394 handle literal pool placement in that case. */
3395 if (flag_reorder_blocks_and_partition)
3397 inform (input_location,
3398 "-freorder-blocks-and-partition not supported on this architecture");
3399 flag_reorder_blocks_and_partition = 0;
3400 flag_reorder_blocks = 1;
3403 if (flag_pic)
3404 /* Hoisting PIC address calculations more aggressively provides a small,
3405 but measurable, size reduction for PIC code. Therefore, we decrease
3406 the bar for unrestricted expression hoisting to the cost of PIC address
3407 calculation, which is 2 instructions. */
3408 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3409 global_options.x_param_values,
3410 global_options_set.x_param_values);
3412 /* ARM EABI defaults to strict volatile bitfields. */
3413 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3414 && abi_version_at_least(2))
3415 flag_strict_volatile_bitfields = 1;
3417 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and for which we
3418 have deemed it beneficial (signified by setting
3419 prefetch.num_slots to 1 or more). */
3420 if (flag_prefetch_loop_arrays < 0
3421 && HAVE_prefetch
3422 && optimize >= 3
3423 && current_tune->prefetch.num_slots > 0)
3424 flag_prefetch_loop_arrays = 1;
3426 /* Set up parameters to be used in prefetching algorithm. Do not
3427 override the defaults unless we are tuning for a core we have
3428 researched values for. */
3429 if (current_tune->prefetch.num_slots > 0)
3430 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3431 current_tune->prefetch.num_slots,
3432 global_options.x_param_values,
3433 global_options_set.x_param_values);
3434 if (current_tune->prefetch.l1_cache_line_size >= 0)
3435 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3436 current_tune->prefetch.l1_cache_line_size,
3437 global_options.x_param_values,
3438 global_options_set.x_param_values);
3439 if (current_tune->prefetch.l1_cache_size >= 0)
3440 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3441 current_tune->prefetch.l1_cache_size,
3442 global_options.x_param_values,
3443 global_options_set.x_param_values);
3445 /* Use Neon to perform 64-bit operations rather than core
3446 registers. */
3447 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3448 if (use_neon_for_64bits == 1)
3449 prefer_neon_for_64bits = true;
3451 /* Use the alternative scheduling-pressure algorithm by default. */
3452 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3453 global_options.x_param_values,
3454 global_options_set.x_param_values);
3456 /* Look through ready list and all of queue for instructions
3457 relevant for L2 auto-prefetcher. */
3458 int param_sched_autopref_queue_depth;
3460 switch (current_tune->sched_autopref)
3462 case tune_params::SCHED_AUTOPREF_OFF:
3463 param_sched_autopref_queue_depth = -1;
3464 break;
3466 case tune_params::SCHED_AUTOPREF_RANK:
3467 param_sched_autopref_queue_depth = 0;
3468 break;
3470 case tune_params::SCHED_AUTOPREF_FULL:
3471 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3472 break;
3474 default:
3475 gcc_unreachable ();
3478 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3479 param_sched_autopref_queue_depth,
3480 global_options.x_param_values,
3481 global_options_set.x_param_values);
3483 /* Currently, for slow flash data, we just disable literal pools. */
3484 if (target_slow_flash_data)
3485 arm_disable_literal_pool = true;
3487 /* Disable scheduling fusion by default if it's not an ARMv7 processor
3488 or doesn't prefer ldrd/strd. */
3489 if (flag_schedule_fusion == 2
3490 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3491 flag_schedule_fusion = 0;
3493 /* Need to remember initial options before they are overridden. */
3494 init_optimize = build_optimization_node (&global_options);
3496 arm_option_override_internal (&global_options, &global_options_set);
3497 arm_option_check_internal (&global_options);
3498 arm_option_params_internal ();
3500 /* Register global variables with the garbage collector. */
3501 arm_add_gc_roots ();
3503 /* Save the initial options in case the user does function specific
3504 options or #pragma target. */
3505 target_option_default_node = target_option_current_node
3506 = build_target_option_node (&global_options);
3508 /* Initialize the initial mode for testing. */
3509 thumb_flipper = TARGET_THUMB;
3512 static void
3513 arm_add_gc_roots (void)
3515 gcc_obstack_init(&minipool_obstack);
3516 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3519 /* A table of known ARM exception types.
3520 For use with the interrupt function attribute. */
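/* For instance (illustrative only), a handler declared as
     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
   has its string argument looked up in this table so that the FIQ-specific
   prologue/epilogue is used; any of the strings listed below is accepted.  */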
3522 typedef struct
3524 const char *const arg;
3525 const unsigned long return_value;
3527 isr_attribute_arg;
3529 static const isr_attribute_arg isr_attribute_args [] =
3531 { "IRQ", ARM_FT_ISR },
3532 { "irq", ARM_FT_ISR },
3533 { "FIQ", ARM_FT_FIQ },
3534 { "fiq", ARM_FT_FIQ },
3535 { "ABORT", ARM_FT_ISR },
3536 { "abort", ARM_FT_ISR },
3537 { "ABORT", ARM_FT_ISR },
3538 { "abort", ARM_FT_ISR },
3539 { "UNDEF", ARM_FT_EXCEPTION },
3540 { "undef", ARM_FT_EXCEPTION },
3541 { "SWI", ARM_FT_EXCEPTION },
3542 { "swi", ARM_FT_EXCEPTION },
3543 { NULL, ARM_FT_NORMAL }
3546 /* Returns the (interrupt) function type of the current
3547 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3549 static unsigned long
3550 arm_isr_value (tree argument)
3552 const isr_attribute_arg * ptr;
3553 const char * arg;
3555 if (!arm_arch_notm)
3556 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3558 /* No argument - default to IRQ. */
3559 if (argument == NULL_TREE)
3560 return ARM_FT_ISR;
3562 /* Get the value of the argument. */
3563 if (TREE_VALUE (argument) == NULL_TREE
3564 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3565 return ARM_FT_UNKNOWN;
3567 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3569 /* Check it against the list of known arguments. */
3570 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3571 if (streq (arg, ptr->arg))
3572 return ptr->return_value;
3574 /* An unrecognized interrupt type. */
3575 return ARM_FT_UNKNOWN;
3578 /* Computes the type of the current function. */
3580 static unsigned long
3581 arm_compute_func_type (void)
3583 unsigned long type = ARM_FT_UNKNOWN;
3584 tree a;
3585 tree attr;
3587 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3589 /* Decide if the current function is volatile. Such functions
3590 never return, and many memory cycles can be saved by not storing
3591 register values that will never be needed again. This optimization
3592 was added to speed up context switching in a kernel application. */
3593 if (optimize > 0
3594 && (TREE_NOTHROW (current_function_decl)
3595 || !(flag_unwind_tables
3596 || (flag_exceptions
3597 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3598 && TREE_THIS_VOLATILE (current_function_decl))
3599 type |= ARM_FT_VOLATILE;
3601 if (cfun->static_chain_decl != NULL)
3602 type |= ARM_FT_NESTED;
3604 attr = DECL_ATTRIBUTES (current_function_decl);
3606 a = lookup_attribute ("naked", attr);
3607 if (a != NULL_TREE)
3608 type |= ARM_FT_NAKED;
3610 a = lookup_attribute ("isr", attr);
3611 if (a == NULL_TREE)
3612 a = lookup_attribute ("interrupt", attr);
3614 if (a == NULL_TREE)
3615 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3616 else
3617 type |= arm_isr_value (TREE_VALUE (a));
3619 return type;
3622 /* Returns the type of the current function. */
3624 unsigned long
3625 arm_current_func_type (void)
3627 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3628 cfun->machine->func_type = arm_compute_func_type ();
3630 return cfun->machine->func_type;
3633 bool
3634 arm_allocate_stack_slots_for_args (void)
3636 /* Naked functions should not allocate stack slots for arguments. */
3637 return !IS_NAKED (arm_current_func_type ());
3640 static bool
3641 arm_warn_func_return (tree decl)
3643 /* Naked functions are implemented entirely in assembly, including the
3644 return sequence, so suppress warnings about this. */
3645 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3649 /* Output assembler code for a block containing the constant parts
3650 of a trampoline, leaving space for the variable parts.
3652 On the ARM (if r8 is the static chain regnum, and remembering that
3653 referencing pc adds an offset of 8), the trampoline looks like:
3654 ldr r8, [pc, #0]
3655 ldr pc, [pc]
3656 .word static chain value
3657 .word function's address
3658 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3660 static void
3661 arm_asm_trampoline_template (FILE *f)
3663 fprintf (f, "\t.syntax unified\n");
3665 if (TARGET_ARM)
3667 fprintf (f, "\t.arm\n");
3668 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3669 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3671 else if (TARGET_THUMB2)
3673 fprintf (f, "\t.thumb\n");
3674 /* The Thumb-2 trampoline is similar to the ARM implementation.
3675 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3676 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3677 STATIC_CHAIN_REGNUM, PC_REGNUM);
3678 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3680 else
3682 ASM_OUTPUT_ALIGN (f, 2);
3683 fprintf (f, "\t.code\t16\n");
3684 fprintf (f, ".Ltrampoline_start:\n");
3685 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3686 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3687 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3688 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3689 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3690 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3692 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3693 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3696 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3698 static void
3699 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3701 rtx fnaddr, mem, a_tramp;
3703 emit_block_move (m_tramp, assemble_trampoline_template (),
3704 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3706 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3707 emit_move_insn (mem, chain_value);
3709 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3710 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3711 emit_move_insn (mem, fnaddr);
3713 a_tramp = XEXP (m_tramp, 0);
3714 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3715 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3716 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3719 /* Thumb trampolines should be entered in thumb mode, so set
3720 the bottom bit of the address. */
3722 static rtx
3723 arm_trampoline_adjust_address (rtx addr)
3725 if (TARGET_THUMB)
3726 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3727 NULL, 0, OPTAB_LIB_WIDEN);
3728 return addr;
3731 /* Return 1 if it is possible to return using a single instruction.
3732 If SIBLING is non-null, this is a test for a return before a sibling
3733 call. SIBLING is the call insn, so we can examine its register usage. */
3736 use_return_insn (int iscond, rtx sibling)
3738 int regno;
3739 unsigned int func_type;
3740 unsigned long saved_int_regs;
3741 unsigned HOST_WIDE_INT stack_adjust;
3742 arm_stack_offsets *offsets;
3744 /* Never use a return instruction before reload has run. */
3745 if (!reload_completed)
3746 return 0;
3748 func_type = arm_current_func_type ();
3750 /* Naked, volatile and stack alignment functions need special
3751 consideration. */
3752 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3753 return 0;
3755 /* So do interrupt functions that use the frame pointer and Thumb
3756 interrupt functions. */
3757 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3758 return 0;
3760 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3761 && !optimize_function_for_size_p (cfun))
3762 return 0;
3764 offsets = arm_get_frame_offsets ();
3765 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3767 /* As do variadic functions. */
3768 if (crtl->args.pretend_args_size
3769 || cfun->machine->uses_anonymous_args
3770 /* Or if the function calls __builtin_eh_return () */
3771 || crtl->calls_eh_return
3772 /* Or if the function calls alloca */
3773 || cfun->calls_alloca
3774 /* Or if there is a stack adjustment. However, if the stack pointer
3775 is saved on the stack, we can use a pre-incrementing stack load. */
3776 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3777 && stack_adjust == 4))
3778 /* Or if the static chain register was saved above the frame, under the
3779 assumption that the stack pointer isn't saved on the stack. */
3780 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3781 && arm_compute_static_chain_stack_bytes() != 0))
3782 return 0;
3784 saved_int_regs = offsets->saved_regs_mask;
3786 /* Unfortunately, the insn
3788 ldmib sp, {..., sp, ...}
3790 triggers a bug on most SA-110 based devices, such that the stack
3791 pointer won't be correctly restored if the instruction takes a
3792 page fault. We work around this problem by popping r3 along with
3793 the other registers, since that is never slower than executing
3794 another instruction.
3796 We test for !arm_arch5 here, because code for any architecture
3797 less than this could potentially be run on one of the buggy
3798 chips. */
3799 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3801 /* Validate that r3 is a call-clobbered register (always true in
3802 the default abi) ... */
3803 if (!call_used_regs[3])
3804 return 0;
3806 /* ... that it isn't being used for a return value ... */
3807 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3808 return 0;
3810 /* ... or for a tail-call argument ... */
3811 if (sibling)
3813 gcc_assert (CALL_P (sibling));
3815 if (find_regno_fusage (sibling, USE, 3))
3816 return 0;
3819 /* ... and that there are no call-saved registers in r0-r2
3820 (always true in the default ABI). */
3821 if (saved_int_regs & 0x7)
3822 return 0;
3825 /* Can't be done if interworking with Thumb, and any registers have been
3826 stacked. */
3827 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3828 return 0;
3830 /* On StrongARM, conditional returns are expensive if they aren't
3831 taken and multiple registers have been stacked. */
3832 if (iscond && arm_tune_strongarm)
3834 /* Conditional return when just the LR is stored is a simple
3835 conditional-load instruction, that's not expensive. */
3836 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3837 return 0;
3839 if (flag_pic
3840 && arm_pic_register != INVALID_REGNUM
3841 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3842 return 0;
3845 /* If there are saved registers but the LR isn't saved, then we need
3846 two instructions for the return. */
3847 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3848 return 0;
3850 /* Can't be done if any of the VFP regs are pushed,
3851 since this also requires an insn. */
3852 if (TARGET_HARD_FLOAT && TARGET_VFP)
3853 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3854 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3855 return 0;
3857 if (TARGET_REALLY_IWMMXT)
3858 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3859 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3860 return 0;
3862 return 1;
3865 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3866 shrink-wrapping if possible. This is the case if we need to emit a
3867 prologue, which we can test by looking at the offsets. */
3868 bool
3869 use_simple_return_p (void)
3871 arm_stack_offsets *offsets;
3873 offsets = arm_get_frame_offsets ();
3874 return offsets->outgoing_args != 0;
3877 /* Return TRUE if int I is a valid immediate ARM constant. */
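/* Worked example (illustrative): 0x0000ff00 is accepted in both ARM and
   Thumb-2 mode (the 8-bit value 0xff rotated right by 24), whereas
   0x00ff00ff is rejected in ARM mode but accepted in Thumb-2 mode as the
   replicated pattern 0x00XY00XY.  */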
3880 const_ok_for_arm (HOST_WIDE_INT i)
3882 int lowbit;
3884 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3885 be all zero, or all one. */
3886 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3887 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3888 != ((~(unsigned HOST_WIDE_INT) 0)
3889 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3890 return FALSE;
3892 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3894 /* Fast return for 0 and small values. We must do this for zero, since
3895 the code below can't handle that one case. */
3896 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3897 return TRUE;
3899 /* Get the number of trailing zeros. */
3900 lowbit = ffs((int) i) - 1;
3902 /* Only even shifts are allowed in ARM mode so round down to the
3903 nearest even number. */
3904 if (TARGET_ARM)
3905 lowbit &= ~1;
3907 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3908 return TRUE;
3910 if (TARGET_ARM)
3912 /* Allow rotated constants in ARM mode. */
3913 if (lowbit <= 4
3914 && ((i & ~0xc000003f) == 0
3915 || (i & ~0xf000000f) == 0
3916 || (i & ~0xfc000003) == 0))
3917 return TRUE;
3919 else
3921 HOST_WIDE_INT v;
3923 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3924 v = i & 0xff;
3925 v |= v << 16;
3926 if (i == v || i == (v | (v << 8)))
3927 return TRUE;
3929 /* Allow repeated pattern 0xXY00XY00. */
3930 v = i & 0xff00;
3931 v |= v << 16;
3932 if (i == v)
3933 return TRUE;
3936 return FALSE;
3939 /* Return true if I is a valid constant for the operation CODE. */
3941 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3943 if (const_ok_for_arm (i))
3944 return 1;
3946 switch (code)
3948 case SET:
3949 /* See if we can use movw. */
3950 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
3951 return 1;
3952 else
3953 /* Otherwise, try mvn. */
3954 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3956 case PLUS:
3957 /* See if we can use addw or subw. */
3958 if (TARGET_THUMB2
3959 && ((i & 0xfffff000) == 0
3960 || ((-i) & 0xfffff000) == 0))
3961 return 1;
3962 /* else fall through. */
3964 case COMPARE:
3965 case EQ:
3966 case NE:
3967 case GT:
3968 case LE:
3969 case LT:
3970 case GE:
3971 case GEU:
3972 case LTU:
3973 case GTU:
3974 case LEU:
3975 case UNORDERED:
3976 case ORDERED:
3977 case UNEQ:
3978 case UNGE:
3979 case UNLT:
3980 case UNGT:
3981 case UNLE:
3982 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3984 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3985 case XOR:
3986 return 0;
3988 case IOR:
3989 if (TARGET_THUMB2)
3990 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3991 return 0;
3993 case AND:
3994 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3996 default:
3997 gcc_unreachable ();
4001 /* Return true if I is a valid di mode constant for the operation CODE. */
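/* Worked example (illustrative): a DImode AND with 0xffffffff000000ff is
   accepted here, since the high word 0xffffffff leaves that half unchanged
   and the low word 0xff is itself a valid immediate for the operation.  */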
4003 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4005 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4006 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4007 rtx hi = GEN_INT (hi_val);
4008 rtx lo = GEN_INT (lo_val);
4010 if (TARGET_THUMB1)
4011 return 0;
4013 switch (code)
4015 case AND:
4016 case IOR:
4017 case XOR:
4018 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4019 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4020 case PLUS:
4021 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4023 default:
4024 return 0;
4028 /* Emit a sequence of insns to handle a large constant.
4029 CODE is the code of the operation required, it can be any of SET, PLUS,
4030 IOR, AND, XOR, MINUS;
4031 MODE is the mode in which the operation is being performed;
4032 VAL is the integer to operate on;
4033 SOURCE is the other operand (a register, or a null-pointer for SET);
4034 SUBTARGETS means it is safe to create scratch registers if that will
4035 either produce a simpler sequence, or we will want to cse the values.
4036 Return value is the number of insns emitted. */
4038 /* ??? Tweak this for thumb2. */
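/* Worked example (illustrative): a SET of 0x12345678 on a core without
   movw/movt can be synthesized as four insns, e.g.
     mov  rD, #0x12000000
     orr  rD, rD, #0x340000
     orr  rD, rD, #0x5600
     orr  rD, rD, #0x78
   whereas with MOVW/MOVT the pair emitted by arm_emit_movpair needs
   only two.  */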
4040 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4041 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4043 rtx cond;
4045 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4046 cond = COND_EXEC_TEST (PATTERN (insn));
4047 else
4048 cond = NULL_RTX;
4050 if (subtargets || code == SET
4051 || (REG_P (target) && REG_P (source)
4052 && REGNO (target) != REGNO (source)))
4054 /* After arm_reorg has been called, we can't fix up expensive
4055 constants by pushing them into memory so we must synthesize
4056 them in-line, regardless of the cost. This is only likely to
4057 be more costly on chips that have load delay slots and we are
4058 compiling without running the scheduler (so no splitting
4059 occurred before the final instruction emission).
4061 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4063 if (!cfun->machine->after_arm_reorg
4064 && !cond
4065 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4066 1, 0)
4067 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4068 + (code != SET))))
4070 if (code == SET)
4072 /* Currently SET is the only monadic value for CODE; all
4073 the rest are dyadic. */
4074 if (TARGET_USE_MOVT)
4075 arm_emit_movpair (target, GEN_INT (val));
4076 else
4077 emit_set_insn (target, GEN_INT (val));
4079 return 1;
4081 else
4083 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4085 if (TARGET_USE_MOVT)
4086 arm_emit_movpair (temp, GEN_INT (val));
4087 else
4088 emit_set_insn (temp, GEN_INT (val));
4090 /* For MINUS, the value is subtracted from, since we never
4091 have subtraction of a constant. */
4092 if (code == MINUS)
4093 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4094 else
4095 emit_set_insn (target,
4096 gen_rtx_fmt_ee (code, mode, source, temp));
4097 return 2;
4102 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4106 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4107 ARM/THUMB2 immediates, and add up to VAL.
4108 The function return value gives the number of insns required. */
4109 static int
4110 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4111 struct four_ints *return_sequence)
4113 int best_consecutive_zeros = 0;
4114 int i;
4115 int best_start = 0;
4116 int insns1, insns2;
4117 struct four_ints tmp_sequence;
4119 /* If we aren't targeting ARM, the best place to start is always at
4120 the bottom, otherwise look more closely. */
4121 if (TARGET_ARM)
4123 for (i = 0; i < 32; i += 2)
4125 int consecutive_zeros = 0;
4127 if (!(val & (3 << i)))
4129 while ((i < 32) && !(val & (3 << i)))
4131 consecutive_zeros += 2;
4132 i += 2;
4134 if (consecutive_zeros > best_consecutive_zeros)
4136 best_consecutive_zeros = consecutive_zeros;
4137 best_start = i - consecutive_zeros;
4139 i -= 2;
4144 /* So long as it won't require any more insns to do so, it's
4145 desirable to emit a small constant (in bits 0...9) in the last
4146 insn. This way there is more chance that it can be combined with
4147 a later addressing insn to form a pre-indexed load or store
4148 operation. Consider:
4150 *((volatile int *)0xe0000100) = 1;
4151 *((volatile int *)0xe0000110) = 2;
4153 We want this to wind up as:
4155 mov rA, #0xe0000000
4156 mov rB, #1
4157 str rB, [rA, #0x100]
4158 mov rB, #2
4159 str rB, [rA, #0x110]
4161 rather than having to synthesize both large constants from scratch.
4163 Therefore, we calculate how many insns would be required to emit
4164 the constant starting from `best_start', and also starting from
4165 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4166 yield a shorter sequence, we may as well use zero. */
4167 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4168 if (best_start != 0
4169 && ((HOST_WIDE_INT_1U << best_start) < val))
4171 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4172 if (insns2 <= insns1)
4174 *return_sequence = tmp_sequence;
4175 insns1 = insns2;
4179 return insns1;
4182 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4183 static int
4184 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4185 struct four_ints *return_sequence, int i)
4187 int remainder = val & 0xffffffff;
4188 int insns = 0;
4190 /* Try and find a way of doing the job in either two or three
4191 instructions.
4193 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4194 location. We start at position I. This may be the MSB, or
4195 optimal_immediate_sequence may have positioned it at the largest block
4196 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4197 wrapping around to the top of the word when we drop off the bottom.
4198 In the worst case this code should produce no more than four insns.
4200 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4201 constants, shifted to any arbitrary location. We should always start
4202 at the MSB. */
4205 int end;
4206 unsigned int b1, b2, b3, b4;
4207 unsigned HOST_WIDE_INT result;
4208 int loc;
4210 gcc_assert (insns < 4);
4212 if (i <= 0)
4213 i += 32;
4215 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4216 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4218 loc = i;
4219 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4220 /* We can use addw/subw for the last 12 bits. */
4221 result = remainder;
4222 else
4224 /* Use an 8-bit shifted/rotated immediate. */
4225 end = i - 8;
4226 if (end < 0)
4227 end += 32;
4228 result = remainder & ((0x0ff << end)
4229 | ((i < end) ? (0xff >> (32 - end))
4230 : 0));
4231 i -= 8;
4234 else
4236 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4237 arbitrary shifts. */
4238 i -= TARGET_ARM ? 2 : 1;
4239 continue;
4242 /* Next, see if we can do a better job with a thumb2 replicated
4243 constant.
4245 We do it this way around to catch the cases like 0x01F001E0 where
4246 two 8-bit immediates would work, but a replicated constant would
4247 make it worse.
4249 TODO: 16-bit constants that don't clear all the bits, but still win.
4250 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4251 if (TARGET_THUMB2)
4253 b1 = (remainder & 0xff000000) >> 24;
4254 b2 = (remainder & 0x00ff0000) >> 16;
4255 b3 = (remainder & 0x0000ff00) >> 8;
4256 b4 = remainder & 0xff;
4258 if (loc > 24)
4260 /* The 8-bit immediate already found clears b1 (and maybe b2),
4261 but must leave b3 and b4 alone. */
4263 /* First try to find a 32-bit replicated constant that clears
4264 almost everything. We can assume that we can't do it in one,
4265 or else we wouldn't be here. */
4266 unsigned int tmp = b1 & b2 & b3 & b4;
4267 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4268 + (tmp << 24);
4269 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4270 + (tmp == b3) + (tmp == b4);
4271 if (tmp
4272 && (matching_bytes >= 3
4273 || (matching_bytes == 2
4274 && const_ok_for_op (remainder & ~tmp2, code))))
4276 /* At least 3 of the bytes match, and the fourth has at
4277 least as many bits set, or two of the bytes match
4278 and it will only require one more insn to finish. */
4279 result = tmp2;
4280 i = tmp != b1 ? 32
4281 : tmp != b2 ? 24
4282 : tmp != b3 ? 16
4283 : 8;
4286 /* Second, try to find a 16-bit replicated constant that can
4287 leave three of the bytes clear. If b2 or b4 is already
4288 zero, then we can. If the 8-bit immediate from above would not
4289 clear b2 anyway, then we still win. */
4290 else if (b1 == b3 && (!b2 || !b4
4291 || (remainder & 0x00ff0000 & ~result)))
4293 result = remainder & 0xff00ff00;
4294 i = 24;
4297 else if (loc > 16)
4299 /* The 8-bit immediate already found clears b2 (and maybe b3)
4300 and we don't get here unless b1 is already clear, but it will
4301 leave b4 unchanged. */
4303 /* If we can clear b2 and b4 at once, then we win, since the
4304 8-bit immediate couldn't possibly reach that far. */
4305 if (b2 == b4)
4307 result = remainder & 0x00ff00ff;
4308 i = 16;
4313 return_sequence->i[insns++] = result;
4314 remainder &= ~result;
4316 if (code == SET || code == MINUS)
4317 code = PLUS;
4319 while (remainder);
4321 return insns;
4324 /* Emit an instruction with the indicated PATTERN. If COND is
4325 non-NULL, conditionalize the execution of the instruction on COND
4326 being true. */
4328 static void
4329 emit_constant_insn (rtx cond, rtx pattern)
4331 if (cond)
4332 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4333 emit_insn (pattern);
4336 /* As above, but extra parameter GENERATE which, if clear, suppresses
4337 RTL generation. */
4339 static int
4340 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4341 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4342 int subtargets, int generate)
4344 int can_invert = 0;
4345 int can_negate = 0;
4346 int final_invert = 0;
4347 int i;
4348 int set_sign_bit_copies = 0;
4349 int clear_sign_bit_copies = 0;
4350 int clear_zero_bit_copies = 0;
4351 int set_zero_bit_copies = 0;
4352 int insns = 0, neg_insns, inv_insns;
4353 unsigned HOST_WIDE_INT temp1, temp2;
4354 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4355 struct four_ints *immediates;
4356 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4358 /* Find out which operations are safe for a given CODE. Also do a quick
4359 check for degenerate cases; these can occur when DImode operations
4360 are split. */
4361 switch (code)
4363 case SET:
4364 can_invert = 1;
4365 break;
4367 case PLUS:
4368 can_negate = 1;
4369 break;
4371 case IOR:
4372 if (remainder == 0xffffffff)
4374 if (generate)
4375 emit_constant_insn (cond,
4376 gen_rtx_SET (target,
4377 GEN_INT (ARM_SIGN_EXTEND (val))));
4378 return 1;
4381 if (remainder == 0)
4383 if (reload_completed && rtx_equal_p (target, source))
4384 return 0;
4386 if (generate)
4387 emit_constant_insn (cond, gen_rtx_SET (target, source));
4388 return 1;
4390 break;
4392 case AND:
4393 if (remainder == 0)
4395 if (generate)
4396 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4397 return 1;
4399 if (remainder == 0xffffffff)
4401 if (reload_completed && rtx_equal_p (target, source))
4402 return 0;
4403 if (generate)
4404 emit_constant_insn (cond, gen_rtx_SET (target, source));
4405 return 1;
4407 can_invert = 1;
4408 break;
4410 case XOR:
4411 if (remainder == 0)
4413 if (reload_completed && rtx_equal_p (target, source))
4414 return 0;
4415 if (generate)
4416 emit_constant_insn (cond, gen_rtx_SET (target, source));
4417 return 1;
4420 if (remainder == 0xffffffff)
4422 if (generate)
4423 emit_constant_insn (cond,
4424 gen_rtx_SET (target,
4425 gen_rtx_NOT (mode, source)));
4426 return 1;
4428 final_invert = 1;
4429 break;
4431 case MINUS:
4432 /* We treat MINUS as (val - source), since (source - val) is always
4433 passed as (source + (-val)). */
4434 if (remainder == 0)
4436 if (generate)
4437 emit_constant_insn (cond,
4438 gen_rtx_SET (target,
4439 gen_rtx_NEG (mode, source)));
4440 return 1;
4442 if (const_ok_for_arm (val))
4444 if (generate)
4445 emit_constant_insn (cond,
4446 gen_rtx_SET (target,
4447 gen_rtx_MINUS (mode, GEN_INT (val),
4448 source)));
4449 return 1;
4452 break;
4454 default:
4455 gcc_unreachable ();
4458 /* If we can do it in one insn get out quickly. */
4459 if (const_ok_for_op (val, code))
4461 if (generate)
4462 emit_constant_insn (cond,
4463 gen_rtx_SET (target,
4464 (source
4465 ? gen_rtx_fmt_ee (code, mode, source,
4466 GEN_INT (val))
4467 : GEN_INT (val))));
4468 return 1;
4471 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4472 insn. */
4473 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4474 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4476 if (generate)
4478 if (mode == SImode && i == 16)
4479 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4480 smaller insn. */
4481 emit_constant_insn (cond,
4482 gen_zero_extendhisi2
4483 (target, gen_lowpart (HImode, source)));
4484 else
4485 /* Extz only supports SImode, but we can coerce the operands
4486 into that mode. */
4487 emit_constant_insn (cond,
4488 gen_extzv_t2 (gen_lowpart (SImode, target),
4489 gen_lowpart (SImode, source),
4490 GEN_INT (i), const0_rtx));
4493 return 1;
4496 /* Calculate a few attributes that may be useful for specific
4497 optimizations. */
4498 /* Count number of leading zeros. */
4499 for (i = 31; i >= 0; i--)
4501 if ((remainder & (1 << i)) == 0)
4502 clear_sign_bit_copies++;
4503 else
4504 break;
4507 /* Count number of leading 1's. */
4508 for (i = 31; i >= 0; i--)
4510 if ((remainder & (1 << i)) != 0)
4511 set_sign_bit_copies++;
4512 else
4513 break;
4516 /* Count number of trailing zeros. */
4517 for (i = 0; i <= 31; i++)
4519 if ((remainder & (1 << i)) == 0)
4520 clear_zero_bit_copies++;
4521 else
4522 break;
4525 /* Count number of trailing 1's. */
4526 for (i = 0; i <= 31; i++)
4528 if ((remainder & (1 << i)) != 0)
4529 set_zero_bit_copies++;
4530 else
4531 break;
4534 switch (code)
4536 case SET:
4537 /* See if we can do this by sign_extending a constant that is known
4538 to be negative. This is a good way of doing it, since the shift
4539 may well merge into a subsequent insn. */
4540 if (set_sign_bit_copies > 1)
4542 if (const_ok_for_arm
4543 (temp1 = ARM_SIGN_EXTEND (remainder
4544 << (set_sign_bit_copies - 1))))
4546 if (generate)
4548 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4549 emit_constant_insn (cond,
4550 gen_rtx_SET (new_src, GEN_INT (temp1)));
4551 emit_constant_insn (cond,
4552 gen_ashrsi3 (target, new_src,
4553 GEN_INT (set_sign_bit_copies - 1)));
4555 return 2;
4557 /* For an inverted constant, we will need to set the low bits,
4558 these will be shifted out of harm's way. */
4559 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4560 if (const_ok_for_arm (~temp1))
4562 if (generate)
4564 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4565 emit_constant_insn (cond,
4566 gen_rtx_SET (new_src, GEN_INT (temp1)));
4567 emit_constant_insn (cond,
4568 gen_ashrsi3 (target, new_src,
4569 GEN_INT (set_sign_bit_copies - 1)));
4571 return 2;
4575 /* See if we can calculate the value as the difference between two
4576 valid immediates. */
4577 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4579 int topshift = clear_sign_bit_copies & ~1;
4581 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4582 & (0xff000000 >> topshift));
4584 /* If temp1 is zero, then that means the 9 most significant
4585 bits of remainder were 1 and we've caused it to overflow.
4586 When topshift is 0 we don't need to do anything since we
4587 can borrow from 'bit 32'. */
4588 if (temp1 == 0 && topshift != 0)
4589 temp1 = 0x80000000 >> (topshift - 1);
4591 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4593 if (const_ok_for_arm (temp2))
4595 if (generate)
4597 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4598 emit_constant_insn (cond,
4599 gen_rtx_SET (new_src, GEN_INT (temp1)));
4600 emit_constant_insn (cond,
4601 gen_addsi3 (target, new_src,
4602 GEN_INT (-temp2)));
4605 return 2;
4609 /* See if we can generate this by setting the bottom (or the top)
4610 16 bits, and then shifting these into the other half of the
4611 word. We only look for the simplest cases, to do more would cost
4612 too much. Be careful, however, not to generate this when the
4613 alternative would take fewer insns. */
4614 if (val & 0xffff0000)
4616 temp1 = remainder & 0xffff0000;
4617 temp2 = remainder & 0x0000ffff;
4619 /* Overlaps outside this range are best done using other methods. */
4620 for (i = 9; i < 24; i++)
4622 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4623 && !const_ok_for_arm (temp2))
4625 rtx new_src = (subtargets
4626 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4627 : target);
4628 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4629 source, subtargets, generate);
4630 source = new_src;
4631 if (generate)
4632 emit_constant_insn
4633 (cond,
4634 gen_rtx_SET
4635 (target,
4636 gen_rtx_IOR (mode,
4637 gen_rtx_ASHIFT (mode, source,
4638 GEN_INT (i)),
4639 source)));
4640 return insns + 1;
4644 /* Don't duplicate cases already considered. */
4645 for (i = 17; i < 24; i++)
4647 if (((temp1 | (temp1 >> i)) == remainder)
4648 && !const_ok_for_arm (temp1))
4650 rtx new_src = (subtargets
4651 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4652 : target);
4653 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4654 source, subtargets, generate);
4655 source = new_src;
4656 if (generate)
4657 emit_constant_insn
4658 (cond,
4659 gen_rtx_SET (target,
4660 gen_rtx_IOR
4661 (mode,
4662 gen_rtx_LSHIFTRT (mode, source,
4663 GEN_INT (i)),
4664 source)));
4665 return insns + 1;
4669 break;
4671 case IOR:
4672 case XOR:
4673 /* If we have IOR or XOR, and the constant can be loaded in a
4674 single instruction, and we can find a temporary to put it in,
4675 then this can be done in two instructions instead of 3-4. */
4676 if (subtargets
4677 /* TARGET can't be NULL if SUBTARGETS is 0 */
4678 || (reload_completed && !reg_mentioned_p (target, source)))
4680 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4682 if (generate)
4684 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4686 emit_constant_insn (cond,
4687 gen_rtx_SET (sub, GEN_INT (val)));
4688 emit_constant_insn (cond,
4689 gen_rtx_SET (target,
4690 gen_rtx_fmt_ee (code, mode,
4691 source, sub)));
4693 return 2;
4697 if (code == XOR)
4698 break;
4700 /* Convert.
4701 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4702 and the remainder 0s for e.g. 0xfff00000)
4703 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4705 This can be done in 2 instructions by using shifts with mov or mvn.
4706 e.g. for
4707 x = x | 0xfff00000;
4708 we generate:
4709 mvn r0, r0, asl #12
4710 mvn r0, r0, lsr #12 */
4711 if (set_sign_bit_copies > 8
4712 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4714 if (generate)
4716 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4717 rtx shift = GEN_INT (set_sign_bit_copies);
4719 emit_constant_insn
4720 (cond,
4721 gen_rtx_SET (sub,
4722 gen_rtx_NOT (mode,
4723 gen_rtx_ASHIFT (mode,
4724 source,
4725 shift))));
4726 emit_constant_insn
4727 (cond,
4728 gen_rtx_SET (target,
4729 gen_rtx_NOT (mode,
4730 gen_rtx_LSHIFTRT (mode, sub,
4731 shift))));
4733 return 2;
4736 /* Convert
4737 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4739 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4741 E.g. for r0 = r0 | 0xfff
4742 mvn r0, r0, lsr #12
4743 mvn r0, r0, asl #12
4746 if (set_zero_bit_copies > 8
4747 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4749 if (generate)
4751 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4752 rtx shift = GEN_INT (set_zero_bit_copies);
4754 emit_constant_insn
4755 (cond,
4756 gen_rtx_SET (sub,
4757 gen_rtx_NOT (mode,
4758 gen_rtx_LSHIFTRT (mode,
4759 source,
4760 shift))));
4761 emit_constant_insn
4762 (cond,
4763 gen_rtx_SET (target,
4764 gen_rtx_NOT (mode,
4765 gen_rtx_ASHIFT (mode, sub,
4766 shift))));
4768 return 2;
4771 /* This will never be reached for Thumb2 because orn is a valid
4772 instruction. This is for Thumb1 and the 32-bit ARM cases.
4774 x = y | constant (such that ~constant is a valid constant)
4775 Transform this to
4776 x = ~(~y & ~constant).
4778 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4780 if (generate)
4782 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4783 emit_constant_insn (cond,
4784 gen_rtx_SET (sub,
4785 gen_rtx_NOT (mode, source)));
4786 source = sub;
4787 if (subtargets)
4788 sub = gen_reg_rtx (mode);
4789 emit_constant_insn (cond,
4790 gen_rtx_SET (sub,
4791 gen_rtx_AND (mode, source,
4792 GEN_INT (temp1))));
4793 emit_constant_insn (cond,
4794 gen_rtx_SET (target,
4795 gen_rtx_NOT (mode, sub)));
4797 return 3;
4799 break;
4801 case AND:
4802 /* See if two shifts will do 2 or more insns' worth of work. */
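/* E.g. (illustrative) x &= 0x3fff on a core without UBFX: the mask has 18
   leading zeros and is not itself a valid immediate, so
     mov rD, rS, asl #18
     mov rD, rD, lsr #18
   clears the top bits in two insns.  */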
4803 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4805 HOST_WIDE_INT shift_mask = ((0xffffffff
4806 << (32 - clear_sign_bit_copies))
4807 & 0xffffffff);
4809 if ((remainder | shift_mask) != 0xffffffff)
4811 HOST_WIDE_INT new_val
4812 = ARM_SIGN_EXTEND (remainder | shift_mask);
4814 if (generate)
4816 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4817 insns = arm_gen_constant (AND, SImode, cond, new_val,
4818 new_src, source, subtargets, 1);
4819 source = new_src;
4821 else
4823 rtx targ = subtargets ? NULL_RTX : target;
4824 insns = arm_gen_constant (AND, mode, cond, new_val,
4825 targ, source, subtargets, 0);
4829 if (generate)
4831 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4832 rtx shift = GEN_INT (clear_sign_bit_copies);
4834 emit_insn (gen_ashlsi3 (new_src, source, shift));
4835 emit_insn (gen_lshrsi3 (target, new_src, shift));
4838 return insns + 2;
4841 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4843 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4845 if ((remainder | shift_mask) != 0xffffffff)
4847 HOST_WIDE_INT new_val
4848 = ARM_SIGN_EXTEND (remainder | shift_mask);
4849 if (generate)
4851 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4853 insns = arm_gen_constant (AND, mode, cond, new_val,
4854 new_src, source, subtargets, 1);
4855 source = new_src;
4857 else
4859 rtx targ = subtargets ? NULL_RTX : target;
4861 insns = arm_gen_constant (AND, mode, cond, new_val,
4862 targ, source, subtargets, 0);
4866 if (generate)
4868 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4869 rtx shift = GEN_INT (clear_zero_bit_copies);
4871 emit_insn (gen_lshrsi3 (new_src, source, shift));
4872 emit_insn (gen_ashlsi3 (target, new_src, shift));
4875 return insns + 2;
4878 break;
4880 default:
4881 break;
4884 /* Calculate what the instruction sequences would be if we generated it
4885 normally, negated, or inverted. */
4886 if (code == AND)
4887 /* AND cannot be split into multiple insns, so invert and use BIC. */
4888 insns = 99;
4889 else
4890 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4892 if (can_negate)
4893 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4894 &neg_immediates);
4895 else
4896 neg_insns = 99;
4898 if (can_invert || final_invert)
4899 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4900 &inv_immediates);
4901 else
4902 inv_insns = 99;
4904 immediates = &pos_immediates;
4906 /* Is the negated immediate sequence more efficient? */
4907 if (neg_insns < insns && neg_insns <= inv_insns)
4909 insns = neg_insns;
4910 immediates = &neg_immediates;
4912 else
4913 can_negate = 0;
4915 /* Is the inverted immediate sequence more efficient?
4916 We must allow for an extra NOT instruction for XOR operations, although
4917 there is some chance that the final 'mvn' will get optimized later. */
4918 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4920 insns = inv_insns;
4921 immediates = &inv_immediates;
4923 else
4925 can_invert = 0;
4926 final_invert = 0;
4929 /* Now output the chosen sequence as instructions. */
4930 if (generate)
4932 for (i = 0; i < insns; i++)
4934 rtx new_src, temp1_rtx;
4936 temp1 = immediates->i[i];
4938 if (code == SET || code == MINUS)
4939 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4940 else if ((final_invert || i < (insns - 1)) && subtargets)
4941 new_src = gen_reg_rtx (mode);
4942 else
4943 new_src = target;
4945 if (can_invert)
4946 temp1 = ~temp1;
4947 else if (can_negate)
4948 temp1 = -temp1;
4950 temp1 = trunc_int_for_mode (temp1, mode);
4951 temp1_rtx = GEN_INT (temp1);
4953 if (code == SET)
4955 else if (code == MINUS)
4956 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4957 else
4958 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4960 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4961 source = new_src;
4963 if (code == SET)
4965 can_negate = can_invert;
4966 can_invert = 0;
4967 code = PLUS;
4969 else if (code == MINUS)
4970 code = PLUS;
4974 if (final_invert)
4976 if (generate)
4977 emit_constant_insn (cond, gen_rtx_SET (target,
4978 gen_rtx_NOT (mode, source)));
4979 insns++;
4982 return insns;
4985 /* Canonicalize a comparison so that we are more likely to recognize it.
4986 This can be done for a few constant compares, where we can make the
4987 immediate value easier to load. */
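/* For instance (illustrative), the SImode test  x <= 0xfffff  uses a
   constant that no single cmp or cmn immediate can encode; rewriting it as
   x < 0x100000  (LE becomes LT, OP1 becomes OP1 + 1) needs only the valid
   immediate 0x100000.  */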
4989 static void
4990 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4991 bool op0_preserve_value)
4993 machine_mode mode;
4994 unsigned HOST_WIDE_INT i, maxval;
4996 mode = GET_MODE (*op0);
4997 if (mode == VOIDmode)
4998 mode = GET_MODE (*op1);
5000 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5002 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5003 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5004 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5005 for GTU/LEU in Thumb mode. */
5006 if (mode == DImode)
5009 if (*code == GT || *code == LE
5010 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5012 /* Missing comparison. First try to use an available
5013 comparison. */
5014 if (CONST_INT_P (*op1))
5016 i = INTVAL (*op1);
5017 switch (*code)
5019 case GT:
5020 case LE:
5021 if (i != maxval
5022 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5024 *op1 = GEN_INT (i + 1);
5025 *code = *code == GT ? GE : LT;
5026 return;
5028 break;
5029 case GTU:
5030 case LEU:
5031 if (i != ~((unsigned HOST_WIDE_INT) 0)
5032 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5034 *op1 = GEN_INT (i + 1);
5035 *code = *code == GTU ? GEU : LTU;
5036 return;
5038 break;
5039 default:
5040 gcc_unreachable ();
5044 /* If that did not work, reverse the condition. */
5045 if (!op0_preserve_value)
5047 std::swap (*op0, *op1);
5048 *code = (int)swap_condition ((enum rtx_code)*code);
5051 return;
5054 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5055 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5056 to facilitate possible combining with a cmp into 'ands'. */
5057 if (mode == SImode
5058 && GET_CODE (*op0) == ZERO_EXTEND
5059 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5060 && GET_MODE (XEXP (*op0, 0)) == QImode
5061 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5062 && subreg_lowpart_p (XEXP (*op0, 0))
5063 && *op1 == const0_rtx)
5064 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5065 GEN_INT (255));
5067 /* Comparisons smaller than DImode. Only adjust comparisons against
5068 an out-of-range constant. */
5069 if (!CONST_INT_P (*op1)
5070 || const_ok_for_arm (INTVAL (*op1))
5071 || const_ok_for_arm (- INTVAL (*op1)))
5072 return;
5074 i = INTVAL (*op1);
5076 switch (*code)
5078 case EQ:
5079 case NE:
5080 return;
5082 case GT:
5083 case LE:
5084 if (i != maxval
5085 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5087 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5088 *code = *code == GT ? GE : LT;
5089 return;
5091 break;
5093 case GE:
5094 case LT:
5095 if (i != ~maxval
5096 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5098 *op1 = GEN_INT (i - 1);
5099 *code = *code == GE ? GT : LE;
5100 return;
5102 break;
5104 case GTU:
5105 case LEU:
5106 if (i != ~((unsigned HOST_WIDE_INT) 0)
5107 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5109 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5110 *code = *code == GTU ? GEU : LTU;
5111 return;
5113 break;
5115 case GEU:
5116 case LTU:
5117 if (i != 0
5118 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5120 *op1 = GEN_INT (i - 1);
5121 *code = *code == GEU ? GTU : LEU;
5122 return;
5124 break;
5126 default:
5127 gcc_unreachable ();
5132 /* Define how to find the value returned by a function. */
5134 static rtx
5135 arm_function_value(const_tree type, const_tree func,
5136 bool outgoing ATTRIBUTE_UNUSED)
5138 machine_mode mode;
5139 int unsignedp ATTRIBUTE_UNUSED;
5140 rtx r ATTRIBUTE_UNUSED;
5142 mode = TYPE_MODE (type);
5144 if (TARGET_AAPCS_BASED)
5145 return aapcs_allocate_return_reg (mode, type, func);
5147 /* Promote integer types. */
5148 if (INTEGRAL_TYPE_P (type))
5149 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5151 /* Promotes small structs returned in a register to full-word size
5152 for big-endian AAPCS. */
5153 if (arm_return_in_msb (type))
5155 HOST_WIDE_INT size = int_size_in_bytes (type);
5156 if (size % UNITS_PER_WORD != 0)
5158 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5159 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5163 return arm_libcall_value_1 (mode);
5166 /* libcall hashtable helpers. */
5168 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5170 static inline hashval_t hash (const rtx_def *);
5171 static inline bool equal (const rtx_def *, const rtx_def *);
5172 static inline void remove (rtx_def *);
5175 inline bool
5176 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5178 return rtx_equal_p (p1, p2);
5181 inline hashval_t
5182 libcall_hasher::hash (const rtx_def *p1)
5184 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5187 typedef hash_table<libcall_hasher> libcall_table_type;
5189 static void
5190 add_libcall (libcall_table_type *htab, rtx libcall)
5192 *htab->find_slot (libcall, INSERT) = libcall;
5195 static bool
5196 arm_libcall_uses_aapcs_base (const_rtx libcall)
5198 static bool init_done = false;
5199 static libcall_table_type *libcall_htab = NULL;
5201 if (!init_done)
5203 init_done = true;
5205 libcall_htab = new libcall_table_type (31);
5206 add_libcall (libcall_htab,
5207 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5208 add_libcall (libcall_htab,
5209 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5210 add_libcall (libcall_htab,
5211 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5212 add_libcall (libcall_htab,
5213 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5215 add_libcall (libcall_htab,
5216 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5217 add_libcall (libcall_htab,
5218 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5219 add_libcall (libcall_htab,
5220 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5221 add_libcall (libcall_htab,
5222 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5224 add_libcall (libcall_htab,
5225 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5226 add_libcall (libcall_htab,
5227 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5228 add_libcall (libcall_htab,
5229 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5230 add_libcall (libcall_htab,
5231 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5232 add_libcall (libcall_htab,
5233 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5234 add_libcall (libcall_htab,
5235 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5236 add_libcall (libcall_htab,
5237 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5238 add_libcall (libcall_htab,
5239 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5241 /* Values from double-precision helper functions are returned in core
5242 registers if the selected core only supports single-precision
5243 arithmetic, even if we are using the hard-float ABI. The same is
5244 true for single-precision helpers, but we will never be using the
5245 hard-float ABI on a CPU which doesn't support single-precision
5246 operations in hardware. */
5247 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5248 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5249 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5250 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5251 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5252 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5253 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5254 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5255 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5256 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5257 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5258 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5259 SFmode));
5260 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5261 DFmode));
5264 return libcall && libcall_htab->find (libcall) != NULL;
5267 static rtx
5268 arm_libcall_value_1 (machine_mode mode)
5270 if (TARGET_AAPCS_BASED)
5271 return aapcs_libcall_value (mode);
5272 else if (TARGET_IWMMXT_ABI
5273 && arm_vector_mode_supported_p (mode))
5274 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5275 else
5276 return gen_rtx_REG (mode, ARG_REGISTER (1));
5279 /* Define how to find the value returned by a library function
5280 assuming the value has mode MODE. */
5282 static rtx
5283 arm_libcall_value (machine_mode mode, const_rtx libcall)
5285 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5286 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5288 /* The following libcalls return their result in integer registers,
5289 even though they return a floating point value. */
5290 if (arm_libcall_uses_aapcs_base (libcall))
5291 return gen_rtx_REG (mode, ARG_REGISTER(1));
5295 return arm_libcall_value_1 (mode);
5298 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5300 static bool
5301 arm_function_value_regno_p (const unsigned int regno)
5303 if (regno == ARG_REGISTER (1)
5304 || (TARGET_32BIT
5305 && TARGET_AAPCS_BASED
5306 && TARGET_VFP
5307 && TARGET_HARD_FLOAT
5308 && regno == FIRST_VFP_REGNUM)
5309 || (TARGET_IWMMXT_ABI
5310 && regno == FIRST_IWMMXT_REGNUM))
5311 return true;
5313 return false;
5316 /* Determine the amount of memory needed to store the possible return
5317 registers of an untyped call. */
5319 arm_apply_result_size (void)
5321 int size = 16;
5323 if (TARGET_32BIT)
5325 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5326 size += 32;
5327 if (TARGET_IWMMXT_ABI)
5328 size += 8;
5331 return size;
5334 /* Decide whether TYPE should be returned in memory (true)
5335 or in a register (false). FNTYPE is the type of the function making
5336 the call. */
5337 static bool
5338 arm_return_in_memory (const_tree type, const_tree fntype)
5340 HOST_WIDE_INT size;
5342 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5344 if (TARGET_AAPCS_BASED)
5346 /* Simple, non-aggregate types (i.e. not including vectors and
5347 complex) are always returned in a register (or registers).
5348 We don't care about which register here, so we can short-cut
5349 some of the detail. */
5350 if (!AGGREGATE_TYPE_P (type)
5351 && TREE_CODE (type) != VECTOR_TYPE
5352 && TREE_CODE (type) != COMPLEX_TYPE)
5353 return false;
5355 /* Any return value that is no larger than one word can be
5356 returned in r0. */
5357 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5358 return false;
5360 /* Check any available co-processors to see if they accept the
5361 type as a register candidate (VFP, for example, can return
5362 some aggregates in consecutive registers). These aren't
5363 available if the call is variadic. */
5364 if (aapcs_select_return_coproc (type, fntype) >= 0)
5365 return false;
5367 /* Vector values should be returned using ARM registers, not
5368 memory (unless they're over 16 bytes, which will break since
5369 we only have four call-clobbered registers to play with). */
5370 if (TREE_CODE (type) == VECTOR_TYPE)
5371 return (size < 0 || size > (4 * UNITS_PER_WORD));
5373 /* The rest go in memory. */
5374 return true;
5377 if (TREE_CODE (type) == VECTOR_TYPE)
5378 return (size < 0 || size > (4 * UNITS_PER_WORD));
5380 if (!AGGREGATE_TYPE_P (type) &&
5381 (TREE_CODE (type) != VECTOR_TYPE))
5382 /* All simple types are returned in registers. */
5383 return false;
5385 if (arm_abi != ARM_ABI_APCS)
5387 /* ATPCS and later return aggregate types in memory only if they are
5388 larger than a word (or are variable size). */
5389 return (size < 0 || size > UNITS_PER_WORD);
5392 /* For the arm-wince targets we choose to be compatible with Microsoft's
5393 ARM and Thumb compilers, which always return aggregates in memory. */
5394 #ifndef ARM_WINCE
5395 /* All structures/unions bigger than one word are returned in memory.
5396 Also catch the case where int_size_in_bytes returns -1. In this case
5397 the aggregate is either huge or of variable size, and in either case
5398 we will want to return it via memory and not in a register. */
5399 if (size < 0 || size > UNITS_PER_WORD)
5400 return true;
5402 if (TREE_CODE (type) == RECORD_TYPE)
5404 tree field;
5406 /* For a struct the APCS says that we only return in a register
5407 if the type is 'integer like' and every addressable element
5408 has an offset of zero. For practical purposes this means
5409 that the structure can have at most one non bit-field element
5410 and that this element must be the first one in the structure. */
5412 /* Find the first field, ignoring non FIELD_DECL things which will
5413 have been created by C++. */
5414 for (field = TYPE_FIELDS (type);
5415 field && TREE_CODE (field) != FIELD_DECL;
5416 field = DECL_CHAIN (field))
5417 continue;
5419 if (field == NULL)
5420 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5422 /* Check that the first field is valid for returning in a register. */
5424 /* ... Floats are not allowed */
5425 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5426 return true;
5428 /* ... Aggregates that are not themselves valid for returning in
5429 a register are not allowed. */
5430 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5431 return true;
5433 /* Now check the remaining fields, if any. Only bitfields are allowed,
5434 since they are not addressable. */
5435 for (field = DECL_CHAIN (field);
5436 field;
5437 field = DECL_CHAIN (field))
5439 if (TREE_CODE (field) != FIELD_DECL)
5440 continue;
5442 if (!DECL_BIT_FIELD_TYPE (field))
5443 return true;
5446 return false;
5449 if (TREE_CODE (type) == UNION_TYPE)
5451 tree field;
5453 /* Unions can be returned in registers if every element is
5454 integral, or can be returned in an integer register. */
5455 for (field = TYPE_FIELDS (type);
5456 field;
5457 field = DECL_CHAIN (field))
5459 if (TREE_CODE (field) != FIELD_DECL)
5460 continue;
5462 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5463 return true;
5465 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5466 return true;
5469 return false;
5471 #endif /* not ARM_WINCE */
5473 /* Return all other types in memory. */
5474 return true;
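/* Illustrative sketch (not part of the original source): how the AAPCS
   branch of arm_return_in_memory above treats two invented user-level
   types.  */
#if 0
struct word { short lo, hi; };   /* 4 bytes: fits in one word, so it is
                                    returned in r0 (function returns false).  */
struct pair { int a, b; };       /* 8 bytes, no co-processor candidate:
                                    returned in memory through an address
                                    supplied by the caller (returns true).  */
struct word get_word (void);
struct pair get_pair (void);
#endif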
5477 const struct pcs_attribute_arg
5479 const char *arg;
5480 enum arm_pcs value;
5481 } pcs_attribute_args[] =
5483 {"aapcs", ARM_PCS_AAPCS},
5484 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5485 #if 0
5486 /* We could recognize these, but changes would be needed elsewhere
5487 to implement them.  */
5488 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5489 {"atpcs", ARM_PCS_ATPCS},
5490 {"apcs", ARM_PCS_APCS},
5491 #endif
5492 {NULL, ARM_PCS_UNKNOWN}
5495 static enum arm_pcs
5496 arm_pcs_from_attribute (tree attr)
5498 const struct pcs_attribute_arg *ptr;
5499 const char *arg;
5501 /* Get the value of the argument. */
5502 if (TREE_VALUE (attr) == NULL_TREE
5503 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5504 return ARM_PCS_UNKNOWN;
5506 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5508 /* Check it against the list of known arguments. */
5509 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5510 if (streq (arg, ptr->arg))
5511 return ptr->value;
5513 /* An unrecognized PCS variant.  */
5514 return ARM_PCS_UNKNOWN;
5517 /* Get the PCS variant to use for this call. TYPE is the function's type
5518 specification, DECL is the specific declaration.  DECL may be null if
5519 the call could be indirect or if this is a library call. */
5520 static enum arm_pcs
5521 arm_get_pcs_model (const_tree type, const_tree decl)
5523 bool user_convention = false;
5524 enum arm_pcs user_pcs = arm_pcs_default;
5525 tree attr;
5527 gcc_assert (type);
5529 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5530 if (attr)
5532 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5533 user_convention = true;
5536 if (TARGET_AAPCS_BASED)
5538 /* Detect varargs functions. These always use the base rules
5539 (no argument is ever a candidate for a co-processor
5540 register). */
5541 bool base_rules = stdarg_p (type);
5543 if (user_convention)
5545 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5546 sorry ("non-AAPCS derived PCS variant");
5547 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5548 error ("variadic functions must use the base AAPCS variant");
5551 if (base_rules)
5552 return ARM_PCS_AAPCS;
5553 else if (user_convention)
5554 return user_pcs;
5555 else if (decl && flag_unit_at_a_time)
5557 /* Local functions never leak outside this compilation unit,
5558 so we are free to use whatever conventions are
5559 appropriate. */
5560 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5561 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5562 if (i && i->local)
5563 return ARM_PCS_AAPCS_LOCAL;
5566 else if (user_convention && user_pcs != arm_pcs_default)
5567 sorry ("PCS variant");
5569 /* For everything else we use the target's default. */
5570 return arm_pcs_default;
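/* Illustrative sketch (not from the original source): the "pcs" type
   attribute that arm_pcs_from_attribute and arm_get_pcs_model above act
   on.  The declarations are invented.  */
#if 0
/* Force the base (integer register) variant for one function...  */
double base_variant (double) __attribute__ ((pcs ("aapcs")));
/* ...or request the VFP variant explicitly.  */
double vfp_variant (double) __attribute__ ((pcs ("aapcs-vfp")));
#endif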
5574 static void
5575 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5576 const_tree fntype ATTRIBUTE_UNUSED,
5577 rtx libcall ATTRIBUTE_UNUSED,
5578 const_tree fndecl ATTRIBUTE_UNUSED)
5580 /* Record the unallocated VFP registers. */
5581 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5582 pcum->aapcs_vfp_reg_alloc = 0;
5585 /* Walk down the type tree of TYPE counting consecutive base elements.
5586 If *MODEP is VOIDmode, then set it to the first valid floating point
5587 type. If a non-floating point type is found, or if a floating point
5588 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5589 otherwise return the count in the sub-tree. */
5590 static int
5591 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5593 machine_mode mode;
5594 HOST_WIDE_INT size;
5596 switch (TREE_CODE (type))
5598 case REAL_TYPE:
5599 mode = TYPE_MODE (type);
5600 if (mode != DFmode && mode != SFmode && mode != HFmode)
5601 return -1;
5603 if (*modep == VOIDmode)
5604 *modep = mode;
5606 if (*modep == mode)
5607 return 1;
5609 break;
5611 case COMPLEX_TYPE:
5612 mode = TYPE_MODE (TREE_TYPE (type));
5613 if (mode != DFmode && mode != SFmode)
5614 return -1;
5616 if (*modep == VOIDmode)
5617 *modep = mode;
5619 if (*modep == mode)
5620 return 2;
5622 break;
5624 case VECTOR_TYPE:
5625 /* Use V2SImode and V4SImode as representatives of all 64-bit
5626 and 128-bit vector types, whether or not those modes are
5627 supported with the present options. */
5628 size = int_size_in_bytes (type);
5629 switch (size)
5631 case 8:
5632 mode = V2SImode;
5633 break;
5634 case 16:
5635 mode = V4SImode;
5636 break;
5637 default:
5638 return -1;
5641 if (*modep == VOIDmode)
5642 *modep = mode;
5644 /* Vector modes are considered to be opaque: two vectors are
5645 equivalent for the purposes of being homogeneous aggregates
5646 if they are the same size. */
5647 if (*modep == mode)
5648 return 1;
5650 break;
5652 case ARRAY_TYPE:
5654 int count;
5655 tree index = TYPE_DOMAIN (type);
5657 /* Can't handle incomplete types nor sizes that are not
5658 fixed. */
5659 if (!COMPLETE_TYPE_P (type)
5660 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5661 return -1;
5663 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5664 if (count == -1
5665 || !index
5666 || !TYPE_MAX_VALUE (index)
5667 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5668 || !TYPE_MIN_VALUE (index)
5669 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5670 || count < 0)
5671 return -1;
5673 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5674 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5676 /* There must be no padding. */
5677 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5678 return -1;
5680 return count;
5683 case RECORD_TYPE:
5685 int count = 0;
5686 int sub_count;
5687 tree field;
5689 /* Can't handle incomplete types nor sizes that are not
5690 fixed. */
5691 if (!COMPLETE_TYPE_P (type)
5692 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5693 return -1;
5695 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5697 if (TREE_CODE (field) != FIELD_DECL)
5698 continue;
5700 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5701 if (sub_count < 0)
5702 return -1;
5703 count += sub_count;
5706 /* There must be no padding. */
5707 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5708 return -1;
5710 return count;
5713 case UNION_TYPE:
5714 case QUAL_UNION_TYPE:
5716 /* These aren't very interesting except in a degenerate case. */
5717 int count = 0;
5718 int sub_count;
5719 tree field;
5721 /* Can't handle incomplete types nor sizes that are not
5722 fixed. */
5723 if (!COMPLETE_TYPE_P (type)
5724 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5725 return -1;
5727 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5729 if (TREE_CODE (field) != FIELD_DECL)
5730 continue;
5732 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5733 if (sub_count < 0)
5734 return -1;
5735 count = count > sub_count ? count : sub_count;
5738 /* There must be no padding. */
5739 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5740 return -1;
5742 return count;
5745 default:
5746 break;
5749 return -1;
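/* Illustrative sketch (not part of the original source): the kinds of
   aggregates aapcs_vfp_sub_candidate above classifies.  The type names
   are invented.  */
#if 0
struct hfa_sf  { float s[4]; };       /* 4 x SFmode: count 4, a candidate.  */
struct hfa_df  { double d0, d1; };    /* 2 x DFmode: count 2, a candidate.  */
struct mixed   { float f; int i; };   /* mixed base types: returns -1.  */
struct too_big { float s[5]; };       /* count 5: rejected later by the
                                         callers' count <= 4 check.  */
#endif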
5752 /* Return true if PCS_VARIANT should use VFP registers. */
5753 static bool
5754 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5756 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5758 static bool seen_thumb1_vfp = false;
5760 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5762 sorry ("Thumb-1 hard-float VFP ABI");
5763 /* sorry() is not immediately fatal, so only display this once. */
5764 seen_thumb1_vfp = true;
5767 return true;
5770 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5771 return false;
5773 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5774 (TARGET_VFP_DOUBLE || !is_double));
5777 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5778 suitable for passing or returning in VFP registers for the PCS
5779 variant selected. If it is, then *BASE_MODE is updated to contain
5780 a machine mode describing each element of the argument's type and
5781 *COUNT to hold the number of such elements. */
5782 static bool
5783 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5784 machine_mode mode, const_tree type,
5785 machine_mode *base_mode, int *count)
5787 machine_mode new_mode = VOIDmode;
5789 /* If we have the type information, prefer that to working things
5790 out from the mode. */
5791 if (type)
5793 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5795 if (ag_count > 0 && ag_count <= 4)
5796 *count = ag_count;
5797 else
5798 return false;
5800 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5801 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5802 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5804 *count = 1;
5805 new_mode = mode;
5807 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5809 *count = 2;
5810 new_mode = (mode == DCmode ? DFmode : SFmode);
5812 else
5813 return false;
5816 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5817 return false;
5819 *base_mode = new_mode;
5820 return true;
5823 static bool
5824 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5825 machine_mode mode, const_tree type)
5827 int count ATTRIBUTE_UNUSED;
5828 machine_mode ag_mode ATTRIBUTE_UNUSED;
5830 if (!use_vfp_abi (pcs_variant, false))
5831 return false;
5832 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5833 &ag_mode, &count);
5836 static bool
5837 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5838 const_tree type)
5840 if (!use_vfp_abi (pcum->pcs_variant, false))
5841 return false;
5843 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5844 &pcum->aapcs_vfp_rmode,
5845 &pcum->aapcs_vfp_rcount);
5848 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
5849 for the behaviour of this function. */
5851 static bool
5852 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5853 const_tree type ATTRIBUTE_UNUSED)
5855 int rmode_size
5856 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
5857 int shift = rmode_size / GET_MODE_SIZE (SFmode);
5858 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5859 int regno;
5861 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5862 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5864 pcum->aapcs_vfp_reg_alloc = mask << regno;
5865 if (mode == BLKmode
5866 || (mode == TImode && ! TARGET_NEON)
5867 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5869 int i;
5870 int rcount = pcum->aapcs_vfp_rcount;
5871 int rshift = shift;
5872 machine_mode rmode = pcum->aapcs_vfp_rmode;
5873 rtx par;
5874 if (!TARGET_NEON)
5876 /* Avoid using unsupported vector modes. */
5877 if (rmode == V2SImode)
5878 rmode = DImode;
5879 else if (rmode == V4SImode)
5881 rmode = DImode;
5882 rcount *= 2;
5883 rshift /= 2;
5886 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5887 for (i = 0; i < rcount; i++)
5889 rtx tmp = gen_rtx_REG (rmode,
5890 FIRST_VFP_REGNUM + regno + i * rshift);
5891 tmp = gen_rtx_EXPR_LIST
5892 (VOIDmode, tmp,
5893 GEN_INT (i * GET_MODE_SIZE (rmode)));
5894 XVECEXP (par, 0, i) = tmp;
5897 pcum->aapcs_reg = par;
5899 else
5900 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5901 return true;
5903 return false;
5906 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
5907 comment there for the behaviour of this function. */
5909 static rtx
5910 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5911 machine_mode mode,
5912 const_tree type ATTRIBUTE_UNUSED)
5914 if (!use_vfp_abi (pcs_variant, false))
5915 return NULL;
5917 if (mode == BLKmode
5918 || (GET_MODE_CLASS (mode) == MODE_INT
5919 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
5920 && !TARGET_NEON))
5922 int count;
5923 machine_mode ag_mode;
5924 int i;
5925 rtx par;
5926 int shift;
5928 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5929 &ag_mode, &count);
5931 if (!TARGET_NEON)
5933 if (ag_mode == V2SImode)
5934 ag_mode = DImode;
5935 else if (ag_mode == V4SImode)
5937 ag_mode = DImode;
5938 count *= 2;
5941 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
5942 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5943 for (i = 0; i < count; i++)
5945 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5946 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5947 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5948 XVECEXP (par, 0, i) = tmp;
5951 return par;
5954 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5957 static void
5958 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5959 machine_mode mode ATTRIBUTE_UNUSED,
5960 const_tree type ATTRIBUTE_UNUSED)
5962 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5963 pcum->aapcs_vfp_reg_alloc = 0;
5964 return;
5967 #define AAPCS_CP(X) \
5969 aapcs_ ## X ## _cum_init, \
5970 aapcs_ ## X ## _is_call_candidate, \
5971 aapcs_ ## X ## _allocate, \
5972 aapcs_ ## X ## _is_return_candidate, \
5973 aapcs_ ## X ## _allocate_return_reg, \
5974 aapcs_ ## X ## _advance \
5977 /* Table of co-processors that can be used to pass arguments in
5978 registers.  Ideally no argument should be a candidate for more than
5979 one co-processor table entry, but the table is processed in order
5980 and stops after the first match. If that entry then fails to put
5981 the argument into a co-processor register, the argument will go on
5982 the stack. */
5983 static struct
5985 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5986 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5988 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5989 BLKmode) is a candidate for this co-processor's registers; this
5990 function should ignore any position-dependent state in
5991 CUMULATIVE_ARGS and only use call-type dependent information. */
5992 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5994 /* Return true if the argument does get a co-processor register; it
5995 should set aapcs_reg to an RTX of the register allocated as is
5996 required for a return from FUNCTION_ARG. */
5997 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5999 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6000 be returned in this co-processor's registers. */
6001 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6003 /* Allocate and return an RTX element to hold the return type of a call. This
6004 routine must not fail and will only be called if is_return_candidate
6005 returned true with the same parameters. */
6006 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6008 /* Finish processing this argument and prepare to start processing
6009 the next one. */
6010 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6011 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6013 AAPCS_CP(vfp)
6016 #undef AAPCS_CP
6018 static int
6019 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6020 const_tree type)
6022 int i;
6024 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6025 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6026 return i;
6028 return -1;
6031 static int
6032 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6034 /* We aren't passed a decl, so we can't check that a call is local.
6035 However, it isn't clear that that would be a win anyway, since it
6036 might limit some tail-calling opportunities. */
6037 enum arm_pcs pcs_variant;
6039 if (fntype)
6041 const_tree fndecl = NULL_TREE;
6043 if (TREE_CODE (fntype) == FUNCTION_DECL)
6045 fndecl = fntype;
6046 fntype = TREE_TYPE (fntype);
6049 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6051 else
6052 pcs_variant = arm_pcs_default;
6054 if (pcs_variant != ARM_PCS_AAPCS)
6056 int i;
6058 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6059 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6060 TYPE_MODE (type),
6061 type))
6062 return i;
6064 return -1;
6067 static rtx
6068 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6069 const_tree fntype)
6071 /* We aren't passed a decl, so we can't check that a call is local.
6072 However, it isn't clear that that would be a win anyway, since it
6073 might limit some tail-calling opportunities. */
6074 enum arm_pcs pcs_variant;
6075 int unsignedp ATTRIBUTE_UNUSED;
6077 if (fntype)
6079 const_tree fndecl = NULL_TREE;
6081 if (TREE_CODE (fntype) == FUNCTION_DECL)
6083 fndecl = fntype;
6084 fntype = TREE_TYPE (fntype);
6087 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6089 else
6090 pcs_variant = arm_pcs_default;
6092 /* Promote integer types. */
6093 if (type && INTEGRAL_TYPE_P (type))
6094 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6096 if (pcs_variant != ARM_PCS_AAPCS)
6098 int i;
6100 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6101 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6102 type))
6103 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6104 mode, type);
6107 /* Promotes small structs returned in a register to full-word size
6108 for big-endian AAPCS. */
6109 if (type && arm_return_in_msb (type))
6111 HOST_WIDE_INT size = int_size_in_bytes (type);
6112 if (size % UNITS_PER_WORD != 0)
6114 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6115 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6119 return gen_rtx_REG (mode, R0_REGNUM);
6122 static rtx
6123 aapcs_libcall_value (machine_mode mode)
6125 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6126 && GET_MODE_SIZE (mode) <= 4)
6127 mode = SImode;
6129 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6132 /* Lay out a function argument using the AAPCS rules. The rule
6133 numbers referred to here are those in the AAPCS. */
6134 static void
6135 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6136 const_tree type, bool named)
6138 int nregs, nregs2;
6139 int ncrn;
6141 /* We only need to do this once per argument. */
6142 if (pcum->aapcs_arg_processed)
6143 return;
6145 pcum->aapcs_arg_processed = true;
6147 /* Special case: if named is false then we are handling an incoming
6148 anonymous argument which is on the stack. */
6149 if (!named)
6150 return;
6152 /* Is this a potential co-processor register candidate? */
6153 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6155 int slot = aapcs_select_call_coproc (pcum, mode, type);
6156 pcum->aapcs_cprc_slot = slot;
6158 /* We don't have to apply any of the rules from part B of the
6159 preparation phase, these are handled elsewhere in the
6160 compiler. */
6162 if (slot >= 0)
6164 /* A Co-processor register candidate goes either in its own
6165 class of registers or on the stack. */
6166 if (!pcum->aapcs_cprc_failed[slot])
6168 /* C1.cp - Try to allocate the argument to co-processor
6169 registers. */
6170 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6171 return;
6173 /* C2.cp - Put the argument on the stack and note that we
6174 can't assign any more candidates in this slot. We also
6175 need to note that we have allocated stack space, so that
6176 we won't later try to split a non-cprc candidate between
6177 core registers and the stack. */
6178 pcum->aapcs_cprc_failed[slot] = true;
6179 pcum->can_split = false;
6182 /* We didn't get a register, so this argument goes on the
6183 stack. */
6184 gcc_assert (pcum->can_split == false);
6185 return;
6189 /* C3 - For double-word aligned arguments, round the NCRN up to the
6190 next even number. */
6191 ncrn = pcum->aapcs_ncrn;
6192 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6193 ncrn++;
6195 nregs = ARM_NUM_REGS2 (mode, type);
6197 /* Sigh, this test should really assert that nregs > 0, but a GCC
6198 extension allows empty structs and then gives them empty size; it
6199 then allows such a structure to be passed by value. For some of
6200 the code below we have to pretend that such an argument has
6201 non-zero size so that we 'locate' it correctly either in
6202 registers or on the stack. */
6203 gcc_assert (nregs >= 0);
6205 nregs2 = nregs ? nregs : 1;
6207 /* C4 - Argument fits entirely in core registers. */
6208 if (ncrn + nregs2 <= NUM_ARG_REGS)
6210 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6211 pcum->aapcs_next_ncrn = ncrn + nregs;
6212 return;
6215 /* C5 - Some core registers left and there are no arguments already
6216 on the stack: split this argument between the remaining core
6217 registers and the stack. */
6218 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6220 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6221 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6222 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6223 return;
6226 /* C6 - NCRN is set to 4. */
6227 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6229 /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
6230 return;
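/* Illustrative sketch (not from the original source) of rules C3/C4/C6
   above, for an invented prototype compiled for the AAPCS.  */
#if 0
void f (int a, long long b, int c);
/* a -> r0.
   b -> needs doubleword alignment, so the NCRN is rounded up from 1 to 2
        (rule C3) and b occupies r2/r3 (rule C4); r1 stays unused.
   c -> no core registers are left, so it goes on the stack (C6-C8).  */
#endif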
6233 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6234 for a call to a function whose data type is FNTYPE.
6235 For a library call, FNTYPE is NULL. */
6236 void
6237 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6238 rtx libname,
6239 tree fndecl ATTRIBUTE_UNUSED)
6241 /* Long call handling. */
6242 if (fntype)
6243 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6244 else
6245 pcum->pcs_variant = arm_pcs_default;
6247 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6249 if (arm_libcall_uses_aapcs_base (libname))
6250 pcum->pcs_variant = ARM_PCS_AAPCS;
6252 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6253 pcum->aapcs_reg = NULL_RTX;
6254 pcum->aapcs_partial = 0;
6255 pcum->aapcs_arg_processed = false;
6256 pcum->aapcs_cprc_slot = -1;
6257 pcum->can_split = true;
6259 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6261 int i;
6263 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6265 pcum->aapcs_cprc_failed[i] = false;
6266 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6269 return;
6272 /* Legacy ABIs */
6274 /* On the ARM, the offset starts at 0. */
6275 pcum->nregs = 0;
6276 pcum->iwmmxt_nregs = 0;
6277 pcum->can_split = true;
6279 /* Varargs vectors are treated the same as long long.
6280 named_count avoids having to change the way arm handles 'named'.  */
6281 pcum->named_count = 0;
6282 pcum->nargs = 0;
6284 if (TARGET_REALLY_IWMMXT && fntype)
6286 tree fn_arg;
6288 for (fn_arg = TYPE_ARG_TYPES (fntype);
6289 fn_arg;
6290 fn_arg = TREE_CHAIN (fn_arg))
6291 pcum->named_count += 1;
6293 if (! pcum->named_count)
6294 pcum->named_count = INT_MAX;
6298 /* Return true if mode/type need doubleword alignment. */
6299 static bool
6300 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6302 if (!type)
6303 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6305 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6306 if (!AGGREGATE_TYPE_P (type))
6307 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6309 /* Array types: Use member alignment of element type. */
6310 if (TREE_CODE (type) == ARRAY_TYPE)
6311 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6313 /* Record/aggregate types: Use greatest member alignment of any member. */
6314 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6315 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6316 return true;
6318 return false;
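/* Illustrative sketch (not part of the original source): types for which
   arm_needs_doubleword_align above answers true or false under the AAPCS.
   The struct names are invented.  */
#if 0
/* long long and double are 8-byte aligned -> true; int and float -> false.
   For aggregates the answer follows the most-aligned member:  */
struct has_ll { int i; long long ll; };   /* true   */
struct ints   { int i[4]; };              /* false  */
#endif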
6322 /* Determine where to put an argument to a function.
6323 Value is zero to push the argument on the stack,
6324 or a hard register in which to store the argument.
6326 MODE is the argument's machine mode.
6327 TYPE is the data type of the argument (as a tree).
6328 This is null for libcalls where that information may
6329 not be available.
6330 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6331 the preceding args and about the function being called.
6332 NAMED is nonzero if this argument is a named parameter
6333 (otherwise it is an extra parameter matching an ellipsis).
6335 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6336 other arguments are passed on the stack. If (NAMED == 0) (which happens
6337 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6338 defined), say it is passed on the stack (function_prologue will
6339 indeed make it be passed on the stack if necessary).  */
6341 static rtx
6342 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6343 const_tree type, bool named)
6345 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6346 int nregs;
6348 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6349 a call insn (op3 of a call_value insn). */
6350 if (mode == VOIDmode)
6351 return const0_rtx;
6353 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6355 aapcs_layout_arg (pcum, mode, type, named);
6356 return pcum->aapcs_reg;
6359 /* Varargs vectors are treated the same as long long.
6360 named_count avoids having to change the way arm handles 'named'.  */
6361 if (TARGET_IWMMXT_ABI
6362 && arm_vector_mode_supported_p (mode)
6363 && pcum->named_count > pcum->nargs + 1)
6365 if (pcum->iwmmxt_nregs <= 9)
6366 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6367 else
6369 pcum->can_split = false;
6370 return NULL_RTX;
6374 /* Put doubleword aligned quantities in even register pairs. */
6375 if (pcum->nregs & 1
6376 && ARM_DOUBLEWORD_ALIGN
6377 && arm_needs_doubleword_align (mode, type))
6378 pcum->nregs++;
6380 /* Only allow splitting an arg between regs and memory if all preceding
6381 args were allocated to regs. For args passed by reference we only count
6382 the reference pointer. */
6383 if (pcum->can_split)
6384 nregs = 1;
6385 else
6386 nregs = ARM_NUM_REGS2 (mode, type);
6388 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6389 return NULL_RTX;
6391 return gen_rtx_REG (mode, pcum->nregs);
6394 static unsigned int
6395 arm_function_arg_boundary (machine_mode mode, const_tree type)
6397 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6398 ? DOUBLEWORD_ALIGNMENT
6399 : PARM_BOUNDARY);
6402 static int
6403 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6404 tree type, bool named)
6406 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6407 int nregs = pcum->nregs;
6409 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6411 aapcs_layout_arg (pcum, mode, type, named);
6412 return pcum->aapcs_partial;
6415 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6416 return 0;
6418 if (NUM_ARG_REGS > nregs
6419 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6420 && pcum->can_split)
6421 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6423 return 0;
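/* Illustrative sketch (not from the original source): a case where
   arm_arg_partial_bytes above reports a split, following AAPCS rule C5.
   The prototype is invented.  */
#if 0
struct big { int w[4]; };                      /* 16 bytes */
void g (int a, int b, int c, struct big s);
/* a, b, c -> r0, r1, r2.  Only r3 remains for s, so 4 bytes (the value
   reported as partial bytes) are passed in r3 and the other 12 bytes go
   on the stack.  */
#endif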
6426 /* Update the data in PCUM to advance over an argument
6427 of mode MODE and data type TYPE.
6428 (TYPE is null for libcalls where that information may not be available.) */
6430 static void
6431 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6432 const_tree type, bool named)
6434 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6436 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6438 aapcs_layout_arg (pcum, mode, type, named);
6440 if (pcum->aapcs_cprc_slot >= 0)
6442 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6443 type);
6444 pcum->aapcs_cprc_slot = -1;
6447 /* Generic stuff. */
6448 pcum->aapcs_arg_processed = false;
6449 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6450 pcum->aapcs_reg = NULL_RTX;
6451 pcum->aapcs_partial = 0;
6453 else
6455 pcum->nargs += 1;
6456 if (arm_vector_mode_supported_p (mode)
6457 && pcum->named_count > pcum->nargs
6458 && TARGET_IWMMXT_ABI)
6459 pcum->iwmmxt_nregs += 1;
6460 else
6461 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6465 /* Variable sized types are passed by reference. This is a GCC
6466 extension to the ARM ABI. */
6468 static bool
6469 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6470 machine_mode mode ATTRIBUTE_UNUSED,
6471 const_tree type, bool named ATTRIBUTE_UNUSED)
6473 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6476 /* Encode the current state of the #pragma [no_]long_calls. */
6477 typedef enum
6479 OFF, /* No #pragma [no_]long_calls is in effect. */
6480 LONG, /* #pragma long_calls is in effect. */
6481 SHORT /* #pragma no_long_calls is in effect. */
6482 } arm_pragma_enum;
6484 static arm_pragma_enum arm_pragma_long_calls = OFF;
6486 void
6487 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6489 arm_pragma_long_calls = LONG;
6492 void
6493 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6495 arm_pragma_long_calls = SHORT;
6498 void
6499 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6501 arm_pragma_long_calls = OFF;
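/* Illustrative sketch (not part of the original source): how the pragmas
   handled by the three functions above appear in user code.  The
   declarations are invented.  */
#if 0
#pragma long_calls
void far_away (void);     /* calls are generated as long (32-bit) calls */
#pragma no_long_calls
void nearby (void);       /* calls may use a plain BL */
#pragma long_calls_off
                          /* back to the command-line default */
#endif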
6504 /* Handle an attribute requiring a FUNCTION_DECL;
6505 arguments as in struct attribute_spec.handler. */
6506 static tree
6507 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6508 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6510 if (TREE_CODE (*node) != FUNCTION_DECL)
6512 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6513 name);
6514 *no_add_attrs = true;
6517 return NULL_TREE;
6520 /* Handle an "interrupt" or "isr" attribute;
6521 arguments as in struct attribute_spec.handler. */
6522 static tree
6523 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6524 bool *no_add_attrs)
6526 if (DECL_P (*node))
6528 if (TREE_CODE (*node) != FUNCTION_DECL)
6530 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6531 name);
6532 *no_add_attrs = true;
6534 /* FIXME: the argument if any is checked for type attributes;
6535 should it be checked for decl ones? */
6537 else
6539 if (TREE_CODE (*node) == FUNCTION_TYPE
6540 || TREE_CODE (*node) == METHOD_TYPE)
6542 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6544 warning (OPT_Wattributes, "%qE attribute ignored",
6545 name);
6546 *no_add_attrs = true;
6549 else if (TREE_CODE (*node) == POINTER_TYPE
6550 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6551 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6552 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6554 *node = build_variant_type_copy (*node);
6555 TREE_TYPE (*node) = build_type_attribute_variant
6556 (TREE_TYPE (*node),
6557 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6558 *no_add_attrs = true;
6560 else
6562 /* Possibly pass this attribute on from the type to a decl. */
6563 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6564 | (int) ATTR_FLAG_FUNCTION_NEXT
6565 | (int) ATTR_FLAG_ARRAY_NEXT))
6567 *no_add_attrs = true;
6568 return tree_cons (name, args, NULL_TREE);
6570 else
6572 warning (OPT_Wattributes, "%qE attribute ignored",
6573 name);
6578 return NULL_TREE;
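/* Illustrative sketch (not from the original source): typical uses of the
   "interrupt"/"isr" attribute validated above.  The handler names are
   invented.  */
#if 0
void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
void fiq_handler (void) __attribute__ ((isr ("FIQ")));
/* An unrecognized argument makes arm_isr_value return ARM_FT_UNKNOWN and
   the attribute is ignored with a warning.  */
#endif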
6581 /* Handle a "pcs" attribute; arguments as in struct
6582 attribute_spec.handler. */
6583 static tree
6584 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6585 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6587 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6589 warning (OPT_Wattributes, "%qE attribute ignored", name);
6590 *no_add_attrs = true;
6592 return NULL_TREE;
6595 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6596 /* Handle the "notshared" attribute. This attribute is another way of
6597 requesting hidden visibility. ARM's compiler supports
6598 "__declspec(notshared)"; we support the same thing via an
6599 attribute. */
6601 static tree
6602 arm_handle_notshared_attribute (tree *node,
6603 tree name ATTRIBUTE_UNUSED,
6604 tree args ATTRIBUTE_UNUSED,
6605 int flags ATTRIBUTE_UNUSED,
6606 bool *no_add_attrs)
6608 tree decl = TYPE_NAME (*node);
6610 if (decl)
6612 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6613 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6614 *no_add_attrs = false;
6616 return NULL_TREE;
6618 #endif
6620 /* Return 0 if the attributes for two types are incompatible, 1 if they
6621 are compatible, and 2 if they are nearly compatible (which causes a
6622 warning to be generated). */
6623 static int
6624 arm_comp_type_attributes (const_tree type1, const_tree type2)
6626 int l1, l2, s1, s2;
6628 /* Check for mismatch of non-default calling convention. */
6629 if (TREE_CODE (type1) != FUNCTION_TYPE)
6630 return 1;
6632 /* Check for mismatched call attributes. */
6633 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6634 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6635 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6636 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6638 /* Only bother to check if an attribute is defined. */
6639 if (l1 | l2 | s1 | s2)
6641 /* If one type has an attribute, the other must have the same attribute. */
6642 if ((l1 != l2) || (s1 != s2))
6643 return 0;
6645 /* Disallow mixed attributes. */
6646 if ((l1 & s2) || (l2 & s1))
6647 return 0;
6650 /* Check for mismatched ISR attribute. */
6651 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6652 if (! l1)
6653 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6654 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6655 if (! l2)
6656 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6657 if (l1 != l2)
6658 return 0;
6660 return 1;
6663 /* Assigns default attributes to newly defined type. This is used to
6664 set short_call/long_call attributes for function types of
6665 functions defined inside corresponding #pragma scopes. */
6666 static void
6667 arm_set_default_type_attributes (tree type)
6669 /* Add __attribute__ ((long_call)) to all functions, when
6670 inside #pragma long_calls or __attribute__ ((short_call)),
6671 when inside #pragma no_long_calls. */
6672 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6674 tree type_attr_list, attr_name;
6675 type_attr_list = TYPE_ATTRIBUTES (type);
6677 if (arm_pragma_long_calls == LONG)
6678 attr_name = get_identifier ("long_call");
6679 else if (arm_pragma_long_calls == SHORT)
6680 attr_name = get_identifier ("short_call");
6681 else
6682 return;
6684 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6685 TYPE_ATTRIBUTES (type) = type_attr_list;
6689 /* Return true if DECL is known to be linked into section SECTION. */
6691 static bool
6692 arm_function_in_section_p (tree decl, section *section)
6694 /* We can only be certain about the prevailing symbol definition. */
6695 if (!decl_binds_to_current_def_p (decl))
6696 return false;
6698 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6699 if (!DECL_SECTION_NAME (decl))
6701 /* Make sure that we will not create a unique section for DECL. */
6702 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6703 return false;
6706 return function_section (decl) == section;
6709 /* Return nonzero if a 32-bit "long_call" should be generated for
6710 a call from the current function to DECL. We generate a long_call
6711 if the function:
6713 a.  has an __attribute__((long_call))
6714 or b. is within the scope of a #pragma long_calls
6715 or c. the -mlong-calls command line switch has been specified
6717 However we do not generate a long call if the function:
6719 d. has an __attribute__ ((short_call))
6720 or e. is inside the scope of a #pragma no_long_calls
6721 or f. is defined in the same section as the current function. */
6723 bool
6724 arm_is_long_call_p (tree decl)
6726 tree attrs;
6728 if (!decl)
6729 return TARGET_LONG_CALLS;
6731 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6732 if (lookup_attribute ("short_call", attrs))
6733 return false;
6735 /* For "f", be conservative, and only cater for cases in which the
6736 whole of the current function is placed in the same section. */
6737 if (!flag_reorder_blocks_and_partition
6738 && TREE_CODE (decl) == FUNCTION_DECL
6739 && arm_function_in_section_p (decl, current_function_section ()))
6740 return false;
6742 if (lookup_attribute ("long_call", attrs))
6743 return true;
6745 return TARGET_LONG_CALLS;
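/* Illustrative sketch (not part of the original source): the per-function
   attributes consulted by arm_is_long_call_p above.  The names are
   invented.  */
#if 0
void remote_fn (void) __attribute__ ((long_call));   /* always a long call */
void local_fn (void)  __attribute__ ((short_call));  /* never a long call,
                                                        even with -mlong-calls */
#endif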
6748 /* Return nonzero if it is ok to make a tail-call to DECL. */
6749 static bool
6750 arm_function_ok_for_sibcall (tree decl, tree exp)
6752 unsigned long func_type;
6754 if (cfun->machine->sibcall_blocked)
6755 return false;
6757 /* Never tailcall something if we are generating code for Thumb-1. */
6758 if (TARGET_THUMB1)
6759 return false;
6761 /* The PIC register is live on entry to VxWorks PLT entries, so we
6762 must make the call before restoring the PIC register. */
6763 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
6764 return false;
6766 /* If we are interworking and the function is not declared static
6767 then we can't tail-call it unless we know that it exists in this
6768 compilation unit (since it might be a Thumb routine). */
6769 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6770 && !TREE_ASM_WRITTEN (decl))
6771 return false;
6773 func_type = arm_current_func_type ();
6774 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6775 if (IS_INTERRUPT (func_type))
6776 return false;
6778 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6780 /* Check that the return value locations are the same. For
6781 example that we aren't returning a value from the sibling in
6782 a VFP register but then need to transfer it to a core
6783 register. */
6784 rtx a, b;
6785 tree decl_or_type = decl;
6787 /* If it is an indirect function pointer, get the function type. */
6788 if (!decl)
6789 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
6791 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
6792 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6793 cfun->decl, false);
6794 if (!rtx_equal_p (a, b))
6795 return false;
6798 /* Never tailcall if function may be called with a misaligned SP. */
6799 if (IS_STACKALIGN (func_type))
6800 return false;
6802 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6803 references should become a NOP. Don't convert such calls into
6804 sibling calls. */
6805 if (TARGET_AAPCS_BASED
6806 && arm_abi == ARM_ABI_AAPCS
6807 && decl
6808 && DECL_WEAK (decl))
6809 return false;
6811 /* Everything else is ok. */
6812 return true;
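/* Illustrative sketch (not from the original source): one of the cases the
   return-value check above rejects.  The functions are invented and the
   example assumes a hard-float (AAPCS/VFP) default PCS.  */
#if 0
float base_pcs_fn (void) __attribute__ ((pcs ("aapcs")));  /* result in r0 */
float caller (void)                                        /* result in s0 */
{
  /* Not a sibcall candidate: the callee's result arrives in r0 while the
     caller must return its own result in s0, so a move is still needed
     after the call.  */
  return base_pcs_fn ();
}
#endif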
6816 /* Addressing mode support functions. */
6818 /* Return nonzero if X is a legitimate immediate operand when compiling
6819 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6820 int
6821 legitimate_pic_operand_p (rtx x)
6823 if (GET_CODE (x) == SYMBOL_REF
6824 || (GET_CODE (x) == CONST
6825 && GET_CODE (XEXP (x, 0)) == PLUS
6826 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6827 return 0;
6829 return 1;
6832 /* Record that the current function needs a PIC register. Initialize
6833 cfun->machine->pic_reg if we have not already done so. */
6835 static void
6836 require_pic_register (void)
6838 /* A lot of the logic here is made obscure by the fact that this
6839 routine gets called as part of the rtx cost estimation process.
6840 We don't want those calls to affect any assumptions about the real
6841 function; and further, we can't call entry_of_function() until we
6842 start the real expansion process. */
6843 if (!crtl->uses_pic_offset_table)
6845 gcc_assert (can_create_pseudo_p ());
6846 if (arm_pic_register != INVALID_REGNUM
6847 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6849 if (!cfun->machine->pic_reg)
6850 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6852 /* Play games to avoid marking the function as needing pic
6853 if we are being called as part of the cost-estimation
6854 process. */
6855 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6856 crtl->uses_pic_offset_table = 1;
6858 else
6860 rtx_insn *seq, *insn;
6862 if (!cfun->machine->pic_reg)
6863 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6865 /* Play games to avoid marking the function as needing pic
6866 if we are being called as part of the cost-estimation
6867 process. */
6868 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6870 crtl->uses_pic_offset_table = 1;
6871 start_sequence ();
6873 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6874 && arm_pic_register > LAST_LO_REGNUM)
6875 emit_move_insn (cfun->machine->pic_reg,
6876 gen_rtx_REG (Pmode, arm_pic_register));
6877 else
6878 arm_load_pic_register (0UL);
6880 seq = get_insns ();
6881 end_sequence ();
6883 for (insn = seq; insn; insn = NEXT_INSN (insn))
6884 if (INSN_P (insn))
6885 INSN_LOCATION (insn) = prologue_location;
6887 /* We can be called during expansion of PHI nodes, where
6888 we can't yet emit instructions directly in the final
6889 insn stream. Queue the insns on the entry edge, they will
6890 be committed after everything else is expanded. */
6891 insert_insn_on_edge (seq,
6892 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6898 rtx
6899 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6901 if (GET_CODE (orig) == SYMBOL_REF
6902 || GET_CODE (orig) == LABEL_REF)
6904 rtx insn;
6906 if (reg == 0)
6908 gcc_assert (can_create_pseudo_p ());
6909 reg = gen_reg_rtx (Pmode);
6912 /* VxWorks does not impose a fixed gap between segments; the run-time
6913 gap can be different from the object-file gap. We therefore can't
6914 use GOTOFF unless we are absolutely sure that the symbol is in the
6915 same segment as the GOT. Unfortunately, the flexibility of linker
6916 scripts means that we can't be sure of that in general, so assume
6917 that GOTOFF is never valid on VxWorks. */
6918 if ((GET_CODE (orig) == LABEL_REF
6919 || (GET_CODE (orig) == SYMBOL_REF &&
6920 SYMBOL_REF_LOCAL_P (orig)))
6921 && NEED_GOT_RELOC
6922 && arm_pic_data_is_text_relative)
6923 insn = arm_pic_static_addr (orig, reg);
6924 else
6926 rtx pat;
6927 rtx mem;
6929 /* If this function doesn't have a pic register, create one now. */
6930 require_pic_register ();
6932 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6934 /* Make the MEM as close to a constant as possible. */
6935 mem = SET_SRC (pat);
6936 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6937 MEM_READONLY_P (mem) = 1;
6938 MEM_NOTRAP_P (mem) = 1;
6940 insn = emit_insn (pat);
6943 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6944 by loop. */
6945 set_unique_reg_note (insn, REG_EQUAL, orig);
6947 return reg;
6949 else if (GET_CODE (orig) == CONST)
6951 rtx base, offset;
6953 if (GET_CODE (XEXP (orig, 0)) == PLUS
6954 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6955 return orig;
6957 /* Handle the case where we have: const (UNSPEC_TLS). */
6958 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6959 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6960 return orig;
6962 /* Handle the case where we have:
6963 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6964 CONST_INT. */
6965 if (GET_CODE (XEXP (orig, 0)) == PLUS
6966 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6967 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6969 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6970 return orig;
6973 if (reg == 0)
6975 gcc_assert (can_create_pseudo_p ());
6976 reg = gen_reg_rtx (Pmode);
6979 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6981 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6982 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6983 base == reg ? 0 : reg);
6985 if (CONST_INT_P (offset))
6987 /* The base register doesn't really matter, we only want to
6988 test the index for the appropriate mode. */
6989 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6991 gcc_assert (can_create_pseudo_p ());
6992 offset = force_reg (Pmode, offset);
6995 if (CONST_INT_P (offset))
6996 return plus_constant (Pmode, base, INTVAL (offset));
6999 if (GET_MODE_SIZE (mode) > 4
7000 && (GET_MODE_CLASS (mode) == MODE_INT
7001 || TARGET_SOFT_FLOAT))
7003 emit_insn (gen_addsi3 (reg, base, offset));
7004 return reg;
7007 return gen_rtx_PLUS (Pmode, base, offset);
7010 return orig;
7014 /* Find a spare register to use during the prolog of a function. */
7016 static int
7017 thumb_find_work_register (unsigned long pushed_regs_mask)
7019 int reg;
7021 /* Check the argument registers first as these are call-used. The
7022 register allocation order means that sometimes r3 might be used
7023 but earlier argument registers might not, so check them all. */
7024 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7025 if (!df_regs_ever_live_p (reg))
7026 return reg;
7028 /* Before going on to check the call-saved registers we can try a couple
7029 more ways of deducing that r3 is available. The first is when we are
7030 pushing anonymous arguments onto the stack and we have less than 4
7031 registers worth of fixed arguments(*). In this case r3 will be part of
7032 the variable argument list and so we can be sure that it will be
7033 pushed right at the start of the function. Hence it will be available
7034 for the rest of the prologue.
7035 (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
7036 if (cfun->machine->uses_anonymous_args
7037 && crtl->args.pretend_args_size > 0)
7038 return LAST_ARG_REGNUM;
7040 /* The other case is when we have fixed arguments but less than 4 registers
7041 worth. In this case r3 might be used in the body of the function, but
7042 it is not being used to convey an argument into the function. In theory
7043 we could just check crtl->args.size to see how many bytes are
7044 being passed in argument registers, but it seems that it is unreliable.
7045 Sometimes it will have the value 0 when in fact arguments are being
7046 passed. (See testcase execute/20021111-1.c for an example). So we also
7047 check the args_info.nregs field as well. The problem with this field is
7048 that it makes no allowances for arguments that are passed to the
7049 function but which are not used. Hence we could miss an opportunity
7050 when a function has an unused argument in r3. But it is better to be
7051 safe than to be sorry. */
7052 if (! cfun->machine->uses_anonymous_args
7053 && crtl->args.size >= 0
7054 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7055 && (TARGET_AAPCS_BASED
7056 ? crtl->args.info.aapcs_ncrn < 4
7057 : crtl->args.info.nregs < 4))
7058 return LAST_ARG_REGNUM;
7060 /* Otherwise look for a call-saved register that is going to be pushed. */
7061 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7062 if (pushed_regs_mask & (1 << reg))
7063 return reg;
7065 if (TARGET_THUMB2)
7067 /* Thumb-2 can use high regs. */
7068 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7069 if (pushed_regs_mask & (1 << reg))
7070 return reg;
7072 /* Something went wrong - thumb_compute_save_reg_mask()
7073 should have arranged for a suitable register to be pushed. */
7074 gcc_unreachable ();
7077 static GTY(()) int pic_labelno;
7079 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7080 low register. */
7082 void
7083 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7085 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7087 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7088 return;
7090 gcc_assert (flag_pic);
7092 pic_reg = cfun->machine->pic_reg;
7093 if (TARGET_VXWORKS_RTP)
7095 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7096 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7097 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7099 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7101 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7102 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7104 else
7106 /* We use an UNSPEC rather than a LABEL_REF because this label
7107 never appears in the code stream. */
7109 labelno = GEN_INT (pic_labelno++);
7110 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7111 l1 = gen_rtx_CONST (VOIDmode, l1);
7113 /* On the ARM the PC register contains 'dot + 8' at the time of the
7114 addition, on the Thumb it is 'dot + 4'. */
7115 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7116 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7117 UNSPEC_GOTSYM_OFF);
7118 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7120 if (TARGET_32BIT)
7122 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7124 else /* TARGET_THUMB1 */
7126 if (arm_pic_register != INVALID_REGNUM
7127 && REGNO (pic_reg) > LAST_LO_REGNUM)
7129 /* We will have pushed the pic register, so we should always be
7130 able to find a work register. */
7131 pic_tmp = gen_rtx_REG (SImode,
7132 thumb_find_work_register (saved_regs));
7133 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7134 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7135 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7137 else if (arm_pic_register != INVALID_REGNUM
7138 && arm_pic_register > LAST_LO_REGNUM
7139 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7141 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7142 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7143 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7145 else
7146 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7150 /* Need to emit this whether or not we obey regdecls,
7151 since setjmp/longjmp can cause life info to screw up. */
7152 emit_use (pic_reg);
7155 /* Generate code to load the address of a static var when flag_pic is set. */
7156 static rtx
7157 arm_pic_static_addr (rtx orig, rtx reg)
7159 rtx l1, labelno, offset_rtx, insn;
7161 gcc_assert (flag_pic);
7163 /* We use an UNSPEC rather than a LABEL_REF because this label
7164 never appears in the code stream. */
7165 labelno = GEN_INT (pic_labelno++);
7166 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7167 l1 = gen_rtx_CONST (VOIDmode, l1);
7169 /* On the ARM the PC register contains 'dot + 8' at the time of the
7170 addition, on the Thumb it is 'dot + 4'. */
7171 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7172 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7173 UNSPEC_SYMBOL_OFFSET);
7174 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7176 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7177 return insn;
7180 /* Return nonzero if X is valid as an ARM state addressing register. */
7181 static int
7182 arm_address_register_rtx_p (rtx x, int strict_p)
7184 int regno;
7186 if (!REG_P (x))
7187 return 0;
7189 regno = REGNO (x);
7191 if (strict_p)
7192 return ARM_REGNO_OK_FOR_BASE_P (regno);
7194 return (regno <= LAST_ARM_REGNUM
7195 || regno >= FIRST_PSEUDO_REGISTER
7196 || regno == FRAME_POINTER_REGNUM
7197 || regno == ARG_POINTER_REGNUM);
7200 /* Return TRUE if this rtx is the difference of a symbol and a label,
7201 and will reduce to a PC-relative relocation in the object file.
7202 Expressions like this can be left alone when generating PIC, rather
7203 than forced through the GOT. */
7204 static int
7205 pcrel_constant_p (rtx x)
7207 if (GET_CODE (x) == MINUS)
7208 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7210 return FALSE;
7213 /* Return true if X will surely end up in an index register after next
7214 splitting pass. */
7215 static bool
7216 will_be_in_index_register (const_rtx x)
7218 /* arm.md: calculate_pic_address will split this into a register. */
7219 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7222 /* Return nonzero if X is a valid ARM state address operand. */
7223 int
7224 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7225 int strict_p)
7227 bool use_ldrd;
7228 enum rtx_code code = GET_CODE (x);
7230 if (arm_address_register_rtx_p (x, strict_p))
7231 return 1;
7233 use_ldrd = (TARGET_LDRD
7234 && (mode == DImode
7235 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7237 if (code == POST_INC || code == PRE_DEC
7238 || ((code == PRE_INC || code == POST_DEC)
7239 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7240 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7242 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7243 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7244 && GET_CODE (XEXP (x, 1)) == PLUS
7245 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7247 rtx addend = XEXP (XEXP (x, 1), 1);
7249 /* Don't allow ldrd post increment by register because it's hard
7250 to fixup invalid register choices. */
7251 if (use_ldrd
7252 && GET_CODE (x) == POST_MODIFY
7253 && REG_P (addend))
7254 return 0;
7256 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7257 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7260 /* After reload constants split into minipools will have addresses
7261 from a LABEL_REF. */
7262 else if (reload_completed
7263 && (code == LABEL_REF
7264 || (code == CONST
7265 && GET_CODE (XEXP (x, 0)) == PLUS
7266 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7267 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7268 return 1;
7270 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7271 return 0;
7273 else if (code == PLUS)
7275 rtx xop0 = XEXP (x, 0);
7276 rtx xop1 = XEXP (x, 1);
7278 return ((arm_address_register_rtx_p (xop0, strict_p)
7279 && ((CONST_INT_P (xop1)
7280 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7281 || (!strict_p && will_be_in_index_register (xop1))))
7282 || (arm_address_register_rtx_p (xop1, strict_p)
7283 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7286 #if 0
7287 /* Reload currently can't handle MINUS, so disable this for now */
7288 else if (GET_CODE (x) == MINUS)
7290 rtx xop0 = XEXP (x, 0);
7291 rtx xop1 = XEXP (x, 1);
7293 return (arm_address_register_rtx_p (xop0, strict_p)
7294 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7296 #endif
7298 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7299 && code == SYMBOL_REF
7300 && CONSTANT_POOL_ADDRESS_P (x)
7301 && ! (flag_pic
7302 && symbol_mentioned_p (get_pool_constant (x))
7303 && ! pcrel_constant_p (get_pool_constant (x))))
7304 return 1;
7306 return 0;
7309 /* Return nonzero if X is a valid Thumb-2 address operand. */
7310 static int
7311 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7313 bool use_ldrd;
7314 enum rtx_code code = GET_CODE (x);
7316 if (arm_address_register_rtx_p (x, strict_p))
7317 return 1;
7319 use_ldrd = (TARGET_LDRD
7320 && (mode == DImode
7321 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7323 if (code == POST_INC || code == PRE_DEC
7324 || ((code == PRE_INC || code == POST_DEC)
7325 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7326 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7328 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7329 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7330 && GET_CODE (XEXP (x, 1)) == PLUS
7331 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7333 /* Thumb-2 only has autoincrement by constant. */
7334 rtx addend = XEXP (XEXP (x, 1), 1);
7335 HOST_WIDE_INT offset;
7337 if (!CONST_INT_P (addend))
7338 return 0;
7340 offset = INTVAL (addend);
7341 if (GET_MODE_SIZE (mode) <= 4)
7342 return (offset > -256 && offset < 256);
7344 return (use_ldrd && offset > -1024 && offset < 1024
7345 && (offset & 3) == 0);
7348 /* After reload constants split into minipools will have addresses
7349 from a LABEL_REF. */
7350 else if (reload_completed
7351 && (code == LABEL_REF
7352 || (code == CONST
7353 && GET_CODE (XEXP (x, 0)) == PLUS
7354 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7355 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7356 return 1;
7358 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7359 return 0;
7361 else if (code == PLUS)
7363 rtx xop0 = XEXP (x, 0);
7364 rtx xop1 = XEXP (x, 1);
7366 return ((arm_address_register_rtx_p (xop0, strict_p)
7367 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7368 || (!strict_p && will_be_in_index_register (xop1))))
7369 || (arm_address_register_rtx_p (xop1, strict_p)
7370 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7373 /* Normally we can assign constant values to target registers without
7374 the help of the constant pool. But there are cases where we have to
7375 use the constant pool, such as:
7376 1) assigning a label to a register;
7377 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7379 A constant pool access of the form:
7380 (set (reg r0) (mem (symbol_ref (".LC0"))))
7381 will cause the use of a literal pool (later, in function arm_reorg).
7382 So here we mark such a form as invalid; the compiler will then
7383 adjust it into:
7384 (set (reg r0) (symbol_ref (".LC0")))
7385 (set (reg r0) (mem (reg r0))).
7386 No extra register is required, and (mem (reg r0)) won't cause the
7387 use of literal pools. */
7388 else if (arm_disable_literal_pool && code == SYMBOL_REF
7389 && CONSTANT_POOL_ADDRESS_P (x))
7390 return 0;
7392 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7393 && code == SYMBOL_REF
7394 && CONSTANT_POOL_ADDRESS_P (x)
7395 && ! (flag_pic
7396 && symbol_mentioned_p (get_pool_constant (x))
7397 && ! pcrel_constant_p (get_pool_constant (x))))
7398 return 1;
7400 return 0;
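/* For illustration (a sketch under the checks above): Thumb-2 only has
   auto-increment by a constant, e.g.

       ldr   r0, [r1], #4        @ post-increment, constant in -255..255
       ldrd  r2, r3, [r1], #8    @ with LDRD, a multiple of 4 within +/-1020

   A register addend is rejected here.  */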
7403 /* Return nonzero if INDEX is valid for an address index operand in
7404 ARM state. */
7405 static int
7406 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7407 int strict_p)
7409 HOST_WIDE_INT range;
7410 enum rtx_code code = GET_CODE (index);
7412 /* Standard coprocessor addressing modes. */
7413 if (TARGET_HARD_FLOAT
7414 && TARGET_VFP
7415 && (mode == SFmode || mode == DFmode))
7416 return (code == CONST_INT && INTVAL (index) < 1024
7417 && INTVAL (index) > -1024
7418 && (INTVAL (index) & 3) == 0);
7420 /* For quad modes, we restrict the constant offset to be slightly less
7421 than what the instruction format permits. We do this because for
7422 quad mode moves, we will actually decompose them into two separate
7423 double-mode reads or writes. INDEX must therefore be a valid
7424 (double-mode) offset and so should INDEX+8. */
7425 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7426 return (code == CONST_INT
7427 && INTVAL (index) < 1016
7428 && INTVAL (index) > -1024
7429 && (INTVAL (index) & 3) == 0);
7431 /* We have no such constraint on double mode offsets, so we permit the
7432 full range of the instruction format. */
7433 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7434 return (code == CONST_INT
7435 && INTVAL (index) < 1024
7436 && INTVAL (index) > -1024
7437 && (INTVAL (index) & 3) == 0);
7439 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7440 return (code == CONST_INT
7441 && INTVAL (index) < 1024
7442 && INTVAL (index) > -1024
7443 && (INTVAL (index) & 3) == 0);
7445 if (arm_address_register_rtx_p (index, strict_p)
7446 && (GET_MODE_SIZE (mode) <= 4))
7447 return 1;
7449 if (mode == DImode || mode == DFmode)
7451 if (code == CONST_INT)
7453 HOST_WIDE_INT val = INTVAL (index);
7455 if (TARGET_LDRD)
7456 return val > -256 && val < 256;
7457 else
7458 return val > -4096 && val < 4092;
7461 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7464 if (GET_MODE_SIZE (mode) <= 4
7465 && ! (arm_arch4
7466 && (mode == HImode
7467 || mode == HFmode
7468 || (mode == QImode && outer == SIGN_EXTEND))))
7470 if (code == MULT)
7472 rtx xiop0 = XEXP (index, 0);
7473 rtx xiop1 = XEXP (index, 1);
7475 return ((arm_address_register_rtx_p (xiop0, strict_p)
7476 && power_of_two_operand (xiop1, SImode))
7477 || (arm_address_register_rtx_p (xiop1, strict_p)
7478 && power_of_two_operand (xiop0, SImode)));
7480 else if (code == LSHIFTRT || code == ASHIFTRT
7481 || code == ASHIFT || code == ROTATERT)
7483 rtx op = XEXP (index, 1);
7485 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7486 && CONST_INT_P (op)
7487 && INTVAL (op) > 0
7488 && INTVAL (op) <= 31);
7492 /* For ARM v4 we may be doing a sign-extend operation during the
7493 load. */
7494 if (arm_arch4)
7496 if (mode == HImode
7497 || mode == HFmode
7498 || (outer == SIGN_EXTEND && mode == QImode))
7499 range = 256;
7500 else
7501 range = 4096;
7503 else
7504 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7506 return (code == CONST_INT
7507 && INTVAL (index) < range
7508 && INTVAL (index) > -range);
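/* A minimal standalone sketch (for illustration; not referenced by the
   compiler, and not exhaustive -- the NEON and iWMMXt cases are omitted)
   of the constant-offset ranges accepted above for ARM state.  SIZE plays
   the role of GET_MODE_SIZE and the flags mirror the VFP, TARGET_LDRD and
   ARMv4 halfword/signed-byte cases.  */
static int
sketch_arm_const_index_ok (long long off, int size, int vfp_float,
                           int have_ldrd, int halfword_form)
{
  if (vfp_float)                /* VFP SF/DF: multiples of 4 within +/-1020.  */
    return off > -1024 && off < 1024 && (off & 3) == 0;
  if (size == 8)                /* DImode/DFmode in core registers.  */
    return have_ldrd ? (off > -256 && off < 256)
                     : (off > -4096 && off < 4092);
  if (halfword_form)            /* ARMv4 ldrh/ldrsh/ldrsb: +/-255.  */
    return off > -256 && off < 256;
  return off > -4096 && off < 4096;     /* ldr/ldrb: +/-4095.  */
}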
7511 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7512 index operand. i.e. 1, 2, 4 or 8. */
7513 static bool
7514 thumb2_index_mul_operand (rtx op)
7516 HOST_WIDE_INT val;
7518 if (!CONST_INT_P (op))
7519 return false;
7521 val = INTVAL (op);
7522 return (val == 1 || val == 2 || val == 4 || val == 8);
7525 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7526 static int
7527 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7529 enum rtx_code code = GET_CODE (index);
7531 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7532 /* Standard coprocessor addressing modes. */
7533 if (TARGET_HARD_FLOAT
7534 && TARGET_VFP
7535 && (mode == SFmode || mode == DFmode))
7536 return (code == CONST_INT && INTVAL (index) < 1024
7537 /* Thumb-2 allows only a > -256 index range for its core register
7538 load/stores. Since we allow SF/DF in core registers, we have
7539 to use the intersection of -256..4096 (core) and -1024..1024
7540 (coprocessor). */
7541 && INTVAL (index) > -256
7542 && (INTVAL (index) & 3) == 0);
7544 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7546 /* For DImode assume values will usually live in core regs
7547 and only allow LDRD addressing modes. */
7548 if (!TARGET_LDRD || mode != DImode)
7549 return (code == CONST_INT
7550 && INTVAL (index) < 1024
7551 && INTVAL (index) > -1024
7552 && (INTVAL (index) & 3) == 0);
7555 /* For quad modes, we restrict the constant offset to be slightly less
7556 than what the instruction format permits. We do this because for
7557 quad mode moves, we will actually decompose them into two separate
7558 double-mode reads or writes. INDEX must therefore be a valid
7559 (double-mode) offset and so should INDEX+8. */
7560 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7561 return (code == CONST_INT
7562 && INTVAL (index) < 1016
7563 && INTVAL (index) > -1024
7564 && (INTVAL (index) & 3) == 0);
7566 /* We have no such constraint on double mode offsets, so we permit the
7567 full range of the instruction format. */
7568 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7569 return (code == CONST_INT
7570 && INTVAL (index) < 1024
7571 && INTVAL (index) > -1024
7572 && (INTVAL (index) & 3) == 0);
7574 if (arm_address_register_rtx_p (index, strict_p)
7575 && (GET_MODE_SIZE (mode) <= 4))
7576 return 1;
7578 if (mode == DImode || mode == DFmode)
7580 if (code == CONST_INT)
7582 HOST_WIDE_INT val = INTVAL (index);
7583 /* ??? Can we assume ldrd for thumb2? */
7584 /* Thumb-2 ldrd only has reg+const addressing modes. */
7585 /* ldrd supports offsets of +-1020.
7586 However the ldr fallback does not. */
7587 return val > -256 && val < 256 && (val & 3) == 0;
7589 else
7590 return 0;
7593 if (code == MULT)
7595 rtx xiop0 = XEXP (index, 0);
7596 rtx xiop1 = XEXP (index, 1);
7598 return ((arm_address_register_rtx_p (xiop0, strict_p)
7599 && thumb2_index_mul_operand (xiop1))
7600 || (arm_address_register_rtx_p (xiop1, strict_p)
7601 && thumb2_index_mul_operand (xiop0)));
7603 else if (code == ASHIFT)
7605 rtx op = XEXP (index, 1);
7607 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7608 && CONST_INT_P (op)
7609 && INTVAL (op) > 0
7610 && INTVAL (op) <= 3);
7613 return (code == CONST_INT
7614 && INTVAL (index) < 4096
7615 && INTVAL (index) > -256);
7618 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7619 static int
7620 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7622 int regno;
7624 if (!REG_P (x))
7625 return 0;
7627 regno = REGNO (x);
7629 if (strict_p)
7630 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7632 return (regno <= LAST_LO_REGNUM
7633 || regno > LAST_VIRTUAL_REGISTER
7634 || regno == FRAME_POINTER_REGNUM
7635 || (GET_MODE_SIZE (mode) >= 4
7636 && (regno == STACK_POINTER_REGNUM
7637 || regno >= FIRST_PSEUDO_REGISTER
7638 || x == hard_frame_pointer_rtx
7639 || x == arg_pointer_rtx)));
7642 /* Return nonzero if x is a legitimate index register. This is the case
7643 for any base register that can access a QImode object. */
7644 inline static int
7645 thumb1_index_register_rtx_p (rtx x, int strict_p)
7647 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7650 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7652 The AP may be eliminated to either the SP or the FP, so we use the
7653 least common denominator, e.g. SImode, and offsets from 0 to 64.
7655 ??? Verify whether the above is the right approach.
7657 ??? Also, the FP may be eliminated to the SP, so perhaps that
7658 needs special handling also.
7660 ??? Look at how the mips16 port solves this problem. It probably uses
7661 better ways to solve some of these problems.
7663 Although it is not incorrect, we don't accept QImode and HImode
7664 addresses based on the frame pointer or arg pointer until the
7665 reload pass starts. This is so that eliminating such addresses
7666 into stack based ones won't produce impossible code. */
7668 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7670 /* ??? Not clear if this is right. Experiment. */
7671 if (GET_MODE_SIZE (mode) < 4
7672 && !(reload_in_progress || reload_completed)
7673 && (reg_mentioned_p (frame_pointer_rtx, x)
7674 || reg_mentioned_p (arg_pointer_rtx, x)
7675 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7676 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7677 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7678 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7679 return 0;
7681 /* Accept any base register. SP only in SImode or larger. */
7682 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7683 return 1;
7685 /* This is PC relative data before arm_reorg runs. */
7686 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7687 && GET_CODE (x) == SYMBOL_REF
7688 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7689 return 1;
7691 /* This is PC relative data after arm_reorg runs. */
7692 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7693 && reload_completed
7694 && (GET_CODE (x) == LABEL_REF
7695 || (GET_CODE (x) == CONST
7696 && GET_CODE (XEXP (x, 0)) == PLUS
7697 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7698 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7699 return 1;
7701 /* Post-inc indexing only supported for SImode and larger. */
7702 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7703 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7704 return 1;
7706 else if (GET_CODE (x) == PLUS)
7708 /* REG+REG address can be any two index registers. */
7709 /* We disallow FRAME+REG addressing since we know that FRAME
7710 will be replaced with STACK, and SP relative addressing only
7711 permits SP+OFFSET. */
7712 if (GET_MODE_SIZE (mode) <= 4
7713 && XEXP (x, 0) != frame_pointer_rtx
7714 && XEXP (x, 1) != frame_pointer_rtx
7715 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7716 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7717 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7718 return 1;
7720 /* REG+const has 5-7 bit offset for non-SP registers. */
7721 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7722 || XEXP (x, 0) == arg_pointer_rtx)
7723 && CONST_INT_P (XEXP (x, 1))
7724 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7725 return 1;
7727 /* REG+const has 10-bit offset for SP, but only SImode and
7728 larger is supported. */
7729 /* ??? Should probably check for DI/DFmode overflow here
7730 just like GO_IF_LEGITIMATE_OFFSET does. */
7731 else if (REG_P (XEXP (x, 0))
7732 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7733 && GET_MODE_SIZE (mode) >= 4
7734 && CONST_INT_P (XEXP (x, 1))
7735 && INTVAL (XEXP (x, 1)) >= 0
7736 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7737 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7738 return 1;
7740 else if (REG_P (XEXP (x, 0))
7741 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7742 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7743 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7744 && REGNO (XEXP (x, 0))
7745 <= LAST_VIRTUAL_POINTER_REGISTER))
7746 && GET_MODE_SIZE (mode) >= 4
7747 && CONST_INT_P (XEXP (x, 1))
7748 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7749 return 1;
7752 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7753 && GET_MODE_SIZE (mode) == 4
7754 && GET_CODE (x) == SYMBOL_REF
7755 && CONSTANT_POOL_ADDRESS_P (x)
7756 && ! (flag_pic
7757 && symbol_mentioned_p (get_pool_constant (x))
7758 && ! pcrel_constant_p (get_pool_constant (x))))
7759 return 1;
7761 return 0;
7764 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7765 instruction of mode MODE. */
7767 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7769 switch (GET_MODE_SIZE (mode))
7771 case 1:
7772 return val >= 0 && val < 32;
7774 case 2:
7775 return val >= 0 && val < 64 && (val & 1) == 0;
7777 default:
7778 return (val >= 0
7779 && (val + GET_MODE_SIZE (mode)) <= 128
7780 && (val & 3) == 0);
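/* For illustration: the ranges above match the 16-bit Thumb load/store
   immediate forms, e.g.

       ldrb  r0, [r1, #31]       @ byte access:      offsets 0..31
       ldrh  r0, [r1, #62]       @ halfword access:  even offsets 0..62
       ldr   r0, [r1, #124]      @ word access:      multiples of 4 up to 124

   so thumb_legitimate_offset_p (QImode, 32), (HImode, 63) and
   (SImode, 126) all reject the offset.  */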
7784 bool
7785 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7787 if (TARGET_ARM)
7788 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7789 else if (TARGET_THUMB2)
7790 return thumb2_legitimate_address_p (mode, x, strict_p);
7791 else /* if (TARGET_THUMB1) */
7792 return thumb1_legitimate_address_p (mode, x, strict_p);
7795 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7797 Given an rtx X being reloaded into a reg required to be
7798 in class CLASS, return the class of reg to actually use.
7799 In general this is just CLASS, but for the Thumb core registers and
7800 immediate constants we prefer a LO_REGS class or a subset. */
7802 static reg_class_t
7803 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7805 if (TARGET_32BIT)
7806 return rclass;
7807 else
7809 if (rclass == GENERAL_REGS)
7810 return LO_REGS;
7811 else
7812 return rclass;
7816 /* Build the SYMBOL_REF for __tls_get_addr. */
7818 static GTY(()) rtx tls_get_addr_libfunc;
7820 static rtx
7821 get_tls_get_addr (void)
7823 if (!tls_get_addr_libfunc)
7824 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7825 return tls_get_addr_libfunc;
7829 arm_load_tp (rtx target)
7831 if (!target)
7832 target = gen_reg_rtx (SImode);
7834 if (TARGET_HARD_TP)
7836 /* Can return in any reg. */
7837 emit_insn (gen_load_tp_hard (target));
7839 else
7841 /* Always returned in r0. Immediately copy the result into a pseudo,
7842 otherwise other uses of r0 (e.g. setting up function arguments) may
7843 clobber the value. */
7845 rtx tmp;
7847 emit_insn (gen_load_tp_soft ());
7849 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7850 emit_move_insn (target, tmp);
7852 return target;
7855 static rtx
7856 load_tls_operand (rtx x, rtx reg)
7858 rtx tmp;
7860 if (reg == NULL_RTX)
7861 reg = gen_reg_rtx (SImode);
7863 tmp = gen_rtx_CONST (SImode, x);
7865 emit_move_insn (reg, tmp);
7867 return reg;
7870 static rtx
7871 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7873 rtx insns, label, labelno, sum;
7875 gcc_assert (reloc != TLS_DESCSEQ);
7876 start_sequence ();
7878 labelno = GEN_INT (pic_labelno++);
7879 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7880 label = gen_rtx_CONST (VOIDmode, label);
7882 sum = gen_rtx_UNSPEC (Pmode,
7883 gen_rtvec (4, x, GEN_INT (reloc), label,
7884 GEN_INT (TARGET_ARM ? 8 : 4)),
7885 UNSPEC_TLS);
7886 reg = load_tls_operand (sum, reg);
7888 if (TARGET_ARM)
7889 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7890 else
7891 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7893 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7894 LCT_PURE, /* LCT_CONST? */
7895 Pmode, 1, reg, Pmode);
7897 insns = get_insns ();
7898 end_sequence ();
7900 return insns;
7903 static rtx
7904 arm_tls_descseq_addr (rtx x, rtx reg)
7906 rtx labelno = GEN_INT (pic_labelno++);
7907 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7908 rtx sum = gen_rtx_UNSPEC (Pmode,
7909 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7910 gen_rtx_CONST (VOIDmode, label),
7911 GEN_INT (!TARGET_ARM)),
7912 UNSPEC_TLS);
7913 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7915 emit_insn (gen_tlscall (x, labelno));
7916 if (!reg)
7917 reg = gen_reg_rtx (SImode);
7918 else
7919 gcc_assert (REGNO (reg) != R0_REGNUM);
7921 emit_move_insn (reg, reg0);
7923 return reg;
7927 legitimize_tls_address (rtx x, rtx reg)
7929 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7930 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7932 switch (model)
7934 case TLS_MODEL_GLOBAL_DYNAMIC:
7935 if (TARGET_GNU2_TLS)
7937 reg = arm_tls_descseq_addr (x, reg);
7939 tp = arm_load_tp (NULL_RTX);
7941 dest = gen_rtx_PLUS (Pmode, tp, reg);
7943 else
7945 /* Original scheme */
7946 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7947 dest = gen_reg_rtx (Pmode);
7948 emit_libcall_block (insns, dest, ret, x);
7950 return dest;
7952 case TLS_MODEL_LOCAL_DYNAMIC:
7953 if (TARGET_GNU2_TLS)
7955 reg = arm_tls_descseq_addr (x, reg);
7957 tp = arm_load_tp (NULL_RTX);
7959 dest = gen_rtx_PLUS (Pmode, tp, reg);
7961 else
7963 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7965 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7966 share the LDM result with other LD model accesses. */
7967 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7968 UNSPEC_TLS);
7969 dest = gen_reg_rtx (Pmode);
7970 emit_libcall_block (insns, dest, ret, eqv);
7972 /* Load the addend. */
7973 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7974 GEN_INT (TLS_LDO32)),
7975 UNSPEC_TLS);
7976 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7977 dest = gen_rtx_PLUS (Pmode, dest, addend);
7979 return dest;
7981 case TLS_MODEL_INITIAL_EXEC:
7982 labelno = GEN_INT (pic_labelno++);
7983 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7984 label = gen_rtx_CONST (VOIDmode, label);
7985 sum = gen_rtx_UNSPEC (Pmode,
7986 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7987 GEN_INT (TARGET_ARM ? 8 : 4)),
7988 UNSPEC_TLS);
7989 reg = load_tls_operand (sum, reg);
7991 if (TARGET_ARM)
7992 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7993 else if (TARGET_THUMB2)
7994 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7995 else
7997 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7998 emit_move_insn (reg, gen_const_mem (SImode, reg));
8001 tp = arm_load_tp (NULL_RTX);
8003 return gen_rtx_PLUS (Pmode, tp, reg);
8005 case TLS_MODEL_LOCAL_EXEC:
8006 tp = arm_load_tp (NULL_RTX);
8008 reg = gen_rtx_UNSPEC (Pmode,
8009 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8010 UNSPEC_TLS);
8011 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8013 return gen_rtx_PLUS (Pmode, tp, reg);
8015 default:
8016 abort ();
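/* For illustration: a source-level example that ends up in the models
   handled above (the names below are placeholders).  Which sequence is
   chosen depends on -fpic/-fPIC and can be forced with
   -ftls-model={global-dynamic,local-dynamic,initial-exec,local-exec}
   or with __attribute__((tls_model("..."))):

       __thread int tls_counter;

       int
       bump_tls_counter (void)
       {
         return ++tls_counter;
       }

   The address of tls_counter is produced by legitimize_tls_address when
   the access is expanded.  */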
8020 /* Try machine-dependent ways of modifying an illegitimate address
8021 to be legitimate. If we find one, return the new, valid address. */
8023 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8025 if (arm_tls_referenced_p (x))
8027 rtx addend = NULL;
8029 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8031 addend = XEXP (XEXP (x, 0), 1);
8032 x = XEXP (XEXP (x, 0), 0);
8035 if (GET_CODE (x) != SYMBOL_REF)
8036 return x;
8038 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8040 x = legitimize_tls_address (x, NULL_RTX);
8042 if (addend)
8044 x = gen_rtx_PLUS (SImode, x, addend);
8045 orig_x = x;
8047 else
8048 return x;
8051 if (!TARGET_ARM)
8053 /* TODO: legitimize_address for Thumb2. */
8054 if (TARGET_THUMB2)
8055 return x;
8056 return thumb_legitimize_address (x, orig_x, mode);
8059 if (GET_CODE (x) == PLUS)
8061 rtx xop0 = XEXP (x, 0);
8062 rtx xop1 = XEXP (x, 1);
8064 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8065 xop0 = force_reg (SImode, xop0);
8067 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8068 && !symbol_mentioned_p (xop1))
8069 xop1 = force_reg (SImode, xop1);
8071 if (ARM_BASE_REGISTER_RTX_P (xop0)
8072 && CONST_INT_P (xop1))
8074 HOST_WIDE_INT n, low_n;
8075 rtx base_reg, val;
8076 n = INTVAL (xop1);
8078 /* VFP addressing modes actually allow greater offsets, but for
8079 now we just stick with the lowest common denominator. */
8080 if (mode == DImode
8081 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
8083 low_n = n & 0x0f;
8084 n &= ~0x0f;
8085 if (low_n > 4)
8087 n += 16;
8088 low_n -= 16;
8091 else
8093 low_n = ((mode) == TImode ? 0
8094 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8095 n -= low_n;
8098 base_reg = gen_reg_rtx (SImode);
8099 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8100 emit_move_insn (base_reg, val);
8101 x = plus_constant (Pmode, base_reg, low_n);
8103 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8104 x = gen_rtx_PLUS (SImode, xop0, xop1);
8107 /* XXX We don't allow MINUS any more -- see comment in
8108 arm_legitimate_address_outer_p (). */
8109 else if (GET_CODE (x) == MINUS)
8111 rtx xop0 = XEXP (x, 0);
8112 rtx xop1 = XEXP (x, 1);
8114 if (CONSTANT_P (xop0))
8115 xop0 = force_reg (SImode, xop0);
8117 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8118 xop1 = force_reg (SImode, xop1);
8120 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8121 x = gen_rtx_MINUS (SImode, xop0, xop1);
8124 /* Make sure to take full advantage of the pre-indexed addressing mode
8125 with absolute addresses, which often allows the base register to
8126 be factorized for multiple adjacent memory references and might
8127 even allow the minipool to be avoided entirely. */
8128 else if (CONST_INT_P (x) && optimize > 0)
8130 unsigned int bits;
8131 HOST_WIDE_INT mask, base, index;
8132 rtx base_reg;
8134 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
8135 use an 8-bit index. So let's use a 12-bit index for SImode only and
8136 hope that arm_gen_constant will enable ldrb to use more bits. */
8137 bits = (mode == SImode) ? 12 : 8;
8138 mask = (1 << bits) - 1;
8139 base = INTVAL (x) & ~mask;
8140 index = INTVAL (x) & mask;
8141 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8143 /* It'll most probably be more efficient to generate the base
8144 with more bits set and use a negative index instead. */
8145 base |= mask;
8146 index -= mask;
8148 base_reg = force_reg (SImode, GEN_INT (base));
8149 x = plus_constant (Pmode, base_reg, index);
8152 if (flag_pic)
8154 /* We need to find and carefully transform any SYMBOL and LABEL
8155 references; so go back to the original address expression. */
8156 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8158 if (new_x != orig_x)
8159 x = new_x;
8162 return x;
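/* A minimal standalone sketch (for illustration; not referenced by the
   compiler) of the constant-address split above.  ADDR stands for the
   absolute address, BITS for the offset width (12 for SImode, 8
   otherwise); the function name and signature are illustrative only.  */
static void
sketch_split_constant_address (unsigned int addr, int bits,
                               long long *base, long long *index)
{
  unsigned int mask = (1u << bits) - 1;

  *base = addr & ~mask;
  *index = addr & mask;
  if (__builtin_popcount (addr & ~mask) > (32 - bits) / 2)
    {
      /* A base constant with more bits set is usually cheaper to build,
         so flip to that base and compensate with a negative index.  */
      *base |= mask;
      *index -= mask;
    }
}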
8166 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8167 to be legitimate. If we find one, return the new, valid address. */
8169 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8171 if (GET_CODE (x) == PLUS
8172 && CONST_INT_P (XEXP (x, 1))
8173 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8174 || INTVAL (XEXP (x, 1)) < 0))
8176 rtx xop0 = XEXP (x, 0);
8177 rtx xop1 = XEXP (x, 1);
8178 HOST_WIDE_INT offset = INTVAL (xop1);
8180 /* Try and fold the offset into a biasing of the base register and
8181 then offsetting that. Don't do this when optimizing for space
8182 since it can cause too many CSEs. */
8183 if (optimize_size && offset >= 0
8184 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8186 HOST_WIDE_INT delta;
8188 if (offset >= 256)
8189 delta = offset - (256 - GET_MODE_SIZE (mode));
8190 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8191 delta = 31 * GET_MODE_SIZE (mode);
8192 else
8193 delta = offset & (~31 * GET_MODE_SIZE (mode));
8195 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8196 NULL_RTX);
8197 x = plus_constant (Pmode, xop0, delta);
8199 else if (offset < 0 && offset > -256)
8200 /* Small negative offsets are best done with a subtract before the
8201 dereference, since forcing these into a register normally takes two
8202 instructions. */
8203 x = force_operand (x, NULL_RTX);
8204 else
8206 /* For the remaining cases, force the constant into a register. */
8207 xop1 = force_reg (SImode, xop1);
8208 x = gen_rtx_PLUS (SImode, xop0, xop1);
8211 else if (GET_CODE (x) == PLUS
8212 && s_register_operand (XEXP (x, 1), SImode)
8213 && !s_register_operand (XEXP (x, 0), SImode))
8215 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8217 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8220 if (flag_pic)
8222 /* We need to find and carefully transform any SYMBOL and LABEL
8223 references; so go back to the original address expression. */
8224 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8226 if (new_x != orig_x)
8227 x = new_x;
8230 return x;
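/* A standalone sketch (for illustration; not referenced by the compiler)
   of the offset folding above, assuming 0 <= OFFSET < 256 + 31 * SIZE as
   in the caller: bias the base register by OFFSET - DELTA and keep DELTA
   as the remaining constant offset.  */
static void
sketch_thumb_fold_offset (long offset, int size, long *bias, long *delta)
{
  if (offset >= 256)
    *delta = offset - (256 - size);
  else if (offset < 32 * size + 8)
    *delta = 31 * size;
  else
    *delta = offset & (-32 * size);

  *bias = offset - *delta;
}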
8233 /* Return TRUE if X contains any TLS symbol references. */
8235 bool
8236 arm_tls_referenced_p (rtx x)
8238 if (! TARGET_HAVE_TLS)
8239 return false;
8241 subrtx_iterator::array_type array;
8242 FOR_EACH_SUBRTX (iter, array, x, ALL)
8244 const_rtx x = *iter;
8245 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8246 return true;
8248 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8249 TLS offsets, not real symbol references. */
8250 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8251 iter.skip_subrtxes ();
8253 return false;
8256 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8258 On the ARM, allow any integer (invalid ones are removed later by insn
8259 patterns), nice doubles and symbol_refs which refer to the function's
8260 constant pool XXX.
8262 When generating pic allow anything. */
8264 static bool
8265 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8267 return flag_pic || !label_mentioned_p (x);
8270 static bool
8271 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8273 return (CONST_INT_P (x)
8274 || CONST_DOUBLE_P (x)
8275 || CONSTANT_ADDRESS_P (x)
8276 || flag_pic);
8279 static bool
8280 arm_legitimate_constant_p (machine_mode mode, rtx x)
8282 return (!arm_cannot_force_const_mem (mode, x)
8283 && (TARGET_32BIT
8284 ? arm_legitimate_constant_p_1 (mode, x)
8285 : thumb_legitimate_constant_p (mode, x)));
8288 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8290 static bool
8291 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8293 rtx base, offset;
8295 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8297 split_const (x, &base, &offset);
8298 if (GET_CODE (base) == SYMBOL_REF
8299 && !offset_within_block_p (base, INTVAL (offset)))
8300 return true;
8302 return arm_tls_referenced_p (x);
8305 #define REG_OR_SUBREG_REG(X) \
8306 (REG_P (X) \
8307 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8309 #define REG_OR_SUBREG_RTX(X) \
8310 (REG_P (X) ? (X) : SUBREG_REG (X))
8312 static inline int
8313 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8315 machine_mode mode = GET_MODE (x);
8316 int total, words;
8318 switch (code)
8320 case ASHIFT:
8321 case ASHIFTRT:
8322 case LSHIFTRT:
8323 case ROTATERT:
8324 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8326 case PLUS:
8327 case MINUS:
8328 case COMPARE:
8329 case NEG:
8330 case NOT:
8331 return COSTS_N_INSNS (1);
8333 case MULT:
8334 if (CONST_INT_P (XEXP (x, 1)))
8336 int cycles = 0;
8337 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8339 while (i)
8341 i >>= 2;
8342 cycles++;
8344 return COSTS_N_INSNS (2) + cycles;
8346 return COSTS_N_INSNS (1) + 16;
8348 case SET:
8349 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8350 the mode. */
8351 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8352 return (COSTS_N_INSNS (words)
8353 + 4 * ((MEM_P (SET_SRC (x)))
8354 + MEM_P (SET_DEST (x))));
8356 case CONST_INT:
8357 if (outer == SET)
8359 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8360 return 0;
8361 if (thumb_shiftable_const (INTVAL (x)))
8362 return COSTS_N_INSNS (2);
8363 return COSTS_N_INSNS (3);
8365 else if ((outer == PLUS || outer == COMPARE)
8366 && INTVAL (x) < 256 && INTVAL (x) > -256)
8367 return 0;
8368 else if ((outer == IOR || outer == XOR || outer == AND)
8369 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8370 return COSTS_N_INSNS (1);
8371 else if (outer == AND)
8373 int i;
8374 /* This duplicates the tests in the andsi3 expander. */
8375 for (i = 9; i <= 31; i++)
8376 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8377 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8378 return COSTS_N_INSNS (2);
8380 else if (outer == ASHIFT || outer == ASHIFTRT
8381 || outer == LSHIFTRT)
8382 return 0;
8383 return COSTS_N_INSNS (2);
8385 case CONST:
8386 case CONST_DOUBLE:
8387 case LABEL_REF:
8388 case SYMBOL_REF:
8389 return COSTS_N_INSNS (3);
8391 case UDIV:
8392 case UMOD:
8393 case DIV:
8394 case MOD:
8395 return 100;
8397 case TRUNCATE:
8398 return 99;
8400 case AND:
8401 case XOR:
8402 case IOR:
8403 /* XXX guess. */
8404 return 8;
8406 case MEM:
8407 /* XXX another guess. */
8408 /* Memory costs quite a lot for the first word, but subsequent words
8409 load at the equivalent of a single insn each. */
8410 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8411 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8412 ? 4 : 0));
8414 case IF_THEN_ELSE:
8415 /* XXX a guess. */
8416 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8417 return 14;
8418 return 2;
8420 case SIGN_EXTEND:
8421 case ZERO_EXTEND:
8422 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8423 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8425 if (mode == SImode)
8426 return total;
8428 if (arm_arch6)
8429 return total + COSTS_N_INSNS (1);
8431 /* Assume a two-shift sequence. Increase the cost slightly so
8432 we prefer actual shifts over an extend operation. */
8433 return total + 1 + COSTS_N_INSNS (2);
8435 default:
8436 return 99;
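/* A standalone sketch (for illustration; not referenced by the compiler)
   of the MULT-by-constant estimate in the case above: the model charges
   one extra cycle per two significant bits of the constant multiplier.  */
static int
sketch_thumb1_mul_cycles (unsigned long long multiplier)
{
  int cycles = 0;

  while (multiplier)
    {
      multiplier >>= 2;
      cycles++;
    }
  return cycles;        /* e.g. 0x5 -> 2, 0xff -> 4, 0xffffffff -> 16.  */
}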
8440 static inline bool
8441 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8443 machine_mode mode = GET_MODE (x);
8444 enum rtx_code subcode;
8445 rtx operand;
8446 enum rtx_code code = GET_CODE (x);
8447 *total = 0;
8449 switch (code)
8451 case MEM:
8452 /* Memory costs quite a lot for the first word, but subsequent words
8453 load at the equivalent of a single insn each. */
8454 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8455 return true;
8457 case DIV:
8458 case MOD:
8459 case UDIV:
8460 case UMOD:
8461 if (TARGET_HARD_FLOAT && mode == SFmode)
8462 *total = COSTS_N_INSNS (2);
8463 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8464 *total = COSTS_N_INSNS (4);
8465 else
8466 *total = COSTS_N_INSNS (20);
8467 return false;
8469 case ROTATE:
8470 if (REG_P (XEXP (x, 1)))
8471 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8472 else if (!CONST_INT_P (XEXP (x, 1)))
8473 *total = rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8475 /* Fall through */
8476 case ROTATERT:
8477 if (mode != SImode)
8479 *total += COSTS_N_INSNS (4);
8480 return true;
8483 /* Fall through */
8484 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8485 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8486 if (mode == DImode)
8488 *total += COSTS_N_INSNS (3);
8489 return true;
8492 *total += COSTS_N_INSNS (1);
8493 /* Increase the cost of complex shifts because they aren't any faster,
8494 and reduce dual issue opportunities. */
8495 if (arm_tune_cortex_a9
8496 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8497 ++*total;
8499 return true;
8501 case MINUS:
8502 if (mode == DImode)
8504 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8505 if (CONST_INT_P (XEXP (x, 0))
8506 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8508 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8509 return true;
8512 if (CONST_INT_P (XEXP (x, 1))
8513 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8515 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8516 return true;
8519 return false;
8522 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8524 if (TARGET_HARD_FLOAT
8525 && (mode == SFmode
8526 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8528 *total = COSTS_N_INSNS (1);
8529 if (CONST_DOUBLE_P (XEXP (x, 0))
8530 && arm_const_double_rtx (XEXP (x, 0)))
8532 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8533 return true;
8536 if (CONST_DOUBLE_P (XEXP (x, 1))
8537 && arm_const_double_rtx (XEXP (x, 1)))
8539 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8540 return true;
8543 return false;
8545 *total = COSTS_N_INSNS (20);
8546 return false;
8549 *total = COSTS_N_INSNS (1);
8550 if (CONST_INT_P (XEXP (x, 0))
8551 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8553 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8554 return true;
8557 subcode = GET_CODE (XEXP (x, 1));
8558 if (subcode == ASHIFT || subcode == ASHIFTRT
8559 || subcode == LSHIFTRT
8560 || subcode == ROTATE || subcode == ROTATERT)
8562 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8563 *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
8564 return true;
8567 /* A shift as a part of RSB costs no more than RSB itself. */
8568 if (GET_CODE (XEXP (x, 0)) == MULT
8569 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8571 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, speed);
8572 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8573 return true;
8576 if (subcode == MULT
8577 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8579 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8580 *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
8581 return true;
8584 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8585 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8587 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
8588 0, speed);
8589 if (REG_P (XEXP (XEXP (x, 1), 0))
8590 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8591 *total += COSTS_N_INSNS (1);
8593 return true;
8596 /* Fall through */
8598 case PLUS:
8599 if (code == PLUS && arm_arch6 && mode == SImode
8600 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8601 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8603 *total = COSTS_N_INSNS (1);
8604 *total += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
8605 GET_CODE (XEXP (x, 0)), 0, speed);
8606 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8607 return true;
8610 /* MLA: All arguments must be registers. We filter out
8611 multiplication by a power of two, so that we fall through to
8612 the code below. */
8613 if (GET_CODE (XEXP (x, 0)) == MULT
8614 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8616 /* The cost comes from the cost of the multiply. */
8617 return false;
8620 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8622 if (TARGET_HARD_FLOAT
8623 && (mode == SFmode
8624 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8626 *total = COSTS_N_INSNS (1);
8627 if (CONST_DOUBLE_P (XEXP (x, 1))
8628 && arm_const_double_rtx (XEXP (x, 1)))
8630 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8631 return true;
8634 return false;
8637 *total = COSTS_N_INSNS (20);
8638 return false;
8641 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8642 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8644 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), mode, code,
8645 1, speed);
8646 if (REG_P (XEXP (XEXP (x, 0), 0))
8647 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8648 *total += COSTS_N_INSNS (1);
8649 return true;
8652 /* Fall through */
8654 case AND: case XOR: case IOR:
8656 /* Normally the frame registers will be split into reg+const during
8657 reload, so it is a bad idea to combine them with other instructions,
8658 since then they might not be moved outside of loops. As a compromise
8659 we allow integration with ops that have a constant as their second
8660 operand. */
8661 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8662 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8663 && !CONST_INT_P (XEXP (x, 1)))
8664 *total = COSTS_N_INSNS (1);
8666 if (mode == DImode)
8668 *total += COSTS_N_INSNS (2);
8669 if (CONST_INT_P (XEXP (x, 1))
8670 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8672 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8673 return true;
8676 return false;
8679 *total += COSTS_N_INSNS (1);
8680 if (CONST_INT_P (XEXP (x, 1))
8681 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8683 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8684 return true;
8686 subcode = GET_CODE (XEXP (x, 0));
8687 if (subcode == ASHIFT || subcode == ASHIFTRT
8688 || subcode == LSHIFTRT
8689 || subcode == ROTATE || subcode == ROTATERT)
8691 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8692 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8693 return true;
8696 if (subcode == MULT
8697 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8699 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8700 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8701 return true;
8704 if (subcode == UMIN || subcode == UMAX
8705 || subcode == SMIN || subcode == SMAX)
8707 *total = COSTS_N_INSNS (3);
8708 return true;
8711 return false;
8713 case MULT:
8714 /* This should have been handled by the CPU specific routines. */
8715 gcc_unreachable ();
8717 case TRUNCATE:
8718 if (arm_arch3m && mode == SImode
8719 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8720 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8721 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8722 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8723 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8724 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8726 *total = rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, LSHIFTRT,
8727 0, speed);
8728 return true;
8730 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8731 return false;
8733 case NEG:
8734 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8736 if (TARGET_HARD_FLOAT
8737 && (mode == SFmode
8738 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8740 *total = COSTS_N_INSNS (1);
8741 return false;
8743 *total = COSTS_N_INSNS (2);
8744 return false;
8747 /* Fall through */
8748 case NOT:
8749 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8750 if (mode == SImode && code == NOT)
8752 subcode = GET_CODE (XEXP (x, 0));
8753 if (subcode == ASHIFT || subcode == ASHIFTRT
8754 || subcode == LSHIFTRT
8755 || subcode == ROTATE || subcode == ROTATERT
8756 || (subcode == MULT
8757 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8759 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode,
8760 0, speed);
8761 /* Register shifts cost an extra cycle. */
8762 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8763 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8764 mode, subcode,
8765 1, speed);
8766 return true;
8770 return false;
8772 case IF_THEN_ELSE:
8773 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8775 *total = COSTS_N_INSNS (4);
8776 return true;
8779 operand = XEXP (x, 0);
8781 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8782 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8783 && REG_P (XEXP (operand, 0))
8784 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8785 *total += COSTS_N_INSNS (1);
8786 *total += rtx_cost (XEXP (x, 1), VOIDmode, code, 1, speed);
8787 *total += rtx_cost (XEXP (x, 2), VOIDmode, code, 2, speed);
8788 return true;
8790 case NE:
8791 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8793 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
8794 0, speed);
8795 return true;
8797 goto scc_insn;
8799 case GE:
8800 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8801 && mode == SImode && XEXP (x, 1) == const0_rtx)
8803 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
8804 0, speed);
8805 return true;
8807 goto scc_insn;
8809 case LT:
8810 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8811 && mode == SImode && XEXP (x, 1) == const0_rtx)
8813 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
8814 0, speed);
8815 return true;
8817 goto scc_insn;
8819 case EQ:
8820 case GT:
8821 case LE:
8822 case GEU:
8823 case LTU:
8824 case GTU:
8825 case LEU:
8826 case UNORDERED:
8827 case ORDERED:
8828 case UNEQ:
8829 case UNGE:
8830 case UNLT:
8831 case UNGT:
8832 case UNLE:
8833 scc_insn:
8834 /* SCC insns. In the case where the comparison has already been
8835 performed, then they cost 2 instructions. Otherwise they need
8836 an additional comparison before them. */
8837 *total = COSTS_N_INSNS (2);
8838 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8840 return true;
8843 /* Fall through */
8844 case COMPARE:
8845 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8847 *total = 0;
8848 return true;
8851 *total += COSTS_N_INSNS (1);
8852 if (CONST_INT_P (XEXP (x, 1))
8853 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8855 *total += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed);
8856 return true;
8859 subcode = GET_CODE (XEXP (x, 0));
8860 if (subcode == ASHIFT || subcode == ASHIFTRT
8861 || subcode == LSHIFTRT
8862 || subcode == ROTATE || subcode == ROTATERT)
8864 mode = GET_MODE (XEXP (x, 0));
8865 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8866 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8867 return true;
8870 if (subcode == MULT
8871 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8873 mode = GET_MODE (XEXP (x, 0));
8874 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8875 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8876 return true;
8879 return false;
8881 case UMIN:
8882 case UMAX:
8883 case SMIN:
8884 case SMAX:
8885 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8886 if (!CONST_INT_P (XEXP (x, 1))
8887 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8888 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8889 return true;
8891 case ABS:
8892 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8894 if (TARGET_HARD_FLOAT
8895 && (mode == SFmode
8896 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8898 *total = COSTS_N_INSNS (1);
8899 return false;
8901 *total = COSTS_N_INSNS (20);
8902 return false;
8904 *total = COSTS_N_INSNS (1);
8905 if (mode == DImode)
8906 *total += COSTS_N_INSNS (3);
8907 return false;
8909 case SIGN_EXTEND:
8910 case ZERO_EXTEND:
8911 *total = 0;
8912 if (GET_MODE_CLASS (mode) == MODE_INT)
8914 rtx op = XEXP (x, 0);
8915 machine_mode opmode = GET_MODE (op);
8917 if (mode == DImode)
8918 *total += COSTS_N_INSNS (1);
8920 if (opmode != SImode)
8922 if (MEM_P (op))
8924 /* If !arm_arch4, we use one of the extendhisi2_mem
8925 or movhi_bytes patterns for HImode. For a QImode
8926 sign extension, we first zero-extend from memory
8927 and then perform a shift sequence. */
8928 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8929 *total += COSTS_N_INSNS (2);
8931 else if (arm_arch6)
8932 *total += COSTS_N_INSNS (1);
8934 /* We don't have the necessary insn, so we need to perform some
8935 other operation. */
8936 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8937 /* An and with constant 255. */
8938 *total += COSTS_N_INSNS (1);
8939 else
8940 /* A shift sequence. Increase costs slightly to avoid
8941 combining two shifts into an extend operation. */
8942 *total += COSTS_N_INSNS (2) + 1;
8945 return false;
8948 switch (GET_MODE (XEXP (x, 0)))
8950 case V8QImode:
8951 case V4HImode:
8952 case V2SImode:
8953 case V4QImode:
8954 case V2HImode:
8955 *total = COSTS_N_INSNS (1);
8956 return false;
8958 default:
8959 gcc_unreachable ();
8961 gcc_unreachable ();
8963 case ZERO_EXTRACT:
8964 case SIGN_EXTRACT:
8965 mode = GET_MODE (XEXP (x, 0));
8966 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8967 return true;
8969 case CONST_INT:
8970 if (const_ok_for_arm (INTVAL (x))
8971 || const_ok_for_arm (~INTVAL (x)))
8972 *total = COSTS_N_INSNS (1);
8973 else
8974 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8975 INTVAL (x), NULL_RTX,
8976 NULL_RTX, 0, 0));
8977 return true;
8979 case CONST:
8980 case LABEL_REF:
8981 case SYMBOL_REF:
8982 *total = COSTS_N_INSNS (3);
8983 return true;
8985 case HIGH:
8986 *total = COSTS_N_INSNS (1);
8987 return true;
8989 case LO_SUM:
8990 *total = COSTS_N_INSNS (1);
8991 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8992 return true;
8994 case CONST_DOUBLE:
8995 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8996 && (mode == SFmode || !TARGET_VFP_SINGLE))
8997 *total = COSTS_N_INSNS (1);
8998 else
8999 *total = COSTS_N_INSNS (4);
9000 return true;
9002 case SET:
9003 /* The vec_extract patterns accept memory operands that require an
9004 address reload. Account for the cost of that reload to give the
9005 auto-inc-dec pass an incentive to try to replace them. */
9006 if (TARGET_NEON && MEM_P (SET_DEST (x))
9007 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
9009 mode = GET_MODE (SET_DEST (x));
9010 *total = rtx_cost (SET_DEST (x), mode, code, 0, speed);
9011 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
9012 *total += COSTS_N_INSNS (1);
9013 return true;
9015 /* Likewise for the vec_set patterns. */
9016 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
9017 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
9018 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
9020 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
9021 mode = GET_MODE (SET_DEST (x));
9022 *total = rtx_cost (mem, mode, code, 0, speed);
9023 if (!neon_vector_mem_operand (mem, 2, true))
9024 *total += COSTS_N_INSNS (1);
9025 return true;
9027 return false;
9029 case UNSPEC:
9030 /* We cost this as high as our memory costs so that it can be
9031 hoisted out of loops. */
9032 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
9034 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
9036 return true;
9038 case CONST_VECTOR:
9039 if (TARGET_NEON
9040 && TARGET_HARD_FLOAT
9041 && outer == SET
9042 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9043 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9044 *total = COSTS_N_INSNS (1);
9045 else
9046 *total = COSTS_N_INSNS (4);
9047 return true;
9049 default:
9050 *total = COSTS_N_INSNS (4);
9051 return false;
9055 /* Estimates the size cost of thumb1 instructions.
9056 For now most of the code is copied from thumb1_rtx_costs. We need
9057 finer-grained tuning when we have more related test cases. */
9058 static inline int
9059 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9061 machine_mode mode = GET_MODE (x);
9062 int words;
9064 switch (code)
9066 case ASHIFT:
9067 case ASHIFTRT:
9068 case LSHIFTRT:
9069 case ROTATERT:
9070 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9072 case PLUS:
9073 case MINUS:
9074 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
9075 shiftsub1 operations generated by RTL expansion, especially for the
9076 expansion of multiplication. */
9077 if ((GET_CODE (XEXP (x, 0)) == MULT
9078 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9079 || (GET_CODE (XEXP (x, 1)) == MULT
9080 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9081 return COSTS_N_INSNS (2);
9082 /* Deliberately fall through for normal RTX. */
9083 case COMPARE:
9084 case NEG:
9085 case NOT:
9086 return COSTS_N_INSNS (1);
9088 case MULT:
9089 if (CONST_INT_P (XEXP (x, 1)))
9091 /* The Thumb-1 mul instruction can't operate on a constant; we must
9092 load it into a register first. */
9093 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9094 /* For targets that have a very small and high-latency multiply
9095 unit, we prefer to synthesize the mult with up to 5 instructions,
9096 giving a good balance between size and performance. */
9097 if (arm_arch6m && arm_m_profile_small_mul)
9098 return COSTS_N_INSNS (5);
9099 else
9100 return COSTS_N_INSNS (1) + const_size;
9102 return COSTS_N_INSNS (1);
9104 case SET:
9105 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9106 the mode. */
9107 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9108 return COSTS_N_INSNS (words)
9109 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
9110 || satisfies_constraint_K (SET_SRC (x))
9111 /* thumb1_movdi_insn. */
9112 || ((words > 1) && MEM_P (SET_SRC (x))));
9114 case CONST_INT:
9115 if (outer == SET)
9117 if (UINTVAL (x) < 256)
9118 return COSTS_N_INSNS (1);
9119 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9120 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9121 return COSTS_N_INSNS (2);
9122 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9123 if (thumb_shiftable_const (INTVAL (x)))
9124 return COSTS_N_INSNS (2);
9125 return COSTS_N_INSNS (3);
9127 else if ((outer == PLUS || outer == COMPARE)
9128 && INTVAL (x) < 256 && INTVAL (x) > -256)
9129 return 0;
9130 else if ((outer == IOR || outer == XOR || outer == AND)
9131 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9132 return COSTS_N_INSNS (1);
9133 else if (outer == AND)
9135 int i;
9136 /* This duplicates the tests in the andsi3 expander. */
9137 for (i = 9; i <= 31; i++)
9138 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9139 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9140 return COSTS_N_INSNS (2);
9142 else if (outer == ASHIFT || outer == ASHIFTRT
9143 || outer == LSHIFTRT)
9144 return 0;
9145 return COSTS_N_INSNS (2);
9147 case CONST:
9148 case CONST_DOUBLE:
9149 case LABEL_REF:
9150 case SYMBOL_REF:
9151 return COSTS_N_INSNS (3);
9153 case UDIV:
9154 case UMOD:
9155 case DIV:
9156 case MOD:
9157 return 100;
9159 case TRUNCATE:
9160 return 99;
9162 case AND:
9163 case XOR:
9164 case IOR:
9165 return COSTS_N_INSNS (1);
9167 case MEM:
9168 return (COSTS_N_INSNS (1)
9169 + COSTS_N_INSNS (1)
9170 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9171 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9172 ? COSTS_N_INSNS (1) : 0));
9174 case IF_THEN_ELSE:
9175 /* XXX a guess. */
9176 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9177 return 14;
9178 return 2;
9180 case ZERO_EXTEND:
9181 /* XXX still guessing. */
9182 switch (GET_MODE (XEXP (x, 0)))
9184 case QImode:
9185 return (1 + (mode == DImode ? 4 : 0)
9186 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9188 case HImode:
9189 return (4 + (mode == DImode ? 4 : 0)
9190 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9192 case SImode:
9193 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9195 default:
9196 return 99;
9199 default:
9200 return 99;
9204 /* RTX costs when optimizing for size. */
9205 static bool
9206 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9207 int *total)
9209 machine_mode mode = GET_MODE (x);
9210 if (TARGET_THUMB1)
9212 *total = thumb1_size_rtx_costs (x, code, outer_code);
9213 return true;
9216 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9217 switch (code)
9219 case MEM:
9220 /* A memory access costs 1 insn if the mode is small, or the address is
9221 a single register, otherwise it costs one insn per word. */
9222 if (REG_P (XEXP (x, 0)))
9223 *total = COSTS_N_INSNS (1);
9224 else if (flag_pic
9225 && GET_CODE (XEXP (x, 0)) == PLUS
9226 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9227 /* This will be split into two instructions.
9228 See arm.md:calculate_pic_address. */
9229 *total = COSTS_N_INSNS (2);
9230 else
9231 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9232 return true;
9234 case DIV:
9235 case MOD:
9236 case UDIV:
9237 case UMOD:
9238 /* Needs a libcall, so it costs about this. */
9239 *total = COSTS_N_INSNS (2);
9240 return false;
9242 case ROTATE:
9243 if (mode == SImode && REG_P (XEXP (x, 1)))
9245 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
9246 0, false);
9247 return true;
9249 /* Fall through */
9250 case ROTATERT:
9251 case ASHIFT:
9252 case LSHIFTRT:
9253 case ASHIFTRT:
9254 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9256 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), mode, code,
9257 0, false);
9258 return true;
9260 else if (mode == SImode)
9262 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
9263 0, false);
9264 /* Slightly disparage register shifts, but not by much. */
9265 if (!CONST_INT_P (XEXP (x, 1)))
9266 *total += 1 + rtx_cost (XEXP (x, 1), mode, code, 1, false);
9267 return true;
9270 /* Needs a libcall. */
9271 *total = COSTS_N_INSNS (2);
9272 return false;
9274 case MINUS:
9275 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9276 && (mode == SFmode || !TARGET_VFP_SINGLE))
9278 *total = COSTS_N_INSNS (1);
9279 return false;
9282 if (mode == SImode)
9284 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9285 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9287 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9288 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9289 || subcode1 == ROTATE || subcode1 == ROTATERT
9290 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9291 || subcode1 == ASHIFTRT)
9293 /* It's just the cost of the two operands. */
9294 *total = 0;
9295 return false;
9298 *total = COSTS_N_INSNS (1);
9299 return false;
9302 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9303 return false;
9305 case PLUS:
9306 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9307 && (mode == SFmode || !TARGET_VFP_SINGLE))
9309 *total = COSTS_N_INSNS (1);
9310 return false;
9313 /* A shift as a part of ADD costs nothing. */
9314 if (GET_CODE (XEXP (x, 0)) == MULT
9315 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9317 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9318 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, false);
9319 *total += rtx_cost (XEXP (x, 1), mode, code, 1, false);
9320 return true;
9323 /* Fall through */
9324 case AND: case XOR: case IOR:
9325 if (mode == SImode)
9327 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9329 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9330 || subcode == LSHIFTRT || subcode == ASHIFTRT
9331 || (code == AND && subcode == NOT))
9333 /* It's just the cost of the two operands. */
9334 *total = 0;
9335 return false;
9339 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9340 return false;
9342 case MULT:
9343 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9344 return false;
9346 case NEG:
9347 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9348 && (mode == SFmode || !TARGET_VFP_SINGLE))
9350 *total = COSTS_N_INSNS (1);
9351 return false;
9354 /* Fall through */
9355 case NOT:
9356 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9358 return false;
9360 case IF_THEN_ELSE:
9361 *total = 0;
9362 return false;
9364 case COMPARE:
9365 if (cc_register (XEXP (x, 0), VOIDmode))
9366 *total = 0;
9367 else
9368 *total = COSTS_N_INSNS (1);
9369 return false;
9371 case ABS:
9372 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9373 && (mode == SFmode || !TARGET_VFP_SINGLE))
9374 *total = COSTS_N_INSNS (1);
9375 else
9376 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9377 return false;
9379 case SIGN_EXTEND:
9380 case ZERO_EXTEND:
9381 return arm_rtx_costs_1 (x, outer_code, total, 0);
9383 case CONST_INT:
9384 if (const_ok_for_arm (INTVAL (x)))
9385 /* A multiplication by a constant requires another instruction
9386 to load the constant to a register. */
9387 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9388 ? 1 : 0);
9389 else if (const_ok_for_arm (~INTVAL (x)))
9390 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9391 else if (const_ok_for_arm (-INTVAL (x)))
9393 if (outer_code == COMPARE || outer_code == PLUS
9394 || outer_code == MINUS)
9395 *total = 0;
9396 else
9397 *total = COSTS_N_INSNS (1);
9399 else
9400 *total = COSTS_N_INSNS (2);
9401 return true;
9403 case CONST:
9404 case LABEL_REF:
9405 case SYMBOL_REF:
9406 *total = COSTS_N_INSNS (2);
9407 return true;
9409 case CONST_DOUBLE:
9410 *total = COSTS_N_INSNS (4);
9411 return true;
9413 case CONST_VECTOR:
9414 if (TARGET_NEON
9415 && TARGET_HARD_FLOAT
9416 && outer_code == SET
9417 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9418 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9419 *total = COSTS_N_INSNS (1);
9420 else
9421 *total = COSTS_N_INSNS (4);
9422 return true;
9424 case HIGH:
9425 case LO_SUM:
9426 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9427 cost of these slightly. */
9428 *total = COSTS_N_INSNS (1) + 1;
9429 return true;
9431 case SET:
9432 return false;
9434 default:
9435 if (mode != VOIDmode)
9436 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9437 else
9438 *total = COSTS_N_INSNS (4); /* Who knows? */
9439 return false;
9443 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9444 operand, then return the operand that is being shifted. If the shift
9445 is not by a constant, then set *SHIFT_REG to the register holding the shift amount.
9446 Return NULL if OP is not a shifter operand. */
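/* For example, (mult (reg Rm) (const_int 4)) is treated as Rm << 2: the
   function returns Rm and leaves *SHIFT_REG untouched, while for
   (ashift (reg Rm) (reg Rs)) it returns Rm and sets *SHIFT_REG to Rs.  */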
9447 static rtx
9448 shifter_op_p (rtx op, rtx *shift_reg)
9450 enum rtx_code code = GET_CODE (op);
9452 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9453 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9454 return XEXP (op, 0);
9455 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9456 return XEXP (op, 0);
9457 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9458 || code == ASHIFTRT)
9460 if (!CONST_INT_P (XEXP (op, 1)))
9461 *shift_reg = XEXP (op, 1);
9462 return XEXP (op, 0);
9465 return NULL;
9468 static bool
9469 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9471 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9472 rtx_code code = GET_CODE (x);
9473 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9475 switch (XINT (x, 1))
9477 case UNSPEC_UNALIGNED_LOAD:
9478 /* We can only do unaligned loads into the integer unit, and we can't
9479 use LDM or LDRD. */
9480 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9481 if (speed_p)
9482 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9483 + extra_cost->ldst.load_unaligned);
9485 #ifdef NOT_YET
9486 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9487 ADDR_SPACE_GENERIC, speed_p);
9488 #endif
9489 return true;
9491 case UNSPEC_UNALIGNED_STORE:
9492 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9493 if (speed_p)
9494 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9495 + extra_cost->ldst.store_unaligned);
9497 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9498 #ifdef NOT_YET
9499 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9500 ADDR_SPACE_GENERIC, speed_p);
9501 #endif
9502 return true;
9504 case UNSPEC_VRINTZ:
9505 case UNSPEC_VRINTP:
9506 case UNSPEC_VRINTM:
9507 case UNSPEC_VRINTR:
9508 case UNSPEC_VRINTX:
9509 case UNSPEC_VRINTA:
9510 if (speed_p)
9511 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9513 return true;
9514 default:
9515 *cost = COSTS_N_INSNS (2);
9516 break;
9518 return true;
9521 /* Cost of a libcall. We assume one insn per argument, an amount for the
9522 call (one insn for -Os) and then one for processing the result. */
9523 #define LIBCALL_COST(N) COSTS_N_INSNS ((N) + (speed_p ? 18 : 2))
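/* For instance, a two-argument libcall is costed as LIBCALL_COST (2), i.e.
   COSTS_N_INSNS (2 + 18) when optimizing for speed and COSTS_N_INSNS (2 + 2)
   at -Os (two argument insns, one for the call, one for the result).  */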
9525 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9526 do \
9528 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9529 if (shift_op != NULL \
9530 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9532 if (shift_reg) \
9534 if (speed_p) \
9535 *cost += extra_cost->alu.arith_shift_reg; \
9536 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9537 ASHIFT, 1, speed_p); \
9539 else if (speed_p) \
9540 *cost += extra_cost->alu.arith_shift; \
9542 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9543 ASHIFT, 0, speed_p) \
9544 + rtx_cost (XEXP (x, 1 - IDX), \
9545 GET_MODE (shift_op), \
9546 OP, 1, speed_p)); \
9547 return true; \
9550 while (0);
9552 /* RTX costs. Make an estimate of the cost of executing the operation
9553 X, which is contained within an operation with code OUTER_CODE.
9554 SPEED_P indicates whether the cost desired is the performance cost,
9555 or the size cost. The estimate is stored in COST and the return
9556 value is TRUE if the cost calculation is final, or FALSE if the
9557 caller should recurse through the operands of X to add additional
9558 costs.
9560 We currently make no attempt to model the size savings of Thumb-2
9561 16-bit instructions. At the normal points in compilation where
9562 this code is called we have no measure of whether the condition
9563 flags are live or not, and thus no realistic way to determine what
9564 the size will eventually be. */
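/* As an illustration of the return convention: for a SImode
   (plus (reg) (reg)) the code below only adds extra_cost->alu.arith (when
   SPEED_P) and returns false, so the caller recurses into the two register
   operands, whereas for (plus (reg) (const_int N)) the constant is folded
   in via arm_gen_constant and true is returned.  */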
9565 static bool
9566 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9567 const struct cpu_cost_table *extra_cost,
9568 int *cost, bool speed_p)
9570 machine_mode mode = GET_MODE (x);
9572 *cost = COSTS_N_INSNS (1);
9574 if (TARGET_THUMB1)
9576 if (speed_p)
9577 *cost = thumb1_rtx_costs (x, code, outer_code);
9578 else
9579 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9580 return true;
9583 switch (code)
9585 case SET:
9586 *cost = 0;
9587 /* SET RTXs don't have a mode so we get it from the destination. */
9588 mode = GET_MODE (SET_DEST (x));
9590 if (REG_P (SET_SRC (x))
9591 && REG_P (SET_DEST (x)))
9593 /* Assume that most copies can be done with a single insn,
9594 unless we don't have HW FP, in which case everything
9595 larger than word mode will require two insns. */
9596 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9597 && GET_MODE_SIZE (mode) > 4)
9598 || mode == DImode)
9599 ? 2 : 1);
9600 /* Conditional register moves can be encoded
9601 in 16 bits in Thumb mode. */
9602 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9603 *cost >>= 1;
9605 return true;
9608 if (CONST_INT_P (SET_SRC (x)))
9610 /* Handle CONST_INT here, since the value doesn't have a mode
9611 and we would otherwise be unable to work out the true cost. */
9612 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9613 0, speed_p);
9614 outer_code = SET;
9615 /* Slightly lower the cost of setting a core reg to a constant.
9616 This helps break up chains and allows for better scheduling. */
9617 if (REG_P (SET_DEST (x))
9618 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9619 *cost -= 1;
9620 x = SET_SRC (x);
9621 /* Immediate moves with an immediate in the range [0, 255] can be
9622 encoded in 16 bits in Thumb mode. */
9623 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9624 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9625 *cost >>= 1;
9626 goto const_int_cost;
9629 return false;
9631 case MEM:
9632 /* A memory access costs 1 insn if the mode is small, or the address is
9633 a single register, otherwise it costs one insn per word. */
9634 if (REG_P (XEXP (x, 0)))
9635 *cost = COSTS_N_INSNS (1);
9636 else if (flag_pic
9637 && GET_CODE (XEXP (x, 0)) == PLUS
9638 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9639 /* This will be split into two instructions.
9640 See arm.md:calculate_pic_address. */
9641 *cost = COSTS_N_INSNS (2);
9642 else
9643 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9645 /* For speed optimizations, add the costs of the address and
9646 accessing memory. */
9647 if (speed_p)
9648 #ifdef NOT_YET
9649 *cost += (extra_cost->ldst.load
9650 + arm_address_cost (XEXP (x, 0), mode,
9651 ADDR_SPACE_GENERIC, speed_p));
9652 #else
9653 *cost += extra_cost->ldst.load;
9654 #endif
9655 return true;
9657 case PARALLEL:
9659 /* Calculations of LDM costs are complex. We assume an initial cost
9660 (ldm_1st) which will load the number of registers mentioned in
9661 ldm_regs_per_insn_1st registers; then each additional
9662 ldm_regs_per_insn_subsequent registers cost one more insn. The
9663 formula for N regs is thus:
9665 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9666 + ldm_regs_per_insn_subsequent - 1)
9667 / ldm_regs_per_insn_subsequent).
9669 Additional costs may also be added for addressing. A similar
9670 formula is used for STM. */
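/* For illustration, with hypothetical tuning values
   ldm_regs_per_insn_1st == 4 and ldm_regs_per_insn_subsequent == 2,
   a 7-register LDM is costed as the initial amount plus
   COSTS_N_INSNS ((3 + 2 - 1) / 2) == COSTS_N_INSNS (2) for the two
   additional transfer insns.  */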
9672 bool is_ldm = load_multiple_operation (x, SImode);
9673 bool is_stm = store_multiple_operation (x, SImode);
9675 if (is_ldm || is_stm)
9677 if (speed_p)
9679 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9680 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9681 ? extra_cost->ldst.ldm_regs_per_insn_1st
9682 : extra_cost->ldst.stm_regs_per_insn_1st;
9683 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9684 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9685 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9687 *cost += regs_per_insn_1st
9688 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9689 + regs_per_insn_sub - 1)
9690 / regs_per_insn_sub);
9691 return true;
9695 return false;
9697 case DIV:
9698 case UDIV:
9699 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9700 && (mode == SFmode || !TARGET_VFP_SINGLE))
9701 *cost += COSTS_N_INSNS (speed_p
9702 ? extra_cost->fp[mode != SFmode].div : 0);
9703 else if (mode == SImode && TARGET_IDIV)
9704 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9705 else
9706 *cost = LIBCALL_COST (2);
9707 return false; /* All arguments must be in registers. */
9709 case MOD:
9710 /* MOD by a power of 2 can be expanded as:
9711 rsbs r1, r0, #0
9712 and r0, r0, #(n - 1)
9713 and r1, r1, #(n - 1)
9714 rsbpl r0, r1, #0. */
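/* For example, for x % 16 the two AND masks above are #15; together with
   the base cost of one insn already in *cost, the COSTS_N_INSNS (3) added
   below covers the four-instruction sequence.  */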
9715 if (CONST_INT_P (XEXP (x, 1))
9716 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9717 && mode == SImode)
9719 *cost += COSTS_N_INSNS (3);
9721 if (speed_p)
9722 *cost += 2 * extra_cost->alu.logical
9723 + extra_cost->alu.arith;
9724 return true;
9727 /* Fall-through. */
9728 case UMOD:
9729 *cost = LIBCALL_COST (2);
9730 return false; /* All arguments must be in registers. */
9732 case ROTATE:
9733 if (mode == SImode && REG_P (XEXP (x, 1)))
9735 *cost += (COSTS_N_INSNS (1)
9736 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9737 if (speed_p)
9738 *cost += extra_cost->alu.shift_reg;
9739 return true;
9741 /* Fall through */
9742 case ROTATERT:
9743 case ASHIFT:
9744 case LSHIFTRT:
9745 case ASHIFTRT:
9746 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9748 *cost += (COSTS_N_INSNS (2)
9749 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9750 if (speed_p)
9751 *cost += 2 * extra_cost->alu.shift;
9752 return true;
9754 else if (mode == SImode)
9756 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9757 /* Slightly disparage register shifts at -Os, but not by much. */
9758 if (!CONST_INT_P (XEXP (x, 1)))
9759 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9760 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9761 return true;
9763 else if (GET_MODE_CLASS (mode) == MODE_INT
9764 && GET_MODE_SIZE (mode) < 4)
9766 if (code == ASHIFT)
9768 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9769 /* Slightly disparage register shifts at -Os, but not by
9770 much. */
9771 if (!CONST_INT_P (XEXP (x, 1)))
9772 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9773 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9775 else if (code == LSHIFTRT || code == ASHIFTRT)
9777 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9779 /* Can use SBFX/UBFX. */
9780 if (speed_p)
9781 *cost += extra_cost->alu.bfx;
9782 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9784 else
9786 *cost += COSTS_N_INSNS (1);
9787 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9788 if (speed_p)
9790 if (CONST_INT_P (XEXP (x, 1)))
9791 *cost += 2 * extra_cost->alu.shift;
9792 else
9793 *cost += (extra_cost->alu.shift
9794 + extra_cost->alu.shift_reg);
9796 else
9797 /* Slightly disparage register shifts. */
9798 *cost += !CONST_INT_P (XEXP (x, 1));
9801 else /* Rotates. */
9803 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9804 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9805 if (speed_p)
9807 if (CONST_INT_P (XEXP (x, 1)))
9808 *cost += (2 * extra_cost->alu.shift
9809 + extra_cost->alu.log_shift);
9810 else
9811 *cost += (extra_cost->alu.shift
9812 + extra_cost->alu.shift_reg
9813 + extra_cost->alu.log_shift_reg);
9816 return true;
9819 *cost = LIBCALL_COST (2);
9820 return false;
9822 case BSWAP:
9823 if (arm_arch6)
9825 if (mode == SImode)
9827 if (speed_p)
9828 *cost += extra_cost->alu.rev;
9830 return false;
9833 else
9835 /* No rev instruction available. Look at arm_legacy_rev
9836 and thumb_legacy_rev for the form of RTL used then. */
9837 if (TARGET_THUMB)
9839 *cost += COSTS_N_INSNS (9);
9841 if (speed_p)
9843 *cost += 6 * extra_cost->alu.shift;
9844 *cost += 3 * extra_cost->alu.logical;
9847 else
9849 *cost += COSTS_N_INSNS (4);
9851 if (speed_p)
9853 *cost += 2 * extra_cost->alu.shift;
9854 *cost += extra_cost->alu.arith_shift;
9855 *cost += 2 * extra_cost->alu.logical;
9858 return true;
9860 return false;
9862 case MINUS:
9863 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9864 && (mode == SFmode || !TARGET_VFP_SINGLE))
9866 if (GET_CODE (XEXP (x, 0)) == MULT
9867 || GET_CODE (XEXP (x, 1)) == MULT)
9869 rtx mul_op0, mul_op1, sub_op;
9871 if (speed_p)
9872 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9874 if (GET_CODE (XEXP (x, 0)) == MULT)
9876 mul_op0 = XEXP (XEXP (x, 0), 0);
9877 mul_op1 = XEXP (XEXP (x, 0), 1);
9878 sub_op = XEXP (x, 1);
9880 else
9882 mul_op0 = XEXP (XEXP (x, 1), 0);
9883 mul_op1 = XEXP (XEXP (x, 1), 1);
9884 sub_op = XEXP (x, 0);
9887 /* The first operand of the multiply may be optionally
9888 negated. */
9889 if (GET_CODE (mul_op0) == NEG)
9890 mul_op0 = XEXP (mul_op0, 0);
9892 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9893 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9894 + rtx_cost (sub_op, mode, code, 0, speed_p));
9896 return true;
9899 if (speed_p)
9900 *cost += extra_cost->fp[mode != SFmode].addsub;
9901 return false;
9904 if (mode == SImode)
9906 rtx shift_by_reg = NULL;
9907 rtx shift_op;
9908 rtx non_shift_op;
9910 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9911 if (shift_op == NULL)
9913 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9914 non_shift_op = XEXP (x, 0);
9916 else
9917 non_shift_op = XEXP (x, 1);
9919 if (shift_op != NULL)
9921 if (shift_by_reg != NULL)
9923 if (speed_p)
9924 *cost += extra_cost->alu.arith_shift_reg;
9925 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9927 else if (speed_p)
9928 *cost += extra_cost->alu.arith_shift;
9930 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9931 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9932 return true;
9935 if (arm_arch_thumb2
9936 && GET_CODE (XEXP (x, 1)) == MULT)
9938 /* MLS. */
9939 if (speed_p)
9940 *cost += extra_cost->mult[0].add;
9941 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9942 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9943 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9944 return true;
9947 if (CONST_INT_P (XEXP (x, 0)))
9949 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9950 INTVAL (XEXP (x, 0)), NULL_RTX,
9951 NULL_RTX, 1, 0);
9952 *cost = COSTS_N_INSNS (insns);
9953 if (speed_p)
9954 *cost += insns * extra_cost->alu.arith;
9955 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9956 return true;
9958 else if (speed_p)
9959 *cost += extra_cost->alu.arith;
9961 return false;
9964 if (GET_MODE_CLASS (mode) == MODE_INT
9965 && GET_MODE_SIZE (mode) < 4)
9967 rtx shift_op, shift_reg;
9968 shift_reg = NULL;
9970 /* We check both sides of the MINUS for shifter operands since,
9971 unlike PLUS, it's not commutative. */
9973 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9974 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9976 /* Slightly disparage, as we might need to widen the result. */
9977 *cost += 1;
9978 if (speed_p)
9979 *cost += extra_cost->alu.arith;
9981 if (CONST_INT_P (XEXP (x, 0)))
9983 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9984 return true;
9987 return false;
9990 if (mode == DImode)
9992 *cost += COSTS_N_INSNS (1);
9994 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9996 rtx op1 = XEXP (x, 1);
9998 if (speed_p)
9999 *cost += 2 * extra_cost->alu.arith;
10001 if (GET_CODE (op1) == ZERO_EXTEND)
10002 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10003 0, speed_p);
10004 else
10005 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10006 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10007 0, speed_p);
10008 return true;
10010 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10012 if (speed_p)
10013 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10014 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10015 0, speed_p)
10016 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10017 return true;
10019 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10020 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10022 if (speed_p)
10023 *cost += (extra_cost->alu.arith
10024 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10025 ? extra_cost->alu.arith
10026 : extra_cost->alu.arith_shift));
10027 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10028 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10029 GET_CODE (XEXP (x, 1)), 0, speed_p));
10030 return true;
10033 if (speed_p)
10034 *cost += 2 * extra_cost->alu.arith;
10035 return false;
10038 /* Vector mode? */
10040 *cost = LIBCALL_COST (2);
10041 return false;
10043 case PLUS:
10044 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10045 && (mode == SFmode || !TARGET_VFP_SINGLE))
10047 if (GET_CODE (XEXP (x, 0)) == MULT)
10049 rtx mul_op0, mul_op1, add_op;
10051 if (speed_p)
10052 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10054 mul_op0 = XEXP (XEXP (x, 0), 0);
10055 mul_op1 = XEXP (XEXP (x, 0), 1);
10056 add_op = XEXP (x, 1);
10058 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10059 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10060 + rtx_cost (add_op, mode, code, 0, speed_p));
10062 return true;
10065 if (speed_p)
10066 *cost += extra_cost->fp[mode != SFmode].addsub;
10067 return false;
10069 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10071 *cost = LIBCALL_COST (2);
10072 return false;
10075 /* Narrow modes can be synthesized in SImode, but the range
10076 of useful sub-operations is limited. Check for shift operations
10077 on one of the operands. Only left shifts can be used in the
10078 narrow modes. */
10079 if (GET_MODE_CLASS (mode) == MODE_INT
10080 && GET_MODE_SIZE (mode) < 4)
10082 rtx shift_op, shift_reg;
10083 shift_reg = NULL;
10085 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
10087 if (CONST_INT_P (XEXP (x, 1)))
10089 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10090 INTVAL (XEXP (x, 1)), NULL_RTX,
10091 NULL_RTX, 1, 0);
10092 *cost = COSTS_N_INSNS (insns);
10093 if (speed_p)
10094 *cost += insns * extra_cost->alu.arith;
10095 /* Slightly penalize a narrow operation as the result may
10096 need widening. */
10097 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10098 return true;
10101 /* Slightly penalize a narrow operation as the result may
10102 need widening. */
10103 *cost += 1;
10104 if (speed_p)
10105 *cost += extra_cost->alu.arith;
10107 return false;
10110 if (mode == SImode)
10112 rtx shift_op, shift_reg;
10114 if (TARGET_INT_SIMD
10115 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10116 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10118 /* UXTA[BH] or SXTA[BH]. */
10119 if (speed_p)
10120 *cost += extra_cost->alu.extend_arith;
10121 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10122 0, speed_p)
10123 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10124 return true;
10127 shift_reg = NULL;
10128 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10129 if (shift_op != NULL)
10131 if (shift_reg)
10133 if (speed_p)
10134 *cost += extra_cost->alu.arith_shift_reg;
10135 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10137 else if (speed_p)
10138 *cost += extra_cost->alu.arith_shift;
10140 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10141 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10142 return true;
10144 if (GET_CODE (XEXP (x, 0)) == MULT)
10146 rtx mul_op = XEXP (x, 0);
10148 if (TARGET_DSP_MULTIPLY
10149 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10150 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10151 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10152 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10153 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10154 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10155 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10156 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10157 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10158 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10159 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10160 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10161 == 16))))))
10163 /* SMLA[BT][BT]. */
10164 if (speed_p)
10165 *cost += extra_cost->mult[0].extend_add;
10166 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10167 SIGN_EXTEND, 0, speed_p)
10168 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10169 SIGN_EXTEND, 0, speed_p)
10170 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10171 return true;
10174 if (speed_p)
10175 *cost += extra_cost->mult[0].add;
10176 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10177 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10178 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10179 return true;
10181 if (CONST_INT_P (XEXP (x, 1)))
10183 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10184 INTVAL (XEXP (x, 1)), NULL_RTX,
10185 NULL_RTX, 1, 0);
10186 *cost = COSTS_N_INSNS (insns);
10187 if (speed_p)
10188 *cost += insns * extra_cost->alu.arith;
10189 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10190 return true;
10192 else if (speed_p)
10193 *cost += extra_cost->alu.arith;
10195 return false;
10198 if (mode == DImode)
10200 if (arm_arch3m
10201 && GET_CODE (XEXP (x, 0)) == MULT
10202 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10203 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10204 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10205 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10207 if (speed_p)
10208 *cost += extra_cost->mult[1].extend_add;
10209 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10210 ZERO_EXTEND, 0, speed_p)
10211 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10212 ZERO_EXTEND, 0, speed_p)
10213 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10214 return true;
10217 *cost += COSTS_N_INSNS (1);
10219 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10220 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10222 if (speed_p)
10223 *cost += (extra_cost->alu.arith
10224 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10225 ? extra_cost->alu.arith
10226 : extra_cost->alu.arith_shift));
10228 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10229 0, speed_p)
10230 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10231 return true;
10234 if (speed_p)
10235 *cost += 2 * extra_cost->alu.arith;
10236 return false;
10239 /* Vector mode? */
10240 *cost = LIBCALL_COST (2);
10241 return false;
10242 case IOR:
10243 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10245 if (speed_p)
10246 *cost += extra_cost->alu.rev;
10248 return true;
10250 /* Fall through. */
10251 case AND: case XOR:
10252 if (mode == SImode)
10254 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10255 rtx op0 = XEXP (x, 0);
10256 rtx shift_op, shift_reg;
10258 if (subcode == NOT
10259 && (code == AND
10260 || (code == IOR && TARGET_THUMB2)))
10261 op0 = XEXP (op0, 0);
10263 shift_reg = NULL;
10264 shift_op = shifter_op_p (op0, &shift_reg);
10265 if (shift_op != NULL)
10267 if (shift_reg)
10269 if (speed_p)
10270 *cost += extra_cost->alu.log_shift_reg;
10271 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10273 else if (speed_p)
10274 *cost += extra_cost->alu.log_shift;
10276 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10277 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10278 return true;
10281 if (CONST_INT_P (XEXP (x, 1)))
10283 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10284 INTVAL (XEXP (x, 1)), NULL_RTX,
10285 NULL_RTX, 1, 0);
10287 *cost = COSTS_N_INSNS (insns);
10288 if (speed_p)
10289 *cost += insns * extra_cost->alu.logical;
10290 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10291 return true;
10294 if (speed_p)
10295 *cost += extra_cost->alu.logical;
10296 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10297 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10298 return true;
10301 if (mode == DImode)
10303 rtx op0 = XEXP (x, 0);
10304 enum rtx_code subcode = GET_CODE (op0);
10306 *cost += COSTS_N_INSNS (1);
10308 if (subcode == NOT
10309 && (code == AND
10310 || (code == IOR && TARGET_THUMB2)))
10311 op0 = XEXP (op0, 0);
10313 if (GET_CODE (op0) == ZERO_EXTEND)
10315 if (speed_p)
10316 *cost += 2 * extra_cost->alu.logical;
10318 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10319 0, speed_p)
10320 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10321 return true;
10323 else if (GET_CODE (op0) == SIGN_EXTEND)
10325 if (speed_p)
10326 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10328 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10329 0, speed_p)
10330 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10331 return true;
10334 if (speed_p)
10335 *cost += 2 * extra_cost->alu.logical;
10337 return true;
10339 /* Vector mode? */
10341 *cost = LIBCALL_COST (2);
10342 return false;
10344 case MULT:
10345 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10346 && (mode == SFmode || !TARGET_VFP_SINGLE))
10348 rtx op0 = XEXP (x, 0);
10350 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10351 op0 = XEXP (op0, 0);
10353 if (speed_p)
10354 *cost += extra_cost->fp[mode != SFmode].mult;
10356 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10357 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10358 return true;
10360 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10362 *cost = LIBCALL_COST (2);
10363 return false;
10366 if (mode == SImode)
10368 if (TARGET_DSP_MULTIPLY
10369 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10370 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10371 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10372 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10373 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10374 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10375 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10376 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10377 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10378 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10379 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10380 && (INTVAL (XEXP (XEXP (x, 1), 1))
10381 == 16))))))
10383 /* SMUL[TB][TB]. */
10384 if (speed_p)
10385 *cost += extra_cost->mult[0].extend;
10386 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10387 SIGN_EXTEND, 0, speed_p);
10388 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10389 SIGN_EXTEND, 1, speed_p);
10390 return true;
10392 if (speed_p)
10393 *cost += extra_cost->mult[0].simple;
10394 return false;
10397 if (mode == DImode)
10399 if (arm_arch3m
10400 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10401 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10402 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10403 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10405 if (speed_p)
10406 *cost += extra_cost->mult[1].extend;
10407 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10408 ZERO_EXTEND, 0, speed_p)
10409 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10410 ZERO_EXTEND, 0, speed_p));
10411 return true;
10414 *cost = LIBCALL_COST (2);
10415 return false;
10418 /* Vector mode? */
10419 *cost = LIBCALL_COST (2);
10420 return false;
10422 case NEG:
10423 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10424 && (mode == SFmode || !TARGET_VFP_SINGLE))
10426 if (GET_CODE (XEXP (x, 0)) == MULT)
10428 /* VNMUL. */
10429 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10430 return true;
10433 if (speed_p)
10434 *cost += extra_cost->fp[mode != SFmode].neg;
10436 return false;
10438 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10440 *cost = LIBCALL_COST (1);
10441 return false;
10444 if (mode == SImode)
10446 if (GET_CODE (XEXP (x, 0)) == ABS)
10448 *cost += COSTS_N_INSNS (1);
10449 /* Assume the non-flag-changing variant. */
10450 if (speed_p)
10451 *cost += (extra_cost->alu.log_shift
10452 + extra_cost->alu.arith_shift);
10453 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10454 return true;
10457 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10458 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10460 *cost += COSTS_N_INSNS (1);
10461 /* No extra cost for MOV imm and MVN imm. */
10462 /* If the comparison op is using the flags, there's no further
10463 cost, otherwise we need to add the cost of the comparison. */
10464 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10465 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10466 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10468 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10469 *cost += (COSTS_N_INSNS (1)
10470 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10471 0, speed_p)
10472 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10473 1, speed_p));
10474 if (speed_p)
10475 *cost += extra_cost->alu.arith;
10477 return true;
10480 if (speed_p)
10481 *cost += extra_cost->alu.arith;
10482 return false;
10485 if (GET_MODE_CLASS (mode) == MODE_INT
10486 && GET_MODE_SIZE (mode) < 4)
10488 /* Slightly disparage, as we might need an extend operation. */
10489 *cost += 1;
10490 if (speed_p)
10491 *cost += extra_cost->alu.arith;
10492 return false;
10495 if (mode == DImode)
10497 *cost += COSTS_N_INSNS (1);
10498 if (speed_p)
10499 *cost += 2 * extra_cost->alu.arith;
10500 return false;
10503 /* Vector mode? */
10504 *cost = LIBCALL_COST (1);
10505 return false;
10507 case NOT:
10508 if (mode == SImode)
10510 rtx shift_op;
10511 rtx shift_reg = NULL;
10513 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10515 if (shift_op)
10517 if (shift_reg != NULL)
10519 if (speed_p)
10520 *cost += extra_cost->alu.log_shift_reg;
10521 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10523 else if (speed_p)
10524 *cost += extra_cost->alu.log_shift;
10525 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10526 return true;
10529 if (speed_p)
10530 *cost += extra_cost->alu.logical;
10531 return false;
10533 if (mode == DImode)
10535 *cost += COSTS_N_INSNS (1);
10536 return false;
10539 /* Vector mode? */
10541 *cost += LIBCALL_COST (1);
10542 return false;
10544 case IF_THEN_ELSE:
10546 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10548 *cost += COSTS_N_INSNS (3);
10549 return true;
10551 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10552 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10554 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10555 /* Assume that if one arm of the if_then_else is a register,
10556 that it will be tied with the result and eliminate the
10557 conditional insn. */
10558 if (REG_P (XEXP (x, 1)))
10559 *cost += op2cost;
10560 else if (REG_P (XEXP (x, 2)))
10561 *cost += op1cost;
10562 else
10564 if (speed_p)
10566 if (extra_cost->alu.non_exec_costs_exec)
10567 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10568 else
10569 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10571 else
10572 *cost += op1cost + op2cost;
10575 return true;
10577 case COMPARE:
10578 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10579 *cost = 0;
10580 else
10582 machine_mode op0mode;
10583 /* We'll mostly assume that the cost of a compare is the cost of the
10584 LHS. However, there are some notable exceptions. */
10586 /* Floating point compares are never done as side-effects. */
10587 op0mode = GET_MODE (XEXP (x, 0));
10588 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10589 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10591 if (speed_p)
10592 *cost += extra_cost->fp[op0mode != SFmode].compare;
10594 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10596 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10597 return true;
10600 return false;
10602 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10604 *cost = LIBCALL_COST (2);
10605 return false;
10608 /* DImode compares normally take two insns. */
10609 if (op0mode == DImode)
10611 *cost += COSTS_N_INSNS (1);
10612 if (speed_p)
10613 *cost += 2 * extra_cost->alu.arith;
10614 return false;
10617 if (op0mode == SImode)
10619 rtx shift_op;
10620 rtx shift_reg;
10622 if (XEXP (x, 1) == const0_rtx
10623 && !(REG_P (XEXP (x, 0))
10624 || (GET_CODE (XEXP (x, 0)) == SUBREG
10625 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10627 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10629 /* Multiply operations that set the flags are often
10630 significantly more expensive. */
10631 if (speed_p
10632 && GET_CODE (XEXP (x, 0)) == MULT
10633 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10634 *cost += extra_cost->mult[0].flag_setting;
10636 if (speed_p
10637 && GET_CODE (XEXP (x, 0)) == PLUS
10638 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10639 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10640 0), 1), mode))
10641 *cost += extra_cost->mult[0].flag_setting;
10642 return true;
10645 shift_reg = NULL;
10646 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10647 if (shift_op != NULL)
10649 if (shift_reg != NULL)
10651 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10652 1, speed_p);
10653 if (speed_p)
10654 *cost += extra_cost->alu.arith_shift_reg;
10656 else if (speed_p)
10657 *cost += extra_cost->alu.arith_shift;
10658 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10659 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10660 return true;
10663 if (speed_p)
10664 *cost += extra_cost->alu.arith;
10665 if (CONST_INT_P (XEXP (x, 1))
10666 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10668 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10669 return true;
10671 return false;
10674 /* Vector mode? */
10676 *cost = LIBCALL_COST (2);
10677 return false;
10679 return true;
10681 case EQ:
10682 case NE:
10683 case LT:
10684 case LE:
10685 case GT:
10686 case GE:
10687 case LTU:
10688 case LEU:
10689 case GEU:
10690 case GTU:
10691 case ORDERED:
10692 case UNORDERED:
10693 case UNEQ:
10694 case UNLE:
10695 case UNLT:
10696 case UNGE:
10697 case UNGT:
10698 case LTGT:
10699 if (outer_code == SET)
10701 /* Is it a store-flag operation? */
10702 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10703 && XEXP (x, 1) == const0_rtx)
10705 /* Thumb also needs an IT insn. */
10706 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10707 return true;
10709 if (XEXP (x, 1) == const0_rtx)
10711 switch (code)
10713 case LT:
10714 /* LSR Rd, Rn, #31. */
10715 if (speed_p)
10716 *cost += extra_cost->alu.shift;
10717 break;
10719 case EQ:
10720 /* RSBS T1, Rn, #0
10721 ADC Rd, Rn, T1. */
10723 case NE:
10724 /* SUBS T1, Rn, #1
10725 SBC Rd, Rn, T1. */
10726 *cost += COSTS_N_INSNS (1);
10727 break;
10729 case LE:
10730 /* RSBS T1, Rn, Rn, LSR #31
10731 ADC Rd, Rn, T1. */
10732 *cost += COSTS_N_INSNS (1);
10733 if (speed_p)
10734 *cost += extra_cost->alu.arith_shift;
10735 break;
10737 case GT:
10738 /* RSB Rd, Rn, Rn, ASR #1
10739 LSR Rd, Rd, #31. */
10740 *cost += COSTS_N_INSNS (1);
10741 if (speed_p)
10742 *cost += (extra_cost->alu.arith_shift
10743 + extra_cost->alu.shift);
10744 break;
10746 case GE:
10747 /* ASR Rd, Rn, #31
10748 ADD Rd, Rn, #1. */
10749 *cost += COSTS_N_INSNS (1);
10750 if (speed_p)
10751 *cost += extra_cost->alu.shift;
10752 break;
10754 default:
10755 /* Remaining cases are either meaningless or would take
10756 three insns anyway. */
10757 *cost = COSTS_N_INSNS (3);
10758 break;
10760 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10761 return true;
10763 else
10765 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10766 if (CONST_INT_P (XEXP (x, 1))
10767 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10769 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10770 return true;
10773 return false;
10776 /* Not directly inside a set. If it involves the condition code
10777 register it must be the condition for a branch, cond_exec or
10778 I_T_E operation. Since the comparison is performed elsewhere
10779 this is just the control part which has no additional
10780 cost. */
10781 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10782 && XEXP (x, 1) == const0_rtx)
10784 *cost = 0;
10785 return true;
10787 return false;
10789 case ABS:
10790 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10791 && (mode == SFmode || !TARGET_VFP_SINGLE))
10793 if (speed_p)
10794 *cost += extra_cost->fp[mode != SFmode].neg;
10796 return false;
10798 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10800 *cost = LIBCALL_COST (1);
10801 return false;
10804 if (mode == SImode)
10806 if (speed_p)
10807 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10808 return false;
10810 /* Vector mode? */
10811 *cost = LIBCALL_COST (1);
10812 return false;
10814 case SIGN_EXTEND:
10815 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10816 && MEM_P (XEXP (x, 0)))
10818 if (mode == DImode)
10819 *cost += COSTS_N_INSNS (1);
10821 if (!speed_p)
10822 return true;
10824 if (GET_MODE (XEXP (x, 0)) == SImode)
10825 *cost += extra_cost->ldst.load;
10826 else
10827 *cost += extra_cost->ldst.load_sign_extend;
10829 if (mode == DImode)
10830 *cost += extra_cost->alu.shift;
10832 return true;
10835 /* Widening from less than 32 bits requires an extend operation. */
10836 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10838 /* We have SXTB/SXTH. */
10839 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10840 if (speed_p)
10841 *cost += extra_cost->alu.extend;
10843 else if (GET_MODE (XEXP (x, 0)) != SImode)
10845 /* Needs two shifts. */
10846 *cost += COSTS_N_INSNS (1);
10847 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10848 if (speed_p)
10849 *cost += 2 * extra_cost->alu.shift;
10852 /* Widening beyond 32 bits requires one more insn. */
10853 if (mode == DImode)
10855 *cost += COSTS_N_INSNS (1);
10856 if (speed_p)
10857 *cost += extra_cost->alu.shift;
10860 return true;
10862 case ZERO_EXTEND:
10863 if ((arm_arch4
10864 || GET_MODE (XEXP (x, 0)) == SImode
10865 || GET_MODE (XEXP (x, 0)) == QImode)
10866 && MEM_P (XEXP (x, 0)))
10868 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10870 if (mode == DImode)
10871 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10873 return true;
10876 /* Widening from less than 32 bits requires an extend operation. */
10877 if (GET_MODE (XEXP (x, 0)) == QImode)
10879 /* UXTB can be a shorter instruction in Thumb2, but it might
10880 be slower than the AND Rd, Rn, #255 alternative. When
10881 optimizing for speed it should never be slower to use
10882 AND, and we don't really model 16-bit vs 32-bit insns
10883 here. */
10884 if (speed_p)
10885 *cost += extra_cost->alu.logical;
10887 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10889 /* We have UXTB/UXTH. */
10890 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10891 if (speed_p)
10892 *cost += extra_cost->alu.extend;
10894 else if (GET_MODE (XEXP (x, 0)) != SImode)
10896 /* Needs two shifts. It's marginally preferable to use
10897 shifts rather than two BIC instructions as the second
10898 shift may merge with a subsequent insn as a shifter
10899 op. */
10900 *cost = COSTS_N_INSNS (2);
10901 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10902 if (speed_p)
10903 *cost += 2 * extra_cost->alu.shift;
10906 /* Widening beyond 32 bits requires one more insn. */
10907 if (mode == DImode)
10909 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10912 return true;
10914 case CONST_INT:
10915 *cost = 0;
10916 /* CONST_INT has no mode, so we cannot tell for sure how many
10917 insns are really going to be needed. The best we can do is
10918 look at the value passed. If it fits in SImode, then assume
10919 that's the mode it will be used for. Otherwise assume it
10920 will be used in DImode. */
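/* For instance, 0x7fffffff survives trunc_int_for_mode (..., SImode)
   unchanged and is costed as an SImode constant, whereas
   (HOST_WIDE_INT) 1 << 32 does not and is costed below as a DImode
   constant built from its two 32-bit halves.  */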
10921 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10922 mode = SImode;
10923 else
10924 mode = DImode;
10926 /* Avoid blowing up in arm_gen_constant (). */
10927 if (!(outer_code == PLUS
10928 || outer_code == AND
10929 || outer_code == IOR
10930 || outer_code == XOR
10931 || outer_code == MINUS))
10932 outer_code = SET;
10934 const_int_cost:
10935 if (mode == SImode)
10937 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10938 INTVAL (x), NULL, NULL,
10939 0, 0));
10940 /* Extra costs? */
10942 else
10944 *cost += COSTS_N_INSNS (arm_gen_constant
10945 (outer_code, SImode, NULL,
10946 trunc_int_for_mode (INTVAL (x), SImode),
10947 NULL, NULL, 0, 0)
10948 + arm_gen_constant (outer_code, SImode, NULL,
10949 INTVAL (x) >> 32, NULL,
10950 NULL, 0, 0));
10951 /* Extra costs? */
10954 return true;
10956 case CONST:
10957 case LABEL_REF:
10958 case SYMBOL_REF:
10959 if (speed_p)
10961 if (arm_arch_thumb2 && !flag_pic)
10962 *cost += COSTS_N_INSNS (1);
10963 else
10964 *cost += extra_cost->ldst.load;
10966 else
10967 *cost += COSTS_N_INSNS (1);
10969 if (flag_pic)
10971 *cost += COSTS_N_INSNS (1);
10972 if (speed_p)
10973 *cost += extra_cost->alu.arith;
10976 return true;
10978 case CONST_FIXED:
10979 *cost = COSTS_N_INSNS (4);
10980 /* Fixme. */
10981 return true;
10983 case CONST_DOUBLE:
10984 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10985 && (mode == SFmode || !TARGET_VFP_SINGLE))
10987 if (vfp3_const_double_rtx (x))
10989 if (speed_p)
10990 *cost += extra_cost->fp[mode == DFmode].fpconst;
10991 return true;
10994 if (speed_p)
10996 if (mode == DFmode)
10997 *cost += extra_cost->ldst.loadd;
10998 else
10999 *cost += extra_cost->ldst.loadf;
11001 else
11002 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11004 return true;
11006 *cost = COSTS_N_INSNS (4);
11007 return true;
11009 case CONST_VECTOR:
11010 /* Fixme. */
11011 if (TARGET_NEON
11012 && TARGET_HARD_FLOAT
11013 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11014 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
11015 *cost = COSTS_N_INSNS (1);
11016 else
11017 *cost = COSTS_N_INSNS (4);
11018 return true;
11020 case HIGH:
11021 case LO_SUM:
11022 /* When optimizing for size, we prefer constant pool entries to
11023 MOVW/MOVT pairs, so bump the cost of these slightly. */
11024 if (!speed_p)
11025 *cost += 1;
11026 return true;
11028 case CLZ:
11029 if (speed_p)
11030 *cost += extra_cost->alu.clz;
11031 return false;
11033 case SMIN:
11034 if (XEXP (x, 1) == const0_rtx)
11036 if (speed_p)
11037 *cost += extra_cost->alu.log_shift;
11038 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11039 return true;
11041 /* Fall through. */
11042 case SMAX:
11043 case UMIN:
11044 case UMAX:
11045 *cost += COSTS_N_INSNS (1);
11046 return false;
11048 case TRUNCATE:
11049 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11050 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11051 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11052 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11053 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11054 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11055 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11056 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11057 == ZERO_EXTEND))))
11059 if (speed_p)
11060 *cost += extra_cost->mult[1].extend;
11061 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11062 ZERO_EXTEND, 0, speed_p)
11063 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11064 ZERO_EXTEND, 0, speed_p));
11065 return true;
11067 *cost = LIBCALL_COST (1);
11068 return false;
11070 case UNSPEC_VOLATILE:
11071 case UNSPEC:
11072 return arm_unspec_cost (x, outer_code, speed_p, cost);
11074 case PC:
11075 /* Reading the PC is like reading any other register. Writing it
11076 is more expensive, but we take that into account elsewhere. */
11077 *cost = 0;
11078 return true;
11080 case ZERO_EXTRACT:
11081 /* TODO: Simple zero_extract of bottom bits using AND. */
11082 /* Fall through. */
11083 case SIGN_EXTRACT:
11084 if (arm_arch6
11085 && mode == SImode
11086 && CONST_INT_P (XEXP (x, 1))
11087 && CONST_INT_P (XEXP (x, 2)))
11089 if (speed_p)
11090 *cost += extra_cost->alu.bfx;
11091 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11092 return true;
11094 /* Without UBFX/SBFX, need to resort to shift operations. */
11095 *cost += COSTS_N_INSNS (1);
11096 if (speed_p)
11097 *cost += 2 * extra_cost->alu.shift;
11098 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11099 return true;
11101 case FLOAT_EXTEND:
11102 if (TARGET_HARD_FLOAT)
11104 if (speed_p)
11105 *cost += extra_cost->fp[mode == DFmode].widen;
11106 if (!TARGET_FPU_ARMV8
11107 && GET_MODE (XEXP (x, 0)) == HFmode)
11109 /* Pre v8, widening HF->DF is a two-step process, first
11110 widening to SFmode. */
11111 *cost += COSTS_N_INSNS (1);
11112 if (speed_p)
11113 *cost += extra_cost->fp[0].widen;
11115 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11116 return true;
11119 *cost = LIBCALL_COST (1);
11120 return false;
11122 case FLOAT_TRUNCATE:
11123 if (TARGET_HARD_FLOAT)
11125 if (speed_p)
11126 *cost += extra_cost->fp[mode == DFmode].narrow;
11127 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11128 return true;
11129 /* Vector modes? */
11131 *cost = LIBCALL_COST (1);
11132 return false;
11134 case FMA:
11135 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11137 rtx op0 = XEXP (x, 0);
11138 rtx op1 = XEXP (x, 1);
11139 rtx op2 = XEXP (x, 2);
11142 /* vfms or vfnma. */
11143 if (GET_CODE (op0) == NEG)
11144 op0 = XEXP (op0, 0);
11146 /* vfnms or vfnma. */
11147 if (GET_CODE (op2) == NEG)
11148 op2 = XEXP (op2, 0);
11150 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11151 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11152 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11154 if (speed_p)
11155 *cost += extra_cost->fp[mode == DFmode].fma;
11157 return true;
11160 *cost = LIBCALL_COST (3);
11161 return false;
11163 case FIX:
11164 case UNSIGNED_FIX:
11165 if (TARGET_HARD_FLOAT)
11167 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11168 a vcvt fixed-point conversion. */
11169 if (code == FIX && mode == SImode
11170 && GET_CODE (XEXP (x, 0)) == FIX
11171 && GET_MODE (XEXP (x, 0)) == SFmode
11172 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11173 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11174 > 0)
11176 if (speed_p)
11177 *cost += extra_cost->fp[0].toint;
11179 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11180 code, 0, speed_p);
11181 return true;
11184 if (GET_MODE_CLASS (mode) == MODE_INT)
11186 mode = GET_MODE (XEXP (x, 0));
11187 if (speed_p)
11188 *cost += extra_cost->fp[mode == DFmode].toint;
11189 /* Strip off the 'cost' of rounding towards zero. */
11190 if (GET_CODE (XEXP (x, 0)) == FIX)
11191 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11192 0, speed_p);
11193 else
11194 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11195 /* ??? Increase the cost to deal with transferring from
11196 FP -> CORE registers? */
11197 return true;
11199 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11200 && TARGET_FPU_ARMV8)
11202 if (speed_p)
11203 *cost += extra_cost->fp[mode == DFmode].roundint;
11204 return false;
11206 /* Vector costs? */
11208 *cost = LIBCALL_COST (1);
11209 return false;
11211 case FLOAT:
11212 case UNSIGNED_FLOAT:
11213 if (TARGET_HARD_FLOAT)
11215 /* ??? Increase the cost to deal with transferring from CORE
11216 -> FP registers? */
11217 if (speed_p)
11218 *cost += extra_cost->fp[mode == DFmode].fromint;
11219 return false;
11221 *cost = LIBCALL_COST (1);
11222 return false;
11224 case CALL:
11225 return true;
11227 case ASM_OPERANDS:
11229 /* Just a guess. Guess number of instructions in the asm
11230 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11231 though (see PR60663). */
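/* For example, an asm whose template contains three instructions and which
   has two input operands is costed as COSTS_N_INSNS (3 + 2).  */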
11232 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11233 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11235 *cost = COSTS_N_INSNS (asm_length + num_operands);
11236 return true;
11238 default:
11239 if (mode != VOIDmode)
11240 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11241 else
11242 *cost = COSTS_N_INSNS (4); /* Who knows? */
11243 return false;
11247 #undef HANDLE_NARROW_SHIFT_ARITH
11249 /* RTX costs: dispatch either to the old per-core cost functions or to the table-driven arm_new_rtx_costs. */
11250 static bool
11251 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11252 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11254 bool result;
11255 int code = GET_CODE (x);
11257 if (TARGET_OLD_RTX_COSTS
11258 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11260 /* Old way. (Deprecated.) */
11261 if (!speed)
11262 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11263 (enum rtx_code) outer_code, total);
11264 else
11265 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11266 (enum rtx_code) outer_code, total,
11267 speed);
11269 else
11271 /* New way. */
11272 if (current_tune->insn_extra_cost)
11273 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11274 (enum rtx_code) outer_code,
11275 current_tune->insn_extra_cost,
11276 total, speed);
11277 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11278 && current_tune->insn_extra_cost == NULL */
11279 else
11280 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11281 (enum rtx_code) outer_code,
11282 &generic_extra_costs, total, speed);
11285 if (dump_file && (dump_flags & TDF_DETAILS))
11287 print_rtl_single (dump_file, x);
11288 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11289 *total, result ? "final" : "partial");
11291 return result;
11294 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11295 supported on any "slowmul" cores, so it can be ignored. */
11297 static bool
11298 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11299 int *total, bool speed)
11301 machine_mode mode = GET_MODE (x);
11303 if (TARGET_THUMB)
11305 *total = thumb1_rtx_costs (x, code, outer_code);
11306 return true;
11309 switch (code)
11311 case MULT:
11312 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11313 || mode == DImode)
11315 *total = COSTS_N_INSNS (20);
11316 return false;
11319 if (CONST_INT_P (XEXP (x, 1)))
11321 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11322 & (unsigned HOST_WIDE_INT) 0xffffffff);
11323 int cost, const_ok = const_ok_for_arm (i);
11324 int j, booth_unit_size;
11326 /* Tune as appropriate. */
11327 cost = const_ok ? 4 : 8;
11328 booth_unit_size = 2;
11329 for (j = 0; i && j < 32; j += booth_unit_size)
11331 i >>= booth_unit_size;
11332 cost++;
11335 *total = COSTS_N_INSNS (cost);
11336 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
11337 return true;
11340 *total = COSTS_N_INSNS (20);
11341 return false;
11343 default:
11344 return arm_rtx_costs_1 (x, outer_code, total, speed);
11349 /* RTX cost for cores with a fast multiply unit (M variants). */
11351 static bool
11352 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11353 int *total, bool speed)
11355 machine_mode mode = GET_MODE (x);
11357 if (TARGET_THUMB1)
11359 *total = thumb1_rtx_costs (x, code, outer_code);
11360 return true;
11363 /* ??? should thumb2 use different costs? */
11364 switch (code)
11366 case MULT:
11367 /* There is no point basing this on the tuning, since it is always the
11368 fast variant if it exists at all. */
11369 if (mode == DImode
11370 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11371 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11372 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11374 *total = COSTS_N_INSNS (2);
11375 return false;
11379 if (mode == DImode)
11381 *total = COSTS_N_INSNS (5);
11382 return false;
11385 if (CONST_INT_P (XEXP (x, 1)))
11387 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11388 & (unsigned HOST_WIDE_INT) 0xffffffff);
11389 int cost, const_ok = const_ok_for_arm (i);
11390 int j, booth_unit_size;
11392 /* Tune as appropriate. */
11393 cost = const_ok ? 4 : 8;
11394 booth_unit_size = 8;
11395 for (j = 0; i && j < 32; j += booth_unit_size)
11397 i >>= booth_unit_size;
11398 cost++;
11401 *total = COSTS_N_INSNS (cost);
11402 return false;
11405 if (mode == SImode)
11407 *total = COSTS_N_INSNS (4);
11408 return false;
11411 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11413 if (TARGET_HARD_FLOAT
11414 && (mode == SFmode
11415 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11417 *total = COSTS_N_INSNS (1);
11418 return false;
11422 /* Requires a lib call */
11423 *total = COSTS_N_INSNS (20);
11424 return false;
11426 default:
11427 return arm_rtx_costs_1 (x, outer_code, total, speed);
11432 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11433 so it can be ignored. */
11435 static bool
11436 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11437 int *total, bool speed)
11439 machine_mode mode = GET_MODE (x);
11441 if (TARGET_THUMB)
11443 *total = thumb1_rtx_costs (x, code, outer_code);
11444 return true;
11447 switch (code)
11449 case COMPARE:
11450 if (GET_CODE (XEXP (x, 0)) != MULT)
11451 return arm_rtx_costs_1 (x, outer_code, total, speed);
11453 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11454 will stall until the multiplication is complete. */
11455 *total = COSTS_N_INSNS (3);
11456 return false;
11458 case MULT:
11459 /* There is no point basing this on the tuning, since it is always the
11460 fast variant if it exists at all. */
11461 if (mode == DImode
11462 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11463 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11464 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11466 *total = COSTS_N_INSNS (2);
11467 return false;
11471 if (mode == DImode)
11473 *total = COSTS_N_INSNS (5);
11474 return false;
11477 if (CONST_INT_P (XEXP (x, 1)))
11479 /* If operand 1 is a constant we can more accurately
11480 calculate the cost of the multiply. The multiplier can
11481 retire 15 bits on the first cycle and a further 12 on the
11482 second. We do, of course, have to load the constant into
11483 a register first. */
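/* Illustratively, a (possibly inverted) constant with no bits above bit 14
   only incurs the one cycle of general overhead, one with bits in the
   0xffff8000 range costs a second cycle, and one that also has bits in the
   top five bits (0xf8000000) a third; the masking below reflects this.  */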
11484 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11485 /* There's a general overhead of one cycle. */
11486 int cost = 1;
11487 unsigned HOST_WIDE_INT masked_const;
11489 if (i & 0x80000000)
11490 i = ~i;
11492 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11494 masked_const = i & 0xffff8000;
11495 if (masked_const != 0)
11497 cost++;
11498 masked_const = i & 0xf8000000;
11499 if (masked_const != 0)
11500 cost++;
11502 *total = COSTS_N_INSNS (cost);
11503 return false;
11506 if (mode == SImode)
11508 *total = COSTS_N_INSNS (3);
11509 return false;
11512 /* Requires a lib call */
11513 *total = COSTS_N_INSNS (20);
11514 return false;
11516 default:
11517 return arm_rtx_costs_1 (x, outer_code, total, speed);
11522 /* RTX costs for 9e (and later) cores. */
11524 static bool
11525 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11526 int *total, bool speed)
11528 machine_mode mode = GET_MODE (x);
11530 if (TARGET_THUMB1)
11532 switch (code)
11534 case MULT:
11535 /* Small multiply: 32 cycles for an integer multiply inst. */
11536 if (arm_arch6m && arm_m_profile_small_mul)
11537 *total = COSTS_N_INSNS (32);
11538 else
11539 *total = COSTS_N_INSNS (3);
11540 return true;
11542 default:
11543 *total = thumb1_rtx_costs (x, code, outer_code);
11544 return true;
11548 switch (code)
11550 case MULT:
11551 /* There is no point basing this on the tuning, since it is always the
11552 fast variant if it exists at all. */
11553 if (mode == DImode
11554 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11555 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11556 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11558 *total = COSTS_N_INSNS (2);
11559 return false;
11563 if (mode == DImode)
11565 *total = COSTS_N_INSNS (5);
11566 return false;
11569 if (mode == SImode)
11571 *total = COSTS_N_INSNS (2);
11572 return false;
11575 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11577 if (TARGET_HARD_FLOAT
11578 && (mode == SFmode
11579 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11581 *total = COSTS_N_INSNS (1);
11582 return false;
11586 *total = COSTS_N_INSNS (20);
11587 return false;
11589 default:
11590 return arm_rtx_costs_1 (x, outer_code, total, speed);
11593 /* All address computations that can be done are free, but rtx cost returns
11594 the same for practically all of them. So we weight the different types
11595 of address here in the order (most preferred first):
11596 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11597 static inline int
11598 arm_arm_address_cost (rtx x)
11600 enum rtx_code c = GET_CODE (x);
11602 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11603 return 0;
11604 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11605 return 10;
11607 if (c == PLUS)
11609 if (CONST_INT_P (XEXP (x, 1)))
11610 return 2;
11612 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11613 return 3;
11615 return 4;
11618 return 6;
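/* Illustrative costs from the weighting above (not compiled): a
   post-increment address such as [r0], #4 returns 0, [r0, #8] returns 2,
   [r0, r1, lsl #2] returns 3, [r0, r1] returns 4, a plain [r0] returns 6,
   and a SYMBOL_REF or LABEL_REF address returns 10.  */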
11621 static inline int
11622 arm_thumb_address_cost (rtx x)
11624 enum rtx_code c = GET_CODE (x);
11626 if (c == REG)
11627 return 1;
11628 if (c == PLUS
11629 && REG_P (XEXP (x, 0))
11630 && CONST_INT_P (XEXP (x, 1)))
11631 return 1;
11633 return 2;
11636 static int
11637 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11638 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11640 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11643 /* Adjust cost hook for XScale. */
11644 static bool
11645 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11647 /* Some true dependencies can have a higher cost depending
11648 on precisely how certain input operands are used. */
11649 if (REG_NOTE_KIND(link) == 0
11650 && recog_memoized (insn) >= 0
11651 && recog_memoized (dep) >= 0)
11653 int shift_opnum = get_attr_shift (insn);
11654 enum attr_type attr_type = get_attr_type (dep);
11656 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11657 operand for INSN. If we have a shifted input operand and the
11658 instruction we depend on is another ALU instruction, then we may
11659 have to account for an additional stall. */
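/* Illustrative example (assumed instruction pair, not compiled): if INSN
   is "add r0, r3, r1, lsl #2" and DEP is an ALU instruction that writes
   r1, such as "add r1, r4, r5", then the shifted input r1 comes from
   another ALU operation and the dependency cost is raised to 2 below.  */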
11660 if (shift_opnum != 0
11661 && (attr_type == TYPE_ALU_SHIFT_IMM
11662 || attr_type == TYPE_ALUS_SHIFT_IMM
11663 || attr_type == TYPE_LOGIC_SHIFT_IMM
11664 || attr_type == TYPE_LOGICS_SHIFT_IMM
11665 || attr_type == TYPE_ALU_SHIFT_REG
11666 || attr_type == TYPE_ALUS_SHIFT_REG
11667 || attr_type == TYPE_LOGIC_SHIFT_REG
11668 || attr_type == TYPE_LOGICS_SHIFT_REG
11669 || attr_type == TYPE_MOV_SHIFT
11670 || attr_type == TYPE_MVN_SHIFT
11671 || attr_type == TYPE_MOV_SHIFT_REG
11672 || attr_type == TYPE_MVN_SHIFT_REG))
11674 rtx shifted_operand;
11675 int opno;
11677 /* Get the shifted operand. */
11678 extract_insn (insn);
11679 shifted_operand = recog_data.operand[shift_opnum];
11681 /* Iterate over all the operands in DEP. If we write an operand
11682 that overlaps with SHIFTED_OPERAND, then we have to increase the
11683 cost of this dependency. */
11684 extract_insn (dep);
11685 preprocess_constraints (dep);
11686 for (opno = 0; opno < recog_data.n_operands; opno++)
11688 /* We can ignore strict inputs. */
11689 if (recog_data.operand_type[opno] == OP_IN)
11690 continue;
11692 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11693 shifted_operand))
11695 *cost = 2;
11696 return false;
11701 return true;
11704 /* Adjust cost hook for Cortex A9. */
11705 static bool
11706 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11708 switch (REG_NOTE_KIND (link))
11710 case REG_DEP_ANTI:
11711 *cost = 0;
11712 return false;
11714 case REG_DEP_TRUE:
11715 case REG_DEP_OUTPUT:
11716 if (recog_memoized (insn) >= 0
11717 && recog_memoized (dep) >= 0)
11719 if (GET_CODE (PATTERN (insn)) == SET)
11721 if (GET_MODE_CLASS
11722 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11723 || GET_MODE_CLASS
11724 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11726 enum attr_type attr_type_insn = get_attr_type (insn);
11727 enum attr_type attr_type_dep = get_attr_type (dep);
11729 /* By default all dependencies of the form
11730 s0 = s0 <op> s1
11731 s0 = s0 <op> s2
11732 have an extra latency of 1 cycle because
11733 of the input and output dependency in this
11734 case. However this gets modeled as a true
11735 dependency and hence all these checks. */
11736 if (REG_P (SET_DEST (PATTERN (insn)))
11737 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11739 /* FMACS is a special case where the dependent
11740 instruction can be issued 3 cycles before
11741 the normal latency in case of an output
11742 dependency. */
11743 if ((attr_type_insn == TYPE_FMACS
11744 || attr_type_insn == TYPE_FMACD)
11745 && (attr_type_dep == TYPE_FMACS
11746 || attr_type_dep == TYPE_FMACD))
11748 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11749 *cost = insn_default_latency (dep) - 3;
11750 else
11751 *cost = insn_default_latency (dep);
11752 return false;
11754 else
11756 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11757 *cost = insn_default_latency (dep) + 1;
11758 else
11759 *cost = insn_default_latency (dep);
11761 return false;
11766 break;
11768 default:
11769 gcc_unreachable ();
11772 return true;
11775 /* Adjust cost hook for FA726TE. */
11776 static bool
11777 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11779 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11780 followed by a predicated one) has a penalty of 3. */
11781 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11782 && recog_memoized (insn) >= 0
11783 && recog_memoized (dep) >= 0
11784 && get_attr_conds (dep) == CONDS_SET)
11786 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11787 if (get_attr_conds (insn) == CONDS_USE
11788 && get_attr_type (insn) != TYPE_BRANCH)
11790 *cost = 3;
11791 return false;
11794 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11795 || get_attr_conds (insn) == CONDS_USE)
11797 *cost = 0;
11798 return false;
11802 return true;
11805 /* Implement TARGET_REGISTER_MOVE_COST.
11807 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11808 such a move is typically more expensive than a single memory access. We set
11809 the cost to less than two memory accesses so that floating
11810 point to integer conversion does not go through memory. */
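/* Illustrative comparison (not compiled): with the costs below, moving a
   value from a VFP register to a core register costs 15, which is less
   than the 2 * 10 that a round trip through memory would cost according
   to arm_memory_move_cost below.  */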
11813 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11814 reg_class_t from, reg_class_t to)
11816 if (TARGET_32BIT)
11818 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11819 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11820 return 15;
11821 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11822 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11823 return 4;
11824 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11825 return 20;
11826 else
11827 return 2;
11829 else
11831 if (from == HI_REGS || to == HI_REGS)
11832 return 4;
11833 else
11834 return 2;
11838 /* Implement TARGET_MEMORY_MOVE_COST. */
11841 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11842 bool in ATTRIBUTE_UNUSED)
11844 if (TARGET_32BIT)
11845 return 10;
11846 else
11848 if (GET_MODE_SIZE (mode) < 4)
11849 return 8;
11850 else
11851 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11855 /* Vectorizer cost model implementation. */
11857 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11858 static int
11859 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11860 tree vectype,
11861 int misalign ATTRIBUTE_UNUSED)
11863 unsigned elements;
11865 switch (type_of_cost)
11867 case scalar_stmt:
11868 return current_tune->vec_costs->scalar_stmt_cost;
11870 case scalar_load:
11871 return current_tune->vec_costs->scalar_load_cost;
11873 case scalar_store:
11874 return current_tune->vec_costs->scalar_store_cost;
11876 case vector_stmt:
11877 return current_tune->vec_costs->vec_stmt_cost;
11879 case vector_load:
11880 return current_tune->vec_costs->vec_align_load_cost;
11882 case vector_store:
11883 return current_tune->vec_costs->vec_store_cost;
11885 case vec_to_scalar:
11886 return current_tune->vec_costs->vec_to_scalar_cost;
11888 case scalar_to_vec:
11889 return current_tune->vec_costs->scalar_to_vec_cost;
11891 case unaligned_load:
11892 return current_tune->vec_costs->vec_unalign_load_cost;
11894 case unaligned_store:
11895 return current_tune->vec_costs->vec_unalign_store_cost;
11897 case cond_branch_taken:
11898 return current_tune->vec_costs->cond_taken_branch_cost;
11900 case cond_branch_not_taken:
11901 return current_tune->vec_costs->cond_not_taken_branch_cost;
11903 case vec_perm:
11904 case vec_promote_demote:
11905 return current_tune->vec_costs->vec_stmt_cost;
11907 case vec_construct:
11908 elements = TYPE_VECTOR_SUBPARTS (vectype);
11909 return elements / 2 + 1;
11911 default:
11912 gcc_unreachable ();
11916 /* Implement targetm.vectorize.add_stmt_cost. */
11918 static unsigned
11919 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11920 struct _stmt_vec_info *stmt_info, int misalign,
11921 enum vect_cost_model_location where)
11923 unsigned *cost = (unsigned *) data;
11924 unsigned retval = 0;
11926 if (flag_vect_cost_model)
11928 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11929 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11931 /* Statements in an inner loop relative to the loop being
11932 vectorized are weighted more heavily. The value here is
11933 arbitrary and could potentially be improved with analysis. */
11934 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11935 count *= 50; /* FIXME. */
11937 retval = (unsigned) (count * stmt_cost);
11938 cost[where] += retval;
11941 return retval;
11944 /* Return true if and only if this insn can dual-issue only as older. */
11945 static bool
11946 cortexa7_older_only (rtx_insn *insn)
11948 if (recog_memoized (insn) < 0)
11949 return false;
11951 switch (get_attr_type (insn))
11953 case TYPE_ALU_DSP_REG:
11954 case TYPE_ALU_SREG:
11955 case TYPE_ALUS_SREG:
11956 case TYPE_LOGIC_REG:
11957 case TYPE_LOGICS_REG:
11958 case TYPE_ADC_REG:
11959 case TYPE_ADCS_REG:
11960 case TYPE_ADR:
11961 case TYPE_BFM:
11962 case TYPE_REV:
11963 case TYPE_MVN_REG:
11964 case TYPE_SHIFT_IMM:
11965 case TYPE_SHIFT_REG:
11966 case TYPE_LOAD_BYTE:
11967 case TYPE_LOAD1:
11968 case TYPE_STORE1:
11969 case TYPE_FFARITHS:
11970 case TYPE_FADDS:
11971 case TYPE_FFARITHD:
11972 case TYPE_FADDD:
11973 case TYPE_FMOV:
11974 case TYPE_F_CVT:
11975 case TYPE_FCMPS:
11976 case TYPE_FCMPD:
11977 case TYPE_FCONSTS:
11978 case TYPE_FCONSTD:
11979 case TYPE_FMULS:
11980 case TYPE_FMACS:
11981 case TYPE_FMULD:
11982 case TYPE_FMACD:
11983 case TYPE_FDIVS:
11984 case TYPE_FDIVD:
11985 case TYPE_F_MRC:
11986 case TYPE_F_MRRC:
11987 case TYPE_F_FLAG:
11988 case TYPE_F_LOADS:
11989 case TYPE_F_STORES:
11990 return true;
11991 default:
11992 return false;
11996 /* Return true if and only if this insn can dual-issue as younger. */
11997 static bool
11998 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12000 if (recog_memoized (insn) < 0)
12002 if (verbose > 5)
12003 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12004 return false;
12007 switch (get_attr_type (insn))
12009 case TYPE_ALU_IMM:
12010 case TYPE_ALUS_IMM:
12011 case TYPE_LOGIC_IMM:
12012 case TYPE_LOGICS_IMM:
12013 case TYPE_EXTEND:
12014 case TYPE_MVN_IMM:
12015 case TYPE_MOV_IMM:
12016 case TYPE_MOV_REG:
12017 case TYPE_MOV_SHIFT:
12018 case TYPE_MOV_SHIFT_REG:
12019 case TYPE_BRANCH:
12020 case TYPE_CALL:
12021 return true;
12022 default:
12023 return false;
12028 /* Look for an instruction that can dual issue only as an older
12029 instruction, and move it in front of any instructions that can
12030 dual-issue as younger, while preserving the relative order of all
12031 other instructions in the ready list. This is a heuristic to help
12032 dual-issue in later cycles, by postponing the issue of more flexible
12033 instructions. This heuristic may affect dual-issue opportunities
12034 in the current cycle. */
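/* Illustrative example (assumed ready list, not compiled): if the list
   holds, head first, "mov r0, #1" (younger-capable), "ldr r1, [r2]"
   (older-only) and "add r3, r3, #4" (younger-capable), then the ldr is
   moved ahead of the first mov so that the more flexible instructions
   remain available to pair as younger in later cycles.  */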
12035 static void
12036 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12037 int *n_readyp, int clock)
12039 int i;
12040 int first_older_only = -1, first_younger = -1;
12042 if (verbose > 5)
12043 fprintf (file,
12044 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12045 clock,
12046 *n_readyp);
12048 /* Traverse the ready list from the head (the instruction to issue
12049 first), looking for the first instruction that can issue as
12050 younger and the first instruction that can dual-issue only as
12051 older. */
12052 for (i = *n_readyp - 1; i >= 0; i--)
12054 rtx_insn *insn = ready[i];
12055 if (cortexa7_older_only (insn))
12057 first_older_only = i;
12058 if (verbose > 5)
12059 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12060 break;
12062 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12063 first_younger = i;
12066 /* Nothing to reorder because either no younger insn found or insn
12067 that can dual-issue only as older appears before any insn that
12068 can dual-issue as younger. */
12069 if (first_younger == -1)
12071 if (verbose > 5)
12072 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12073 return;
12076 /* Nothing to reorder because no older-only insn in the ready list. */
12077 if (first_older_only == -1)
12079 if (verbose > 5)
12080 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12081 return;
12084 /* Move first_older_only insn before first_younger. */
12085 if (verbose > 5)
12086 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12087 INSN_UID(ready [first_older_only]),
12088 INSN_UID(ready [first_younger]));
12089 rtx_insn *first_older_only_insn = ready [first_older_only];
12090 for (i = first_older_only; i < first_younger; i++)
12092 ready[i] = ready[i+1];
12095 ready[i] = first_older_only_insn;
12096 return;
12099 /* Implement TARGET_SCHED_REORDER. */
12100 static int
12101 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12102 int clock)
12104 switch (arm_tune)
12106 case cortexa7:
12107 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12108 break;
12109 default:
12110 /* Do nothing for other cores. */
12111 break;
12114 return arm_issue_rate ();
12117 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12118 It corrects the value of COST based on the relationship between
12119 INSN and DEP through the dependence LINK. It returns the new
12120 value. There is a per-core adjust_cost hook to adjust scheduler costs
12121 and the per-core hook can choose to completely override the generic
12122 adjust_cost function. Only put bits of code into arm_adjust_cost that
12123 are common across all cores. */
12124 static int
12125 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
12127 rtx i_pat, d_pat;
12129 /* When generating Thumb-1 code, we want to place flag-setting operations
12130 close to a conditional branch which depends on them, so that we can
12131 omit the comparison. */
12132 if (TARGET_THUMB1
12133 && REG_NOTE_KIND (link) == 0
12134 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12135 && recog_memoized (dep) >= 0
12136 && get_attr_conds (dep) == CONDS_SET)
12137 return 0;
12139 if (current_tune->sched_adjust_cost != NULL)
12141 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
12142 return cost;
12145 /* XXX Is this strictly true? */
12146 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
12147 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
12148 return 0;
12150 /* Call insns don't incur a stall, even if they follow a load. */
12151 if (REG_NOTE_KIND (link) == 0
12152 && CALL_P (insn))
12153 return 1;
12155 if ((i_pat = single_set (insn)) != NULL
12156 && MEM_P (SET_SRC (i_pat))
12157 && (d_pat = single_set (dep)) != NULL
12158 && MEM_P (SET_DEST (d_pat)))
12160 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12161 /* This is a load after a store; there is no conflict if the load reads
12162 from a cached area. Assume that loads from the stack, and from the
12163 constant pool are cached, and that others will miss. This is a
12164 hack. */
12166 if ((GET_CODE (src_mem) == SYMBOL_REF
12167 && CONSTANT_POOL_ADDRESS_P (src_mem))
12168 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12169 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12170 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12171 return 1;
12174 return cost;
12178 arm_max_conditional_execute (void)
12180 return max_insns_skipped;
12183 static int
12184 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12186 if (TARGET_32BIT)
12187 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12188 else
12189 return (optimize > 0) ? 2 : 0;
12192 static int
12193 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12195 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12198 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12199 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12200 sequences of non-executed instructions in IT blocks probably take the same
12201 amount of time as executed instructions (and the IT instruction itself takes
12202 space in icache). This function was experimentally determined to give good
12203 results on a popular embedded benchmark. */
12205 static int
12206 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12208 return (TARGET_32BIT && speed_p) ? 1
12209 : arm_default_branch_cost (speed_p, predictable_p);
12212 static int
12213 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12215 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12218 static bool fp_consts_inited = false;
12220 static REAL_VALUE_TYPE value_fp0;
12222 static void
12223 init_fp_table (void)
12225 REAL_VALUE_TYPE r;
12227 r = REAL_VALUE_ATOF ("0", DFmode);
12228 value_fp0 = r;
12229 fp_consts_inited = true;
12232 /* Return TRUE if rtx X is a valid immediate FP constant. */
12234 arm_const_double_rtx (rtx x)
12236 const REAL_VALUE_TYPE *r;
12238 if (!fp_consts_inited)
12239 init_fp_table ();
12241 r = CONST_DOUBLE_REAL_VALUE (x);
12242 if (REAL_VALUE_MINUS_ZERO (*r))
12243 return 0;
12245 if (real_equal (r, &value_fp0))
12246 return 1;
12248 return 0;
12251 /* VFPv3 has a fairly wide range of representable immediates, formed from
12252 "quarter-precision" floating-point values. These can be evaluated using this
12253 formula (with ^ for exponentiation):
12255 (-1)^s * n * 2^(-r)
12257 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12258 16 <= n <= 31 and 0 <= r <= 7.
12260 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12262 - A (most-significant) is the sign bit.
12263 - BCD are the exponent (encoded as r XOR 3).
12264 - EFGH are the mantissa (encoded as n - 16).
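   As an illustrative example derived from the formula above (not checked
   against hardware documentation): 1.0 = 16 * 2^(-4), so s = 0, n = 16,
   r = 4, giving ABCDEFGH = 0 111 0000 = 0x70, while 0.5 = 16 * 2^(-5)
   gives 0 110 0000 = 0x60.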
12267 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12268 fconst[sd] instruction, or -1 if X isn't suitable. */
12269 static int
12270 vfp3_const_double_index (rtx x)
12272 REAL_VALUE_TYPE r, m;
12273 int sign, exponent;
12274 unsigned HOST_WIDE_INT mantissa, mant_hi;
12275 unsigned HOST_WIDE_INT mask;
12276 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12277 bool fail;
12279 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12280 return -1;
12282 r = *CONST_DOUBLE_REAL_VALUE (x);
12284 /* We can't represent these things, so detect them first. */
12285 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12286 return -1;
12288 /* Extract sign, exponent and mantissa. */
12289 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12290 r = real_value_abs (&r);
12291 exponent = REAL_EXP (&r);
12292 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12293 highest (sign) bit, with a fixed binary point at bit point_pos.
12294 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12295 bits for the mantissa, this may fail (low bits would be lost). */
12296 real_ldexp (&m, &r, point_pos - exponent);
12297 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12298 mantissa = w.elt (0);
12299 mant_hi = w.elt (1);
12301 /* If there are bits set in the low part of the mantissa, we can't
12302 represent this value. */
12303 if (mantissa != 0)
12304 return -1;
12306 /* Now make it so that mantissa contains the most-significant bits, and move
12307 the point_pos to indicate that the least-significant bits have been
12308 discarded. */
12309 point_pos -= HOST_BITS_PER_WIDE_INT;
12310 mantissa = mant_hi;
12312 /* We can permit four significant bits of mantissa only, plus a high bit
12313 which is always 1. */
12314 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12315 if ((mantissa & mask) != 0)
12316 return -1;
12318 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12319 mantissa >>= point_pos - 5;
12321 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12322 floating-point immediate zero with Neon using an integer-zero load, but
12323 that case is handled elsewhere.) */
12324 if (mantissa == 0)
12325 return -1;
12327 gcc_assert (mantissa >= 16 && mantissa <= 31);
12329 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12330 normalized significands are in the range [1, 2). (Our mantissa is shifted
12331 left 4 places at this point relative to normalized IEEE754 values). GCC
12332 internally uses [0.5, 1) (see real.c), so the exponent returned from
12333 REAL_EXP must be altered. */
12334 exponent = 5 - exponent;
12336 if (exponent < 0 || exponent > 7)
12337 return -1;
12339 /* Sign, mantissa and exponent are now in the correct form to plug into the
12340 formula described in the comment above. */
12341 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12344 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12346 vfp3_const_double_rtx (rtx x)
12348 if (!TARGET_VFP3)
12349 return 0;
12351 return vfp3_const_double_index (x) != -1;
12354 /* Recognize immediates which can be used in various Neon instructions. Legal
12355 immediates are described by the following table (for VMVN variants, the
12356 bitwise inverse of the constant shown is recognized. In either case, VMOV
12357 is output and the correct instruction to use for a given constant is chosen
12358 by the assembler). The constant shown is replicated across all elements of
12359 the destination vector.
12361 insn elems variant constant (binary)
12362 ---- ----- ------- -----------------
12363 vmov i32 0 00000000 00000000 00000000 abcdefgh
12364 vmov i32 1 00000000 00000000 abcdefgh 00000000
12365 vmov i32 2 00000000 abcdefgh 00000000 00000000
12366 vmov i32 3 abcdefgh 00000000 00000000 00000000
12367 vmov i16 4 00000000 abcdefgh
12368 vmov i16 5 abcdefgh 00000000
12369 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12370 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12371 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12372 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12373 vmvn i16 10 00000000 abcdefgh
12374 vmvn i16 11 abcdefgh 00000000
12375 vmov i32 12 00000000 00000000 abcdefgh 11111111
12376 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12377 vmov i32 14 00000000 abcdefgh 11111111 11111111
12378 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12379 vmov i8 16 abcdefgh
12380 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12381 eeeeeeee ffffffff gggggggg hhhhhhhh
12382 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12383 vmov f32 19 00000000 00000000 00000000 00000000
12385 For case 18, B = !b. Representable values are exactly those accepted by
12386 vfp3_const_double_index, but are output as floating-point numbers rather
12387 than indices.
12389 For case 19, we will change it to vmov.i32 when assembling.
12391 Variants 0-5 (inclusive) may also be used as immediates for the second
12392 operand of VORR/VBIC instructions.
12394 The INVERSE argument causes the bitwise inverse of the given operand to be
12395 recognized instead (used for recognizing legal immediates for the VAND/VORN
12396 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12397 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12398 output, rather than the real insns vbic/vorr).
12400 INVERSE makes no difference to the recognition of float vectors.
12402 The return value is the variant of immediate as shown in the above table, or
12403 -1 if the given value doesn't match any of the listed patterns.
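   As an illustrative example: a V4SI vector of four copies of 0x000000ab
   matches variant 0 with element width 32, while four copies of
   0x0000ab00 match variant 1; INVERSE and the float cases do not apply
   to either.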
12405 static int
12406 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12407 rtx *modconst, int *elementwidth)
12409 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12410 matches = 1; \
12411 for (i = 0; i < idx; i += (STRIDE)) \
12412 if (!(TEST)) \
12413 matches = 0; \
12414 if (matches) \
12416 immtype = (CLASS); \
12417 elsize = (ELSIZE); \
12418 break; \
12421 unsigned int i, elsize = 0, idx = 0, n_elts;
12422 unsigned int innersize;
12423 unsigned char bytes[16];
12424 int immtype = -1, matches;
12425 unsigned int invmask = inverse ? 0xff : 0;
12426 bool vector = GET_CODE (op) == CONST_VECTOR;
12428 if (vector)
12429 n_elts = CONST_VECTOR_NUNITS (op);
12430 else
12432 n_elts = 1;
12433 if (mode == VOIDmode)
12434 mode = DImode;
12437 innersize = GET_MODE_UNIT_SIZE (mode);
12439 /* Vectors of float constants. */
12440 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12442 rtx el0 = CONST_VECTOR_ELT (op, 0);
12443 const REAL_VALUE_TYPE *r0;
12445 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12446 return -1;
12448 /* FP16 vectors cannot be represented. */
12449 if (GET_MODE_INNER (mode) == HFmode)
12450 return -1;
12452 r0 = CONST_DOUBLE_REAL_VALUE (el0);
12454 for (i = 1; i < n_elts; i++)
12456 rtx elt = CONST_VECTOR_ELT (op, i);
12457 if (!real_equal (r0, CONST_DOUBLE_REAL_VALUE (elt)))
12458 return -1;
12461 if (modconst)
12462 *modconst = CONST_VECTOR_ELT (op, 0);
12464 if (elementwidth)
12465 *elementwidth = 0;
12467 if (el0 == CONST0_RTX (GET_MODE (el0)))
12468 return 19;
12469 else
12470 return 18;
12473 /* Splat vector constant out into a byte vector. */
12474 for (i = 0; i < n_elts; i++)
12476 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12477 unsigned HOST_WIDE_INT elpart;
12479 gcc_assert (CONST_INT_P (el));
12480 elpart = INTVAL (el);
12482 for (unsigned int byte = 0; byte < innersize; byte++)
12484 bytes[idx++] = (elpart & 0xff) ^ invmask;
12485 elpart >>= BITS_PER_UNIT;
12489 /* Sanity check. */
12490 gcc_assert (idx == GET_MODE_SIZE (mode));
12494 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12495 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12497 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12498 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12500 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12501 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12503 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12504 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12506 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12508 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12510 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12511 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12513 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12514 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12516 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12517 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12519 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12520 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12522 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12524 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12526 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12527 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12529 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12530 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12532 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12533 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12535 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12536 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12538 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12540 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12541 && bytes[i] == bytes[(i + 8) % idx]);
12543 while (0);
12545 if (immtype == -1)
12546 return -1;
12548 if (elementwidth)
12549 *elementwidth = elsize;
12551 if (modconst)
12553 unsigned HOST_WIDE_INT imm = 0;
12555 /* Un-invert bytes of recognized vector, if necessary. */
12556 if (invmask != 0)
12557 for (i = 0; i < idx; i++)
12558 bytes[i] ^= invmask;
12560 if (immtype == 17)
12562 /* FIXME: Broken on 32-bit H_W_I hosts. */
12563 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12565 for (i = 0; i < 8; i++)
12566 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12567 << (i * BITS_PER_UNIT);
12569 *modconst = GEN_INT (imm);
12571 else
12573 unsigned HOST_WIDE_INT imm = 0;
12575 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12576 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12578 *modconst = GEN_INT (imm);
12582 return immtype;
12583 #undef CHECK
12586 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12587 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12588 float elements), and a modified constant (whatever should be output for a
12589 VMOV) in *MODCONST. */
12592 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12593 rtx *modconst, int *elementwidth)
12595 rtx tmpconst;
12596 int tmpwidth;
12597 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12599 if (retval == -1)
12600 return 0;
12602 if (modconst)
12603 *modconst = tmpconst;
12605 if (elementwidth)
12606 *elementwidth = tmpwidth;
12608 return 1;
12611 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12612 the immediate is valid, write a constant suitable for use as an operand
12613 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12614 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12617 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12618 rtx *modconst, int *elementwidth)
12620 rtx tmpconst;
12621 int tmpwidth;
12622 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12624 if (retval < 0 || retval > 5)
12625 return 0;
12627 if (modconst)
12628 *modconst = tmpconst;
12630 if (elementwidth)
12631 *elementwidth = tmpwidth;
12633 return 1;
12636 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12637 the immediate is valid, write a constant suitable for use as an operand
12638 to VSHR/VSHL to *MODCONST and the corresponding element width to
12639 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left and a right shift,
12640 because they have different limitations. */
12643 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12644 rtx *modconst, int *elementwidth,
12645 bool isleftshift)
12647 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12648 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12649 unsigned HOST_WIDE_INT last_elt = 0;
12650 unsigned HOST_WIDE_INT maxshift;
12652 /* Split vector constant out into a byte vector. */
12653 for (i = 0; i < n_elts; i++)
12655 rtx el = CONST_VECTOR_ELT (op, i);
12656 unsigned HOST_WIDE_INT elpart;
12658 if (CONST_INT_P (el))
12659 elpart = INTVAL (el);
12660 else if (CONST_DOUBLE_P (el))
12661 return 0;
12662 else
12663 gcc_unreachable ();
12665 if (i != 0 && elpart != last_elt)
12666 return 0;
12668 last_elt = elpart;
12671 /* Shift less than element size. */
12672 maxshift = innersize * 8;
12674 if (isleftshift)
12676 /* Left shift immediate value can be from 0 to <size>-1. */
12677 if (last_elt >= maxshift)
12678 return 0;
12680 else
12682 /* Right shift immediate value can be from 1 to <size>. */
12683 if (last_elt == 0 || last_elt > maxshift)
12684 return 0;
12687 if (elementwidth)
12688 *elementwidth = innersize * 8;
12690 if (modconst)
12691 *modconst = CONST_VECTOR_ELT (op, 0);
12693 return 1;
12696 /* Return a string suitable for output of Neon immediate logic operation
12697 MNEM. */
12699 char *
12700 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12701 int inverse, int quad)
12703 int width, is_valid;
12704 static char templ[40];
12706 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12708 gcc_assert (is_valid != 0);
12710 if (quad)
12711 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12712 else
12713 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12715 return templ;
12718 /* Return a string suitable for output of Neon immediate shift operation
12719 (VSHR or VSHL) MNEM. */
12721 char *
12722 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12723 machine_mode mode, int quad,
12724 bool isleftshift)
12726 int width, is_valid;
12727 static char templ[40];
12729 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12730 gcc_assert (is_valid != 0);
12732 if (quad)
12733 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12734 else
12735 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12737 return templ;
12740 /* Output a sequence of pairwise operations to implement a reduction.
12741 NOTE: We do "too much work" here, because pairwise operations work on two
12742 registers-worth of operands in one go. Unfortunately we don't think we can
12743 exploit those extra calculations to do the full operation in fewer steps.
12744 Although all vector elements of the result but the first are ignored, we
12745 actually calculate the same result in each of the elements. An alternative
12746 such as initially loading a vector with zero to use as each of the second
12747 operands would use up an additional register and take an extra instruction,
12748 for no particular gain. */
12750 void
12751 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12752 rtx (*reduc) (rtx, rtx, rtx))
12754 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12755 rtx tmpsum = op1;
12757 for (i = parts / 2; i >= 1; i /= 2)
12759 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12760 emit_insn (reduc (dest, tmpsum, tmpsum));
12761 tmpsum = dest;
12765 /* If VALS is a vector constant that can be loaded into a register
12766 using VDUP, generate instructions to do so and return an RTX to
12767 assign to the register. Otherwise return NULL_RTX. */
12769 static rtx
12770 neon_vdup_constant (rtx vals)
12772 machine_mode mode = GET_MODE (vals);
12773 machine_mode inner_mode = GET_MODE_INNER (mode);
12774 rtx x;
12776 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12777 return NULL_RTX;
12779 if (!const_vec_duplicate_p (vals, &x))
12780 /* The elements are not all the same. We could handle repeating
12781 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12782 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12783 vdup.i16). */
12784 return NULL_RTX;
12786 /* We can load this constant by using VDUP and a constant in a
12787 single ARM register. This will be cheaper than a vector
12788 load. */
12790 x = copy_to_mode_reg (inner_mode, x);
12791 return gen_rtx_VEC_DUPLICATE (mode, x);
12794 /* Generate code to load VALS, which is a PARALLEL containing only
12795 constants (for vec_init) or CONST_VECTOR, efficiently into a
12796 register. Returns an RTX to copy into the register, or NULL_RTX
12797 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12800 neon_make_constant (rtx vals)
12802 machine_mode mode = GET_MODE (vals);
12803 rtx target;
12804 rtx const_vec = NULL_RTX;
12805 int n_elts = GET_MODE_NUNITS (mode);
12806 int n_const = 0;
12807 int i;
12809 if (GET_CODE (vals) == CONST_VECTOR)
12810 const_vec = vals;
12811 else if (GET_CODE (vals) == PARALLEL)
12813 /* A CONST_VECTOR must contain only CONST_INTs and
12814 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12815 Only store valid constants in a CONST_VECTOR. */
12816 for (i = 0; i < n_elts; ++i)
12818 rtx x = XVECEXP (vals, 0, i);
12819 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12820 n_const++;
12822 if (n_const == n_elts)
12823 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12825 else
12826 gcc_unreachable ();
12828 if (const_vec != NULL
12829 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12830 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12831 return const_vec;
12832 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12833 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12834 pipeline cycle; creating the constant takes one or two ARM
12835 pipeline cycles. */
12836 return target;
12837 else if (const_vec != NULL_RTX)
12838 /* Load from constant pool. On Cortex-A8 this takes two cycles
12839 (for either double or quad vectors). We cannot take advantage
12840 of single-cycle VLD1 because we need a PC-relative addressing
12841 mode. */
12842 return const_vec;
12843 else
12844 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12845 We can not construct an initializer. */
12846 return NULL_RTX;
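/* Illustrative examples (not compiled): a V4SI vector of four copies of 1
   is a valid VMOV immediate and is returned as the CONST_VECTOR itself;
   four copies of 0x12345678 are not, but can be built with VDUP from a
   core register; four unrelated arbitrary constants fall back to the
   constant-pool load.  */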
12849 /* Initialize vector TARGET to VALS. */
12851 void
12852 neon_expand_vector_init (rtx target, rtx vals)
12854 machine_mode mode = GET_MODE (target);
12855 machine_mode inner_mode = GET_MODE_INNER (mode);
12856 int n_elts = GET_MODE_NUNITS (mode);
12857 int n_var = 0, one_var = -1;
12858 bool all_same = true;
12859 rtx x, mem;
12860 int i;
12862 for (i = 0; i < n_elts; ++i)
12864 x = XVECEXP (vals, 0, i);
12865 if (!CONSTANT_P (x))
12866 ++n_var, one_var = i;
12868 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12869 all_same = false;
12872 if (n_var == 0)
12874 rtx constant = neon_make_constant (vals);
12875 if (constant != NULL_RTX)
12877 emit_move_insn (target, constant);
12878 return;
12882 /* Splat a single non-constant element if we can. */
12883 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12885 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12886 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12887 return;
12890 /* One field is non-constant. Load constant then overwrite varying
12891 field. This is more efficient than using the stack. */
12892 if (n_var == 1)
12894 rtx copy = copy_rtx (vals);
12895 rtx index = GEN_INT (one_var);
12897 /* Load constant part of vector, substitute neighboring value for
12898 varying element. */
12899 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12900 neon_expand_vector_init (target, copy);
12902 /* Insert variable. */
12903 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12904 switch (mode)
12906 case V8QImode:
12907 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12908 break;
12909 case V16QImode:
12910 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12911 break;
12912 case V4HImode:
12913 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12914 break;
12915 case V8HImode:
12916 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12917 break;
12918 case V2SImode:
12919 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12920 break;
12921 case V4SImode:
12922 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12923 break;
12924 case V2SFmode:
12925 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12926 break;
12927 case V4SFmode:
12928 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12929 break;
12930 case V2DImode:
12931 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12932 break;
12933 default:
12934 gcc_unreachable ();
12936 return;
12939 /* Construct the vector in memory one field at a time
12940 and load the whole vector. */
12941 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12942 for (i = 0; i < n_elts; i++)
12943 emit_move_insn (adjust_address_nv (mem, inner_mode,
12944 i * GET_MODE_SIZE (inner_mode)),
12945 XVECEXP (vals, 0, i));
12946 emit_move_insn (target, mem);
12949 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12950 an error if it doesn't. EXP indicates the source location, which includes the
12951 inlining history for intrinsics. */
12953 static void
12954 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12955 const_tree exp, const char *desc)
12957 HOST_WIDE_INT lane;
12959 gcc_assert (CONST_INT_P (operand));
12961 lane = INTVAL (operand);
12963 if (lane < low || lane >= high)
12965 if (exp)
12966 error ("%K%s %wd out of range %wd - %wd",
12967 exp, desc, lane, low, high - 1);
12968 else
12969 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12973 /* Bounds-check lanes. */
12975 void
12976 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12977 const_tree exp)
12979 bounds_check (operand, low, high, exp, "lane");
12982 /* Bounds-check constants. */
12984 void
12985 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12987 bounds_check (operand, low, high, NULL_TREE, "constant");
12990 HOST_WIDE_INT
12991 neon_element_bits (machine_mode mode)
12993 return GET_MODE_UNIT_BITSIZE (mode);
12997 /* Predicates for `match_operand' and `match_operator'. */
12999 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13000 WB is true if full writeback address modes are allowed and is false
13001 if limited writeback address modes (POST_INC and PRE_DEC) are
13002 allowed. */
13005 arm_coproc_mem_operand (rtx op, bool wb)
13007 rtx ind;
13009 /* Reject eliminable registers. */
13010 if (! (reload_in_progress || reload_completed || lra_in_progress)
13011 && ( reg_mentioned_p (frame_pointer_rtx, op)
13012 || reg_mentioned_p (arg_pointer_rtx, op)
13013 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13014 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13015 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13016 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13017 return FALSE;
13019 /* Constants are converted into offsets from labels. */
13020 if (!MEM_P (op))
13021 return FALSE;
13023 ind = XEXP (op, 0);
13025 if (reload_completed
13026 && (GET_CODE (ind) == LABEL_REF
13027 || (GET_CODE (ind) == CONST
13028 && GET_CODE (XEXP (ind, 0)) == PLUS
13029 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13030 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13031 return TRUE;
13033 /* Match: (mem (reg)). */
13034 if (REG_P (ind))
13035 return arm_address_register_rtx_p (ind, 0);
13037 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13038 acceptable in any case (subject to verification by
13039 arm_address_register_rtx_p). We need WB to be true to accept
13040 PRE_INC and POST_DEC. */
13041 if (GET_CODE (ind) == POST_INC
13042 || GET_CODE (ind) == PRE_DEC
13043 || (wb
13044 && (GET_CODE (ind) == PRE_INC
13045 || GET_CODE (ind) == POST_DEC)))
13046 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13048 if (wb
13049 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13050 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13051 && GET_CODE (XEXP (ind, 1)) == PLUS
13052 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13053 ind = XEXP (ind, 1);
13055 /* Match:
13056 (plus (reg)
13057 (const)). */
13058 if (GET_CODE (ind) == PLUS
13059 && REG_P (XEXP (ind, 0))
13060 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13061 && CONST_INT_P (XEXP (ind, 1))
13062 && INTVAL (XEXP (ind, 1)) > -1024
13063 && INTVAL (XEXP (ind, 1)) < 1024
13064 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13065 return TRUE;
13067 return FALSE;
13070 /* Return TRUE if OP is a memory operand that we can load a vector from or
13071 store a vector to. TYPE is one of the following values:
13072 0 - Vector load/store (vldr)
13073 1 - Core registers (ldm)
13074 2 - Element/structure loads (vld1)
13077 neon_vector_mem_operand (rtx op, int type, bool strict)
13079 rtx ind;
13081 /* Reject eliminable registers. */
13082 if (strict && ! (reload_in_progress || reload_completed)
13083 && (reg_mentioned_p (frame_pointer_rtx, op)
13084 || reg_mentioned_p (arg_pointer_rtx, op)
13085 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13086 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13087 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13088 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13089 return FALSE;
13091 /* Constants are converted into offsets from labels. */
13092 if (!MEM_P (op))
13093 return FALSE;
13095 ind = XEXP (op, 0);
13097 if (reload_completed
13098 && (GET_CODE (ind) == LABEL_REF
13099 || (GET_CODE (ind) == CONST
13100 && GET_CODE (XEXP (ind, 0)) == PLUS
13101 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13102 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13103 return TRUE;
13105 /* Match: (mem (reg)). */
13106 if (REG_P (ind))
13107 return arm_address_register_rtx_p (ind, 0);
13109 /* Allow post-increment with Neon registers. */
13110 if ((type != 1 && GET_CODE (ind) == POST_INC)
13111 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13112 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13114 /* Allow post-increment by register for VLDn */
13115 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13116 && GET_CODE (XEXP (ind, 1)) == PLUS
13117 && REG_P (XEXP (XEXP (ind, 1), 1)))
13118 return true;
13120 /* Match:
13121 (plus (reg)
13122 (const)). */
13123 if (type == 0
13124 && GET_CODE (ind) == PLUS
13125 && REG_P (XEXP (ind, 0))
13126 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13127 && CONST_INT_P (XEXP (ind, 1))
13128 && INTVAL (XEXP (ind, 1)) > -1024
13129 /* For quad modes, we restrict the constant offset to be slightly less
13130 than what the instruction format permits. We have no such constraint
13131 on double mode offsets. (This must match arm_legitimate_index_p.) */
13132 && (INTVAL (XEXP (ind, 1))
13133 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13134 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13135 return TRUE;
13137 return FALSE;
13140 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13141 type. */
13143 neon_struct_mem_operand (rtx op)
13145 rtx ind;
13147 /* Reject eliminable registers. */
13148 if (! (reload_in_progress || reload_completed)
13149 && ( reg_mentioned_p (frame_pointer_rtx, op)
13150 || reg_mentioned_p (arg_pointer_rtx, op)
13151 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13152 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13153 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13154 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13155 return FALSE;
13157 /* Constants are converted into offsets from labels. */
13158 if (!MEM_P (op))
13159 return FALSE;
13161 ind = XEXP (op, 0);
13163 if (reload_completed
13164 && (GET_CODE (ind) == LABEL_REF
13165 || (GET_CODE (ind) == CONST
13166 && GET_CODE (XEXP (ind, 0)) == PLUS
13167 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13168 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13169 return TRUE;
13171 /* Match: (mem (reg)). */
13172 if (REG_P (ind))
13173 return arm_address_register_rtx_p (ind, 0);
13175 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13176 if (GET_CODE (ind) == POST_INC
13177 || GET_CODE (ind) == PRE_DEC)
13178 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13180 return FALSE;
13183 /* Return true if X is a register that will be eliminated later on. */
13185 arm_eliminable_register (rtx x)
13187 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13188 || REGNO (x) == ARG_POINTER_REGNUM
13189 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13190 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13193 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13194 coprocessor registers. Otherwise return NO_REGS. */
13196 enum reg_class
13197 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13199 if (mode == HFmode)
13201 if (!TARGET_NEON_FP16)
13202 return GENERAL_REGS;
13203 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13204 return NO_REGS;
13205 return GENERAL_REGS;
13208 /* The neon move patterns handle all legitimate vector and struct
13209 addresses. */
13210 if (TARGET_NEON
13211 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13212 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13213 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13214 || VALID_NEON_STRUCT_MODE (mode)))
13215 return NO_REGS;
13217 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13218 return NO_REGS;
13220 return GENERAL_REGS;
13223 /* Values which must be returned in the most-significant end of the return
13224 register. */
13226 static bool
13227 arm_return_in_msb (const_tree valtype)
13229 return (TARGET_AAPCS_BASED
13230 && BYTES_BIG_ENDIAN
13231 && (AGGREGATE_TYPE_P (valtype)
13232 || TREE_CODE (valtype) == COMPLEX_TYPE
13233 || FIXED_POINT_TYPE_P (valtype)));
13236 /* Return TRUE if X references a SYMBOL_REF. */
13238 symbol_mentioned_p (rtx x)
13240 const char * fmt;
13241 int i;
13243 if (GET_CODE (x) == SYMBOL_REF)
13244 return 1;
13246 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13247 are constant offsets, not symbols. */
13248 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13249 return 0;
13251 fmt = GET_RTX_FORMAT (GET_CODE (x));
13253 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13255 if (fmt[i] == 'E')
13257 int j;
13259 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13260 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13261 return 1;
13263 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13264 return 1;
13267 return 0;
13270 /* Return TRUE if X references a LABEL_REF. */
13272 label_mentioned_p (rtx x)
13274 const char * fmt;
13275 int i;
13277 if (GET_CODE (x) == LABEL_REF)
13278 return 1;
13280 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13281 instruction, but they are constant offsets, not symbols. */
13282 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13283 return 0;
13285 fmt = GET_RTX_FORMAT (GET_CODE (x));
13286 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13288 if (fmt[i] == 'E')
13290 int j;
13292 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13293 if (label_mentioned_p (XVECEXP (x, i, j)))
13294 return 1;
13296 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13297 return 1;
13300 return 0;
13304 tls_mentioned_p (rtx x)
13306 switch (GET_CODE (x))
13308 case CONST:
13309 return tls_mentioned_p (XEXP (x, 0));
13311 case UNSPEC:
13312 if (XINT (x, 1) == UNSPEC_TLS)
13313 return 1;
13315 default:
13316 return 0;
13320 /* Must not copy any rtx that uses a pc-relative address.
13321 Also, disallow copying of load-exclusive instructions that
13322 may appear after splitting of compare-and-swap-style operations
13323 so as to prevent those loops from being transformed away from their
13324 canonical forms (see PR 69904). */
13326 static bool
13327 arm_cannot_copy_insn_p (rtx_insn *insn)
13329 /* The tls call insn cannot be copied, as it is paired with a data
13330 word. */
13331 if (recog_memoized (insn) == CODE_FOR_tlscall)
13332 return true;
13334 subrtx_iterator::array_type array;
13335 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13337 const_rtx x = *iter;
13338 if (GET_CODE (x) == UNSPEC
13339 && (XINT (x, 1) == UNSPEC_PIC_BASE
13340 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13341 return true;
13344 rtx set = single_set (insn);
13345 if (set)
13347 rtx src = SET_SRC (set);
13348 if (GET_CODE (src) == ZERO_EXTEND)
13349 src = XEXP (src, 0);
13351 /* Catch the load-exclusive and load-acquire operations. */
13352 if (GET_CODE (src) == UNSPEC_VOLATILE
13353 && (XINT (src, 1) == VUNSPEC_LL
13354 || XINT (src, 1) == VUNSPEC_LAX))
13355 return true;
13357 return false;
13360 enum rtx_code
13361 minmax_code (rtx x)
13363 enum rtx_code code = GET_CODE (x);
13365 switch (code)
13367 case SMAX:
13368 return GE;
13369 case SMIN:
13370 return LE;
13371 case UMIN:
13372 return LEU;
13373 case UMAX:
13374 return GEU;
13375 default:
13376 gcc_unreachable ();
13380 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13382 bool
13383 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13384 int *mask, bool *signed_sat)
13386 /* The high bound must be a power of two minus one. */
13387 int log = exact_log2 (INTVAL (hi_bound) + 1);
13388 if (log == -1)
13389 return false;
13391 /* The low bound is either zero (for usat) or one less than the
13392 negation of the high bound (for ssat). */
13393 if (INTVAL (lo_bound) == 0)
13395 if (mask)
13396 *mask = log;
13397 if (signed_sat)
13398 *signed_sat = false;
13400 return true;
13403 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13405 if (mask)
13406 *mask = log + 1;
13407 if (signed_sat)
13408 *signed_sat = true;
13410 return true;
13413 return false;
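/* Illustrative examples (not compiled): bounds [0, 255] give *MASK = 8
   and *SIGNED_SAT = false, matching usat #8, while bounds [-256, 255]
   give *MASK = 9 and *SIGNED_SAT = true, matching ssat #9.  */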
13416 /* Return 1 if memory locations are adjacent. */
13418 adjacent_mem_locations (rtx a, rtx b)
13420 /* We don't guarantee to preserve the order of these memory refs. */
13421 if (volatile_refs_p (a) || volatile_refs_p (b))
13422 return 0;
13424 if ((REG_P (XEXP (a, 0))
13425 || (GET_CODE (XEXP (a, 0)) == PLUS
13426 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13427 && (REG_P (XEXP (b, 0))
13428 || (GET_CODE (XEXP (b, 0)) == PLUS
13429 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13431 HOST_WIDE_INT val0 = 0, val1 = 0;
13432 rtx reg0, reg1;
13433 int val_diff;
13435 if (GET_CODE (XEXP (a, 0)) == PLUS)
13437 reg0 = XEXP (XEXP (a, 0), 0);
13438 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13440 else
13441 reg0 = XEXP (a, 0);
13443 if (GET_CODE (XEXP (b, 0)) == PLUS)
13445 reg1 = XEXP (XEXP (b, 0), 0);
13446 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13448 else
13449 reg1 = XEXP (b, 0);
13451 /* Don't accept any offset that will require multiple
13452 instructions to handle, since this would cause the
13453 arith_adjacentmem pattern to output an overlong sequence. */
13454 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13455 return 0;
13457 /* Don't allow an eliminable register: register elimination can make
13458 the offset too large. */
13459 if (arm_eliminable_register (reg0))
13460 return 0;
13462 val_diff = val1 - val0;
13464 if (arm_ld_sched)
13466 /* If the target has load delay slots, then there's no benefit
13467 to using an ldm instruction unless the offset is zero and
13468 we are optimizing for size. */
13469 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13470 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13471 && (val_diff == 4 || val_diff == -4));
13474 return ((REGNO (reg0) == REGNO (reg1))
13475 && (val_diff == 4 || val_diff == -4));
13478 return 0;
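/* Illustrative example (not compiled): [r4] and [r4, #4] are adjacent
   (val_diff == 4), as are [r4, #8] and [r4, #4] (val_diff == -4); on a
   target with load delay slots they are only accepted when optimizing
   for size and one of the offsets is 0 or 4.  */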
13481 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13482 for load operations, false for store operations. CONSECUTIVE is true
13483 if the register numbers in the operation must be consecutive in the register
13484 bank. RETURN_PC is true if the value is to be loaded into the PC.
13485 The pattern we are trying to match for load is:
13486 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13487 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13490 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13492 where
13493 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13494 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13495 3. If consecutive is TRUE, then for kth register being loaded,
13496 REGNO (R_dk) = REGNO (R_d0) + k.
13497 The pattern for store is similar. */
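/* Illustrative RTL sketch (assumed form, not taken from the machine
   description): a two-register pop with write-back might look like
     (parallel [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 8)))
                (set (reg:SI 4) (mem:SI (reg:SI sp)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))])
   which satisfies the checks below with offset 0, reg_increment 4 and a
   write-back adjustment of (3 - 1) * 4 == 8.  */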
13498 bool
13499 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13500 bool consecutive, bool return_pc)
13502 HOST_WIDE_INT count = XVECLEN (op, 0);
13503 rtx reg, mem, addr;
13504 unsigned regno;
13505 unsigned first_regno;
13506 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13507 rtx elt;
13508 bool addr_reg_in_reglist = false;
13509 bool update = false;
13510 int reg_increment;
13511 int offset_adj;
13512 int regs_per_val;
13514 /* If not in SImode, then registers must be consecutive
13515 (e.g., VLDM instructions for DFmode). */
13516 gcc_assert ((mode == SImode) || consecutive);
13517 /* Setting return_pc for stores is illegal. */
13518 gcc_assert (!return_pc || load);
13520 /* Set up the increments and the regs per val based on the mode. */
13521 reg_increment = GET_MODE_SIZE (mode);
13522 regs_per_val = reg_increment / 4;
13523 offset_adj = return_pc ? 1 : 0;
13525 if (count <= 1
13526 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13527 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13528 return false;
13530 /* Check if this is a write-back. */
13531 elt = XVECEXP (op, 0, offset_adj);
13532 if (GET_CODE (SET_SRC (elt)) == PLUS)
13534 i++;
13535 base = 1;
13536 update = true;
13538 /* The offset adjustment must be the number of registers being
13539 popped times the size of a single register. */
13540 if (!REG_P (SET_DEST (elt))
13541 || !REG_P (XEXP (SET_SRC (elt), 0))
13542 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13543 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13544 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13545 ((count - 1 - offset_adj) * reg_increment))
13546 return false;
13549 i = i + offset_adj;
13550 base = base + offset_adj;
13551 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13552 success depends on the type: VLDM can do just one reg,
13553 LDM must do at least two. */
13554 if ((count <= i) && (mode == SImode))
13555 return false;
13557 elt = XVECEXP (op, 0, i - 1);
13558 if (GET_CODE (elt) != SET)
13559 return false;
13561 if (load)
13563 reg = SET_DEST (elt);
13564 mem = SET_SRC (elt);
13566 else
13568 reg = SET_SRC (elt);
13569 mem = SET_DEST (elt);
13572 if (!REG_P (reg) || !MEM_P (mem))
13573 return false;
13575 regno = REGNO (reg);
13576 first_regno = regno;
13577 addr = XEXP (mem, 0);
13578 if (GET_CODE (addr) == PLUS)
13580 if (!CONST_INT_P (XEXP (addr, 1)))
13581 return false;
13583 offset = INTVAL (XEXP (addr, 1));
13584 addr = XEXP (addr, 0);
13587 if (!REG_P (addr))
13588 return false;
13590 /* Don't allow SP to be loaded unless it is also the base register. It
13591 guarantees that SP is reset correctly when an LDM instruction
13592 is interrupted. Otherwise, we might end up with a corrupt stack. */
13593 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13594 return false;
13596 for (; i < count; i++)
13598 elt = XVECEXP (op, 0, i);
13599 if (GET_CODE (elt) != SET)
13600 return false;
13602 if (load)
13604 reg = SET_DEST (elt);
13605 mem = SET_SRC (elt);
13607 else
13609 reg = SET_SRC (elt);
13610 mem = SET_DEST (elt);
13613 if (!REG_P (reg)
13614 || GET_MODE (reg) != mode
13615 || REGNO (reg) <= regno
13616 || (consecutive
13617 && (REGNO (reg) !=
13618 (unsigned int) (first_regno + regs_per_val * (i - base))))
13619 /* Don't allow SP to be loaded unless it is also the base register. It
13620 guarantees that SP is reset correctly when an LDM instruction
13621 is interrupted. Otherwise, we might end up with a corrupt stack. */
13622 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13623 || !MEM_P (mem)
13624 || GET_MODE (mem) != mode
13625 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13626 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13627 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13628 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13629 offset + (i - base) * reg_increment))
13630 && (!REG_P (XEXP (mem, 0))
13631 || offset + (i - base) * reg_increment != 0)))
13632 return false;
13634 regno = REGNO (reg);
13635 if (regno == REGNO (addr))
13636 addr_reg_in_reglist = true;
13639 if (load)
13641 if (update && addr_reg_in_reglist)
13642 return false;
13644 /* For Thumb-1, the address register is always modified - either by write-back
13645 or by an explicit load.  If the pattern does not describe an update,
13646 then the address register must be in the list of loaded registers. */
13647 if (TARGET_THUMB1)
13648 return update || addr_reg_in_reglist;
13651 return true;
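/* As a rough illustration (register numbers chosen arbitrarily): a
   write-back "ldmia r0!, {r4, r5, r6}" would be presented to this predicate
   as a PARALLEL roughly of the form
     [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 12)))
      (set (reg:SI r4) (mem:SI (reg:SI r0)))
      (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))
      (set (reg:SI r6) (mem:SI (plus:SI (reg:SI r0) (const_int 8))))]
   which passes the checks above: the write-back adjustment is 3 * 4 bytes,
   the destination registers ascend, and each memory offset grows by
   reg_increment.  */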
13654 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13655 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13656 instruction. ADD_OFFSET is nonzero if the base address register needs
13657 to be modified with an add instruction before we can use it. */
13659 static bool
13660 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13661 int nops, HOST_WIDE_INT add_offset)
13663 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13664 if the offset isn't small enough. The reason 2 ldrs are faster
13665 is because these ARMs are able to do more than one cache access
13666 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13667 whilst the ARM8 has a double bandwidth cache. This means that
13668 these cores can do both an instruction fetch and a data fetch in
13669 a single cycle, so the trick of calculating the address into a
13670 scratch register (one of the result regs) and then doing a load
13671 multiple actually becomes slower (and no smaller in code size).
13672 That is the transformation
13674 ldr rd1, [rbase + offset]
13675 ldr rd2, [rbase + offset + 4]
13677 to
13679 add rd1, rbase, offset
13680 ldmia rd1, {rd1, rd2}
13682 produces worse code -- '3 cycles + any stalls on rd2' instead of
13683 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13684 access per cycle, the first sequence could never complete in less
13685 than 6 cycles, whereas the ldm sequence would only take 5 and
13686 would make better use of sequential accesses if not hitting the
13687 cache.
13689 We cheat here and test 'arm_ld_sched' which we currently know to
13690 only be true for the ARM8, ARM9 and StrongARM. If this ever
13691 changes, then the test below needs to be reworked. */
13692 if (nops == 2 && arm_ld_sched && add_offset != 0)
13693 return false;
13695 /* XScale has load-store double instructions, but they have stricter
13696 alignment requirements than load-store multiple, so we cannot
13697 use them.
13699 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13700 the pipeline until completion.
13702 NREGS CYCLES
13703 1 3
13704 2 4
13705 3 5
13706 4 6
13708 An ldr instruction takes 1-3 cycles, but does not block the
13709 pipeline.
13711 NREGS CYCLES
13712 1 1-3
13713 2 2-6
13714 3 3-9
13715 4 4-12
13717 Best case ldr will always win. However, the more ldr instructions
13718 we issue, the less likely we are to be able to schedule them well.
13719 Using ldr instructions also increases code size.
13721 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13722 for counts of 3 or 4 regs. */
13723 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13724 return false;
13725 return true;
13728 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13729 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13730 an array ORDER which describes the sequence to use when accessing the
13731 offsets that produces an ascending order. In this sequence, each
13732 offset must be larger by exactly 4 than the previous one. ORDER[0]
13733 must have been filled in with the lowest offset by the caller.
13734 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13735 we use to verify that ORDER produces an ascending order of registers.
13736 Return true if it was possible to construct such an order, false if
13737 not. */
13739 static bool
13740 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13741 int *unsorted_regs)
13743 int i;
13744 for (i = 1; i < nops; i++)
13746 int j;
13748 order[i] = order[i - 1];
13749 for (j = 0; j < nops; j++)
13750 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13752 /* We must find exactly one offset that is higher than the
13753 previous one by 4. */
13754 if (order[i] != order[i - 1])
13755 return false;
13756 order[i] = j;
13758 if (order[i] == order[i - 1])
13759 return false;
13760 /* The register numbers must be ascending. */
13761 if (unsorted_regs != NULL
13762 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13763 return false;
13765 return true;
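/* A worked example (illustrative values only): with NOPS == 4,
   UNSORTED_OFFSETS == {8, 0, 4, 12} and ORDER[0] == 1 (the index of the
   lowest offset), the loop above finds offsets 4, 8 and 12 in turn and
   fills in ORDER == {1, 2, 0, 3}.  With UNSORTED_OFFSETS == {8, 0, 4, 16}
   no offset equal to 12 exists, so the function returns false.  */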
13768 /* Used to determine in a peephole whether a sequence of load
13769 instructions can be changed into a load-multiple instruction.
13770 NOPS is the number of separate load instructions we are examining. The
13771 first NOPS entries in OPERANDS are the destination registers, the
13772 next NOPS entries are memory operands. If this function is
13773 successful, *BASE is set to the common base register of the memory
13774 accesses; *LOAD_OFFSET is set to the first memory location's offset
13775 from that base register.
13776 REGS is an array filled in with the destination register numbers.
13777 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13778 insn numbers to an ascending order of loads.  If CHECK_REGS is true,
13779 the sequence of registers in REGS matches the loads from ascending memory
13780 locations, and the function verifies that the register numbers are
13781 themselves ascending. If CHECK_REGS is false, the register numbers
13782 are stored in the order they are found in the operands. */
13783 static int
13784 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13785 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13787 int unsorted_regs[MAX_LDM_STM_OPS];
13788 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13789 int order[MAX_LDM_STM_OPS];
13790 rtx base_reg_rtx = NULL;
13791 int base_reg = -1;
13792 int i, ldm_case;
13794 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13795 easily extended if required. */
13796 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13798 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13800 /* Loop over the operands and check that the memory references are
13801 suitable (i.e. immediate offsets from the same base register). At
13802 the same time, extract the target register, and the memory
13803 offsets. */
13804 for (i = 0; i < nops; i++)
13806 rtx reg;
13807 rtx offset;
13809 /* Convert a subreg of a mem into the mem itself. */
13810 if (GET_CODE (operands[nops + i]) == SUBREG)
13811 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13813 gcc_assert (MEM_P (operands[nops + i]));
13815 /* Don't reorder volatile memory references; it doesn't seem worth
13816 looking for the case where the order is ok anyway. */
13817 if (MEM_VOLATILE_P (operands[nops + i]))
13818 return 0;
13820 offset = const0_rtx;
13822 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13823 || (GET_CODE (reg) == SUBREG
13824 && REG_P (reg = SUBREG_REG (reg))))
13825 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13826 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13827 || (GET_CODE (reg) == SUBREG
13828 && REG_P (reg = SUBREG_REG (reg))))
13829 && (CONST_INT_P (offset
13830 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13832 if (i == 0)
13834 base_reg = REGNO (reg);
13835 base_reg_rtx = reg;
13836 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13837 return 0;
13839 else if (base_reg != (int) REGNO (reg))
13840 /* Not addressed from the same base register. */
13841 return 0;
13843 unsorted_regs[i] = (REG_P (operands[i])
13844 ? REGNO (operands[i])
13845 : REGNO (SUBREG_REG (operands[i])));
13847 /* If it isn't an integer register, or if it overwrites the
13848 base register but isn't the last insn in the list, then
13849 we can't do this. */
13850 if (unsorted_regs[i] < 0
13851 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13852 || unsorted_regs[i] > 14
13853 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13854 return 0;
13856 /* Don't allow SP to be loaded unless it is also the base
13857 register. It guarantees that SP is reset correctly when
13858 an LDM instruction is interrupted. Otherwise, we might
13859 end up with a corrupt stack. */
13860 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13861 return 0;
13863 unsorted_offsets[i] = INTVAL (offset);
13864 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13865 order[0] = i;
13867 else
13868 /* Not a suitable memory address. */
13869 return 0;
13872 /* All the useful information has now been extracted from the
13873 operands into unsorted_regs and unsorted_offsets; additionally,
13874 order[0] has been set to the lowest offset in the list. Sort
13875 the offsets into order, verifying that they are adjacent, and
13876 check that the register numbers are ascending. */
13877 if (!compute_offset_order (nops, unsorted_offsets, order,
13878 check_regs ? unsorted_regs : NULL))
13879 return 0;
13881 if (saved_order)
13882 memcpy (saved_order, order, sizeof order);
13884 if (base)
13886 *base = base_reg;
13888 for (i = 0; i < nops; i++)
13889 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13891 *load_offset = unsorted_offsets[order[0]];
13894 if (TARGET_THUMB1
13895 && !peep2_reg_dead_p (nops, base_reg_rtx))
13896 return 0;
13898 if (unsorted_offsets[order[0]] == 0)
13899 ldm_case = 1; /* ldmia */
13900 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13901 ldm_case = 2; /* ldmib */
13902 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13903 ldm_case = 3; /* ldmda */
13904 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13905 ldm_case = 4; /* ldmdb */
13906 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13907 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13908 ldm_case = 5;
13909 else
13910 return 0;
13912 if (!multiple_operation_profitable_p (false, nops,
13913 ldm_case == 5
13914 ? unsorted_offsets[order[0]] : 0))
13915 return 0;
13917 return ldm_case;
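/* Illustrative mapping from offsets to LDM_CASE (offsets picked for
   exposition): loads from [r2], [r2, #4], [r2, #8] give sorted offsets
   {0, 4, 8} and case 1 (ldmia); {4, 8, 12} gives case 2 (ldmib, ARM only);
   {-8, -4, 0} gives case 3 (ldmda, ARM only); {-12, -8, -4} gives case 4
   (ldmdb); and e.g. {256, 260, 264} gives case 5, where the base must first
   be adjusted by adding the lowest offset.  */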
13920 /* Used to determine in a peephole whether a sequence of store instructions can
13921 be changed into a store-multiple instruction.
13922 NOPS is the number of separate store instructions we are examining.
13923 NOPS_TOTAL is the total number of instructions recognized by the peephole
13924 pattern.
13925 The first NOPS entries in OPERANDS are the source registers, the next
13926 NOPS entries are memory operands. If this function is successful, *BASE is
13927 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13928 to the first memory location's offset from that base register. REGS is an
13929 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13930 likewise filled with the corresponding rtx's.
13931 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13932 numbers to an ascending order of stores.
13933 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13934 from ascending memory locations, and the function verifies that the register
13935 numbers are themselves ascending. If CHECK_REGS is false, the register
13936 numbers are stored in the order they are found in the operands. */
13937 static int
13938 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13939 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13940 HOST_WIDE_INT *load_offset, bool check_regs)
13942 int unsorted_regs[MAX_LDM_STM_OPS];
13943 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13944 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13945 int order[MAX_LDM_STM_OPS];
13946 int base_reg = -1;
13947 rtx base_reg_rtx = NULL;
13948 int i, stm_case;
13950 /* Write back of base register is currently only supported for Thumb 1. */
13951 int base_writeback = TARGET_THUMB1;
13953 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13954 easily extended if required. */
13955 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13957 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13959 /* Loop over the operands and check that the memory references are
13960 suitable (i.e. immediate offsets from the same base register). At
13961 the same time, extract the target register, and the memory
13962 offsets. */
13963 for (i = 0; i < nops; i++)
13965 rtx reg;
13966 rtx offset;
13968 /* Convert a subreg of a mem into the mem itself. */
13969 if (GET_CODE (operands[nops + i]) == SUBREG)
13970 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13972 gcc_assert (MEM_P (operands[nops + i]));
13974 /* Don't reorder volatile memory references; it doesn't seem worth
13975 looking for the case where the order is ok anyway. */
13976 if (MEM_VOLATILE_P (operands[nops + i]))
13977 return 0;
13979 offset = const0_rtx;
13981 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13982 || (GET_CODE (reg) == SUBREG
13983 && REG_P (reg = SUBREG_REG (reg))))
13984 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13985 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13986 || (GET_CODE (reg) == SUBREG
13987 && REG_P (reg = SUBREG_REG (reg))))
13988 && (CONST_INT_P (offset
13989 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13991 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13992 ? operands[i] : SUBREG_REG (operands[i]));
13993 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13995 if (i == 0)
13997 base_reg = REGNO (reg);
13998 base_reg_rtx = reg;
13999 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14000 return 0;
14002 else if (base_reg != (int) REGNO (reg))
14003 /* Not addressed from the same base register. */
14004 return 0;
14006 /* If it isn't an integer register, then we can't do this. */
14007 if (unsorted_regs[i] < 0
14008 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14009 /* The effects are unpredictable if the base register is
14010 both updated and stored. */
14011 || (base_writeback && unsorted_regs[i] == base_reg)
14012 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14013 || unsorted_regs[i] > 14)
14014 return 0;
14016 unsorted_offsets[i] = INTVAL (offset);
14017 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14018 order[0] = i;
14020 else
14021 /* Not a suitable memory address. */
14022 return 0;
14025 /* All the useful information has now been extracted from the
14026 operands into unsorted_regs and unsorted_offsets; additionally,
14027 order[0] has been set to the lowest offset in the list. Sort
14028 the offsets into order, verifying that they are adjacent, and
14029 check that the register numbers are ascending. */
14030 if (!compute_offset_order (nops, unsorted_offsets, order,
14031 check_regs ? unsorted_regs : NULL))
14032 return 0;
14034 if (saved_order)
14035 memcpy (saved_order, order, sizeof order);
14037 if (base)
14039 *base = base_reg;
14041 for (i = 0; i < nops; i++)
14043 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14044 if (reg_rtxs)
14045 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14048 *load_offset = unsorted_offsets[order[0]];
14051 if (TARGET_THUMB1
14052 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14053 return 0;
14055 if (unsorted_offsets[order[0]] == 0)
14056 stm_case = 1; /* stmia */
14057 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14058 stm_case = 2; /* stmib */
14059 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14060 stm_case = 3; /* stmda */
14061 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14062 stm_case = 4; /* stmdb */
14063 else
14064 return 0;
14066 if (!multiple_operation_profitable_p (false, nops, 0))
14067 return 0;
14069 return stm_case;
14072 /* Routines for use in generating RTL. */
14074 /* Generate a load-multiple instruction. COUNT is the number of loads in
14075 the instruction; REGS and MEMS are arrays containing the operands.
14076 BASEREG is the base register to be used in addressing the memory operands.
14077 WBACK_OFFSET, if nonzero, is the amount by which the instruction should
14078 update the base register.  */
14080 static rtx
14081 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14082 HOST_WIDE_INT wback_offset)
14084 int i = 0, j;
14085 rtx result;
14087 if (!multiple_operation_profitable_p (false, count, 0))
14089 rtx seq;
14091 start_sequence ();
14093 for (i = 0; i < count; i++)
14094 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14096 if (wback_offset != 0)
14097 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14099 seq = get_insns ();
14100 end_sequence ();
14102 return seq;
14105 result = gen_rtx_PARALLEL (VOIDmode,
14106 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14107 if (wback_offset != 0)
14109 XVECEXP (result, 0, 0)
14110 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14111 i = 1;
14112 count++;
14115 for (j = 0; i < count; i++, j++)
14116 XVECEXP (result, 0, i)
14117 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14119 return result;
14122 /* Generate a store-multiple instruction. COUNT is the number of stores in
14123 the instruction; REGS and MEMS are arrays containing the operands.
14124 BASEREG is the base register to be used in addressing the memory operands.
14125 WBACK_OFFSET, if nonzero, is the amount by which the instruction should
14126 update the base register.  */
14128 static rtx
14129 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14130 HOST_WIDE_INT wback_offset)
14132 int i = 0, j;
14133 rtx result;
14135 if (GET_CODE (basereg) == PLUS)
14136 basereg = XEXP (basereg, 0);
14138 if (!multiple_operation_profitable_p (false, count, 0))
14140 rtx seq;
14142 start_sequence ();
14144 for (i = 0; i < count; i++)
14145 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14147 if (wback_offset != 0)
14148 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14150 seq = get_insns ();
14151 end_sequence ();
14153 return seq;
14156 result = gen_rtx_PARALLEL (VOIDmode,
14157 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14158 if (wback_offset != 0)
14160 XVECEXP (result, 0, 0)
14161 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14162 i = 1;
14163 count++;
14166 for (j = 0; i < count; i++, j++)
14167 XVECEXP (result, 0, i)
14168 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14170 return result;
14173 /* Generate either a load-multiple or a store-multiple instruction. This
14174 function can be used in situations where we can start with a single MEM
14175 rtx and adjust its address upwards.
14176 COUNT is the number of operations in the instruction, not counting a
14177 possible update of the base register. REGS is an array containing the
14178 register operands.
14179 BASEREG is the base register to be used in addressing the memory operands,
14180 which are constructed from BASEMEM.
14181 WRITE_BACK specifies whether the generated instruction should include an
14182 update of the base register.
14183 OFFSETP is used to pass an offset to and from this function; this offset
14184 is not used when constructing the address (instead BASEMEM should have an
14185 appropriate offset in its address), it is used only for setting
14186 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
14188 static rtx
14189 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14190 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14192 rtx mems[MAX_LDM_STM_OPS];
14193 HOST_WIDE_INT offset = *offsetp;
14194 int i;
14196 gcc_assert (count <= MAX_LDM_STM_OPS);
14198 if (GET_CODE (basereg) == PLUS)
14199 basereg = XEXP (basereg, 0);
14201 for (i = 0; i < count; i++)
14203 rtx addr = plus_constant (Pmode, basereg, i * 4);
14204 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14205 offset += 4;
14208 if (write_back)
14209 *offsetp = offset;
14211 if (is_load)
14212 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14213 write_back ? 4 * count : 0);
14214 else
14215 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14216 write_back ? 4 * count : 0);
14219 rtx
14220 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14221 rtx basemem, HOST_WIDE_INT *offsetp)
14223 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14224 offsetp);
14227 rtx
14228 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14229 rtx basemem, HOST_WIDE_INT *offsetp)
14231 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14232 offsetp);
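/* A minimal usage sketch (mirroring the calls in arm_gen_movmemqi below;
   the register numbers are assumed for illustration):
     int regnos[4] = { 0, 1, 2, 3 };
     emit_insn (arm_gen_load_multiple (regnos, 4, src, TRUE, srcbase,
                                       &srcoffset));
   loads r0-r3 from four consecutive words of SRCBASE and, because
   WRITE_BACK is true, also advances the SRC base register by 16 and
   SRCOFFSET by 16.  */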
14235 /* Called from a peephole2 expander to turn a sequence of loads into an
14236 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14237 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14238 is true if we can reorder the registers because they are subsequently used
14239 commutatively.
14240 Returns true iff we could generate a new instruction. */
14242 bool
14243 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14245 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14246 rtx mems[MAX_LDM_STM_OPS];
14247 int i, j, base_reg;
14248 rtx base_reg_rtx;
14249 HOST_WIDE_INT offset;
14250 int write_back = FALSE;
14251 int ldm_case;
14252 rtx addr;
14254 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14255 &base_reg, &offset, !sort_regs);
14257 if (ldm_case == 0)
14258 return false;
14260 if (sort_regs)
14261 for (i = 0; i < nops - 1; i++)
14262 for (j = i + 1; j < nops; j++)
14263 if (regs[i] > regs[j])
14265 int t = regs[i];
14266 regs[i] = regs[j];
14267 regs[j] = t;
14269 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14271 if (TARGET_THUMB1)
14273 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14274 gcc_assert (ldm_case == 1 || ldm_case == 5);
14275 write_back = TRUE;
14278 if (ldm_case == 5)
14280 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14281 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14282 offset = 0;
14283 if (!TARGET_THUMB1)
14285 base_reg = regs[0];
14286 base_reg_rtx = newbase;
14290 for (i = 0; i < nops; i++)
14292 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14293 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14294 SImode, addr, 0);
14296 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14297 write_back ? offset + i * 4 : 0));
14298 return true;
14301 /* Called from a peephole2 expander to turn a sequence of stores into an
14302 STM instruction. OPERANDS are the operands found by the peephole matcher;
14303 NOPS indicates how many separate stores we are trying to combine.
14304 Returns true iff we could generate a new instruction. */
14306 bool
14307 gen_stm_seq (rtx *operands, int nops)
14309 int i;
14310 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14311 rtx mems[MAX_LDM_STM_OPS];
14312 int base_reg;
14313 rtx base_reg_rtx;
14314 HOST_WIDE_INT offset;
14315 int write_back = FALSE;
14316 int stm_case;
14317 rtx addr;
14318 bool base_reg_dies;
14320 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14321 mem_order, &base_reg, &offset, true);
14323 if (stm_case == 0)
14324 return false;
14326 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14328 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14329 if (TARGET_THUMB1)
14331 gcc_assert (base_reg_dies);
14332 write_back = TRUE;
14335 if (stm_case == 5)
14337 gcc_assert (base_reg_dies);
14338 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14339 offset = 0;
14342 addr = plus_constant (Pmode, base_reg_rtx, offset);
14344 for (i = 0; i < nops; i++)
14346 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14347 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14348 SImode, addr, 0);
14350 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14351 write_back ? offset + i * 4 : 0));
14352 return true;
14355 /* Called from a peephole2 expander to turn a sequence of stores that are
14356 preceded by constant loads into an STM instruction. OPERANDS are the
14357 operands found by the peephole matcher; NOPS indicates how many
14358 separate stores we are trying to combine; there are 2 * NOPS
14359 instructions in the peephole.
14360 Returns true iff we could generate a new instruction. */
14362 bool
14363 gen_const_stm_seq (rtx *operands, int nops)
14365 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14366 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14367 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14368 rtx mems[MAX_LDM_STM_OPS];
14369 int base_reg;
14370 rtx base_reg_rtx;
14371 HOST_WIDE_INT offset;
14372 int write_back = FALSE;
14373 int stm_case;
14374 rtx addr;
14375 bool base_reg_dies;
14376 int i, j;
14377 HARD_REG_SET allocated;
14379 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14380 mem_order, &base_reg, &offset, false);
14382 if (stm_case == 0)
14383 return false;
14385 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14387 /* If the same register is used more than once, try to find a free
14388 register. */
14389 CLEAR_HARD_REG_SET (allocated);
14390 for (i = 0; i < nops; i++)
14392 for (j = i + 1; j < nops; j++)
14393 if (regs[i] == regs[j])
14395 rtx t = peep2_find_free_register (0, nops * 2,
14396 TARGET_THUMB1 ? "l" : "r",
14397 SImode, &allocated);
14398 if (t == NULL_RTX)
14399 return false;
14400 reg_rtxs[i] = t;
14401 regs[i] = REGNO (t);
14405 /* Compute an ordering that maps the register numbers to an ascending
14406 sequence. */
14407 reg_order[0] = 0;
14408 for (i = 0; i < nops; i++)
14409 if (regs[i] < regs[reg_order[0]])
14410 reg_order[0] = i;
14412 for (i = 1; i < nops; i++)
14414 int this_order = reg_order[i - 1];
14415 for (j = 0; j < nops; j++)
14416 if (regs[j] > regs[reg_order[i - 1]]
14417 && (this_order == reg_order[i - 1]
14418 || regs[j] < regs[this_order]))
14419 this_order = j;
14420 reg_order[i] = this_order;
14423 /* Ensure that registers that must be live after the instruction end
14424 up with the correct value. */
14425 for (i = 0; i < nops; i++)
14427 int this_order = reg_order[i];
14428 if ((this_order != mem_order[i]
14429 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14430 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14431 return false;
14434 /* Load the constants. */
14435 for (i = 0; i < nops; i++)
14437 rtx op = operands[2 * nops + mem_order[i]];
14438 sorted_regs[i] = regs[reg_order[i]];
14439 emit_move_insn (reg_rtxs[reg_order[i]], op);
14442 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14444 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14445 if (TARGET_THUMB1)
14447 gcc_assert (base_reg_dies);
14448 write_back = TRUE;
14451 if (stm_case == 5)
14453 gcc_assert (base_reg_dies);
14454 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14455 offset = 0;
14458 addr = plus_constant (Pmode, base_reg_rtx, offset);
14460 for (i = 0; i < nops; i++)
14462 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14463 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14464 SImode, addr, 0);
14466 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14467 write_back ? offset + i * 4 : 0));
14468 return true;
14471 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14472 unaligned copies on processors which support unaligned semantics for those
14473 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14474 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14475 An interleave factor of 1 (the minimum) will perform no interleaving.
14476 Load/store multiple are used for aligned addresses where possible. */
14478 static void
14479 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14480 HOST_WIDE_INT length,
14481 unsigned int interleave_factor)
14483 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14484 int *regnos = XALLOCAVEC (int, interleave_factor);
14485 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14486 HOST_WIDE_INT i, j;
14487 HOST_WIDE_INT remaining = length, words;
14488 rtx halfword_tmp = NULL, byte_tmp = NULL;
14489 rtx dst, src;
14490 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14491 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14492 HOST_WIDE_INT srcoffset, dstoffset;
14493 HOST_WIDE_INT src_autoinc, dst_autoinc;
14494 rtx mem, addr;
14496 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14498 /* Use hard registers if we have aligned source or destination so we can use
14499 load/store multiple with contiguous registers. */
14500 if (dst_aligned || src_aligned)
14501 for (i = 0; i < interleave_factor; i++)
14502 regs[i] = gen_rtx_REG (SImode, i);
14503 else
14504 for (i = 0; i < interleave_factor; i++)
14505 regs[i] = gen_reg_rtx (SImode);
14507 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14508 src = copy_addr_to_reg (XEXP (srcbase, 0));
14510 srcoffset = dstoffset = 0;
14512 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14513 For copying the last bytes we want to subtract this offset again. */
14514 src_autoinc = dst_autoinc = 0;
14516 for (i = 0; i < interleave_factor; i++)
14517 regnos[i] = i;
14519 /* Copy BLOCK_SIZE_BYTES chunks. */
14521 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14523 /* Load words. */
14524 if (src_aligned && interleave_factor > 1)
14526 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14527 TRUE, srcbase, &srcoffset));
14528 src_autoinc += UNITS_PER_WORD * interleave_factor;
14530 else
14532 for (j = 0; j < interleave_factor; j++)
14534 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14535 - src_autoinc));
14536 mem = adjust_automodify_address (srcbase, SImode, addr,
14537 srcoffset + j * UNITS_PER_WORD);
14538 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14540 srcoffset += block_size_bytes;
14543 /* Store words. */
14544 if (dst_aligned && interleave_factor > 1)
14546 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14547 TRUE, dstbase, &dstoffset));
14548 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14550 else
14552 for (j = 0; j < interleave_factor; j++)
14554 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14555 - dst_autoinc));
14556 mem = adjust_automodify_address (dstbase, SImode, addr,
14557 dstoffset + j * UNITS_PER_WORD);
14558 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14560 dstoffset += block_size_bytes;
14563 remaining -= block_size_bytes;
14566 /* Copy any whole words left (note these aren't interleaved with any
14567 subsequent halfword/byte load/stores in the interests of simplicity). */
14569 words = remaining / UNITS_PER_WORD;
14571 gcc_assert (words < interleave_factor);
14573 if (src_aligned && words > 1)
14575 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14576 &srcoffset));
14577 src_autoinc += UNITS_PER_WORD * words;
14579 else
14581 for (j = 0; j < words; j++)
14583 addr = plus_constant (Pmode, src,
14584 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14585 mem = adjust_automodify_address (srcbase, SImode, addr,
14586 srcoffset + j * UNITS_PER_WORD);
14587 if (src_aligned)
14588 emit_move_insn (regs[j], mem);
14589 else
14590 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14592 srcoffset += words * UNITS_PER_WORD;
14595 if (dst_aligned && words > 1)
14597 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14598 &dstoffset));
14599 dst_autoinc += words * UNITS_PER_WORD;
14601 else
14603 for (j = 0; j < words; j++)
14605 addr = plus_constant (Pmode, dst,
14606 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14607 mem = adjust_automodify_address (dstbase, SImode, addr,
14608 dstoffset + j * UNITS_PER_WORD);
14609 if (dst_aligned)
14610 emit_move_insn (mem, regs[j]);
14611 else
14612 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14614 dstoffset += words * UNITS_PER_WORD;
14617 remaining -= words * UNITS_PER_WORD;
14619 gcc_assert (remaining < 4);
14621 /* Copy a halfword if necessary. */
14623 if (remaining >= 2)
14625 halfword_tmp = gen_reg_rtx (SImode);
14627 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14628 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14629 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14631 /* Either write out immediately, or delay until we've loaded the last
14632 byte, depending on interleave factor. */
14633 if (interleave_factor == 1)
14635 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14636 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14637 emit_insn (gen_unaligned_storehi (mem,
14638 gen_lowpart (HImode, halfword_tmp)));
14639 halfword_tmp = NULL;
14640 dstoffset += 2;
14643 remaining -= 2;
14644 srcoffset += 2;
14647 gcc_assert (remaining < 2);
14649 /* Copy last byte. */
14651 if ((remaining & 1) != 0)
14653 byte_tmp = gen_reg_rtx (SImode);
14655 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14656 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14657 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14659 if (interleave_factor == 1)
14661 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14662 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14663 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14664 byte_tmp = NULL;
14665 dstoffset++;
14668 remaining--;
14669 srcoffset++;
14672 /* Store last halfword if we haven't done so already. */
14674 if (halfword_tmp)
14676 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14677 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14678 emit_insn (gen_unaligned_storehi (mem,
14679 gen_lowpart (HImode, halfword_tmp)));
14680 dstoffset += 2;
14683 /* Likewise for last byte. */
14685 if (byte_tmp)
14687 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14688 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14689 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14690 dstoffset++;
14693 gcc_assert (remaining == 0 && srcoffset == dstoffset);
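/* A worked example (sizes chosen for illustration): LENGTH == 23 with
   INTERLEAVE_FACTOR == 4 and an aligned source and destination copies one
   16-byte block with ldm/stm, then one remaining whole word with a single
   load/store (words == 1, so no ldm is used), then a halfword, and finally
   a single byte, leaving remaining == 0.  */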
14696 /* From mips_adjust_block_mem:
14698 Helper function for doing a loop-based block operation on memory
14699 reference MEM. Each iteration of the loop will operate on LENGTH
14700 bytes of MEM.
14702 Create a new base register for use within the loop and point it to
14703 the start of MEM. Create a new memory reference that uses this
14704 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14706 static void
14707 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14708 rtx *loop_mem)
14710 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14712 /* Although the new mem does not refer to a known location,
14713 it does keep up to LENGTH bytes of alignment. */
14714 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14715 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14718 /* From mips_block_move_loop:
14720 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14721 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14722 the memory regions do not overlap. */
14724 static void
14725 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14726 unsigned int interleave_factor,
14727 HOST_WIDE_INT bytes_per_iter)
14729 rtx src_reg, dest_reg, final_src, test;
14730 HOST_WIDE_INT leftover;
14732 leftover = length % bytes_per_iter;
14733 length -= leftover;
14735 /* Create registers and memory references for use within the loop. */
14736 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14737 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14739 /* Calculate the value that SRC_REG should have after the last iteration of
14740 the loop. */
14741 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14742 0, 0, OPTAB_WIDEN);
14744 /* Emit the start of the loop. */
14745 rtx_code_label *label = gen_label_rtx ();
14746 emit_label (label);
14748 /* Emit the loop body. */
14749 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14750 interleave_factor);
14752 /* Move on to the next block. */
14753 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14754 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14756 /* Emit the loop condition. */
14757 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14758 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14760 /* Mop up any left-over bytes. */
14761 if (leftover)
14762 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
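/* For example (illustrative sizes): LENGTH == 100 with BYTES_PER_ITER == 16
   gives LEFTOVER == 4; the loop above copies 96 bytes in six iterations and
   the final straight-line call handles the remaining 4 bytes.  */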
14765 /* Emit a block move when either the source or destination is unaligned (not
14766 aligned to a four-byte boundary). This may need further tuning depending on
14767 core type, optimize_size setting, etc. */
14769 static int
14770 arm_movmemqi_unaligned (rtx *operands)
14772 HOST_WIDE_INT length = INTVAL (operands[2]);
14774 if (optimize_size)
14776 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14777 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14778 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14779 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14780 or dst_aligned though: allow more interleaving in those cases since the
14781 resulting code can be smaller. */
14782 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14783 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14785 if (length > 12)
14786 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14787 interleave_factor, bytes_per_iter);
14788 else
14789 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14790 interleave_factor);
14792 else
14794 /* Note that the loop created by arm_block_move_unaligned_loop may be
14795 subject to loop unrolling, which makes tuning this condition a little
14796 redundant. */
14797 if (length > 32)
14798 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14799 else
14800 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14803 return 1;
14806 int
14807 arm_gen_movmemqi (rtx *operands)
14809 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14810 HOST_WIDE_INT srcoffset, dstoffset;
14811 int i;
14812 rtx src, dst, srcbase, dstbase;
14813 rtx part_bytes_reg = NULL;
14814 rtx mem;
14816 if (!CONST_INT_P (operands[2])
14817 || !CONST_INT_P (operands[3])
14818 || INTVAL (operands[2]) > 64)
14819 return 0;
14821 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14822 return arm_movmemqi_unaligned (operands);
14824 if (INTVAL (operands[3]) & 3)
14825 return 0;
14827 dstbase = operands[0];
14828 srcbase = operands[1];
14830 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14831 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14833 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14834 out_words_to_go = INTVAL (operands[2]) / 4;
14835 last_bytes = INTVAL (operands[2]) & 3;
14836 dstoffset = srcoffset = 0;
14838 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14839 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14841 for (i = 0; in_words_to_go >= 2; i+=4)
14843 if (in_words_to_go > 4)
14844 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14845 TRUE, srcbase, &srcoffset));
14846 else
14847 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14848 src, FALSE, srcbase,
14849 &srcoffset));
14851 if (out_words_to_go)
14853 if (out_words_to_go > 4)
14854 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14855 TRUE, dstbase, &dstoffset));
14856 else if (out_words_to_go != 1)
14857 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14858 out_words_to_go, dst,
14859 (last_bytes == 0
14860 ? FALSE : TRUE),
14861 dstbase, &dstoffset));
14862 else
14864 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14865 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14866 if (last_bytes != 0)
14868 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14869 dstoffset += 4;
14874 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14875 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14878 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14879 if (out_words_to_go)
14881 rtx sreg;
14883 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14884 sreg = copy_to_reg (mem);
14886 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14887 emit_move_insn (mem, sreg);
14888 in_words_to_go--;
14890 gcc_assert (!in_words_to_go); /* Sanity check */
14893 if (in_words_to_go)
14895 gcc_assert (in_words_to_go > 0);
14897 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14898 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14901 gcc_assert (!last_bytes || part_bytes_reg);
14903 if (BYTES_BIG_ENDIAN && last_bytes)
14905 rtx tmp = gen_reg_rtx (SImode);
14907 /* The bytes we want are in the top end of the word. */
14908 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14909 GEN_INT (8 * (4 - last_bytes))));
14910 part_bytes_reg = tmp;
14912 while (last_bytes)
14914 mem = adjust_automodify_address (dstbase, QImode,
14915 plus_constant (Pmode, dst,
14916 last_bytes - 1),
14917 dstoffset + last_bytes - 1);
14918 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14920 if (--last_bytes)
14922 tmp = gen_reg_rtx (SImode);
14923 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14924 part_bytes_reg = tmp;
14929 else
14931 if (last_bytes > 1)
14933 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14934 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14935 last_bytes -= 2;
14936 if (last_bytes)
14938 rtx tmp = gen_reg_rtx (SImode);
14939 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14940 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14941 part_bytes_reg = tmp;
14942 dstoffset += 2;
14946 if (last_bytes)
14948 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14949 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14953 return 1;
14956 /* Helper for gen_movmem_ldrd_strd.  Return a copy of the memory rtx MEM
14957 with its address advanced by the size of its mode.  */
14958 inline static rtx
14959 next_consecutive_mem (rtx mem)
14961 machine_mode mode = GET_MODE (mem);
14962 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14963 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14965 return adjust_automodify_address (mem, mode, addr, offset);
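/* For instance (base register chosen for illustration), given an SImode MEM
   whose address is (plus r1 8), the helper above returns an SImode MEM at
   (plus r1 12), i.e. the word immediately following it.  */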
14968 /* Copy using LDRD/STRD instructions whenever possible.
14969 Returns true upon success. */
14970 bool
14971 gen_movmem_ldrd_strd (rtx *operands)
14973 unsigned HOST_WIDE_INT len;
14974 HOST_WIDE_INT align;
14975 rtx src, dst, base;
14976 rtx reg0;
14977 bool src_aligned, dst_aligned;
14978 bool src_volatile, dst_volatile;
14980 gcc_assert (CONST_INT_P (operands[2]));
14981 gcc_assert (CONST_INT_P (operands[3]));
14983 len = UINTVAL (operands[2]);
14984 if (len > 64)
14985 return false;
14987 /* Maximum alignment we can assume for both src and dst buffers. */
14988 align = INTVAL (operands[3]);
14990 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14991 return false;
14993 /* Place src and dst addresses in registers
14994 and update the corresponding mem rtx. */
14995 dst = operands[0];
14996 dst_volatile = MEM_VOLATILE_P (dst);
14997 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14998 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14999 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15001 src = operands[1];
15002 src_volatile = MEM_VOLATILE_P (src);
15003 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15004 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15005 src = adjust_automodify_address (src, VOIDmode, base, 0);
15007 if (!unaligned_access && !(src_aligned && dst_aligned))
15008 return false;
15010 if (src_volatile || dst_volatile)
15011 return false;
15013 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15014 if (!(dst_aligned || src_aligned))
15015 return arm_gen_movmemqi (operands);
15017 /* If either src or dst is unaligned we'll be accessing it as pairs
15018 of unaligned SImode accesses. Otherwise we can generate DImode
15019 ldrd/strd instructions. */
15020 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15021 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15023 while (len >= 8)
15025 len -= 8;
15026 reg0 = gen_reg_rtx (DImode);
15027 rtx low_reg = NULL_RTX;
15028 rtx hi_reg = NULL_RTX;
15030 if (!src_aligned || !dst_aligned)
15032 low_reg = gen_lowpart (SImode, reg0);
15033 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
15035 if (src_aligned)
15036 emit_move_insn (reg0, src);
15037 else
15039 emit_insn (gen_unaligned_loadsi (low_reg, src));
15040 src = next_consecutive_mem (src);
15041 emit_insn (gen_unaligned_loadsi (hi_reg, src));
15044 if (dst_aligned)
15045 emit_move_insn (dst, reg0);
15046 else
15048 emit_insn (gen_unaligned_storesi (dst, low_reg));
15049 dst = next_consecutive_mem (dst);
15050 emit_insn (gen_unaligned_storesi (dst, hi_reg));
15053 src = next_consecutive_mem (src);
15054 dst = next_consecutive_mem (dst);
15057 gcc_assert (len < 8);
15058 if (len >= 4)
15060 /* More than a word but less than a double-word to copy. Copy a word. */
15061 reg0 = gen_reg_rtx (SImode);
15062 src = adjust_address (src, SImode, 0);
15063 dst = adjust_address (dst, SImode, 0);
15064 if (src_aligned)
15065 emit_move_insn (reg0, src);
15066 else
15067 emit_insn (gen_unaligned_loadsi (reg0, src));
15069 if (dst_aligned)
15070 emit_move_insn (dst, reg0);
15071 else
15072 emit_insn (gen_unaligned_storesi (dst, reg0));
15074 src = next_consecutive_mem (src);
15075 dst = next_consecutive_mem (dst);
15076 len -= 4;
15079 if (len == 0)
15080 return true;
15082 /* Copy the remaining bytes. */
15083 if (len >= 2)
15085 dst = adjust_address (dst, HImode, 0);
15086 src = adjust_address (src, HImode, 0);
15087 reg0 = gen_reg_rtx (SImode);
15088 if (src_aligned)
15089 emit_insn (gen_zero_extendhisi2 (reg0, src));
15090 else
15091 emit_insn (gen_unaligned_loadhiu (reg0, src));
15093 if (dst_aligned)
15094 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15095 else
15096 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15098 src = next_consecutive_mem (src);
15099 dst = next_consecutive_mem (dst);
15100 if (len == 2)
15101 return true;
15104 dst = adjust_address (dst, QImode, 0);
15105 src = adjust_address (src, QImode, 0);
15106 reg0 = gen_reg_rtx (QImode);
15107 emit_move_insn (reg0, src);
15108 emit_move_insn (dst, reg0);
15109 return true;
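/* A worked example (length and alignment assumed for illustration): with
   LEN == 14 and both buffers word-aligned, the code above emits one DImode
   ldrd/strd pair for the first 8 bytes, a single SImode copy for the next
   4, and a halfword copy for the final 2 bytes before returning true.  */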
15112 /* Select a dominance comparison mode if possible for a test of the general
15113 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15114 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15115 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15116 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15117 In all cases OP will be either EQ or NE, but we don't need to know which
15118 here. If we are unable to support a dominance comparison we return
15119 CC mode. This will then fail to match for the RTL expressions that
15120 generate this call. */
15121 machine_mode
15122 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15124 enum rtx_code cond1, cond2;
15125 int swapped = 0;
15127 /* Currently we will probably get the wrong result if the individual
15128 comparisons are not simple. This also ensures that it is safe to
15129 reverse a comparison if necessary. */
15130 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15131 != CCmode)
15132 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15133 != CCmode))
15134 return CCmode;
15136 /* The if_then_else variant of this tests the second condition if the
15137 first passes, but is true if the first fails. Reverse the first
15138 condition to get a true "inclusive-or" expression. */
15139 if (cond_or == DOM_CC_NX_OR_Y)
15140 cond1 = reverse_condition (cond1);
15142 /* If the comparisons are not equal, and one doesn't dominate the other,
15143 then we can't do this. */
15144 if (cond1 != cond2
15145 && !comparison_dominates_p (cond1, cond2)
15146 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15147 return CCmode;
15149 if (swapped)
15150 std::swap (cond1, cond2);
15152 switch (cond1)
15154 case EQ:
15155 if (cond_or == DOM_CC_X_AND_Y)
15156 return CC_DEQmode;
15158 switch (cond2)
15160 case EQ: return CC_DEQmode;
15161 case LE: return CC_DLEmode;
15162 case LEU: return CC_DLEUmode;
15163 case GE: return CC_DGEmode;
15164 case GEU: return CC_DGEUmode;
15165 default: gcc_unreachable ();
15168 case LT:
15169 if (cond_or == DOM_CC_X_AND_Y)
15170 return CC_DLTmode;
15172 switch (cond2)
15174 case LT:
15175 return CC_DLTmode;
15176 case LE:
15177 return CC_DLEmode;
15178 case NE:
15179 return CC_DNEmode;
15180 default:
15181 gcc_unreachable ();
15184 case GT:
15185 if (cond_or == DOM_CC_X_AND_Y)
15186 return CC_DGTmode;
15188 switch (cond2)
15190 case GT:
15191 return CC_DGTmode;
15192 case GE:
15193 return CC_DGEmode;
15194 case NE:
15195 return CC_DNEmode;
15196 default:
15197 gcc_unreachable ();
15200 case LTU:
15201 if (cond_or == DOM_CC_X_AND_Y)
15202 return CC_DLTUmode;
15204 switch (cond2)
15206 case LTU:
15207 return CC_DLTUmode;
15208 case LEU:
15209 return CC_DLEUmode;
15210 case NE:
15211 return CC_DNEmode;
15212 default:
15213 gcc_unreachable ();
15216 case GTU:
15217 if (cond_or == DOM_CC_X_AND_Y)
15218 return CC_DGTUmode;
15220 switch (cond2)
15222 case GTU:
15223 return CC_DGTUmode;
15224 case GEU:
15225 return CC_DGEUmode;
15226 case NE:
15227 return CC_DNEmode;
15228 default:
15229 gcc_unreachable ();
15232 /* The remaining cases only occur when both comparisons are the
15233 same. */
15234 case NE:
15235 gcc_assert (cond1 == cond2);
15236 return CC_DNEmode;
15238 case LE:
15239 gcc_assert (cond1 == cond2);
15240 return CC_DLEmode;
15242 case GE:
15243 gcc_assert (cond1 == cond2);
15244 return CC_DGEmode;
15246 case LEU:
15247 gcc_assert (cond1 == cond2);
15248 return CC_DLEUmode;
15250 case GEU:
15251 gcc_assert (cond1 == cond2);
15252 return CC_DGEUmode;
15254 default:
15255 gcc_unreachable ();
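/* Two illustrative cases (operands invented for exposition): for
   (eq x1 0) AND (eq x2 0), i.e. COND_OR == DOM_CC_X_AND_Y with both
   conditions EQ, the result is CC_DEQmode; for (lt ...) OR (le ...), LE
   dominates LT and the result is CC_DLEmode.  No mode exists for e.g.
   (lt ...) AND (gt ...), so CCmode is returned and the caller's pattern
   fails to match.  */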
15259 machine_mode
15260 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15262 /* All floating point compares return CCFP if it is an equality
15263 comparison, and CCFPE otherwise. */
15264 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15266 switch (op)
15268 case EQ:
15269 case NE:
15270 case UNORDERED:
15271 case ORDERED:
15272 case UNLT:
15273 case UNLE:
15274 case UNGT:
15275 case UNGE:
15276 case UNEQ:
15277 case LTGT:
15278 return CCFPmode;
15280 case LT:
15281 case LE:
15282 case GT:
15283 case GE:
15284 return CCFPEmode;
15286 default:
15287 gcc_unreachable ();
15291 /* A compare with a shifted operand. Because of canonicalization, the
15292 comparison will have to be swapped when we emit the assembler. */
15293 if (GET_MODE (y) == SImode
15294 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15295 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15296 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15297 || GET_CODE (x) == ROTATERT))
15298 return CC_SWPmode;
15300 /* This operation is performed swapped, but since we only rely on the Z
15301 flag we don't need an additional mode. */
15302 if (GET_MODE (y) == SImode
15303 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15304 && GET_CODE (x) == NEG
15305 && (op == EQ || op == NE))
15306 return CC_Zmode;
15308 /* This is a special case that is used by combine to allow a
15309 comparison of a shifted byte load to be split into a zero-extend
15310 followed by a comparison of the shifted integer (only valid for
15311 equalities and unsigned inequalities). */
15312 if (GET_MODE (x) == SImode
15313 && GET_CODE (x) == ASHIFT
15314 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15315 && GET_CODE (XEXP (x, 0)) == SUBREG
15316 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15317 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15318 && (op == EQ || op == NE
15319 || op == GEU || op == GTU || op == LTU || op == LEU)
15320 && CONST_INT_P (y))
15321 return CC_Zmode;
15323 /* A construct for a conditional compare, if the false arm contains
15324 0, then both conditions must be true, otherwise either condition
15325 must be true. Not all conditions are possible, so CCmode is
15326 returned if it can't be done. */
15327 if (GET_CODE (x) == IF_THEN_ELSE
15328 && (XEXP (x, 2) == const0_rtx
15329 || XEXP (x, 2) == const1_rtx)
15330 && COMPARISON_P (XEXP (x, 0))
15331 && COMPARISON_P (XEXP (x, 1)))
15332 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15333 INTVAL (XEXP (x, 2)));
15335 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15336 if (GET_CODE (x) == AND
15337 && (op == EQ || op == NE)
15338 && COMPARISON_P (XEXP (x, 0))
15339 && COMPARISON_P (XEXP (x, 1)))
15340 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15341 DOM_CC_X_AND_Y);
15343 if (GET_CODE (x) == IOR
15344 && (op == EQ || op == NE)
15345 && COMPARISON_P (XEXP (x, 0))
15346 && COMPARISON_P (XEXP (x, 1)))
15347 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15348 DOM_CC_X_OR_Y);
15350 /* An operation (on Thumb) where we want to test for a single bit.
15351 This is done by shifting that bit up into the top bit of a
15352 scratch register; we can then branch on the sign bit. */
15353 if (TARGET_THUMB1
15354 && GET_MODE (x) == SImode
15355 && (op == EQ || op == NE)
15356 && GET_CODE (x) == ZERO_EXTRACT
15357 && XEXP (x, 1) == const1_rtx)
15358 return CC_Nmode;
15360 /* For an operation that sets the condition codes as a side-effect, the
15361 V flag is not set correctly, so we can only use comparisons where
15362 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15363 instead.) */
15364 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15365 if (GET_MODE (x) == SImode
15366 && y == const0_rtx
15367 && (op == EQ || op == NE || op == LT || op == GE)
15368 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15369 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15370 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15371 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15372 || GET_CODE (x) == LSHIFTRT
15373 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15374 || GET_CODE (x) == ROTATERT
15375 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15376 return CC_NOOVmode;
15378 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15379 return CC_Zmode;
15381 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15382 && GET_CODE (x) == PLUS
15383 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15384 return CC_Cmode;
15386 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15388 switch (op)
15390 case EQ:
15391 case NE:
15392 /* A DImode comparison against zero can be implemented by
15393 or'ing the two halves together. */
15394 if (y == const0_rtx)
15395 return CC_Zmode;
15397 /* We can do an equality test in three Thumb instructions. */
15398 if (!TARGET_32BIT)
15399 return CC_Zmode;
15401 /* FALLTHROUGH */
15403 case LTU:
15404 case LEU:
15405 case GTU:
15406 case GEU:
15407 /* DImode unsigned comparisons can be implemented by cmp +
15408 cmpeq without a scratch register. Not worth doing in
15409 Thumb-2. */
15410 if (TARGET_32BIT)
15411 return CC_CZmode;
15413 /* FALLTHROUGH */
15415 case LT:
15416 case LE:
15417 case GT:
15418 case GE:
15419 /* DImode signed and unsigned comparisons can be implemented
15420 by cmp + sbcs with a scratch register, but that does not
15421 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15422 gcc_assert (op != EQ && op != NE);
15423 return CC_NCVmode;
15425 default:
15426 gcc_unreachable ();
15430 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15431 return GET_MODE (x);
15433 return CCmode;
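/* Illustrative examples of the selection above (a sketch, not an
   exhaustive list): a compare of (ashift:SI reg (const_int 2)) against
   an SImode register selects CC_SWPmode, since the operands must be
   swapped when the assembler is emitted; (eq (neg:SI reg) reg2) selects
   CC_Zmode, because only the Z flag is relied upon; an EQ/NE test of a
   QImode value selects CC_Zmode; and a DImode EQ/NE against const0_rtx
   also selects CC_Zmode, because the two halves can simply be ORed
   together.  */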
15436 /* X and Y are two things to compare using CODE.  Emit the compare insn and
15437 return the rtx for the CC register in the proper mode.  SCRATCH is an
15438 SImode register used by DImode comparisons that need a scratch after reload. */
15440 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15442 machine_mode mode;
15443 rtx cc_reg;
15444 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15446 /* We might have X as a constant, Y as a register because of the predicates
15447 used for cmpdi. If so, force X to a register here. */
15448 if (dimode_comparison && !REG_P (x))
15449 x = force_reg (DImode, x);
15451 mode = SELECT_CC_MODE (code, x, y);
15452 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15454 if (dimode_comparison
15455 && mode != CC_CZmode)
15457 rtx clobber, set;
15459 /* To compare two non-zero values for equality, XOR them and
15460 then compare against zero. Not used for ARM mode; there
15461 CC_CZmode is cheaper. */
15462 if (mode == CC_Zmode && y != const0_rtx)
15464 gcc_assert (!reload_completed);
15465 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15466 y = const0_rtx;
15469 /* A scratch register is required. */
15470 if (reload_completed)
15471 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15472 else
15473 scratch = gen_rtx_SCRATCH (SImode);
15475 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15476 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15477 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15479 else
15480 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15482 return cc_reg;
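/* Illustrative sketch of the DImode path above: an EQ/NE test of two
   non-zero DImode registers selects CC_Zmode, so the operands are first
   XORed into a fresh DImode value (shown here as "tmp") and the emitted
   insn has the shape

	(parallel [(set (reg:CC_Z CC_REGNUM)
			(compare:CC_Z (reg:DI tmp) (const_int 0)))
		   (clobber (scratch:SI))])

   whereas a DImode unsigned comparison on a 32-bit target selects
   CC_CZmode and gets a plain SET of the CC register with no scratch.  */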
15485 /* Generate a sequence of insns that will generate the correct return
15486 address mask depending on the physical architecture that the program
15487 is running on. */
15489 arm_gen_return_addr_mask (void)
15491 rtx reg = gen_reg_rtx (Pmode);
15493 emit_insn (gen_return_addr_mask (reg));
15494 return reg;
15497 void
15498 arm_reload_in_hi (rtx *operands)
15500 rtx ref = operands[1];
15501 rtx base, scratch;
15502 HOST_WIDE_INT offset = 0;
15504 if (GET_CODE (ref) == SUBREG)
15506 offset = SUBREG_BYTE (ref);
15507 ref = SUBREG_REG (ref);
15510 if (REG_P (ref))
15512 /* We have a pseudo which has been spilt onto the stack; there
15513 are two cases here: the first where there is a simple
15514 stack-slot replacement and a second where the stack-slot is
15515 out of range, or is used as a subreg. */
15516 if (reg_equiv_mem (REGNO (ref)))
15518 ref = reg_equiv_mem (REGNO (ref));
15519 base = find_replacement (&XEXP (ref, 0));
15521 else
15522 /* The slot is out of range, or was dressed up in a SUBREG. */
15523 base = reg_equiv_address (REGNO (ref));
15525 /* PR 62554: If there is no equivalent memory location then just move
15526 the value as an SImode register move. This happens when the target
15527 architecture variant does not have an HImode register move. */
15528 if (base == NULL)
15530 gcc_assert (REG_P (operands[0]));
15531 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15532 gen_rtx_SUBREG (SImode, ref, 0)));
15533 return;
15536 else
15537 base = find_replacement (&XEXP (ref, 0));
15539 /* Handle the case where the address is too complex to be offset by 1. */
15540 if (GET_CODE (base) == MINUS
15541 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15543 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15545 emit_set_insn (base_plus, base);
15546 base = base_plus;
15548 else if (GET_CODE (base) == PLUS)
15550 /* The addend must be CONST_INT, or we would have dealt with it above. */
15551 HOST_WIDE_INT hi, lo;
15553 offset += INTVAL (XEXP (base, 1));
15554 base = XEXP (base, 0);
15556 /* Rework the address into a legal sequence of insns. */
15557 /* Valid range for lo is -4095 -> 4095 */
15558 lo = (offset >= 0
15559 ? (offset & 0xfff)
15560 : -((-offset) & 0xfff));
15562 /* Corner case, if lo is the max offset then we would be out of range
15563 once we have added the additional 1 below, so bump the msb into the
15564 pre-loading insn(s). */
15565 if (lo == 4095)
15566 lo &= 0x7ff;
15568 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15569 ^ (HOST_WIDE_INT) 0x80000000)
15570 - (HOST_WIDE_INT) 0x80000000);
15572 gcc_assert (hi + lo == offset);
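      /* Worked example of the split above (illustration only): with
	 OFFSET == 4095 the corner case applies, so LO becomes 0x7ff (2047)
	 and HI becomes 2048; with OFFSET == -4100, LO is -4 and HI is
	 -4096.  In both cases HI + LO == OFFSET, as asserted.  */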
15574 if (hi != 0)
15576 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15578 /* Get the base address; addsi3 knows how to handle constants
15579 that require more than one insn. */
15580 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15581 base = base_plus;
15582 offset = lo;
15586 /* Operands[2] may overlap operands[0] (though it won't overlap
15587 operands[1]), that's why we asked for a DImode reg -- so we can
15588 use the bit that does not overlap. */
15589 if (REGNO (operands[2]) == REGNO (operands[0]))
15590 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15591 else
15592 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15594 emit_insn (gen_zero_extendqisi2 (scratch,
15595 gen_rtx_MEM (QImode,
15596 plus_constant (Pmode, base,
15597 offset))));
15598 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15599 gen_rtx_MEM (QImode,
15600 plus_constant (Pmode, base,
15601 offset + 1))));
15602 if (!BYTES_BIG_ENDIAN)
15603 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15604 gen_rtx_IOR (SImode,
15605 gen_rtx_ASHIFT
15606 (SImode,
15607 gen_rtx_SUBREG (SImode, operands[0], 0),
15608 GEN_INT (8)),
15609 scratch));
15610 else
15611 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15612 gen_rtx_IOR (SImode,
15613 gen_rtx_ASHIFT (SImode, scratch,
15614 GEN_INT (8)),
15615 gen_rtx_SUBREG (SImode, operands[0], 0)));
15618 /* Handle storing a half-word to memory during reload by synthesizing as two
15619 byte stores. Take care not to clobber the input values until after we
15620 have moved them somewhere safe. This code assumes that if the DImode
15621 scratch in operands[2] overlaps either the input value or output address
15622 in some way, then that value must die in this insn (we absolutely need
15623 two scratch registers for some corner cases). */
15624 void
15625 arm_reload_out_hi (rtx *operands)
15627 rtx ref = operands[0];
15628 rtx outval = operands[1];
15629 rtx base, scratch;
15630 HOST_WIDE_INT offset = 0;
15632 if (GET_CODE (ref) == SUBREG)
15634 offset = SUBREG_BYTE (ref);
15635 ref = SUBREG_REG (ref);
15638 if (REG_P (ref))
15640 /* We have a pseudo which has been spilt onto the stack; there
15641 are two cases here: the first where there is a simple
15642 stack-slot replacement and a second where the stack-slot is
15643 out of range, or is used as a subreg. */
15644 if (reg_equiv_mem (REGNO (ref)))
15646 ref = reg_equiv_mem (REGNO (ref));
15647 base = find_replacement (&XEXP (ref, 0));
15649 else
15650 /* The slot is out of range, or was dressed up in a SUBREG. */
15651 base = reg_equiv_address (REGNO (ref));
15653 /* PR 62254: If there is no equivalent memory location then just move
15654 the value as an SImode register move. This happens when the target
15655 architecture variant does not have an HImode register move. */
15656 if (base == NULL)
15658 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15660 if (REG_P (outval))
15662 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15663 gen_rtx_SUBREG (SImode, outval, 0)));
15665 else /* SUBREG_P (outval) */
15667 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15668 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15669 SUBREG_REG (outval)));
15670 else
15671 /* FIXME: Handle other cases ? */
15672 gcc_unreachable ();
15674 return;
15677 else
15678 base = find_replacement (&XEXP (ref, 0));
15680 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15682 /* Handle the case where the address is too complex to be offset by 1. */
15683 if (GET_CODE (base) == MINUS
15684 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15686 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15688 /* Be careful not to destroy OUTVAL. */
15689 if (reg_overlap_mentioned_p (base_plus, outval))
15691 /* Updating base_plus might destroy outval, see if we can
15692 swap the scratch and base_plus. */
15693 if (!reg_overlap_mentioned_p (scratch, outval))
15694 std::swap (scratch, base_plus);
15695 else
15697 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15699 /* Be conservative and copy OUTVAL into the scratch now,
15700 this should only be necessary if outval is a subreg
15701 of something larger than a word. */
15702 /* XXX Might this clobber base? I can't see how it can,
15703 since scratch is known to overlap with OUTVAL, and
15704 must be wider than a word. */
15705 emit_insn (gen_movhi (scratch_hi, outval));
15706 outval = scratch_hi;
15710 emit_set_insn (base_plus, base);
15711 base = base_plus;
15713 else if (GET_CODE (base) == PLUS)
15715 /* The addend must be CONST_INT, or we would have dealt with it above. */
15716 HOST_WIDE_INT hi, lo;
15718 offset += INTVAL (XEXP (base, 1));
15719 base = XEXP (base, 0);
15721 /* Rework the address into a legal sequence of insns. */
15722 /* Valid range for lo is -4095 -> 4095 */
15723 lo = (offset >= 0
15724 ? (offset & 0xfff)
15725 : -((-offset) & 0xfff));
15727 /* Corner case, if lo is the max offset then we would be out of range
15728 once we have added the additional 1 below, so bump the msb into the
15729 pre-loading insn(s). */
15730 if (lo == 4095)
15731 lo &= 0x7ff;
15733 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15734 ^ (HOST_WIDE_INT) 0x80000000)
15735 - (HOST_WIDE_INT) 0x80000000);
15737 gcc_assert (hi + lo == offset);
15739 if (hi != 0)
15741 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15743 /* Be careful not to destroy OUTVAL. */
15744 if (reg_overlap_mentioned_p (base_plus, outval))
15746 /* Updating base_plus might destroy outval, see if we
15747 can swap the scratch and base_plus. */
15748 if (!reg_overlap_mentioned_p (scratch, outval))
15749 std::swap (scratch, base_plus);
15750 else
15752 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15754 /* Be conservative and copy outval into scratch now,
15755 this should only be necessary if outval is a
15756 subreg of something larger than a word. */
15757 /* XXX Might this clobber base? I can't see how it
15758 can, since scratch is known to overlap with
15759 outval. */
15760 emit_insn (gen_movhi (scratch_hi, outval));
15761 outval = scratch_hi;
15765 /* Get the base address; addsi3 knows how to handle constants
15766 that require more than one insn. */
15767 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15768 base = base_plus;
15769 offset = lo;
15773 if (BYTES_BIG_ENDIAN)
15775 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15776 plus_constant (Pmode, base,
15777 offset + 1)),
15778 gen_lowpart (QImode, outval)));
15779 emit_insn (gen_lshrsi3 (scratch,
15780 gen_rtx_SUBREG (SImode, outval, 0),
15781 GEN_INT (8)));
15782 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15783 offset)),
15784 gen_lowpart (QImode, scratch)));
15786 else
15788 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15789 offset)),
15790 gen_lowpart (QImode, outval)));
15791 emit_insn (gen_lshrsi3 (scratch,
15792 gen_rtx_SUBREG (SImode, outval, 0),
15793 GEN_INT (8)));
15794 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15795 plus_constant (Pmode, base,
15796 offset + 1)),
15797 gen_lowpart (QImode, scratch)));
15801 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15802 (padded to the size of a word) should be passed in a register. */
15804 static bool
15805 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15807 if (TARGET_AAPCS_BASED)
15808 return must_pass_in_stack_var_size (mode, type);
15809 else
15810 return must_pass_in_stack_var_size_or_pad (mode, type);
15814 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15815 Return true if an argument passed on the stack should be padded upwards,
15816 i.e. if the least-significant byte has useful data.
15817 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15818 aggregate types are placed in the lowest memory address. */
15820 bool
15821 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15823 if (!TARGET_AAPCS_BASED)
15824 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15826 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15827 return false;
15829 return true;
15833 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15834 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15835 register has useful data, and return the opposite if the most
15836 significant byte does. */
15838 bool
15839 arm_pad_reg_upward (machine_mode mode,
15840 tree type, int first ATTRIBUTE_UNUSED)
15842 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15844 /* For AAPCS, small aggregates, small fixed-point types,
15845 and small complex types are always padded upwards. */
15846 if (type)
15848 if ((AGGREGATE_TYPE_P (type)
15849 || TREE_CODE (type) == COMPLEX_TYPE
15850 || FIXED_POINT_TYPE_P (type))
15851 && int_size_in_bytes (type) <= 4)
15852 return true;
15854 else
15856 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15857 && GET_MODE_SIZE (mode) <= 4)
15858 return true;
15862 /* Otherwise, use default padding. */
15863 return !BYTES_BIG_ENDIAN;
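/* For illustration: on an AAPCS big-endian target a 3-byte structure
   passed in a register is padded upward (true), while a plain 4-byte
   integer scalar falls through to the default of !BYTES_BIG_ENDIAN,
   i.e. downward padding.  */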
15866 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15867 assuming that the address in the base register is word aligned. */
15868 bool
15869 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15871 HOST_WIDE_INT max_offset;
15873 /* Offset must be a multiple of 4 in Thumb mode. */
15874 if (TARGET_THUMB2 && ((offset & 3) != 0))
15875 return false;
15877 if (TARGET_THUMB2)
15878 max_offset = 1020;
15879 else if (TARGET_ARM)
15880 max_offset = 255;
15881 else
15882 return false;
15884 return ((offset <= max_offset) && (offset >= -max_offset));
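/* For illustration: in Thumb-2 an offset of 1020 or -1020 is accepted
   but 1024 (too large) or 6 (not a multiple of 4) is not; in ARM state
   offsets up to +/-255 are accepted with no multiple-of-four
   requirement; and in Thumb-1 the answer is always false.  */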
15887 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15888 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15889 Assumes that the address in the base register RN is word aligned. Pattern
15890 guarantees that both memory accesses use the same base register,
15891 the offsets are constants within the range, and the gap between the offsets is 4.
15892 If reload is complete then check that the registers are legal. WBACK indicates whether
15893 address is updated. LOAD indicates whether memory access is load or store. */
15894 bool
15895 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15896 bool wback, bool load)
15898 unsigned int t, t2, n;
15900 if (!reload_completed)
15901 return true;
15903 if (!offset_ok_for_ldrd_strd (offset))
15904 return false;
15906 t = REGNO (rt);
15907 t2 = REGNO (rt2);
15908 n = REGNO (rn);
15910 if ((TARGET_THUMB2)
15911 && ((wback && (n == t || n == t2))
15912 || (t == SP_REGNUM)
15913 || (t == PC_REGNUM)
15914 || (t2 == SP_REGNUM)
15915 || (t2 == PC_REGNUM)
15916 || (!load && (n == PC_REGNUM))
15917 || (load && (t == t2))
15918 /* Triggers Cortex-M3 LDRD errata. */
15919 || (!wback && load && fix_cm3_ldrd && (n == t))))
15920 return false;
15922 if ((TARGET_ARM)
15923 && ((wback && (n == t || n == t2))
15924 || (t2 == PC_REGNUM)
15925 || (t % 2 != 0) /* First destination register is not even. */
15926 || (t2 != t + 1)
15927 /* PC can be used as base register (for offset addressing only),
15928 but it is deprecated. */
15929 || (n == PC_REGNUM)))
15930 return false;
15932 return true;
15935 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15936 operand MEM's address contains an immediate offset from the base
15937 register and has no side effects, in which case it sets BASE and
15938 OFFSET accordingly. */
15939 static bool
15940 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15942 rtx addr;
15944 gcc_assert (base != NULL && offset != NULL);
15946 /* TODO: Handle more general memory operand patterns, such as
15947 PRE_DEC and PRE_INC. */
15949 if (side_effects_p (mem))
15950 return false;
15952 /* Can't deal with subregs. */
15953 if (GET_CODE (mem) == SUBREG)
15954 return false;
15956 gcc_assert (MEM_P (mem));
15958 *offset = const0_rtx;
15960 addr = XEXP (mem, 0);
15962 /* If addr isn't valid for DImode, then we can't handle it. */
15963 if (!arm_legitimate_address_p (DImode, addr,
15964 reload_in_progress || reload_completed))
15965 return false;
15967 if (REG_P (addr))
15969 *base = addr;
15970 return true;
15972 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15974 *base = XEXP (addr, 0);
15975 *offset = XEXP (addr, 1);
15976 return (REG_P (*base) && CONST_INT_P (*offset));
15979 return false;
15982 /* Called from a peephole2 to replace two word-size accesses with a
15983 single LDRD/STRD instruction. Returns true iff we can generate a
15984 new instruction sequence. That is, both accesses use the same base
15985 register and the gap between constant offsets is 4. This function
15986 may reorder its operands to match ldrd/strd RTL templates.
15987 OPERANDS are the operands found by the peephole matcher;
15988 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15989 corresponding memory operands. LOAD indicates whether the access
15990 is load or store. CONST_STORE indicates a store of constant
15991 integer values held in OPERANDS[4,5] and assumes that the pattern
15992 is 4 insns long, for the purpose of checking dead registers.
15993 COMMUTE indicates that register operands may be reordered. */
15994 bool
15995 gen_operands_ldrd_strd (rtx *operands, bool load,
15996 bool const_store, bool commute)
15998 int nops = 2;
15999 HOST_WIDE_INT offsets[2], offset;
16000 rtx base = NULL_RTX;
16001 rtx cur_base, cur_offset, tmp;
16002 int i, gap;
16003 HARD_REG_SET regset;
16005 gcc_assert (!const_store || !load);
16006 /* Check that the memory references are immediate offsets from the
16007 same base register. Extract the base register, the destination
16008 registers, and the corresponding memory offsets. */
16009 for (i = 0; i < nops; i++)
16011 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
16012 return false;
16014 if (i == 0)
16015 base = cur_base;
16016 else if (REGNO (base) != REGNO (cur_base))
16017 return false;
16019 offsets[i] = INTVAL (cur_offset);
16020 if (GET_CODE (operands[i]) == SUBREG)
16022 tmp = SUBREG_REG (operands[i]);
16023 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16024 operands[i] = tmp;
16028 /* Make sure there is no dependency between the individual loads. */
16029 if (load && REGNO (operands[0]) == REGNO (base))
16030 return false; /* RAW */
16032 if (load && REGNO (operands[0]) == REGNO (operands[1]))
16033 return false; /* WAW */
16035 /* If the same input register is used in both stores
16036 when storing different constants, try to find a free register.
16037 For example, the code
16038 mov r0, 0
16039 str r0, [r2]
16040 mov r0, 1
16041 str r0, [r2, #4]
16042 can be transformed into
16043 mov r1, 0
16044 mov r0, 1
16045 strd r1, r0, [r2]
16046 in Thumb mode assuming that r1 is free.
16047 For ARM mode do the same but only if the starting register
16048 can be made to be even. */
16049 if (const_store
16050 && REGNO (operands[0]) == REGNO (operands[1])
16051 && INTVAL (operands[4]) != INTVAL (operands[5]))
16053 if (TARGET_THUMB2)
16055 CLEAR_HARD_REG_SET (regset);
16056 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16057 if (tmp == NULL_RTX)
16058 return false;
16060 /* Use the new register in the first load to ensure that
16061 if the original input register is not dead after peephole,
16062 then it will have the correct constant value. */
16063 operands[0] = tmp;
16065 else if (TARGET_ARM)
16067 int regno = REGNO (operands[0]);
16068 if (!peep2_reg_dead_p (4, operands[0]))
16070 /* When the input register is even and is not dead after the
16071 pattern, it has to hold the second constant but we cannot
16072 form a legal STRD in ARM mode with this register as the second
16073 register. */
16074 if (regno % 2 == 0)
16075 return false;
16077 /* Is regno-1 free? */
16078 SET_HARD_REG_SET (regset);
16079 CLEAR_HARD_REG_BIT(regset, regno - 1);
16080 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16081 if (tmp == NULL_RTX)
16082 return false;
16084 operands[0] = tmp;
16086 else
16088 /* Find a DImode register. */
16089 CLEAR_HARD_REG_SET (regset);
16090 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16091 if (tmp != NULL_RTX)
16093 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16094 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16096 else
16098 /* Can we use the input register to form a DI register? */
16099 SET_HARD_REG_SET (regset);
16100 CLEAR_HARD_REG_BIT(regset,
16101 regno % 2 == 0 ? regno + 1 : regno - 1);
16102 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16103 if (tmp == NULL_RTX)
16104 return false;
16105 operands[regno % 2 == 1 ? 0 : 1] = tmp;
16109 gcc_assert (operands[0] != NULL_RTX);
16110 gcc_assert (operands[1] != NULL_RTX);
16111 gcc_assert (REGNO (operands[0]) % 2 == 0);
16112 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
16116 /* Make sure the instructions are ordered with lower memory access first. */
16117 if (offsets[0] > offsets[1])
16119 gap = offsets[0] - offsets[1];
16120 offset = offsets[1];
16122 /* Swap the instructions such that lower memory is accessed first. */
16123 std::swap (operands[0], operands[1]);
16124 std::swap (operands[2], operands[3]);
16125 if (const_store)
16126 std::swap (operands[4], operands[5]);
16128 else
16130 gap = offsets[1] - offsets[0];
16131 offset = offsets[0];
16134 /* Make sure accesses are to consecutive memory locations. */
16135 if (gap != 4)
16136 return false;
16138 /* Make sure we generate legal instructions. */
16139 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16140 false, load))
16141 return true;
16143 /* In Thumb state, where registers are almost unconstrained, there
16144 is little hope to fix it. */
16145 if (TARGET_THUMB2)
16146 return false;
16148 if (load && commute)
16150 /* Try reordering registers. */
16151 std::swap (operands[0], operands[1]);
16152 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16153 false, load))
16154 return true;
16157 if (const_store)
16159 /* If input registers are dead after this pattern, they can be
16160 reordered or replaced by other registers that are free in the
16161 current pattern. */
16162 if (!peep2_reg_dead_p (4, operands[0])
16163 || !peep2_reg_dead_p (4, operands[1]))
16164 return false;
16166 /* Try to reorder the input registers. */
16167 /* For example, the code
16168 mov r0, 0
16169 mov r1, 1
16170 str r1, [r2]
16171 str r0, [r2, #4]
16172 can be transformed into
16173 mov r1, 0
16174 mov r0, 1
16175 strd r0, [r2]
16177 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16178 false, false))
16180 std::swap (operands[0], operands[1]);
16181 return true;
16184 /* Try to find a free DI register. */
16185 CLEAR_HARD_REG_SET (regset);
16186 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16187 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16188 while (true)
16190 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16191 if (tmp == NULL_RTX)
16192 return false;
16194 /* DREG must be an even-numbered register in DImode.
16195 Split it into SI registers. */
16196 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16197 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16198 gcc_assert (operands[0] != NULL_RTX);
16199 gcc_assert (operands[1] != NULL_RTX);
16200 gcc_assert (REGNO (operands[0]) % 2 == 0);
16201 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16203 return (operands_ok_ldrd_strd (operands[0], operands[1],
16204 base, offset,
16205 false, load));
16209 return false;
16215 /* Print a symbolic form of X to the debug file, F. */
16216 static void
16217 arm_print_value (FILE *f, rtx x)
16219 switch (GET_CODE (x))
16221 case CONST_INT:
16222 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16223 return;
16225 case CONST_DOUBLE:
16226 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16227 return;
16229 case CONST_VECTOR:
16231 int i;
16233 fprintf (f, "<");
16234 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16236 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16237 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16238 fputc (',', f);
16240 fprintf (f, ">");
16242 return;
16244 case CONST_STRING:
16245 fprintf (f, "\"%s\"", XSTR (x, 0));
16246 return;
16248 case SYMBOL_REF:
16249 fprintf (f, "`%s'", XSTR (x, 0));
16250 return;
16252 case LABEL_REF:
16253 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16254 return;
16256 case CONST:
16257 arm_print_value (f, XEXP (x, 0));
16258 return;
16260 case PLUS:
16261 arm_print_value (f, XEXP (x, 0));
16262 fprintf (f, "+");
16263 arm_print_value (f, XEXP (x, 1));
16264 return;
16266 case PC:
16267 fprintf (f, "pc");
16268 return;
16270 default:
16271 fprintf (f, "????");
16272 return;
16276 /* Routines for manipulation of the constant pool. */
16278 /* Arm instructions cannot load a large constant directly into a
16279 register; they have to come from a pc relative load. The constant
16280 must therefore be placed in the addressable range of the pc
16281 relative load. Depending on the precise pc relative load
16282 instruction the range is somewhere between 256 bytes and 4k. This
16283 means that we often have to dump a constant inside a function, and
16284 generate code to branch around it.
16286 It is important to minimize this, since the branches will slow
16287 things down and make the code larger.
16289 Normally we can hide the table after an existing unconditional
16290 branch so that there is no interruption of the flow, but in the
16291 worst case the code looks like this:
16293 ldr rn, L1
16295 b L2
16296 align
16297 L1: .long value
16301 ldr rn, L3
16303 b L4
16304 align
16305 L3: .long value
16309 We fix this by performing a scan after scheduling, which notices
16310 which instructions need to have their operands fetched from the
16311 constant table and builds the table.
16313 The algorithm starts by building a table of all the constants that
16314 need fixing up and all the natural barriers in the function (places
16315 where a constant table can be dropped without breaking the flow).
16316 For each fixup we note how far the pc-relative replacement will be
16317 able to reach and the offset of the instruction into the function.
16319 Having built the table we then group the fixes together to form
16320 tables that are as large as possible (subject to addressing
16321 constraints) and emit each table of constants after the last
16322 barrier that is within range of all the instructions in the group.
16323 If a group does not contain a barrier, then we forcibly create one
16324 by inserting a jump instruction into the flow. Once the table has
16325 been inserted, the insns are then modified to reference the
16326 relevant entry in the pool.
16328 Possible enhancements to the algorithm (not implemented) are:
16330 1) For some processors and object formats, there may be benefit in
16331 aligning the pools to the start of cache lines; this alignment
16332 would need to be taken into account when calculating addressability
16333 of a pool. */
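/* To make the description above concrete (a sketch only): each
   out-of-range constant load is recorded as a minipool_fixup (see
   below) holding the insn, its address, the constant and the
   forward/backward pool range of the instruction, while each natural
   barrier is recorded with just an insn and an address.  Grouping then
   amounts to choosing, for each run of fixes, the latest barrier (or a
   forcibly created one) that every fix in the run can still reach.  */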
16335 /* These typedefs are located at the start of this file, so that
16336 they can be used in the prototypes there. This comment is to
16337 remind readers of that fact so that the following structures
16338 can be understood more easily.
16340 typedef struct minipool_node Mnode;
16341 typedef struct minipool_fixup Mfix; */
16343 struct minipool_node
16345 /* Doubly linked chain of entries. */
16346 Mnode * next;
16347 Mnode * prev;
16348 /* The maximum offset into the code at which this entry can be placed. While
16349 pushing fixes for forward references, all entries are sorted in order
16350 of increasing max_address. */
16351 HOST_WIDE_INT max_address;
16352 /* Similarly for an entry inserted for a backwards ref. */
16353 HOST_WIDE_INT min_address;
16354 /* The number of fixes referencing this entry. This can become zero
16355 if we "unpush" an entry. In this case we ignore the entry when we
16356 come to emit the code. */
16357 int refcount;
16358 /* The offset from the start of the minipool. */
16359 HOST_WIDE_INT offset;
16360 /* The value in table. */
16361 rtx value;
16362 /* The mode of value. */
16363 machine_mode mode;
16364 /* The size of the value. With iWMMXt enabled
16365 sizes > 4 also imply an alignment of 8 bytes. */
16366 int fix_size;
16369 struct minipool_fixup
16371 Mfix * next;
16372 rtx_insn * insn;
16373 HOST_WIDE_INT address;
16374 rtx * loc;
16375 machine_mode mode;
16376 int fix_size;
16377 rtx value;
16378 Mnode * minipool;
16379 HOST_WIDE_INT forwards;
16380 HOST_WIDE_INT backwards;
16383 /* Fixes less than a word need padding out to a word boundary. */
16384 #define MINIPOOL_FIX_SIZE(mode) \
16385 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
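/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   both evaluate to 4, while MINIPOOL_FIX_SIZE (DImode) is 8 and
   MINIPOOL_FIX_SIZE (TImode) is 16.  */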
16387 static Mnode * minipool_vector_head;
16388 static Mnode * minipool_vector_tail;
16389 static rtx_code_label *minipool_vector_label;
16390 static int minipool_pad;
16392 /* The linked list of all minipool fixes required for this function. */
16393 Mfix * minipool_fix_head;
16394 Mfix * minipool_fix_tail;
16395 /* The fix entry for the current minipool, once it has been placed. */
16396 Mfix * minipool_barrier;
16398 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16399 #define JUMP_TABLES_IN_TEXT_SECTION 0
16400 #endif
16402 static HOST_WIDE_INT
16403 get_jump_table_size (rtx_jump_table_data *insn)
16405 /* ADDR_VECs only take room if read-only data goes into the text
16406 section. */
16407 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16409 rtx body = PATTERN (insn);
16410 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16411 HOST_WIDE_INT size;
16412 HOST_WIDE_INT modesize;
16414 modesize = GET_MODE_SIZE (GET_MODE (body));
16415 size = modesize * XVECLEN (body, elt);
16416 switch (modesize)
16418 case 1:
16419 /* Round up size of TBB table to a halfword boundary. */
16420 size = (size + 1) & ~HOST_WIDE_INT_1;
16421 break;
16422 case 2:
16423 /* No padding necessary for TBH. */
16424 break;
16425 case 4:
16426 /* Add two bytes for alignment on Thumb. */
16427 if (TARGET_THUMB)
16428 size += 2;
16429 break;
16430 default:
16431 gcc_unreachable ();
16433 return size;
16436 return 0;
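/* For illustration (assuming the table ends up in the text section):
   a QImode (TBB-style) ADDR_DIFF_VEC with 7 entries occupies 8 bytes
   after rounding to a halfword, an HImode (TBH-style) table with 7
   entries occupies 14 bytes, and an SImode table with 7 entries
   occupies 28 bytes plus 2 bytes of alignment on Thumb.  */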
16439 /* Return the maximum amount of padding that will be inserted before
16440 label LABEL. */
16442 static HOST_WIDE_INT
16443 get_label_padding (rtx label)
16445 HOST_WIDE_INT align, min_insn_size;
16447 align = 1 << label_to_alignment (label);
16448 min_insn_size = TARGET_THUMB ? 2 : 4;
16449 return align > min_insn_size ? align - min_insn_size : 0;
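/* For example, a label aligned to 8 bytes can be preceded by up to
   6 bytes of padding on Thumb (minimum insn size 2) and up to 4 bytes
   on ARM (minimum insn size 4); an alignment at or below the minimum
   insn size contributes no padding.  */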
16452 /* Move a minipool fix MP from its current location to before MAX_MP.
16453 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16454 constraints may need updating. */
16455 static Mnode *
16456 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16457 HOST_WIDE_INT max_address)
16459 /* The code below assumes these are different. */
16460 gcc_assert (mp != max_mp);
16462 if (max_mp == NULL)
16464 if (max_address < mp->max_address)
16465 mp->max_address = max_address;
16467 else
16469 if (max_address > max_mp->max_address - mp->fix_size)
16470 mp->max_address = max_mp->max_address - mp->fix_size;
16471 else
16472 mp->max_address = max_address;
16474 /* Unlink MP from its current position. Since max_mp is non-null,
16475 mp->prev must be non-null. */
16476 mp->prev->next = mp->next;
16477 if (mp->next != NULL)
16478 mp->next->prev = mp->prev;
16479 else
16480 minipool_vector_tail = mp->prev;
16482 /* Re-insert it before MAX_MP. */
16483 mp->next = max_mp;
16484 mp->prev = max_mp->prev;
16485 max_mp->prev = mp;
16487 if (mp->prev != NULL)
16488 mp->prev->next = mp;
16489 else
16490 minipool_vector_head = mp;
16493 /* Save the new entry. */
16494 max_mp = mp;
16496 /* Scan over the preceding entries and adjust their addresses as
16497 required. */
16498 while (mp->prev != NULL
16499 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16501 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16502 mp = mp->prev;
16505 return max_mp;
16508 /* Add a constant to the minipool for a forward reference. Returns the
16509 node added or NULL if the constant will not fit in this pool. */
16510 static Mnode *
16511 add_minipool_forward_ref (Mfix *fix)
16513 /* If set, max_mp is the first pool_entry that has a lower
16514 constraint than the one we are trying to add. */
16515 Mnode * max_mp = NULL;
16516 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16517 Mnode * mp;
16519 /* If the minipool starts before the end of FIX->INSN then this FIX
16520 can not be placed into the current pool. Furthermore, adding the
16521 new constant pool entry may cause the pool to start FIX_SIZE bytes
16522 earlier. */
16523 if (minipool_vector_head &&
16524 (fix->address + get_attr_length (fix->insn)
16525 >= minipool_vector_head->max_address - fix->fix_size))
16526 return NULL;
16528 /* Scan the pool to see if a constant with the same value has
16529 already been added. While we are doing this, also note the
16530 location where we must insert the constant if it doesn't already
16531 exist. */
16532 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16534 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16535 && fix->mode == mp->mode
16536 && (!LABEL_P (fix->value)
16537 || (CODE_LABEL_NUMBER (fix->value)
16538 == CODE_LABEL_NUMBER (mp->value)))
16539 && rtx_equal_p (fix->value, mp->value))
16541 /* More than one fix references this entry. */
16542 mp->refcount++;
16543 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16546 /* Note the insertion point if necessary. */
16547 if (max_mp == NULL
16548 && mp->max_address > max_address)
16549 max_mp = mp;
16551 /* If we are inserting an 8-byte aligned quantity and
16552 we have not already found an insertion point, then
16553 make sure that all such 8-byte aligned quantities are
16554 placed at the start of the pool. */
16555 if (ARM_DOUBLEWORD_ALIGN
16556 && max_mp == NULL
16557 && fix->fix_size >= 8
16558 && mp->fix_size < 8)
16560 max_mp = mp;
16561 max_address = mp->max_address;
16565 /* The value is not currently in the minipool, so we need to create
16566 a new entry for it. If MAX_MP is NULL, the entry will be put on
16567 the end of the list since the placement is less constrained than
16568 any existing entry. Otherwise, we insert the new fix before
16569 MAX_MP and, if necessary, adjust the constraints on the other
16570 entries. */
16571 mp = XNEW (Mnode);
16572 mp->fix_size = fix->fix_size;
16573 mp->mode = fix->mode;
16574 mp->value = fix->value;
16575 mp->refcount = 1;
16576 /* Not yet required for a backwards ref. */
16577 mp->min_address = -65536;
16579 if (max_mp == NULL)
16581 mp->max_address = max_address;
16582 mp->next = NULL;
16583 mp->prev = minipool_vector_tail;
16585 if (mp->prev == NULL)
16587 minipool_vector_head = mp;
16588 minipool_vector_label = gen_label_rtx ();
16590 else
16591 mp->prev->next = mp;
16593 minipool_vector_tail = mp;
16595 else
16597 if (max_address > max_mp->max_address - mp->fix_size)
16598 mp->max_address = max_mp->max_address - mp->fix_size;
16599 else
16600 mp->max_address = max_address;
16602 mp->next = max_mp;
16603 mp->prev = max_mp->prev;
16604 max_mp->prev = mp;
16605 if (mp->prev != NULL)
16606 mp->prev->next = mp;
16607 else
16608 minipool_vector_head = mp;
16611 /* Save the new entry. */
16612 max_mp = mp;
16614 /* Scan over the preceding entries and adjust their addresses as
16615 required. */
16616 while (mp->prev != NULL
16617 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16619 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16620 mp = mp->prev;
16623 return max_mp;
16626 static Mnode *
16627 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16628 HOST_WIDE_INT min_address)
16630 HOST_WIDE_INT offset;
16632 /* The code below assumes these are different. */
16633 gcc_assert (mp != min_mp);
16635 if (min_mp == NULL)
16637 if (min_address > mp->min_address)
16638 mp->min_address = min_address;
16640 else
16642 /* We will adjust this below if it is too loose. */
16643 mp->min_address = min_address;
16645 /* Unlink MP from its current position. Since min_mp is non-null,
16646 mp->next must be non-null. */
16647 mp->next->prev = mp->prev;
16648 if (mp->prev != NULL)
16649 mp->prev->next = mp->next;
16650 else
16651 minipool_vector_head = mp->next;
16653 /* Reinsert it after MIN_MP. */
16654 mp->prev = min_mp;
16655 mp->next = min_mp->next;
16656 min_mp->next = mp;
16657 if (mp->next != NULL)
16658 mp->next->prev = mp;
16659 else
16660 minipool_vector_tail = mp;
16663 min_mp = mp;
16665 offset = 0;
16666 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16668 mp->offset = offset;
16669 if (mp->refcount > 0)
16670 offset += mp->fix_size;
16672 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16673 mp->next->min_address = mp->min_address + mp->fix_size;
16676 return min_mp;
16679 /* Add a constant to the minipool for a backward reference. Returns the
16680 node added or NULL if the constant will not fit in this pool.
16682 Note that the code for insertion for a backwards reference can be
16683 somewhat confusing because the calculated offsets for each fix do
16684 not take into account the size of the pool (which is still under
16685 construction). */
16686 static Mnode *
16687 add_minipool_backward_ref (Mfix *fix)
16689 /* If set, min_mp is the last pool_entry that has a lower constraint
16690 than the one we are trying to add. */
16691 Mnode *min_mp = NULL;
16692 /* This can be negative, since it is only a constraint. */
16693 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16694 Mnode *mp;
16696 /* If we can't reach the current pool from this insn, or if we can't
16697 insert this entry at the end of the pool without pushing other
16698 fixes out of range, then we don't try. This ensures that we
16699 can't fail later on. */
16700 if (min_address >= minipool_barrier->address
16701 || (minipool_vector_tail->min_address + fix->fix_size
16702 >= minipool_barrier->address))
16703 return NULL;
16705 /* Scan the pool to see if a constant with the same value has
16706 already been added. While we are doing this, also note the
16707 location where we must insert the constant if it doesn't already
16708 exist. */
16709 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16711 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16712 && fix->mode == mp->mode
16713 && (!LABEL_P (fix->value)
16714 || (CODE_LABEL_NUMBER (fix->value)
16715 == CODE_LABEL_NUMBER (mp->value)))
16716 && rtx_equal_p (fix->value, mp->value)
16717 /* Check that there is enough slack to move this entry to the
16718 end of the table (this is conservative). */
16719 && (mp->max_address
16720 > (minipool_barrier->address
16721 + minipool_vector_tail->offset
16722 + minipool_vector_tail->fix_size)))
16724 mp->refcount++;
16725 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16728 if (min_mp != NULL)
16729 mp->min_address += fix->fix_size;
16730 else
16732 /* Note the insertion point if necessary. */
16733 if (mp->min_address < min_address)
16735 /* For now, we do not allow the insertion of nodes requiring 8-byte
16736 alignment anywhere but at the start of the pool. */
16737 if (ARM_DOUBLEWORD_ALIGN
16738 && fix->fix_size >= 8 && mp->fix_size < 8)
16739 return NULL;
16740 else
16741 min_mp = mp;
16743 else if (mp->max_address
16744 < minipool_barrier->address + mp->offset + fix->fix_size)
16746 /* Inserting before this entry would push the fix beyond
16747 its maximum address (which can happen if we have
16748 re-located a forwards fix); force the new fix to come
16749 after it. */
16750 if (ARM_DOUBLEWORD_ALIGN
16751 && fix->fix_size >= 8 && mp->fix_size < 8)
16752 return NULL;
16753 else
16755 min_mp = mp;
16756 min_address = mp->min_address + fix->fix_size;
16759 /* Do not insert a non-8-byte aligned quantity before 8-byte
16760 aligned quantities. */
16761 else if (ARM_DOUBLEWORD_ALIGN
16762 && fix->fix_size < 8
16763 && mp->fix_size >= 8)
16765 min_mp = mp;
16766 min_address = mp->min_address + fix->fix_size;
16771 /* We need to create a new entry. */
16772 mp = XNEW (Mnode);
16773 mp->fix_size = fix->fix_size;
16774 mp->mode = fix->mode;
16775 mp->value = fix->value;
16776 mp->refcount = 1;
16777 mp->max_address = minipool_barrier->address + 65536;
16779 mp->min_address = min_address;
16781 if (min_mp == NULL)
16783 mp->prev = NULL;
16784 mp->next = minipool_vector_head;
16786 if (mp->next == NULL)
16788 minipool_vector_tail = mp;
16789 minipool_vector_label = gen_label_rtx ();
16791 else
16792 mp->next->prev = mp;
16794 minipool_vector_head = mp;
16796 else
16798 mp->next = min_mp->next;
16799 mp->prev = min_mp;
16800 min_mp->next = mp;
16802 if (mp->next != NULL)
16803 mp->next->prev = mp;
16804 else
16805 minipool_vector_tail = mp;
16808 /* Save the new entry. */
16809 min_mp = mp;
16811 if (mp->prev)
16812 mp = mp->prev;
16813 else
16814 mp->offset = 0;
16816 /* Scan over the following entries and adjust their offsets. */
16817 while (mp->next != NULL)
16819 if (mp->next->min_address < mp->min_address + mp->fix_size)
16820 mp->next->min_address = mp->min_address + mp->fix_size;
16822 if (mp->refcount)
16823 mp->next->offset = mp->offset + mp->fix_size;
16824 else
16825 mp->next->offset = mp->offset;
16827 mp = mp->next;
16830 return min_mp;
16833 static void
16834 assign_minipool_offsets (Mfix *barrier)
16836 HOST_WIDE_INT offset = 0;
16837 Mnode *mp;
16839 minipool_barrier = barrier;
16841 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16843 mp->offset = offset;
16845 if (mp->refcount > 0)
16846 offset += mp->fix_size;
16850 /* Output the literal table */
16851 static void
16852 dump_minipool (rtx_insn *scan)
16854 Mnode * mp;
16855 Mnode * nmp;
16856 int align64 = 0;
16858 if (ARM_DOUBLEWORD_ALIGN)
16859 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16860 if (mp->refcount > 0 && mp->fix_size >= 8)
16862 align64 = 1;
16863 break;
16866 if (dump_file)
16867 fprintf (dump_file,
16868 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16869 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16871 scan = emit_label_after (gen_label_rtx (), scan);
16872 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16873 scan = emit_label_after (minipool_vector_label, scan);
16875 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16877 if (mp->refcount > 0)
16879 if (dump_file)
16881 fprintf (dump_file,
16882 ";; Offset %u, min %ld, max %ld ",
16883 (unsigned) mp->offset, (unsigned long) mp->min_address,
16884 (unsigned long) mp->max_address);
16885 arm_print_value (dump_file, mp->value);
16886 fputc ('\n', dump_file);
16889 switch (GET_MODE_SIZE (mp->mode))
16891 #ifdef HAVE_consttable_1
16892 case 1:
16893 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16894 break;
16896 #endif
16897 #ifdef HAVE_consttable_2
16898 case 2:
16899 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16900 break;
16902 #endif
16903 #ifdef HAVE_consttable_4
16904 case 4:
16905 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16906 break;
16908 #endif
16909 #ifdef HAVE_consttable_8
16910 case 8:
16911 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16912 break;
16914 #endif
16915 #ifdef HAVE_consttable_16
16916 case 16:
16917 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16918 break;
16920 #endif
16921 default:
16922 gcc_unreachable ();
16926 nmp = mp->next;
16927 free (mp);
16930 minipool_vector_head = minipool_vector_tail = NULL;
16931 scan = emit_insn_after (gen_consttable_end (), scan);
16932 scan = emit_barrier_after (scan);
16935 /* Return the cost of forcibly inserting a barrier after INSN. */
16936 static int
16937 arm_barrier_cost (rtx_insn *insn)
16939 /* Basing the location of the pool on the loop depth is preferable,
16940 but at the moment, the basic block information seems to be
16941 corrupt by this stage of the compilation. */
16942 int base_cost = 50;
16943 rtx_insn *next = next_nonnote_insn (insn);
16945 if (next != NULL && LABEL_P (next))
16946 base_cost -= 20;
16948 switch (GET_CODE (insn))
16950 case CODE_LABEL:
16951 /* It will always be better to place the table before the label, rather
16952 than after it. */
16953 return 50;
16955 case INSN:
16956 case CALL_INSN:
16957 return base_cost;
16959 case JUMP_INSN:
16960 return base_cost - 10;
16962 default:
16963 return base_cost + 10;
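/* Illustrative costs from the above: a CODE_LABEL always returns 50;
   an ordinary INSN or CALL_INSN costs 50 (30 when the following
   non-note insn is a label), a JUMP_INSN 40 (or 20), and anything
   else 60 (or 40).  Lower is better, so barriers gravitate towards
   jumps and towards positions just before labels.  */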
16967 /* Find the best place in the insn stream in the range
16968 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16969 Create the barrier by inserting a jump and add a new fix entry for
16970 it. */
16971 static Mfix *
16972 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16974 HOST_WIDE_INT count = 0;
16975 rtx_barrier *barrier;
16976 rtx_insn *from = fix->insn;
16977 /* The instruction after which we will insert the jump. */
16978 rtx_insn *selected = NULL;
16979 int selected_cost;
16980 /* The address at which the jump instruction will be placed. */
16981 HOST_WIDE_INT selected_address;
16982 Mfix * new_fix;
16983 HOST_WIDE_INT max_count = max_address - fix->address;
16984 rtx_code_label *label = gen_label_rtx ();
16986 selected_cost = arm_barrier_cost (from);
16987 selected_address = fix->address;
16989 while (from && count < max_count)
16991 rtx_jump_table_data *tmp;
16992 int new_cost;
16994 /* This code shouldn't have been called if there was a natural barrier
16995 within range. */
16996 gcc_assert (!BARRIER_P (from));
16998 /* Count the length of this insn. This must stay in sync with the
16999 code that pushes minipool fixes. */
17000 if (LABEL_P (from))
17001 count += get_label_padding (from);
17002 else
17003 count += get_attr_length (from);
17005 /* If there is a jump table, add its length. */
17006 if (tablejump_p (from, NULL, &tmp))
17008 count += get_jump_table_size (tmp);
17010 /* Jump tables aren't in a basic block, so base the cost on
17011 the dispatch insn. If we select this location, we will
17012 still put the pool after the table. */
17013 new_cost = arm_barrier_cost (from);
17015 if (count < max_count
17016 && (!selected || new_cost <= selected_cost))
17018 selected = tmp;
17019 selected_cost = new_cost;
17020 selected_address = fix->address + count;
17023 /* Continue after the dispatch table. */
17024 from = NEXT_INSN (tmp);
17025 continue;
17028 new_cost = arm_barrier_cost (from);
17030 if (count < max_count
17031 && (!selected || new_cost <= selected_cost))
17033 selected = from;
17034 selected_cost = new_cost;
17035 selected_address = fix->address + count;
17038 from = NEXT_INSN (from);
17041 /* Make sure that we found a place to insert the jump. */
17042 gcc_assert (selected);
17044 /* Make sure we do not split a call and its corresponding
17045 CALL_ARG_LOCATION note. */
17046 if (CALL_P (selected))
17048 rtx_insn *next = NEXT_INSN (selected);
17049 if (next && NOTE_P (next)
17050 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
17051 selected = next;
17054 /* Create a new JUMP_INSN that branches around a barrier. */
17055 from = emit_jump_insn_after (gen_jump (label), selected);
17056 JUMP_LABEL (from) = label;
17057 barrier = emit_barrier_after (from);
17058 emit_label_after (label, barrier);
17060 /* Create a minipool barrier entry for the new barrier. */
17061 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
17062 new_fix->insn = barrier;
17063 new_fix->address = selected_address;
17064 new_fix->next = fix->next;
17065 fix->next = new_fix;
17067 return new_fix;
17070 /* Record that there is a natural barrier in the insn stream at
17071 ADDRESS. */
17072 static void
17073 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
17075 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17077 fix->insn = insn;
17078 fix->address = address;
17080 fix->next = NULL;
17081 if (minipool_fix_head != NULL)
17082 minipool_fix_tail->next = fix;
17083 else
17084 minipool_fix_head = fix;
17086 minipool_fix_tail = fix;
17089 /* Record INSN, which will need fixing up to load a value from the
17090 minipool. ADDRESS is the offset of the insn since the start of the
17091 function; LOC is a pointer to the part of the insn which requires
17092 fixing; VALUE is the constant that must be loaded, which is of type
17093 MODE. */
17094 static void
17095 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
17096 machine_mode mode, rtx value)
17098 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17100 fix->insn = insn;
17101 fix->address = address;
17102 fix->loc = loc;
17103 fix->mode = mode;
17104 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
17105 fix->value = value;
17106 fix->forwards = get_attr_pool_range (insn);
17107 fix->backwards = get_attr_neg_pool_range (insn);
17108 fix->minipool = NULL;
17110 /* If an insn doesn't have a range defined for it, then it isn't
17111 expecting to be reworked by this code. Better to stop now than
17112 to generate duff assembly code. */
17113 gcc_assert (fix->forwards || fix->backwards);
17115 /* If an entry requires 8-byte alignment then assume all constant pools
17116 require 4 bytes of padding. Trying to do this later on a per-pool
17117 basis is awkward because existing pool entries have to be modified. */
17118 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
17119 minipool_pad = 4;
17121 if (dump_file)
17123 fprintf (dump_file,
17124 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17125 GET_MODE_NAME (mode),
17126 INSN_UID (insn), (unsigned long) address,
17127 -1 * (long)fix->backwards, (long)fix->forwards);
17128 arm_print_value (dump_file, fix->value);
17129 fprintf (dump_file, "\n");
17132 /* Add it to the chain of fixes. */
17133 fix->next = NULL;
17135 if (minipool_fix_head != NULL)
17136 minipool_fix_tail->next = fix;
17137 else
17138 minipool_fix_head = fix;
17140 minipool_fix_tail = fix;
17143 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
17144 Returns the number of insns needed, or 99 if we always want to synthesize
17145 the value. */
17147 arm_max_const_double_inline_cost ()
17149 /* Let the value get synthesized to avoid the use of literal pools. */
17150 if (arm_disable_literal_pool)
17151 return 99;
17153 return ((optimize_size || arm_ld_sched) ? 3 : 4);
17156 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17157 Returns the number of insns needed, or 99 if we don't know how to
17158 do it. */
17160 arm_const_double_inline_cost (rtx val)
17162 rtx lowpart, highpart;
17163 machine_mode mode;
17165 mode = GET_MODE (val);
17167 if (mode == VOIDmode)
17168 mode = DImode;
17170 gcc_assert (GET_MODE_SIZE (mode) == 8);
17172 lowpart = gen_lowpart (SImode, val);
17173 highpart = gen_highpart_mode (SImode, mode, val);
17175 gcc_assert (CONST_INT_P (lowpart));
17176 gcc_assert (CONST_INT_P (highpart));
17178 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17179 NULL_RTX, NULL_RTX, 0, 0)
17180 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17181 NULL_RTX, NULL_RTX, 0, 0));
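/* For illustration: a DImode constant such as 0x100000001, whose low
   and high SImode parts are both valid ARM immediates, is costed at
   1 + 1 = 2 insns here; that is below the limit returned by
   arm_max_const_double_inline_cost, so such a constant would normally
   be synthesized inline rather than placed in a literal pool.  */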
17184 /* Cost of loading a SImode constant. */
17185 static inline int
17186 arm_const_inline_cost (enum rtx_code code, rtx val)
17188 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17189 NULL_RTX, NULL_RTX, 1, 0);
17192 /* Return true if it is worthwhile to split a 64-bit constant into two
17193 32-bit operations. This is the case if optimizing for size, or
17194 if we have load delay slots, or if one 32-bit part can be done with
17195 a single data operation. */
17196 bool
17197 arm_const_double_by_parts (rtx val)
17199 machine_mode mode = GET_MODE (val);
17200 rtx part;
17202 if (optimize_size || arm_ld_sched)
17203 return true;
17205 if (mode == VOIDmode)
17206 mode = DImode;
17208 part = gen_highpart_mode (SImode, mode, val);
17210 gcc_assert (CONST_INT_P (part));
17212 if (const_ok_for_arm (INTVAL (part))
17213 || const_ok_for_arm (~INTVAL (part)))
17214 return true;
17216 part = gen_lowpart (SImode, val);
17218 gcc_assert (CONST_INT_P (part));
17220 if (const_ok_for_arm (INTVAL (part))
17221 || const_ok_for_arm (~INTVAL (part)))
17222 return true;
17224 return false;
17227 /* Return true if it is possible to inline both the high and low parts
17228 of a 64-bit constant into 32-bit data processing instructions. */
17229 bool
17230 arm_const_double_by_immediates (rtx val)
17232 machine_mode mode = GET_MODE (val);
17233 rtx part;
17235 if (mode == VOIDmode)
17236 mode = DImode;
17238 part = gen_highpart_mode (SImode, mode, val);
17240 gcc_assert (CONST_INT_P (part));
17242 if (!const_ok_for_arm (INTVAL (part)))
17243 return false;
17245 part = gen_lowpart (SImode, val);
17247 gcc_assert (CONST_INT_P (part));
17249 if (!const_ok_for_arm (INTVAL (part)))
17250 return false;
17252 return true;
17255 /* Scan INSN and note any of its operands that need fixing.
17256 If DO_PUSHES is false we do not actually push any of the fixups
17257 needed. */
17258 static void
17259 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17261 int opno;
17263 extract_constrain_insn (insn);
17265 if (recog_data.n_alternatives == 0)
17266 return;
17268 /* Fill in recog_op_alt with information about the constraints of
17269 this insn. */
17270 preprocess_constraints (insn);
17272 const operand_alternative *op_alt = which_op_alt ();
17273 for (opno = 0; opno < recog_data.n_operands; opno++)
17275 /* Things we need to fix can only occur in inputs. */
17276 if (recog_data.operand_type[opno] != OP_IN)
17277 continue;
17279 /* If this alternative is a memory reference, then any mention
17280 of constants in this alternative is really to fool reload
17281 into allowing us to accept one there. We need to fix them up
17282 now so that we output the right code. */
17283 if (op_alt[opno].memory_ok)
17285 rtx op = recog_data.operand[opno];
17287 if (CONSTANT_P (op))
17289 if (do_pushes)
17290 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17291 recog_data.operand_mode[opno], op);
17293 else if (MEM_P (op)
17294 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17295 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17297 if (do_pushes)
17299 rtx cop = avoid_constant_pool_reference (op);
17301 /* Casting the address of something to a mode narrower
17302 than a word can cause avoid_constant_pool_reference()
17303 to return the pool reference itself. That's no good to
17304 us here. Let's just hope that we can use the
17305 constant pool value directly. */
17306 if (op == cop)
17307 cop = get_pool_constant (XEXP (op, 0));
17309 push_minipool_fix (insn, address,
17310 recog_data.operand_loc[opno],
17311 recog_data.operand_mode[opno], cop);
17318 return;
17321 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17322 be useful in the next conditional jump insn. */
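/* Sketch of the rewrite performed below (illustrative only): a move
   (set (reg:SI d) (reg:SI s)) whose source or destination is later
   compared against zero is replaced by
   (set (reg:SI d) (minus:SI (reg:SI s) (const_int 0))), and the
   comparison operand in the cbranch is redirected to d, so that the
   flag-setting SUBS form can make the explicit compare redundant.  */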
17324 static void
17325 thumb1_reorg (void)
17327 basic_block bb;
17329 FOR_EACH_BB_FN (bb, cfun)
17331 rtx dest, src;
17332 rtx cmp, op0, op1, set = NULL;
17333 rtx_insn *prev, *insn = BB_END (bb);
17334 bool insn_clobbered = false;
17336 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17337 insn = PREV_INSN (insn);
17339 /* Find the last cbranchsi4_insn in basic block BB. */
17340 if (insn == BB_HEAD (bb)
17341 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17342 continue;
17344 /* Get the register with which we are comparing. */
17345 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17346 op0 = XEXP (cmp, 0);
17347 op1 = XEXP (cmp, 1);
17349 /* Check that comparison is against ZERO. */
17350 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17351 continue;
17353 /* Find the first flag setting insn before INSN in basic block BB. */
17354 gcc_assert (insn != BB_HEAD (bb));
17355 for (prev = PREV_INSN (insn);
17356 (!insn_clobbered
17357 && prev != BB_HEAD (bb)
17358 && (NOTE_P (prev)
17359 || DEBUG_INSN_P (prev)
17360 || ((set = single_set (prev)) != NULL
17361 && get_attr_conds (prev) == CONDS_NOCOND)));
17362 prev = PREV_INSN (prev))
17364 if (reg_set_p (op0, prev))
17365 insn_clobbered = true;
17368 /* Skip if op0 is clobbered by an insn other than prev. */
17369 if (insn_clobbered)
17370 continue;
17372 if (!set)
17373 continue;
17375 dest = SET_DEST (set);
17376 src = SET_SRC (set);
17377 if (!low_register_operand (dest, SImode)
17378 || !low_register_operand (src, SImode))
17379 continue;
17381 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17382 in INSN. Both src and dest of the move insn are checked. */
17383 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17385 dest = copy_rtx (dest);
17386 src = copy_rtx (src);
17387 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17388 PATTERN (prev) = gen_rtx_SET (dest, src);
17389 INSN_CODE (prev) = -1;
17390 /* Set test register in INSN to dest. */
17391 XEXP (cmp, 0) = copy_rtx (dest);
17392 INSN_CODE (insn) = -1;
17397 /* Convert instructions to their cc-clobbering variant if possible, since
17398 that allows us to use the smaller 16-bit encodings. */
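/* For example, where the flags are otherwise dead,

     add   r0, r1, r2    @ needs a 32-bit Thumb-2 encoding

   can instead be emitted as the flag-setting

     adds  r0, r1, r2    @ 16-bit encoding

   which is represented below by wrapping the SET in a PARALLEL with a
   clobber of CC_REGNUM.  (Illustrative registers.)  */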
17400 static void
17401 thumb2_reorg (void)
17403 basic_block bb;
17404 regset_head live;
17406 INIT_REG_SET (&live);
17408 /* We are freeing block_for_insn in the toplev to keep compatibility
17409 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17410 compute_bb_for_insn ();
17411 df_analyze ();
17413 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17415 FOR_EACH_BB_FN (bb, cfun)
17417 if ((current_tune->disparage_flag_setting_t16_encodings
17418 == tune_params::DISPARAGE_FLAGS_ALL)
17419 && optimize_bb_for_speed_p (bb))
17420 continue;
17422 rtx_insn *insn;
17423 Convert_Action action = SKIP;
17424 Convert_Action action_for_partial_flag_setting
17425 = ((current_tune->disparage_flag_setting_t16_encodings
17426 != tune_params::DISPARAGE_FLAGS_NEITHER)
17427 && optimize_bb_for_speed_p (bb))
17428 ? SKIP : CONV;
17430 COPY_REG_SET (&live, DF_LR_OUT (bb));
17431 df_simulate_initialize_backwards (bb, &live);
17432 FOR_BB_INSNS_REVERSE (bb, insn)
17434 if (NONJUMP_INSN_P (insn)
17435 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17436 && GET_CODE (PATTERN (insn)) == SET)
17438 action = SKIP;
17439 rtx pat = PATTERN (insn);
17440 rtx dst = XEXP (pat, 0);
17441 rtx src = XEXP (pat, 1);
17442 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17444 if (UNARY_P (src) || BINARY_P (src))
17445 op0 = XEXP (src, 0);
17447 if (BINARY_P (src))
17448 op1 = XEXP (src, 1);
17450 if (low_register_operand (dst, SImode))
17452 switch (GET_CODE (src))
17454 case PLUS:
17455 /* Adding two registers and storing the result
17456 in the first source is already a 16-bit
17457 operation. */
17458 if (rtx_equal_p (dst, op0)
17459 && register_operand (op1, SImode))
17460 break;
17462 if (low_register_operand (op0, SImode))
17464 /* ADDS <Rd>,<Rn>,<Rm> */
17465 if (low_register_operand (op1, SImode))
17466 action = CONV;
17467 /* ADDS <Rdn>,#<imm8> */
17468 /* SUBS <Rdn>,#<imm8> */
17469 else if (rtx_equal_p (dst, op0)
17470 && CONST_INT_P (op1)
17471 && IN_RANGE (INTVAL (op1), -255, 255))
17472 action = CONV;
17473 /* ADDS <Rd>,<Rn>,#<imm3> */
17474 /* SUBS <Rd>,<Rn>,#<imm3> */
17475 else if (CONST_INT_P (op1)
17476 && IN_RANGE (INTVAL (op1), -7, 7))
17477 action = CONV;
17479 /* ADCS <Rd>, <Rn> */
17480 else if (GET_CODE (XEXP (src, 0)) == PLUS
17481 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17482 && low_register_operand (XEXP (XEXP (src, 0), 1),
17483 SImode)
17484 && COMPARISON_P (op1)
17485 && cc_register (XEXP (op1, 0), VOIDmode)
17486 && maybe_get_arm_condition_code (op1) == ARM_CS
17487 && XEXP (op1, 1) == const0_rtx)
17488 action = CONV;
17489 break;
17491 case MINUS:
17492 /* RSBS <Rd>,<Rn>,#0
17493 Not handled here: see NEG below. */
17494 /* SUBS <Rd>,<Rn>,#<imm3>
17495 SUBS <Rdn>,#<imm8>
17496 Not handled here: see PLUS above. */
17497 /* SUBS <Rd>,<Rn>,<Rm> */
17498 if (low_register_operand (op0, SImode)
17499 && low_register_operand (op1, SImode))
17500 action = CONV;
17501 break;
17503 case MULT:
17504 /* MULS <Rdm>,<Rn>,<Rdm>
17505 As an exception to the rule, this is only used
17506 when optimizing for size since MULS is slow on all
17507 known implementations. We do not even want to use
17508 MULS in cold code, if optimizing for speed, so we
17509 test the global flag here. */
17510 if (!optimize_size)
17511 break;
17512 /* else fall through. */
17513 case AND:
17514 case IOR:
17515 case XOR:
17516 /* ANDS <Rdn>,<Rm> */
17517 if (rtx_equal_p (dst, op0)
17518 && low_register_operand (op1, SImode))
17519 action = action_for_partial_flag_setting;
17520 else if (rtx_equal_p (dst, op1)
17521 && low_register_operand (op0, SImode))
17522 action = action_for_partial_flag_setting == SKIP
17523 ? SKIP : SWAP_CONV;
17524 break;
17526 case ASHIFTRT:
17527 case ASHIFT:
17528 case LSHIFTRT:
17529 /* ASRS <Rdn>,<Rm> */
17530 /* LSRS <Rdn>,<Rm> */
17531 /* LSLS <Rdn>,<Rm> */
17532 if (rtx_equal_p (dst, op0)
17533 && low_register_operand (op1, SImode))
17534 action = action_for_partial_flag_setting;
17535 /* ASRS <Rd>,<Rm>,#<imm5> */
17536 /* LSRS <Rd>,<Rm>,#<imm5> */
17537 /* LSLS <Rd>,<Rm>,#<imm5> */
17538 else if (low_register_operand (op0, SImode)
17539 && CONST_INT_P (op1)
17540 && IN_RANGE (INTVAL (op1), 0, 31))
17541 action = action_for_partial_flag_setting;
17542 break;
17544 case ROTATERT:
17545 /* RORS <Rdn>,<Rm> */
17546 if (rtx_equal_p (dst, op0)
17547 && low_register_operand (op1, SImode))
17548 action = action_for_partial_flag_setting;
17549 break;
17551 case NOT:
17552 /* MVNS <Rd>,<Rm> */
17553 if (low_register_operand (op0, SImode))
17554 action = action_for_partial_flag_setting;
17555 break;
17557 case NEG:
17558 /* NEGS <Rd>,<Rm> (a.k.a. RSBS) */
17559 if (low_register_operand (op0, SImode))
17560 action = CONV;
17561 break;
17563 case CONST_INT:
17564 /* MOVS <Rd>,#<imm8> */
17565 if (CONST_INT_P (src)
17566 && IN_RANGE (INTVAL (src), 0, 255))
17567 action = action_for_partial_flag_setting;
17568 break;
17570 case REG:
17571 /* MOVS and MOV<c> with registers have different
17572 encodings, so are not relevant here. */
17573 break;
17575 default:
17576 break;
17580 if (action != SKIP)
17582 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17583 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17584 rtvec vec;
17586 if (action == SWAP_CONV)
17588 src = copy_rtx (src);
17589 XEXP (src, 0) = op1;
17590 XEXP (src, 1) = op0;
17591 pat = gen_rtx_SET (dst, src);
17592 vec = gen_rtvec (2, pat, clobber);
17594 else /* action == CONV */
17595 vec = gen_rtvec (2, pat, clobber);
17597 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17598 INSN_CODE (insn) = -1;
17602 if (NONDEBUG_INSN_P (insn))
17603 df_simulate_one_insn_backwards (bb, insn, &live);
17607 CLEAR_REG_SET (&live);
17610 /* GCC puts the pool in the wrong place for ARM, since we can only
17611 load addresses a limited distance around the pc. We do some
17612 special munging to move the constant pool values to the correct
17613 point in the code. */
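/* The usable range is small: a PC-relative LDR in ARM state reaches
   roughly +/-4KB, and Thumb-1 literal loads only reach about 1KB
   forwards, so the pools ("minipools") have to be scattered through the
   code close to their users rather than collected at the end.
   (Approximate figures, for orientation only.)  */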
17614 static void
17615 arm_reorg (void)
17617 rtx_insn *insn;
17618 HOST_WIDE_INT address = 0;
17619 Mfix * fix;
17621 if (TARGET_THUMB1)
17622 thumb1_reorg ();
17623 else if (TARGET_THUMB2)
17624 thumb2_reorg ();
17626 /* Ensure all insns that must be split have been split at this point.
17627 Otherwise, the pool placement code below may compute incorrect
17628 insn lengths. Note that when optimizing, all insns have already
17629 been split at this point. */
17630 if (!optimize)
17631 split_all_insns_noflow ();
17633 minipool_fix_head = minipool_fix_tail = NULL;
17635 /* The first insn must always be a note, or the code below won't
17636 scan it properly. */
17637 insn = get_insns ();
17638 gcc_assert (NOTE_P (insn));
17639 minipool_pad = 0;
17641 /* Scan all the insns and record the operands that will need fixing. */
17642 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17644 if (BARRIER_P (insn))
17645 push_minipool_barrier (insn, address);
17646 else if (INSN_P (insn))
17648 rtx_jump_table_data *table;
17650 note_invalid_constants (insn, address, true);
17651 address += get_attr_length (insn);
17653 /* If the insn is a vector jump, add the size of the table
17654 and skip the table. */
17655 if (tablejump_p (insn, NULL, &table))
17657 address += get_jump_table_size (table);
17658 insn = table;
17661 else if (LABEL_P (insn))
17662 /* Add the worst-case padding due to alignment. We don't add
17663 the _current_ padding because the minipool insertions
17664 themselves might change it. */
17665 address += get_label_padding (insn);
17668 fix = minipool_fix_head;
17670 /* Now scan the fixups and perform the required changes. */
17671 while (fix)
17673 Mfix * ftmp;
17674 Mfix * fdel;
17675 Mfix * last_added_fix;
17676 Mfix * last_barrier = NULL;
17677 Mfix * this_fix;
17679 /* Skip any further barriers before the next fix. */
17680 while (fix && BARRIER_P (fix->insn))
17681 fix = fix->next;
17683 /* No more fixes. */
17684 if (fix == NULL)
17685 break;
17687 last_added_fix = NULL;
17689 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17691 if (BARRIER_P (ftmp->insn))
17693 if (ftmp->address >= minipool_vector_head->max_address)
17694 break;
17696 last_barrier = ftmp;
17698 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17699 break;
17701 last_added_fix = ftmp; /* Keep track of the last fix added. */
17704 /* If we found a barrier, drop back to that; any fixes that we
17705 could have reached but come after the barrier will now go in
17706 the next mini-pool. */
17707 if (last_barrier != NULL)
17709 /* Reduce the refcount for those fixes that won't go into this
17710 pool after all. */
17711 for (fdel = last_barrier->next;
17712 fdel && fdel != ftmp;
17713 fdel = fdel->next)
17715 fdel->minipool->refcount--;
17716 fdel->minipool = NULL;
17719 ftmp = last_barrier;
17721 else
17723 /* ftmp is the first fix that we can't fit into this pool and
17724 there are no natural barriers that we could use. Insert a
17725 new barrier in the code somewhere between the previous
17726 fix and this one, and arrange to jump around it. */
17727 HOST_WIDE_INT max_address;
17729 /* The last item on the list of fixes must be a barrier, so
17730 we can never run off the end of the list of fixes without
17731 last_barrier being set. */
17732 gcc_assert (ftmp);
17734 max_address = minipool_vector_head->max_address;
17735 /* Check that there isn't another fix that is in range that
17736 we couldn't fit into this pool because the pool was
17737 already too large: we need to put the pool before such an
17738 instruction. The pool itself may come just after the
17739 fix because create_fix_barrier also allows space for a
17740 jump instruction. */
17741 if (ftmp->address < max_address)
17742 max_address = ftmp->address + 1;
17744 last_barrier = create_fix_barrier (last_added_fix, max_address);
17747 assign_minipool_offsets (last_barrier);
17749 while (ftmp)
17751 if (!BARRIER_P (ftmp->insn)
17752 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17753 == NULL))
17754 break;
17756 ftmp = ftmp->next;
17759 /* Scan over the fixes we have identified for this pool, fixing them
17760 up and adding the constants to the pool itself. */
17761 for (this_fix = fix; this_fix && ftmp != this_fix;
17762 this_fix = this_fix->next)
17763 if (!BARRIER_P (this_fix->insn))
17765 rtx addr
17766 = plus_constant (Pmode,
17767 gen_rtx_LABEL_REF (VOIDmode,
17768 minipool_vector_label),
17769 this_fix->minipool->offset);
17770 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17773 dump_minipool (last_barrier->insn);
17774 fix = ftmp;
17777 /* From now on we must synthesize any constants that we can't handle
17778 directly. This can happen if the RTL gets split during final
17779 instruction generation. */
17780 cfun->machine->after_arm_reorg = 1;
17782 /* Free the minipool memory. */
17783 obstack_free (&minipool_obstack, minipool_startobj);
17786 /* Routines to output assembly language. */
17788 /* Return string representation of passed in real value. */
17789 static const char *
17790 fp_const_from_val (REAL_VALUE_TYPE *r)
17792 if (!fp_consts_inited)
17793 init_fp_table ();
17795 gcc_assert (real_equal (r, &value_fp0));
17796 return "0";
17799 /* OPERANDS[0] is the entire list of insns that constitute pop,
17800 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17801 is in the list, UPDATE is true iff the list contains explicit
17802 update of base register. */
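/* For instance, a pop of {r4, r5, r6} with writeback of SP is printed
   as "pop {r4, r5, r6}", whereas the same list with a non-SP base
   register, say r7 with writeback, uses the LDM form
   "ldmia r7!, {r4, r5, r6}".  (Illustrative register lists.)  */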
17803 void
17804 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17805 bool update)
17807 int i;
17808 char pattern[100];
17809 int offset;
17810 const char *conditional;
17811 int num_saves = XVECLEN (operands[0], 0);
17812 unsigned int regno;
17813 unsigned int regno_base = REGNO (operands[1]);
17814 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17816 offset = 0;
17817 offset += update ? 1 : 0;
17818 offset += return_pc ? 1 : 0;
17820 /* Is the base register in the list? */
17821 for (i = offset; i < num_saves; i++)
17823 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17824 /* If SP is in the list, then the base register must be SP. */
17825 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17826 /* If base register is in the list, there must be no explicit update. */
17827 if (regno == regno_base)
17828 gcc_assert (!update);
17831 conditional = reverse ? "%?%D0" : "%?%d0";
17832 /* Can't use POP if returning from an interrupt. */
17833 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17834 sprintf (pattern, "pop%s\t{", conditional);
17835 else
17837 /* Output ldmfd when the base register is SP; otherwise output ldmia.
17838 It's just a convention; their semantics are identical. */
17839 if (regno_base == SP_REGNUM)
17840 sprintf (pattern, "ldmfd%s\t", conditional);
17841 else if (update)
17842 sprintf (pattern, "ldmia%s\t", conditional);
17843 else
17844 sprintf (pattern, "ldm%s\t", conditional);
17846 strcat (pattern, reg_names[regno_base]);
17847 if (update)
17848 strcat (pattern, "!, {");
17849 else
17850 strcat (pattern, ", {");
17853 /* Output the first destination register. */
17854 strcat (pattern,
17855 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17857 /* Output the rest of the destination registers. */
17858 for (i = offset + 1; i < num_saves; i++)
17860 strcat (pattern, ", ");
17861 strcat (pattern,
17862 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17865 strcat (pattern, "}");
17867 if (interrupt_p && return_pc)
17868 strcat (pattern, "^");
17870 output_asm_insn (pattern, &cond);
17874 /* Output the assembly for a store multiple. */
17876 const char *
17877 vfp_output_vstmd (rtx * operands)
17879 char pattern[100];
17880 int p;
17881 int base;
17882 int i;
17883 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17884 ? XEXP (operands[0], 0)
17885 : XEXP (XEXP (operands[0], 0), 0);
17886 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17888 if (push_p)
17889 strcpy (pattern, "vpush%?.64\t{%P1");
17890 else
17891 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17893 p = strlen (pattern);
17895 gcc_assert (REG_P (operands[1]));
17897 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17898 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17900 p += sprintf (&pattern[p], ", d%d", base + i);
17902 strcpy (&pattern[p], "}");
17904 output_asm_insn (pattern, operands);
17905 return "";
17909 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17910 number of bytes pushed. */
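/* Each D register is 8 bytes, so the value returned is simply 8 times
   the number of registers actually pushed -- which may be one more than
   requested when the ARM10 VFPr1 workaround below kicks in.  */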
17912 static int
17913 vfp_emit_fstmd (int base_reg, int count)
17915 rtx par;
17916 rtx dwarf;
17917 rtx tmp, reg;
17918 int i;
17920 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17921 register pairs are stored by a store multiple insn. We avoid this
17922 by pushing an extra pair. */
17923 if (count == 2 && !arm_arch6)
17925 if (base_reg == LAST_VFP_REGNUM - 3)
17926 base_reg -= 2;
17927 count++;
17930 /* FSTMD may not store more than 16 doubleword registers at once. Split
17931 larger stores into multiple parts (up to a maximum of two, in
17932 practice). */
17933 if (count > 16)
17935 int saved;
17936 /* NOTE: base_reg is an internal register number, so each D register
17937 counts as 2. */
17938 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17939 saved += vfp_emit_fstmd (base_reg, 16);
17940 return saved;
17943 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17944 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17946 reg = gen_rtx_REG (DFmode, base_reg);
17947 base_reg += 2;
17949 XVECEXP (par, 0, 0)
17950 = gen_rtx_SET (gen_frame_mem
17951 (BLKmode,
17952 gen_rtx_PRE_MODIFY (Pmode,
17953 stack_pointer_rtx,
17954 plus_constant
17955 (Pmode, stack_pointer_rtx,
17956 - (count * 8)))
17958 gen_rtx_UNSPEC (BLKmode,
17959 gen_rtvec (1, reg),
17960 UNSPEC_PUSH_MULT));
17962 tmp = gen_rtx_SET (stack_pointer_rtx,
17963 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17964 RTX_FRAME_RELATED_P (tmp) = 1;
17965 XVECEXP (dwarf, 0, 0) = tmp;
17967 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17968 RTX_FRAME_RELATED_P (tmp) = 1;
17969 XVECEXP (dwarf, 0, 1) = tmp;
17971 for (i = 1; i < count; i++)
17973 reg = gen_rtx_REG (DFmode, base_reg);
17974 base_reg += 2;
17975 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17977 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17978 plus_constant (Pmode,
17979 stack_pointer_rtx,
17980 i * 8)),
17981 reg);
17982 RTX_FRAME_RELATED_P (tmp) = 1;
17983 XVECEXP (dwarf, 0, i + 1) = tmp;
17986 par = emit_insn (par);
17987 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17988 RTX_FRAME_RELATED_P (par) = 1;
17990 return count * 8;
17993 /* Emit a call instruction with pattern PAT. ADDR is the address of
17994 the call target. */
17996 void
17997 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17999 rtx insn;
18001 insn = emit_call_insn (pat);
18003 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18004 If the call might use such an entry, add a use of the PIC register
18005 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18006 if (TARGET_VXWORKS_RTP
18007 && flag_pic
18008 && !sibcall
18009 && GET_CODE (addr) == SYMBOL_REF
18010 && (SYMBOL_REF_DECL (addr)
18011 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18012 : !SYMBOL_REF_LOCAL_P (addr)))
18014 require_pic_register ();
18015 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18018 if (TARGET_AAPCS_BASED)
18020 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18021 linker. We need to add an IP clobber to allow setting
18022 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18023 is not needed since it's a fixed register. */
18024 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18025 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18029 /* Output a 'call' insn. */
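/* On pre-ARMv5 targets this prints the classic two-instruction
   sequence, e.g.

     mov  lr, pc
     bx   r3            @ or "mov pc, r3" without ARMv4T/interworking

   with a call through LR first bounced via IP.  (Illustrative
   register.)  */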
18030 const char *
18031 output_call (rtx *operands)
18033 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
18035 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18036 if (REGNO (operands[0]) == LR_REGNUM)
18038 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18039 output_asm_insn ("mov%?\t%0, %|lr", operands);
18042 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18044 if (TARGET_INTERWORK || arm_arch4t)
18045 output_asm_insn ("bx%?\t%0", operands);
18046 else
18047 output_asm_insn ("mov%?\t%|pc, %0", operands);
18049 return "";
18052 /* Output a move from ARM registers to ARM registers of a long double.
18053 OPERANDS[0] is the destination.
18054 OPERANDS[1] is the source. */
18055 const char *
18056 output_mov_long_double_arm_from_arm (rtx *operands)
18058 /* We have to be careful here because the two might overlap. */
18059 int dest_start = REGNO (operands[0]);
18060 int src_start = REGNO (operands[1]);
18061 rtx ops[2];
18062 int i;
18064 if (dest_start < src_start)
18066 for (i = 0; i < 3; i++)
18068 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18069 ops[1] = gen_rtx_REG (SImode, src_start + i);
18070 output_asm_insn ("mov%?\t%0, %1", ops);
18073 else
18075 for (i = 2; i >= 0; i--)
18077 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18078 ops[1] = gen_rtx_REG (SImode, src_start + i);
18079 output_asm_insn ("mov%?\t%0, %1", ops);
18083 return "";
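/* Emit a 32-bit constant or address as a lo/hi pair (MOVW/MOVT style):
   a CONST_INT is split into its two 16-bit halves, anything else is
   emitted as HIGH/LO_SUM RTL, with a REG_EQUAL note for the full value
   when both halves are needed.  (Descriptive comment added here; not
   part of the original sources.)  */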
18086 void
18087 arm_emit_movpair (rtx dest, rtx src)
18089 rtx insn;
18091 /* If the src is an immediate, simplify it. */
18092 if (CONST_INT_P (src))
18094 HOST_WIDE_INT val = INTVAL (src);
18095 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18096 if ((val >> 16) & 0x0000ffff)
18098 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18099 GEN_INT (16)),
18100 GEN_INT ((val >> 16) & 0x0000ffff));
18101 insn = get_last_insn ();
18102 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18104 return;
18106 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18107 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18108 insn = get_last_insn ();
18109 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18112 /* Output a move between double words. It must be REG<-MEM
18113 or MEM<-REG. */
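/* For example, with operands (reg:DI r0) and (mem:DI (reg:SI r2)), the
   REG case below emits "ldrd r0, [r2]" when LDRD is available and an
   LDM of the register pair (roughly "ldmia r2, {r0, r1}") otherwise.
   (Illustrative operands.)  */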
18114 const char *
18115 output_move_double (rtx *operands, bool emit, int *count)
18117 enum rtx_code code0 = GET_CODE (operands[0]);
18118 enum rtx_code code1 = GET_CODE (operands[1]);
18119 rtx otherops[3];
18120 if (count)
18121 *count = 1;
18123 /* The only case when this might happen is when
18124 you are looking at the length of a DImode instruction
18125 that has an invalid constant in it. */
18126 if (code0 == REG && code1 != MEM)
18128 gcc_assert (!emit);
18129 *count = 2;
18130 return "";
18133 if (code0 == REG)
18135 unsigned int reg0 = REGNO (operands[0]);
18137 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18139 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18141 switch (GET_CODE (XEXP (operands[1], 0)))
18143 case REG:
18145 if (emit)
18147 if (TARGET_LDRD
18148 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18149 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18150 else
18151 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18153 break;
18155 case PRE_INC:
18156 gcc_assert (TARGET_LDRD);
18157 if (emit)
18158 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18159 break;
18161 case PRE_DEC:
18162 if (emit)
18164 if (TARGET_LDRD)
18165 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18166 else
18167 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18169 break;
18171 case POST_INC:
18172 if (emit)
18174 if (TARGET_LDRD)
18175 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18176 else
18177 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18179 break;
18181 case POST_DEC:
18182 gcc_assert (TARGET_LDRD);
18183 if (emit)
18184 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18185 break;
18187 case PRE_MODIFY:
18188 case POST_MODIFY:
18189 /* Autoincrement addressing modes should never have overlapping
18190 base and destination registers, and overlapping index registers
18191 are already prohibited, so this doesn't need to worry about
18192 fix_cm3_ldrd. */
18193 otherops[0] = operands[0];
18194 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18195 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18197 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18199 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18201 /* Registers overlap so split out the increment. */
18202 if (emit)
18204 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18205 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18207 if (count)
18208 *count = 2;
18210 else
18212 /* Use a single insn if we can.
18213 FIXME: IWMMXT allows offsets larger than ldrd can
18214 handle, fix these up with a pair of ldr. */
18215 if (TARGET_THUMB2
18216 || !CONST_INT_P (otherops[2])
18217 || (INTVAL (otherops[2]) > -256
18218 && INTVAL (otherops[2]) < 256))
18220 if (emit)
18221 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18223 else
18225 if (emit)
18227 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18228 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18230 if (count)
18231 *count = 2;
18236 else
18238 /* Use a single insn if we can.
18239 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18240 fix these up with a pair of ldr. */
18241 if (TARGET_THUMB2
18242 || !CONST_INT_P (otherops[2])
18243 || (INTVAL (otherops[2]) > -256
18244 && INTVAL (otherops[2]) < 256))
18246 if (emit)
18247 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18249 else
18251 if (emit)
18253 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18254 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18256 if (count)
18257 *count = 2;
18260 break;
18262 case LABEL_REF:
18263 case CONST:
18264 /* We might be able to use ldrd %0, %1 here. However the range is
18265 different to ldr/adr, and it is broken on some ARMv7-M
18266 implementations. */
18267 /* Use the second register of the pair to avoid problematic
18268 overlap. */
18269 otherops[1] = operands[1];
18270 if (emit)
18271 output_asm_insn ("adr%?\t%0, %1", otherops);
18272 operands[1] = otherops[0];
18273 if (emit)
18275 if (TARGET_LDRD)
18276 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18277 else
18278 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18281 if (count)
18282 *count = 2;
18283 break;
18285 /* ??? This needs checking for thumb2. */
18286 default:
18287 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18288 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18290 otherops[0] = operands[0];
18291 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18292 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18294 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18296 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18298 switch ((int) INTVAL (otherops[2]))
18300 case -8:
18301 if (emit)
18302 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18303 return "";
18304 case -4:
18305 if (TARGET_THUMB2)
18306 break;
18307 if (emit)
18308 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18309 return "";
18310 case 4:
18311 if (TARGET_THUMB2)
18312 break;
18313 if (emit)
18314 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18315 return "";
18318 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18319 operands[1] = otherops[0];
18320 if (TARGET_LDRD
18321 && (REG_P (otherops[2])
18322 || TARGET_THUMB2
18323 || (CONST_INT_P (otherops[2])
18324 && INTVAL (otherops[2]) > -256
18325 && INTVAL (otherops[2]) < 256)))
18327 if (reg_overlap_mentioned_p (operands[0],
18328 otherops[2]))
18330 /* Swap base and index registers over to
18331 avoid a conflict. */
18332 std::swap (otherops[1], otherops[2]);
18334 /* If both registers conflict, it will usually
18335 have been fixed by a splitter. */
18336 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18337 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18339 if (emit)
18341 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18342 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18344 if (count)
18345 *count = 2;
18347 else
18349 otherops[0] = operands[0];
18350 if (emit)
18351 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18353 return "";
18356 if (CONST_INT_P (otherops[2]))
18358 if (emit)
18360 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18361 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18362 else
18363 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18366 else
18368 if (emit)
18369 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18372 else
18374 if (emit)
18375 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18378 if (count)
18379 *count = 2;
18381 if (TARGET_LDRD)
18382 return "ldrd%?\t%0, [%1]";
18384 return "ldmia%?\t%1, %M0";
18386 else
18388 otherops[1] = adjust_address (operands[1], SImode, 4);
18389 /* Take care of overlapping base/data reg. */
18390 if (reg_mentioned_p (operands[0], operands[1]))
18392 if (emit)
18394 output_asm_insn ("ldr%?\t%0, %1", otherops);
18395 output_asm_insn ("ldr%?\t%0, %1", operands);
18397 if (count)
18398 *count = 2;
18401 else
18403 if (emit)
18405 output_asm_insn ("ldr%?\t%0, %1", operands);
18406 output_asm_insn ("ldr%?\t%0, %1", otherops);
18408 if (count)
18409 *count = 2;
18414 else
18416 /* Constraints should ensure this. */
18417 gcc_assert (code0 == MEM && code1 == REG);
18418 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18419 || (TARGET_ARM && TARGET_LDRD));
18421 switch (GET_CODE (XEXP (operands[0], 0)))
18423 case REG:
18424 if (emit)
18426 if (TARGET_LDRD)
18427 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18428 else
18429 output_asm_insn ("stm%?\t%m0, %M1", operands);
18431 break;
18433 case PRE_INC:
18434 gcc_assert (TARGET_LDRD);
18435 if (emit)
18436 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18437 break;
18439 case PRE_DEC:
18440 if (emit)
18442 if (TARGET_LDRD)
18443 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18444 else
18445 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18447 break;
18449 case POST_INC:
18450 if (emit)
18452 if (TARGET_LDRD)
18453 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18454 else
18455 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18457 break;
18459 case POST_DEC:
18460 gcc_assert (TARGET_LDRD);
18461 if (emit)
18462 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18463 break;
18465 case PRE_MODIFY:
18466 case POST_MODIFY:
18467 otherops[0] = operands[1];
18468 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18469 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18471 /* IWMMXT allows offsets larger than ldrd can handle,
18472 fix these up with a pair of ldr. */
18473 if (!TARGET_THUMB2
18474 && CONST_INT_P (otherops[2])
18475 && (INTVAL(otherops[2]) <= -256
18476 || INTVAL(otherops[2]) >= 256))
18478 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18480 if (emit)
18482 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18483 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18485 if (count)
18486 *count = 2;
18488 else
18490 if (emit)
18492 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18493 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18495 if (count)
18496 *count = 2;
18499 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18501 if (emit)
18502 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18504 else
18506 if (emit)
18507 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18509 break;
18511 case PLUS:
18512 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18513 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18515 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18517 case -8:
18518 if (emit)
18519 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18520 return "";
18522 case -4:
18523 if (TARGET_THUMB2)
18524 break;
18525 if (emit)
18526 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18527 return "";
18529 case 4:
18530 if (TARGET_THUMB2)
18531 break;
18532 if (emit)
18533 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18534 return "";
18537 if (TARGET_LDRD
18538 && (REG_P (otherops[2])
18539 || TARGET_THUMB2
18540 || (CONST_INT_P (otherops[2])
18541 && INTVAL (otherops[2]) > -256
18542 && INTVAL (otherops[2]) < 256)))
18544 otherops[0] = operands[1];
18545 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18546 if (emit)
18547 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18548 return "";
18550 /* Fall through */
18552 default:
18553 otherops[0] = adjust_address (operands[0], SImode, 4);
18554 otherops[1] = operands[1];
18555 if (emit)
18557 output_asm_insn ("str%?\t%1, %0", operands);
18558 output_asm_insn ("str%?\t%H1, %0", otherops);
18560 if (count)
18561 *count = 2;
18565 return "";
18568 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18569 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18571 const char *
18572 output_move_quad (rtx *operands)
18574 if (REG_P (operands[0]))
18576 /* Load, or reg->reg move. */
18578 if (MEM_P (operands[1]))
18580 switch (GET_CODE (XEXP (operands[1], 0)))
18582 case REG:
18583 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18584 break;
18586 case LABEL_REF:
18587 case CONST:
18588 output_asm_insn ("adr%?\t%0, %1", operands);
18589 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18590 break;
18592 default:
18593 gcc_unreachable ();
18596 else
18598 rtx ops[2];
18599 int dest, src, i;
18601 gcc_assert (REG_P (operands[1]));
18603 dest = REGNO (operands[0]);
18604 src = REGNO (operands[1]);
18606 /* This seems pretty dumb, but hopefully GCC won't try to do it
18607 very often. */
18608 if (dest < src)
18609 for (i = 0; i < 4; i++)
18611 ops[0] = gen_rtx_REG (SImode, dest + i);
18612 ops[1] = gen_rtx_REG (SImode, src + i);
18613 output_asm_insn ("mov%?\t%0, %1", ops);
18615 else
18616 for (i = 3; i >= 0; i--)
18618 ops[0] = gen_rtx_REG (SImode, dest + i);
18619 ops[1] = gen_rtx_REG (SImode, src + i);
18620 output_asm_insn ("mov%?\t%0, %1", ops);
18624 else
18626 gcc_assert (MEM_P (operands[0]));
18627 gcc_assert (REG_P (operands[1]));
18628 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18630 switch (GET_CODE (XEXP (operands[0], 0)))
18632 case REG:
18633 output_asm_insn ("stm%?\t%m0, %M1", operands);
18634 break;
18636 default:
18637 gcc_unreachable ();
18641 return "";
18644 /* Output a VFP load or store instruction. */
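/* The template is pieced together from the operands: e.g. a DFmode load
   from a plain address prints as "vldr.64  dN, <mem>", an SFmode store
   with post-increment as "vstmia.32  rN!, {sN}", and integer-mode
   accesses get a trailing "@ int" comment.  (Illustrative operands.)  */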
18646 const char *
18647 output_move_vfp (rtx *operands)
18649 rtx reg, mem, addr, ops[2];
18650 int load = REG_P (operands[0]);
18651 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18652 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18653 const char *templ;
18654 char buff[50];
18655 machine_mode mode;
18657 reg = operands[!load];
18658 mem = operands[load];
18660 mode = GET_MODE (reg);
18662 gcc_assert (REG_P (reg));
18663 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18664 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT && TARGET_VFP)
18665 || mode == SFmode
18666 || mode == DFmode
18667 || mode == SImode
18668 || mode == DImode
18669 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18670 gcc_assert (MEM_P (mem));
18672 addr = XEXP (mem, 0);
18674 switch (GET_CODE (addr))
18676 case PRE_DEC:
18677 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18678 ops[0] = XEXP (addr, 0);
18679 ops[1] = reg;
18680 break;
18682 case POST_INC:
18683 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18684 ops[0] = XEXP (addr, 0);
18685 ops[1] = reg;
18686 break;
18688 default:
18689 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18690 ops[0] = reg;
18691 ops[1] = mem;
18692 break;
18695 sprintf (buff, templ,
18696 load ? "ld" : "st",
18697 dp ? "64" : "32",
18698 dp ? "P" : "",
18699 integer_p ? "\t%@ int" : "");
18700 output_asm_insn (buff, ops);
18702 return "";
18705 /* Output a Neon double-word or quad-word load or store, or a load
18706 or store for larger structure modes.
18708 WARNING: The ordering of elements is weird in big-endian mode,
18709 because the EABI requires that vectors stored in memory appear
18710 as though they were stored by a VSTM.
18711 GCC RTL defines element ordering based on in-memory order.
18712 This can be different from the architectural ordering of elements
18713 within a NEON register. The intrinsics defined in arm_neon.h use the
18714 NEON register element ordering, not the GCC RTL element ordering.
18716 For example, the in-memory ordering of a big-endian quadword
18717 vector with 16-bit elements when stored from register pair {d0,d1}
18718 will be (lowest address first, d0[N] is NEON register element N):
18720 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18722 When necessary, quadword registers (dN, dN+1) are moved to ARM
18723 registers from rN in the order:
18725 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18727 So that STM/LDM can be used on vectors in ARM registers, and the
18728 same memory layout will result as if VSTM/VLDM were used.
18730 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18731 possible, which allows use of appropriate alignment tags.
18732 Note that the choice of "64" is independent of the actual vector
18733 element size; this size simply ensures that the behavior is
18734 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18736 Due to limitations of those instructions, use of VST1.64/VLD1.64
18737 is not possible if:
18738 - the address contains PRE_DEC, or
18739 - the mode refers to more than 4 double-word registers
18741 In those cases, it would be possible to replace VSTM/VLDM by a
18742 sequence of instructions; this is not currently implemented since
18743 this is not certain to actually improve performance. */
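/* Note for the multi-instruction (REG / LABEL_REF / PLUS) paths below:
   when one of the destination D registers also appears in the address,
   that particular transfer is deferred until last so the address is not
   clobbered before the remaining loads have used it.  */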
18745 const char *
18746 output_move_neon (rtx *operands)
18748 rtx reg, mem, addr, ops[2];
18749 int regno, nregs, load = REG_P (operands[0]);
18750 const char *templ;
18751 char buff[50];
18752 machine_mode mode;
18754 reg = operands[!load];
18755 mem = operands[load];
18757 mode = GET_MODE (reg);
18759 gcc_assert (REG_P (reg));
18760 regno = REGNO (reg);
18761 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18762 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18763 || NEON_REGNO_OK_FOR_QUAD (regno));
18764 gcc_assert (VALID_NEON_DREG_MODE (mode)
18765 || VALID_NEON_QREG_MODE (mode)
18766 || VALID_NEON_STRUCT_MODE (mode));
18767 gcc_assert (MEM_P (mem));
18769 addr = XEXP (mem, 0);
18771 /* Strip off const from addresses like (const (plus (...))). */
18772 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18773 addr = XEXP (addr, 0);
18775 switch (GET_CODE (addr))
18777 case POST_INC:
18778 /* We have to use vldm / vstm for too-large modes. */
18779 if (nregs > 4)
18781 templ = "v%smia%%?\t%%0!, %%h1";
18782 ops[0] = XEXP (addr, 0);
18784 else
18786 templ = "v%s1.64\t%%h1, %%A0";
18787 ops[0] = mem;
18789 ops[1] = reg;
18790 break;
18792 case PRE_DEC:
18793 /* We have to use vldm / vstm in this case, since there is no
18794 pre-decrement form of the vld1 / vst1 instructions. */
18795 templ = "v%smdb%%?\t%%0!, %%h1";
18796 ops[0] = XEXP (addr, 0);
18797 ops[1] = reg;
18798 break;
18800 case POST_MODIFY:
18801 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18802 gcc_unreachable ();
18804 case REG:
18805 /* We have to use vldm / vstm for too-large modes. */
18806 if (nregs > 1)
18808 if (nregs > 4)
18809 templ = "v%smia%%?\t%%m0, %%h1";
18810 else
18811 templ = "v%s1.64\t%%h1, %%A0";
18813 ops[0] = mem;
18814 ops[1] = reg;
18815 break;
18817 /* Fall through. */
18818 case LABEL_REF:
18819 case PLUS:
18821 int i;
18822 int overlap = -1;
18823 for (i = 0; i < nregs; i++)
18825 /* We're only using DImode here because it's a convenient size. */
18826 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18827 ops[1] = adjust_address (mem, DImode, 8 * i);
18828 if (reg_overlap_mentioned_p (ops[0], mem))
18830 gcc_assert (overlap == -1);
18831 overlap = i;
18833 else
18835 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18836 output_asm_insn (buff, ops);
18839 if (overlap != -1)
18841 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18842 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18843 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18844 output_asm_insn (buff, ops);
18847 return "";
18850 default:
18851 gcc_unreachable ();
18854 sprintf (buff, templ, load ? "ld" : "st");
18855 output_asm_insn (buff, ops);
18857 return "";
18860 /* Compute and return the length of neon_mov<mode>, where <mode> is
18861 one of VSTRUCT modes: EI, OI, CI or XI. */
18862 int
18863 arm_attr_length_move_neon (rtx_insn *insn)
18865 rtx reg, mem, addr;
18866 int load;
18867 machine_mode mode;
18869 extract_insn_cached (insn);
18871 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18873 mode = GET_MODE (recog_data.operand[0]);
18874 switch (mode)
18876 case EImode:
18877 case OImode:
18878 return 8;
18879 case CImode:
18880 return 12;
18881 case XImode:
18882 return 16;
18883 default:
18884 gcc_unreachable ();
18888 load = REG_P (recog_data.operand[0]);
18889 reg = recog_data.operand[!load];
18890 mem = recog_data.operand[load];
18892 gcc_assert (MEM_P (mem));
18894 mode = GET_MODE (reg);
18895 addr = XEXP (mem, 0);
18897 /* Strip off const from addresses like (const (plus (...))). */
18898 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18899 addr = XEXP (addr, 0);
18901 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18903 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18904 return insns * 4;
18906 else
18907 return 4;
18910 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18911 return zero. */
18913 int
18914 arm_address_offset_is_imm (rtx_insn *insn)
18916 rtx mem, addr;
18918 extract_insn_cached (insn);
18920 if (REG_P (recog_data.operand[0]))
18921 return 0;
18923 mem = recog_data.operand[0];
18925 gcc_assert (MEM_P (mem));
18927 addr = XEXP (mem, 0);
18929 if (REG_P (addr)
18930 || (GET_CODE (addr) == PLUS
18931 && REG_P (XEXP (addr, 0))
18932 && CONST_INT_P (XEXP (addr, 1))))
18933 return 1;
18934 else
18935 return 0;
18938 /* Output an ADD r, s, #n where n may be too big for one instruction.
18939 If the constant is zero and the destination is the same as the source, output nothing. */
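/* For example, adding #0x101 (not a valid single ARM immediate) would
   be emitted as two instructions, roughly

     add r0, r1, #1
     add r0, r0, #256

   via output_multi_immediate below.  (Illustrative constant and
   registers.)  */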
18940 const char *
18941 output_add_immediate (rtx *operands)
18943 HOST_WIDE_INT n = INTVAL (operands[2]);
18945 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18947 if (n < 0)
18948 output_multi_immediate (operands,
18949 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18950 -n);
18951 else
18952 output_multi_immediate (operands,
18953 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18957 return "";
18960 /* Output a multiple immediate operation.
18961 OPERANDS is the vector of operands referred to in the output patterns.
18962 INSTR1 is the output pattern to use for the first constant.
18963 INSTR2 is the output pattern to use for subsequent constants.
18964 IMMED_OP is the index of the constant slot in OPERANDS.
18965 N is the constant value. */
18966 static const char *
18967 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18968 int immed_op, HOST_WIDE_INT n)
18970 #if HOST_BITS_PER_WIDE_INT > 32
18971 n &= 0xffffffff;
18972 #endif
18974 if (n == 0)
18976 /* Quick and easy output. */
18977 operands[immed_op] = const0_rtx;
18978 output_asm_insn (instr1, operands);
18980 else
18982 int i;
18983 const char * instr = instr1;
18985 /* Note that n is never zero here (which would give no output). */
18986 for (i = 0; i < 32; i += 2)
18988 if (n & (3 << i))
18990 operands[immed_op] = GEN_INT (n & (255 << i));
18991 output_asm_insn (instr, operands);
18992 instr = instr2;
18993 i += 6;
18998 return "";
19001 /* Return the name of a shifter operation. */
19002 static const char *
19003 arm_shift_nmem(enum rtx_code code)
19005 switch (code)
19007 case ASHIFT:
19008 return ARM_LSL_NAME;
19010 case ASHIFTRT:
19011 return "asr";
19013 case LSHIFTRT:
19014 return "lsr";
19016 case ROTATERT:
19017 return "ror";
19019 default:
19020 abort();
19024 /* Return the appropriate ARM instruction for the operation code.
19025 The returned result should not be overwritten. OP is the rtx of the
19026 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19027 was shifted. */
19028 const char *
19029 arithmetic_instr (rtx op, int shift_first_arg)
19031 switch (GET_CODE (op))
19033 case PLUS:
19034 return "add";
19036 case MINUS:
19037 return shift_first_arg ? "rsb" : "sub";
19039 case IOR:
19040 return "orr";
19042 case XOR:
19043 return "eor";
19045 case AND:
19046 return "and";
19048 case ASHIFT:
19049 case ASHIFTRT:
19050 case LSHIFTRT:
19051 case ROTATERT:
19052 return arm_shift_nmem(GET_CODE(op));
19054 default:
19055 gcc_unreachable ();
19059 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19060 for the operation code. The returned result should not be overwritten.
19061 OP is the rtx code of the shift.
19062 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19063 shift amount if the shift is by a constant. */
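/* For instance, (ashiftrt x (const_int 3)) comes back as "asr" with
   *AMOUNTP set to 3, and (mult x (const_int 8)) -- the canonical form
   of a left shift inside some addresses -- also comes back as LSL with
   *AMOUNTP set to 3.  (Illustrative operands.)  */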
19064 static const char *
19065 shift_op (rtx op, HOST_WIDE_INT *amountp)
19067 const char * mnem;
19068 enum rtx_code code = GET_CODE (op);
19070 switch (code)
19072 case ROTATE:
19073 if (!CONST_INT_P (XEXP (op, 1)))
19075 output_operand_lossage ("invalid shift operand");
19076 return NULL;
19079 code = ROTATERT;
19080 *amountp = 32 - INTVAL (XEXP (op, 1));
19081 mnem = "ror";
19082 break;
19084 case ASHIFT:
19085 case ASHIFTRT:
19086 case LSHIFTRT:
19087 case ROTATERT:
19088 mnem = arm_shift_nmem(code);
19089 if (CONST_INT_P (XEXP (op, 1)))
19091 *amountp = INTVAL (XEXP (op, 1));
19093 else if (REG_P (XEXP (op, 1)))
19095 *amountp = -1;
19096 return mnem;
19098 else
19100 output_operand_lossage ("invalid shift operand");
19101 return NULL;
19103 break;
19105 case MULT:
19106 /* We never have to worry about the amount being other than a
19107 power of 2, since this case can never be reloaded from a reg. */
19108 if (!CONST_INT_P (XEXP (op, 1)))
19110 output_operand_lossage ("invalid shift operand");
19111 return NULL;
19114 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19116 /* Amount must be a power of two. */
19117 if (*amountp & (*amountp - 1))
19119 output_operand_lossage ("invalid shift operand");
19120 return NULL;
19123 *amountp = exact_log2 (*amountp);
19124 gcc_assert (IN_RANGE (*amountp, 0, 31));
19125 return ARM_LSL_NAME;
19127 default:
19128 output_operand_lossage ("invalid shift operand");
19129 return NULL;
19132 /* This is not 100% correct, but follows from the desire to merge
19133 multiplication by a power of 2 with the recognizer for a
19134 shift. >=32 is not a valid shift for "lsl", so we must try and
19135 output a shift that produces the correct arithmetical result.
19136 Using lsr #32 is identical except for the fact that the carry bit
19137 is not set correctly if we set the flags; but we never use the
19138 carry bit from such an operation, so we can ignore that. */
19139 if (code == ROTATERT)
19140 /* Rotate is just modulo 32. */
19141 *amountp &= 31;
19142 else if (*amountp != (*amountp & 31))
19144 if (code == ASHIFT)
19145 mnem = "lsr";
19146 *amountp = 32;
19149 /* Shifts of 0 are no-ops. */
19150 if (*amountp == 0)
19151 return NULL;
19153 return mnem;
19156 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19157 because /bin/as is horribly restrictive. The judgement about
19158 whether or not each character is 'printable' (and can be output as
19159 is) or not (and must be printed with an octal escape) must be made
19160 with reference to the *host* character set -- the situation is
19161 similar to that discussed in the comments above pp_c_char in
19162 c-pretty-print.c. */
19164 #define MAX_ASCII_LEN 51
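/* E.g. the bytes 'a', '"', 0x07 would be emitted as
     .ascii "a\"\007"
   and a fresh .ascii directive is started whenever MAX_ASCII_LEN
   characters have been written on the current line.  (Illustrative
   input.)  */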
19166 void
19167 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19169 int i;
19170 int len_so_far = 0;
19172 fputs ("\t.ascii\t\"", stream);
19174 for (i = 0; i < len; i++)
19176 int c = p[i];
19178 if (len_so_far >= MAX_ASCII_LEN)
19180 fputs ("\"\n\t.ascii\t\"", stream);
19181 len_so_far = 0;
19184 if (ISPRINT (c))
19186 if (c == '\\' || c == '\"')
19188 putc ('\\', stream);
19189 len_so_far++;
19191 putc (c, stream);
19192 len_so_far++;
19194 else
19196 fprintf (stream, "\\%03o", c);
19197 len_so_far += 4;
19201 fputs ("\"\n", stream);
19204 /* Whether a register is callee saved or not. This is necessary because, on
19205 Thumb-1 targets when optimizing for size, high registers are marked as caller
19206 saved even though they are really callee saved, in order to avoid using them. */
19207 #define callee_saved_reg_p(reg) \
19208 (!call_used_regs[reg] \
19209 || (TARGET_THUMB1 && optimize_size \
19210 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19212 /* Compute the register save mask for registers 0 through 12
19213 inclusive. This code is used by arm_compute_save_reg_mask. */
19215 static unsigned long
19216 arm_compute_save_reg0_reg12_mask (void)
19218 unsigned long func_type = arm_current_func_type ();
19219 unsigned long save_reg_mask = 0;
19220 unsigned int reg;
19222 if (IS_INTERRUPT (func_type))
19224 unsigned int max_reg;
19225 /* Interrupt functions must not corrupt any registers,
19226 even call clobbered ones. If this is a leaf function
19227 we can just examine the registers used by the RTL, but
19228 otherwise we have to assume that whatever function is
19229 called might clobber anything, and so we have to save
19230 all the call-clobbered registers as well. */
19231 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19232 /* FIQ handlers have registers r8 - r12 banked, so
19233 we only need to check r0 - r7. Normal ISRs only
19234 bank r14 and r15, so we must check up to r12.
19235 r13 is the stack pointer which is always preserved,
19236 so we do not need to consider it here. */
19237 max_reg = 7;
19238 else
19239 max_reg = 12;
19241 for (reg = 0; reg <= max_reg; reg++)
19242 if (df_regs_ever_live_p (reg)
19243 || (! crtl->is_leaf && call_used_regs[reg]))
19244 save_reg_mask |= (1 << reg);
19246 /* Also save the pic base register if necessary. */
19247 if (flag_pic
19248 && !TARGET_SINGLE_PIC_BASE
19249 && arm_pic_register != INVALID_REGNUM
19250 && crtl->uses_pic_offset_table)
19251 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19253 else if (IS_VOLATILE(func_type))
19255 /* For noreturn functions we historically omitted register saves
19256 altogether. However this really messes up debugging. As a
19257 compromise, save just the frame pointers. Combined with the link
19258 register saved elsewhere this should be sufficient to get
19259 a backtrace. */
19260 if (frame_pointer_needed)
19261 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19262 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19263 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19264 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19265 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19267 else
19269 /* In the normal case we only need to save those registers
19270 which are call saved and which are used by this function. */
19271 for (reg = 0; reg <= 11; reg++)
19272 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19273 save_reg_mask |= (1 << reg);
19275 /* Handle the frame pointer as a special case. */
19276 if (frame_pointer_needed)
19277 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19279 /* If we aren't loading the PIC register,
19280 don't stack it even though it may be live. */
19281 if (flag_pic
19282 && !TARGET_SINGLE_PIC_BASE
19283 && arm_pic_register != INVALID_REGNUM
19284 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19285 || crtl->uses_pic_offset_table))
19286 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19288 /* The prologue will copy SP into R0, so save it. */
19289 if (IS_STACKALIGN (func_type))
19290 save_reg_mask |= 1;
19293 /* Save registers so the exception handler can modify them. */
19294 if (crtl->calls_eh_return)
19296 unsigned int i;
19298 for (i = 0; ; i++)
19300 reg = EH_RETURN_DATA_REGNO (i);
19301 if (reg == INVALID_REGNUM)
19302 break;
19303 save_reg_mask |= 1 << reg;
19307 return save_reg_mask;
19310 /* Return true if r3 is live at the start of the function. */
19312 static bool
19313 arm_r3_live_at_start_p (void)
19315 /* Just look at cfg info, which is still close enough to correct at this
19316 point. This gives false positives for broken functions that might use
19317 uninitialized data that happens to be allocated in r3, but who cares? */
19318 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19321 /* Compute the number of bytes used to store the static chain register on the
19322 stack, above the stack frame. We need to know this accurately to get the
19323 alignment of the rest of the stack frame correct. */
19325 static int
19326 arm_compute_static_chain_stack_bytes (void)
19328 /* See the defining assertion in arm_expand_prologue. */
19329 if (IS_NESTED (arm_current_func_type ())
19330 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19331 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19332 && !df_regs_ever_live_p (LR_REGNUM)))
19333 && arm_r3_live_at_start_p ()
19334 && crtl->args.pretend_args_size == 0)
19335 return 4;
19337 return 0;
19340 /* Compute a bit mask of which registers need to be
19341 saved on the stack for the current function.
19342 This is used by arm_get_frame_offsets, which may add extra registers. */
19344 static unsigned long
19345 arm_compute_save_reg_mask (void)
19347 unsigned int save_reg_mask = 0;
19348 unsigned long func_type = arm_current_func_type ();
19349 unsigned int reg;
19351 if (IS_NAKED (func_type))
19352 /* This should never really happen. */
19353 return 0;
19355 /* If we are creating a stack frame, then we must save the frame pointer,
19356 IP (which will hold the old stack pointer), LR and the PC. */
19357 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19358 save_reg_mask |=
19359 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19360 | (1 << IP_REGNUM)
19361 | (1 << LR_REGNUM)
19362 | (1 << PC_REGNUM);
19364 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19366 /* Decide if we need to save the link register.
19367 Interrupt routines have their own banked link register,
19368 so they never need to save it.
19369 Otherwise if we do not use the link register we do not need to save
19370 it. If we are pushing other registers onto the stack however, we
19371 can save an instruction in the epilogue by pushing the link register
19372 now and then popping it back into the PC. This incurs extra memory
19373 accesses though, so we only do it when optimizing for size, and only
19374 if we know that we will not need a fancy return sequence. */
19375 if (df_regs_ever_live_p (LR_REGNUM)
19376 || (save_reg_mask
19377 && optimize_size
19378 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19379 && !crtl->tail_call_emit
19380 && !crtl->calls_eh_return))
19381 save_reg_mask |= 1 << LR_REGNUM;
19383 if (cfun->machine->lr_save_eliminated)
19384 save_reg_mask &= ~ (1 << LR_REGNUM);
19386 if (TARGET_REALLY_IWMMXT
19387 && ((bit_count (save_reg_mask)
19388 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19389 arm_compute_static_chain_stack_bytes())
19390 ) % 2) != 0)
19392 /* The total number of registers that are going to be pushed
19393 onto the stack is odd. We need to ensure that the stack
19394 is 64-bit aligned before we start to save iWMMXt registers,
19395 and also before we start to create locals. (A local variable
19396 might be a double or long long which we will load/store using
19397 an iWMMXt instruction). Therefore we need to push another
19398 ARM register, so that the stack will be 64-bit aligned. We
19399 try to avoid using the arg registers (r0 - r3) as they might be
19400 used to pass values in a tail call. */
19401 for (reg = 4; reg <= 12; reg++)
19402 if ((save_reg_mask & (1 << reg)) == 0)
19403 break;
19405 if (reg <= 12)
19406 save_reg_mask |= (1 << reg);
19407 else
19409 cfun->machine->sibcall_blocked = 1;
19410 save_reg_mask |= (1 << 3);
19414 /* We may need to push an additional register for use initializing the
19415 PIC base register. */
19416 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19417 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19419 reg = thumb_find_work_register (1 << 4);
19420 if (!call_used_regs[reg])
19421 save_reg_mask |= (1 << reg);
19424 return save_reg_mask;
19427 /* Compute a bit mask of which registers need to be
19428 saved on the stack for the current function. */
19429 static unsigned long
19430 thumb1_compute_save_reg_mask (void)
19432 unsigned long mask;
19433 unsigned reg;
19435 mask = 0;
19436 for (reg = 0; reg < 12; reg ++)
19437 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19438 mask |= 1 << reg;
19440 if (flag_pic
19441 && !TARGET_SINGLE_PIC_BASE
19442 && arm_pic_register != INVALID_REGNUM
19443 && crtl->uses_pic_offset_table)
19444 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19446 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19447 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19448 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19450 /* LR will also be pushed if any lo regs are pushed. */
19451 if (mask & 0xff || thumb_force_lr_save ())
19452 mask |= (1 << LR_REGNUM);
19454 /* Make sure we have a low work register if we need one.
19455 We will need one if we are going to push a high register,
19456 but we are not currently intending to push a low register. */
19457 if ((mask & 0xff) == 0
19458 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19460 /* Use thumb_find_work_register to choose which register
19461 we will use. If the register is live then we will
19462 have to push it. Use LAST_LO_REGNUM as our fallback
19463 choice for the register to select. */
19464 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19465 /* Make sure the register returned by thumb_find_work_register is
19466 not part of the return value. */
19467 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19468 reg = LAST_LO_REGNUM;
19470 if (callee_saved_reg_p (reg))
19471 mask |= 1 << reg;
19474 /* The 504 below is 8 bytes less than 512 because there are two possible
19475 alignment words. We can't tell here if they will be present or not so we
19476 have to play it safe and assume that they are. */
19477 if ((CALLER_INTERWORKING_SLOT_SIZE +
19478 ROUND_UP_WORD (get_frame_size ()) +
19479 crtl->outgoing_args_size) >= 504)
19481 /* This is the same as the code in thumb1_expand_prologue() which
19482 determines which register to use for stack decrement. */
19483 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19484 if (mask & (1 << reg))
19485 break;
19487 if (reg > LAST_LO_REGNUM)
19489 /* Make sure we have a register available for stack decrement. */
19490 mask |= 1 << LAST_LO_REGNUM;
19494 return mask;
19498 /* Return the number of bytes required to save VFP registers. */
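/* For example, if d8-d11 are live this is a single run of four D registers,
   so 32 bytes are needed; on pre-ARMv6 cores a run of exactly two D
   registers is padded to three (24 bytes) to work around the ARM10 VFPr1
   erratum handled below.  */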
19499 static int
19500 arm_get_vfp_saved_size (void)
19502 unsigned int regno;
19503 int count;
19504 int saved;
19506 saved = 0;
19507 /* Space for saved VFP registers. */
19508 if (TARGET_HARD_FLOAT && TARGET_VFP)
19510 count = 0;
19511 for (regno = FIRST_VFP_REGNUM;
19512 regno < LAST_VFP_REGNUM;
19513 regno += 2)
19515 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19516 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19518 if (count > 0)
19520 /* Work around the ARM10 VFPr1 bug. */
19521 if (count == 2 && !arm_arch6)
19522 count++;
19523 saved += count * 8;
19525 count = 0;
19527 else
19528 count++;
19530 if (count > 0)
19532 if (count == 2 && !arm_arch6)
19533 count++;
19534 saved += count * 8;
19537 return saved;
19541 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19542 everything bar the final return instruction. If simple_return is true,
19543 then do not output the epilogue, because it has already been emitted in RTL. */
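/* For a typical ARM-state function that saved {r4, r5, lr} and needs no
   interworking, this boils down to a single "pop {r4, r5, pc}"; the more
   elaborate paths below handle interrupt returns, interworking and
   exception returns.  */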
19544 const char *
19545 output_return_instruction (rtx operand, bool really_return, bool reverse,
19546 bool simple_return)
19548 char conditional[10];
19549 char instr[100];
19550 unsigned reg;
19551 unsigned long live_regs_mask;
19552 unsigned long func_type;
19553 arm_stack_offsets *offsets;
19555 func_type = arm_current_func_type ();
19557 if (IS_NAKED (func_type))
19558 return "";
19560 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19562 /* If this function was declared non-returning, and we have
19563 found a tail call, then we have to trust that the called
19564 function won't return. */
19565 if (really_return)
19567 rtx ops[2];
19569 /* Otherwise, trap an attempted return by aborting. */
19570 ops[0] = operand;
19571 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19572 : "abort");
19573 assemble_external_libcall (ops[1]);
19574 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19577 return "";
19580 gcc_assert (!cfun->calls_alloca || really_return);
19582 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19584 cfun->machine->return_used_this_function = 1;
19586 offsets = arm_get_frame_offsets ();
19587 live_regs_mask = offsets->saved_regs_mask;
19589 if (!simple_return && live_regs_mask)
19591 const char * return_reg;
19593 /* If we do not have any special requirements for function exit
19594 (e.g. interworking) then we can load the return address
19595 directly into the PC. Otherwise we must load it into LR. */
19596 if (really_return
19597 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19598 return_reg = reg_names[PC_REGNUM];
19599 else
19600 return_reg = reg_names[LR_REGNUM];
19602 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19604 /* There are three possible reasons for the IP register
19605 being saved: 1) a stack frame was created, in which case
19606 IP contains the old stack pointer, or 2) an ISR routine
19607 corrupted it, or 3) it was saved to align the stack on
19608 iWMMXt. In case 1, restore IP into SP, otherwise just
19609 restore IP. */
19610 if (frame_pointer_needed)
19612 live_regs_mask &= ~ (1 << IP_REGNUM);
19613 live_regs_mask |= (1 << SP_REGNUM);
19615 else
19616 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19619 /* On some ARM architectures it is faster to use LDR rather than
19620 LDM to load a single register. On other architectures, the
19621 cost is the same. In 26-bit mode, or for exception handlers,
19622 we have to use LDM to load the PC so that the CPSR is also
19623 restored. */
19624 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19625 if (live_regs_mask == (1U << reg))
19626 break;
19628 if (reg <= LAST_ARM_REGNUM
19629 && (reg != LR_REGNUM
19630 || ! really_return
19631 || ! IS_INTERRUPT (func_type)))
19633 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19634 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19636 else
19638 char *p;
19639 int first = 1;
19641 /* Generate the load multiple instruction to restore the
19642 registers. Note we can get here, even if
19643 frame_pointer_needed is true, but only if sp already
19644 points to the base of the saved core registers. */
19645 if (live_regs_mask & (1 << SP_REGNUM))
19647 unsigned HOST_WIDE_INT stack_adjust;
19649 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19650 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19652 if (stack_adjust && arm_arch5 && TARGET_ARM)
19653 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19654 else
19656 /* If we can't use ldmib (SA110 bug),
19657 then try to pop r3 instead. */
19658 if (stack_adjust)
19659 live_regs_mask |= 1 << 3;
19661 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19664 /* For interrupt returns we have to use an LDM rather than
19665 a POP so that we can use the exception return variant. */
19666 else if (IS_INTERRUPT (func_type))
19667 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19668 else
19669 sprintf (instr, "pop%s\t{", conditional);
19671 p = instr + strlen (instr);
19673 for (reg = 0; reg <= SP_REGNUM; reg++)
19674 if (live_regs_mask & (1 << reg))
19676 int l = strlen (reg_names[reg]);
19678 if (first)
19679 first = 0;
19680 else
19682 memcpy (p, ", ", 2);
19683 p += 2;
19686 memcpy (p, "%|", 2);
19687 memcpy (p + 2, reg_names[reg], l);
19688 p += l + 2;
19691 if (live_regs_mask & (1 << LR_REGNUM))
19693 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19694 /* If returning from an interrupt, restore the CPSR. */
19695 if (IS_INTERRUPT (func_type))
19696 strcat (p, "^");
19698 else
19699 strcpy (p, "}");
19702 output_asm_insn (instr, & operand);
19704 /* See if we need to generate an extra instruction to
19705 perform the actual function return. */
19706 if (really_return
19707 && func_type != ARM_FT_INTERWORKED
19708 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19710 /* The return has already been handled
19711 by loading the LR into the PC. */
19712 return "";
19716 if (really_return)
19718 switch ((int) ARM_FUNC_TYPE (func_type))
19720 case ARM_FT_ISR:
19721 case ARM_FT_FIQ:
19722 /* ??? This is wrong for unified assembly syntax. */
19723 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19724 break;
19726 case ARM_FT_INTERWORKED:
19727 gcc_assert (arm_arch5 || arm_arch4t);
19728 sprintf (instr, "bx%s\t%%|lr", conditional);
19729 break;
19731 case ARM_FT_EXCEPTION:
19732 /* ??? This is wrong for unified assembly syntax. */
19733 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19734 break;
19736 default:
19737 /* Use bx if it's available. */
19738 if (arm_arch5 || arm_arch4t)
19739 sprintf (instr, "bx%s\t%%|lr", conditional);
19740 else
19741 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19742 break;
19745 output_asm_insn (instr, & operand);
19748 return "";
19751 /* Write the function name into the code section, directly preceding
19752 the function prologue.
19754 Code will be output similar to this:
19756 .ascii "arm_poke_function_name", 0
19757 .align
19759 .word 0xff000000 + (t1 - t0)
19760 arm_poke_function_name
19761 mov ip, sp
19762 stmfd sp!, {fp, ip, lr, pc}
19763 sub fp, ip, #4
19765 When performing a stack backtrace, code can inspect the value
19766 of 'pc' stored at 'fp' + 0. If the trace function then looks
19767 at location pc - 12 and the top 8 bits are set, then we know
19768 that there is a function name embedded immediately preceding this
19769 location, and that its length is ((pc[-3]) & ~0xff000000).
19771 We assume that pc is declared as a pointer to an unsigned long.
19773 It is of no benefit to output the function name if we are assembling
19774 a leaf function. These function types will not contain a stack
19775 backtrace structure, so it is not possible to determine the
19776 function name. */
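/* For example, the 22 characters of "arm_poke_function_name" plus the
   terminating NUL round up to 24 bytes, so the marker word emitted below is
   0xff000000 + 24 = 0xff000018, and a backtracer recovers the length as
   pc[-3] & ~0xff000000.  */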
19777 void
19778 arm_poke_function_name (FILE *stream, const char *name)
19780 unsigned long alignlength;
19781 unsigned long length;
19782 rtx x;
19784 length = strlen (name) + 1;
19785 alignlength = ROUND_UP_WORD (length);
19787 ASM_OUTPUT_ASCII (stream, name, length);
19788 ASM_OUTPUT_ALIGN (stream, 2);
19789 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19790 assemble_aligned_integer (UNITS_PER_WORD, x);
19793 /* Place some comments into the assembler stream
19794 describing the current function. */
19795 static void
19796 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19798 unsigned long func_type;
19800 /* ??? Do we want to print some of the below anyway? */
19801 if (TARGET_THUMB1)
19802 return;
19804 /* Sanity check. */
19805 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19807 func_type = arm_current_func_type ();
19809 switch ((int) ARM_FUNC_TYPE (func_type))
19811 default:
19812 case ARM_FT_NORMAL:
19813 break;
19814 case ARM_FT_INTERWORKED:
19815 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19816 break;
19817 case ARM_FT_ISR:
19818 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19819 break;
19820 case ARM_FT_FIQ:
19821 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19822 break;
19823 case ARM_FT_EXCEPTION:
19824 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19825 break;
19828 if (IS_NAKED (func_type))
19829 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19831 if (IS_VOLATILE (func_type))
19832 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19834 if (IS_NESTED (func_type))
19835 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19836 if (IS_STACKALIGN (func_type))
19837 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19839 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19840 crtl->args.size,
19841 crtl->args.pretend_args_size, frame_size);
19843 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19844 frame_pointer_needed,
19845 cfun->machine->uses_anonymous_args);
19847 if (cfun->machine->lr_save_eliminated)
19848 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19850 if (crtl->calls_eh_return)
19851 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19855 static void
19856 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19857 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19859 arm_stack_offsets *offsets;
19861 if (TARGET_THUMB1)
19863 int regno;
19865 /* Emit any call-via-reg trampolines that are needed for v4t support
19866 of call_reg and call_value_reg type insns. */
19867 for (regno = 0; regno < LR_REGNUM; regno++)
19869 rtx label = cfun->machine->call_via[regno];
19871 if (label != NULL)
19873 switch_to_section (function_section (current_function_decl));
19874 targetm.asm_out.internal_label (asm_out_file, "L",
19875 CODE_LABEL_NUMBER (label));
19876 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19880 /* ??? Probably not safe to set this here, since it assumes that a
19881 function will be emitted as assembly immediately after we generate
19882 RTL for it. This does not happen for inline functions. */
19883 cfun->machine->return_used_this_function = 0;
19885 else /* TARGET_32BIT */
19887 /* We need to take into account any stack-frame rounding. */
19888 offsets = arm_get_frame_offsets ();
19890 gcc_assert (!use_return_insn (FALSE, NULL)
19891 || (cfun->machine->return_used_this_function != 0)
19892 || offsets->saved_regs == offsets->outgoing_args
19893 || frame_pointer_needed);
19897 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19898 STR and STRD. If an even number of registers is being pushed, an
19899 STRD pattern is created for each register pair. If an
19900 odd number of registers is pushed, emit an initial STR followed by
19901 as many STRD instructions as are needed. This works best when the
19902 stack is initially 64-bit aligned (the normal case), since it
19903 ensures that each STRD is also 64-bit aligned. */
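/* For example, with SAVED_REGS_MASK covering {r4, r5, r6} the expected
   output is roughly:
	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]
   i.e. one writeback store that allocates the whole area, followed by
   offset-addressed STRDs.  */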
19904 static void
19905 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19907 int num_regs = 0;
19908 int i;
19909 int regno;
19910 rtx par = NULL_RTX;
19911 rtx dwarf = NULL_RTX;
19912 rtx tmp;
19913 bool first = true;
19915 num_regs = bit_count (saved_regs_mask);
19917 /* Must be at least one register to save, and can't save SP or PC. */
19918 gcc_assert (num_regs > 0 && num_regs <= 14);
19919 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19920 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19922 /* Create sequence for DWARF info. All the frame-related data for
19923 debugging is held in this wrapper. */
19924 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19926 /* Describe the stack adjustment. */
19927 tmp = gen_rtx_SET (stack_pointer_rtx,
19928 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19929 RTX_FRAME_RELATED_P (tmp) = 1;
19930 XVECEXP (dwarf, 0, 0) = tmp;
19932 /* Find the first register. */
19933 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19936 i = 0;
19938 /* If there's an odd number of registers to push, start off by
19939 pushing a single register. This ensures that subsequent strd
19940 operations are dword aligned (assuming that SP was originally
19941 64-bit aligned). */
19942 if ((num_regs & 1) != 0)
19944 rtx reg, mem, insn;
19946 reg = gen_rtx_REG (SImode, regno);
19947 if (num_regs == 1)
19948 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19949 stack_pointer_rtx));
19950 else
19951 mem = gen_frame_mem (Pmode,
19952 gen_rtx_PRE_MODIFY
19953 (Pmode, stack_pointer_rtx,
19954 plus_constant (Pmode, stack_pointer_rtx,
19955 -4 * num_regs)));
19957 tmp = gen_rtx_SET (mem, reg);
19958 RTX_FRAME_RELATED_P (tmp) = 1;
19959 insn = emit_insn (tmp);
19960 RTX_FRAME_RELATED_P (insn) = 1;
19961 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19962 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19963 RTX_FRAME_RELATED_P (tmp) = 1;
19964 i++;
19965 regno++;
19966 XVECEXP (dwarf, 0, i) = tmp;
19967 first = false;
19970 while (i < num_regs)
19971 if (saved_regs_mask & (1 << regno))
19973 rtx reg1, reg2, mem1, mem2;
19974 rtx tmp0, tmp1, tmp2;
19975 int regno2;
19977 /* Find the register to pair with this one. */
19978 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19979 regno2++)
19982 reg1 = gen_rtx_REG (SImode, regno);
19983 reg2 = gen_rtx_REG (SImode, regno2);
19985 if (first)
19987 rtx insn;
19989 first = false;
19990 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19991 stack_pointer_rtx,
19992 -4 * num_regs));
19993 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19994 stack_pointer_rtx,
19995 -4 * (num_regs - 1)));
19996 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19997 plus_constant (Pmode, stack_pointer_rtx,
19998 -4 * (num_regs)));
19999 tmp1 = gen_rtx_SET (mem1, reg1);
20000 tmp2 = gen_rtx_SET (mem2, reg2);
20001 RTX_FRAME_RELATED_P (tmp0) = 1;
20002 RTX_FRAME_RELATED_P (tmp1) = 1;
20003 RTX_FRAME_RELATED_P (tmp2) = 1;
20004 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20005 XVECEXP (par, 0, 0) = tmp0;
20006 XVECEXP (par, 0, 1) = tmp1;
20007 XVECEXP (par, 0, 2) = tmp2;
20008 insn = emit_insn (par);
20009 RTX_FRAME_RELATED_P (insn) = 1;
20010 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20012 else
20014 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20015 stack_pointer_rtx,
20016 4 * i));
20017 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20018 stack_pointer_rtx,
20019 4 * (i + 1)));
20020 tmp1 = gen_rtx_SET (mem1, reg1);
20021 tmp2 = gen_rtx_SET (mem2, reg2);
20022 RTX_FRAME_RELATED_P (tmp1) = 1;
20023 RTX_FRAME_RELATED_P (tmp2) = 1;
20024 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20025 XVECEXP (par, 0, 0) = tmp1;
20026 XVECEXP (par, 0, 1) = tmp2;
20027 emit_insn (par);
20030 /* Create unwind information. This is an approximation. */
20031 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20032 plus_constant (Pmode,
20033 stack_pointer_rtx,
20034 4 * i)),
20035 reg1);
20036 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20037 plus_constant (Pmode,
20038 stack_pointer_rtx,
20039 4 * (i + 1))),
20040 reg2);
20042 RTX_FRAME_RELATED_P (tmp1) = 1;
20043 RTX_FRAME_RELATED_P (tmp2) = 1;
20044 XVECEXP (dwarf, 0, i + 1) = tmp1;
20045 XVECEXP (dwarf, 0, i + 2) = tmp2;
20046 i += 2;
20047 regno = regno2 + 1;
20049 else
20050 regno++;
20052 return;
20055 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20056 whenever possible, otherwise it emits single-word stores. The first store
20057 also allocates stack space for all saved registers, using pre-indexed
20058 addressing with writeback. All other stores use offset addressing. If no STRD
20059 can be emitted, this function emits a sequence of single-word stores,
20060 and not an STM as before, because single-word stores provide more
20061 scheduling freedom and can be turned into an STM by peephole optimizations. */
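/* For example, with SAVED_REGS_MASK covering {r4, r5, r7} the expected
   output is roughly:
	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]
   i.e. the first store allocates the whole 12-byte area with writeback and
   the remaining store uses offset addressing.  */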
20062 static void
20063 arm_emit_strd_push (unsigned long saved_regs_mask)
20065 int num_regs = 0;
20066 int i, j, dwarf_index = 0;
20067 int offset = 0;
20068 rtx dwarf = NULL_RTX;
20069 rtx insn = NULL_RTX;
20070 rtx tmp, mem;
20072 /* TODO: More efficient code can be emitted by changing the
20073 layout, e.g., first push all pairs that can use STRD to keep the
20074 stack aligned, and then push all other registers. */
20075 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20076 if (saved_regs_mask & (1 << i))
20077 num_regs++;
20079 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20080 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20081 gcc_assert (num_regs > 0);
20083 /* Create sequence for DWARF info. */
20084 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20086 /* For dwarf info, we generate explicit stack update. */
20087 tmp = gen_rtx_SET (stack_pointer_rtx,
20088 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20089 RTX_FRAME_RELATED_P (tmp) = 1;
20090 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20092 /* Save registers. */
20093 offset = - 4 * num_regs;
20094 j = 0;
20095 while (j <= LAST_ARM_REGNUM)
20096 if (saved_regs_mask & (1 << j))
20098 if ((j % 2 == 0)
20099 && (saved_regs_mask & (1 << (j + 1))))
20101 /* The current register and the next register form a register pair for
20102 which STRD can be generated. */
20103 if (offset < 0)
20105 /* Allocate stack space for all saved registers. */
20106 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20107 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20108 mem = gen_frame_mem (DImode, tmp);
20109 offset = 0;
20111 else if (offset > 0)
20112 mem = gen_frame_mem (DImode,
20113 plus_constant (Pmode,
20114 stack_pointer_rtx,
20115 offset));
20116 else
20117 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20119 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20120 RTX_FRAME_RELATED_P (tmp) = 1;
20121 tmp = emit_insn (tmp);
20123 /* Record the first store insn. */
20124 if (dwarf_index == 1)
20125 insn = tmp;
20127 /* Generate dwarf info. */
20128 mem = gen_frame_mem (SImode,
20129 plus_constant (Pmode,
20130 stack_pointer_rtx,
20131 offset));
20132 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20133 RTX_FRAME_RELATED_P (tmp) = 1;
20134 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20136 mem = gen_frame_mem (SImode,
20137 plus_constant (Pmode,
20138 stack_pointer_rtx,
20139 offset + 4));
20140 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20141 RTX_FRAME_RELATED_P (tmp) = 1;
20142 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20144 offset += 8;
20145 j += 2;
20147 else
20149 /* Emit a single word store. */
20150 if (offset < 0)
20152 /* Allocate stack space for all saved registers. */
20153 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20154 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20155 mem = gen_frame_mem (SImode, tmp);
20156 offset = 0;
20158 else if (offset > 0)
20159 mem = gen_frame_mem (SImode,
20160 plus_constant (Pmode,
20161 stack_pointer_rtx,
20162 offset));
20163 else
20164 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20166 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20167 RTX_FRAME_RELATED_P (tmp) = 1;
20168 tmp = emit_insn (tmp);
20170 /* Record the first store insn. */
20171 if (dwarf_index == 1)
20172 insn = tmp;
20174 /* Generate dwarf info. */
20175 mem = gen_frame_mem (SImode,
20176 plus_constant(Pmode,
20177 stack_pointer_rtx,
20178 offset));
20179 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20180 RTX_FRAME_RELATED_P (tmp) = 1;
20181 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20183 offset += 4;
20184 j += 1;
20187 else
20188 j++;
20190 /* Attach dwarf info to the first insn we generate. */
20191 gcc_assert (insn != NULL_RTX);
20192 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20193 RTX_FRAME_RELATED_P (insn) = 1;
20196 /* Generate and emit an insn that we will recognize as a push_multi.
20197 Unfortunately, since this insn does not reflect very well the actual
20198 semantics of the operation, we need to annotate the insn for the benefit
20199 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20200 MASK for registers that should be annotated for DWARF2 frame unwind
20201 information. */
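/* For example, a MASK covering {r4, r5, lr} is matched by push_multi and
   assembled as "push {r4, r5, lr}" (stmfd sp!, {r4, r5, lr} in ARM state),
   while the attached note (assuming DWARF_REGS_MASK equals MASK) describes
   the three individual stores plus a single 12-byte stack decrement.  */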
20202 static rtx
20203 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20205 int num_regs = 0;
20206 int num_dwarf_regs = 0;
20207 int i, j;
20208 rtx par;
20209 rtx dwarf;
20210 int dwarf_par_index;
20211 rtx tmp, reg;
20213 /* We don't record the PC in the dwarf frame information. */
20214 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20216 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20218 if (mask & (1 << i))
20219 num_regs++;
20220 if (dwarf_regs_mask & (1 << i))
20221 num_dwarf_regs++;
20224 gcc_assert (num_regs && num_regs <= 16);
20225 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20227 /* For the body of the insn we are going to generate an UNSPEC in
20228 parallel with several USEs. This allows the insn to be recognized
20229 by the push_multi pattern in the arm.md file.
20231 The body of the insn looks something like this:
20233 (parallel [
20234 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20235 (const_int:SI <num>)))
20236 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20237 (use (reg:SI XX))
20238 (use (reg:SI YY))
20242 For the frame note however, we try to be more explicit and actually
20243 show each register being stored into the stack frame, plus a (single)
20244 decrement of the stack pointer. We do it this way in order to be
20245 friendly to the stack unwinding code, which only wants to see a single
20246 stack decrement per instruction. The RTL we generate for the note looks
20247 something like this:
20249 (sequence [
20250 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20251 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20252 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20253 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20257 FIXME: In an ideal world the PRE_MODIFY would not exist and
20258 instead we'd have a parallel expression detailing all
20259 the stores to the various memory addresses so that debug
20260 information is more up-to-date. Remember however while writing
20261 this to take care of the constraints with the push instruction.
20263 Note also that this has to be taken care of for the VFP registers.
20265 For more see PR43399. */
20267 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20268 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20269 dwarf_par_index = 1;
20271 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20273 if (mask & (1 << i))
20275 reg = gen_rtx_REG (SImode, i);
20277 XVECEXP (par, 0, 0)
20278 = gen_rtx_SET (gen_frame_mem
20279 (BLKmode,
20280 gen_rtx_PRE_MODIFY (Pmode,
20281 stack_pointer_rtx,
20282 plus_constant
20283 (Pmode, stack_pointer_rtx,
20284 -4 * num_regs))
20286 gen_rtx_UNSPEC (BLKmode,
20287 gen_rtvec (1, reg),
20288 UNSPEC_PUSH_MULT));
20290 if (dwarf_regs_mask & (1 << i))
20292 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20293 reg);
20294 RTX_FRAME_RELATED_P (tmp) = 1;
20295 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20298 break;
20302 for (j = 1, i++; j < num_regs; i++)
20304 if (mask & (1 << i))
20306 reg = gen_rtx_REG (SImode, i);
20308 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20310 if (dwarf_regs_mask & (1 << i))
20313 = gen_rtx_SET (gen_frame_mem
20314 (SImode,
20315 plus_constant (Pmode, stack_pointer_rtx,
20316 4 * j)),
20317 reg);
20318 RTX_FRAME_RELATED_P (tmp) = 1;
20319 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20322 j++;
20326 par = emit_insn (par);
20328 tmp = gen_rtx_SET (stack_pointer_rtx,
20329 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20330 RTX_FRAME_RELATED_P (tmp) = 1;
20331 XVECEXP (dwarf, 0, 0) = tmp;
20333 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20335 return par;
20338 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20339 SIZE is the offset to be adjusted.
20340 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20341 static void
20342 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20344 rtx dwarf;
20346 RTX_FRAME_RELATED_P (insn) = 1;
20347 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20348 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20351 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20352 SAVED_REGS_MASK shows which registers need to be restored.
20354 Unfortunately, since this insn does not reflect very well the actual
20355 semantics of the operation, we need to annotate the insn for the benefit
20356 of DWARF2 frame unwind information. */
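/* For example, SAVED_REGS_MASK covering {r4, r5, r6} typically becomes a
   single "pop {r4, r5, r6}", annotated with a REG_CFA_RESTORE note for each
   register and a 12-byte CFA adjustment.  */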
20357 static void
20358 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20360 int num_regs = 0;
20361 int i, j;
20362 rtx par;
20363 rtx dwarf = NULL_RTX;
20364 rtx tmp, reg;
20365 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20366 int offset_adj;
20367 int emit_update;
20369 offset_adj = return_in_pc ? 1 : 0;
20370 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20371 if (saved_regs_mask & (1 << i))
20372 num_regs++;
20374 gcc_assert (num_regs && num_regs <= 16);
20376 /* If SP is in reglist, then we don't emit SP update insn. */
20377 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20379 /* The parallel needs to hold num_regs SETs
20380 and one SET for the stack update. */
20381 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20383 if (return_in_pc)
20384 XVECEXP (par, 0, 0) = ret_rtx;
20386 if (emit_update)
20388 /* Increment the stack pointer, based on there being
20389 num_regs 4-byte registers to restore. */
20390 tmp = gen_rtx_SET (stack_pointer_rtx,
20391 plus_constant (Pmode,
20392 stack_pointer_rtx,
20393 4 * num_regs));
20394 RTX_FRAME_RELATED_P (tmp) = 1;
20395 XVECEXP (par, 0, offset_adj) = tmp;
20398 /* Now restore every reg, which may include PC. */
20399 for (j = 0, i = 0; j < num_regs; i++)
20400 if (saved_regs_mask & (1 << i))
20402 reg = gen_rtx_REG (SImode, i);
20403 if ((num_regs == 1) && emit_update && !return_in_pc)
20405 /* Emit single load with writeback. */
20406 tmp = gen_frame_mem (SImode,
20407 gen_rtx_POST_INC (Pmode,
20408 stack_pointer_rtx));
20409 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20410 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20411 return;
20414 tmp = gen_rtx_SET (reg,
20415 gen_frame_mem
20416 (SImode,
20417 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20418 RTX_FRAME_RELATED_P (tmp) = 1;
20419 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20421 /* We need to maintain a sequence for DWARF info too. As dwarf info
20422 should not have PC, skip PC. */
20423 if (i != PC_REGNUM)
20424 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20426 j++;
20429 if (return_in_pc)
20430 par = emit_jump_insn (par);
20431 else
20432 par = emit_insn (par);
20434 REG_NOTES (par) = dwarf;
20435 if (!return_in_pc)
20436 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20437 stack_pointer_rtx, stack_pointer_rtx);
20440 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20441 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20443 Unfortunately, since this insn does not reflect very well the actual
20444 semantics of the operation, we need to annotate the insn for the benefit
20445 of DWARF2 frame unwind information. */
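/* For example, popping four D registers starting at d8 with BASE_REG == sp
   is expected to produce a single "vldm sp!, {d8-d11}", with one
   REG_CFA_RESTORE note per register and a 32-byte CFA adjustment.  */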
20446 static void
20447 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20449 int i, j;
20450 rtx par;
20451 rtx dwarf = NULL_RTX;
20452 rtx tmp, reg;
20454 gcc_assert (num_regs && num_regs <= 32);
20456 /* Work around the ARM10 VFPr1 bug. */
20457 if (num_regs == 2 && !arm_arch6)
20459 if (first_reg == 15)
20460 first_reg--;
20462 num_regs++;
20465 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20466 there could be up to 32 D-registers to restore.
20467 If there are more than 16 D-registers, make two recursive calls,
20468 each of which emits one pop_multi instruction. */
20469 if (num_regs > 16)
20471 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20472 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20473 return;
20476 /* The parallel needs to hold num_regs SETs
20477 and one SET for the stack update. */
20478 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20480 /* Increment the stack pointer, based on there being
20481 num_regs 8-byte registers to restore. */
20482 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20483 RTX_FRAME_RELATED_P (tmp) = 1;
20484 XVECEXP (par, 0, 0) = tmp;
20486 /* Now show every reg that will be restored, using a SET for each. */
20487 for (j = 0, i=first_reg; j < num_regs; i += 2)
20489 reg = gen_rtx_REG (DFmode, i);
20491 tmp = gen_rtx_SET (reg,
20492 gen_frame_mem
20493 (DFmode,
20494 plus_constant (Pmode, base_reg, 8 * j)));
20495 RTX_FRAME_RELATED_P (tmp) = 1;
20496 XVECEXP (par, 0, j + 1) = tmp;
20498 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20500 j++;
20503 par = emit_insn (par);
20504 REG_NOTES (par) = dwarf;
20506 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20507 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20509 RTX_FRAME_RELATED_P (par) = 1;
20510 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20512 else
20513 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20514 base_reg, base_reg);
20517 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20518 even number of registers is being popped, multiple LDRD patterns are created,
20519 one for each register pair. If an odd number of registers is popped, the last
20520 register is loaded using an LDR pattern. */
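/* For example, SAVED_REGS_MASK covering {r4, r5, r6, r7} is expected to
   produce roughly:
	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16
   with the final stack update carrying the CFA adjustment note.  */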
20521 static void
20522 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20524 int num_regs = 0;
20525 int i, j;
20526 rtx par = NULL_RTX;
20527 rtx dwarf = NULL_RTX;
20528 rtx tmp, reg, tmp1;
20529 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20531 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20532 if (saved_regs_mask & (1 << i))
20533 num_regs++;
20535 gcc_assert (num_regs && num_regs <= 16);
20537 /* We cannot generate LDRD for PC, so reduce the count if PC is to be
20538 popped. Thus, if num_regs was even it now becomes odd and we can
20539 generate a pop with PC; if num_regs was odd it becomes even and an
20540 LDR with return can be generated for PC. */
20541 if (return_in_pc)
20542 num_regs--;
20544 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20546 /* Var j iterates over all the registers to gather all the registers in
20547 saved_regs_mask. Var i gives the index of saved registers in the stack frame.
20548 A PARALLEL RTX of a register pair is created here, so that the pattern for
20549 LDRD can be matched. As PC is always the last register to be popped, and
20550 we have already decremented num_regs if PC is in the mask, we don't have
20551 to worry about PC in this loop. */
20552 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20553 if (saved_regs_mask & (1 << j))
20555 /* Create RTX for memory load. */
20556 reg = gen_rtx_REG (SImode, j);
20557 tmp = gen_rtx_SET (reg,
20558 gen_frame_mem (SImode,
20559 plus_constant (Pmode,
20560 stack_pointer_rtx, 4 * i)));
20561 RTX_FRAME_RELATED_P (tmp) = 1;
20563 if (i % 2 == 0)
20565 /* When saved-register index (i) is even, the RTX to be emitted is
20566 yet to be created. Hence create it first. The LDRD pattern we
20567 are generating is :
20568 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20569 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20570 where target registers need not be consecutive. */
20571 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20572 dwarf = NULL_RTX;
20575 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20576 added as 0th element and if i is odd, reg_i is added as 1st element
20577 of LDRD pattern shown above. */
20578 XVECEXP (par, 0, (i % 2)) = tmp;
20579 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20581 if ((i % 2) == 1)
20583 /* When saved-register index (i) is odd, RTXs for both the registers
20584 to be loaded are generated in above given LDRD pattern, and the
20585 pattern can be emitted now. */
20586 par = emit_insn (par);
20587 REG_NOTES (par) = dwarf;
20588 RTX_FRAME_RELATED_P (par) = 1;
20591 i++;
20594 /* If the number of registers pushed is odd and return_in_pc is false, or
20595 the number of registers is even and return_in_pc is true, the last
20596 register is popped using LDR. It can be PC as well. Hence, adjust the
20597 stack first and then use LDR with post-increment. */
20599 /* Increment the stack pointer, based on there being
20600 num_regs 4-byte registers to restore. */
20601 tmp = gen_rtx_SET (stack_pointer_rtx,
20602 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20603 RTX_FRAME_RELATED_P (tmp) = 1;
20604 tmp = emit_insn (tmp);
20605 if (!return_in_pc)
20607 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20608 stack_pointer_rtx, stack_pointer_rtx);
20611 dwarf = NULL_RTX;
20613 if (((num_regs % 2) == 1 && !return_in_pc)
20614 || ((num_regs % 2) == 0 && return_in_pc))
20616 /* Scan for the single register to be popped. Skip until the saved
20617 register is found. */
20618 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20620 /* Gen LDR with post increment here. */
20621 tmp1 = gen_rtx_MEM (SImode,
20622 gen_rtx_POST_INC (SImode,
20623 stack_pointer_rtx));
20624 set_mem_alias_set (tmp1, get_frame_alias_set ());
20626 reg = gen_rtx_REG (SImode, j);
20627 tmp = gen_rtx_SET (reg, tmp1);
20628 RTX_FRAME_RELATED_P (tmp) = 1;
20629 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20631 if (return_in_pc)
20633 /* If return_in_pc, j must be PC_REGNUM. */
20634 gcc_assert (j == PC_REGNUM);
20635 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20636 XVECEXP (par, 0, 0) = ret_rtx;
20637 XVECEXP (par, 0, 1) = tmp;
20638 par = emit_jump_insn (par);
20640 else
20642 par = emit_insn (tmp);
20643 REG_NOTES (par) = dwarf;
20644 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20645 stack_pointer_rtx, stack_pointer_rtx);
20649 else if ((num_regs % 2) == 1 && return_in_pc)
20651 /* There are 2 registers to be popped. So, generate the pattern
20652 pop_multiple_with_stack_update_and_return to pop in PC. */
20653 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20656 return;
20659 /* LDRD in ARM mode needs consecutive registers as operands. This function
20660 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20661 offset addressing and then generates one separate stack update. This provides
20662 more scheduling freedom, compared to writeback on every load. However,
20663 if the function returns using load into PC directly
20664 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20665 before the last load. TODO: Add a peephole optimization to recognize
20666 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20667 peephole optimization to merge the load at stack-offset zero
20668 with the stack update instruction using load with writeback
20669 in post-index addressing mode. */
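/* For example, SAVED_REGS_MASK covering {r4, r5, r7} is expected to
   produce roughly:
	ldrd	r4, r5, [sp]
	ldr	r7, [sp, #8]
	add	sp, sp, #12
   i.e. offset-addressed loads followed by one stack update.  */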
20670 static void
20671 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20673 int j = 0;
20674 int offset = 0;
20675 rtx par = NULL_RTX;
20676 rtx dwarf = NULL_RTX;
20677 rtx tmp, mem;
20679 /* Restore saved registers. */
20680 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20681 j = 0;
20682 while (j <= LAST_ARM_REGNUM)
20683 if (saved_regs_mask & (1 << j))
20685 if ((j % 2) == 0
20686 && (saved_regs_mask & (1 << (j + 1)))
20687 && (j + 1) != PC_REGNUM)
20689 /* The current register and the next register form a register pair for which
20690 LDRD can be generated. PC is always the last register popped, and
20691 we handle it separately. */
20692 if (offset > 0)
20693 mem = gen_frame_mem (DImode,
20694 plus_constant (Pmode,
20695 stack_pointer_rtx,
20696 offset));
20697 else
20698 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20700 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20701 tmp = emit_insn (tmp);
20702 RTX_FRAME_RELATED_P (tmp) = 1;
20704 /* Generate dwarf info. */
20706 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20707 gen_rtx_REG (SImode, j),
20708 NULL_RTX);
20709 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20710 gen_rtx_REG (SImode, j + 1),
20711 dwarf);
20713 REG_NOTES (tmp) = dwarf;
20715 offset += 8;
20716 j += 2;
20718 else if (j != PC_REGNUM)
20720 /* Emit a single word load. */
20721 if (offset > 0)
20722 mem = gen_frame_mem (SImode,
20723 plus_constant (Pmode,
20724 stack_pointer_rtx,
20725 offset));
20726 else
20727 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20729 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20730 tmp = emit_insn (tmp);
20731 RTX_FRAME_RELATED_P (tmp) = 1;
20733 /* Generate dwarf info. */
20734 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20735 gen_rtx_REG (SImode, j),
20736 NULL_RTX);
20738 offset += 4;
20739 j += 1;
20741 else /* j == PC_REGNUM */
20742 j++;
20744 else
20745 j++;
20747 /* Update the stack. */
20748 if (offset > 0)
20750 tmp = gen_rtx_SET (stack_pointer_rtx,
20751 plus_constant (Pmode,
20752 stack_pointer_rtx,
20753 offset));
20754 tmp = emit_insn (tmp);
20755 arm_add_cfa_adjust_cfa_note (tmp, offset,
20756 stack_pointer_rtx, stack_pointer_rtx);
20757 offset = 0;
20760 if (saved_regs_mask & (1 << PC_REGNUM))
20762 /* Only PC is to be popped. */
20763 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20764 XVECEXP (par, 0, 0) = ret_rtx;
20765 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20766 gen_frame_mem (SImode,
20767 gen_rtx_POST_INC (SImode,
20768 stack_pointer_rtx)));
20769 RTX_FRAME_RELATED_P (tmp) = 1;
20770 XVECEXP (par, 0, 1) = tmp;
20771 par = emit_jump_insn (par);
20773 /* Generate dwarf info. */
20774 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20775 gen_rtx_REG (SImode, PC_REGNUM),
20776 NULL_RTX);
20777 REG_NOTES (par) = dwarf;
20778 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20779 stack_pointer_rtx, stack_pointer_rtx);
20783 /* Calculate the size of the return value that is passed in registers. */
20784 static unsigned
20785 arm_size_return_regs (void)
20787 machine_mode mode;
20789 if (crtl->return_rtx != 0)
20790 mode = GET_MODE (crtl->return_rtx);
20791 else
20792 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20794 return GET_MODE_SIZE (mode);
20797 /* Return true if the current function needs to save/restore LR. */
20798 static bool
20799 thumb_force_lr_save (void)
20801 return !cfun->machine->lr_save_eliminated
20802 && (!leaf_function_p ()
20803 || thumb_far_jump_used_p ()
20804 || df_regs_ever_live_p (LR_REGNUM));
20807 /* Return true if CALL is an indirect tail call; in that case we do not
20808 know whether r3 will be available, because the target of the call is
20809 held in a register rather than known at compile time. */
20810 static bool
20811 is_indirect_tailcall_p (rtx call)
20813 rtx pat = PATTERN (call);
20815 /* Indirect tail call. */
20816 pat = XVECEXP (pat, 0, 0);
20817 if (GET_CODE (pat) == SET)
20818 pat = SET_SRC (pat);
20820 pat = XEXP (XEXP (pat, 0), 0);
20821 return REG_P (pat);
20824 /* Return true if r3 is used by any of the tail call insns in the
20825 current function. */
20826 static bool
20827 any_sibcall_could_use_r3 (void)
20829 edge_iterator ei;
20830 edge e;
20832 if (!crtl->tail_call_emit)
20833 return false;
20834 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20835 if (e->flags & EDGE_SIBCALL)
20837 rtx call = BB_END (e->src);
20838 if (!CALL_P (call))
20839 call = prev_nonnote_nondebug_insn (call);
20840 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20841 if (find_regno_fusage (call, USE, 3)
20842 || is_indirect_tailcall_p (call))
20843 return true;
20845 return false;
20849 /* Compute the distance from register FROM to register TO.
20850 These can be the arg pointer (26), the soft frame pointer (25),
20851 the stack pointer (13) or the hard frame pointer (11).
20852 In thumb mode r7 is used as the soft frame pointer, if needed.
20853 Typical stack layout looks like this:
20855 old stack pointer -> | |
20856 ----
20857 | | \
20858 | | saved arguments for
20859 | | vararg functions
20860 | | /
20862 hard FP & arg pointer -> | | \
20863 | | stack
20864 | | frame
20865 | | /
20867 | | \
20868 | | call saved
20869 | | registers
20870 soft frame pointer -> | | /
20872 | | \
20873 | | local
20874 | | variables
20875 locals base pointer -> | | /
20877 | | \
20878 | | outgoing
20879 | | arguments
20880 current stack pointer -> | | /
20883 For a given function some or all of these stack components
20884 may not be needed, giving rise to the possibility of
20885 eliminating some of the registers.
20887 The values returned by this function must reflect the behavior
20888 of arm_expand_prologue() and arm_compute_save_reg_mask().
20890 The sign of the number returned reflects the direction of stack
20891 growth, so the values are positive for all eliminations except
20892 from the soft frame pointer to the hard frame pointer.
20894 SFP may point just inside the local variables block to ensure correct
20895 alignment. */
20898 /* Calculate stack offsets. These are used to calculate register elimination
20899 offsets and in prologue/epilogue code. Also calculates which registers
20900 should be saved. */
20902 static arm_stack_offsets *
20903 arm_get_frame_offsets (void)
20905 struct arm_stack_offsets *offsets;
20906 unsigned long func_type;
20907 int leaf;
20908 int saved;
20909 int core_saved;
20910 HOST_WIDE_INT frame_size;
20911 int i;
20913 offsets = &cfun->machine->stack_offsets;
20915 /* We need to know if we are a leaf function. Unfortunately, it
20916 is possible to be called after start_sequence has been called,
20917 which causes get_insns to return the insns for the sequence,
20918 not the function, which will cause leaf_function_p to return
20919 the incorrect result.
20921 Fortunately, we only really need to know about leaf functions once reload has completed, and the
20922 frame size cannot be changed after that time, so we can safely
20923 use the cached value. */
20925 if (reload_completed)
20926 return offsets;
20928 /* Initially this is the size of the local variables. It will be translated
20929 into an offset once we have determined the size of preceding data. */
20930 frame_size = ROUND_UP_WORD (get_frame_size ());
20932 leaf = leaf_function_p ();
20934 /* Space for variadic functions. */
20935 offsets->saved_args = crtl->args.pretend_args_size;
20937 /* In Thumb mode this is incorrect, but never used. */
20938 offsets->frame
20939 = (offsets->saved_args
20940 + arm_compute_static_chain_stack_bytes ()
20941 + (frame_pointer_needed ? 4 : 0));
20943 if (TARGET_32BIT)
20945 unsigned int regno;
20947 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20948 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20949 saved = core_saved;
20951 /* We know that SP will be doubleword aligned on entry, and we must
20952 preserve that condition at any subroutine call. We also require the
20953 soft frame pointer to be doubleword aligned. */
20955 if (TARGET_REALLY_IWMMXT)
20957 /* Check for the call-saved iWMMXt registers. */
20958 for (regno = FIRST_IWMMXT_REGNUM;
20959 regno <= LAST_IWMMXT_REGNUM;
20960 regno++)
20961 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20962 saved += 8;
20965 func_type = arm_current_func_type ();
20966 /* Space for saved VFP registers. */
20967 if (! IS_VOLATILE (func_type)
20968 && TARGET_HARD_FLOAT && TARGET_VFP)
20969 saved += arm_get_vfp_saved_size ();
20971 else /* TARGET_THUMB1 */
20973 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20974 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20975 saved = core_saved;
20976 if (TARGET_BACKTRACE)
20977 saved += 16;
20980 /* Saved registers include the stack frame. */
20981 offsets->saved_regs
20982 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20983 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20985 /* A leaf function does not need any stack alignment if it has nothing
20986 on the stack. */
20987 if (leaf && frame_size == 0
20988 /* However if it calls alloca(), we have a dynamically allocated
20989 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20990 && ! cfun->calls_alloca)
20992 offsets->outgoing_args = offsets->soft_frame;
20993 offsets->locals_base = offsets->soft_frame;
20994 return offsets;
20997 /* Ensure SFP has the correct alignment. */
20998 if (ARM_DOUBLEWORD_ALIGN
20999 && (offsets->soft_frame & 7))
21001 offsets->soft_frame += 4;
21002 /* Try to align stack by pushing an extra reg. Don't bother doing this
21003 when there is a stack frame as the alignment will be rolled into
21004 the normal stack adjustment. */
21005 if (frame_size + crtl->outgoing_args_size == 0)
21007 int reg = -1;
21009 /* Register r3 is caller-saved. Normally it does not need to be
21010 saved on entry by the prologue. However if we choose to save
21011 it for padding then we may confuse the compiler into thinking
21012 a prologue sequence is required when in fact it is not. This
21013 will occur when shrink-wrapping if r3 is used as a scratch
21014 register and there are no other callee-saved writes.
21016 This situation can be avoided when other callee-saved registers
21017 are available and r3 is not mandatory if we choose a callee-saved
21018 register for padding. */
21019 bool prefer_callee_reg_p = false;
21021 /* If it is safe to use r3, then do so. This sometimes
21022 generates better code on Thumb-2 by avoiding the need to
21023 use 32-bit push/pop instructions. */
21024 if (! any_sibcall_could_use_r3 ()
21025 && arm_size_return_regs () <= 12
21026 && (offsets->saved_regs_mask & (1 << 3)) == 0
21027 && (TARGET_THUMB2
21028 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21030 reg = 3;
21031 if (!TARGET_THUMB2)
21032 prefer_callee_reg_p = true;
21034 if (reg == -1
21035 || prefer_callee_reg_p)
21037 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21039 /* Avoid fixed registers; they may be changed at
21040 arbitrary times so it's unsafe to restore them
21041 during the epilogue. */
21042 if (!fixed_regs[i]
21043 && (offsets->saved_regs_mask & (1 << i)) == 0)
21045 reg = i;
21046 break;
21051 if (reg != -1)
21053 offsets->saved_regs += 4;
21054 offsets->saved_regs_mask |= (1 << reg);
21059 offsets->locals_base = offsets->soft_frame + frame_size;
21060 offsets->outgoing_args = (offsets->locals_base
21061 + crtl->outgoing_args_size);
21063 if (ARM_DOUBLEWORD_ALIGN)
21065 /* Ensure SP remains doubleword aligned. */
21066 if (offsets->outgoing_args & 7)
21067 offsets->outgoing_args += 4;
21068 gcc_assert (!(offsets->outgoing_args & 7));
21071 return offsets;
21075 /* Calculate the relative offsets for the different stack pointers. Positive
21076 offsets are in the direction of stack growth. */
21078 HOST_WIDE_INT
21079 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21081 arm_stack_offsets *offsets;
21083 offsets = arm_get_frame_offsets ();
21085 /* OK, now we have enough information to compute the distances.
21086 There must be an entry in these switch tables for each pair
21087 of registers in ELIMINABLE_REGS, even if some of the entries
21088 seem to be redundant or useless. */
21089 switch (from)
21091 case ARG_POINTER_REGNUM:
21092 switch (to)
21094 case THUMB_HARD_FRAME_POINTER_REGNUM:
21095 return 0;
21097 case FRAME_POINTER_REGNUM:
21098 /* This is the reverse of the soft frame pointer
21099 to hard frame pointer elimination below. */
21100 return offsets->soft_frame - offsets->saved_args;
21102 case ARM_HARD_FRAME_POINTER_REGNUM:
21103 /* This is only non-zero in the case where the static chain register
21104 is stored above the frame. */
21105 return offsets->frame - offsets->saved_args - 4;
21107 case STACK_POINTER_REGNUM:
21108 /* If nothing has been pushed on the stack at all
21109 then this will return -4. This *is* correct! */
21110 return offsets->outgoing_args - (offsets->saved_args + 4);
21112 default:
21113 gcc_unreachable ();
21115 gcc_unreachable ();
21117 case FRAME_POINTER_REGNUM:
21118 switch (to)
21120 case THUMB_HARD_FRAME_POINTER_REGNUM:
21121 return 0;
21123 case ARM_HARD_FRAME_POINTER_REGNUM:
21124 /* The hard frame pointer points to the top entry in the
21125 stack frame. The soft frame pointer to the bottom entry
21126 in the stack frame. If there is no stack frame at all,
21127 then they are identical. */
21129 return offsets->frame - offsets->soft_frame;
21131 case STACK_POINTER_REGNUM:
21132 return offsets->outgoing_args - offsets->soft_frame;
21134 default:
21135 gcc_unreachable ();
21137 gcc_unreachable ();
21139 default:
21140 /* You cannot eliminate from the stack pointer.
21141 In theory you could eliminate from the hard frame
21142 pointer to the stack pointer, but this will never
21143 happen, since if a stack frame is not needed the
21144 hard frame pointer will never be used. */
21145 gcc_unreachable ();
21149 /* Given FROM and TO register numbers, say whether this elimination is
21150 allowed. Frame pointer elimination is automatically handled.
21152 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21153 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21154 pointer, we must eliminate FRAME_POINTER_REGNUM into
21155 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21156 ARG_POINTER_REGNUM. */
21158 bool
21159 arm_can_eliminate (const int from, const int to)
21161 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21162 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21163 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21164 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21165 true);
21168 /* Emit RTL to save coprocessor registers on function entry. Returns the
21169 number of bytes pushed. */
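/* For example, if d8-d15 are live across the function, the VFP loop below
   emits a single store-multiple covering d8-d15 via vfp_emit_fstmd and 64
   is returned; any live call-saved iWMMXt registers are pushed one at a
   time (8 bytes each) before that.  */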
21171 static int
21172 arm_save_coproc_regs(void)
21174 int saved_size = 0;
21175 unsigned reg;
21176 unsigned start_reg;
21177 rtx insn;
21179 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21180 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21182 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21183 insn = gen_rtx_MEM (V2SImode, insn);
21184 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21185 RTX_FRAME_RELATED_P (insn) = 1;
21186 saved_size += 8;
21189 if (TARGET_HARD_FLOAT && TARGET_VFP)
21191 start_reg = FIRST_VFP_REGNUM;
21193 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21195 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21196 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21198 if (start_reg != reg)
21199 saved_size += vfp_emit_fstmd (start_reg,
21200 (reg - start_reg) / 2);
21201 start_reg = reg + 2;
21204 if (start_reg != reg)
21205 saved_size += vfp_emit_fstmd (start_reg,
21206 (reg - start_reg) / 2);
21208 return saved_size;
21212 /* Set the Thumb frame pointer from the stack pointer. */
21214 static void
21215 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21217 HOST_WIDE_INT amount;
21218 rtx insn, dwarf;
21220 amount = offsets->outgoing_args - offsets->locals_base;
21221 if (amount < 1024)
21222 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21223 stack_pointer_rtx, GEN_INT (amount)));
21224 else
21226 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21227 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21228 expects the first two operands to be the same. */
21229 if (TARGET_THUMB2)
21231 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21232 stack_pointer_rtx,
21233 hard_frame_pointer_rtx));
21235 else
21237 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21238 hard_frame_pointer_rtx,
21239 stack_pointer_rtx));
21241 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21242 plus_constant (Pmode, stack_pointer_rtx, amount));
21243 RTX_FRAME_RELATED_P (dwarf) = 1;
21244 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21247 RTX_FRAME_RELATED_P (insn) = 1;
21250 struct scratch_reg {
21251 rtx reg;
21252 bool saved;
21255 /* Return a short-lived scratch register for use as a 2nd scratch register on
21256 function entry after the registers are saved in the prologue. This register
21257 must be released by means of release_scratch_register_on_entry. IP is not
21258 considered since it is always used as the 1st scratch register if available.
21260 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21261 mask of live registers. */
21263 static void
21264 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21265 unsigned long live_regs)
21267 int regno = -1;
21269 sr->saved = false;
21271 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21272 regno = LR_REGNUM;
21273 else
21275 unsigned int i;
21277 for (i = 4; i < 11; i++)
21278 if (regno1 != i && (live_regs & (1 << i)) != 0)
21280 regno = i;
21281 break;
21284 if (regno < 0)
21286 /* If IP is used as the 1st scratch register for a nested function,
21287 then either r3 wasn't available or is used to preserve IP. */
21288 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21289 regno1 = 3;
21290 regno = (regno1 == 3 ? 2 : 3);
21291 sr->saved
21292 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21293 regno);
21297 sr->reg = gen_rtx_REG (SImode, regno);
21298 if (sr->saved)
21300 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21301 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21302 rtx x = gen_rtx_SET (stack_pointer_rtx,
21303 plus_constant (Pmode, stack_pointer_rtx, -4));
21304 RTX_FRAME_RELATED_P (insn) = 1;
21305 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21309 /* Release a scratch register obtained from the preceding function. */
21311 static void
21312 release_scratch_register_on_entry (struct scratch_reg *sr)
21314 if (sr->saved)
21316 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21317 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21318 rtx x = gen_rtx_SET (stack_pointer_rtx,
21319 plus_constant (Pmode, stack_pointer_rtx, 4));
21320 RTX_FRAME_RELATED_P (insn) = 1;
21321 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
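/* Typical usage of the two helpers above (a sketch, mirroring
   arm_emit_probe_stack_range below):

     struct scratch_reg sr;

     get_scratch_register_on_entry (&sr, regno1, live_regs);
     ... use sr.reg as a temporary ...
     release_scratch_register_on_entry (&sr);

   If the chosen register was live on entry (sr.saved), the two calls
   bracket its use with a push and a matching pop.  */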
21325 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21327 #if PROBE_INTERVAL > 4096
21328 #error Cannot use indexed addressing mode for stack probing
21329 #endif
21331 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21332 inclusive. These are offsets from the current stack pointer. REGNO1
21333 is the index number of the 1st scratch register and LIVE_REGS is the
21334 mask of live registers. */
21336 static void
21337 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21338 unsigned int regno1, unsigned long live_regs)
21340 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21342 /* See if we have a constant small number of probes to generate. If so,
21343 that's the easy case. */
21344 if (size <= PROBE_INTERVAL)
21346 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21347 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21348 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21351 /* The run-time loop is made up of 10 insns in the generic case while the
21352 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21353 else if (size <= 5 * PROBE_INTERVAL)
21355 HOST_WIDE_INT i, rem;
21357 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21358 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21359 emit_stack_probe (reg1);
21361 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21362 it exceeds SIZE. If only two probes are needed, this will not
21363 generate any code. Then probe at FIRST + SIZE. */
21364 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21366 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21367 emit_stack_probe (reg1);
21370 rem = size - (i - PROBE_INTERVAL);
21371 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21373 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21374 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21376 else
21377 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21380 /* Otherwise, do the same as above, but in a loop. Note that we must be
21381 extra careful with variables wrapping around because we might be at
21382 the very top (or the very bottom) of the address space and we have
21383 to be able to handle this case properly; in particular, we use an
21384 equality test for the loop condition. */
21385 else
21387 HOST_WIDE_INT rounded_size;
21388 struct scratch_reg sr;
21390 get_scratch_register_on_entry (&sr, regno1, live_regs);
21392 emit_move_insn (reg1, GEN_INT (first));
21395 /* Step 1: round SIZE to the previous multiple of the interval. */
21397 rounded_size = size & -PROBE_INTERVAL;
21398 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21401 /* Step 2: compute initial and final value of the loop counter. */
21403 /* TEST_ADDR = SP + FIRST. */
21404 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21406 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21407 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21410 /* Step 3: the loop
21414 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21415 probe at TEST_ADDR
21417 while (TEST_ADDR != LAST_ADDR)
21419 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21420 until it is equal to ROUNDED_SIZE. */
21422 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21425 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21426 that SIZE is equal to ROUNDED_SIZE. */
21428 if (size != rounded_size)
21430 HOST_WIDE_INT rem = size - rounded_size;
21432 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21434 emit_set_insn (sr.reg,
21435 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21436 emit_stack_probe (plus_constant (Pmode, sr.reg,
21437 PROBE_INTERVAL - rem));
21439 else
21440 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21443 release_scratch_register_on_entry (&sr);
21446 /* Make sure nothing is scheduled before we are done. */
21447 emit_insn (gen_blockage ());
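/* As a worked illustration of the second case above (a sketch, assuming a
   4096-byte PROBE_INTERVAL and a non-Thumb-2 target): for FIRST == 4096 and
   SIZE == 10000, probes are emitted at SP - 8192 and SP - 12288, followed by
   a final probe at SP - 14096, i.e. exactly at FIRST + SIZE below the
   incoming stack pointer.  */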
21450 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21451 absolute addresses. */
21453 const char *
21454 output_probe_stack_range (rtx reg1, rtx reg2)
21456 static int labelno = 0;
21457 char loop_lab[32];
21458 rtx xops[2];
21460 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21462 /* Loop. */
21463 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21465 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21466 xops[0] = reg1;
21467 xops[1] = GEN_INT (PROBE_INTERVAL);
21468 output_asm_insn ("sub\t%0, %0, %1", xops);
21470 /* Probe at TEST_ADDR. */
21471 output_asm_insn ("str\tr0, [%0, #0]", xops);
21473 /* Test if TEST_ADDR == LAST_ADDR. */
21474 xops[1] = reg2;
21475 output_asm_insn ("cmp\t%0, %1", xops);
21477 /* Branch. */
21478 fputs ("\tbne\t", asm_out_file);
21479 assemble_name_raw (asm_out_file, loop_lab);
21480 fputc ('\n', asm_out_file);
21482 return "";
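/* The assembly emitted above forms a loop along these lines (a sketch; r4
   and r5 stand for REG1 and REG2, 4096 for the default PROBE_INTERVAL and
   the label name is shown schematically):

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
*/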
21485 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21486 function. */
21487 void
21488 arm_expand_prologue (void)
21490 rtx amount;
21491 rtx insn;
21492 rtx ip_rtx;
21493 unsigned long live_regs_mask;
21494 unsigned long func_type;
21495 int fp_offset = 0;
21496 int saved_pretend_args = 0;
21497 int saved_regs = 0;
21498 unsigned HOST_WIDE_INT args_to_push;
21499 HOST_WIDE_INT size;
21500 arm_stack_offsets *offsets;
21501 bool clobber_ip;
21503 func_type = arm_current_func_type ();
21505 /* Naked functions don't have prologues. */
21506 if (IS_NAKED (func_type))
21508 if (flag_stack_usage_info)
21509 current_function_static_stack_size = 0;
21510 return;
21513 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21514 args_to_push = crtl->args.pretend_args_size;
21516 /* Compute which register we will have to save onto the stack. */
21517 offsets = arm_get_frame_offsets ();
21518 live_regs_mask = offsets->saved_regs_mask;
21520 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21522 if (IS_STACKALIGN (func_type))
21524 rtx r0, r1;
21526 /* Handle a word-aligned stack pointer. We generate the following:
21528 mov r0, sp
21529 bic r1, r0, #7
21530 mov sp, r1
21531 <save and restore r0 in normal prologue/epilogue>
21532 mov sp, r0
21533 bx lr
21535 The unwinder doesn't need to know about the stack realignment.
21536 Just tell it we saved SP in r0. */
21537 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21539 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21540 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21542 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21543 RTX_FRAME_RELATED_P (insn) = 1;
21544 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21546 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21548 /* ??? The CFA changes here, which may cause GDB to conclude that it
21549 has entered a different function. That said, the unwind info is
21550 correct, individually, before and after this instruction because
21551 we've described the save of SP, which will override the default
21552 handling of SP as restoring from the CFA. */
21553 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21556 /* The static chain register is the same as the IP register. If it is
21557 clobbered when creating the frame, we need to save and restore it. */
21558 clobber_ip = IS_NESTED (func_type)
21559 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21560 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21561 && !df_regs_ever_live_p (LR_REGNUM)
21562 && arm_r3_live_at_start_p ()));
21564 /* Find somewhere to store IP whilst the frame is being created.
21565 We try the following places in order:
21567 1. The last argument register r3 if it is available.
21568 2. A slot on the stack above the frame if there are no
21569 arguments to push onto the stack.
21570 3. Register r3 again, after pushing the argument registers
21571 onto the stack, if this is a varargs function.
21572 4. The last slot on the stack created for the arguments to
21573 push, if this isn't a varargs function.
21575 Note - we only need to tell the dwarf2 backend about the SP
21576 adjustment in the second variant; the static chain register
21577 doesn't need to be unwound, as it doesn't contain a value
21578 inherited from the caller. */
21579 if (clobber_ip)
21581 if (!arm_r3_live_at_start_p ())
21582 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21583 else if (args_to_push == 0)
21585 rtx addr, dwarf;
21587 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21588 saved_regs += 4;
21590 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21591 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21592 fp_offset = 4;
21594 /* Just tell the dwarf backend that we adjusted SP. */
21595 dwarf = gen_rtx_SET (stack_pointer_rtx,
21596 plus_constant (Pmode, stack_pointer_rtx,
21597 -fp_offset));
21598 RTX_FRAME_RELATED_P (insn) = 1;
21599 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21601 else
21603 /* Store the args on the stack. */
21604 if (cfun->machine->uses_anonymous_args)
21606 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21607 (0xf0 >> (args_to_push / 4)) & 0xf);
21608 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21609 saved_pretend_args = 1;
21611 else
21613 rtx addr, dwarf;
21615 if (args_to_push == 4)
21616 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21617 else
21618 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21619 plus_constant (Pmode,
21620 stack_pointer_rtx,
21621 -args_to_push));
21623 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21625 /* Just tell the dwarf backend that we adjusted SP. */
21626 dwarf = gen_rtx_SET (stack_pointer_rtx,
21627 plus_constant (Pmode, stack_pointer_rtx,
21628 -args_to_push));
21629 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21632 RTX_FRAME_RELATED_P (insn) = 1;
21633 fp_offset = args_to_push;
21634 args_to_push = 0;
21638 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21640 if (IS_INTERRUPT (func_type))
21642 /* Interrupt functions must not corrupt any registers.
21643 Creating a frame pointer however, corrupts the IP
21644 register, so we must push it first. */
21645 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21647 /* Do not set RTX_FRAME_RELATED_P on this insn.
21648 The dwarf stack unwinding code only wants to see one
21649 stack decrement per function, and this is not it. If
21650 this instruction is labeled as being part of the frame
21651 creation sequence then dwarf2out_frame_debug_expr will
21652 die when it encounters the assignment of IP to FP
21653 later on, since the use of SP here establishes SP as
21654 the CFA register and not IP.
21656 Anyway this instruction is not really part of the stack
21657 frame creation although it is part of the prologue. */
21660 insn = emit_set_insn (ip_rtx,
21661 plus_constant (Pmode, stack_pointer_rtx,
21662 fp_offset));
21663 RTX_FRAME_RELATED_P (insn) = 1;
21666 if (args_to_push)
21668 /* Push the argument registers, or reserve space for them. */
21669 if (cfun->machine->uses_anonymous_args)
21670 insn = emit_multi_reg_push
21671 ((0xf0 >> (args_to_push / 4)) & 0xf,
21672 (0xf0 >> (args_to_push / 4)) & 0xf);
21673 else
21674 insn = emit_insn
21675 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21676 GEN_INT (- args_to_push)));
21677 RTX_FRAME_RELATED_P (insn) = 1;
21680 /* If this is an interrupt service routine, and the link register
21681 is going to be pushed, and we're not generating extra
21682 push of IP (needed when a frame is needed and the frame layout uses APCS),
21683 subtracting four from LR now will mean that the function return
21684 can be done with a single instruction. */
21685 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21686 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21687 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21688 && TARGET_ARM)
21690 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21692 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21695 if (live_regs_mask)
21697 unsigned long dwarf_regs_mask = live_regs_mask;
21699 saved_regs += bit_count (live_regs_mask) * 4;
21700 if (optimize_size && !frame_pointer_needed
21701 && saved_regs == offsets->saved_regs - offsets->saved_args)
21703 /* If no coprocessor registers are being pushed and we don't have
21704 to worry about a frame pointer then push extra registers to
21705 create the stack frame. This is done in a way that does not
21706 alter the frame layout, so is independent of the epilogue. */
21707 int n;
21708 int frame;
21709 n = 0;
21710 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21711 n++;
21712 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21713 if (frame && n * 4 >= frame)
21715 n = frame / 4;
21716 live_regs_mask |= (1 << n) - 1;
21717 saved_regs += frame;
21721 if (TARGET_LDRD
21722 && current_tune->prefer_ldrd_strd
21723 && !optimize_function_for_size_p (cfun))
21725 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21726 if (TARGET_THUMB2)
21727 thumb2_emit_strd_push (live_regs_mask);
21728 else if (TARGET_ARM
21729 && !TARGET_APCS_FRAME
21730 && !IS_INTERRUPT (func_type))
21731 arm_emit_strd_push (live_regs_mask);
21732 else
21734 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21735 RTX_FRAME_RELATED_P (insn) = 1;
21738 else
21740 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21741 RTX_FRAME_RELATED_P (insn) = 1;
21745 if (! IS_VOLATILE (func_type))
21746 saved_regs += arm_save_coproc_regs ();
21748 if (frame_pointer_needed && TARGET_ARM)
21750 /* Create the new frame pointer. */
21751 if (TARGET_APCS_FRAME)
21753 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21754 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21755 RTX_FRAME_RELATED_P (insn) = 1;
21757 else
21759 insn = GEN_INT (saved_regs - (4 + fp_offset));
21760 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21761 stack_pointer_rtx, insn));
21762 RTX_FRAME_RELATED_P (insn) = 1;
21766 size = offsets->outgoing_args - offsets->saved_args;
21767 if (flag_stack_usage_info)
21768 current_function_static_stack_size = size;
21770 /* If this isn't an interrupt service routine and we have a frame, then do
21771 stack checking. We use IP as the first scratch register, except for the
21772 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21773 if (!IS_INTERRUPT (func_type)
21774 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21776 unsigned int regno;
21778 if (!IS_NESTED (func_type) || clobber_ip)
21779 regno = IP_REGNUM;
21780 else if (df_regs_ever_live_p (LR_REGNUM))
21781 regno = LR_REGNUM;
21782 else
21783 regno = 3;
21785 if (crtl->is_leaf && !cfun->calls_alloca)
21787 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21788 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21789 size - STACK_CHECK_PROTECT,
21790 regno, live_regs_mask);
21792 else if (size > 0)
21793 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21794 regno, live_regs_mask);
21797 /* Recover the static chain register. */
21798 if (clobber_ip)
21800 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21801 insn = gen_rtx_REG (SImode, 3);
21802 else
21804 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21805 insn = gen_frame_mem (SImode, insn);
21807 emit_set_insn (ip_rtx, insn);
21808 emit_insn (gen_force_register_use (ip_rtx));
21811 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21813 /* This add can produce multiple insns for a large constant, so we
21814 need to get tricky. */
21815 rtx_insn *last = get_last_insn ();
21817 amount = GEN_INT (offsets->saved_args + saved_regs
21818 - offsets->outgoing_args);
21820 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21821 amount));
21824 last = last ? NEXT_INSN (last) : get_insns ();
21825 RTX_FRAME_RELATED_P (last) = 1;
21827 while (last != insn);
21829 /* If the frame pointer is needed, emit a special barrier that
21830 will prevent the scheduler from moving stores to the frame
21831 before the stack adjustment. */
21832 if (frame_pointer_needed)
21833 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21834 hard_frame_pointer_rtx));
21838 if (frame_pointer_needed && TARGET_THUMB2)
21839 thumb_set_frame_pointer (offsets);
21841 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21843 unsigned long mask;
21845 mask = live_regs_mask;
21846 mask &= THUMB2_WORK_REGS;
21847 if (!IS_NESTED (func_type))
21848 mask |= (1 << IP_REGNUM);
21849 arm_load_pic_register (mask);
21852 /* If we are profiling, make sure no instructions are scheduled before
21853 the call to mcount. Similarly if the user has requested no
21854 scheduling in the prolog. Similarly if we want non-call exceptions
21855 using the EABI unwinder, to prevent faulting instructions from being
21856 swapped with a stack adjustment. */
21857 if (crtl->profile || !TARGET_SCHED_PROLOG
21858 || (arm_except_unwind_info (&global_options) == UI_TARGET
21859 && cfun->can_throw_non_call_exceptions))
21860 emit_insn (gen_blockage ());
21862 /* If the link register is being kept alive, with the return address in it,
21863 then make sure that it does not get reused by the ce2 pass. */
21864 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21865 cfun->machine->lr_save_eliminated = 1;
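/* As a rough illustration (a sketch, not the literal output of the function
   above), a typical TARGET_APCS_FRAME prologue follows this shape:

	mov	ip, sp
	push	{fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<locals>

   where the register list and constants depend on the frame layout computed
   by arm_get_frame_offsets and <locals> is a placeholder.  */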
21868 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21869 static void
21870 arm_print_condition (FILE *stream)
21872 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21874 /* Branch conversion is not implemented for Thumb-2. */
21875 if (TARGET_THUMB)
21877 output_operand_lossage ("predicated Thumb instruction");
21878 return;
21880 if (current_insn_predicate != NULL)
21882 output_operand_lossage
21883 ("predicated instruction in conditional sequence");
21884 return;
21887 fputs (arm_condition_codes[arm_current_cc], stream);
21889 else if (current_insn_predicate)
21891 enum arm_cond_code code;
21893 if (TARGET_THUMB1)
21895 output_operand_lossage ("predicated Thumb instruction");
21896 return;
21899 code = get_arm_condition_code (current_insn_predicate);
21900 fputs (arm_condition_codes[code], stream);
21905 /* Globally reserved letters: acln
21906 Punctuation letters currently used: @_|?().!#
21907 Lower case letters currently used: bcdefhimpqtvwxyz
21908 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21909 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21911 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21913 If CODE is 'd', then the X is a condition operand and the instruction
21914 should only be executed if the condition is true.
21915 If CODE is 'D', then the X is a condition operand and the instruction
21916 should only be executed if the condition is false: however, if the mode
21917 of the comparison is CCFPEmode, then always execute the instruction -- we
21918 do this because in these circumstances !GE does not necessarily imply LT;
21919 in these cases the instruction pattern will take care to make sure that
21920 an instruction containing %d will follow, thereby undoing the effects of
21921 doing this instruction unconditionally.
21922 If CODE is 'N' then X is a floating point operand that must be negated
21923 before output.
21924 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21925 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21926 static void
21927 arm_print_operand (FILE *stream, rtx x, int code)
21929 switch (code)
21931 case '@':
21932 fputs (ASM_COMMENT_START, stream);
21933 return;
21935 case '_':
21936 fputs (user_label_prefix, stream);
21937 return;
21939 case '|':
21940 fputs (REGISTER_PREFIX, stream);
21941 return;
21943 case '?':
21944 arm_print_condition (stream);
21945 return;
21947 case '.':
21948 /* The current condition code for a condition code setting instruction.
21949 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21950 fputc ('s', stream);
21951 arm_print_condition (stream);
21952 return;
21954 case '!':
21955 /* If the instruction is conditionally executed then print
21956 the current condition code, otherwise print 's'. */
21957 gcc_assert (TARGET_THUMB2);
21958 if (current_insn_predicate)
21959 arm_print_condition (stream);
21960 else
21961 fputc ('s', stream);
21962 break;
21964 /* %# is a "break" sequence. It doesn't output anything, but is used to
21965 separate e.g. operand numbers from following text, if that text consists
21966 of further digits which we don't want to be part of the operand
21967 number. */
21968 case '#':
21969 return;
21971 case 'N':
21973 REAL_VALUE_TYPE r;
21974 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21975 fprintf (stream, "%s", fp_const_from_val (&r));
21977 return;
21979 /* An integer or symbol address without a preceding # sign. */
21980 case 'c':
21981 switch (GET_CODE (x))
21983 case CONST_INT:
21984 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21985 break;
21987 case SYMBOL_REF:
21988 output_addr_const (stream, x);
21989 break;
21991 case CONST:
21992 if (GET_CODE (XEXP (x, 0)) == PLUS
21993 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21995 output_addr_const (stream, x);
21996 break;
21998 /* Fall through. */
22000 default:
22001 output_operand_lossage ("Unsupported operand for code '%c'", code);
22003 return;
22005 /* An integer that we want to print in HEX. */
22006 case 'x':
22007 switch (GET_CODE (x))
22009 case CONST_INT:
22010 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22011 break;
22013 default:
22014 output_operand_lossage ("Unsupported operand for code '%c'", code);
22016 return;
22018 case 'B':
22019 if (CONST_INT_P (x))
22021 HOST_WIDE_INT val;
22022 val = ARM_SIGN_EXTEND (~INTVAL (x));
22023 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22025 else
22027 putc ('~', stream);
22028 output_addr_const (stream, x);
22030 return;
22032 case 'b':
22033 /* Print the log2 of a CONST_INT. */
22035 HOST_WIDE_INT val;
22037 if (!CONST_INT_P (x)
22038 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22039 output_operand_lossage ("Unsupported operand for code '%c'", code);
22040 else
22041 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22043 return;
22045 case 'L':
22046 /* The low 16 bits of an immediate constant. */
22047 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22048 return;
22050 case 'i':
22051 fprintf (stream, "%s", arithmetic_instr (x, 1));
22052 return;
22054 case 'I':
22055 fprintf (stream, "%s", arithmetic_instr (x, 0));
22056 return;
22058 case 'S':
22060 HOST_WIDE_INT val;
22061 const char *shift;
22063 shift = shift_op (x, &val);
22065 if (shift)
22067 fprintf (stream, ", %s ", shift);
22068 if (val == -1)
22069 arm_print_operand (stream, XEXP (x, 1), 0);
22070 else
22071 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22074 return;
22076 /* An explanation of the 'Q', 'R' and 'H' register operands:
22078 In a pair of registers containing a DI or DF value the 'Q'
22079 operand returns the register number of the register containing
22080 the least significant part of the value. The 'R' operand returns
22081 the register number of the register containing the most
22082 significant part of the value.
22084 The 'H' operand returns the higher of the two register numbers.
22085 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22086 same as the 'Q' operand, since the most significant part of the
22087 value is held in the lower number register. The reverse is true
22088 on systems where WORDS_BIG_ENDIAN is false.
22090 The purpose of these operands is to distinguish between cases
22091 where the endian-ness of the values is important (for example
22092 when they are added together), and cases where the endian-ness
22093 is irrelevant, but the order of register operations is important.
22094 For example when loading a value from memory into a register
22095 pair, the endian-ness does not matter. Provided that the value
22096 from the lower memory address is put into the lower numbered
22097 register, and the value from the higher address is put into the
22098 higher numbered register, the load will work regardless of whether
22099 the value being loaded is big-wordian or little-wordian. The
22100 order of the two register loads can matter however, if the address
22101 of the memory location is actually held in one of the registers
22102 being overwritten by the load.
22104 The 'Q' and 'R' constraints are also available for 64-bit
22105 constants. */
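/* For example (a sketch, assuming WORDS_BIG_ENDIAN is false), for a
   DImode value held in the pair {r2, r3}: %Q prints r2 (the least
   significant half), %R prints r3 (the most significant half) and %H
   prints r3 (the higher-numbered register).  */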
22106 case 'Q':
22107 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22109 rtx part = gen_lowpart (SImode, x);
22110 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22111 return;
22114 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22116 output_operand_lossage ("invalid operand for code '%c'", code);
22117 return;
22120 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22121 return;
22123 case 'R':
22124 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22126 machine_mode mode = GET_MODE (x);
22127 rtx part;
22129 if (mode == VOIDmode)
22130 mode = DImode;
22131 part = gen_highpart_mode (SImode, mode, x);
22132 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22133 return;
22136 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22138 output_operand_lossage ("invalid operand for code '%c'", code);
22139 return;
22142 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22143 return;
22145 case 'H':
22146 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22148 output_operand_lossage ("invalid operand for code '%c'", code);
22149 return;
22152 asm_fprintf (stream, "%r", REGNO (x) + 1);
22153 return;
22155 case 'J':
22156 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22158 output_operand_lossage ("invalid operand for code '%c'", code);
22159 return;
22162 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22163 return;
22165 case 'K':
22166 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22168 output_operand_lossage ("invalid operand for code '%c'", code);
22169 return;
22172 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22173 return;
22175 case 'm':
22176 asm_fprintf (stream, "%r",
22177 REG_P (XEXP (x, 0))
22178 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22179 return;
22181 case 'M':
22182 asm_fprintf (stream, "{%r-%r}",
22183 REGNO (x),
22184 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22185 return;
22187 /* Like 'M', but writing doubleword vector registers, for use by Neon
22188 insns. */
22189 case 'h':
22191 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22192 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22193 if (numregs == 1)
22194 asm_fprintf (stream, "{d%d}", regno);
22195 else
22196 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22198 return;
22200 case 'd':
22201 /* CONST_TRUE_RTX means always -- that's the default. */
22202 if (x == const_true_rtx)
22203 return;
22205 if (!COMPARISON_P (x))
22207 output_operand_lossage ("invalid operand for code '%c'", code);
22208 return;
22211 fputs (arm_condition_codes[get_arm_condition_code (x)],
22212 stream);
22213 return;
22215 case 'D':
22216 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22217 want to do that. */
22218 if (x == const_true_rtx)
22220 output_operand_lossage ("instruction never executed");
22221 return;
22223 if (!COMPARISON_P (x))
22225 output_operand_lossage ("invalid operand for code '%c'", code);
22226 return;
22229 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22230 (get_arm_condition_code (x))],
22231 stream);
22232 return;
22234 case 's':
22235 case 'V':
22236 case 'W':
22237 case 'X':
22238 case 'Y':
22239 case 'Z':
22240 /* Former Maverick support, removed after GCC-4.7. */
22241 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22242 return;
22244 case 'U':
22245 if (!REG_P (x)
22246 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22247 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22248 /* Bad value for wCG register number. */
22250 output_operand_lossage ("invalid operand for code '%c'", code);
22251 return;
22254 else
22255 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22256 return;
22258 /* Print an iWMMXt control register name. */
22259 case 'w':
22260 if (!CONST_INT_P (x)
22261 || INTVAL (x) < 0
22262 || INTVAL (x) >= 16)
22263 /* Bad value for wC register number. */
22265 output_operand_lossage ("invalid operand for code '%c'", code);
22266 return;
22269 else
22271 static const char * wc_reg_names [16] =
22273 "wCID", "wCon", "wCSSF", "wCASF",
22274 "wC4", "wC5", "wC6", "wC7",
22275 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22276 "wC12", "wC13", "wC14", "wC15"
22279 fputs (wc_reg_names [INTVAL (x)], stream);
22281 return;
22283 /* Print the high single-precision register of a VFP double-precision
22284 register. */
22285 case 'p':
22287 machine_mode mode = GET_MODE (x);
22288 int regno;
22290 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22292 output_operand_lossage ("invalid operand for code '%c'", code);
22293 return;
22296 regno = REGNO (x);
22297 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22299 output_operand_lossage ("invalid operand for code '%c'", code);
22300 return;
22303 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22305 return;
22307 /* Print a VFP/Neon double precision or quad precision register name. */
22308 case 'P':
22309 case 'q':
22311 machine_mode mode = GET_MODE (x);
22312 int is_quad = (code == 'q');
22313 int regno;
22315 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22317 output_operand_lossage ("invalid operand for code '%c'", code);
22318 return;
22321 if (!REG_P (x)
22322 || !IS_VFP_REGNUM (REGNO (x)))
22324 output_operand_lossage ("invalid operand for code '%c'", code);
22325 return;
22328 regno = REGNO (x);
22329 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22330 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22332 output_operand_lossage ("invalid operand for code '%c'", code);
22333 return;
22336 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22337 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22339 return;
22341 /* These two codes print the low/high doubleword register of a Neon quad
22342 register, respectively. For pair-structure types, they can also print
22343 low/high quadword registers. */
22344 case 'e':
22345 case 'f':
22347 machine_mode mode = GET_MODE (x);
22348 int regno;
22350 if ((GET_MODE_SIZE (mode) != 16
22351 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22353 output_operand_lossage ("invalid operand for code '%c'", code);
22354 return;
22357 regno = REGNO (x);
22358 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22360 output_operand_lossage ("invalid operand for code '%c'", code);
22361 return;
22364 if (GET_MODE_SIZE (mode) == 16)
22365 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22366 + (code == 'f' ? 1 : 0));
22367 else
22368 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22369 + (code == 'f' ? 1 : 0));
22371 return;
22373 /* Print a VFPv3 floating-point constant, represented as an integer
22374 index. */
22375 case 'G':
22377 int index = vfp3_const_double_index (x);
22378 gcc_assert (index != -1);
22379 fprintf (stream, "%d", index);
22381 return;
22383 /* Print bits representing opcode features for Neon.
22385 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22386 and polynomials as unsigned.
22388 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22390 Bit 2 is 1 for rounding functions, 0 otherwise. */
22392 /* Identify the type as 's', 'u', 'p' or 'f'. */
22393 case 'T':
22395 HOST_WIDE_INT bits = INTVAL (x);
22396 fputc ("uspf"[bits & 3], stream);
22398 return;
22400 /* Likewise, but signed and unsigned integers are both 'i'. */
22401 case 'F':
22403 HOST_WIDE_INT bits = INTVAL (x);
22404 fputc ("iipf"[bits & 3], stream);
22406 return;
22408 /* As for 'T', but emit 'u' instead of 'p'. */
22409 case 't':
22411 HOST_WIDE_INT bits = INTVAL (x);
22412 fputc ("usuf"[bits & 3], stream);
22414 return;
22416 /* Bit 2: rounding (vs none). */
22417 case 'O':
22419 HOST_WIDE_INT bits = INTVAL (x);
22420 fputs ((bits & 4) != 0 ? "r" : "", stream);
22422 return;
22424 /* Memory operand for vld1/vst1 instruction. */
22425 case 'A':
22427 rtx addr;
22428 bool postinc = FALSE;
22429 rtx postinc_reg = NULL;
22430 unsigned align, memsize, align_bits;
22432 gcc_assert (MEM_P (x));
22433 addr = XEXP (x, 0);
22434 if (GET_CODE (addr) == POST_INC)
22436 postinc = 1;
22437 addr = XEXP (addr, 0);
22439 if (GET_CODE (addr) == POST_MODIFY)
22441 postinc_reg = XEXP (XEXP (addr, 1), 1);
22442 addr = XEXP (addr, 0);
22444 asm_fprintf (stream, "[%r", REGNO (addr));
22446 /* We know the alignment of this access, so we can emit a hint in the
22447 instruction (for some alignments) as an aid to the memory subsystem
22448 of the target. */
22449 align = MEM_ALIGN (x) >> 3;
22450 memsize = MEM_SIZE (x);
22452 /* Only certain alignment specifiers are supported by the hardware. */
22453 if (memsize == 32 && (align % 32) == 0)
22454 align_bits = 256;
22455 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22456 align_bits = 128;
22457 else if (memsize >= 8 && (align % 8) == 0)
22458 align_bits = 64;
22459 else
22460 align_bits = 0;
22462 if (align_bits != 0)
22463 asm_fprintf (stream, ":%d", align_bits);
22465 asm_fprintf (stream, "]");
22467 if (postinc)
22468 fputs ("!", stream);
22469 if (postinc_reg)
22470 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22472 return;
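/* For example (a sketch), a 16-byte access through r0 with 128-bit
   alignment and a post-increment address is printed as "[r0:128]!".  */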
22474 case 'C':
22476 rtx addr;
22478 gcc_assert (MEM_P (x));
22479 addr = XEXP (x, 0);
22480 gcc_assert (REG_P (addr));
22481 asm_fprintf (stream, "[%r]", REGNO (addr));
22483 return;
22485 /* Translate an S register number into a D register number and element index. */
22486 case 'y':
22488 machine_mode mode = GET_MODE (x);
22489 int regno;
22491 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22493 output_operand_lossage ("invalid operand for code '%c'", code);
22494 return;
22497 regno = REGNO (x);
22498 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22500 output_operand_lossage ("invalid operand for code '%c'", code);
22501 return;
22504 regno = regno - FIRST_VFP_REGNUM;
22505 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22507 return;
22509 case 'v':
22510 gcc_assert (CONST_DOUBLE_P (x));
22511 int result;
22512 result = vfp3_const_double_for_fract_bits (x);
22513 if (result == 0)
22514 result = vfp3_const_double_for_bits (x);
22515 fprintf (stream, "#%d", result);
22516 return;
22518 /* Register specifier for vld1.16/vst1.16. Translate the S register
22519 number into a D register number and element index. */
22520 case 'z':
22522 machine_mode mode = GET_MODE (x);
22523 int regno;
22525 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22527 output_operand_lossage ("invalid operand for code '%c'", code);
22528 return;
22531 regno = REGNO (x);
22532 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22534 output_operand_lossage ("invalid operand for code '%c'", code);
22535 return;
22538 regno = regno - FIRST_VFP_REGNUM;
22539 fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
22541 return;
22543 default:
22544 if (x == 0)
22546 output_operand_lossage ("missing operand");
22547 return;
22550 switch (GET_CODE (x))
22552 case REG:
22553 asm_fprintf (stream, "%r", REGNO (x));
22554 break;
22556 case MEM:
22557 output_address (GET_MODE (x), XEXP (x, 0));
22558 break;
22560 case CONST_DOUBLE:
22562 char fpstr[20];
22563 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22564 sizeof (fpstr), 0, 1);
22565 fprintf (stream, "#%s", fpstr);
22567 break;
22569 default:
22570 gcc_assert (GET_CODE (x) != NEG);
22571 fputc ('#', stream);
22572 if (GET_CODE (x) == HIGH)
22574 fputs (":lower16:", stream);
22575 x = XEXP (x, 0);
22578 output_addr_const (stream, x);
22579 break;
22584 /* Target hook for printing a memory address. */
22585 static void
22586 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22588 if (TARGET_32BIT)
22590 int is_minus = GET_CODE (x) == MINUS;
22592 if (REG_P (x))
22593 asm_fprintf (stream, "[%r]", REGNO (x));
22594 else if (GET_CODE (x) == PLUS || is_minus)
22596 rtx base = XEXP (x, 0);
22597 rtx index = XEXP (x, 1);
22598 HOST_WIDE_INT offset = 0;
22599 if (!REG_P (base)
22600 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22602 /* Ensure that BASE is a register. */
22603 /* (one of them must be). */
22604 /* Also ensure the SP is not used as an index register. */
22605 std::swap (base, index);
22607 switch (GET_CODE (index))
22609 case CONST_INT:
22610 offset = INTVAL (index);
22611 if (is_minus)
22612 offset = -offset;
22613 asm_fprintf (stream, "[%r, #%wd]",
22614 REGNO (base), offset);
22615 break;
22617 case REG:
22618 asm_fprintf (stream, "[%r, %s%r]",
22619 REGNO (base), is_minus ? "-" : "",
22620 REGNO (index));
22621 break;
22623 case MULT:
22624 case ASHIFTRT:
22625 case LSHIFTRT:
22626 case ASHIFT:
22627 case ROTATERT:
22629 asm_fprintf (stream, "[%r, %s%r",
22630 REGNO (base), is_minus ? "-" : "",
22631 REGNO (XEXP (index, 0)));
22632 arm_print_operand (stream, index, 'S');
22633 fputs ("]", stream);
22634 break;
22637 default:
22638 gcc_unreachable ();
22641 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22642 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22644 gcc_assert (REG_P (XEXP (x, 0)));
22646 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22647 asm_fprintf (stream, "[%r, #%s%d]!",
22648 REGNO (XEXP (x, 0)),
22649 GET_CODE (x) == PRE_DEC ? "-" : "",
22650 GET_MODE_SIZE (mode));
22651 else
22652 asm_fprintf (stream, "[%r], #%s%d",
22653 REGNO (XEXP (x, 0)),
22654 GET_CODE (x) == POST_DEC ? "-" : "",
22655 GET_MODE_SIZE (mode));
22657 else if (GET_CODE (x) == PRE_MODIFY)
22659 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22660 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22661 asm_fprintf (stream, "#%wd]!",
22662 INTVAL (XEXP (XEXP (x, 1), 1)));
22663 else
22664 asm_fprintf (stream, "%r]!",
22665 REGNO (XEXP (XEXP (x, 1), 1)));
22667 else if (GET_CODE (x) == POST_MODIFY)
22669 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22670 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22671 asm_fprintf (stream, "#%wd",
22672 INTVAL (XEXP (XEXP (x, 1), 1)));
22673 else
22674 asm_fprintf (stream, "%r",
22675 REGNO (XEXP (XEXP (x, 1), 1)));
22677 else output_addr_const (stream, x);
22679 else
22681 if (REG_P (x))
22682 asm_fprintf (stream, "[%r]", REGNO (x));
22683 else if (GET_CODE (x) == POST_INC)
22684 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22685 else if (GET_CODE (x) == PLUS)
22687 gcc_assert (REG_P (XEXP (x, 0)));
22688 if (CONST_INT_P (XEXP (x, 1)))
22689 asm_fprintf (stream, "[%r, #%wd]",
22690 REGNO (XEXP (x, 0)),
22691 INTVAL (XEXP (x, 1)));
22692 else
22693 asm_fprintf (stream, "[%r, %r]",
22694 REGNO (XEXP (x, 0)),
22695 REGNO (XEXP (x, 1)));
22697 else
22698 output_addr_const (stream, x);
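/* As an illustration (a sketch; the exact registers and constants depend on
   the operands), the 32-bit cases above print addresses such as:

     [r3]              plain register
     [r3, #8]          register plus constant offset
     [r3, -r4]         register minus index register
     [r3, r4, lsl #2]  register plus scaled index
     [r3, #-4]!        pre-decrement with writeback
     [r3], #4          post-increment  */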
22702 /* Target hook for indicating whether a punctuation character for
22703 TARGET_PRINT_OPERAND is valid. */
22704 static bool
22705 arm_print_operand_punct_valid_p (unsigned char code)
22707 return (code == '@' || code == '|' || code == '.'
22708 || code == '(' || code == ')' || code == '#'
22709 || (TARGET_32BIT && (code == '?'))
22710 || (TARGET_THUMB2 && (code == '!'))
22711 || (TARGET_THUMB && (code == '_')));
22714 /* Target hook for assembling integer objects. The ARM version needs to
22715 handle word-sized values specially. */
22716 static bool
22717 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22719 machine_mode mode;
22721 if (size == UNITS_PER_WORD && aligned_p)
22723 fputs ("\t.word\t", asm_out_file);
22724 output_addr_const (asm_out_file, x);
22726 /* Mark symbols as position independent. We only do this in the
22727 .text segment, not in the .data segment. */
22728 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22729 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22731 /* See legitimize_pic_address for an explanation of the
22732 TARGET_VXWORKS_RTP check. */
22733 if (!arm_pic_data_is_text_relative
22734 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22735 fputs ("(GOT)", asm_out_file);
22736 else
22737 fputs ("(GOTOFF)", asm_out_file);
22739 fputc ('\n', asm_out_file);
22740 return true;
22743 mode = GET_MODE (x);
22745 if (arm_vector_mode_supported_p (mode))
22747 int i, units;
22749 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22751 units = CONST_VECTOR_NUNITS (x);
22752 size = GET_MODE_UNIT_SIZE (mode);
22754 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22755 for (i = 0; i < units; i++)
22757 rtx elt = CONST_VECTOR_ELT (x, i);
22758 assemble_integer
22759 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22761 else
22762 for (i = 0; i < units; i++)
22764 rtx elt = CONST_VECTOR_ELT (x, i);
22765 assemble_real
22766 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22767 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22770 return true;
22773 return default_assemble_integer (x, size, aligned_p);
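/* For instance (a sketch; "foo" is a placeholder symbol), a word-sized
   SYMBOL_REF emitted into a PIC constant table comes out as

	.word	foo(GOT)

   for a non-local or non-text-relative symbol, and as

	.word	foo(GOTOFF)

   for a local, text-relative one.  */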
22776 static void
22777 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22779 section *s;
22781 if (!TARGET_AAPCS_BASED)
22783 (is_ctor ?
22784 default_named_section_asm_out_constructor
22785 : default_named_section_asm_out_destructor) (symbol, priority);
22786 return;
22789 /* Put these in the .init_array section, using a special relocation. */
22790 if (priority != DEFAULT_INIT_PRIORITY)
22792 char buf[18];
22793 sprintf (buf, "%s.%.5u",
22794 is_ctor ? ".init_array" : ".fini_array",
22795 priority);
22796 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22798 else if (is_ctor)
22799 s = ctors_section;
22800 else
22801 s = dtors_section;
22803 switch_to_section (s);
22804 assemble_align (POINTER_SIZE);
22805 fputs ("\t.word\t", asm_out_file);
22806 output_addr_const (asm_out_file, symbol);
22807 fputs ("(target1)\n", asm_out_file);
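/* For example (a sketch), on an AAPCS target a constructor registered with
   priority 123 is placed in a section named ".init_array.00123" and emitted
   as

	.word	ctor_symbol(target1)

   where "ctor_symbol" is a placeholder for the constructor's address.  */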
22810 /* Add a function to the list of static constructors. */
22812 static void
22813 arm_elf_asm_constructor (rtx symbol, int priority)
22815 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22818 /* Add a function to the list of static destructors. */
22820 static void
22821 arm_elf_asm_destructor (rtx symbol, int priority)
22823 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22826 /* A finite state machine takes care of noticing whether or not instructions
22827 can be conditionally executed, and thus decrease execution time and code
22828 size by deleting branch instructions. The fsm is controlled by
22829 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22831 /* The states of the fsm controlling condition codes are:
22832 0: normal, do nothing special
22833 1: make ASM_OUTPUT_OPCODE not output this instruction
22834 2: make ASM_OUTPUT_OPCODE not output this instruction
22835 3: make instructions conditional
22836 4: make instructions conditional
22838 State transitions (state->state by whom under condition):
22839 0 -> 1 final_prescan_insn if the `target' is a label
22840 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22841 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22842 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22843 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22844 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22845 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22846 (the target insn is arm_target_insn).
22848 If the jump clobbers the conditions then we use states 2 and 4.
22850 A similar thing can be done with conditional return insns.
22852 XXX In case the `target' is an unconditional branch, this conditionalising
22853 of the instructions always reduces code size, but not always execution
22854 time. But then, I want to reduce the code size to somewhere near what
22855 /bin/cc produces. */
22857 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22858 instructions. When a COND_EXEC instruction is seen the subsequent
22859 instructions are scanned so that multiple conditional instructions can be
22860 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22861 specify the length and true/false mask for the IT block. These will be
22862 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
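/* As an illustration of the effect (a sketch only), an ARM-state sequence
   such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
	mov	r2, #0
   .L1:

   can be rewritten by this machinery as

	cmp	r0, #0
	addne	r1, r1, #1
	movne	r2, #0

   removing the branch at the cost of conditionally executing the skipped
   instructions.  */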
22864 /* Returns the index of the ARM condition code string in
22865 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22866 COMPARISON should be an rtx like `(eq (...) (...))'. */
22868 enum arm_cond_code
22869 maybe_get_arm_condition_code (rtx comparison)
22871 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22872 enum arm_cond_code code;
22873 enum rtx_code comp_code = GET_CODE (comparison);
22875 if (GET_MODE_CLASS (mode) != MODE_CC)
22876 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22877 XEXP (comparison, 1));
22879 switch (mode)
22881 case CC_DNEmode: code = ARM_NE; goto dominance;
22882 case CC_DEQmode: code = ARM_EQ; goto dominance;
22883 case CC_DGEmode: code = ARM_GE; goto dominance;
22884 case CC_DGTmode: code = ARM_GT; goto dominance;
22885 case CC_DLEmode: code = ARM_LE; goto dominance;
22886 case CC_DLTmode: code = ARM_LT; goto dominance;
22887 case CC_DGEUmode: code = ARM_CS; goto dominance;
22888 case CC_DGTUmode: code = ARM_HI; goto dominance;
22889 case CC_DLEUmode: code = ARM_LS; goto dominance;
22890 case CC_DLTUmode: code = ARM_CC;
22892 dominance:
22893 if (comp_code == EQ)
22894 return ARM_INVERSE_CONDITION_CODE (code);
22895 if (comp_code == NE)
22896 return code;
22897 return ARM_NV;
22899 case CC_NOOVmode:
22900 switch (comp_code)
22902 case NE: return ARM_NE;
22903 case EQ: return ARM_EQ;
22904 case GE: return ARM_PL;
22905 case LT: return ARM_MI;
22906 default: return ARM_NV;
22909 case CC_Zmode:
22910 switch (comp_code)
22912 case NE: return ARM_NE;
22913 case EQ: return ARM_EQ;
22914 default: return ARM_NV;
22917 case CC_Nmode:
22918 switch (comp_code)
22920 case NE: return ARM_MI;
22921 case EQ: return ARM_PL;
22922 default: return ARM_NV;
22925 case CCFPEmode:
22926 case CCFPmode:
22927 /* We can handle all cases except UNEQ and LTGT. */
22928 switch (comp_code)
22930 case GE: return ARM_GE;
22931 case GT: return ARM_GT;
22932 case LE: return ARM_LS;
22933 case LT: return ARM_MI;
22934 case NE: return ARM_NE;
22935 case EQ: return ARM_EQ;
22936 case ORDERED: return ARM_VC;
22937 case UNORDERED: return ARM_VS;
22938 case UNLT: return ARM_LT;
22939 case UNLE: return ARM_LE;
22940 case UNGT: return ARM_HI;
22941 case UNGE: return ARM_PL;
22942 /* UNEQ and LTGT do not have a representation. */
22943 case UNEQ: /* Fall through. */
22944 case LTGT: /* Fall through. */
22945 default: return ARM_NV;
22948 case CC_SWPmode:
22949 switch (comp_code)
22951 case NE: return ARM_NE;
22952 case EQ: return ARM_EQ;
22953 case GE: return ARM_LE;
22954 case GT: return ARM_LT;
22955 case LE: return ARM_GE;
22956 case LT: return ARM_GT;
22957 case GEU: return ARM_LS;
22958 case GTU: return ARM_CC;
22959 case LEU: return ARM_CS;
22960 case LTU: return ARM_HI;
22961 default: return ARM_NV;
22964 case CC_Cmode:
22965 switch (comp_code)
22967 case LTU: return ARM_CS;
22968 case GEU: return ARM_CC;
22969 default: return ARM_NV;
22972 case CC_CZmode:
22973 switch (comp_code)
22975 case NE: return ARM_NE;
22976 case EQ: return ARM_EQ;
22977 case GEU: return ARM_CS;
22978 case GTU: return ARM_HI;
22979 case LEU: return ARM_LS;
22980 case LTU: return ARM_CC;
22981 default: return ARM_NV;
22984 case CC_NCVmode:
22985 switch (comp_code)
22987 case GE: return ARM_GE;
22988 case LT: return ARM_LT;
22989 case GEU: return ARM_CS;
22990 case LTU: return ARM_CC;
22991 default: return ARM_NV;
22994 case CCmode:
22995 switch (comp_code)
22997 case NE: return ARM_NE;
22998 case EQ: return ARM_EQ;
22999 case GE: return ARM_GE;
23000 case GT: return ARM_GT;
23001 case LE: return ARM_LE;
23002 case LT: return ARM_LT;
23003 case GEU: return ARM_CS;
23004 case GTU: return ARM_HI;
23005 case LEU: return ARM_LS;
23006 case LTU: return ARM_CC;
23007 default: return ARM_NV;
23010 default: gcc_unreachable ();
23014 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23015 static enum arm_cond_code
23016 get_arm_condition_code (rtx comparison)
23018 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23019 gcc_assert (code != ARM_NV);
23020 return code;
23023 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23024 instructions. */
23025 void
23026 thumb2_final_prescan_insn (rtx_insn *insn)
23028 rtx_insn *first_insn = insn;
23029 rtx body = PATTERN (insn);
23030 rtx predicate;
23031 enum arm_cond_code code;
23032 int n;
23033 int mask;
23034 int max;
23036 /* max_insns_skipped in the tune was already taken into account in the
23037 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23038 just emit the IT blocks as they come. It does not make sense to split
23039 the IT blocks. */
23040 max = MAX_INSN_PER_IT_BLOCK;
23042 /* Remove the previous insn from the count of insns to be output. */
23043 if (arm_condexec_count)
23044 arm_condexec_count--;
23046 /* Nothing to do if we are already inside a conditional block. */
23047 if (arm_condexec_count)
23048 return;
23050 if (GET_CODE (body) != COND_EXEC)
23051 return;
23053 /* Conditional jumps are implemented directly. */
23054 if (JUMP_P (insn))
23055 return;
23057 predicate = COND_EXEC_TEST (body);
23058 arm_current_cc = get_arm_condition_code (predicate);
23060 n = get_attr_ce_count (insn);
23061 arm_condexec_count = 1;
23062 arm_condexec_mask = (1 << n) - 1;
23063 arm_condexec_masklen = n;
23064 /* See if subsequent instructions can be combined into the same block. */
23065 for (;;)
23067 insn = next_nonnote_insn (insn);
23069 /* Jumping into the middle of an IT block is illegal, so a label or
23070 barrier terminates the block. */
23071 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23072 break;
23074 body = PATTERN (insn);
23075 /* USE and CLOBBER aren't really insns, so just skip them. */
23076 if (GET_CODE (body) == USE
23077 || GET_CODE (body) == CLOBBER)
23078 continue;
23080 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23081 if (GET_CODE (body) != COND_EXEC)
23082 break;
23083 /* Maximum number of conditionally executed instructions in a block. */
23084 n = get_attr_ce_count (insn);
23085 if (arm_condexec_masklen + n > max)
23086 break;
23088 predicate = COND_EXEC_TEST (body);
23089 code = get_arm_condition_code (predicate);
23090 mask = (1 << n) - 1;
23091 if (arm_current_cc == code)
23092 arm_condexec_mask |= (mask << arm_condexec_masklen);
23093 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
23094 break;
23096 arm_condexec_count++;
23097 arm_condexec_masklen += n;
23099 /* A jump must be the last instruction in a conditional block. */
23100 if (JUMP_P (insn))
23101 break;
23103 /* Restore recog_data (getting the attributes of other insns can
23104 destroy this array, but final.c assumes that it remains intact
23105 across this call). */
23106 extract_constrain_insn_cached (first_insn);
23109 void
23110 arm_final_prescan_insn (rtx_insn *insn)
23112 /* BODY will hold the body of INSN. */
23113 rtx body = PATTERN (insn);
23115 /* This will be 1 if trying to repeat the trick, and things need to be
23116 reversed if it appears to fail. */
23117 int reverse = 0;
23119 /* If we start with a return insn, we only succeed if we find another one. */
23120 int seeking_return = 0;
23121 enum rtx_code return_code = UNKNOWN;
23123 /* START_INSN will hold the insn from where we start looking. This is the
23124 first insn after the following code_label if REVERSE is true. */
23125 rtx_insn *start_insn = insn;
23127 /* If in state 4, check if the target branch is reached, in order to
23128 change back to state 0. */
23129 if (arm_ccfsm_state == 4)
23131 if (insn == arm_target_insn)
23133 arm_target_insn = NULL;
23134 arm_ccfsm_state = 0;
23136 return;
23139 /* If in state 3, it is possible to repeat the trick, if this insn is an
23140 unconditional branch to a label, and immediately following this branch
23141 is the previous target label which is only used once, and the label this
23142 branch jumps to is not too far off. */
23143 if (arm_ccfsm_state == 3)
23145 if (simplejump_p (insn))
23147 start_insn = next_nonnote_insn (start_insn);
23148 if (BARRIER_P (start_insn))
23150 /* XXX Isn't this always a barrier? */
23151 start_insn = next_nonnote_insn (start_insn);
23153 if (LABEL_P (start_insn)
23154 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23155 && LABEL_NUSES (start_insn) == 1)
23156 reverse = TRUE;
23157 else
23158 return;
23160 else if (ANY_RETURN_P (body))
23162 start_insn = next_nonnote_insn (start_insn);
23163 if (BARRIER_P (start_insn))
23164 start_insn = next_nonnote_insn (start_insn);
23165 if (LABEL_P (start_insn)
23166 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23167 && LABEL_NUSES (start_insn) == 1)
23169 reverse = TRUE;
23170 seeking_return = 1;
23171 return_code = GET_CODE (body);
23173 else
23174 return;
23176 else
23177 return;
23180 gcc_assert (!arm_ccfsm_state || reverse);
23181 if (!JUMP_P (insn))
23182 return;
23184 /* This jump might be paralleled with a clobber of the condition codes;
23185 the jump should always come first. */
23186 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23187 body = XVECEXP (body, 0, 0);
23189 if (reverse
23190 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23191 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23193 int insns_skipped;
23194 int fail = FALSE, succeed = FALSE;
23195 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23196 int then_not_else = TRUE;
23197 rtx_insn *this_insn = start_insn;
23198 rtx label = 0;
23200 /* Register the insn jumped to. */
23201 if (reverse)
23203 if (!seeking_return)
23204 label = XEXP (SET_SRC (body), 0);
23206 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23207 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23208 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23210 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23211 then_not_else = FALSE;
23213 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23215 seeking_return = 1;
23216 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23218 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23220 seeking_return = 1;
23221 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23222 then_not_else = FALSE;
23224 else
23225 gcc_unreachable ();
23227 /* See how many insns this branch skips, and what kind of insns. If all
23228 insns are okay, and the label or unconditional branch to the same
23229 label is not too far away, succeed. */
23230 for (insns_skipped = 0;
23231 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23233 rtx scanbody;
23235 this_insn = next_nonnote_insn (this_insn);
23236 if (!this_insn)
23237 break;
23239 switch (GET_CODE (this_insn))
23241 case CODE_LABEL:
23242 /* Succeed if it is the target label, otherwise fail since
23243 control falls in from somewhere else. */
23244 if (this_insn == label)
23246 arm_ccfsm_state = 1;
23247 succeed = TRUE;
23249 else
23250 fail = TRUE;
23251 break;
23253 case BARRIER:
23254 /* Succeed if the following insn is the target label.
23255 Otherwise fail.
23256 If return insns are used then the last insn in a function
23257 will be a barrier. */
23258 this_insn = next_nonnote_insn (this_insn);
23259 if (this_insn && this_insn == label)
23261 arm_ccfsm_state = 1;
23262 succeed = TRUE;
23264 else
23265 fail = TRUE;
23266 break;
23268 case CALL_INSN:
23269 /* The AAPCS says that conditional calls should not be
23270 used since they make interworking inefficient (the
23271 linker can't transform BL<cond> into BLX). That's
23272 only a problem if the machine has BLX. */
23273 if (arm_arch5)
23275 fail = TRUE;
23276 break;
23279 /* Succeed if the following insn is the target label, or
23280 if the following two insns are a barrier and the
23281 target label. */
23282 this_insn = next_nonnote_insn (this_insn);
23283 if (this_insn && BARRIER_P (this_insn))
23284 this_insn = next_nonnote_insn (this_insn);
23286 if (this_insn && this_insn == label
23287 && insns_skipped < max_insns_skipped)
23289 arm_ccfsm_state = 1;
23290 succeed = TRUE;
23292 else
23293 fail = TRUE;
23294 break;
23296 case JUMP_INSN:
23297 /* If this is an unconditional branch to the same label, succeed.
23298 If it is to another label, do nothing. If it is conditional,
23299 fail. */
23300 /* XXX Probably, the tests for SET and the PC are
23301 unnecessary. */
23303 scanbody = PATTERN (this_insn);
23304 if (GET_CODE (scanbody) == SET
23305 && GET_CODE (SET_DEST (scanbody)) == PC)
23307 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23308 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23310 arm_ccfsm_state = 2;
23311 succeed = TRUE;
23313 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23314 fail = TRUE;
23316 /* Fail if a conditional return is undesirable (e.g. on a
23317 StrongARM), but still allow this if optimizing for size. */
23318 else if (GET_CODE (scanbody) == return_code
23319 && !use_return_insn (TRUE, NULL)
23320 && !optimize_size)
23321 fail = TRUE;
23322 else if (GET_CODE (scanbody) == return_code)
23324 arm_ccfsm_state = 2;
23325 succeed = TRUE;
23327 else if (GET_CODE (scanbody) == PARALLEL)
23329 switch (get_attr_conds (this_insn))
23331 case CONDS_NOCOND:
23332 break;
23333 default:
23334 fail = TRUE;
23335 break;
23338 else
23339 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23341 break;
23343 case INSN:
23344 /* Instructions using or affecting the condition codes make it
23345 fail. */
23346 scanbody = PATTERN (this_insn);
23347 if (!(GET_CODE (scanbody) == SET
23348 || GET_CODE (scanbody) == PARALLEL)
23349 || get_attr_conds (this_insn) != CONDS_NOCOND)
23350 fail = TRUE;
23351 break;
23353 default:
23354 break;
23357 if (succeed)
23359 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23360 arm_target_label = CODE_LABEL_NUMBER (label);
23361 else
23363 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23365 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23367 this_insn = next_nonnote_insn (this_insn);
23368 gcc_assert (!this_insn
23369 || (!BARRIER_P (this_insn)
23370 && !LABEL_P (this_insn)));
23372 if (!this_insn)
23374 /* Oh dear! We ran off the end... give up. */
23375 extract_constrain_insn_cached (insn);
23376 arm_ccfsm_state = 0;
23377 arm_target_insn = NULL;
23378 return;
23380 arm_target_insn = this_insn;
23383 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23384 what it was. */
23385 if (!reverse)
23386 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23388 if (reverse || then_not_else)
23389 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23392 /* Restore recog_data (getting the attributes of other insns can
23393 destroy this array, but final.c assumes that it remains intact
23394 across this call). */
23395 extract_constrain_insn_cached (insn);
23399 /* Output IT instructions. */
23400 void
23401 thumb2_asm_output_opcode (FILE * stream)
23403 char buff[5];
23404 int n;
23406 if (arm_condexec_mask)
23408 for (n = 0; n < arm_condexec_masklen; n++)
23409 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23410 buff[n] = 0;
23411 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23412 arm_condition_codes[arm_current_cc]);
23413 arm_condexec_mask = 0;
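/* Illustrative example (not from the original source): each bit of
   arm_condexec_mask selects 't' or 'e' for one instruction covered by the
   IT prefix, so a mask that produces buff == "tte" with arm_current_cc ==
   ARM_EQ makes the loop above print "itte\teq" ahead of the conditionally
   executed instructions, while a single-bit mask prints a plain "it\teq".  */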
23417 /* Returns true if REGNO is a valid register
23418 for holding a quantity of type MODE. */
23420 int arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23422 if (GET_MODE_CLASS (mode) == MODE_CC)
23423 return (regno == CC_REGNUM
23424 || (TARGET_HARD_FLOAT && TARGET_VFP
23425 && regno == VFPCC_REGNUM));
23427 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23428 return false;
23430 if (TARGET_THUMB1)
23431 /* For the Thumb we only allow values bigger than SImode in
23432 registers 0 - 6, so that there is always a second low
23433 register available to hold the upper part of the value.
23434 We probably ought to ensure that the register is the
23435 start of an even numbered register pair. */
23436 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23438 if (TARGET_HARD_FLOAT && TARGET_VFP
23439 && IS_VFP_REGNUM (regno))
23441 if (mode == SFmode || mode == SImode)
23442 return VFP_REGNO_OK_FOR_SINGLE (regno);
23444 if (mode == DFmode)
23445 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23447 if (mode == HFmode)
23448 return VFP_REGNO_OK_FOR_SINGLE (regno);
23450 if (TARGET_NEON)
23451 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23452 || (VALID_NEON_QREG_MODE (mode)
23453 && NEON_REGNO_OK_FOR_QUAD (regno))
23454 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23455 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23456 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23457 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23458 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23460 return FALSE;
23463 if (TARGET_REALLY_IWMMXT)
23465 if (IS_IWMMXT_GR_REGNUM (regno))
23466 return mode == SImode;
23468 if (IS_IWMMXT_REGNUM (regno))
23469 return VALID_IWMMXT_REG_MODE (mode);
23472 /* We allow almost any value to be stored in the general registers.
23473 Restrict doubleword quantities to even register pairs in ARM state
23474 so that we can use ldrd. Do not allow very large Neon structure
23475 opaque modes in general registers; they would use too many. */
23476 if (regno <= LAST_ARM_REGNUM)
23478 if (ARM_NUM_REGS (mode) > 4)
23479 return FALSE;
23481 if (TARGET_THUMB2)
23482 return TRUE;
23484 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23487 if (regno == FRAME_POINTER_REGNUM
23488 || regno == ARG_POINTER_REGNUM)
23489 /* We only allow integers in the fake hard registers. */
23490 return GET_MODE_CLASS (mode) == MODE_INT;
23492 return FALSE;
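/* Worked consequences of the checks above (illustrative, assuming a typical
   ARM-state target with LDRD): a DImode value occupies two core registers
   and must therefore start at an even-numbered register, e.g. r4 but not r5,
   so that ldrd/strd can be used; in Thumb-2 any starting register is
   accepted; and any mode needing more than four core registers (the large
   Neon structure modes) is rejected for core registers altogether.  */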
23495 /* Implement MODES_TIEABLE_P. */
23497 bool
23498 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23500 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23501 return true;
23503 /* We specifically want to allow elements of "structure" modes to
23504 be tieable to the structure. This more general condition allows
23505 other rarer situations too. */
23506 if (TARGET_NEON
23507 && (VALID_NEON_DREG_MODE (mode1)
23508 || VALID_NEON_QREG_MODE (mode1)
23509 || VALID_NEON_STRUCT_MODE (mode1))
23510 && (VALID_NEON_DREG_MODE (mode2)
23511 || VALID_NEON_QREG_MODE (mode2)
23512 || VALID_NEON_STRUCT_MODE (mode2)))
23513 return true;
23515 return false;
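/* Illustrative example (assuming the VALID_NEON_*_MODE definitions from
   arm.h): two modes of the same class, say SFmode and DFmode, are always
   tieable by the first test; with NEON enabled the second test additionally
   lets a D-register vector mode such as V2SImode be tied to a structure
   mode such as OImode even though their mode classes differ.  */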
23518 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23519 not used in arm mode. */
23521 enum reg_class
23522 arm_regno_class (int regno)
23524 if (regno == PC_REGNUM)
23525 return NO_REGS;
23527 if (TARGET_THUMB1)
23529 if (regno == STACK_POINTER_REGNUM)
23530 return STACK_REG;
23531 if (regno == CC_REGNUM)
23532 return CC_REG;
23533 if (regno < 8)
23534 return LO_REGS;
23535 return HI_REGS;
23538 if (TARGET_THUMB2 && regno < 8)
23539 return LO_REGS;
23541 if ( regno <= LAST_ARM_REGNUM
23542 || regno == FRAME_POINTER_REGNUM
23543 || regno == ARG_POINTER_REGNUM)
23544 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23546 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23547 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23549 if (IS_VFP_REGNUM (regno))
23551 if (regno <= D7_VFP_REGNUM)
23552 return VFP_D0_D7_REGS;
23553 else if (regno <= LAST_LO_VFP_REGNUM)
23554 return VFP_LO_REGS;
23555 else
23556 return VFP_HI_REGS;
23559 if (IS_IWMMXT_REGNUM (regno))
23560 return IWMMXT_REGS;
23562 if (IS_IWMMXT_GR_REGNUM (regno))
23563 return IWMMXT_GR_REGS;
23565 return NO_REGS;
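/* Example mapping (illustrative): in Thumb-1, a register below r8 is
   LO_REGS, the stack pointer is STACK_REG and r8-r12 fall into HI_REGS;
   in ARM state the same core registers all map to GENERAL_REGS; VFP
   registers are split into VFP_D0_D7_REGS, VFP_LO_REGS and VFP_HI_REGS
   according to the register number, as tested above.  */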
23568 /* Handle a special case when computing the offset
23569 of an argument from the frame pointer. */
23571 int arm_debugger_arg_offset (int value, rtx addr)
23573 rtx_insn *insn;
23575 /* We are only interested if dbxout_parms() failed to compute the offset. */
23576 if (value != 0)
23577 return 0;
23579 /* We can only cope with the case where the address is held in a register. */
23580 if (!REG_P (addr))
23581 return 0;
23583 /* If we are using the frame pointer to point at the argument, then
23584 an offset of 0 is correct. */
23585 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23586 return 0;
23588 /* If we are using the stack pointer to point at the
23589 argument, then an offset of 0 is correct. */
23590 /* ??? Check this is consistent with thumb2 frame layout. */
23591 if ((TARGET_THUMB || !frame_pointer_needed)
23592 && REGNO (addr) == SP_REGNUM)
23593 return 0;
23595 /* Oh dear. The argument is pointed to by a register rather
23596 than being held in a register, or being stored at a known
23597 offset from the frame pointer. Since GDB only understands
23598 those two kinds of argument we must translate the address
23599 held in the register into an offset from the frame pointer.
23600 We do this by searching through the insns for the function
23601 looking to see where this register gets its value. If the
23602 register is initialized from the frame pointer plus an offset
23603 then we are in luck and we can continue, otherwise we give up.
23605 This code is exercised by producing debugging information
23606 for a function with arguments like this:
23608 double func (double a, double b, int c, double d) {return d;}
23610 Without this code the stab for parameter 'd' will be set to
23611 an offset of 0 from the frame pointer, rather than 8. */
23613 /* The if() statement says:
23615 If the insn is a normal instruction
23616 and if the insn is setting the value in a register
23617 and if the register being set is the register holding the address of the argument
23618 and if the address is computed by an addition
23619 that involves adding to a register
23620 which is the frame pointer
23621 a constant integer
23623 then... */
23625 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23627 if ( NONJUMP_INSN_P (insn)
23628 && GET_CODE (PATTERN (insn)) == SET
23629 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23630 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23631 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23632 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23633 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23636 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23638 break;
23642 if (value == 0)
23644 debug_rtx (addr);
23645 warning (0, "unable to compute real location of stacked parameter");
23646 value = 8; /* XXX magic hack */
23649 return value;
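/* The insn the loop above looks for has, schematically (illustrative RTL,
   not taken from a real dump):

       (set (reg rN) (plus (reg hard-frame-pointer) (const_int 8)))

   and once it is found, the INTVAL of that constant (8 here) becomes the
   offset reported to the debugger.  */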
23652 /* Implement TARGET_PROMOTED_TYPE. */
23654 static tree
23655 arm_promoted_type (const_tree t)
23657 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23658 return float_type_node;
23659 return NULL_TREE;
23662 /* Implement TARGET_CONVERT_TO_TYPE.
23663 Specifically, this hook implements the peculiarity of the ARM
23664 half-precision floating-point C semantics that requires conversions
23665 between __fp16 and double to go through an intermediate conversion to float. */
23667 static tree
23668 arm_convert_to_type (tree type, tree expr)
23670 tree fromtype = TREE_TYPE (expr);
23671 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23672 return NULL_TREE;
23673 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23674 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23675 return convert (type, convert (float_type_node, expr));
23676 return NULL_TREE;
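/* Illustrative effect (hypothetical source, not from the original file):
   for "__fp16 h; double d = h;" this hook rewrites the conversion as
   (double) (float) h, and the double-to-__fp16 direction likewise goes
   through an intermediate float, as described in the comment above.  */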
23679 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23680 This simply adds HFmode as a supported mode; even though we don't
23681 implement arithmetic on this type directly, it's supported by
23682 optabs conversions, much the way the double-word arithmetic is
23683 special-cased in the default hook. */
23685 static bool
23686 arm_scalar_mode_supported_p (machine_mode mode)
23688 if (mode == HFmode)
23689 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23690 else if (ALL_FIXED_POINT_MODE_P (mode))
23691 return true;
23692 else
23693 return default_scalar_mode_supported_p (mode);
23696 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23697 not to early-clobber SRC registers in the process.
23699 We assume that the operands described by SRC and DEST represent a
23700 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23701 number of components into which the copy has been decomposed. */
23702 void
23703 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23705 unsigned int i;
23707 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23708 || REGNO (operands[0]) < REGNO (operands[1]))
23710 for (i = 0; i < count; i++)
23712 operands[2 * i] = dest[i];
23713 operands[2 * i + 1] = src[i];
23716 else
23718 for (i = 0; i < count; i++)
23720 operands[2 * i] = dest[count - i - 1];
23721 operands[2 * i + 1] = src[count - i - 1];
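/* Illustrative ordering (hypothetical registers, assuming the caller emits
   the component moves in operand order): copying {d1,d2} into {d0,d1}
   overlaps with REGNO (dest) < REGNO (src), so the components stay in
   low-to-high order (d0 := d1, then d1 := d2); copying {d0,d1} into {d1,d2}
   is reversed (d2 := d1, then d1 := d0) so the overlapping d1 is read
   before it is overwritten.  */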
23726 /* Split operands into moves from op[1] + op[2] into op[0]. */
23728 void
23729 neon_split_vcombine (rtx operands[3])
23731 unsigned int dest = REGNO (operands[0]);
23732 unsigned int src1 = REGNO (operands[1]);
23733 unsigned int src2 = REGNO (operands[2]);
23734 machine_mode halfmode = GET_MODE (operands[1]);
23735 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23736 rtx destlo, desthi;
23738 if (src1 == dest && src2 == dest + halfregs)
23740 /* No-op move. Can't split to nothing; emit something. */
23741 emit_note (NOTE_INSN_DELETED);
23742 return;
23745 /* Preserve register attributes for variable tracking. */
23746 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23747 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23748 GET_MODE_SIZE (halfmode));
23750 /* Special case of reversed high/low parts. Use VSWP. */
23751 if (src2 == dest && src1 == dest + halfregs)
23753 rtx x = gen_rtx_SET (destlo, operands[1]);
23754 rtx y = gen_rtx_SET (desthi, operands[2]);
23755 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23756 return;
23759 if (!reg_overlap_mentioned_p (operands[2], destlo))
23761 /* Try to avoid unnecessary moves if part of the result
23762 is in the right place already. */
23763 if (src1 != dest)
23764 emit_move_insn (destlo, operands[1]);
23765 if (src2 != dest + halfregs)
23766 emit_move_insn (desthi, operands[2]);
23768 else
23770 if (src2 != dest + halfregs)
23771 emit_move_insn (desthi, operands[2]);
23772 if (src1 != dest)
23773 emit_move_insn (destlo, operands[1]);
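/* Illustrative cases (hypothetical registers, with q0 made of d0/d1):
   vcombine d0, d1 -> q0 is the no-op case above; vcombine d1, d0 -> q0 is
   the reversed case and becomes the single two-set PARALLEL (a VSWP);
   otherwise the two halves are copied with plain moves, ordered so that an
   overlapping source half is never clobbered before it is read.  */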
23777 /* Return the number (counting from 0) of
23778 the least significant set bit in MASK. */
23780 inline static int
23781 number_of_first_bit_set (unsigned mask)
23783 return ctz_hwi (mask);
23786 /* Like emit_multi_reg_push, but allowing for a different set of
23787 registers to be described as saved. MASK is the set of registers
23788 to be saved; REAL_REGS is the set of registers to be described as
23789 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23791 static rtx_insn *
23792 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23794 unsigned long regno;
23795 rtx par[10], tmp, reg;
23796 rtx_insn *insn;
23797 int i, j;
23799 /* Build the parallel of the registers actually being stored. */
23800 for (i = 0; mask; ++i, mask &= mask - 1)
23802 regno = ctz_hwi (mask);
23803 reg = gen_rtx_REG (SImode, regno);
23805 if (i == 0)
23806 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23807 else
23808 tmp = gen_rtx_USE (VOIDmode, reg);
23810 par[i] = tmp;
23813 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23814 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23815 tmp = gen_frame_mem (BLKmode, tmp);
23816 tmp = gen_rtx_SET (tmp, par[0]);
23817 par[0] = tmp;
23819 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23820 insn = emit_insn (tmp);
23822 /* Always build the stack adjustment note for unwind info. */
23823 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23824 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23825 par[0] = tmp;
23827 /* Build the parallel of the registers recorded as saved for unwind. */
23828 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23830 regno = ctz_hwi (real_regs);
23831 reg = gen_rtx_REG (SImode, regno);
23833 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23834 tmp = gen_frame_mem (SImode, tmp);
23835 tmp = gen_rtx_SET (tmp, reg);
23836 RTX_FRAME_RELATED_P (tmp) = 1;
23837 par[j + 1] = tmp;
23840 if (j == 0)
23841 tmp = par[0];
23842 else
23844 RTX_FRAME_RELATED_P (par[0]) = 1;
23845 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23848 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23850 return insn;
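/* Illustrative shape of the result (hypothetical mask of r4, r5 and lr):
   the emitted insn is a PARALLEL whose first element stores the block at
   (pre_modify sp (sp - 12)), and the REG_FRAME_RELATED_EXPR note built from
   REAL_REGS records "sp := sp - 12" plus one frame-related store per
   described register, which is what the unwind machinery reads.  */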
23853 /* Emit code to push or pop registers to or from the stack. F is the
23854 assembly file. MASK is the registers to pop. */
23855 static void
23856 thumb_pop (FILE *f, unsigned long mask)
23858 int regno;
23859 int lo_mask = mask & 0xFF;
23860 int pushed_words = 0;
23862 gcc_assert (mask);
23864 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23866 /* Special case. Do not generate a POP PC statement here; do it in
23867 thumb_exit(). */
23868 thumb_exit (f, -1);
23869 return;
23872 fprintf (f, "\tpop\t{");
23874 /* Look at the low registers first. */
23875 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23877 if (lo_mask & 1)
23879 asm_fprintf (f, "%r", regno);
23881 if ((lo_mask & ~1) != 0)
23882 fprintf (f, ", ");
23884 pushed_words++;
23888 if (mask & (1 << PC_REGNUM))
23890 /* Catch popping the PC. */
23891 if (TARGET_INTERWORK || TARGET_BACKTRACE
23892 || crtl->calls_eh_return)
23894 /* The PC is never popped directly; instead
23895 it is popped into r3 and then BX is used. */
23896 fprintf (f, "}\n");
23898 thumb_exit (f, -1);
23900 return;
23902 else
23904 if (mask & 0xFF)
23905 fprintf (f, ", ");
23907 asm_fprintf (f, "%r", PC_REGNUM);
23911 fprintf (f, "}\n");
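/* Illustrative output (hypothetical masks): a mask of r4 and r5 prints
   "pop\t{r4, r5}"; a mask that includes the PC prints "pop\t{r4, r5, pc}"
   unless interworking, a backtrace structure or an EH return is involved,
   in which case the register list is closed early and thumb_exit emits the
   pop-into-a-low-register-plus-BX sequence instead.  */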
23914 /* Generate code to return from a thumb function.
23915 If 'reg_containing_return_addr' is -1, then the return address is
23916 actually on the stack, at the stack pointer. */
23917 static void
23918 thumb_exit (FILE *f, int reg_containing_return_addr)
23920 unsigned regs_available_for_popping;
23921 unsigned regs_to_pop;
23922 int pops_needed;
23923 unsigned available;
23924 unsigned required;
23925 machine_mode mode;
23926 int size;
23927 int restore_a4 = FALSE;
23929 /* Compute the registers we need to pop. */
23930 regs_to_pop = 0;
23931 pops_needed = 0;
23933 if (reg_containing_return_addr == -1)
23935 regs_to_pop |= 1 << LR_REGNUM;
23936 ++pops_needed;
23939 if (TARGET_BACKTRACE)
23941 /* Restore the (ARM) frame pointer and stack pointer. */
23942 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23943 pops_needed += 2;
23946 /* If there is nothing to pop then just emit the BX instruction and
23947 return. */
23948 if (pops_needed == 0)
23950 if (crtl->calls_eh_return)
23951 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23953 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23954 return;
23956 /* Otherwise if we are not supporting interworking and we have not created
23957 a backtrace structure and the function was not entered in ARM mode then
23958 just pop the return address straight into the PC. */
23959 else if (!TARGET_INTERWORK
23960 && !TARGET_BACKTRACE
23961 && !is_called_in_ARM_mode (current_function_decl)
23962 && !crtl->calls_eh_return)
23964 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23965 return;
23968 /* Find out how many of the (return) argument registers we can corrupt. */
23969 regs_available_for_popping = 0;
23971 /* If returning via __builtin_eh_return, the bottom three registers
23972 all contain information needed for the return. */
23973 if (crtl->calls_eh_return)
23974 size = 12;
23975 else
23977 /* We can deduce the registers used from the function's
23978 return value. This is more reliable than examining
23979 df_regs_ever_live_p () because that will be set if the register is
23980 ever used in the function, not just if the register is used
23981 to hold a return value. */
23983 if (crtl->return_rtx != 0)
23984 mode = GET_MODE (crtl->return_rtx);
23985 else
23986 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23988 size = GET_MODE_SIZE (mode);
23990 if (size == 0)
23992 /* In a void function we can use any argument register.
23993 In a function that returns a structure on the stack
23994 we can use the second and third argument registers. */
23995 if (mode == VOIDmode)
23996 regs_available_for_popping =
23997 (1 << ARG_REGISTER (1))
23998 | (1 << ARG_REGISTER (2))
23999 | (1 << ARG_REGISTER (3));
24000 else
24001 regs_available_for_popping =
24002 (1 << ARG_REGISTER (2))
24003 | (1 << ARG_REGISTER (3));
24005 else if (size <= 4)
24006 regs_available_for_popping =
24007 (1 << ARG_REGISTER (2))
24008 | (1 << ARG_REGISTER (3));
24009 else if (size <= 8)
24010 regs_available_for_popping =
24011 (1 << ARG_REGISTER (3));
24014 /* Match registers to be popped with registers into which we pop them. */
24015 for (available = regs_available_for_popping,
24016 required = regs_to_pop;
24017 required != 0 && available != 0;
24018 available &= ~(available & - available),
24019 required &= ~(required & - required))
24020 -- pops_needed;
24022 /* If we have any popping registers left over, remove them. */
24023 if (available > 0)
24024 regs_available_for_popping &= ~available;
24026 /* Otherwise if we need another popping register we can use
24027 the fourth argument register. */
24028 else if (pops_needed)
24030 /* If we have not found any free argument registers and
24031 reg a4 contains the return address, we must move it. */
24032 if (regs_available_for_popping == 0
24033 && reg_containing_return_addr == LAST_ARG_REGNUM)
24035 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24036 reg_containing_return_addr = LR_REGNUM;
24038 else if (size > 12)
24040 /* Register a4 is being used to hold part of the return value,
24041 but we have dire need of a free, low register. */
24042 restore_a4 = TRUE;
24044 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24047 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24049 /* The fourth argument register is available. */
24050 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24052 --pops_needed;
24056 /* Pop as many registers as we can. */
24057 thumb_pop (f, regs_available_for_popping);
24059 /* Process the registers we popped. */
24060 if (reg_containing_return_addr == -1)
24062 /* The return address was popped into the lowest numbered register. */
24063 regs_to_pop &= ~(1 << LR_REGNUM);
24065 reg_containing_return_addr =
24066 number_of_first_bit_set (regs_available_for_popping);
24068 /* Remove this register from the mask of available registers, so that
24069 the return address will not be corrupted by further pops. */
24070 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24073 /* If we popped other registers then handle them here. */
24074 if (regs_available_for_popping)
24076 int frame_pointer;
24078 /* Work out which register currently contains the frame pointer. */
24079 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24081 /* Move it into the correct place. */
24082 asm_fprintf (f, "\tmov\t%r, %r\n",
24083 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24085 /* (Temporarily) remove it from the mask of popped registers. */
24086 regs_available_for_popping &= ~(1 << frame_pointer);
24087 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24089 if (regs_available_for_popping)
24091 int stack_pointer;
24093 /* We popped the stack pointer as well,
24094 find the register that contains it. */
24095 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24097 /* Move it into the stack register. */
24098 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24100 /* At this point we have popped all necessary registers, so
24101 do not worry about restoring regs_available_for_popping
24102 to its correct value:
24104 assert (pops_needed == 0)
24105 assert (regs_available_for_popping == (1 << frame_pointer))
24106 assert (regs_to_pop == (1 << STACK_POINTER)) */
24108 else
24110 /* Since we have just moved the popped value into the frame
24111 pointer, the popping register is available for reuse, and
24112 we know that we still have the stack pointer left to pop. */
24113 regs_available_for_popping |= (1 << frame_pointer);
24117 /* If we still have registers left on the stack, but we no longer have
24118 any registers into which we can pop them, then we must move the return
24119 address into the link register and make available the register that
24120 contained it. */
24121 if (regs_available_for_popping == 0 && pops_needed > 0)
24123 regs_available_for_popping |= 1 << reg_containing_return_addr;
24125 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24126 reg_containing_return_addr);
24128 reg_containing_return_addr = LR_REGNUM;
24131 /* If we have registers left on the stack then pop some more.
24132 We know that at most we will want to pop FP and SP. */
24133 if (pops_needed > 0)
24135 int popped_into;
24136 int move_to;
24138 thumb_pop (f, regs_available_for_popping);
24140 /* We have popped either FP or SP.
24141 Move whichever one it is into the correct register. */
24142 popped_into = number_of_first_bit_set (regs_available_for_popping);
24143 move_to = number_of_first_bit_set (regs_to_pop);
24145 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24147 regs_to_pop &= ~(1 << move_to);
24149 --pops_needed;
24152 /* If we still have not popped everything then we must have only
24153 had one register available to us and we are now popping the SP. */
24154 if (pops_needed > 0)
24156 int popped_into;
24158 thumb_pop (f, regs_available_for_popping);
24160 popped_into = number_of_first_bit_set (regs_available_for_popping);
24162 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24164 /* assert (regs_to_pop == (1 << STACK_POINTER))
24165 assert (pops_needed == 1) */
24169 /* If necessary restore the a4 register. */
24170 if (restore_a4)
24172 if (reg_containing_return_addr != LR_REGNUM)
24174 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24175 reg_containing_return_addr = LR_REGNUM;
24178 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24181 if (crtl->calls_eh_return)
24182 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24184 /* Return to caller. */
24185 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
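/* Illustrative sequences (hypothetical): with the return address still on
   the stack and no interworking, backtrace or EH return, the fast path
   above emits just "pop\t{pc}"; with interworking the return address is
   instead popped into a free argument register and the function ends with
   "bx" on that register.  */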
24188 /* Scan INSN just before assembler is output for it.
24189 For Thumb-1, we track the status of the condition codes; this
24190 information is used in the cbranchsi4_insn pattern. */
24191 void
24192 thumb1_final_prescan_insn (rtx_insn *insn)
24194 if (flag_print_asm_name)
24195 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24196 INSN_ADDRESSES (INSN_UID (insn)));
24197 /* Don't overwrite the previous setter when we get to a cbranch. */
24198 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24200 enum attr_conds conds;
24202 if (cfun->machine->thumb1_cc_insn)
24204 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24205 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24206 CC_STATUS_INIT;
24208 conds = get_attr_conds (insn);
24209 if (conds == CONDS_SET)
24211 rtx set = single_set (insn);
24212 cfun->machine->thumb1_cc_insn = insn;
24213 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24214 cfun->machine->thumb1_cc_op1 = const0_rtx;
24215 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24216 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24218 rtx src1 = XEXP (SET_SRC (set), 1);
24219 if (src1 == const0_rtx)
24220 cfun->machine->thumb1_cc_mode = CCmode;
24222 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24224 /* Record the src register operand instead of dest because
24225 cprop_hardreg pass propagates src. */
24226 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24229 else if (conds != CONDS_NOCOND)
24230 cfun->machine->thumb1_cc_insn = NULL_RTX;
24233 /* Check if unexpected far jump is used. */
24234 if (cfun->machine->lr_save_eliminated
24235 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24236 internal_error("Unexpected thumb1 far jump");
24240 int thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24242 unsigned HOST_WIDE_INT mask = 0xff;
24243 int i;
24245 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24246 if (val == 0) /* XXX */
24247 return 0;
24249 for (i = 0; i < 25; i++)
24250 if ((val & (mask << i)) == val)
24251 return 1;
24253 return 0;
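/* Worked examples (illustrative): 0x00ff0000 is accepted because it is
   0xff << 16, and 0x000003fc is 0xff << 2; 0x00010100 is rejected because
   no single shifted 8-bit window covers both set bits; zero is rejected
   explicitly.  */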
24256 /* Returns nonzero if the current function contains,
24257 or might contain a far jump. */
24258 static int
24259 thumb_far_jump_used_p (void)
24261 rtx_insn *insn;
24262 bool far_jump = false;
24263 unsigned int func_size = 0;
24265 /* This test is only important for leaf functions. */
24266 /* assert (!leaf_function_p ()); */
24268 /* If we have already decided that far jumps may be used,
24269 do not bother checking again, and always return true even if
24270 it turns out that they are not being used. Once we have made
24271 the decision that far jumps are present (and that hence the link
24272 register will be pushed onto the stack) we cannot go back on it. */
24273 if (cfun->machine->far_jump_used)
24274 return 1;
24276 /* If this function is not being called from the prologue/epilogue
24277 generation code then it must be being called from the
24278 INITIAL_ELIMINATION_OFFSET macro. */
24279 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24281 /* In this case we know that we are being asked about the elimination
24282 of the arg pointer register. If that register is not being used,
24283 then there are no arguments on the stack, and we do not have to
24284 worry that a far jump might force the prologue to push the link
24285 register, changing the stack offsets. In this case we can just
24286 return false, since the presence of far jumps in the function will
24287 not affect stack offsets.
24289 If the arg pointer is live (or if it was live, but has now been
24290 eliminated and so set to dead) then we do have to test to see if
24291 the function might contain a far jump. This test can lead to some
24292 false negatives, since before reload is completed, the length of
24293 branch instructions is not known, so gcc defaults to returning their
24294 longest length, which in turn sets the far jump attribute to true.
24296 A false negative will not result in bad code being generated, but it
24297 will result in a needless push and pop of the link register. We
24298 hope that this does not occur too often.
24300 If we need doubleword stack alignment this could affect the other
24301 elimination offsets so we can't risk getting it wrong. */
24302 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24303 cfun->machine->arg_pointer_live = 1;
24304 else if (!cfun->machine->arg_pointer_live)
24305 return 0;
24308 /* We should not change far_jump_used during or after reload, as there is
24309 no chance to change stack frame layout. */
24310 if (reload_in_progress || reload_completed)
24311 return 0;
24313 /* Check to see if the function contains a branch
24314 insn with the far jump attribute set. */
24315 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24317 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24319 far_jump = true;
24321 func_size += get_attr_length (insn);
24324 /* The far_jump attribute will always be true for thumb1 before the
24325 shorten_branch pass, so checking the far_jump attribute before
24326 shorten_branch isn't very useful.
24328 The following heuristic tries to estimate more accurately whether a far
24329 jump may finally be used. The heuristic is very conservative, as there is
24330 no chance to roll back a decision not to use far jumps.
24332 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
24333 that each 2-byte insn is associated with a 4-byte constant pool entry.
24334 Using function size 2048/3 as the threshold is conservative enough. */
24335 if (far_jump)
24337 if ((func_size * 3) >= 2048)
24339 /* Record the fact that we have decided that
24340 the function does use far jumps. */
24341 cfun->machine->far_jump_used = 1;
24342 return 1;
24346 return 0;
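/* Worked example of the heuristic above (hypothetical sizes): a function
   whose insn lengths sum to 700 bytes gives 700 * 3 = 2100 >= 2048, so if
   it also contains a branch with the far_jump attribute we commit to using
   far jumps (and hence to saving LR); at 600 bytes (1800 < 2048) we still
   return 0 and keep the option of a short branch.  */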
24349 /* Return nonzero if FUNC must be entered in ARM mode. */
24350 static bool
24351 is_called_in_ARM_mode (tree func)
24353 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24355 /* Ignore the problem of functions whose address is taken. */
24356 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24357 return true;
24359 #ifdef ARM_PE
24360 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24361 #else
24362 return false;
24363 #endif
24366 /* Given the stack offsets and register mask in OFFSETS, decide how
24367 many additional registers to push instead of subtracting a constant
24368 from SP. For epilogues the principle is the same except we use pop.
24369 FOR_PROLOGUE indicates which we're generating. */
24370 static int
24371 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24373 HOST_WIDE_INT amount;
24374 unsigned long live_regs_mask = offsets->saved_regs_mask;
24375 /* Extract a mask of the ones we can give to the Thumb's push/pop
24376 instruction. */
24377 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24378 /* Then count how many other high registers will need to be pushed. */
24379 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24380 int n_free, reg_base, size;
24382 if (!for_prologue && frame_pointer_needed)
24383 amount = offsets->locals_base - offsets->saved_regs;
24384 else
24385 amount = offsets->outgoing_args - offsets->saved_regs;
24387 /* If the stack frame size is 512 exactly, we can save one load
24388 instruction, which should make this a win even when optimizing
24389 for speed. */
24390 if (!optimize_size && amount != 512)
24391 return 0;
24393 /* Can't do this if there are high registers to push. */
24394 if (high_regs_pushed != 0)
24395 return 0;
24397 /* Shouldn't do it in the prologue if no registers would normally
24398 be pushed at all. In the epilogue, also allow it if we'll have
24399 a pop insn for the PC. */
24400 if (l_mask == 0
24401 && (for_prologue
24402 || TARGET_BACKTRACE
24403 || (live_regs_mask & 1 << LR_REGNUM) == 0
24404 || TARGET_INTERWORK
24405 || crtl->args.pretend_args_size != 0))
24406 return 0;
24408 /* Don't do this if thumb_expand_prologue wants to emit instructions
24409 between the push and the stack frame allocation. */
24410 if (for_prologue
24411 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24412 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24413 return 0;
24415 reg_base = 0;
24416 n_free = 0;
24417 if (!for_prologue)
24419 size = arm_size_return_regs ();
24420 reg_base = ARM_NUM_INTS (size);
24421 live_regs_mask >>= reg_base;
24424 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24425 && (for_prologue || call_used_regs[reg_base + n_free]))
24427 live_regs_mask >>= 1;
24428 n_free++;
24431 if (n_free == 0)
24432 return 0;
24433 gcc_assert (amount / 4 * 4 == amount);
24435 if (amount >= 512 && (amount - n_free * 4) < 512)
24436 return (amount - 508) / 4;
24437 if (amount <= n_free * 4)
24438 return amount / 4;
24439 return 0;
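/* Worked examples (hypothetical frames, only reached when optimizing for
   size or when the frame is exactly 512 bytes): with amount == 516 and two
   free low registers, 516 - 8 < 512, so (516 - 508) / 4 == 2 extra
   registers are pushed and the remaining adjustment fits a single
   instruction; with amount == 8 and three free registers, the whole
   adjustment is folded into the push/pop (8 / 4 == 2 registers).  */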
24442 /* The bits which aren't usefully expanded as rtl. */
24443 const char *
24444 thumb1_unexpanded_epilogue (void)
24446 arm_stack_offsets *offsets;
24447 int regno;
24448 unsigned long live_regs_mask = 0;
24449 int high_regs_pushed = 0;
24450 int extra_pop;
24451 int had_to_push_lr;
24452 int size;
24454 if (cfun->machine->return_used_this_function != 0)
24455 return "";
24457 if (IS_NAKED (arm_current_func_type ()))
24458 return "";
24460 offsets = arm_get_frame_offsets ();
24461 live_regs_mask = offsets->saved_regs_mask;
24462 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24464 /* We can deduce the registers used from the function's return value.
24465 This is more reliable than examining df_regs_ever_live_p () because that
24466 will be set if the register is ever used in the function, not just if
24467 the register is used to hold a return value. */
24468 size = arm_size_return_regs ();
24470 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24471 if (extra_pop > 0)
24473 unsigned long extra_mask = (1 << extra_pop) - 1;
24474 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24477 /* The prologue may have pushed some high registers to use as
24478 work registers. e.g. the testsuite file:
24479 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24480 compiles to produce:
24481 push {r4, r5, r6, r7, lr}
24482 mov r7, r9
24483 mov r6, r8
24484 push {r6, r7}
24485 as part of the prologue. We have to undo that pushing here. */
24487 if (high_regs_pushed)
24489 unsigned long mask = live_regs_mask & 0xff;
24490 int next_hi_reg;
24492 /* The available low registers depend on the size of the value we are
24493 returning. */
24494 if (size <= 12)
24495 mask |= 1 << 3;
24496 if (size <= 8)
24497 mask |= 1 << 2;
24499 if (mask == 0)
24500 /* Oh dear! We have no low registers into which we can pop
24501 high registers! */
24502 internal_error
24503 ("no low registers available for popping high registers");
24505 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24506 if (live_regs_mask & (1 << next_hi_reg))
24507 break;
24509 while (high_regs_pushed)
24511 /* Find lo register(s) into which the high register(s) can
24512 be popped. */
24513 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24515 if (mask & (1 << regno))
24516 high_regs_pushed--;
24517 if (high_regs_pushed == 0)
24518 break;
24521 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24523 /* Pop the values into the low register(s). */
24524 thumb_pop (asm_out_file, mask);
24526 /* Move the value(s) into the high registers. */
24527 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24529 if (mask & (1 << regno))
24531 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24532 regno);
24534 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24535 if (live_regs_mask & (1 << next_hi_reg))
24536 break;
24540 live_regs_mask &= ~0x0f00;
24543 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24544 live_regs_mask &= 0xff;
24546 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24548 /* Pop the return address into the PC. */
24549 if (had_to_push_lr)
24550 live_regs_mask |= 1 << PC_REGNUM;
24552 /* Either no argument registers were pushed or a backtrace
24553 structure was created which includes an adjusted stack
24554 pointer, so just pop everything. */
24555 if (live_regs_mask)
24556 thumb_pop (asm_out_file, live_regs_mask);
24558 /* We have either just popped the return address into the
24559 PC or it was kept in LR for the entire function.
24560 Note that thumb_pop has already called thumb_exit if the
24561 PC was in the list. */
24562 if (!had_to_push_lr)
24563 thumb_exit (asm_out_file, LR_REGNUM);
24565 else
24567 /* Pop everything but the return address. */
24568 if (live_regs_mask)
24569 thumb_pop (asm_out_file, live_regs_mask);
24571 if (had_to_push_lr)
24573 if (size > 12)
24575 /* We have no free low regs, so save one. */
24576 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24577 LAST_ARG_REGNUM);
24580 /* Get the return address into a temporary register. */
24581 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24583 if (size > 12)
24585 /* Move the return address to lr. */
24586 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24587 LAST_ARG_REGNUM);
24588 /* Restore the low register. */
24589 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24590 IP_REGNUM);
24591 regno = LR_REGNUM;
24593 else
24594 regno = LAST_ARG_REGNUM;
24596 else
24597 regno = LR_REGNUM;
24599 /* Remove the argument registers that were pushed onto the stack. */
24600 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24601 SP_REGNUM, SP_REGNUM,
24602 crtl->args.pretend_args_size);
24604 thumb_exit (asm_out_file, regno);
24607 return "";
24610 /* Functions to save and restore machine-specific function data. */
24611 static struct machine_function *
24612 arm_init_machine_status (void)
24614 struct machine_function *machine;
24615 machine = ggc_cleared_alloc<machine_function> ();
24617 #if ARM_FT_UNKNOWN != 0
24618 machine->func_type = ARM_FT_UNKNOWN;
24619 #endif
24620 return machine;
24623 /* Return an RTX indicating where the return address to the
24624 calling function can be found. */
24626 rtx arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24628 if (count != 0)
24629 return NULL_RTX;
24631 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24634 /* Do anything needed before RTL is emitted for each function. */
24635 void
24636 arm_init_expanders (void)
24638 /* Arrange to initialize and mark the machine per-function status. */
24639 init_machine_status = arm_init_machine_status;
24641 /* This is to stop the combine pass optimizing away the alignment
24642 adjustment of va_arg. */
24643 /* ??? It is claimed that this should not be necessary. */
24644 if (cfun)
24645 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24648 /* Check that FUNC is called with a different mode. */
24650 bool
24651 arm_change_mode_p (tree func)
24653 if (TREE_CODE (func) != FUNCTION_DECL)
24654 return false;
24656 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24658 if (!callee_tree)
24659 callee_tree = target_option_default_node;
24661 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24662 int flags = callee_opts->x_target_flags;
24664 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24667 /* Like arm_compute_initial_elimination_offset. Simpler because there
24668 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24669 to point at the base of the local variables after static stack
24670 space for a function has been allocated. */
24672 HOST_WIDE_INT
24673 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24675 arm_stack_offsets *offsets;
24677 offsets = arm_get_frame_offsets ();
24679 switch (from)
24681 case ARG_POINTER_REGNUM:
24682 switch (to)
24684 case STACK_POINTER_REGNUM:
24685 return offsets->outgoing_args - offsets->saved_args;
24687 case FRAME_POINTER_REGNUM:
24688 return offsets->soft_frame - offsets->saved_args;
24690 case ARM_HARD_FRAME_POINTER_REGNUM:
24691 return offsets->saved_regs - offsets->saved_args;
24693 case THUMB_HARD_FRAME_POINTER_REGNUM:
24694 return offsets->locals_base - offsets->saved_args;
24696 default:
24697 gcc_unreachable ();
24699 break;
24701 case FRAME_POINTER_REGNUM:
24702 switch (to)
24704 case STACK_POINTER_REGNUM:
24705 return offsets->outgoing_args - offsets->soft_frame;
24707 case ARM_HARD_FRAME_POINTER_REGNUM:
24708 return offsets->saved_regs - offsets->soft_frame;
24710 case THUMB_HARD_FRAME_POINTER_REGNUM:
24711 return offsets->locals_base - offsets->soft_frame;
24713 default:
24714 gcc_unreachable ();
24716 break;
24718 default:
24719 gcc_unreachable ();
24723 /* Generate the function's prologue. */
24725 void
24726 thumb1_expand_prologue (void)
24728 rtx_insn *insn;
24730 HOST_WIDE_INT amount;
24731 HOST_WIDE_INT size;
24732 arm_stack_offsets *offsets;
24733 unsigned long func_type;
24734 int regno;
24735 unsigned long live_regs_mask;
24736 unsigned long l_mask;
24737 unsigned high_regs_pushed = 0;
24739 func_type = arm_current_func_type ();
24741 /* Naked functions don't have prologues. */
24742 if (IS_NAKED (func_type))
24744 if (flag_stack_usage_info)
24745 current_function_static_stack_size = 0;
24746 return;
24749 if (IS_INTERRUPT (func_type))
24751 error ("interrupt Service Routines cannot be coded in Thumb mode");
24752 return;
24755 if (is_called_in_ARM_mode (current_function_decl))
24756 emit_insn (gen_prologue_thumb1_interwork ());
24758 offsets = arm_get_frame_offsets ();
24759 live_regs_mask = offsets->saved_regs_mask;
24761 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24762 l_mask = live_regs_mask & 0x40ff;
24763 /* Then count how many other high registers will need to be pushed. */
24764 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24766 if (crtl->args.pretend_args_size)
24768 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24770 if (cfun->machine->uses_anonymous_args)
24772 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24773 unsigned long mask;
24775 mask = 1ul << (LAST_ARG_REGNUM + 1);
24776 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24778 insn = thumb1_emit_multi_reg_push (mask, 0);
24780 else
24782 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24783 stack_pointer_rtx, x));
24785 RTX_FRAME_RELATED_P (insn) = 1;
24788 if (TARGET_BACKTRACE)
24790 HOST_WIDE_INT offset = 0;
24791 unsigned work_register;
24792 rtx work_reg, x, arm_hfp_rtx;
24794 /* We have been asked to create a stack backtrace structure.
24795 The code looks like this:
24797 0 .align 2
24798 0 func:
24799 0 sub SP, #16 Reserve space for 4 registers.
24800 2 push {R7} Push low registers.
24801 4 add R7, SP, #20 Get the stack pointer before the push.
24802 6 str R7, [SP, #8] Store the stack pointer
24803 (before reserving the space).
24804 8 mov R7, PC Get hold of the start of this code + 12.
24805 10 str R7, [SP, #16] Store it.
24806 12 mov R7, FP Get hold of the current frame pointer.
24807 14 str R7, [SP, #4] Store it.
24808 16 mov R7, LR Get hold of the current return address.
24809 18 str R7, [SP, #12] Store it.
24810 20 add R7, SP, #16 Point at the start of the
24811 backtrace structure.
24812 22 mov FP, R7 Put this value into the frame pointer. */
24814 work_register = thumb_find_work_register (live_regs_mask);
24815 work_reg = gen_rtx_REG (SImode, work_register);
24816 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24818 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24819 stack_pointer_rtx, GEN_INT (-16)));
24820 RTX_FRAME_RELATED_P (insn) = 1;
24822 if (l_mask)
24824 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24825 RTX_FRAME_RELATED_P (insn) = 1;
24827 offset = bit_count (l_mask) * UNITS_PER_WORD;
24830 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24831 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24833 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24834 x = gen_frame_mem (SImode, x);
24835 emit_move_insn (x, work_reg);
24837 /* Make sure that the instruction fetching the PC is in the right place
24838 to calculate "start of backtrace creation code + 12". */
24839 /* ??? The stores using the common WORK_REG ought to be enough to
24840 prevent the scheduler from doing anything weird. Failing that
24841 we could always move all of the following into an UNSPEC_VOLATILE. */
24842 if (l_mask)
24844 x = gen_rtx_REG (SImode, PC_REGNUM);
24845 emit_move_insn (work_reg, x);
24847 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24848 x = gen_frame_mem (SImode, x);
24849 emit_move_insn (x, work_reg);
24851 emit_move_insn (work_reg, arm_hfp_rtx);
24853 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24854 x = gen_frame_mem (SImode, x);
24855 emit_move_insn (x, work_reg);
24857 else
24859 emit_move_insn (work_reg, arm_hfp_rtx);
24861 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24862 x = gen_frame_mem (SImode, x);
24863 emit_move_insn (x, work_reg);
24865 x = gen_rtx_REG (SImode, PC_REGNUM);
24866 emit_move_insn (work_reg, x);
24868 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24869 x = gen_frame_mem (SImode, x);
24870 emit_move_insn (x, work_reg);
24873 x = gen_rtx_REG (SImode, LR_REGNUM);
24874 emit_move_insn (work_reg, x);
24876 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24877 x = gen_frame_mem (SImode, x);
24878 emit_move_insn (x, work_reg);
24880 x = GEN_INT (offset + 12);
24881 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24883 emit_move_insn (arm_hfp_rtx, work_reg);
24885 /* Optimization: If we are not pushing any low registers but we are going
24886 to push some high registers then delay our first push. This will just
24887 be a push of LR and we can combine it with the push of the first high
24888 register. */
24889 else if ((l_mask & 0xff) != 0
24890 || (high_regs_pushed == 0 && l_mask))
24892 unsigned long mask = l_mask;
24893 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24894 insn = thumb1_emit_multi_reg_push (mask, mask);
24895 RTX_FRAME_RELATED_P (insn) = 1;
24898 if (high_regs_pushed)
24900 unsigned pushable_regs;
24901 unsigned next_hi_reg;
24902 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24903 : crtl->args.info.nregs;
24904 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24906 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24907 if (live_regs_mask & (1 << next_hi_reg))
24908 break;
24910 /* Here we need to mask out registers used for passing arguments,
24911 even if they could otherwise be pushed, to avoid using them to stash
24912 the high registers; such a stash could clobber live argument values. */
24913 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24915 if (pushable_regs == 0)
24916 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24918 while (high_regs_pushed > 0)
24920 unsigned long real_regs_mask = 0;
24922 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24924 if (pushable_regs & (1 << regno))
24926 emit_move_insn (gen_rtx_REG (SImode, regno),
24927 gen_rtx_REG (SImode, next_hi_reg));
24929 high_regs_pushed --;
24930 real_regs_mask |= (1 << next_hi_reg);
24932 if (high_regs_pushed)
24934 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24935 next_hi_reg --)
24936 if (live_regs_mask & (1 << next_hi_reg))
24937 break;
24939 else
24941 pushable_regs &= ~((1 << regno) - 1);
24942 break;
24947 /* If we had to find a work register and we have not yet
24948 saved the LR then add it to the list of regs to push. */
24949 if (l_mask == (1 << LR_REGNUM))
24951 pushable_regs |= l_mask;
24952 real_regs_mask |= l_mask;
24953 l_mask = 0;
24956 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24957 RTX_FRAME_RELATED_P (insn) = 1;
24961 /* Load the pic register before setting the frame pointer,
24962 so we can use r7 as a temporary work register. */
24963 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24964 arm_load_pic_register (live_regs_mask);
24966 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24967 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24968 stack_pointer_rtx);
24970 size = offsets->outgoing_args - offsets->saved_args;
24971 if (flag_stack_usage_info)
24972 current_function_static_stack_size = size;
24974 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24975 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24976 sorry ("-fstack-check=specific for Thumb-1");
24978 amount = offsets->outgoing_args - offsets->saved_regs;
24979 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24980 if (amount)
24982 if (amount < 512)
24984 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24985 GEN_INT (- amount)));
24986 RTX_FRAME_RELATED_P (insn) = 1;
24988 else
24990 rtx reg, dwarf;
24992 /* The stack decrement is too big for an immediate value in a single
24993 insn. In theory we could issue multiple subtracts, but after
24994 three of them it becomes more space efficient to place the full
24995 value in the constant pool and load into a register. (Also the
24996 ARM debugger really likes to see only one stack decrement per
24997 function). So instead we look for a scratch register into which
24998 we can load the decrement, and then we subtract this from the
24999 stack pointer. Unfortunately on the thumb the only available
25000 scratch registers are the argument registers, and we cannot use
25001 these as they may hold arguments to the function. Instead we
25002 attempt to locate a call preserved register which is used by this
25003 function. If we can find one, then we know that it will have
25004 been pushed at the start of the prologue and so we can corrupt
25005 it now. */
25006 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25007 if (live_regs_mask & (1 << regno))
25008 break;
25010 gcc_assert(regno <= LAST_LO_REGNUM);
25012 reg = gen_rtx_REG (SImode, regno);
25014 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25016 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25017 stack_pointer_rtx, reg));
25019 dwarf = gen_rtx_SET (stack_pointer_rtx,
25020 plus_constant (Pmode, stack_pointer_rtx,
25021 -amount));
25022 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25023 RTX_FRAME_RELATED_P (insn) = 1;
25027 if (frame_pointer_needed)
25028 thumb_set_frame_pointer (offsets);
25030 /* If we are profiling, make sure no instructions are scheduled before
25031 the call to mcount. Similarly if the user has requested no
25032 scheduling in the prolog. Similarly if we want non-call exceptions
25033 using the EABI unwinder, to prevent faulting instructions from being
25034 swapped with a stack adjustment. */
25035 if (crtl->profile || !TARGET_SCHED_PROLOG
25036 || (arm_except_unwind_info (&global_options) == UI_TARGET
25037 && cfun->can_throw_non_call_exceptions))
25038 emit_insn (gen_blockage ());
25040 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25041 if (live_regs_mask & 0xff)
25042 cfun->machine->lr_save_eliminated = 0;
25045 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
25046 POP instruction can be generated. LR should be replaced by PC. All
25047 the checks required are already done by USE_RETURN_INSN (). Hence,
25048 all we really need to check here is whether a single register or
25049 multiple registers need to be popped. */
25050 void
25051 thumb2_expand_return (bool simple_return)
25053 int i, num_regs;
25054 unsigned long saved_regs_mask;
25055 arm_stack_offsets *offsets;
25057 offsets = arm_get_frame_offsets ();
25058 saved_regs_mask = offsets->saved_regs_mask;
25060 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25061 if (saved_regs_mask & (1 << i))
25062 num_regs++;
25064 if (!simple_return && saved_regs_mask)
25066 if (num_regs == 1)
25068 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25069 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25070 rtx addr = gen_rtx_MEM (SImode,
25071 gen_rtx_POST_INC (SImode,
25072 stack_pointer_rtx));
25073 set_mem_alias_set (addr, get_frame_alias_set ());
25074 XVECEXP (par, 0, 0) = ret_rtx;
25075 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25076 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25077 emit_jump_insn (par);
25079 else
25081 saved_regs_mask &= ~ (1 << LR_REGNUM);
25082 saved_regs_mask |= (1 << PC_REGNUM);
25083 arm_emit_multi_reg_pop (saved_regs_mask);
25086 else
25088 emit_jump_insn (simple_return_rtx);
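/* Illustrative outcomes (hypothetical register sets): with only LR saved
   and a non-simple return, the two-element PARALLEL built above amounts to
   a "pop {pc}"; with r4, r5 and LR saved, LR is replaced by PC in the mask
   and arm_emit_multi_reg_pop produces "pop {r4, r5, pc}"; with nothing
   saved, or for a simple return, a plain return is emitted instead.  */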
25092 void
25093 thumb1_expand_epilogue (void)
25095 HOST_WIDE_INT amount;
25096 arm_stack_offsets *offsets;
25097 int regno;
25099 /* Naked functions don't have epilogues. */
25100 if (IS_NAKED (arm_current_func_type ()))
25101 return;
25103 offsets = arm_get_frame_offsets ();
25104 amount = offsets->outgoing_args - offsets->saved_regs;
25106 if (frame_pointer_needed)
25108 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25109 amount = offsets->locals_base - offsets->saved_regs;
25111 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25113 gcc_assert (amount >= 0);
25114 if (amount)
25116 emit_insn (gen_blockage ());
25118 if (amount < 512)
25119 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25120 GEN_INT (amount)));
25121 else
25123 /* r3 is always free in the epilogue. */
25124 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25126 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25127 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25131 /* Emit a USE (stack_pointer_rtx), so that
25132 the stack adjustment will not be deleted. */
25133 emit_insn (gen_force_register_use (stack_pointer_rtx));
25135 if (crtl->profile || !TARGET_SCHED_PROLOG)
25136 emit_insn (gen_blockage ());
25138 /* Emit a clobber for each insn that will be restored in the epilogue,
25139 so that flow2 will get register lifetimes correct. */
25140 for (regno = 0; regno < 13; regno++)
25141 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25142 emit_clobber (gen_rtx_REG (SImode, regno));
25144 if (! df_regs_ever_live_p (LR_REGNUM))
25145 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25148 /* Epilogue code for APCS frame. */
25149 static void
25150 arm_expand_epilogue_apcs_frame (bool really_return)
25152 unsigned long func_type;
25153 unsigned long saved_regs_mask;
25154 int num_regs = 0;
25155 int i;
25156 int floats_from_frame = 0;
25157 arm_stack_offsets *offsets;
25159 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25160 func_type = arm_current_func_type ();
25162 /* Get frame offsets for ARM. */
25163 offsets = arm_get_frame_offsets ();
25164 saved_regs_mask = offsets->saved_regs_mask;
25166 /* Find the offset of the floating-point save area in the frame. */
25167 floats_from_frame
25168 = (offsets->saved_args
25169 + arm_compute_static_chain_stack_bytes ()
25170 - offsets->frame);
25172 /* Compute how many core registers are saved and how far away the floats are. */
25173 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25174 if (saved_regs_mask & (1 << i))
25176 num_regs++;
25177 floats_from_frame += 4;
25180 if (TARGET_HARD_FLOAT && TARGET_VFP)
25182 int start_reg;
25183 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25185 /* The offset is from IP_REGNUM. */
25186 int saved_size = arm_get_vfp_saved_size ();
25187 if (saved_size > 0)
25189 rtx_insn *insn;
25190 floats_from_frame += saved_size;
25191 insn = emit_insn (gen_addsi3 (ip_rtx,
25192 hard_frame_pointer_rtx,
25193 GEN_INT (-floats_from_frame)));
25194 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25195 ip_rtx, hard_frame_pointer_rtx);
25198 /* Generate VFP register multi-pop. */
25199 start_reg = FIRST_VFP_REGNUM;
25201 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25202 /* Look for a case where a reg does not need restoring. */
25203 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25204 && (!df_regs_ever_live_p (i + 1)
25205 || call_used_regs[i + 1]))
25207 if (start_reg != i)
25208 arm_emit_vfp_multi_reg_pop (start_reg,
25209 (i - start_reg) / 2,
25210 gen_rtx_REG (SImode,
25211 IP_REGNUM));
25212 start_reg = i + 2;
25215 /* Restore the remaining regs that we have discovered (or possibly
25216 even all of them, if the conditional in the for loop never
25217 fired). */
25218 if (start_reg != i)
25219 arm_emit_vfp_multi_reg_pop (start_reg,
25220 (i - start_reg) / 2,
25221 gen_rtx_REG (SImode, IP_REGNUM));
25224 if (TARGET_IWMMXT)
25226 /* The frame pointer is guaranteed to be non-double-word aligned, as
25227 it is set to the double-word-aligned old stack pointer minus 4. */
25228 rtx_insn *insn;
25229 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25231 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25232 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25234 rtx addr = gen_frame_mem (V2SImode,
25235 plus_constant (Pmode, hard_frame_pointer_rtx,
25236 - lrm_count * 4));
25237 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25238 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25239 gen_rtx_REG (V2SImode, i),
25240 NULL_RTX);
25241 lrm_count += 2;
25245 /* saved_regs_mask should contain IP, which holds the old stack pointer
25246 from the time the activation record was created. Since SP and IP are
25247 adjacent registers, we can restore the value directly into SP. */
25248 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25249 saved_regs_mask &= ~(1 << IP_REGNUM);
25250 saved_regs_mask |= (1 << SP_REGNUM);
25252 /* There are two registers left in saved_regs_mask - LR and PC. We
25253 only need to restore LR (the return address), but to
25254 save time we can load it directly into PC, unless we need a
25255 special function exit sequence, or we are not really returning. */
25256 if (really_return
25257 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25258 && !crtl->calls_eh_return)
25259 /* Delete LR from the register mask, so that LR on
25260 the stack is loaded into the PC in the register mask. */
25261 saved_regs_mask &= ~(1 << LR_REGNUM);
25262 else
25263 saved_regs_mask &= ~(1 << PC_REGNUM);
25265 num_regs = bit_count (saved_regs_mask);
25266 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25268 rtx_insn *insn;
25269 emit_insn (gen_blockage ());
25270 /* Unwind the stack to just below the saved registers. */
25271 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25272 hard_frame_pointer_rtx,
25273 GEN_INT (- 4 * num_regs)));
25275 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25276 stack_pointer_rtx, hard_frame_pointer_rtx);
25279 arm_emit_multi_reg_pop (saved_regs_mask);
25281 if (IS_INTERRUPT (func_type))
25283 /* Interrupt handlers will have pushed the
25284 IP onto the stack, so restore it now. */
25285 rtx_insn *insn;
25286 rtx addr = gen_rtx_MEM (SImode,
25287 gen_rtx_POST_INC (SImode,
25288 stack_pointer_rtx));
25289 set_mem_alias_set (addr, get_frame_alias_set ());
25290 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25291 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25292 gen_rtx_REG (SImode, IP_REGNUM),
25293 NULL_RTX);
25296 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25297 return;
25299 if (crtl->calls_eh_return)
25300 emit_insn (gen_addsi3 (stack_pointer_rtx,
25301 stack_pointer_rtx,
25302 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25304 if (IS_STACKALIGN (func_type))
25305 /* Restore the original stack pointer. Before prologue, the stack was
25306 realigned and the original stack pointer saved in r0. For details,
25307 see comment in arm_expand_prologue. */
25308 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25310 emit_jump_insn (simple_return_rtx);
25313 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25314 function is not a sibcall. */
25315 void
25316 arm_expand_epilogue (bool really_return)
25318 unsigned long func_type;
25319 unsigned long saved_regs_mask;
25320 int num_regs = 0;
25321 int i;
25322 int amount;
25323 arm_stack_offsets *offsets;
25325 func_type = arm_current_func_type ();
25327 /* Naked functions don't have epilogues. Hence, generate the return pattern
25328 and let output_return_instruction take care of any instruction emission. */
25329 if (IS_NAKED (func_type)
25330 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25332 if (really_return)
25333 emit_jump_insn (simple_return_rtx);
25334 return;
25337 /* If we are throwing an exception, then we really must be doing a
25338 return, so we can't tail-call. */
25339 gcc_assert (!crtl->calls_eh_return || really_return);
25341 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25343 arm_expand_epilogue_apcs_frame (really_return);
25344 return;
25347 /* Get frame offsets for ARM. */
25348 offsets = arm_get_frame_offsets ();
25349 saved_regs_mask = offsets->saved_regs_mask;
25350 num_regs = bit_count (saved_regs_mask);
25352 if (frame_pointer_needed)
25354 rtx_insn *insn;
25355 /* Restore stack pointer if necessary. */
25356 if (TARGET_ARM)
25358 /* In ARM mode, frame pointer points to first saved register.
25359 Restore stack pointer to last saved register. */
25360 amount = offsets->frame - offsets->saved_regs;
25362 /* Force out any pending memory operations that reference stacked data
25363 before stack de-allocation occurs. */
25364 emit_insn (gen_blockage ());
25365 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25366 hard_frame_pointer_rtx,
25367 GEN_INT (amount)));
25368 arm_add_cfa_adjust_cfa_note (insn, amount,
25369 stack_pointer_rtx,
25370 hard_frame_pointer_rtx);
25372 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25373 deleted. */
25374 emit_insn (gen_force_register_use (stack_pointer_rtx));
25376 else
25378 /* In Thumb-2 mode, the frame pointer points to the last saved
25379 register. */
25380 amount = offsets->locals_base - offsets->saved_regs;
25381 if (amount)
25383 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25384 hard_frame_pointer_rtx,
25385 GEN_INT (amount)));
25386 arm_add_cfa_adjust_cfa_note (insn, amount,
25387 hard_frame_pointer_rtx,
25388 hard_frame_pointer_rtx);
25391 /* Force out any pending memory operations that reference stacked data
25392 before stack de-allocation occurs. */
25393 emit_insn (gen_blockage ());
25394 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25395 hard_frame_pointer_rtx));
25396 arm_add_cfa_adjust_cfa_note (insn, 0,
25397 stack_pointer_rtx,
25398 hard_frame_pointer_rtx);
25399 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25400 deleted. */
25401 emit_insn (gen_force_register_use (stack_pointer_rtx));
25404 else
25406 /* Pop off outgoing args and local frame to adjust stack pointer to
25407 last saved register. */
25408 amount = offsets->outgoing_args - offsets->saved_regs;
25409 if (amount)
25411 rtx_insn *tmp;
25412 /* Force out any pending memory operations that reference stacked data
25413 before stack de-allocation occurs. */
25414 emit_insn (gen_blockage ());
25415 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25416 stack_pointer_rtx,
25417 GEN_INT (amount)));
25418 arm_add_cfa_adjust_cfa_note (tmp, amount,
25419 stack_pointer_rtx, stack_pointer_rtx);
25420 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25421 not deleted. */
25422 emit_insn (gen_force_register_use (stack_pointer_rtx));
25426 if (TARGET_HARD_FLOAT && TARGET_VFP)
25428 /* Generate VFP register multi-pop. */
25429 int end_reg = LAST_VFP_REGNUM + 1;
25431 /* Scan the registers in reverse order. We need to match
25432 any groupings made in the prologue and generate matching
25433 vldm operations. The need to match groups is because,
25434 unlike pop, vldm can only do consecutive regs. */
25435 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25436 /* Look for a case where a reg does not need restoring. */
25437 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25438 && (!df_regs_ever_live_p (i + 1)
25439 || call_used_regs[i + 1]))
25441 /* Restore the regs discovered so far (from reg+2 to
25442 end_reg). */
25443 if (end_reg > i + 2)
25444 arm_emit_vfp_multi_reg_pop (i + 2,
25445 (end_reg - (i + 2)) / 2,
25446 stack_pointer_rtx);
25447 end_reg = i;
25450 /* Restore the remaining regs that we have discovered (or possibly
25451 even all of them, if the conditional in the for loop never
25452 fired). */
25453 if (end_reg > i + 2)
25454 arm_emit_vfp_multi_reg_pop (i + 2,
25455 (end_reg - (i + 2)) / 2,
25456 stack_pointer_rtx);
25459 if (TARGET_IWMMXT)
25460 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25461 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25463 rtx_insn *insn;
25464 rtx addr = gen_rtx_MEM (V2SImode,
25465 gen_rtx_POST_INC (SImode,
25466 stack_pointer_rtx));
25467 set_mem_alias_set (addr, get_frame_alias_set ());
25468 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25469 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25470 gen_rtx_REG (V2SImode, i),
25471 NULL_RTX);
25472 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25473 stack_pointer_rtx, stack_pointer_rtx);
25476 if (saved_regs_mask)
25478 rtx insn;
25479 bool return_in_pc = false;
25481 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25482 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25483 && !IS_STACKALIGN (func_type)
25484 && really_return
25485 && crtl->args.pretend_args_size == 0
25486 && saved_regs_mask & (1 << LR_REGNUM)
25487 && !crtl->calls_eh_return)
25489 saved_regs_mask &= ~(1 << LR_REGNUM);
25490 saved_regs_mask |= (1 << PC_REGNUM);
25491 return_in_pc = true;
25494 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25496 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25497 if (saved_regs_mask & (1 << i))
25499 rtx addr = gen_rtx_MEM (SImode,
25500 gen_rtx_POST_INC (SImode,
25501 stack_pointer_rtx));
25502 set_mem_alias_set (addr, get_frame_alias_set ());
25504 if (i == PC_REGNUM)
25506 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25507 XVECEXP (insn, 0, 0) = ret_rtx;
25508 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25509 addr);
25510 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25511 insn = emit_jump_insn (insn);
25513 else
25515 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25516 addr));
25517 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25518 gen_rtx_REG (SImode, i),
25519 NULL_RTX);
25520 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25521 stack_pointer_rtx,
25522 stack_pointer_rtx);
25526 else
25528 if (TARGET_LDRD
25529 && current_tune->prefer_ldrd_strd
25530 && !optimize_function_for_size_p (cfun))
25532 if (TARGET_THUMB2)
25533 thumb2_emit_ldrd_pop (saved_regs_mask);
25534 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25535 arm_emit_ldrd_pop (saved_regs_mask);
25536 else
25537 arm_emit_multi_reg_pop (saved_regs_mask);
25539 else
25540 arm_emit_multi_reg_pop (saved_regs_mask);
25543 if (return_in_pc)
25544 return;
25547 amount
25548 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25549 if (amount)
25551 int i, j;
25552 rtx dwarf = NULL_RTX;
25553 rtx_insn *tmp =
25554 emit_insn (gen_addsi3 (stack_pointer_rtx,
25555 stack_pointer_rtx,
25556 GEN_INT (amount)));
25558 RTX_FRAME_RELATED_P (tmp) = 1;
25560 if (cfun->machine->uses_anonymous_args)
25562 /* Restore pretend args. Refer to arm_expand_prologue for how the
25563 pretend args are saved on the stack. */
25564 int num_regs = crtl->args.pretend_args_size / 4;
25565 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25566 for (j = 0, i = 0; j < num_regs; i++)
25567 if (saved_regs_mask & (1 << i))
25569 rtx reg = gen_rtx_REG (SImode, i);
25570 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25571 j++;
25573 REG_NOTES (tmp) = dwarf;
25575 arm_add_cfa_adjust_cfa_note (tmp, amount,
25576 stack_pointer_rtx, stack_pointer_rtx);
25579 if (!really_return)
25580 return;
25582 if (crtl->calls_eh_return)
25583 emit_insn (gen_addsi3 (stack_pointer_rtx,
25584 stack_pointer_rtx,
25585 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25587 if (IS_STACKALIGN (func_type))
25588 /* Restore the original stack pointer. Before prologue, the stack was
25589 realigned and the original stack pointer saved in r0. For details,
25590 see comment in arm_expand_prologue. */
25591 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25593 emit_jump_insn (simple_return_rtx);
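/* Worked example (illustrative) of the pretend-args handling above: for a
   variadic function whose prologue had to push r2 and r3 as pretend args,
   crtl->args.pretend_args_size is 8, so num_regs is 2 and the mask computed
   from (0xf0 >> 2) & 0xf is 0xc, i.e. {r2, r3}.  Assuming r4 and LR were the
   only other saved registers, the epilogue ends roughly as

       pop     {r4, lr}
       add     sp, sp, #8          @ discard the pretend args
       bx      lr                  @ or "mov pc, lr" on older cores
*/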
25596 /* Implementation of insn prologue_thumb1_interwork. This is the first
25597 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25599 const char *
25600 thumb1_output_interwork (void)
25602 const char * name;
25603 FILE *f = asm_out_file;
25605 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25606 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25607 == SYMBOL_REF);
25608 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25610 /* Generate code sequence to switch us into Thumb mode. */
25611 /* The .code 32 directive has already been emitted by
25612 ASM_DECLARE_FUNCTION_NAME. */
25613 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25614 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25616 /* Generate a label, so that the debugger will notice the
25617 change in instruction sets. This label is also used by
25618 the assembler to bypass the ARM code when this function
25619 is called from a Thumb encoded function elsewhere in the
25620 same file. Hence the definition of STUB_NAME here must
25621 agree with the definition in gas/config/tc-arm.c. */
25623 #define STUB_NAME ".real_start_of"
25625 fprintf (f, "\t.code\t16\n");
25626 #ifdef ARM_PE
25627 if (arm_dllexport_name_p (name))
25628 name = arm_strip_name_encoding (name);
25629 #endif
25630 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25631 fprintf (f, "\t.thumb_func\n");
25632 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25634 return "";
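/* For a Thumb function <name> that may be entered from ARM code, the
   routine above emits approximately

       orr     ip, pc, #1
       bx      ip
       .code   16
       .globl  .real_start_of<name>
       .thumb_func
   .real_start_of<name>:

   where <name> stands for the (possibly %U-prefixed) function name.  */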
25637 /* Handle the case of a double word load into a low register from
25638 a computed memory address. The computed address may involve a
25639 register which is overwritten by the load. */
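/* For instance (illustrative), with operands[0] = r0 and the address held in
   r0 itself, loading the low word first would clobber the base register:

       ldr     r0, [r0]            @ base register overwritten
       ldr     r1, [r0, #4]        @ wrong address

   so in that case the code below loads the high word first:

       ldr     r1, [r0, #4]
       ldr     r0, [r0]
*/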
25640 const char *
25641 thumb_load_double_from_address (rtx *operands)
25643 rtx addr;
25644 rtx base;
25645 rtx offset;
25646 rtx arg1;
25647 rtx arg2;
25649 gcc_assert (REG_P (operands[0]));
25650 gcc_assert (MEM_P (operands[1]));
25652 /* Get the memory address. */
25653 addr = XEXP (operands[1], 0);
25655 /* Work out how the memory address is computed. */
25656 switch (GET_CODE (addr))
25658 case REG:
25659 operands[2] = adjust_address (operands[1], SImode, 4);
25661 if (REGNO (operands[0]) == REGNO (addr))
25663 output_asm_insn ("ldr\t%H0, %2", operands);
25664 output_asm_insn ("ldr\t%0, %1", operands);
25666 else
25668 output_asm_insn ("ldr\t%0, %1", operands);
25669 output_asm_insn ("ldr\t%H0, %2", operands);
25671 break;
25673 case CONST:
25674 /* Compute <address> + 4 for the high order load. */
25675 operands[2] = adjust_address (operands[1], SImode, 4);
25677 output_asm_insn ("ldr\t%0, %1", operands);
25678 output_asm_insn ("ldr\t%H0, %2", operands);
25679 break;
25681 case PLUS:
25682 arg1 = XEXP (addr, 0);
25683 arg2 = XEXP (addr, 1);
25685 if (CONSTANT_P (arg1))
25686 base = arg2, offset = arg1;
25687 else
25688 base = arg1, offset = arg2;
25690 gcc_assert (REG_P (base));
25692 /* Catch the case of <address> = <reg> + <reg> */
25693 if (REG_P (offset))
25695 int reg_offset = REGNO (offset);
25696 int reg_base = REGNO (base);
25697 int reg_dest = REGNO (operands[0]);
25699 /* Add the base and offset registers together into the
25700 higher destination register. */
25701 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25702 reg_dest + 1, reg_base, reg_offset);
25704 /* Load the lower destination register from the address in
25705 the higher destination register. */
25706 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25707 reg_dest, reg_dest + 1);
25709 /* Load the higher destination register from its own address
25710 plus 4. */
25711 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25712 reg_dest + 1, reg_dest + 1);
25714 else
25716 /* Compute <address> + 4 for the high order load. */
25717 operands[2] = adjust_address (operands[1], SImode, 4);
25719 /* If the computed address is held in the low order register
25720 then load the high order register first, otherwise always
25721 load the low order register first. */
25722 if (REGNO (operands[0]) == REGNO (base))
25724 output_asm_insn ("ldr\t%H0, %2", operands);
25725 output_asm_insn ("ldr\t%0, %1", operands);
25727 else
25729 output_asm_insn ("ldr\t%0, %1", operands);
25730 output_asm_insn ("ldr\t%H0, %2", operands);
25733 break;
25735 case LABEL_REF:
25736 /* With no registers to worry about we can just load the value
25737 directly. */
25738 operands[2] = adjust_address (operands[1], SImode, 4);
25740 output_asm_insn ("ldr\t%H0, %2", operands);
25741 output_asm_insn ("ldr\t%0, %1", operands);
25742 break;
25744 default:
25745 gcc_unreachable ();
25748 return "";
25751 const char *
25752 thumb_output_move_mem_multiple (int n, rtx *operands)
25754 switch (n)
25756 case 2:
25757 if (REGNO (operands[4]) > REGNO (operands[5]))
25758 std::swap (operands[4], operands[5]);
25760 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25761 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25762 break;
25764 case 3:
25765 if (REGNO (operands[4]) > REGNO (operands[5]))
25766 std::swap (operands[4], operands[5]);
25767 if (REGNO (operands[5]) > REGNO (operands[6]))
25768 std::swap (operands[5], operands[6]);
25769 if (REGNO (operands[4]) > REGNO (operands[5]))
25770 std::swap (operands[4], operands[5]);
25772 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25773 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25774 break;
25776 default:
25777 gcc_unreachable ();
25780 return "";
25783 /* Output a call-via instruction for thumb state. */
25784 const char *
25785 thumb_call_via_reg (rtx reg)
25787 int regno = REGNO (reg);
25788 rtx *labelp;
25790 gcc_assert (regno < LR_REGNUM);
25792 /* If we are in the normal text section we can use a single instance
25793 per compilation unit. If we are doing function sections, then we need
25794 an entry per section, since we can't rely on reachability. */
25795 if (in_section == text_section)
25797 thumb_call_reg_needed = 1;
25799 if (thumb_call_via_label[regno] == NULL)
25800 thumb_call_via_label[regno] = gen_label_rtx ();
25801 labelp = thumb_call_via_label + regno;
25803 else
25805 if (cfun->machine->call_via[regno] == NULL)
25806 cfun->machine->call_via[regno] = gen_label_rtx ();
25807 labelp = cfun->machine->call_via + regno;
25810 output_asm_insn ("bl\t%a0", labelp);
25811 return "";
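/* Sketch of the resulting code, assuming a call through r3 from the normal
   text section (the label name is illustrative; a fresh internal label is
   used in practice):

       bl      .Lcall_via_r3       @ emitted here
       ...
   .Lcall_via_r3:
       bx      r3                  @ emitted later by arm_file_end
*/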
25814 /* Routines for generating rtl. */
25815 void
25816 thumb_expand_movmemqi (rtx *operands)
25818 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25819 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25820 HOST_WIDE_INT len = INTVAL (operands[2]);
25821 HOST_WIDE_INT offset = 0;
25823 while (len >= 12)
25825 emit_insn (gen_movmem12b (out, in, out, in));
25826 len -= 12;
25829 if (len >= 8)
25831 emit_insn (gen_movmem8b (out, in, out, in));
25832 len -= 8;
25835 if (len >= 4)
25837 rtx reg = gen_reg_rtx (SImode);
25838 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25839 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25840 len -= 4;
25841 offset += 4;
25844 if (len >= 2)
25846 rtx reg = gen_reg_rtx (HImode);
25847 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25848 plus_constant (Pmode, in,
25849 offset))));
25850 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25851 offset)),
25852 reg));
25853 len -= 2;
25854 offset += 2;
25857 if (len)
25859 rtx reg = gen_reg_rtx (QImode);
25860 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25861 plus_constant (Pmode, in,
25862 offset))));
25863 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25864 offset)),
25865 reg));
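/* Worked example of the decomposition above: a 23-byte copy becomes one
   12-byte block move, one 8-byte block move, then a 2-byte and a 1-byte
   tail (12 + 8 + 2 + 1); a 16-byte copy becomes 12 + 4.  */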
25869 void
25870 thumb_reload_out_hi (rtx *operands)
25872 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25875 /* Return the length of a function name prefix
25876 that starts with the character 'c'. */
25877 static int
25878 arm_get_strip_length (int c)
25880 switch (c)
25882 ARM_NAME_ENCODING_LENGTHS
25883 default: return 0;
25887 /* Return a pointer to a function's name with any
25888 and all prefix encodings stripped from it. */
25889 const char *
25890 arm_strip_name_encoding (const char *name)
25892 int skip;
25894 while ((skip = arm_get_strip_length (* name)))
25895 name += skip;
25897 return name;
25900 /* If there is a '*' anywhere in the name's prefix, then
25901 emit the stripped name verbatim, otherwise prepend an
25902 underscore if leading underscores are being used. */
25903 void
25904 arm_asm_output_labelref (FILE *stream, const char *name)
25906 int skip;
25907 int verbatim = 0;
25909 while ((skip = arm_get_strip_length (* name)))
25911 verbatim |= (*name == '*');
25912 name += skip;
25915 if (verbatim)
25916 fputs (name, stream);
25917 else
25918 asm_fprintf (stream, "%U%s", name);
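/* Illustrative example: a name encoded as "*foo" is emitted verbatim as
   "foo", whereas a plain "foo" comes out as "%Ufoo", i.e. prefixed with the
   target's user label prefix (an underscore on some targets, empty on
   ELF).  */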
25921 /* This function is used to emit an EABI tag and its associated value.
25922 We emit the numerical value of the tag in case the assembler does not
25923 support textual tags. (Eg gas prior to 2.20). If requested we include
25924 the tag name in a comment so that anyone reading the assembler output
25925 will know which tag is being set.
25927 This function is not static because arm-c.c needs it too. */
25929 void
25930 arm_emit_eabi_attribute (const char *name, int num, int val)
25932 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25933 if (flag_verbose_asm || flag_debug_asm)
25934 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25935 asm_fprintf (asm_out_file, "\n");
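/* Example of the output (with -fverbose-asm, where the ARM comment
   character is '@'):

       .eabi_attribute 25, 1   @ Tag_ABI_align8_preserved
*/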
25938 /* This function is used to print CPU tuning information as comment
25939 in assembler file. Pointers are not printed for now. */
25941 void
25942 arm_print_tune_info (void)
25944 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25945 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25946 current_tune->constant_limit);
25947 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25948 current_tune->max_insns_skipped);
25949 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
25950 current_tune->prefetch.num_slots);
25951 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
25952 current_tune->prefetch.l1_cache_size);
25953 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25954 current_tune->prefetch.l1_cache_line_size);
25955 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25956 (int) current_tune->prefer_constant_pool);
25957 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25958 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25959 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25960 current_tune->branch_cost (false, false));
25961 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25962 current_tune->branch_cost (false, true));
25963 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25964 current_tune->branch_cost (true, false));
25965 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25966 current_tune->branch_cost (true, true));
25967 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25968 (int) current_tune->prefer_ldrd_strd);
25969 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25970 (int) current_tune->logical_op_non_short_circuit_thumb,
25971 (int) current_tune->logical_op_non_short_circuit_arm);
25972 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25973 (int) current_tune->prefer_neon_for_64bits);
25974 asm_fprintf (asm_out_file,
25975 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25976 (int) current_tune->disparage_flag_setting_t16_encodings);
25977 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25978 (int) current_tune->string_ops_prefer_neon);
25979 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25980 current_tune->max_insns_inline_memset);
25981 asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
25982 current_tune->fusible_ops);
25983 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25984 (int) current_tune->sched_autopref);
25987 static void
25988 arm_file_start (void)
25990 int val;
25992 if (TARGET_BPABI)
25994 if (arm_selected_arch)
25996 /* armv7ve doesn't support any extensions. */
25997 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25999 /* Keep backward compatibility for assemblers
26000 which don't support armv7ve. */
26001 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26002 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26003 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26004 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26005 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26007 else
26009 const char* pos = strchr (arm_selected_arch->name, '+');
26010 if (pos)
26012 char buf[32];
26013 gcc_assert (strlen (arm_selected_arch->name)
26014 <= sizeof (buf) / sizeof (*pos));
26015 strncpy (buf, arm_selected_arch->name,
26016 (pos - arm_selected_arch->name) * sizeof (*pos));
26017 buf[pos - arm_selected_arch->name] = '\0';
26018 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
26019 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
26021 else
26022 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
26025 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
26026 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
26027 else
26029 const char* truncated_name
26030 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
26031 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26034 if (print_tune_info)
26035 arm_print_tune_info ();
26037 if (! TARGET_SOFT_FLOAT && TARGET_VFP)
26039 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26040 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26042 if (TARGET_HARD_FLOAT_ABI)
26043 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26046 /* Some of these attributes only apply when the corresponding features
26047 are used. However we don't have any easy way of figuring this out.
26048 Conservatively record the setting that would have been used. */
26050 if (flag_rounding_math)
26051 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26053 if (!flag_unsafe_math_optimizations)
26055 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26056 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26058 if (flag_signaling_nans)
26059 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26061 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26062 flag_finite_math_only ? 1 : 3);
26064 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26065 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26066 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26067 flag_short_enums ? 1 : 2);
26069 /* Tag_ABI_optimization_goals. */
26070 if (optimize_size)
26071 val = 4;
26072 else if (optimize >= 2)
26073 val = 2;
26074 else if (optimize)
26075 val = 1;
26076 else
26077 val = 6;
26078 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26080 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26081 unaligned_access);
26083 if (arm_fp16_format)
26084 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26085 (int) arm_fp16_format);
26087 if (arm_lang_output_object_attributes_hook)
26088 arm_lang_output_object_attributes_hook();
26091 default_file_start ();
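/* Illustrative examples of the .arch handling above:

       -march=armv7ve      ->   .arch armv7-a
                                .arch_extension virt
                                .arch_extension idiv
                                .arch_extension sec
                                .arch_extension mp

       -march=armv8-a+crc  ->   .arch armv8-a
                                .arch_extension crc
*/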
26094 static void
26095 arm_file_end (void)
26097 int regno;
26099 if (NEED_INDICATE_EXEC_STACK)
26100 /* Add .note.GNU-stack. */
26101 file_end_indicate_exec_stack ();
26103 if (! thumb_call_reg_needed)
26104 return;
26106 switch_to_section (text_section);
26107 asm_fprintf (asm_out_file, "\t.code 16\n");
26108 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26110 for (regno = 0; regno < LR_REGNUM; regno++)
26112 rtx label = thumb_call_via_label[regno];
26114 if (label != 0)
26116 targetm.asm_out.internal_label (asm_out_file, "L",
26117 CODE_LABEL_NUMBER (label));
26118 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26123 #ifndef ARM_PE
26124 /* Symbols in the text segment can be accessed without indirecting via the
26125 constant pool; it may take an extra binary operation, but this is still
26126 faster than indirecting via memory. Don't do this when not optimizing,
26127 since we won't be calculating all of the offsets necessary to do this
26128 simplification. */
26130 static void
26131 arm_encode_section_info (tree decl, rtx rtl, int first)
26133 if (optimize > 0 && TREE_CONSTANT (decl))
26134 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26136 default_encode_section_info (decl, rtl, first);
26138 #endif /* !ARM_PE */
26140 static void
26141 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26143 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26144 && !strcmp (prefix, "L"))
26146 arm_ccfsm_state = 0;
26147 arm_target_insn = NULL;
26149 default_internal_label (stream, prefix, labelno);
26152 /* Output code to add DELTA to the first argument, and then jump
26153 to FUNCTION. Used for C++ multiple inheritance. */
26155 static void
26156 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26157 HOST_WIDE_INT, tree function)
26159 static int thunk_label = 0;
26160 char label[256];
26161 char labelpc[256];
26162 int mi_delta = delta;
26163 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26164 int shift = 0;
26165 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26166 ? 1 : 0);
26167 if (mi_delta < 0)
26168 mi_delta = - mi_delta;
26170 final_start_function (emit_barrier (), file, 1);
26172 if (TARGET_THUMB1)
26174 int labelno = thunk_label++;
26175 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26176 /* Thunks are entered in ARM mode when available. */
26177 if (TARGET_THUMB1_ONLY)
26179 /* push r3 so we can use it as a temporary. */
26180 /* TODO: Omit this save if r3 is not used. */
26181 fputs ("\tpush {r3}\n", file);
26182 fputs ("\tldr\tr3, ", file);
26184 else
26186 fputs ("\tldr\tr12, ", file);
26188 assemble_name (file, label);
26189 fputc ('\n', file);
26190 if (flag_pic)
26192 /* If we are generating PIC, the ldr instruction below loads
26193 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26194 the address of the add + 8, so we have:
26196 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26197 = target + 1.
26199 Note that we have "+ 1" because some versions of GNU ld
26200 don't set the low bit of the result for R_ARM_REL32
26201 relocations against thumb function symbols.
26202 On ARMv6M this is +4, not +8. */
26203 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26204 assemble_name (file, labelpc);
26205 fputs (":\n", file);
26206 if (TARGET_THUMB1_ONLY)
26208 /* This is 2 insns after the start of the thunk, so we know it
26209 is 4-byte aligned. */
26210 fputs ("\tadd\tr3, pc, r3\n", file);
26211 fputs ("\tmov r12, r3\n", file);
26213 else
26214 fputs ("\tadd\tr12, pc, r12\n", file);
26216 else if (TARGET_THUMB1_ONLY)
26217 fputs ("\tmov r12, r3\n", file);
26219 if (TARGET_THUMB1_ONLY)
26221 if (mi_delta > 255)
26223 fputs ("\tldr\tr3, ", file);
26224 assemble_name (file, label);
26225 fputs ("+4\n", file);
26226 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26227 mi_op, this_regno, this_regno);
26229 else if (mi_delta != 0)
26231 /* Thumb1 unified syntax requires the s suffix in the instruction name
26232 when one of the operands is an immediate. */
26233 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26234 mi_op, this_regno, this_regno,
26235 mi_delta);
26238 else
26240 /* TODO: Use movw/movt for large constants when available. */
26241 while (mi_delta != 0)
26243 if ((mi_delta & (3 << shift)) == 0)
26244 shift += 2;
26245 else
26247 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26248 mi_op, this_regno, this_regno,
26249 mi_delta & (0xff << shift));
26250 mi_delta &= ~(0xff << shift);
26251 shift += 8;
26255 if (TARGET_THUMB1)
26257 if (TARGET_THUMB1_ONLY)
26258 fputs ("\tpop\t{r3}\n", file);
26260 fprintf (file, "\tbx\tr12\n");
26261 ASM_OUTPUT_ALIGN (file, 2);
26262 assemble_name (file, label);
26263 fputs (":\n", file);
26264 if (flag_pic)
26266 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26267 rtx tem = XEXP (DECL_RTL (function), 0);
26268 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26269 pipeline offset is four rather than eight. Adjust the offset
26270 accordingly. */
26271 tem = plus_constant (GET_MODE (tem), tem,
26272 TARGET_THUMB1_ONLY ? -3 : -7);
26273 tem = gen_rtx_MINUS (GET_MODE (tem),
26274 tem,
26275 gen_rtx_SYMBOL_REF (Pmode,
26276 ggc_strdup (labelpc)));
26277 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26279 else
26280 /* Output ".word .LTHUNKn". */
26281 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26283 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26284 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26286 else
26288 fputs ("\tb\t", file);
26289 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26290 if (NEED_PLT_RELOC)
26291 fputs ("(PLT)", file);
26292 fputc ('\n', file);
26295 final_end_function ();
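/* Worked example for the non-Thumb-1 delta loop above: a delta of 4100
   (0x1004) is split into 8-bit chunks aligned on even bit positions, so for
   a non-aggregate return ('this' in r0) the thunk starts roughly with

       add     r0, r0, #4
       add     r0, r0, #4096
*/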
26298 /* MI thunk handling for TARGET_32BIT. */
26300 static void
26301 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26302 HOST_WIDE_INT vcall_offset, tree function)
26304 /* On ARM, this_regno is R0 or R1 depending on
26305 whether the function returns an aggregate or not.
26307 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26308 function)
26309 ? R1_REGNUM : R0_REGNUM);
26311 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26312 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26313 reload_completed = 1;
26314 emit_note (NOTE_INSN_PROLOGUE_END);
26316 /* Add DELTA to THIS_RTX. */
26317 if (delta != 0)
26318 arm_split_constant (PLUS, Pmode, NULL_RTX,
26319 delta, this_rtx, this_rtx, false);
26321 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26322 if (vcall_offset != 0)
26324 /* Load *THIS_RTX. */
26325 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26326 /* Compute *THIS_RTX + VCALL_OFFSET. */
26327 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26328 false);
26329 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26330 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26331 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26334 /* Generate a tail call to the target function. */
26335 if (!TREE_USED (function))
26337 assemble_external (function);
26338 TREE_USED (function) = 1;
26340 rtx funexp = XEXP (DECL_RTL (function), 0);
26341 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26342 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26343 SIBLING_CALL_P (insn) = 1;
26345 insn = get_insns ();
26346 shorten_branches (insn);
26347 final_start_function (insn, file, 1);
26348 final (insn, file, 1);
26349 final_end_function ();
26351 /* Stop pretending this is a post-reload pass. */
26352 reload_completed = 0;
26355 /* Output code to add DELTA to the first argument, and then jump
26356 to FUNCTION. Used for C++ multiple inheritance. */
26358 static void
26359 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26360 HOST_WIDE_INT vcall_offset, tree function)
26362 if (TARGET_32BIT)
26363 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26364 else
26365 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26369 arm_emit_vector_const (FILE *file, rtx x)
26371 int i;
26372 const char * pattern;
26374 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26376 switch (GET_MODE (x))
26378 case V2SImode: pattern = "%08x"; break;
26379 case V4HImode: pattern = "%04x"; break;
26380 case V8QImode: pattern = "%02x"; break;
26381 default: gcc_unreachable ();
26384 fprintf (file, "0x");
26385 for (i = CONST_VECTOR_NUNITS (x); i--;)
26387 rtx element;
26389 element = CONST_VECTOR_ELT (x, i);
26390 fprintf (file, pattern, INTVAL (element));
26393 return 1;
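/* Worked example: a V4HImode constant {1, 2, 3, 4} (element 0 listed first)
   is printed with its elements in reverse order, giving
   "0x0004000300020001".  */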
26396 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26397 HFmode constant pool entries are actually loaded with ldr. */
26398 void
26399 arm_emit_fp16_const (rtx c)
26401 long bits;
26403 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26404 if (WORDS_BIG_ENDIAN)
26405 assemble_zeros (2);
26406 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26407 if (!WORDS_BIG_ENDIAN)
26408 assemble_zeros (2);
26411 const char *
26412 arm_output_load_gr (rtx *operands)
26414 rtx reg;
26415 rtx offset;
26416 rtx wcgr;
26417 rtx sum;
26419 if (!MEM_P (operands [1])
26420 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26421 || !REG_P (reg = XEXP (sum, 0))
26422 || !CONST_INT_P (offset = XEXP (sum, 1))
26423 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26424 return "wldrw%?\t%0, %1";
26426 /* Fix up an out-of-range load of a GR register. */
26427 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26428 wcgr = operands[0];
26429 operands[0] = reg;
26430 output_asm_insn ("ldr%?\t%0, %1", operands);
26432 operands[0] = wcgr;
26433 operands[1] = reg;
26434 output_asm_insn ("tmcr%?\t%0, %1", operands);
26435 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26437 return "";
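/* Schematically, the out-of-range expansion above produces

       str     rX, [sp, #-4]!      @ Start of GR load expansion
       ldr     rX, <original address>
       tmcr    wcgrN, rX
       ldr     rX, [sp], #4        @ End of GR load expansion

   where rX is the base register of the original address and wcgrN the
   destination GR register.  */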
26440 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26442 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26443 named arg and all anonymous args onto the stack.
26444 XXX I know the prologue shouldn't be pushing registers, but it is faster
26445 that way. */
26447 static void
26448 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26449 machine_mode mode,
26450 tree type,
26451 int *pretend_size,
26452 int second_time ATTRIBUTE_UNUSED)
26454 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26455 int nregs;
26457 cfun->machine->uses_anonymous_args = 1;
26458 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26460 nregs = pcum->aapcs_ncrn;
26461 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26462 nregs++;
26464 else
26465 nregs = pcum->nregs;
26467 if (nregs < NUM_ARG_REGS)
26468 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
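/* Worked example (AAPCS, illustrative): for a variadic function with one
   named int argument, a single core register is used for named arguments,
   so nregs is 1 and *pretend_size becomes (4 - 1) * 4 = 12, telling the
   prologue to push r1-r3 so that the anonymous arguments are contiguous
   with any arguments already on the stack.  */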
26471 /* We can't rely on the caller doing the proper promotion when
26472 using APCS or ATPCS. */
26474 static bool
26475 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26477 return !TARGET_AAPCS_BASED;
26480 static machine_mode
26481 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26482 machine_mode mode,
26483 int *punsignedp ATTRIBUTE_UNUSED,
26484 const_tree fntype ATTRIBUTE_UNUSED,
26485 int for_return ATTRIBUTE_UNUSED)
26487 if (GET_MODE_CLASS (mode) == MODE_INT
26488 && GET_MODE_SIZE (mode) < 4)
26489 return SImode;
26491 return mode;
26494 /* AAPCS based ABIs use short enums by default. */
26496 static bool
26497 arm_default_short_enums (void)
26499 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26503 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26505 static bool
26506 arm_align_anon_bitfield (void)
26508 return TARGET_AAPCS_BASED;
26512 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26514 static tree
26515 arm_cxx_guard_type (void)
26517 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26521 /* The EABI says test the least significant bit of a guard variable. */
26523 static bool
26524 arm_cxx_guard_mask_bit (void)
26526 return TARGET_AAPCS_BASED;
26530 /* The EABI specifies that all array cookies are 8 bytes long. */
26532 static tree
26533 arm_get_cookie_size (tree type)
26535 tree size;
26537 if (!TARGET_AAPCS_BASED)
26538 return default_cxx_get_cookie_size (type);
26540 size = build_int_cst (sizetype, 8);
26541 return size;
26545 /* The EABI says that array cookies should also contain the element size. */
26547 static bool
26548 arm_cookie_has_size (void)
26550 return TARGET_AAPCS_BASED;
26554 /* The EABI says constructors and destructors should return a pointer to
26555 the object constructed/destroyed. */
26557 static bool
26558 arm_cxx_cdtor_returns_this (void)
26560 return TARGET_AAPCS_BASED;
26563 /* The EABI says that an inline function may never be the key
26564 method. */
26566 static bool
26567 arm_cxx_key_method_may_be_inline (void)
26569 return !TARGET_AAPCS_BASED;
26572 static void
26573 arm_cxx_determine_class_data_visibility (tree decl)
26575 if (!TARGET_AAPCS_BASED
26576 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26577 return;
26579 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26580 is exported. However, on systems without dynamic vague linkage,
26581 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26582 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26583 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26584 else
26585 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26586 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26589 static bool
26590 arm_cxx_class_data_always_comdat (void)
26592 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26593 vague linkage if the class has no key function. */
26594 return !TARGET_AAPCS_BASED;
26598 /* The EABI says __aeabi_atexit should be used to register static
26599 destructors. */
26601 static bool
26602 arm_cxx_use_aeabi_atexit (void)
26604 return TARGET_AAPCS_BASED;
26608 void
26609 arm_set_return_address (rtx source, rtx scratch)
26611 arm_stack_offsets *offsets;
26612 HOST_WIDE_INT delta;
26613 rtx addr;
26614 unsigned long saved_regs;
26616 offsets = arm_get_frame_offsets ();
26617 saved_regs = offsets->saved_regs_mask;
26619 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26620 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26621 else
26623 if (frame_pointer_needed)
26624 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26625 else
26627 /* LR will be the first saved register. */
26628 delta = offsets->outgoing_args - (offsets->frame + 4);
26631 if (delta >= 4096)
26633 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26634 GEN_INT (delta & ~4095)));
26635 addr = scratch;
26636 delta &= 4095;
26638 else
26639 addr = stack_pointer_rtx;
26641 addr = plus_constant (Pmode, addr, delta);
26643 /* The store needs to be marked as frame related in order to prevent
26644 DSE from deleting it as dead if it is based on fp. */
26645 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26646 RTX_FRAME_RELATED_P (insn) = 1;
26647 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26652 void
26653 thumb_set_return_address (rtx source, rtx scratch)
26655 arm_stack_offsets *offsets;
26656 HOST_WIDE_INT delta;
26657 HOST_WIDE_INT limit;
26658 int reg;
26659 rtx addr;
26660 unsigned long mask;
26662 emit_use (source);
26664 offsets = arm_get_frame_offsets ();
26665 mask = offsets->saved_regs_mask;
26666 if (mask & (1 << LR_REGNUM))
26668 limit = 1024;
26669 /* Find the saved regs. */
26670 if (frame_pointer_needed)
26672 delta = offsets->soft_frame - offsets->saved_args;
26673 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26674 if (TARGET_THUMB1)
26675 limit = 128;
26677 else
26679 delta = offsets->outgoing_args - offsets->saved_args;
26680 reg = SP_REGNUM;
26682 /* Allow for the stack frame. */
26683 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26684 delta -= 16;
26685 /* The link register is always the first saved register. */
26686 delta -= 4;
26688 /* Construct the address. */
26689 addr = gen_rtx_REG (SImode, reg);
26690 if (delta > limit)
26692 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26693 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26694 addr = scratch;
26696 else
26697 addr = plus_constant (Pmode, addr, delta);
26699 /* The store needs to be marked as frame related in order to prevent
26700 DSE from deleting it as dead if it is based on fp. */
26701 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26702 RTX_FRAME_RELATED_P (insn) = 1;
26703 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26705 else
26706 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26709 /* Implements target hook vector_mode_supported_p. */
26710 bool
26711 arm_vector_mode_supported_p (machine_mode mode)
26713 /* Neon also supports V2SImode, etc. listed in the clause below. */
26714 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26715 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26716 || mode == V2DImode || mode == V8HFmode))
26717 return true;
26719 if ((TARGET_NEON || TARGET_IWMMXT)
26720 && ((mode == V2SImode)
26721 || (mode == V4HImode)
26722 || (mode == V8QImode)))
26723 return true;
26725 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26726 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26727 || mode == V2HAmode))
26728 return true;
26730 return false;
26733 /* Implements target hook array_mode_supported_p. */
26735 static bool
26736 arm_array_mode_supported_p (machine_mode mode,
26737 unsigned HOST_WIDE_INT nelems)
26739 if (TARGET_NEON
26740 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26741 && (nelems >= 2 && nelems <= 4))
26742 return true;
26744 return false;
26747 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26748 registers when autovectorizing for Neon, at least until multiple vector
26749 widths are supported properly by the middle-end. */
26751 static machine_mode
26752 arm_preferred_simd_mode (machine_mode mode)
26754 if (TARGET_NEON)
26755 switch (mode)
26757 case SFmode:
26758 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26759 case SImode:
26760 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26761 case HImode:
26762 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26763 case QImode:
26764 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26765 case DImode:
26766 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26767 return V2DImode;
26768 break;
26770 default:;
26773 if (TARGET_REALLY_IWMMXT)
26774 switch (mode)
26776 case SImode:
26777 return V2SImode;
26778 case HImode:
26779 return V4HImode;
26780 case QImode:
26781 return V8QImode;
26783 default:;
26786 return word_mode;
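/* For example, when auto-vectorizing SImode operations with Neon the
   default choice above is V4SImode (a quadword q register); with
   -mvectorize-with-neon-double it is V2SImode (a doubleword d register),
   which is also what iWMMXt targets get.  */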
26789 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26791 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26792 using r0-r4 for function arguments, r7 for the stack frame and not have
26793 enough left over to do doubleword arithmetic. For Thumb-2 all the
26794 potentially problematic instructions accept high registers so this is not
26795 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26796 that require many low registers. */
26797 static bool
26798 arm_class_likely_spilled_p (reg_class_t rclass)
26800 if ((TARGET_THUMB1 && rclass == LO_REGS)
26801 || rclass == CC_REG)
26802 return true;
26804 return false;
26807 /* Implements target hook small_register_classes_for_mode_p. */
26808 bool
26809 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26811 return TARGET_THUMB1;
26814 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26815 ARM insns and therefore guarantee that the shift count is modulo 256.
26816 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26817 guarantee no particular behavior for out-of-range counts. */
26819 static unsigned HOST_WIDE_INT
26820 arm_shift_truncation_mask (machine_mode mode)
26822 return mode == SImode ? 255 : 0;
26826 /* Map internal gcc register numbers to DWARF2 register numbers. */
26828 unsigned int
26829 arm_dbx_register_number (unsigned int regno)
26831 if (regno < 16)
26832 return regno;
26834 if (IS_VFP_REGNUM (regno))
26836 /* See comment in arm_dwarf_register_span. */
26837 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26838 return 64 + regno - FIRST_VFP_REGNUM;
26839 else
26840 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26843 if (IS_IWMMXT_GR_REGNUM (regno))
26844 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26846 if (IS_IWMMXT_REGNUM (regno))
26847 return 112 + regno - FIRST_IWMMXT_REGNUM;
26849 return DWARF_FRAME_REGISTERS;
26852 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26853 GCC models them as 64 32-bit registers, so we need to describe this to
26854 the DWARF generation code. Other registers can use the default. */
26855 static rtx
26856 arm_dwarf_register_span (rtx rtl)
26858 machine_mode mode;
26859 unsigned regno;
26860 rtx parts[16];
26861 int nregs;
26862 int i;
26864 regno = REGNO (rtl);
26865 if (!IS_VFP_REGNUM (regno))
26866 return NULL_RTX;
26868 /* XXX FIXME: The EABI defines two VFP register ranges:
26869 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26870 256-287: D0-D31
26871 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26872 corresponding D register. Until GDB supports this, we shall use the
26873 legacy encodings. We also use these encodings for D0-D15 for
26874 compatibility with older debuggers. */
26875 mode = GET_MODE (rtl);
26876 if (GET_MODE_SIZE (mode) < 8)
26877 return NULL_RTX;
26879 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26881 nregs = GET_MODE_SIZE (mode) / 4;
26882 for (i = 0; i < nregs; i += 2)
26883 if (TARGET_BIG_END)
26885 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26886 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26888 else
26890 parts[i] = gen_rtx_REG (SImode, regno + i);
26891 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26894 else
26896 nregs = GET_MODE_SIZE (mode) / 8;
26897 for (i = 0; i < nregs; i++)
26898 parts[i] = gen_rtx_REG (DImode, regno + i);
26901 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26904 #if ARM_UNWIND_INFO
26905 /* Emit unwind directives for a store-multiple instruction or stack pointer
26906 push during alignment.
26907 These should only ever be generated by the function prologue code, so
26908 expect them to have a particular form.
26909 The store-multiple instruction sometimes pushes pc as the last register,
26910 although it should not be tracked into unwind information, or for -Os
26911 sometimes pushes some dummy registers before the first register that needs
26912 to be tracked in unwind information; such dummy registers are there just
26913 to avoid separate stack adjustment, and will not be restored in the
26914 epilogue. */
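/* As an illustration, a prologue store-multiple such as

       push    {r4, r5, lr}

   is annotated by the routine below as

       .save   {r4, r5, lr}

   and a push that also stores pc as a dummy last word additionally gets a
   ".pad #4" for the untracked slot.  */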
26916 static void
26917 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26919 int i;
26920 HOST_WIDE_INT offset;
26921 HOST_WIDE_INT nregs;
26922 int reg_size;
26923 unsigned reg;
26924 unsigned lastreg;
26925 unsigned padfirst = 0, padlast = 0;
26926 rtx e;
26928 e = XVECEXP (p, 0, 0);
26929 gcc_assert (GET_CODE (e) == SET);
26931 /* First insn will adjust the stack pointer. */
26932 gcc_assert (GET_CODE (e) == SET
26933 && REG_P (SET_DEST (e))
26934 && REGNO (SET_DEST (e)) == SP_REGNUM
26935 && GET_CODE (SET_SRC (e)) == PLUS);
26937 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26938 nregs = XVECLEN (p, 0) - 1;
26939 gcc_assert (nregs);
26941 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26942 if (reg < 16)
26944 /* For -Os dummy registers can be pushed at the beginning to
26945 avoid separate stack pointer adjustment. */
26946 e = XVECEXP (p, 0, 1);
26947 e = XEXP (SET_DEST (e), 0);
26948 if (GET_CODE (e) == PLUS)
26949 padfirst = INTVAL (XEXP (e, 1));
26950 gcc_assert (padfirst == 0 || optimize_size);
26951 /* The function prologue may also push pc, but not annotate it as it is
26952 never restored. We turn this into a stack pointer adjustment. */
26953 e = XVECEXP (p, 0, nregs);
26954 e = XEXP (SET_DEST (e), 0);
26955 if (GET_CODE (e) == PLUS)
26956 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26957 else
26958 padlast = offset - 4;
26959 gcc_assert (padlast == 0 || padlast == 4);
26960 if (padlast == 4)
26961 fprintf (asm_out_file, "\t.pad #4\n");
26962 reg_size = 4;
26963 fprintf (asm_out_file, "\t.save {");
26965 else if (IS_VFP_REGNUM (reg))
26967 reg_size = 8;
26968 fprintf (asm_out_file, "\t.vsave {");
26970 else
26971 /* Unknown register type. */
26972 gcc_unreachable ();
26974 /* If the stack increment doesn't match the size of the saved registers,
26975 something has gone horribly wrong. */
26976 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26978 offset = padfirst;
26979 lastreg = 0;
26980 /* The remaining insns will describe the stores. */
26981 for (i = 1; i <= nregs; i++)
26983 /* Expect (set (mem <addr>) (reg)).
26984 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26985 e = XVECEXP (p, 0, i);
26986 gcc_assert (GET_CODE (e) == SET
26987 && MEM_P (SET_DEST (e))
26988 && REG_P (SET_SRC (e)));
26990 reg = REGNO (SET_SRC (e));
26991 gcc_assert (reg >= lastreg);
26993 if (i != 1)
26994 fprintf (asm_out_file, ", ");
26995 /* We can't use %r for vfp because we need to use the
26996 double precision register names. */
26997 if (IS_VFP_REGNUM (reg))
26998 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26999 else
27000 asm_fprintf (asm_out_file, "%r", reg);
27002 if (flag_checking)
27004 /* Check that the addresses are consecutive. */
27005 e = XEXP (SET_DEST (e), 0);
27006 if (GET_CODE (e) == PLUS)
27007 gcc_assert (REG_P (XEXP (e, 0))
27008 && REGNO (XEXP (e, 0)) == SP_REGNUM
27009 && CONST_INT_P (XEXP (e, 1))
27010 && offset == INTVAL (XEXP (e, 1)));
27011 else
27012 gcc_assert (i == 1
27013 && REG_P (e)
27014 && REGNO (e) == SP_REGNUM);
27015 offset += reg_size;
27018 fprintf (asm_out_file, "}\n");
27019 if (padfirst)
27020 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27023 /* Emit unwind directives for a SET. */
27025 static void
27026 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27028 rtx e0;
27029 rtx e1;
27030 unsigned reg;
27032 e0 = XEXP (p, 0);
27033 e1 = XEXP (p, 1);
27034 switch (GET_CODE (e0))
27036 case MEM:
27037 /* Pushing a single register. */
27038 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27039 || !REG_P (XEXP (XEXP (e0, 0), 0))
27040 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27041 abort ();
27043 asm_fprintf (asm_out_file, "\t.save ");
27044 if (IS_VFP_REGNUM (REGNO (e1)))
27045 asm_fprintf(asm_out_file, "{d%d}\n",
27046 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27047 else
27048 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27049 break;
27051 case REG:
27052 if (REGNO (e0) == SP_REGNUM)
27054 /* A stack increment. */
27055 if (GET_CODE (e1) != PLUS
27056 || !REG_P (XEXP (e1, 0))
27057 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27058 || !CONST_INT_P (XEXP (e1, 1)))
27059 abort ();
27061 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27062 -INTVAL (XEXP (e1, 1)));
27064 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27066 HOST_WIDE_INT offset;
27068 if (GET_CODE (e1) == PLUS)
27070 if (!REG_P (XEXP (e1, 0))
27071 || !CONST_INT_P (XEXP (e1, 1)))
27072 abort ();
27073 reg = REGNO (XEXP (e1, 0));
27074 offset = INTVAL (XEXP (e1, 1));
27075 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27076 HARD_FRAME_POINTER_REGNUM, reg,
27077 offset);
27079 else if (REG_P (e1))
27081 reg = REGNO (e1);
27082 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27083 HARD_FRAME_POINTER_REGNUM, reg);
27085 else
27086 abort ();
27088 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27090 /* Move from sp to reg. */
27091 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27093 else if (GET_CODE (e1) == PLUS
27094 && REG_P (XEXP (e1, 0))
27095 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27096 && CONST_INT_P (XEXP (e1, 1)))
27098 /* Set reg to offset from sp. */
27099 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27100 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27102 else
27103 abort ();
27104 break;
27106 default:
27107 abort ();
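/* Some illustrative mappings produced above (fp denoting the hard frame
   pointer register):

       sub     sp, sp, #16         ->  .pad #16
       mov     fp, sp              ->  .setfp fp, sp
       add     fp, sp, #8          ->  .setfp fp, sp, #8
       str     r4, [sp, #-4]!      ->  .save {r4}
*/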
27112 /* Emit unwind directives for the given insn. */
27114 static void
27115 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27117 rtx note, pat;
27118 bool handled_one = false;
27120 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27121 return;
27123 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27124 && (TREE_NOTHROW (current_function_decl)
27125 || crtl->all_throwers_are_sibcalls))
27126 return;
27128 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27129 return;
27131 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27133 switch (REG_NOTE_KIND (note))
27135 case REG_FRAME_RELATED_EXPR:
27136 pat = XEXP (note, 0);
27137 goto found;
27139 case REG_CFA_REGISTER:
27140 pat = XEXP (note, 0);
27141 if (pat == NULL)
27143 pat = PATTERN (insn);
27144 if (GET_CODE (pat) == PARALLEL)
27145 pat = XVECEXP (pat, 0, 0);
27148 /* Only emitted for IS_STACKALIGN re-alignment. */
27150 rtx dest, src;
27151 unsigned reg;
27153 src = SET_SRC (pat);
27154 dest = SET_DEST (pat);
27156 gcc_assert (src == stack_pointer_rtx);
27157 reg = REGNO (dest);
27158 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27159 reg + 0x90, reg);
27161 handled_one = true;
27162 break;
27164 /* The INSN is generated in the epilogue. It is marked RTX_FRAME_RELATED_P
27165 so that shrink-wrapping gets correct DWARF information. We should not
27166 emit unwind information for it, because these notes are used either
27167 for pretend arguments or to adjust sp and restore registers from
27168 the stack. */
27169 case REG_CFA_DEF_CFA:
27170 case REG_CFA_ADJUST_CFA:
27171 case REG_CFA_RESTORE:
27172 return;
27174 case REG_CFA_EXPRESSION:
27175 case REG_CFA_OFFSET:
27176 /* ??? Only handling here what we actually emit. */
27177 gcc_unreachable ();
27179 default:
27180 break;
27183 if (handled_one)
27184 return;
27185 pat = PATTERN (insn);
27186 found:
27188 switch (GET_CODE (pat))
27190 case SET:
27191 arm_unwind_emit_set (asm_out_file, pat);
27192 break;
27194 case SEQUENCE:
27195 /* Store multiple. */
27196 arm_unwind_emit_sequence (asm_out_file, pat);
27197 break;
27199 default:
27200 abort();
27205 /* Output a reference from a function exception table to the type_info
27206 object X. The EABI specifies that the symbol should be relocated by
27207 an R_ARM_TARGET2 relocation. */
27209 static bool
27210 arm_output_ttype (rtx x)
27212 fputs ("\t.word\t", asm_out_file);
27213 output_addr_const (asm_out_file, x);
27214 /* Use special relocations for symbol references. */
27215 if (!CONST_INT_P (x))
27216 fputs ("(TARGET2)", asm_out_file);
27217 fputc ('\n', asm_out_file);
27219 return TRUE;
27222 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27224 static void
27225 arm_asm_emit_except_personality (rtx personality)
27227 fputs ("\t.personality\t", asm_out_file);
27228 output_addr_const (asm_out_file, personality);
27229 fputc ('\n', asm_out_file);
27232 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27234 static void
27235 arm_asm_init_sections (void)
27237 exception_section = get_unnamed_section (0, output_section_asm_op,
27238 "\t.handlerdata");
27240 #endif /* ARM_UNWIND_INFO */
27242 /* Output unwind directives for the start/end of a function. */
27244 void
27245 arm_output_fn_unwind (FILE * f, bool prologue)
27247 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27248 return;
27250 if (prologue)
27251 fputs ("\t.fnstart\n", f);
27252 else
27254 /* If this function will never be unwound, then mark it as such.
27255 The same condition is used in arm_unwind_emit to suppress
27256 the frame annotations. */
27257 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27258 && (TREE_NOTHROW (current_function_decl)
27259 || crtl->all_throwers_are_sibcalls))
27260 fputs("\t.cantunwind\n", f);
27262 fputs ("\t.fnend\n", f);
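/* Illustration: for an ordinary function this brackets the assembly with

	.fnstart
	... unwind directives emitted by arm_unwind_emit ...
	.fnend

   while a function that can never be unwound (the condition tested above)
   instead gets ".cantunwind" just before ".fnend" and, via the matching
   check in arm_unwind_emit, no frame directives at all. */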
27266 static bool
27267 arm_emit_tls_decoration (FILE *fp, rtx x)
27269 enum tls_reloc reloc;
27270 rtx val;
27272 val = XVECEXP (x, 0, 0);
27273 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27275 output_addr_const (fp, val);
27277 switch (reloc)
27279 case TLS_GD32:
27280 fputs ("(tlsgd)", fp);
27281 break;
27282 case TLS_LDM32:
27283 fputs ("(tlsldm)", fp);
27284 break;
27285 case TLS_LDO32:
27286 fputs ("(tlsldo)", fp);
27287 break;
27288 case TLS_IE32:
27289 fputs ("(gottpoff)", fp);
27290 break;
27291 case TLS_LE32:
27292 fputs ("(tpoff)", fp);
27293 break;
27294 case TLS_DESCSEQ:
27295 fputs ("(tlsdesc)", fp);
27296 break;
27297 default:
27298 gcc_unreachable ();
27301 switch (reloc)
27303 case TLS_GD32:
27304 case TLS_LDM32:
27305 case TLS_IE32:
27306 case TLS_DESCSEQ:
27307 fputs (" + (. - ", fp);
27308 output_addr_const (fp, XVECEXP (x, 0, 2));
27309 /* For DESCSEQ the 3rd operand encodes thumbness and is added rather than subtracted. */
27310 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27311 output_addr_const (fp, XVECEXP (x, 0, 3));
27312 fputc (')', fp);
27313 break;
27314 default:
27315 break;
27318 return TRUE;
27321 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27323 static void
27324 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27326 gcc_assert (size == 4);
27327 fputs ("\t.word\t", file);
27328 output_addr_const (file, x);
27329 fputs ("(tlsldo)", file);
27332 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27334 static bool
27335 arm_output_addr_const_extra (FILE *fp, rtx x)
27337 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27338 return arm_emit_tls_decoration (fp, x);
27339 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27341 char label[256];
27342 int labelno = INTVAL (XVECEXP (x, 0, 0));
27344 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27345 assemble_name_raw (fp, label);
27347 return TRUE;
27349 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27351 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27352 if (GOT_PCREL)
27353 fputs ("+.", fp);
27354 fputs ("-(", fp);
27355 output_addr_const (fp, XVECEXP (x, 0, 0));
27356 fputc (')', fp);
27357 return TRUE;
27359 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27361 output_addr_const (fp, XVECEXP (x, 0, 0));
27362 if (GOT_PCREL)
27363 fputs ("+.", fp);
27364 fputs ("-(", fp);
27365 output_addr_const (fp, XVECEXP (x, 0, 1));
27366 fputc (')', fp);
27367 return TRUE;
27369 else if (GET_CODE (x) == CONST_VECTOR)
27370 return arm_emit_vector_const (fp, x);
27372 return FALSE;
27375 /* Output assembly for a shift instruction.
27376 SET_FLAGS determines how the instruction modifies the condition codes.
27377 0 - Do not set condition codes.
27378 1 - Set condition codes.
27379 2 - Use smallest instruction. */
27380 const char *
27381 arm_output_shift(rtx * operands, int set_flags)
27383 char pattern[100];
27384 static const char flag_chars[3] = {'?', '.', '!'};
27385 const char *shift;
27386 HOST_WIDE_INT val;
27387 char c;
27389 c = flag_chars[set_flags];
27390 shift = shift_op(operands[3], &val);
27391 if (shift)
27393 if (val != -1)
27394 operands[2] = GEN_INT(val);
27395 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27397 else
27398 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27400 output_asm_insn (pattern, operands);
27401 return "";
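/* Worked example of the template built above (the %-modifiers are expanded
   later by the operand printer): if shift_op returns "lsl" with val == 3
   and SET_FLAGS == 2, the pattern becomes

	lsl%!\t%0, %1, %2

   with operands[2] replaced by (const_int 3); with SET_FLAGS == 0 and no
   shift operator it degenerates to "mov%?\t%0, %1". */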
27404 /* Output assembly for a WMMX immediate shift instruction. */
27405 const char *
27406 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27408 int shift = INTVAL (operands[2]);
27409 char templ[50];
27410 machine_mode opmode = GET_MODE (operands[0]);
27412 gcc_assert (shift >= 0);
27414 /* If the shift value exceeds the maximum for the qualifier (63 for the
27415 D qualifier, 31 for W, 15 for H), WROR/WSRA are handled specially below; anything else zeroes the destination. */
27416 if (((opmode == V4HImode) && (shift > 15))
27417 || ((opmode == V2SImode) && (shift > 31))
27418 || ((opmode == DImode) && (shift > 63)))
27420 if (wror_or_wsra)
27422 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27423 output_asm_insn (templ, operands);
27424 if (opmode == DImode)
27426 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27427 output_asm_insn (templ, operands);
27430 else
27432 /* The destination register will contain all zeros. */
27433 sprintf (templ, "wzero\t%%0");
27434 output_asm_insn (templ, operands);
27436 return "";
27439 if ((opmode == DImode) && (shift > 32))
27441 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27442 output_asm_insn (templ, operands);
27443 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27444 output_asm_insn (templ, operands);
27446 else
27448 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27449 output_asm_insn (templ, operands);
27451 return "";
27454 /* Output assembly for a WMMX tinsr instruction. */
27455 const char *
27456 arm_output_iwmmxt_tinsr (rtx *operands)
27458 int mask = INTVAL (operands[3]);
27459 int i;
27460 char templ[50];
27461 int units = mode_nunits[GET_MODE (operands[0])];
27462 gcc_assert ((mask & (mask - 1)) == 0);
27463 for (i = 0; i < units; ++i)
27465 if ((mask & 0x01) == 1)
27467 break;
27469 mask >>= 1;
27471 gcc_assert (i < units);
27473 switch (GET_MODE (operands[0]))
27475 case V8QImode:
27476 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27477 break;
27478 case V4HImode:
27479 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27480 break;
27481 case V2SImode:
27482 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27483 break;
27484 default:
27485 gcc_unreachable ();
27486 break;
27488 output_asm_insn (templ, operands);
27490 return "";
27493 /* Output a Thumb-1 casesi dispatch sequence. */
27494 const char *
27495 thumb1_output_casesi (rtx *operands)
27497 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27499 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27501 switch (GET_MODE(diff_vec))
27503 case QImode:
27504 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27505 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27506 case HImode:
27507 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27508 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27509 case SImode:
27510 return "bl\t%___gnu_thumb1_case_si";
27511 default:
27512 gcc_unreachable ();
27516 /* Output a Thumb-2 casesi instruction. */
27517 const char *
27518 thumb2_output_casesi (rtx *operands)
27520 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27522 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27524 output_asm_insn ("cmp\t%0, %1", operands);
27525 output_asm_insn ("bhi\t%l3", operands);
27526 switch (GET_MODE(diff_vec))
27528 case QImode:
27529 return "tbb\t[%|pc, %0]";
27530 case HImode:
27531 return "tbh\t[%|pc, %0, lsl #1]";
27532 case SImode:
27533 if (flag_pic)
27535 output_asm_insn ("adr\t%4, %l2", operands);
27536 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27537 output_asm_insn ("add\t%4, %4, %5", operands);
27538 return "bx\t%4";
27540 else
27542 output_asm_insn ("adr\t%4, %l2", operands);
27543 return "ldr\t%|pc, [%4, %0, lsl #2]";
27545 default:
27546 gcc_unreachable ();
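/* Illustration (register numbers and label names are hypothetical): for a
   QImode dispatch table the two output_asm_insn calls plus the returned
   template produce

	cmp	r0, #NCASES
	bhi	.Ldefault
	tbb	[pc, r0]

   while the PIC SImode case loads the table entry into a scratch register,
   adds it to the table address and finishes with "bx". */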
27550 /* Implement TARGET_SCHED_ISSUE_RATE. Look up the issue rate in the
27551 per-core tuning structs. */
27552 static int
27553 arm_issue_rate (void)
27555 return current_tune->issue_rate;
27558 /* Return how many instructions the scheduler should look ahead in order
27559 to choose the best one. */
27560 static int
27561 arm_first_cycle_multipass_dfa_lookahead (void)
27563 int issue_rate = arm_issue_rate ();
27565 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27568 /* Enable modeling of L2 auto-prefetcher. */
27569 static int
27570 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27572 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27575 const char *
27576 arm_mangle_type (const_tree type)
27578 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27579 has to be mangled as if it is in the "std" namespace. */
27580 if (TARGET_AAPCS_BASED
27581 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27582 return "St9__va_list";
27584 /* Half-precision float. */
27585 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27586 return "Dh";
27588 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27589 builtin type. */
27590 if (TYPE_NAME (type) != NULL)
27591 return arm_mangle_builtin_type (type);
27593 /* Use the default mangling. */
27594 return NULL;
27597 /* Order of allocation of core registers for Thumb: this allocation is
27598 written over the corresponding initial entries of the array
27599 initialized with REG_ALLOC_ORDER. We allocate all low registers
27600 first. Saving and restoring a low register is usually cheaper than
27601 using a call-clobbered high register. */
27603 static const int thumb_core_reg_alloc_order[] =
27605 3, 2, 1, 0, 4, 5, 6, 7,
27606 14, 12, 8, 9, 10, 11
27609 /* Adjust register allocation order when compiling for Thumb. */
27611 void
27612 arm_order_regs_for_local_alloc (void)
27614 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27615 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27616 if (TARGET_THUMB)
27617 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27618 sizeof (thumb_core_reg_alloc_order));
27621 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27623 bool
27624 arm_frame_pointer_required (void)
27626 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27627 return true;
27629 /* If the function receives nonlocal gotos, it needs to save the frame
27630 pointer in the nonlocal_goto_save_area object. */
27631 if (cfun->has_nonlocal_label)
27632 return true;
27634 /* The frame pointer is required for non-leaf APCS frames. */
27635 if (TARGET_ARM && TARGET_APCS_FRAME && !leaf_function_p ())
27636 return true;
27638 /* If we are probing the stack in the prologue, we will have a faulting
27639 instruction prior to the stack adjustment and this requires a frame
27640 pointer if we want to catch the exception using the EABI unwinder. */
27641 if (!IS_INTERRUPT (arm_current_func_type ())
27642 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27643 && arm_except_unwind_info (&global_options) == UI_TARGET
27644 && cfun->can_throw_non_call_exceptions)
27646 HOST_WIDE_INT size = get_frame_size ();
27648 /* That's irrelevant if there is no stack adjustment. */
27649 if (size <= 0)
27650 return false;
27652 /* That's relevant only if there is a stack probe. */
27653 if (crtl->is_leaf && !cfun->calls_alloca)
27655 /* We don't have the final size of the frame so adjust. */
27656 size += 32 * UNITS_PER_WORD;
27657 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27658 return true;
27660 else
27661 return true;
27664 return false;
27667 /* Only Thumb-1 lacks conditional execution, so return true if the
27668 target is not Thumb-1. */
27669 static bool
27670 arm_have_conditional_execution (void)
27672 return !TARGET_THUMB1;
27675 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27676 static HOST_WIDE_INT
27677 arm_vector_alignment (const_tree type)
27679 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27681 if (TARGET_AAPCS_BASED)
27682 align = MIN (align, 64);
27684 return align;
27687 static unsigned int
27688 arm_autovectorize_vector_sizes (void)
27690 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27693 static bool
27694 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27696 /* Vectors which aren't in packed structures will not be less aligned than
27697 the natural alignment of their element type, so this is safe. */
27698 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27699 return !is_packed;
27701 return default_builtin_vector_alignment_reachable (type, is_packed);
27704 static bool
27705 arm_builtin_support_vector_misalignment (machine_mode mode,
27706 const_tree type, int misalignment,
27707 bool is_packed)
27709 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27711 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27713 if (is_packed)
27714 return align == 1;
27716 /* If the misalignment is unknown, we should be able to handle the access
27717 so long as it is not to a member of a packed data structure. */
27718 if (misalignment == -1)
27719 return true;
27721 /* Return true if the misalignment is a multiple of the natural alignment
27722 of the vector's element type. This is probably always going to be
27723 true in practice, since we've already established that this isn't a
27724 packed access. */
27725 return ((misalignment % align) == 0);
27728 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27729 is_packed);
27732 static void
27733 arm_conditional_register_usage (void)
27735 int regno;
27737 if (TARGET_THUMB1 && optimize_size)
27739 /* When optimizing for size on Thumb-1, it's better not
27740 to use the HI regs, because of the overhead of
27741 stacking them. */
27742 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27743 fixed_regs[regno] = call_used_regs[regno] = 1;
27746 /* The link register can be clobbered by any branch insn,
27747 but we have no way to track that at present, so mark
27748 it as unavailable. */
27749 if (TARGET_THUMB1)
27750 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27752 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27754 /* VFPv3 registers are disabled when earlier VFP
27755 versions are selected due to the definition of
27756 LAST_VFP_REGNUM. */
27757 for (regno = FIRST_VFP_REGNUM;
27758 regno <= LAST_VFP_REGNUM; ++ regno)
27760 fixed_regs[regno] = 0;
27761 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27762 || regno >= FIRST_VFP_REGNUM + 32;
27766 if (TARGET_REALLY_IWMMXT)
27768 regno = FIRST_IWMMXT_GR_REGNUM;
27769 /* The 2002/10/09 revision of the XScale ABI has wCG0
27770 and wCG1 as call-preserved registers. The 2002/11/21
27771 revision changed this so that all wCG registers are
27772 scratch registers. */
27773 for (regno = FIRST_IWMMXT_GR_REGNUM;
27774 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27775 fixed_regs[regno] = 0;
27776 /* The XScale ABI has wR0 - wR9 as scratch registers,
27777 the rest as call-preserved registers. */
27778 for (regno = FIRST_IWMMXT_REGNUM;
27779 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27781 fixed_regs[regno] = 0;
27782 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27786 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27788 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27789 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27791 else if (TARGET_APCS_STACK)
27793 fixed_regs[10] = 1;
27794 call_used_regs[10] = 1;
27796 /* -mcaller-super-interworking reserves r11 for calls to
27797 _interwork_r11_call_via_rN(). Making the register global
27798 is an easy way of ensuring that it remains valid for all
27799 calls. */
27800 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27801 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27803 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27804 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27805 if (TARGET_CALLER_INTERWORKING)
27806 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27808 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27811 static reg_class_t
27812 arm_preferred_rename_class (reg_class_t rclass)
27814 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27815 using GENERAL_REGS, so during the register rename pass we prefer
27816 LO_REGS, which can reduce code size. */
27817 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27818 return LO_REGS;
27819 else
27820 return NO_REGS;
27823 /* Compute the attribute "length" of insn "*push_multi".
27824 So this function MUST be kept in sync with that insn pattern. */
27826 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27828 int i, regno, hi_reg;
27829 int num_saves = XVECLEN (parallel_op, 0);
27831 /* ARM mode. */
27832 if (TARGET_ARM)
27833 return 4;
27834 /* Thumb1 mode. */
27835 if (TARGET_THUMB1)
27836 return 2;
27838 /* Thumb2 mode. */
27839 regno = REGNO (first_op);
27840 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
27841 list fits in 8 bits. Normally this means all registers in the list must
27842 be LO_REGS, that is R0-R7. If any HI_REGS register is used, we must use
27843 a 32-bit encoding. The one exception is PUSH: LR (a HI_REGS register)
27844 may still be used with the 16-bit encoding. */
27845 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27846 for (i = 1; i < num_saves && !hi_reg; i++)
27848 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27849 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27852 if (!hi_reg)
27853 return 2;
27854 return 4;
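/* Examples of the rule above (Thumb-2): "push {r0-r7, lr}" uses only low
   registers plus LR, so the insn is 2 bytes; "push {r4, r8}" contains a
   high register other than LR and needs the 4-byte encoding. */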
27857 /* Compute the attribute "length" of insn. Currently, this function is used
27858 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27859 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27860 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
27861 true if OPERANDS contains an insn that explicitly updates the base register. */
27864 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
27866 /* ARM mode. */
27867 if (TARGET_ARM)
27868 return 4;
27869 /* Thumb1 mode. */
27870 if (TARGET_THUMB1)
27871 return 2;
27873 rtx parallel_op = operands[0];
27874 /* Initialize to the number of elements in the PARALLEL. */
27875 unsigned indx = XVECLEN (parallel_op, 0) - 1;
27876 /* Initialize to the base register number. */
27877 unsigned regno = REGNO (operands[1]);
27878 /* Skip the return and write-back patterns; only the register pop
27879 patterns are needed for the analysis below. */
27880 unsigned first_indx = 0;
27881 first_indx += return_pc ? 1 : 0;
27882 first_indx += write_back_p ? 1 : 0;
27884 /* A pop operation can be done with LDM or POP. If the base register is SP
27885 and write-back is used, then the LDM is an alias of POP. */
27886 bool pop_p = (regno == SP_REGNUM && write_back_p);
27887 bool ldm_p = !pop_p;
27889 /* Check base register for LDM. */
27890 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
27891 return 4;
27893 /* Check each register in the list. */
27894 for (; indx >= first_indx; indx--)
27896 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
27897 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
27898 comment in arm_attr_length_push_multi. */
27899 if (REGNO_REG_CLASS (regno) == HI_REGS
27900 && (regno != PC_REGNUM || ldm_p))
27901 return 4;
27904 return 2;
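/* Examples (Thumb-2): "pop {r4-r7, pc}" is 2 bytes, since PC is the one
   high register permitted in a 16-bit POP; an LDM whose base register is
   a high register, or whose list contains any other high register, takes
   4 bytes. */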
27907 /* Compute the number of instructions emitted by output_move_double. */
27909 arm_count_output_move_double_insns (rtx *operands)
27911 int count;
27912 rtx ops[2];
27913 /* output_move_double may modify the operands array, so call it
27914 here on a copy of the array. */
27915 ops[0] = operands[0];
27916 ops[1] = operands[1];
27917 output_move_double (ops, false, &count);
27918 return count;
27922 vfp3_const_double_for_fract_bits (rtx operand)
27924 REAL_VALUE_TYPE r0;
27926 if (!CONST_DOUBLE_P (operand))
27927 return 0;
27929 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
27930 if (exact_real_inverse (DFmode, &r0)
27931 && !REAL_VALUE_NEGATIVE (r0))
27933 if (exact_real_truncate (DFmode, &r0))
27935 HOST_WIDE_INT value = real_to_integer (&r0);
27936 value = value & 0xffffffff;
27937 if ((value != 0) && ( (value & (value - 1)) == 0))
27939 int ret = exact_log2 (value);
27940 gcc_assert (IN_RANGE (ret, 0, 31));
27941 return ret;
27945 return 0;
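/* Worked example: for the constant 0.125 the exact inverse is 8.0, which
   truncates exactly to the integer 8, a power of two, so the function
   returns exact_log2 (8) == 3, i.e. three fraction bits. A constant such
   as 0.3 has no exact power-of-two inverse and yields 0. */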
27948 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
27949 log2 is in [1, 32], return that log2. Otherwise return -1.
27950 This is used in the patterns for vcvt.s32.f32 floating-point to
27951 fixed-point conversions. */
27954 vfp3_const_double_for_bits (rtx x)
27956 const REAL_VALUE_TYPE *r;
27958 if (!CONST_DOUBLE_P (x))
27959 return -1;
27961 r = CONST_DOUBLE_REAL_VALUE (x);
27963 if (REAL_VALUE_NEGATIVE (*r)
27964 || REAL_VALUE_ISNAN (*r)
27965 || REAL_VALUE_ISINF (*r)
27966 || !real_isinteger (r, SFmode))
27967 return -1;
27969 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
27971 /* The exact_log2 above will have returned -1 if this is
27972 not an exact log2. */
27973 if (!IN_RANGE (hwint, 1, 32))
27974 return -1;
27976 return hwint;
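/* Worked example: 65536.0 is a non-negative integral power of two, so the
   function returns 16; 3.0 is an integer but not a power of two, so
   exact_log2 gives -1 and the range check rejects it. */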
27980 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27982 static void
27983 arm_pre_atomic_barrier (enum memmodel model)
27985 if (need_atomic_barrier_p (model, true))
27986 emit_insn (gen_memory_barrier ());
27989 static void
27990 arm_post_atomic_barrier (enum memmodel model)
27992 if (need_atomic_barrier_p (model, false))
27993 emit_insn (gen_memory_barrier ());
27996 /* Emit the load-exclusive and store-exclusive instructions.
27997 Use acquire and release versions if necessary. */
27999 static void
28000 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28002 rtx (*gen) (rtx, rtx);
28004 if (acq)
28006 switch (mode)
28008 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28009 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28010 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28011 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28012 default:
28013 gcc_unreachable ();
28016 else
28018 switch (mode)
28020 case QImode: gen = gen_arm_load_exclusiveqi; break;
28021 case HImode: gen = gen_arm_load_exclusivehi; break;
28022 case SImode: gen = gen_arm_load_exclusivesi; break;
28023 case DImode: gen = gen_arm_load_exclusivedi; break;
28024 default:
28025 gcc_unreachable ();
28029 emit_insn (gen (rval, mem));
28032 static void
28033 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28034 rtx mem, bool rel)
28036 rtx (*gen) (rtx, rtx, rtx);
28038 if (rel)
28040 switch (mode)
28042 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
28043 case HImode: gen = gen_arm_store_release_exclusivehi; break;
28044 case SImode: gen = gen_arm_store_release_exclusivesi; break;
28045 case DImode: gen = gen_arm_store_release_exclusivedi; break;
28046 default:
28047 gcc_unreachable ();
28050 else
28052 switch (mode)
28054 case QImode: gen = gen_arm_store_exclusiveqi; break;
28055 case HImode: gen = gen_arm_store_exclusivehi; break;
28056 case SImode: gen = gen_arm_store_exclusivesi; break;
28057 case DImode: gen = gen_arm_store_exclusivedi; break;
28058 default:
28059 gcc_unreachable ();
28063 emit_insn (gen (bval, rval, mem));
28066 /* Mark the previous jump instruction as unlikely. */
28068 static void
28069 emit_unlikely_jump (rtx insn)
28071 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
28073 insn = emit_jump_insn (insn);
28074 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
28077 /* Expand a compare and swap pattern. */
28079 void
28080 arm_expand_compare_and_swap (rtx operands[])
28082 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28083 machine_mode mode;
28084 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28086 bval = operands[0];
28087 rval = operands[1];
28088 mem = operands[2];
28089 oldval = operands[3];
28090 newval = operands[4];
28091 is_weak = operands[5];
28092 mod_s = operands[6];
28093 mod_f = operands[7];
28094 mode = GET_MODE (mem);
28096 /* Normally the succ memory model must be stronger than fail, but in the
28097 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28098 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28100 if (TARGET_HAVE_LDACQ
28101 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28102 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28103 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28105 switch (mode)
28107 case QImode:
28108 case HImode:
28109 /* For narrow modes, we're going to perform the comparison in SImode,
28110 so do the zero-extension now. */
28111 rval = gen_reg_rtx (SImode);
28112 oldval = convert_modes (SImode, mode, oldval, true);
28113 /* FALLTHRU */
28115 case SImode:
28116 /* Force the value into a register if needed. We waited until after
28117 the zero-extension above to do this properly. */
28118 if (!arm_add_operand (oldval, SImode))
28119 oldval = force_reg (SImode, oldval);
28120 break;
28122 case DImode:
28123 if (!cmpdi_operand (oldval, mode))
28124 oldval = force_reg (mode, oldval);
28125 break;
28127 default:
28128 gcc_unreachable ();
28131 switch (mode)
28133 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
28134 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
28135 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
28136 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
28137 default:
28138 gcc_unreachable ();
28141 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28143 if (mode == QImode || mode == HImode)
28144 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28146 /* In all cases, we arrange for success to be signaled by Z set.
28147 This arrangement allows for the boolean result to be used directly
28148 in a subsequent branch, post optimization. */
28149 x = gen_rtx_REG (CCmode, CC_REGNUM);
28150 x = gen_rtx_EQ (SImode, x, const0_rtx);
28151 emit_insn (gen_rtx_SET (bval, x));
28154 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28155 another memory store between the load-exclusive and store-exclusive can
28156 reset the monitor from Exclusive to Open state. This means we must wait
28157 until after reload to split the pattern, lest we get a register spill in
28158 the middle of the atomic sequence. */
28160 void
28161 arm_split_compare_and_swap (rtx operands[])
28163 rtx rval, mem, oldval, newval, scratch;
28164 machine_mode mode;
28165 enum memmodel mod_s, mod_f;
28166 bool is_weak;
28167 rtx_code_label *label1, *label2;
28168 rtx x, cond;
28170 rval = operands[0];
28171 mem = operands[1];
28172 oldval = operands[2];
28173 newval = operands[3];
28174 is_weak = (operands[4] != const0_rtx);
28175 mod_s = memmodel_from_int (INTVAL (operands[5]));
28176 mod_f = memmodel_from_int (INTVAL (operands[6]));
28177 scratch = operands[7];
28178 mode = GET_MODE (mem);
28180 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28182 bool use_acquire = TARGET_HAVE_LDACQ
28183 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28184 || is_mm_release (mod_s));
28186 bool use_release = TARGET_HAVE_LDACQ
28187 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28188 || is_mm_acquire (mod_s));
28190 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28191 a full barrier is emitted after the store-release. */
28192 if (is_armv8_sync)
28193 use_acquire = false;
28195 /* Checks whether a barrier is needed and emits one accordingly. */
28196 if (!(use_acquire || use_release))
28197 arm_pre_atomic_barrier (mod_s);
28199 label1 = NULL;
28200 if (!is_weak)
28202 label1 = gen_label_rtx ();
28203 emit_label (label1);
28205 label2 = gen_label_rtx ();
28207 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28209 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
28210 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28211 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28212 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28213 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28215 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
28217 /* Weak or strong, we want EQ to be true for success, so that we
28218 match the flags that we got from the compare above. */
28219 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28220 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
28221 emit_insn (gen_rtx_SET (cond, x));
28223 if (!is_weak)
28225 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28226 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28227 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
28228 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28231 if (!is_mm_relaxed (mod_f))
28232 emit_label (label2);
28234 /* Checks whether a barrier is needed and emits one accordingly. */
28235 if (is_armv8_sync
28236 || !(use_acquire || use_release))
28237 arm_post_atomic_barrier (mod_s);
28239 if (is_mm_relaxed (mod_f))
28240 emit_label (label2);
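/* The sequence emitted above corresponds roughly to, for SImode and a
   strong CAS with relaxed orderings (register allocation is illustrative):

	.Lretry:
		ldrex	r0, [r1]
		cmp	r0, r2
		bne	.Ldone
		strex	r3, r4, [r1]
		cmp	r3, #0
		bne	.Lretry
	.Ldone:

   with barriers and/or the acquire/release forms substituted according to
   the memory-model checks above. */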
28243 void
28244 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28245 rtx value, rtx model_rtx, rtx cond)
28247 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28248 machine_mode mode = GET_MODE (mem);
28249 machine_mode wmode = (mode == DImode ? DImode : SImode);
28250 rtx_code_label *label;
28251 rtx x;
28253 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28255 bool use_acquire = TARGET_HAVE_LDACQ
28256 && !(is_mm_relaxed (model) || is_mm_consume (model)
28257 || is_mm_release (model));
28259 bool use_release = TARGET_HAVE_LDACQ
28260 && !(is_mm_relaxed (model) || is_mm_consume (model)
28261 || is_mm_acquire (model));
28263 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28264 a full barrier is emitted after the store-release. */
28265 if (is_armv8_sync)
28266 use_acquire = false;
28268 /* Checks whether a barrier is needed and emits one accordingly. */
28269 if (!(use_acquire || use_release))
28270 arm_pre_atomic_barrier (model);
28272 label = gen_label_rtx ();
28273 emit_label (label);
28275 if (new_out)
28276 new_out = gen_lowpart (wmode, new_out);
28277 if (old_out)
28278 old_out = gen_lowpart (wmode, old_out);
28279 else
28280 old_out = new_out;
28281 value = simplify_gen_subreg (wmode, value, mode, 0);
28283 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28285 switch (code)
28287 case SET:
28288 new_out = value;
28289 break;
28291 case NOT:
28292 x = gen_rtx_AND (wmode, old_out, value);
28293 emit_insn (gen_rtx_SET (new_out, x));
28294 x = gen_rtx_NOT (wmode, new_out);
28295 emit_insn (gen_rtx_SET (new_out, x));
28296 break;
28298 case MINUS:
28299 if (CONST_INT_P (value))
28301 value = GEN_INT (-INTVAL (value));
28302 code = PLUS;
28304 /* FALLTHRU */
28306 case PLUS:
28307 if (mode == DImode)
28309 /* DImode plus/minus need to clobber flags. */
28310 /* The adddi3 and subdi3 patterns are incorrectly written so that
28311 they require matching operands, even when we could easily support
28312 three operands. Thankfully, this can be fixed up post-splitting,
28313 as the individual add+adc patterns do accept three operands and
28314 post-reload cprop can make these moves go away. */
28315 emit_move_insn (new_out, old_out);
28316 if (code == PLUS)
28317 x = gen_adddi3 (new_out, new_out, value);
28318 else
28319 x = gen_subdi3 (new_out, new_out, value);
28320 emit_insn (x);
28321 break;
28323 /* FALLTHRU */
28325 default:
28326 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28327 emit_insn (gen_rtx_SET (new_out, x));
28328 break;
28331 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28332 use_release);
28334 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28335 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28337 /* Checks whether a barrier is needed and emits one accordingly. */
28338 if (is_armv8_sync
28339 || !(use_acquire || use_release))
28340 arm_post_atomic_barrier (model);
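/* For example, an SImode atomic add splits into a loop of roughly this
   shape (register allocation is illustrative):

	.Lretry:
		ldrex	r0, [r2]
		add	r1, r0, r3
		strex	ip, r1, [r2]
		cmp	ip, #0
		bne	.Lretry

   The NOT case above computes ~(old & value), i.e. a fetch-and-NAND, and
   DImode PLUS/MINUS go through the adddi3/subdi3 patterns as noted in the
   comment above. */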
28343 #define MAX_VECT_LEN 16
28345 struct expand_vec_perm_d
28347 rtx target, op0, op1;
28348 unsigned char perm[MAX_VECT_LEN];
28349 machine_mode vmode;
28350 unsigned char nelt;
28351 bool one_vector_p;
28352 bool testing_p;
28355 /* Generate a variable permutation. */
28357 static void
28358 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28360 machine_mode vmode = GET_MODE (target);
28361 bool one_vector_p = rtx_equal_p (op0, op1);
28363 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28364 gcc_checking_assert (GET_MODE (op0) == vmode);
28365 gcc_checking_assert (GET_MODE (op1) == vmode);
28366 gcc_checking_assert (GET_MODE (sel) == vmode);
28367 gcc_checking_assert (TARGET_NEON);
28369 if (one_vector_p)
28371 if (vmode == V8QImode)
28372 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28373 else
28374 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28376 else
28378 rtx pair;
28380 if (vmode == V8QImode)
28382 pair = gen_reg_rtx (V16QImode);
28383 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28384 pair = gen_lowpart (TImode, pair);
28385 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28387 else
28389 pair = gen_reg_rtx (OImode);
28390 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28391 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28396 void
28397 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28399 machine_mode vmode = GET_MODE (target);
28400 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28401 bool one_vector_p = rtx_equal_p (op0, op1);
28402 rtx rmask[MAX_VECT_LEN], mask;
28404 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28405 numbering of elements for big-endian, we must reverse the order. */
28406 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28408 /* The VTBL instruction does not use a modulo index, so we must take care
28409 of that ourselves. */
28410 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28411 for (i = 0; i < nelt; ++i)
28412 rmask[i] = mask;
28413 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28414 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28416 arm_expand_vec_perm_1 (target, op0, op1, sel);
28419 /* Map lane ordering between architectural lane order and GCC lane order,
28420 taking the ABI into account. See the comment above output_move_neon for details. */
28422 static int
28423 neon_endian_lane_map (machine_mode mode, int lane)
28425 if (BYTES_BIG_ENDIAN)
28427 int nelems = GET_MODE_NUNITS (mode);
28428 /* Reverse lane order. */
28429 lane = (nelems - 1 - lane);
28430 /* Reverse D register order, to match ABI. */
28431 if (GET_MODE_SIZE (mode) == 16)
28432 lane = lane ^ (nelems / 2);
28434 return lane;
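/* Worked example: for V4SImode on a big-endian target, nelems == 4 and the
   mode occupies two D registers (16 bytes), so lanes map 0->1, 1->0, 2->3
   and 3->2; on a little-endian target the lane is returned unchanged. */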
28437 /* Some permutations index into pairs of vectors; this is a helper function
28438 to map indexes into those pairs of vectors. */
28440 static int
28441 neon_pair_endian_lane_map (machine_mode mode, int lane)
28443 int nelem = GET_MODE_NUNITS (mode);
28444 if (BYTES_BIG_ENDIAN)
28445 lane =
28446 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28447 return lane;
28450 /* Generate or test for an insn that supports a constant permutation. */
28452 /* Recognize patterns for the VUZP insns. */
28454 static bool
28455 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28457 unsigned int i, odd, mask, nelt = d->nelt;
28458 rtx out0, out1, in0, in1;
28459 rtx (*gen)(rtx, rtx, rtx, rtx);
28460 int first_elem;
28461 int swap_nelt;
28463 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28464 return false;
28466 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28467 big endian pattern on 64 bit vectors, so we correct for that. */
28468 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28469 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28471 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28473 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28474 odd = 0;
28475 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28476 odd = 1;
28477 else
28478 return false;
28479 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28481 for (i = 0; i < nelt; i++)
28483 unsigned elt =
28484 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28485 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28486 return false;
28489 /* Success! */
28490 if (d->testing_p)
28491 return true;
28493 switch (d->vmode)
28495 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28496 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28497 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28498 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28499 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28500 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28501 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28502 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28503 default:
28504 gcc_unreachable ();
28507 in0 = d->op0;
28508 in1 = d->op1;
28509 if (swap_nelt != 0)
28510 std::swap (in0, in1);
28512 out0 = d->target;
28513 out1 = gen_reg_rtx (d->vmode);
28514 if (odd)
28515 std::swap (out0, out1);
28517 emit_insn (gen (out0, in0, in1, out1));
28518 return true;
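/* Example of a selector accepted here (little-endian, two V4SI inputs):
   { 0, 2, 4, 6 } picks the even elements, giving odd == 0, while
   { 1, 3, 5, 7 } picks the odd elements and swaps the outputs so the
   target receives the odd-lane result of the VUZP. */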
28521 /* Recognize patterns for the VZIP insns. */
28523 static bool
28524 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28526 unsigned int i, high, mask, nelt = d->nelt;
28527 rtx out0, out1, in0, in1;
28528 rtx (*gen)(rtx, rtx, rtx, rtx);
28529 int first_elem;
28530 bool is_swapped;
28532 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28533 return false;
28535 is_swapped = BYTES_BIG_ENDIAN;
28537 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28539 high = nelt / 2;
28540 if (first_elem == neon_endian_lane_map (d->vmode, high))
28542 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28543 high = 0;
28544 else
28545 return false;
28546 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28548 for (i = 0; i < nelt / 2; i++)
28550 unsigned elt =
28551 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28552 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28553 != elt)
28554 return false;
28555 elt =
28556 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28557 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28558 != elt)
28559 return false;
28562 /* Success! */
28563 if (d->testing_p)
28564 return true;
28566 switch (d->vmode)
28568 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28569 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28570 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28571 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28572 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28573 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28574 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28575 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28576 default:
28577 gcc_unreachable ();
28580 in0 = d->op0;
28581 in1 = d->op1;
28582 if (is_swapped)
28583 std::swap (in0, in1);
28585 out0 = d->target;
28586 out1 = gen_reg_rtx (d->vmode);
28587 if (high)
28588 std::swap (out0, out1);
28590 emit_insn (gen (out0, in0, in1, out1));
28591 return true;
28594 /* Recognize patterns for the VREV insns. */
28596 static bool
28597 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28599 unsigned int i, j, diff, nelt = d->nelt;
28600 rtx (*gen)(rtx, rtx);
28602 if (!d->one_vector_p)
28603 return false;
28605 diff = d->perm[0];
28606 switch (diff)
28608 case 7:
28609 switch (d->vmode)
28611 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28612 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28613 default:
28614 return false;
28616 break;
28617 case 3:
28618 switch (d->vmode)
28620 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28621 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28622 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28623 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28624 default:
28625 return false;
28627 break;
28628 case 1:
28629 switch (d->vmode)
28631 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28632 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28633 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28634 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28635 case V4SImode: gen = gen_neon_vrev64v4si; break;
28636 case V2SImode: gen = gen_neon_vrev64v2si; break;
28637 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28638 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28639 default:
28640 return false;
28642 break;
28643 default:
28644 return false;
28647 for (i = 0; i < nelt ; i += diff + 1)
28648 for (j = 0; j <= diff; j += 1)
28650 /* This is guaranteed to hold because diff is 7, 3 or 1,
28651 so there are always enough elements left in the queue
28652 to check. Reaching this point with a vector mask whose
28653 diff is any other value would mean something has
28654 already gone wrong. */
28655 gcc_assert (i + j < nelt);
28656 if (d->perm[i + j] != i + diff - j)
28657 return false;
28660 /* Success! */
28661 if (d->testing_p)
28662 return true;
28664 emit_insn (gen (d->target, d->op0));
28665 return true;
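/* Example: for V8QImode the one-vector selector { 1, 0, 3, 2, 5, 4, 7, 6 }
   has diff == 1 and swaps the bytes within each halfword, so it is emitted
   as a single vrev16.8. */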
28668 /* Recognize patterns for the VTRN insns. */
28670 static bool
28671 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28673 unsigned int i, odd, mask, nelt = d->nelt;
28674 rtx out0, out1, in0, in1;
28675 rtx (*gen)(rtx, rtx, rtx, rtx);
28677 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28678 return false;
28680 /* Note that these are little-endian tests. Adjust for big-endian later. */
28681 if (d->perm[0] == 0)
28682 odd = 0;
28683 else if (d->perm[0] == 1)
28684 odd = 1;
28685 else
28686 return false;
28687 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28689 for (i = 0; i < nelt; i += 2)
28691 if (d->perm[i] != i + odd)
28692 return false;
28693 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28694 return false;
28697 /* Success! */
28698 if (d->testing_p)
28699 return true;
28701 switch (d->vmode)
28703 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28704 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28705 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28706 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28707 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28708 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28709 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28710 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28711 default:
28712 gcc_unreachable ();
28715 in0 = d->op0;
28716 in1 = d->op1;
28717 if (BYTES_BIG_ENDIAN)
28719 std::swap (in0, in1);
28720 odd = !odd;
28723 out0 = d->target;
28724 out1 = gen_reg_rtx (d->vmode);
28725 if (odd)
28726 std::swap (out0, out1);
28728 emit_insn (gen (out0, in0, in1, out1));
28729 return true;
28732 /* Recognize patterns for the VEXT insns. */
28734 static bool
28735 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28737 unsigned int i, nelt = d->nelt;
28738 rtx (*gen) (rtx, rtx, rtx, rtx);
28739 rtx offset;
28741 unsigned int location;
28743 unsigned int next = d->perm[0] + 1;
28745 /* TODO: Handle GCC's numbering of elements for big-endian. */
28746 if (BYTES_BIG_ENDIAN)
28747 return false;
28749 /* Check if the extracted indexes are increasing by one. */
28750 for (i = 1; i < nelt; next++, i++)
28752 /* If we hit the most significant element of the 2nd vector in
28753 the previous iteration, no need to test further. */
28754 if (next == 2 * nelt)
28755 return false;
28757 /* If we are operating on only one vector, it could be a
28758 rotation. If there are only two elements of size < 64, let
28759 arm_evpc_neon_vrev catch it. */
28760 if (d->one_vector_p && (next == nelt))
28762 if ((nelt == 2) && (d->vmode != V2DImode))
28763 return false;
28764 else
28765 next = 0;
28768 if (d->perm[i] != next)
28769 return false;
28772 location = d->perm[0];
28774 switch (d->vmode)
28776 case V16QImode: gen = gen_neon_vextv16qi; break;
28777 case V8QImode: gen = gen_neon_vextv8qi; break;
28778 case V4HImode: gen = gen_neon_vextv4hi; break;
28779 case V8HImode: gen = gen_neon_vextv8hi; break;
28780 case V2SImode: gen = gen_neon_vextv2si; break;
28781 case V4SImode: gen = gen_neon_vextv4si; break;
28782 case V2SFmode: gen = gen_neon_vextv2sf; break;
28783 case V4SFmode: gen = gen_neon_vextv4sf; break;
28784 case V2DImode: gen = gen_neon_vextv2di; break;
28785 default:
28786 return false;
28789 /* Success! */
28790 if (d->testing_p)
28791 return true;
28793 offset = GEN_INT (location);
28794 emit_insn (gen (d->target, d->op0, d->op1, offset));
28795 return true;
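/* Example: with two V4SI operands the selector { 1, 2, 3, 4 } is a run of
   consecutive indexes starting at 1, so it maps onto a single VEXT with
   offset #1, extracting from the concatenation of the two inputs. */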
28798 /* The NEON VTBL instruction is a fully variable permutation that's even
28799 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28800 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28801 can do slightly better by expanding this as a constant where we don't
28802 have to apply a mask. */
28804 static bool
28805 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28807 rtx rperm[MAX_VECT_LEN], sel;
28808 machine_mode vmode = d->vmode;
28809 unsigned int i, nelt = d->nelt;
28811 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28812 numbering of elements for big-endian, we must reverse the order. */
28813 if (BYTES_BIG_ENDIAN)
28814 return false;
28816 if (d->testing_p)
28817 return true;
28819 /* Generic code will try constant permutation twice: once with the
28820 original mode and again with the elements lowered to QImode.
28821 So wait and don't do the selector expansion ourselves. */
28822 if (vmode != V8QImode && vmode != V16QImode)
28823 return false;
28825 for (i = 0; i < nelt; ++i)
28826 rperm[i] = GEN_INT (d->perm[i]);
28827 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28828 sel = force_reg (vmode, sel);
28830 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28831 return true;
28834 static bool
28835 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28837 /* Check if the input mask matches vext before reordering the
28838 operands. */
28839 if (TARGET_NEON)
28840 if (arm_evpc_neon_vext (d))
28841 return true;
28843 /* The pattern matching functions above are written to look for a small
28844 number to begin the sequence (0, 1, N/2). If we begin with an index
28845 from the second operand, we can swap the operands. */
28846 if (d->perm[0] >= d->nelt)
28848 unsigned i, nelt = d->nelt;
28850 for (i = 0; i < nelt; ++i)
28851 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28853 std::swap (d->op0, d->op1);
28856 if (TARGET_NEON)
28858 if (arm_evpc_neon_vuzp (d))
28859 return true;
28860 if (arm_evpc_neon_vzip (d))
28861 return true;
28862 if (arm_evpc_neon_vrev (d))
28863 return true;
28864 if (arm_evpc_neon_vtrn (d))
28865 return true;
28866 return arm_evpc_neon_vtbl (d);
28868 return false;
28871 /* Expand a vec_perm_const pattern. */
28873 bool
28874 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28876 struct expand_vec_perm_d d;
28877 int i, nelt, which;
28879 d.target = target;
28880 d.op0 = op0;
28881 d.op1 = op1;
28883 d.vmode = GET_MODE (target);
28884 gcc_assert (VECTOR_MODE_P (d.vmode));
28885 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28886 d.testing_p = false;
28888 for (i = which = 0; i < nelt; ++i)
28890 rtx e = XVECEXP (sel, 0, i);
28891 int ei = INTVAL (e) & (2 * nelt - 1);
28892 which |= (ei < nelt ? 1 : 2);
28893 d.perm[i] = ei;
28896 switch (which)
28898 default:
28899 gcc_unreachable();
28901 case 3:
28902 d.one_vector_p = false;
28903 if (!rtx_equal_p (op0, op1))
28904 break;
28906 /* The elements of PERM do not suggest that only the first operand
28907 is used, but both operands are identical. Allow easier matching
28908 of the permutation by folding the permutation into the single
28909 input vector. */
28910 /* FALLTHRU */
28911 case 2:
28912 for (i = 0; i < nelt; ++i)
28913 d.perm[i] &= nelt - 1;
28914 d.op0 = op1;
28915 d.one_vector_p = true;
28916 break;
28918 case 1:
28919 d.op1 = op0;
28920 d.one_vector_p = true;
28921 break;
28924 return arm_expand_vec_perm_const_1 (&d);
28927 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28929 static bool
28930 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28931 const unsigned char *sel)
28933 struct expand_vec_perm_d d;
28934 unsigned int i, nelt, which;
28935 bool ret;
28937 d.vmode = vmode;
28938 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28939 d.testing_p = true;
28940 memcpy (d.perm, sel, nelt);
28942 /* Categorize the set of elements in the selector. */
28943 for (i = which = 0; i < nelt; ++i)
28945 unsigned char e = d.perm[i];
28946 gcc_assert (e < 2 * nelt);
28947 which |= (e < nelt ? 1 : 2);
28950 /* If all elements are from the second vector, fold them onto the first. */
28951 if (which == 2)
28952 for (i = 0; i < nelt; ++i)
28953 d.perm[i] -= nelt;
28955 /* Check whether the mask can be applied to the vector type. */
28956 d.one_vector_p = (which != 3);
28958 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28959 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28960 if (!d.one_vector_p)
28961 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28963 start_sequence ();
28964 ret = arm_expand_vec_perm_const_1 (&d);
28965 end_sequence ();
28967 return ret;
28970 bool
28971 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28973 /* If we are soft-float and either have LDRD or the mode is no wider
28974 than a word, then all auto-increment forms are OK. */
28975 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28976 return true;
28978 switch (code)
28980 /* Post-increment and pre-decrement are supported for all
28981 instruction forms except for vector forms. */
28982 case ARM_POST_INC:
28983 case ARM_PRE_DEC:
28984 if (VECTOR_MODE_P (mode))
28986 if (code != ARM_PRE_DEC)
28987 return true;
28988 else
28989 return false;
28992 return true;
28994 case ARM_POST_DEC:
28995 case ARM_PRE_INC:
28996 /* Without LDRD, and with a mode wider than a word,
28997 there is no point in auto-incrementing because
28998 ldm and stm do not have these forms. */
28999 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29000 return false;
29002 /* Vector and floating point modes do not support
29003 these auto increment forms. */
29004 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29005 return false;
29007 return true;
29009 default:
29010 return false;
29014 return false;
29017 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29018 on ARM, since we know that shifts by negative amounts are no-ops.
29019 Additionally, the default expansion code is not available or suitable
29020 for post-reload insn splits (this can occur when the register allocator
29021 chooses not to do a shift in NEON).
29023 This function is used in both initial expand and post-reload splits, and
29024 handles all kinds of 64-bit shifts.
29026 Input requirements:
29027 - It is safe for the input and output to be the same register, but
29028 early-clobber rules apply for the shift amount and scratch registers.
29029 - Shift by register requires both scratch registers. In all other cases
29030 the scratch registers may be NULL.
29031 - Ashiftrt by a register also clobbers the CC register. */
29032 void
29033 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29034 rtx amount, rtx scratch1, rtx scratch2)
29036 rtx out_high = gen_highpart (SImode, out);
29037 rtx out_low = gen_lowpart (SImode, out);
29038 rtx in_high = gen_highpart (SImode, in);
29039 rtx in_low = gen_lowpart (SImode, in);
29041 /* Terminology:
29042 in = the register pair containing the input value.
29043 out = the destination register pair.
29044 up = the high- or low-part of each pair.
29045 down = the opposite part to "up".
29046 In a shift, we can consider bits to shift from "up"-stream to
29047 "down"-stream, so in a left-shift "up" is the low-part and "down"
29048 is the high-part of each register pair. */
29050 rtx out_up = code == ASHIFT ? out_low : out_high;
29051 rtx out_down = code == ASHIFT ? out_high : out_low;
29052 rtx in_up = code == ASHIFT ? in_low : in_high;
29053 rtx in_down = code == ASHIFT ? in_high : in_low;
29055 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29056 gcc_assert (out
29057 && (REG_P (out) || GET_CODE (out) == SUBREG)
29058 && GET_MODE (out) == DImode);
29059 gcc_assert (in
29060 && (REG_P (in) || GET_CODE (in) == SUBREG)
29061 && GET_MODE (in) == DImode);
29062 gcc_assert (amount
29063 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29064 && GET_MODE (amount) == SImode)
29065 || CONST_INT_P (amount)));
29066 gcc_assert (scratch1 == NULL
29067 || (GET_CODE (scratch1) == SCRATCH)
29068 || (GET_MODE (scratch1) == SImode
29069 && REG_P (scratch1)));
29070 gcc_assert (scratch2 == NULL
29071 || (GET_CODE (scratch2) == SCRATCH)
29072 || (GET_MODE (scratch2) == SImode
29073 && REG_P (scratch2)));
29074 gcc_assert (!REG_P (out) || !REG_P (amount)
29075 || !HARD_REGISTER_P (out)
29076 || (REGNO (out) != REGNO (amount)
29077 && REGNO (out) + 1 != REGNO (amount)));
29079 /* Macros to make the following code more readable. */
29080 #define SUB_32(DEST,SRC) \
29081 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29082 #define RSB_32(DEST,SRC) \
29083 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29084 #define SUB_S_32(DEST,SRC) \
29085 gen_addsi3_compare0 ((DEST), (SRC), \
29086 GEN_INT (-32))
29087 #define SET(DEST,SRC) \
29088 gen_rtx_SET ((DEST), (SRC))
29089 #define SHIFT(CODE,SRC,AMOUNT) \
29090 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29091 #define LSHIFT(CODE,SRC,AMOUNT) \
29092 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29093 SImode, (SRC), (AMOUNT))
29094 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29095 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29096 SImode, (SRC), (AMOUNT))
29097 #define ORR(A,B) \
29098 gen_rtx_IOR (SImode, (A), (B))
29099 #define BRANCH(COND,LABEL) \
29100 gen_arm_cond_branch ((LABEL), \
29101 gen_rtx_ ## COND (CCmode, cc_reg, \
29102 const0_rtx), \
29103 cc_reg)
29105 /* Shifts by register and shifts by constant are handled separately. */
29106 if (CONST_INT_P (amount))
29108 /* We have a shift-by-constant. */
29110 /* First, handle out-of-range shift amounts.
29111 In both cases we try to match the result an ARM instruction in a
29112 shift-by-register would give. This helps reduce execution
29113 differences between optimization levels, but it won't stop other
29114 parts of the compiler doing different things. This is "undefined
 29115          behavior", in any case.  */
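      /* For example (illustrative): a constant amount of 70 falls into the
	 ">= 64" case below; an arithmetic right shift then replicates the
	 sign bit of IN into both output words, while any other shift code
	 clears OUT, matching what the shift-by-register sequence would
	 also produce.  */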
29116 if (INTVAL (amount) <= 0)
29117 emit_insn (gen_movdi (out, in));
29118 else if (INTVAL (amount) >= 64)
29120 if (code == ASHIFTRT)
29122 rtx const31_rtx = GEN_INT (31);
29123 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29124 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29126 else
29127 emit_insn (gen_movdi (out, const0_rtx));
29130 /* Now handle valid shifts. */
29131 else if (INTVAL (amount) < 32)
29133 /* Shifts by a constant less than 32. */
29134 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
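	  /* Illustrative example: a 64-bit LSHIFTRT by 10 becomes
	       out_low  = (in_low >> 10) | (in_high << 22);
	       out_high = in_high >> 10;
	     via the three emits below.  */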
29136 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29137 emit_insn (SET (out_down,
29138 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29139 out_down)));
29140 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29142 else
29144 /* Shifts by a constant greater than 31. */
29145 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
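	  /* Illustrative example: a 64-bit ASHIFT by 40 becomes
	       out_high = in_low << 8;
	       out_low  = 0;
	     since only the "down" (high) word receives any input bits.  */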
29147 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29148 if (code == ASHIFTRT)
29149 emit_insn (gen_ashrsi3 (out_up, in_up,
29150 GEN_INT (31)));
29151 else
29152 emit_insn (SET (out_up, const0_rtx));
29155 else
29157 /* We have a shift-by-register. */
29158 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29160 /* This alternative requires the scratch registers. */
29161 gcc_assert (scratch1 && REG_P (scratch1));
29162 gcc_assert (scratch2 && REG_P (scratch2));
29164 /* We will need the values "amount-32" and "32-amount" later.
29165 Swapping them around now allows the later code to be more general. */
29166 switch (code)
29168 case ASHIFT:
29169 emit_insn (SUB_32 (scratch1, amount));
29170 emit_insn (RSB_32 (scratch2, amount));
29171 break;
29172 case ASHIFTRT:
29173 emit_insn (RSB_32 (scratch1, amount));
29174 /* Also set CC = amount > 32. */
29175 emit_insn (SUB_S_32 (scratch2, amount));
29176 break;
29177 case LSHIFTRT:
29178 emit_insn (RSB_32 (scratch1, amount));
29179 emit_insn (SUB_32 (scratch2, amount));
29180 break;
29181 default:
29182 gcc_unreachable ();
29185 /* Emit code like this:
29187 arithmetic-left:
29188 out_down = in_down << amount;
29189 out_down = (in_up << (amount - 32)) | out_down;
29190 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29191 out_up = in_up << amount;
29193 arithmetic-right:
29194 out_down = in_down >> amount;
29195 out_down = (in_up << (32 - amount)) | out_down;
29196 if (amount < 32)
29197 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29198 out_up = in_up << amount;
29200 logical-right:
29201 out_down = in_down >> amount;
29202 out_down = (in_up << (32 - amount)) | out_down;
29203 if (amount < 32)
29204 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29205 out_up = in_up << amount;
29207 The ARM and Thumb2 variants are the same but implemented slightly
29208 differently. If this were only called during expand we could just
29209 use the Thumb2 case and let combine do the right thing, but this
29210 can also be called from post-reload splitters. */
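      /* A note on why this works (summarizing ARM register-shift semantics,
	 not taken from the original comment): for LSL and LSR the shift
	 amount comes from the bottom byte of the register and any amount in
	 the range 32-255 yields zero, so an out-of-range term in the ORRs
	 below simply contributes nothing.  ASR, however, fills with the sign
	 bit for amounts of 32 and above, which is why the ASHIFTRT variants
	 must branch around their second term when the amount is below 32
	 (using the condition codes set by SUB_S_32 above).  */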
29212 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29214 if (!TARGET_THUMB2)
29216 /* Emit code for ARM mode. */
29217 emit_insn (SET (out_down,
29218 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29219 if (code == ASHIFTRT)
29221 rtx_code_label *done_label = gen_label_rtx ();
29222 emit_jump_insn (BRANCH (LT, done_label));
29223 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29224 out_down)));
29225 emit_label (done_label);
29227 else
29228 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29229 out_down)));
29231 else
29233 /* Emit code for Thumb2 mode.
29234 Thumb2 can't do shift and or in one insn. */
29235 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29236 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29238 if (code == ASHIFTRT)
29240 rtx_code_label *done_label = gen_label_rtx ();
29241 emit_jump_insn (BRANCH (LT, done_label));
29242 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29243 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29244 emit_label (done_label);
29246 else
29248 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29249 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29253 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29256 #undef SUB_32
29257 #undef RSB_32
29258 #undef SUB_S_32
29259 #undef SET
29260 #undef SHIFT
29261 #undef LSHIFT
29262 #undef REV_LSHIFT
29263 #undef ORR
29264 #undef BRANCH
29267 /* Returns true if the pattern is a valid symbolic address, which is either a
29268 symbol_ref or (symbol_ref + addend).
29270 According to the ARM ELF ABI, the initial addend of REL-type relocations
29271 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29272 literal field of the instruction as a 16-bit signed value in the range
29273 -32768 <= A < 32768. */
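/* For example (illustrative): (const (plus (symbol_ref "foo") (const_int 16)))
   is accepted, while an addend of 32768 is rejected because it cannot be
   encoded as a signed 16-bit value.  */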
29275 bool
29276 arm_valid_symbolic_address_p (rtx addr)
29278 rtx xop0, xop1 = NULL_RTX;
29279 rtx tmp = addr;
29281 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29282 return true;
29284 /* (const (plus: symbol_ref const_int)) */
29285 if (GET_CODE (addr) == CONST)
29286 tmp = XEXP (addr, 0);
29288 if (GET_CODE (tmp) == PLUS)
29290 xop0 = XEXP (tmp, 0);
29291 xop1 = XEXP (tmp, 1);
29293 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29294 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29297 return false;
 29300 /* Returns true if COMPARISON is a valid comparison operation, and puts
 29301    the operands into a form that is valid for it.  */
29302 bool
29303 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29305 enum rtx_code code = GET_CODE (*comparison);
29306 int code_int;
29307 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29308 ? GET_MODE (*op2) : GET_MODE (*op1);
29310 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29312 if (code == UNEQ || code == LTGT)
29313 return false;
29315 code_int = (int)code;
29316 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29317 PUT_CODE (*comparison, (enum rtx_code)code_int);
29319 switch (mode)
29321 case SImode:
29322 if (!arm_add_operand (*op1, mode))
29323 *op1 = force_reg (mode, *op1);
29324 if (!arm_add_operand (*op2, mode))
29325 *op2 = force_reg (mode, *op2);
29326 return true;
29328 case DImode:
29329 if (!cmpdi_operand (*op1, mode))
29330 *op1 = force_reg (mode, *op1);
29331 if (!cmpdi_operand (*op2, mode))
29332 *op2 = force_reg (mode, *op2);
29333 return true;
29335 case SFmode:
29336 case DFmode:
29337 if (!arm_float_compare_operand (*op1, mode))
29338 *op1 = force_reg (mode, *op1);
29339 if (!arm_float_compare_operand (*op2, mode))
29340 *op2 = force_reg (mode, *op2);
29341 return true;
29342 default:
29343 break;
29346 return false;
29350 /* Maximum number of instructions to set block of memory. */
29351 static int
29352 arm_block_set_max_insns (void)
29354 if (optimize_function_for_size_p (cfun))
29355 return 4;
29356 else
29357 return current_tune->max_insns_inline_memset;
29360 /* Return TRUE if it's profitable to set block of memory for
29361 non-vectorized case. VAL is the value to set the memory
29362 with. LENGTH is the number of bytes to set. ALIGN is the
29363 alignment of the destination memory in bytes. UNALIGNED_P
29364 is TRUE if we can only set the memory with instructions
29365 meeting alignment requirements. USE_STRD_P is TRUE if we
29366 can use strd to set the memory. */
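/* Worked example (illustrative): for LENGTH == 15 with word alignment and
   neither UNALIGNED_P nor USE_STRD_P, the estimate is the cost of loading
   VAL plus (15 >> 2) == 3 word stores plus leftover[3] == 2 trailing stores;
   if unaligned access is available the final STRH/STRB pair is assumed to
   merge into one STR, so the count drops by one.  The result is compared
   against arm_block_set_max_insns ().  */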
29367 static bool
29368 arm_block_set_non_vect_profit_p (rtx val,
29369 unsigned HOST_WIDE_INT length,
29370 unsigned HOST_WIDE_INT align,
29371 bool unaligned_p, bool use_strd_p)
29373 int num = 0;
 29374   /* For leftovers of 0-7 bytes, we can set the memory block using
 29375      strb/strh/str with the minimum number of instructions.  */
29376 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29378 if (unaligned_p)
29380 num = arm_const_inline_cost (SET, val);
29381 num += length / align + length % align;
29383 else if (use_strd_p)
29385 num = arm_const_double_inline_cost (val);
29386 num += (length >> 3) + leftover[length & 7];
29388 else
29390 num = arm_const_inline_cost (SET, val);
29391 num += (length >> 2) + leftover[length & 3];
29394 /* We may be able to combine last pair STRH/STRB into a single STR
29395 by shifting one byte back. */
29396 if (unaligned_access && length > 3 && (length & 3) == 3)
29397 num--;
29399 return (num <= arm_block_set_max_insns ());
29402 /* Return TRUE if it's profitable to set block of memory for
29403 vectorized case. LENGTH is the number of bytes to set.
29404 ALIGN is the alignment of destination memory in bytes.
29405 MODE is the vector mode used to set the memory. */
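/* Worked example (illustrative): for LENGTH == 27, a 4-byte-aligned
   destination and V16QImode, the estimate is 1 insn to load the constant,
   plus ceil (27 / 16) == 2 vector stores, plus 1 address adjustment for the
   3 leftover bytes, minus 1 for the aligned V16QImode case, i.e. 3 in total,
   which is then compared against arm_block_set_max_insns ().  */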
29406 static bool
29407 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29408 unsigned HOST_WIDE_INT align,
29409 machine_mode mode)
29411 int num;
29412 bool unaligned_p = ((align & 3) != 0);
29413 unsigned int nelt = GET_MODE_NUNITS (mode);
29415 /* Instruction loading constant value. */
29416 num = 1;
29417 /* Instructions storing the memory. */
29418 num += (length + nelt - 1) / nelt;
 29419   /* Instructions adjusting the address expression.  We only need to
 29420      adjust the address expression if the destination is 4-byte aligned
 29421      and the leftover bytes can only be stored by a misaligned store
         instruction.  */
29422 if (!unaligned_p && (length & 3) != 0)
29423 num++;
29425 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29426 if (!unaligned_p && mode == V16QImode)
29427 num--;
29429 return (num <= arm_block_set_max_insns ());
29432 /* Set a block of memory using vectorization instructions for the
29433 unaligned case. We fill the first LENGTH bytes of the memory
29434 area starting from DSTBASE with byte constant VALUE. ALIGN is
29435 the alignment requirement of memory. Return TRUE if succeeded. */
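/* The strategy (sketched here, not in the original comment): emit full
   nelt_mode-wide misaligned stores, then handle any tail by moving DST back
   so that a final store ends exactly at DSTBASE + LENGTH; the overlapping
   bytes are rewritten with the same constant, so the overlap is harmless.  */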
29436 static bool
29437 arm_block_set_unaligned_vect (rtx dstbase,
29438 unsigned HOST_WIDE_INT length,
29439 unsigned HOST_WIDE_INT value,
29440 unsigned HOST_WIDE_INT align)
29442 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29443 rtx dst, mem;
29444 rtx val_elt, val_vec, reg;
29445 rtx rval[MAX_VECT_LEN];
29446 rtx (*gen_func) (rtx, rtx);
29447 machine_mode mode;
29448 unsigned HOST_WIDE_INT v = value;
29449 unsigned int offset = 0;
29450 gcc_assert ((align & 0x3) != 0);
29451 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29452 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29453 if (length >= nelt_v16)
29455 mode = V16QImode;
29456 gen_func = gen_movmisalignv16qi;
29458 else
29460 mode = V8QImode;
29461 gen_func = gen_movmisalignv8qi;
29463 nelt_mode = GET_MODE_NUNITS (mode);
29464 gcc_assert (length >= nelt_mode);
29465 /* Skip if it isn't profitable. */
29466 if (!arm_block_set_vect_profit_p (length, align, mode))
29467 return false;
29469 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29470 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29472 v = sext_hwi (v, BITS_PER_WORD);
29473 val_elt = GEN_INT (v);
29474 for (j = 0; j < nelt_mode; j++)
29475 rval[j] = val_elt;
29477 reg = gen_reg_rtx (mode);
29478 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29479 /* Emit instruction loading the constant value. */
29480 emit_move_insn (reg, val_vec);
29482 /* Handle nelt_mode bytes in a vector. */
29483 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29485 emit_insn ((*gen_func) (mem, reg));
29486 if (i + 2 * nelt_mode <= length)
29488 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29489 offset += nelt_mode;
29490 mem = adjust_automodify_address (dstbase, mode, dst, offset);
 29494   /* If at least nelt_v8 bytes are left over, we must be in
 29495      V16QImode.  */
29496 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29498 /* Handle (8, 16) bytes leftover. */
29499 if (i + nelt_v8 < length)
29501 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29502 offset += length - i;
29503 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29505 /* We are shifting bytes back, set the alignment accordingly. */
29506 if ((length & 1) != 0 && align >= 2)
29507 set_mem_align (mem, BITS_PER_UNIT);
29509 emit_insn (gen_movmisalignv16qi (mem, reg));
29511 /* Handle (0, 8] bytes leftover. */
29512 else if (i < length && i + nelt_v8 >= length)
29514 if (mode == V16QImode)
29515 reg = gen_lowpart (V8QImode, reg);
29517 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29518 + (nelt_mode - nelt_v8))));
29519 offset += (length - i) + (nelt_mode - nelt_v8);
29520 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29522 /* We are shifting bytes back, set the alignment accordingly. */
29523 if ((length & 1) != 0 && align >= 2)
29524 set_mem_align (mem, BITS_PER_UNIT);
29526 emit_insn (gen_movmisalignv8qi (mem, reg));
29529 return true;
29532 /* Set a block of memory using vectorization instructions for the
29533 aligned case. We fill the first LENGTH bytes of the memory area
29534 starting from DSTBASE with byte constant VALUE. ALIGN is the
29535 alignment requirement of memory. Return TRUE if succeeded. */
29536 static bool
29537 arm_block_set_aligned_vect (rtx dstbase,
29538 unsigned HOST_WIDE_INT length,
29539 unsigned HOST_WIDE_INT value,
29540 unsigned HOST_WIDE_INT align)
29542 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29543 rtx dst, addr, mem;
29544 rtx val_elt, val_vec, reg;
29545 rtx rval[MAX_VECT_LEN];
29546 machine_mode mode;
29547 unsigned HOST_WIDE_INT v = value;
29548 unsigned int offset = 0;
29550 gcc_assert ((align & 0x3) == 0);
29551 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29552 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29553 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29554 mode = V16QImode;
29555 else
29556 mode = V8QImode;
29558 nelt_mode = GET_MODE_NUNITS (mode);
29559 gcc_assert (length >= nelt_mode);
29560 /* Skip if it isn't profitable. */
29561 if (!arm_block_set_vect_profit_p (length, align, mode))
29562 return false;
29564 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29566 v = sext_hwi (v, BITS_PER_WORD);
29567 val_elt = GEN_INT (v);
29568 for (j = 0; j < nelt_mode; j++)
29569 rval[j] = val_elt;
29571 reg = gen_reg_rtx (mode);
29572 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29573 /* Emit instruction loading the constant value. */
29574 emit_move_insn (reg, val_vec);
29576 i = 0;
29577 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29578 if (mode == V16QImode)
29580 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29581 emit_insn (gen_movmisalignv16qi (mem, reg));
29582 i += nelt_mode;
29583 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29584 if (i + nelt_v8 < length && i + nelt_v16 > length)
29586 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29587 offset += length - nelt_mode;
29588 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29589 /* We are shifting bytes back, set the alignment accordingly. */
29590 if ((length & 0x3) == 0)
29591 set_mem_align (mem, BITS_PER_UNIT * 4);
29592 else if ((length & 0x1) == 0)
29593 set_mem_align (mem, BITS_PER_UNIT * 2);
29594 else
29595 set_mem_align (mem, BITS_PER_UNIT);
29597 emit_insn (gen_movmisalignv16qi (mem, reg));
29598 return true;
29600 /* Fall through for bytes leftover. */
29601 mode = V8QImode;
29602 nelt_mode = GET_MODE_NUNITS (mode);
29603 reg = gen_lowpart (V8QImode, reg);
29606 /* Handle 8 bytes in a vector. */
29607 for (; (i + nelt_mode <= length); i += nelt_mode)
29609 addr = plus_constant (Pmode, dst, i);
29610 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29611 emit_move_insn (mem, reg);
29614 /* Handle single word leftover by shifting 4 bytes back. We can
29615 use aligned access for this case. */
29616 if (i + UNITS_PER_WORD == length)
29618 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29619 offset += i - UNITS_PER_WORD;
29620 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29621 /* We are shifting 4 bytes back, set the alignment accordingly. */
29622 if (align > UNITS_PER_WORD)
29623 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29625 emit_move_insn (mem, reg);
29627 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29628 We have to use unaligned access for this case. */
29629 else if (i < length)
29631 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29632 offset += length - nelt_mode;
29633 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29634 /* We are shifting bytes back, set the alignment accordingly. */
29635 if ((length & 1) == 0)
29636 set_mem_align (mem, BITS_PER_UNIT * 2);
29637 else
29638 set_mem_align (mem, BITS_PER_UNIT);
29640 emit_insn (gen_movmisalignv8qi (mem, reg));
29643 return true;
29646 /* Set a block of memory using plain strh/strb instructions, only
 29647    using instructions allowed by the alignment ALIGN on the processor.  We fill the
29648 first LENGTH bytes of the memory area starting from DSTBASE
29649 with byte constant VALUE. ALIGN is the alignment requirement
29650 of memory. */
29651 static bool
29652 arm_block_set_unaligned_non_vect (rtx dstbase,
29653 unsigned HOST_WIDE_INT length,
29654 unsigned HOST_WIDE_INT value,
29655 unsigned HOST_WIDE_INT align)
29657 unsigned int i;
29658 rtx dst, addr, mem;
29659 rtx val_exp, val_reg, reg;
29660 machine_mode mode;
29661 HOST_WIDE_INT v = value;
29663 gcc_assert (align == 1 || align == 2);
29665 if (align == 2)
29666 v |= (value << BITS_PER_UNIT);
29668 v = sext_hwi (v, BITS_PER_WORD);
29669 val_exp = GEN_INT (v);
29670 /* Skip if it isn't profitable. */
29671 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29672 align, true, false))
29673 return false;
29675 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29676 mode = (align == 2 ? HImode : QImode);
29677 val_reg = force_reg (SImode, val_exp);
29678 reg = gen_lowpart (mode, val_reg);
29680 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29682 addr = plus_constant (Pmode, dst, i);
29683 mem = adjust_automodify_address (dstbase, mode, addr, i);
29684 emit_move_insn (mem, reg);
29687 /* Handle single byte leftover. */
29688 if (i + 1 == length)
29690 reg = gen_lowpart (QImode, val_reg);
29691 addr = plus_constant (Pmode, dst, i);
29692 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29693 emit_move_insn (mem, reg);
29694 i++;
29697 gcc_assert (i == length);
29698 return true;
29701 /* Set a block of memory using plain strd/str/strh/strb instructions,
29702 to permit unaligned copies on processors which support unaligned
29703 semantics for those instructions. We fill the first LENGTH bytes
29704 of the memory area starting from DSTBASE with byte constant VALUE.
29705 ALIGN is the alignment requirement of memory. */
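/* For example (illustrative): VALUE == 0xAB is replicated to 0xABABABAB for
   word-sized stores, and further to 0xABABABABABABABAB when STRD can be
   used; for LENGTH < 4 the bytes beyond LENGTH are masked off first.  */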
29706 static bool
29707 arm_block_set_aligned_non_vect (rtx dstbase,
29708 unsigned HOST_WIDE_INT length,
29709 unsigned HOST_WIDE_INT value,
29710 unsigned HOST_WIDE_INT align)
29712 unsigned int i;
29713 rtx dst, addr, mem;
29714 rtx val_exp, val_reg, reg;
29715 unsigned HOST_WIDE_INT v;
29716 bool use_strd_p;
29718 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29719 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29721 v = (value | (value << 8) | (value << 16) | (value << 24));
29722 if (length < UNITS_PER_WORD)
29723 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29725 if (use_strd_p)
29726 v |= (v << BITS_PER_WORD);
29727 else
29728 v = sext_hwi (v, BITS_PER_WORD);
29730 val_exp = GEN_INT (v);
29731 /* Skip if it isn't profitable. */
29732 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29733 align, false, use_strd_p))
29735 if (!use_strd_p)
29736 return false;
29738 /* Try without strd. */
29739 v = (v >> BITS_PER_WORD);
29740 v = sext_hwi (v, BITS_PER_WORD);
29741 val_exp = GEN_INT (v);
29742 use_strd_p = false;
29743 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29744 align, false, use_strd_p))
29745 return false;
29748 i = 0;
29749 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29750 /* Handle double words using strd if possible. */
29751 if (use_strd_p)
29753 val_reg = force_reg (DImode, val_exp);
29754 reg = val_reg;
29755 for (; (i + 8 <= length); i += 8)
29757 addr = plus_constant (Pmode, dst, i);
29758 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29759 emit_move_insn (mem, reg);
29762 else
29763 val_reg = force_reg (SImode, val_exp);
29765 /* Handle words. */
29766 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29767 for (; (i + 4 <= length); i += 4)
29769 addr = plus_constant (Pmode, dst, i);
29770 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29771 if ((align & 3) == 0)
29772 emit_move_insn (mem, reg);
29773 else
29774 emit_insn (gen_unaligned_storesi (mem, reg));
29777 /* Merge last pair of STRH and STRB into a STR if possible. */
29778 if (unaligned_access && i > 0 && (i + 3) == length)
29780 addr = plus_constant (Pmode, dst, i - 1);
29781 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29782 /* We are shifting one byte back, set the alignment accordingly. */
29783 if ((align & 1) == 0)
29784 set_mem_align (mem, BITS_PER_UNIT);
29786 /* Most likely this is an unaligned access, and we can't tell at
29787 compilation time. */
29788 emit_insn (gen_unaligned_storesi (mem, reg));
29789 return true;
29792 /* Handle half word leftover. */
29793 if (i + 2 <= length)
29795 reg = gen_lowpart (HImode, val_reg);
29796 addr = plus_constant (Pmode, dst, i);
29797 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29798 if ((align & 1) == 0)
29799 emit_move_insn (mem, reg);
29800 else
29801 emit_insn (gen_unaligned_storehi (mem, reg));
29803 i += 2;
29806 /* Handle single byte leftover. */
29807 if (i + 1 == length)
29809 reg = gen_lowpart (QImode, val_reg);
29810 addr = plus_constant (Pmode, dst, i);
29811 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29812 emit_move_insn (mem, reg);
29815 return true;
29818 /* Set a block of memory using vectorization instructions for both
29819 aligned and unaligned cases. We fill the first LENGTH bytes of
29820 the memory area starting from DSTBASE with byte constant VALUE.
29821 ALIGN is the alignment requirement of memory. */
29822 static bool
29823 arm_block_set_vect (rtx dstbase,
29824 unsigned HOST_WIDE_INT length,
29825 unsigned HOST_WIDE_INT value,
29826 unsigned HOST_WIDE_INT align)
29828 /* Check whether we need to use unaligned store instruction. */
29829 if (((align & 3) != 0 || (length & 3) != 0)
29830 /* Check whether unaligned store instruction is available. */
29831 && (!unaligned_access || BYTES_BIG_ENDIAN))
29832 return false;
29834 if ((align & 3) == 0)
29835 return arm_block_set_aligned_vect (dstbase, length, value, align);
29836 else
29837 return arm_block_set_unaligned_vect (dstbase, length, value, align);
 29840 /* Expand a string store operation.  First we try to do it using
 29841    vectorization instructions, then with ARM unaligned access and
 29842    double-word stores if profitable.  OPERANDS[0] is the destination,
 29843    OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29844 initialize the memory, OPERANDS[3] is the known alignment of the
29845 destination. */
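/* As an illustration (not from the original comment): a 16-byte memset with
   a 4-byte-aligned destination on a NEON target whose tuning sets
   string_ops_prefer_neon is handled by arm_block_set_vect; other cases fall
   back to the non-vectorized helpers above, and lengths above 64 bytes are
   rejected here so that the generic expansion is used instead.  */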
29846 bool
29847 arm_gen_setmem (rtx *operands)
29849 rtx dstbase = operands[0];
29850 unsigned HOST_WIDE_INT length;
29851 unsigned HOST_WIDE_INT value;
29852 unsigned HOST_WIDE_INT align;
29854 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29855 return false;
29857 length = UINTVAL (operands[1]);
29858 if (length > 64)
29859 return false;
29861 value = (UINTVAL (operands[2]) & 0xFF);
29862 align = UINTVAL (operands[3]);
29863 if (TARGET_NEON && length >= 8
29864 && current_tune->string_ops_prefer_neon
29865 && arm_block_set_vect (dstbase, length, value, align))
29866 return true;
29868 if (!unaligned_access && (align & 3) != 0)
29869 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29871 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29875 static bool
29876 arm_macro_fusion_p (void)
29878 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
29882 static bool
29883 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29885 rtx set_dest;
29886 rtx prev_set = single_set (prev);
29887 rtx curr_set = single_set (curr);
29889 if (!prev_set
29890 || !curr_set)
29891 return false;
29893 if (any_condjump_p (curr))
29894 return false;
29896 if (!arm_macro_fusion_p ())
29897 return false;
29899 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
29900 && aarch_crypto_can_dual_issue (prev, curr))
29901 return true;
29903 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT)
29905 /* We are trying to fuse
29906 movw imm / movt imm
29907 instructions as a group that gets scheduled together. */
29909 set_dest = SET_DEST (curr_set);
29911 if (GET_MODE (set_dest) != SImode)
29912 return false;
29914 /* We are trying to match:
29915 prev (movw) == (set (reg r0) (const_int imm16))
29916 curr (movt) == (set (zero_extract (reg r0)
29917 (const_int 16)
29918 (const_int 16))
29919 (const_int imm16_1))
29921 prev (movw) == (set (reg r1)
29922 (high (symbol_ref ("SYM"))))
29923 curr (movt) == (set (reg r0)
29924 (lo_sum (reg r1)
29925 (symbol_ref ("SYM")))) */
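      /* As a concrete illustration (not from the original comment): loading
	 the constant 0x12345678 is split into "movw r0, #0x5678" followed by
	 "movt r0, #0x1234", which matches the reg/zero_extract pair above;
	 the high/lo_sum pair is the analogous form used for symbolic
	 addresses.  */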
29926 if (GET_CODE (set_dest) == ZERO_EXTRACT)
29928 if (CONST_INT_P (SET_SRC (curr_set))
29929 && CONST_INT_P (SET_SRC (prev_set))
29930 && REG_P (XEXP (set_dest, 0))
29931 && REG_P (SET_DEST (prev_set))
29932 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29933 return true;
29935 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29936 && REG_P (SET_DEST (curr_set))
29937 && REG_P (SET_DEST (prev_set))
29938 && GET_CODE (SET_SRC (prev_set)) == HIGH
29939 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29940 return true;
29942 return false;
29945 /* Return true iff the instruction fusion described by OP is enabled. */
29946 bool
29947 arm_fusion_enabled_p (tune_params::fuse_ops op)
29949 return current_tune->fusible_ops & op;
29952 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29954 static unsigned HOST_WIDE_INT
29955 arm_asan_shadow_offset (void)
29957 return HOST_WIDE_INT_1U << 29;
29961 /* This is a temporary fix for PR60655. Ideally we need
29962 to handle most of these cases in the generic part but
29963 currently we reject minus (..) (sym_ref). We try to
29964 ameliorate the case with minus (sym_ref1) (sym_ref2)
29965 where they are in the same section. */
29967 static bool
29968 arm_const_not_ok_for_debug_p (rtx p)
29970 tree decl_op0 = NULL;
29971 tree decl_op1 = NULL;
29973 if (GET_CODE (p) == MINUS)
29975 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29977 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29978 if (decl_op1
29979 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29980 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29982 if ((TREE_CODE (decl_op1) == VAR_DECL
29983 || TREE_CODE (decl_op1) == CONST_DECL)
29984 && (TREE_CODE (decl_op0) == VAR_DECL
29985 || TREE_CODE (decl_op0) == CONST_DECL))
29986 return (get_variable_section (decl_op1, false)
29987 != get_variable_section (decl_op0, false));
29989 if (TREE_CODE (decl_op1) == LABEL_DECL
29990 && TREE_CODE (decl_op0) == LABEL_DECL)
29991 return (DECL_CONTEXT (decl_op1)
29992 != DECL_CONTEXT (decl_op0));
29995 return true;
29999 return false;
 30002 /* Return TRUE if X is a reference to a value in a constant pool.  */
30003 extern bool
30004 arm_is_constant_pool_ref (rtx x)
30006 return (MEM_P (x)
30007 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30008 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30011 /* Remember the last target of arm_set_current_function. */
30012 static GTY(()) tree arm_previous_fndecl;
30014 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30016 void
30017 save_restore_target_globals (tree new_tree)
30019 /* If we have a previous state, use it. */
30020 if (TREE_TARGET_GLOBALS (new_tree))
30021 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30022 else if (new_tree == target_option_default_node)
30023 restore_target_globals (&default_target_globals);
30024 else
30026 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30027 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30030 arm_option_params_internal ();
30033 /* Invalidate arm_previous_fndecl. */
30035 void
30036 arm_reset_previous_fndecl (void)
30038 arm_previous_fndecl = NULL_TREE;
30041 /* Establish appropriate back-end context for processing the function
30042 FNDECL. The argument might be NULL to indicate processing at top
30043 level, outside of any function scope. */
30045 static void
30046 arm_set_current_function (tree fndecl)
30048 if (!fndecl || fndecl == arm_previous_fndecl)
30049 return;
30051 tree old_tree = (arm_previous_fndecl
30052 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30053 : NULL_TREE);
30055 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30057 /* If current function has no attributes but previous one did,
30058 use the default node. */
30059 if (! new_tree && old_tree)
30060 new_tree = target_option_default_node;
30062 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30063 the default have been handled by save_restore_target_globals from
30064 arm_pragma_target_parse. */
30065 if (old_tree == new_tree)
30066 return;
30068 arm_previous_fndecl = fndecl;
30070 /* First set the target options. */
30071 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30073 save_restore_target_globals (new_tree);
30076 /* Implement TARGET_OPTION_PRINT. */
30078 static void
30079 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30081 int flags = ptr->x_target_flags;
30082 const struct arm_fpu_desc *fpu_desc = &all_fpus[ptr->x_arm_fpu_index];
30084 fprintf (file, "%*sselected arch %s\n", indent, "",
30085 TARGET_THUMB2_P (flags) ? "thumb2" :
30086 TARGET_THUMB_P (flags) ? "thumb1" :
30087 "arm");
30089 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_desc->name);
30092 /* Hook to determine if one function can safely inline another. */
30094 static bool
30095 arm_can_inline_p (tree caller, tree callee)
30097 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30098 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30100 struct cl_target_option *caller_opts
30101 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30102 : target_option_default_node);
30104 struct cl_target_option *callee_opts
30105 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30106 : target_option_default_node);
30108 const struct arm_fpu_desc *caller_fpu
30109 = &all_fpus[caller_opts->x_arm_fpu_index];
30110 const struct arm_fpu_desc *callee_fpu
30111 = &all_fpus[callee_opts->x_arm_fpu_index];
30113 /* Callee's fpu features should be a subset of the caller's. */
30114 if ((caller_fpu->features & callee_fpu->features) != callee_fpu->features)
30115 return false;
30117 /* Need same model and regs. */
30118 if (callee_fpu->model != caller_fpu->model
 30119       || callee_fpu->regs != caller_fpu->regs)
30120 return false;
30122 /* OK to inline between different modes.
30123 Function with mode specific instructions, e.g using asm,
30124 must be explicitly protected with noinline. */
30125 return true;
30128 /* Hook to fix function's alignment affected by target attribute. */
30130 static void
30131 arm_relayout_function (tree fndecl)
30133 if (DECL_USER_ALIGN (fndecl))
30134 return;
30136 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30138 if (!callee_tree)
30139 callee_tree = target_option_default_node;
30141 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30142 SET_DECL_ALIGN (fndecl, FUNCTION_BOUNDARY_P (opts->x_target_flags));
30145 /* Inner function to process the attribute((target(...))), take an argument and
30146 set the current options from the argument. If we have a list, recursively
30147 go over the list. */
30149 static bool
30150 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30152 if (TREE_CODE (args) == TREE_LIST)
30154 bool ret = true;
30156 for (; args; args = TREE_CHAIN (args))
30157 if (TREE_VALUE (args)
30158 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30159 ret = false;
30160 return ret;
30163 else if (TREE_CODE (args) != STRING_CST)
30165 error ("attribute %<target%> argument not a string");
30166 return false;
30169 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30170 char *q;
30172 while ((q = strtok (argstr, ",")) != NULL)
30174 while (ISSPACE (*q)) ++q;
30176 argstr = NULL;
30177 if (!strncmp (q, "thumb", 5))
30178 opts->x_target_flags |= MASK_THUMB;
30180 else if (!strncmp (q, "arm", 3))
30181 opts->x_target_flags &= ~MASK_THUMB;
30183 else if (!strncmp (q, "fpu=", 4))
30185 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30186 &opts->x_arm_fpu_index, CL_TARGET))
30188 error ("invalid fpu for attribute(target(\"%s\"))", q);
30189 return false;
30192 else
30194 error ("attribute(target(\"%s\")) is unknown", q);
30195 return false;
30198 arm_option_check_internal (opts);
30201 return true;
30204 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30206 tree
30207 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30208 struct gcc_options *opts_set)
30210 if (!arm_valid_target_attribute_rec (args, opts))
30211 return NULL_TREE;
30213 /* Do any overrides, such as global options arch=xxx. */
30214 arm_option_override_internal (opts, opts_set);
30216 return build_target_option_node (opts);
30219 static void
30220 add_attribute (const char * mode, tree *attributes)
30222 size_t len = strlen (mode);
30223 tree value = build_string (len, mode);
30225 TREE_TYPE (value) = build_array_type (char_type_node,
30226 build_index_type (size_int (len)));
30228 *attributes = tree_cons (get_identifier ("target"),
30229 build_tree_list (NULL_TREE, value),
30230 *attributes);
 30233 /* For testing.  Insert thumb or arm modes alternately on functions.  */
30235 static void
30236 arm_insert_attributes (tree fndecl, tree * attributes)
30238 const char *mode;
30240 if (! TARGET_FLIP_THUMB)
30241 return;
30243 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30244 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30245 return;
30247 /* Nested definitions must inherit mode. */
30248 if (current_function_decl)
30250 mode = TARGET_THUMB ? "thumb" : "arm";
30251 add_attribute (mode, attributes);
30252 return;
30255 /* If there is already a setting don't change it. */
30256 if (lookup_attribute ("target", *attributes) != NULL)
30257 return;
30259 mode = thumb_flipper ? "thumb" : "arm";
30260 add_attribute (mode, attributes);
30262 thumb_flipper = !thumb_flipper;
30265 /* Hook to validate attribute((target("string"))). */
30267 static bool
30268 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30269 tree args, int ARG_UNUSED (flags))
30271 bool ret = true;
30272 struct gcc_options func_options;
30273 tree cur_tree, new_optimize;
30274 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30276 /* Get the optimization options of the current function. */
30277 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30279 /* If the function changed the optimization levels as well as setting target
30280 options, start with the optimizations specified. */
30281 if (!func_optimize)
30282 func_optimize = optimization_default_node;
30284 /* Init func_options. */
30285 memset (&func_options, 0, sizeof (func_options));
30286 init_options_struct (&func_options, NULL);
30287 lang_hooks.init_options_struct (&func_options);
30289 /* Initialize func_options to the defaults. */
30290 cl_optimization_restore (&func_options,
30291 TREE_OPTIMIZATION (func_optimize));
30293 cl_target_option_restore (&func_options,
30294 TREE_TARGET_OPTION (target_option_default_node));
30296 /* Set func_options flags with new target mode. */
30297 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30298 &global_options_set);
30300 if (cur_tree == NULL_TREE)
30301 ret = false;
30303 new_optimize = build_optimization_node (&func_options);
30305 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30307 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30309 finalize_options_struct (&func_options);
30311 return ret;
30314 void
30315 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30318 fprintf (stream, "\t.syntax unified\n");
30320 if (TARGET_THUMB)
30322 if (is_called_in_ARM_mode (decl)
30323 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30324 && cfun->is_thunk))
30325 fprintf (stream, "\t.code 32\n");
30326 else if (TARGET_THUMB1)
30327 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30328 else
30329 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30331 else
30332 fprintf (stream, "\t.arm\n");
30334 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30335 TARGET_SOFT_FLOAT ? "softvfp" : TARGET_FPU_NAME);
30337 if (TARGET_POKE_FUNCTION_NAME)
30338 arm_poke_function_name (stream, (const char *) name);
30341 /* If MEM is in the form of [base+offset], extract the two parts
 30342    of the address and store them in BASE and OFFSET; otherwise return false
30343 after clearing BASE and OFFSET. */
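/* For example (illustrative): (mem (plus (reg r1) (const_int 8))) gives
   BASE == r1 and OFFSET == 8, while a bare (mem (reg r1)) gives
   OFFSET == const0_rtx.  */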
30345 static bool
30346 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30348 rtx addr;
30350 gcc_assert (MEM_P (mem));
30352 addr = XEXP (mem, 0);
30354 /* Strip off const from addresses like (const (addr)). */
30355 if (GET_CODE (addr) == CONST)
30356 addr = XEXP (addr, 0);
30358 if (GET_CODE (addr) == REG)
30360 *base = addr;
30361 *offset = const0_rtx;
30362 return true;
30365 if (GET_CODE (addr) == PLUS
30366 && GET_CODE (XEXP (addr, 0)) == REG
30367 && CONST_INT_P (XEXP (addr, 1)))
30369 *base = XEXP (addr, 0);
30370 *offset = XEXP (addr, 1);
30371 return true;
30374 *base = NULL_RTX;
30375 *offset = NULL_RTX;
30377 return false;
 30380 /* If INSN is a load or store whose address is in the form [base+offset],
 30381    extract the two parts and store them in BASE and OFFSET.  IS_LOAD is set
30382 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30383 otherwise return FALSE. */
30385 static bool
30386 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30388 rtx x, dest, src;
30390 gcc_assert (INSN_P (insn));
30391 x = PATTERN (insn);
30392 if (GET_CODE (x) != SET)
30393 return false;
30395 src = SET_SRC (x);
30396 dest = SET_DEST (x);
30397 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30399 *is_load = false;
30400 extract_base_offset_in_addr (dest, base, offset);
30402 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30404 *is_load = true;
30405 extract_base_offset_in_addr (src, base, offset);
30407 else
30408 return false;
30410 return (*base != NULL_RTX && *offset != NULL_RTX);
30413 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
 30415    Currently we only support fusing ldr or str instructions, so FUSION_PRI
 30416    and PRI are only calculated for these instructions.  For other instructions,
 30417    FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds
 30418    of instruction fusion can be supported by returning different priorities.
30420 It's important that irrelevant instructions get the largest FUSION_PRI. */
30422 static void
30423 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30424 int *fusion_pri, int *pri)
30426 int tmp, off_val;
30427 bool is_load;
30428 rtx base, offset;
30430 gcc_assert (INSN_P (insn));
30432 tmp = max_pri - 1;
30433 if (!fusion_load_store (insn, &base, &offset, &is_load))
30435 *pri = tmp;
30436 *fusion_pri = tmp;
30437 return;
30440 /* Load goes first. */
30441 if (is_load)
30442 *fusion_pri = tmp - 1;
30443 else
30444 *fusion_pri = tmp - 2;
30446 tmp /= 2;
30448 /* INSN with smaller base register goes first. */
30449 tmp -= ((REGNO (base) & 0xff) << 20);
30451 /* INSN with smaller offset goes first. */
30452 off_val = (int)(INTVAL (offset));
30453 if (off_val >= 0)
30454 tmp -= (off_val & 0xfffff);
30455 else
30456 tmp += ((- off_val) & 0xfffff);
30458 *pri = tmp;
30459 return;
30463 /* Construct and return a PARALLEL RTX vector with elements numbering the
30464 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30465 the vector - from the perspective of the architecture. This does not
30466 line up with GCC's perspective on lane numbers, so we end up with
30467 different masks depending on our target endian-ness. The diagram
30468 below may help. We must draw the distinction when building masks
30469 which select one half of the vector. An instruction selecting
 30470    architectural low-lanes for a big-endian target must be described using
30471 a mask selecting GCC high-lanes.
30473 Big-Endian Little-Endian
30475 GCC 0 1 2 3 3 2 1 0
30476 | x | x | x | x | | x | x | x | x |
30477 Architecture 3 2 1 0 3 2 1 0
30479 Low Mask: { 2, 3 } { 0, 1 }
 29480    High Mask:        { 0, 1 }                { 2, 3 }
 29481 */
 29483 rtx
30484 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
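  /* Illustrative example: for V4SImode and HIGH == true this returns a
     PARALLEL selecting lanes { 2, 3 } on a little-endian target but
     { 0, 1 } on a big-endian target, matching the "High Mask" row in the
     diagram above.  */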
30486 int nunits = GET_MODE_NUNITS (mode);
30487 rtvec v = rtvec_alloc (nunits / 2);
30488 int high_base = nunits / 2;
30489 int low_base = 0;
30490 int base;
30491 rtx t1;
30492 int i;
30494 if (BYTES_BIG_ENDIAN)
30495 base = high ? low_base : high_base;
30496 else
30497 base = high ? high_base : low_base;
30499 for (i = 0; i < nunits / 2; i++)
30500 RTVEC_ELT (v, i) = GEN_INT (base + i);
30502 t1 = gen_rtx_PARALLEL (mode, v);
30503 return t1;
30506 /* Check OP for validity as a PARALLEL RTX vector with elements
30507 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30508 from the perspective of the architecture. See the diagram above
30509 arm_simd_vect_par_cnst_half_p for more details. */
30511 bool
30512 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30513 bool high)
30515 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30516 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30517 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30518 int i = 0;
30520 if (!VECTOR_MODE_P (mode))
30521 return false;
30523 if (count_op != count_ideal)
30524 return false;
30526 for (i = 0; i < count_ideal; i++)
30528 rtx elt_op = XVECEXP (op, 0, i);
30529 rtx elt_ideal = XVECEXP (ideal, 0, i);
30531 if (!CONST_INT_P (elt_op)
30532 || INTVAL (elt_ideal) != INTVAL (elt_op))
30533 return false;
30535 return true;
30538 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30539 in Thumb1. */
30540 static bool
30541 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30542 const_tree)
 30544   /* For now, we punt and do not handle this for TARGET_THUMB1.  */
30545 if (vcall_offset && TARGET_THUMB1)
30546 return false;
30548 /* Otherwise ok. */
30549 return true;
30552 #include "gt-arm.h"