[ARM][2/4] Replace casts of 1 to HOST_WIDE_INT by HOST_WIDE_INT_1 and HOST_WIDE_INT_1U
[official-gcc.git] / gcc / config / arm / arm.c
blob 21d09b8ad775f795bcddae12a4f233a5cf53d74b
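The macros named in the commit title are defined in GCC's hwint.h and stand for the constant 1 as a signed and an unsigned HOST_WIDE_INT respectively. A minimal sketch of the kind of substitution the patch performs (the variable names and expressions below are illustrative, not taken from the actual diff):

    /* Before: open-coded casts of the literal 1.  */
    mask  = ((HOST_WIDE_INT) 1 << bits) - 1;
    umask = ((unsigned HOST_WIDE_INT) 1 << bits) - 1;

    /* After: the equivalent hwint.h convenience macros.  */
    mask  = (HOST_WIDE_INT_1 << bits) - 1;
    umask = (HOST_WIDE_INT_1U << bits) - 1;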
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "cfghooks.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "emit-rtl.h"
37 #include "recog.h"
38 #include "cgraph.h"
39 #include "diagnostic-core.h"
40 #include "alias.h"
41 #include "fold-const.h"
42 #include "stor-layout.h"
43 #include "calls.h"
44 #include "varasm.h"
45 #include "output.h"
46 #include "insn-attr.h"
47 #include "flags.h"
48 #include "reload.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "cfgrtl.h"
52 #include "sched-int.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
55 #include "intl.h"
56 #include "libfuncs.h"
57 #include "params.h"
58 #include "opts.h"
59 #include "dumpfile.h"
60 #include "target-globals.h"
61 #include "builtins.h"
62 #include "tm-constrs.h"
63 #include "rtl-iter.h"
65 /* This file should be included last. */
66 #include "target-def.h"
68 /* Forward definitions of types. */
69 typedef struct minipool_node Mnode;
70 typedef struct minipool_fixup Mfix;
72 void (*arm_lang_output_object_attributes_hook)(void);
74 struct four_ints
76 int i[4];
79 /* Forward function declarations. */
80 static bool arm_const_not_ok_for_debug_p (rtx);
81 static bool arm_needs_doubleword_align (machine_mode, const_tree);
82 static int arm_compute_static_chain_stack_bytes (void);
83 static arm_stack_offsets *arm_get_frame_offsets (void);
84 static void arm_add_gc_roots (void);
85 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
86 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
87 static unsigned bit_count (unsigned long);
88 static unsigned feature_count (const arm_feature_set*);
89 static int arm_address_register_rtx_p (rtx, int);
90 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
91 static bool is_called_in_ARM_mode (tree);
92 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
93 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
94 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
95 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
96 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
97 inline static int thumb1_index_register_rtx_p (rtx, int);
98 static int thumb_far_jump_used_p (void);
99 static bool thumb_force_lr_save (void);
100 static unsigned arm_size_return_regs (void);
101 static bool arm_assemble_integer (rtx, unsigned int, int);
102 static void arm_print_operand (FILE *, rtx, int);
103 static void arm_print_operand_address (FILE *, machine_mode, rtx);
104 static bool arm_print_operand_punct_valid_p (unsigned char code);
105 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
106 static arm_cc get_arm_condition_code (rtx);
107 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
108 static const char *output_multi_immediate (rtx *, const char *, const char *,
109 int, HOST_WIDE_INT);
110 static const char *shift_op (rtx, HOST_WIDE_INT *);
111 static struct machine_function *arm_init_machine_status (void);
112 static void thumb_exit (FILE *, int);
113 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
114 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
115 static Mnode *add_minipool_forward_ref (Mfix *);
116 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
117 static Mnode *add_minipool_backward_ref (Mfix *);
118 static void assign_minipool_offsets (Mfix *);
119 static void arm_print_value (FILE *, rtx);
120 static void dump_minipool (rtx_insn *);
121 static int arm_barrier_cost (rtx_insn *);
122 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
123 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
124 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
125 machine_mode, rtx);
126 static void arm_reorg (void);
127 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
128 static unsigned long arm_compute_save_reg0_reg12_mask (void);
129 static unsigned long arm_compute_save_reg_mask (void);
130 static unsigned long arm_isr_value (tree);
131 static unsigned long arm_compute_func_type (void);
132 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
133 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
134 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
135 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
136 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
137 #endif
138 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
139 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
140 static int arm_comp_type_attributes (const_tree, const_tree);
141 static void arm_set_default_type_attributes (tree);
142 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
143 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
144 static int optimal_immediate_sequence (enum rtx_code code,
145 unsigned HOST_WIDE_INT val,
146 struct four_ints *return_sequence);
147 static int optimal_immediate_sequence_1 (enum rtx_code code,
148 unsigned HOST_WIDE_INT val,
149 struct four_ints *return_sequence,
150 int i);
151 static int arm_get_strip_length (int);
152 static bool arm_function_ok_for_sibcall (tree, tree);
153 static machine_mode arm_promote_function_mode (const_tree,
154 machine_mode, int *,
155 const_tree, int);
156 static bool arm_return_in_memory (const_tree, const_tree);
157 static rtx arm_function_value (const_tree, const_tree, bool);
158 static rtx arm_libcall_value_1 (machine_mode);
159 static rtx arm_libcall_value (machine_mode, const_rtx);
160 static bool arm_function_value_regno_p (const unsigned int);
161 static void arm_internal_label (FILE *, const char *, unsigned long);
162 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
163 tree);
164 static bool arm_have_conditional_execution (void);
165 static bool arm_cannot_force_const_mem (machine_mode, rtx);
166 static bool arm_legitimate_constant_p (machine_mode, rtx);
167 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
168 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
169 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
170 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
171 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
172 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
173 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
174 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
175 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
176 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
177 static void emit_constant_insn (rtx cond, rtx pattern);
178 static rtx_insn *emit_set_insn (rtx, rtx);
179 static rtx emit_multi_reg_push (unsigned long, unsigned long);
180 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
181 tree, bool);
182 static rtx arm_function_arg (cumulative_args_t, machine_mode,
183 const_tree, bool);
184 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
185 const_tree, bool);
186 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
187 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
188 const_tree);
189 static rtx aapcs_libcall_value (machine_mode);
190 static int aapcs_select_return_coproc (const_tree, const_tree);
192 #ifdef OBJECT_FORMAT_ELF
193 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
194 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
195 #endif
196 #ifndef ARM_PE
197 static void arm_encode_section_info (tree, rtx, int);
198 #endif
200 static void arm_file_end (void);
201 static void arm_file_start (void);
202 static void arm_insert_attributes (tree, tree *);
204 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
205 tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
214 #if ARM_UNWIND_INFO
215 static void arm_unwind_emit (FILE *, rtx_insn *);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 static void arm_asm_init_sections (void);
219 #endif
220 static rtx arm_dwarf_register_span (rtx);
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static void arm_override_options_after_change (void);
237 static void arm_option_print (FILE *, int, struct cl_target_option *);
238 static void arm_set_current_function (tree);
239 static bool arm_can_inline_p (tree, tree);
240 static void arm_relayout_function (tree);
241 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
242 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
243 static bool arm_macro_fusion_p (void);
244 static bool arm_cannot_copy_insn_p (rtx_insn *);
245 static int arm_issue_rate (void);
246 static int arm_first_cycle_multipass_dfa_lookahead (void);
247 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
248 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
249 static bool arm_output_addr_const_extra (FILE *, rtx);
250 static bool arm_allocate_stack_slots_for_args (void);
251 static bool arm_warn_func_return (tree);
252 static tree arm_promoted_type (const_tree t);
253 static tree arm_convert_to_type (tree type, tree expr);
254 static bool arm_scalar_mode_supported_p (machine_mode);
255 static bool arm_frame_pointer_required (void);
256 static bool arm_can_eliminate (const int, const int);
257 static void arm_asm_trampoline_template (FILE *);
258 static void arm_trampoline_init (rtx, tree, rtx);
259 static rtx arm_trampoline_adjust_address (rtx);
260 static rtx arm_pic_static_addr (rtx orig, rtx reg);
261 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
262 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
263 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
264 static bool arm_array_mode_supported_p (machine_mode,
265 unsigned HOST_WIDE_INT);
266 static machine_mode arm_preferred_simd_mode (machine_mode);
267 static bool arm_class_likely_spilled_p (reg_class_t);
268 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
269 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
270 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
271 const_tree type,
272 int misalignment,
273 bool is_packed);
274 static void arm_conditional_register_usage (void);
275 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
276 static unsigned int arm_autovectorize_vector_sizes (void);
277 static int arm_default_branch_cost (bool, bool);
278 static int arm_cortex_a5_branch_cost (bool, bool);
279 static int arm_cortex_m_branch_cost (bool, bool);
280 static int arm_cortex_m7_branch_cost (bool, bool);
282 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
283 const unsigned char *sel);
285 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
287 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
288 tree vectype,
289 int misalign ATTRIBUTE_UNUSED);
290 static unsigned arm_add_stmt_cost (void *data, int count,
291 enum vect_cost_for_stmt kind,
292 struct _stmt_vec_info *stmt_info,
293 int misalign,
294 enum vect_cost_model_location where);
296 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
297 bool op0_preserve_value);
298 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
300 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
301 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
302 const_tree);
305 /* Table of machine attributes. */
306 static const struct attribute_spec arm_attribute_table[] =
308 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
309 affects_type_identity } */
310 /* Function calls made to this symbol must be done indirectly, because
311 it may lie outside of the 26 bit addressing range of a normal function
312 call. */
313 { "long_call", 0, 0, false, true, true, NULL, false },
314 /* Whereas these functions are always known to reside within the 26 bit
315 addressing range. */
316 { "short_call", 0, 0, false, true, true, NULL, false },
317 /* Specify the procedure call conventions for a function. */
318 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
319 false },
320 /* Interrupt Service Routines have special prologue and epilogue requirements. */
321 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
322 false },
323 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
324 false },
325 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
326 false },
327 #ifdef ARM_PE
328 /* ARM/PE has three new attributes:
329 interfacearm - ?
330 dllexport - for exporting a function/variable that will live in a dll
331 dllimport - for importing a function/variable from a dll
333 Microsoft allows multiple declspecs in one __declspec, separating
334 them with spaces. We do NOT support this. Instead, use __declspec
335       multiple times.  */
337 { "dllimport", 0, 0, true, false, false, NULL, false },
338 { "dllexport", 0, 0, true, false, false, NULL, false },
339 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
340 false },
341 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
342 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
343 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
344 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
345 false },
346 #endif
347 { NULL, 0, 0, false, false, false, NULL, false }
350 /* Initialize the GCC target structure. */
351 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
352 #undef TARGET_MERGE_DECL_ATTRIBUTES
353 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
354 #endif
356 #undef TARGET_LEGITIMIZE_ADDRESS
357 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
359 #undef TARGET_LRA_P
360 #define TARGET_LRA_P hook_bool_void_true
362 #undef TARGET_ATTRIBUTE_TABLE
363 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
365 #undef TARGET_INSERT_ATTRIBUTES
366 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
368 #undef TARGET_ASM_FILE_START
369 #define TARGET_ASM_FILE_START arm_file_start
370 #undef TARGET_ASM_FILE_END
371 #define TARGET_ASM_FILE_END arm_file_end
373 #undef TARGET_ASM_ALIGNED_SI_OP
374 #define TARGET_ASM_ALIGNED_SI_OP NULL
375 #undef TARGET_ASM_INTEGER
376 #define TARGET_ASM_INTEGER arm_assemble_integer
378 #undef TARGET_PRINT_OPERAND
379 #define TARGET_PRINT_OPERAND arm_print_operand
380 #undef TARGET_PRINT_OPERAND_ADDRESS
381 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
382 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
383 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
385 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
386 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
388 #undef TARGET_ASM_FUNCTION_PROLOGUE
389 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
391 #undef TARGET_ASM_FUNCTION_EPILOGUE
392 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
394 #undef TARGET_CAN_INLINE_P
395 #define TARGET_CAN_INLINE_P arm_can_inline_p
397 #undef TARGET_RELAYOUT_FUNCTION
398 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
400 #undef TARGET_OPTION_OVERRIDE
401 #define TARGET_OPTION_OVERRIDE arm_option_override
403 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
404 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
406 #undef TARGET_OPTION_PRINT
407 #define TARGET_OPTION_PRINT arm_option_print
409 #undef TARGET_COMP_TYPE_ATTRIBUTES
410 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
412 #undef TARGET_SCHED_MACRO_FUSION_P
413 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
415 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
416 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
418 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
419 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
421 #undef TARGET_SCHED_ADJUST_COST
422 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
424 #undef TARGET_SET_CURRENT_FUNCTION
425 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
427 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
428 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
430 #undef TARGET_SCHED_REORDER
431 #define TARGET_SCHED_REORDER arm_sched_reorder
433 #undef TARGET_REGISTER_MOVE_COST
434 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
436 #undef TARGET_MEMORY_MOVE_COST
437 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
439 #undef TARGET_ENCODE_SECTION_INFO
440 #ifdef ARM_PE
441 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
442 #else
443 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
444 #endif
446 #undef TARGET_STRIP_NAME_ENCODING
447 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
449 #undef TARGET_ASM_INTERNAL_LABEL
450 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
452 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
453 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
455 #undef TARGET_FUNCTION_VALUE
456 #define TARGET_FUNCTION_VALUE arm_function_value
458 #undef TARGET_LIBCALL_VALUE
459 #define TARGET_LIBCALL_VALUE arm_libcall_value
461 #undef TARGET_FUNCTION_VALUE_REGNO_P
462 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
464 #undef TARGET_ASM_OUTPUT_MI_THUNK
465 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
466 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
467 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
469 #undef TARGET_RTX_COSTS
470 #define TARGET_RTX_COSTS arm_rtx_costs
471 #undef TARGET_ADDRESS_COST
472 #define TARGET_ADDRESS_COST arm_address_cost
474 #undef TARGET_SHIFT_TRUNCATION_MASK
475 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
476 #undef TARGET_VECTOR_MODE_SUPPORTED_P
477 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
478 #undef TARGET_ARRAY_MODE_SUPPORTED_P
479 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
480 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
481 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
482 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
483 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
484 arm_autovectorize_vector_sizes
486 #undef TARGET_MACHINE_DEPENDENT_REORG
487 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
489 #undef TARGET_INIT_BUILTINS
490 #define TARGET_INIT_BUILTINS arm_init_builtins
491 #undef TARGET_EXPAND_BUILTIN
492 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
493 #undef TARGET_BUILTIN_DECL
494 #define TARGET_BUILTIN_DECL arm_builtin_decl
496 #undef TARGET_INIT_LIBFUNCS
497 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
499 #undef TARGET_PROMOTE_FUNCTION_MODE
500 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
501 #undef TARGET_PROMOTE_PROTOTYPES
502 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
503 #undef TARGET_PASS_BY_REFERENCE
504 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
505 #undef TARGET_ARG_PARTIAL_BYTES
506 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
507 #undef TARGET_FUNCTION_ARG
508 #define TARGET_FUNCTION_ARG arm_function_arg
509 #undef TARGET_FUNCTION_ARG_ADVANCE
510 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
511 #undef TARGET_FUNCTION_ARG_BOUNDARY
512 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
514 #undef TARGET_SETUP_INCOMING_VARARGS
515 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
517 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
518 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
520 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
521 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
522 #undef TARGET_TRAMPOLINE_INIT
523 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
524 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
525 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
527 #undef TARGET_WARN_FUNC_RETURN
528 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
530 #undef TARGET_DEFAULT_SHORT_ENUMS
531 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
533 #undef TARGET_ALIGN_ANON_BITFIELD
534 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
536 #undef TARGET_NARROW_VOLATILE_BITFIELD
537 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
539 #undef TARGET_CXX_GUARD_TYPE
540 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
542 #undef TARGET_CXX_GUARD_MASK_BIT
543 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
545 #undef TARGET_CXX_GET_COOKIE_SIZE
546 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
548 #undef TARGET_CXX_COOKIE_HAS_SIZE
549 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
551 #undef TARGET_CXX_CDTOR_RETURNS_THIS
552 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
554 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
555 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
557 #undef TARGET_CXX_USE_AEABI_ATEXIT
558 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
560 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
561 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
562 arm_cxx_determine_class_data_visibility
564 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
565 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
567 #undef TARGET_RETURN_IN_MSB
568 #define TARGET_RETURN_IN_MSB arm_return_in_msb
570 #undef TARGET_RETURN_IN_MEMORY
571 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
573 #undef TARGET_MUST_PASS_IN_STACK
574 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
576 #if ARM_UNWIND_INFO
577 #undef TARGET_ASM_UNWIND_EMIT
578 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
580 /* EABI unwinding tables use a different format for the typeinfo tables. */
581 #undef TARGET_ASM_TTYPE
582 #define TARGET_ASM_TTYPE arm_output_ttype
584 #undef TARGET_ARM_EABI_UNWINDER
585 #define TARGET_ARM_EABI_UNWINDER true
587 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
588 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
590 #undef TARGET_ASM_INIT_SECTIONS
591 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
592 #endif /* ARM_UNWIND_INFO */
594 #undef TARGET_DWARF_REGISTER_SPAN
595 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
597 #undef TARGET_CANNOT_COPY_INSN_P
598 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
600 #ifdef HAVE_AS_TLS
601 #undef TARGET_HAVE_TLS
602 #define TARGET_HAVE_TLS true
603 #endif
605 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
606 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
608 #undef TARGET_LEGITIMATE_CONSTANT_P
609 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
611 #undef TARGET_CANNOT_FORCE_CONST_MEM
612 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
614 #undef TARGET_MAX_ANCHOR_OFFSET
615 #define TARGET_MAX_ANCHOR_OFFSET 4095
617 /* The minimum is set such that the total size of the block
618 for a particular anchor is -4088 + 1 + 4095 bytes, which is
619 divisible by eight, ensuring natural spacing of anchors. */
620 #undef TARGET_MIN_ANCHOR_OFFSET
621 #define TARGET_MIN_ANCHOR_OFFSET -4088
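/* Editorial note, not part of arm.c: with the two settings above an anchor
   can reach offsets in the closed interval [-4088, 4095], i.e.
   4088 + 1 + 4095 = 8184 bytes in total, and 8184 = 8 * 1023, which is the
   "divisible by eight" property the comment before TARGET_MIN_ANCHOR_OFFSET
   relies on for natural anchor spacing.  */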
623 #undef TARGET_SCHED_ISSUE_RATE
624 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
626 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
627 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
628 arm_first_cycle_multipass_dfa_lookahead
630 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
631 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
632 arm_first_cycle_multipass_dfa_lookahead_guard
634 #undef TARGET_MANGLE_TYPE
635 #define TARGET_MANGLE_TYPE arm_mangle_type
637 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
638 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
640 #undef TARGET_BUILD_BUILTIN_VA_LIST
641 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
642 #undef TARGET_EXPAND_BUILTIN_VA_START
643 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
644 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
645 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
647 #ifdef HAVE_AS_TLS
648 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
649 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
650 #endif
652 #undef TARGET_LEGITIMATE_ADDRESS_P
653 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
655 #undef TARGET_PREFERRED_RELOAD_CLASS
656 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
658 #undef TARGET_PROMOTED_TYPE
659 #define TARGET_PROMOTED_TYPE arm_promoted_type
661 #undef TARGET_CONVERT_TO_TYPE
662 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
664 #undef TARGET_SCALAR_MODE_SUPPORTED_P
665 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
667 #undef TARGET_FRAME_POINTER_REQUIRED
668 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
670 #undef TARGET_CAN_ELIMINATE
671 #define TARGET_CAN_ELIMINATE arm_can_eliminate
673 #undef TARGET_CONDITIONAL_REGISTER_USAGE
674 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
676 #undef TARGET_CLASS_LIKELY_SPILLED_P
677 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
679 #undef TARGET_VECTORIZE_BUILTINS
680 #define TARGET_VECTORIZE_BUILTINS
682 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
683 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
684 arm_builtin_vectorized_function
686 #undef TARGET_VECTOR_ALIGNMENT
687 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
689 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
690 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
691 arm_vector_alignment_reachable
693 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
694 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
695 arm_builtin_support_vector_misalignment
697 #undef TARGET_PREFERRED_RENAME_CLASS
698 #define TARGET_PREFERRED_RENAME_CLASS \
699 arm_preferred_rename_class
701 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
702 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
703 arm_vectorize_vec_perm_const_ok
705 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
706 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
707 arm_builtin_vectorization_cost
708 #undef TARGET_VECTORIZE_ADD_STMT_COST
709 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
711 #undef TARGET_CANONICALIZE_COMPARISON
712 #define TARGET_CANONICALIZE_COMPARISON \
713 arm_canonicalize_comparison
715 #undef TARGET_ASAN_SHADOW_OFFSET
716 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
718 #undef MAX_INSN_PER_IT_BLOCK
719 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
721 #undef TARGET_CAN_USE_DOLOOP_P
722 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
724 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
725 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
727 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
728 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
730 #undef TARGET_SCHED_FUSION_PRIORITY
731 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
733 struct gcc_target targetm = TARGET_INITIALIZER;
735 /* Obstack for minipool constant handling. */
736 static struct obstack minipool_obstack;
737 static char * minipool_startobj;
739 /* The maximum number of insns skipped which
740 will be conditionalised if possible. */
741 static int max_insns_skipped = 5;
743 extern FILE * asm_out_file;
745 /* True if we are currently building a constant table. */
746 int making_const_table;
748 /* The processor for which instructions should be scheduled. */
749 enum processor_type arm_tune = arm_none;
751 /* The current tuning set. */
752 const struct tune_params *current_tune;
754 /* Which floating point hardware to schedule for. */
755 int arm_fpu_attr;
757 /* Used for Thumb call_via trampolines. */
758 rtx thumb_call_via_label[14];
759 static int thumb_call_reg_needed;
761 /* The bits in this mask specify which
762 instructions we are allowed to generate. */
763 arm_feature_set insn_flags = ARM_FSET_EMPTY;
765 /* The bits in this mask specify which instruction scheduling options should
766 be used. */
767 arm_feature_set tune_flags = ARM_FSET_EMPTY;
769 /* The highest ARM architecture version supported by the
770 target. */
771 enum base_architecture arm_base_arch = BASE_ARCH_0;
773 /* The following are used in the arm.md file as equivalents to bits
774 in the above two flag variables. */
776 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
777 int arm_arch3m = 0;
779 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
780 int arm_arch4 = 0;
782 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
783 int arm_arch4t = 0;
785 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
786 int arm_arch5 = 0;
788 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
789 int arm_arch5e = 0;
791 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
792 int arm_arch6 = 0;
794 /* Nonzero if this chip supports the ARM 6K extensions. */
795 int arm_arch6k = 0;
797 /* Nonzero if this chip supports the ARM 6KZ extensions. */
798 int arm_arch6kz = 0;
800 /* Nonzero if instructions present in ARMv6-M can be used. */
801 int arm_arch6m = 0;
803 /* Nonzero if this chip supports the ARM 7 extensions. */
804 int arm_arch7 = 0;
806 /* Nonzero if instructions not present in the 'M' profile can be used. */
807 int arm_arch_notm = 0;
809 /* Nonzero if instructions present in ARMv7E-M can be used. */
810 int arm_arch7em = 0;
812 /* Nonzero if instructions present in ARMv8 can be used. */
813 int arm_arch8 = 0;
815 /* Nonzero if this chip supports the ARMv8.1 extensions. */
816 int arm_arch8_1 = 0;
818 /* Nonzero if this chip can benefit from load scheduling. */
819 int arm_ld_sched = 0;
821 /* Nonzero if this chip is a StrongARM. */
822 int arm_tune_strongarm = 0;
824 /* Nonzero if this chip supports Intel Wireless MMX technology. */
825 int arm_arch_iwmmxt = 0;
827 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
828 int arm_arch_iwmmxt2 = 0;
830 /* Nonzero if this chip is an XScale. */
831 int arm_arch_xscale = 0;
833 /* Nonzero if tuning for XScale */
834 int arm_tune_xscale = 0;
836 /* Nonzero if we want to tune for stores that access the write-buffer.
837 This typically means an ARM6 or ARM7 with MMU or MPU. */
838 int arm_tune_wbuf = 0;
840 /* Nonzero if tuning for Cortex-A9. */
841 int arm_tune_cortex_a9 = 0;
843 /* Nonzero if we should define __THUMB_INTERWORK__ in the
844 preprocessor.
845 XXX This is a bit of a hack, it's intended to help work around
846 problems in GLD which doesn't understand that armv5t code is
847 interworking clean. */
848 int arm_cpp_interwork = 0;
850 /* Nonzero if chip supports Thumb 2. */
851 int arm_arch_thumb2;
853 /* Nonzero if chip supports integer division instruction. */
854 int arm_arch_arm_hwdiv;
855 int arm_arch_thumb_hwdiv;
857 /* Nonzero if chip disallows volatile memory access in IT block. */
858 int arm_arch_no_volatile_ce;
860 /* Nonzero if we should use Neon to handle 64-bits operations rather
861 than core registers. */
862 int prefer_neon_for_64bits = 0;
864 /* Nonzero if we shouldn't use literal pools. */
865 bool arm_disable_literal_pool = false;
867 /* The register number to be used for the PIC offset register. */
868 unsigned arm_pic_register = INVALID_REGNUM;
870 enum arm_pcs arm_pcs_default;
872 /* For an explanation of these variables, see final_prescan_insn below. */
873 int arm_ccfsm_state;
874 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
875 enum arm_cond_code arm_current_cc;
877 rtx arm_target_insn;
878 int arm_target_label;
879 /* The number of conditionally executed insns, including the current insn. */
880 int arm_condexec_count = 0;
881 /* A bitmask specifying the patterns for the IT block.
882 Zero means do not output an IT block before this insn. */
883 int arm_condexec_mask = 0;
884 /* The number of bits used in arm_condexec_mask. */
885 int arm_condexec_masklen = 0;
887 /* Nonzero if chip supports the ARMv8 CRC instructions. */
888 int arm_arch_crc = 0;
890 /* Nonzero if the core has a very small, high-latency, multiply unit. */
891 int arm_m_profile_small_mul = 0;
893 /* The condition codes of the ARM, and the inverse function. */
894 static const char * const arm_condition_codes[] =
896 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
897 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
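/* Editorial note, not part of arm.c: the table is laid out so that each
   condition sits next to its inverse (eq/ne, cs/cc, mi/pl, ...); the
   "inverse function" mentioned above can therefore be computed by XOR-ing
   the condition-code index with 1, which is how the ARM backend's
   ARM_INVERSE_CONDITION_CODE macro is conventionally defined.  */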
900 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
901 int arm_regs_in_sequence[] =
903 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
906 #define ARM_LSL_NAME "lsl"
907 #define streq(string1, string2) (strcmp (string1, string2) == 0)
909 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
910 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
911 | (1 << PIC_OFFSET_TABLE_REGNUM)))
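/* Editorial sketch, not part of arm.c: THUMB2_WORK_REGS starts from 0xff,
   the mask of the eight low registers r0-r7, and clears the bits of the
   frame pointer, stack pointer, program counter and PIC register.
   Assuming, purely for illustration, THUMB_HARD_FRAME_POINTER_REGNUM == 7,
   SP_REGNUM == 13, PC_REGNUM == 15 and PIC_OFFSET_TABLE_REGNUM == 9, this
   evaluates to 0xff & ~(0x80 | 0x2000 | 0x8000 | 0x200) == 0x7f, i.e.
   r0-r6 remain available as work registers.  */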
913 /* Initialization code. */
915 struct processors
917 const char *const name;
918 enum processor_type core;
919 const char *arch;
920 enum base_architecture base_arch;
921 const arm_feature_set flags;
922 const struct tune_params *const tune;
926 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
927 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
929 num_slots, \
930 l1_size, \
931 l1_line_size \
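/* Editorial sketch, not part of arm.c: a tuning entry for which prefetching
   pays off would use the second macro, e.g. ARM_PREFETCH_BENEFICIAL (4,
   32768, 64) fills the { num_slots, l1_size, l1_line_size } triple with
   four prefetch slots, a 32 KB L1 cache and 64-byte cache lines; those
   numbers are illustrative only.  ARM_PREFETCH_NOT_BENEFICIAL above
   encodes { 0, -1, -1 }, i.e. no useful prefetch slots.  */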
934 /* arm generic vectorizer costs. */
935 static const
936 struct cpu_vec_costs arm_default_vec_cost = {
937 1, /* scalar_stmt_cost. */
938 1, /* scalar load_cost. */
939 1, /* scalar_store_cost. */
940 1, /* vec_stmt_cost. */
941 1, /* vec_to_scalar_cost. */
942 1, /* scalar_to_vec_cost. */
943 1, /* vec_align_load_cost. */
944 1, /* vec_unalign_load_cost. */
945 1, /* vec_unalign_store_cost. */
946 1, /* vec_store_cost. */
947 3, /* cond_taken_branch_cost. */
948 1, /* cond_not_taken_branch_cost. */
951 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
952 #include "aarch-cost-tables.h"
956 const struct cpu_cost_table cortexa9_extra_costs =
958 /* ALU */
960 0, /* arith. */
961 0, /* logical. */
962 0, /* shift. */
963 COSTS_N_INSNS (1), /* shift_reg. */
964 COSTS_N_INSNS (1), /* arith_shift. */
965 COSTS_N_INSNS (2), /* arith_shift_reg. */
966 0, /* log_shift. */
967 COSTS_N_INSNS (1), /* log_shift_reg. */
968 COSTS_N_INSNS (1), /* extend. */
969 COSTS_N_INSNS (2), /* extend_arith. */
970 COSTS_N_INSNS (1), /* bfi. */
971 COSTS_N_INSNS (1), /* bfx. */
972 0, /* clz. */
973 0, /* rev. */
974 0, /* non_exec. */
975 true /* non_exec_costs_exec. */
978 /* MULT SImode */
980 COSTS_N_INSNS (3), /* simple. */
981 COSTS_N_INSNS (3), /* flag_setting. */
982 COSTS_N_INSNS (2), /* extend. */
983 COSTS_N_INSNS (3), /* add. */
984 COSTS_N_INSNS (2), /* extend_add. */
985 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
987 /* MULT DImode */
989 0, /* simple (N/A). */
990 0, /* flag_setting (N/A). */
991 COSTS_N_INSNS (4), /* extend. */
992 0, /* add (N/A). */
993 COSTS_N_INSNS (4), /* extend_add. */
994 0 /* idiv (N/A). */
997 /* LD/ST */
999 COSTS_N_INSNS (2), /* load. */
1000 COSTS_N_INSNS (2), /* load_sign_extend. */
1001 COSTS_N_INSNS (2), /* ldrd. */
1002 COSTS_N_INSNS (2), /* ldm_1st. */
1003 1, /* ldm_regs_per_insn_1st. */
1004 2, /* ldm_regs_per_insn_subsequent. */
1005 COSTS_N_INSNS (5), /* loadf. */
1006 COSTS_N_INSNS (5), /* loadd. */
1007 COSTS_N_INSNS (1), /* load_unaligned. */
1008 COSTS_N_INSNS (2), /* store. */
1009 COSTS_N_INSNS (2), /* strd. */
1010 COSTS_N_INSNS (2), /* stm_1st. */
1011 1, /* stm_regs_per_insn_1st. */
1012 2, /* stm_regs_per_insn_subsequent. */
1013 COSTS_N_INSNS (1), /* storef. */
1014 COSTS_N_INSNS (1), /* stored. */
1015 COSTS_N_INSNS (1), /* store_unaligned. */
1016 COSTS_N_INSNS (1), /* loadv. */
1017 COSTS_N_INSNS (1) /* storev. */
1020 /* FP SFmode */
1022 COSTS_N_INSNS (14), /* div. */
1023 COSTS_N_INSNS (4), /* mult. */
1024 COSTS_N_INSNS (7), /* mult_addsub. */
1025 COSTS_N_INSNS (30), /* fma. */
1026 COSTS_N_INSNS (3), /* addsub. */
1027 COSTS_N_INSNS (1), /* fpconst. */
1028 COSTS_N_INSNS (1), /* neg. */
1029 COSTS_N_INSNS (3), /* compare. */
1030 COSTS_N_INSNS (3), /* widen. */
1031 COSTS_N_INSNS (3), /* narrow. */
1032 COSTS_N_INSNS (3), /* toint. */
1033 COSTS_N_INSNS (3), /* fromint. */
1034 COSTS_N_INSNS (3) /* roundint. */
1036 /* FP DFmode */
1038 COSTS_N_INSNS (24), /* div. */
1039 COSTS_N_INSNS (5), /* mult. */
1040 COSTS_N_INSNS (8), /* mult_addsub. */
1041 COSTS_N_INSNS (30), /* fma. */
1042 COSTS_N_INSNS (3), /* addsub. */
1043 COSTS_N_INSNS (1), /* fpconst. */
1044 COSTS_N_INSNS (1), /* neg. */
1045 COSTS_N_INSNS (3), /* compare. */
1046 COSTS_N_INSNS (3), /* widen. */
1047 COSTS_N_INSNS (3), /* narrow. */
1048 COSTS_N_INSNS (3), /* toint. */
1049 COSTS_N_INSNS (3), /* fromint. */
1050 COSTS_N_INSNS (3) /* roundint. */
1053 /* Vector */
1055 COSTS_N_INSNS (1) /* alu. */
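/* Editorial note, not part of arm.c: the COSTS_N_INSNS values in these
   per-core tables are expressed in units of one baseline instruction; in
   GCC's rtl.h the macro expands to (N) * 4, so COSTS_N_INSNS (3) adds
   12 cost units, the equivalent of three simple instructions, on top of
   the generic cost model.  A value of 0 means the operation costs no more
   than the baseline.  */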
1059 const struct cpu_cost_table cortexa8_extra_costs =
1061 /* ALU */
1063 0, /* arith. */
1064 0, /* logical. */
1065 COSTS_N_INSNS (1), /* shift. */
1066 0, /* shift_reg. */
1067 COSTS_N_INSNS (1), /* arith_shift. */
1068 0, /* arith_shift_reg. */
1069 COSTS_N_INSNS (1), /* log_shift. */
1070 0, /* log_shift_reg. */
1071 0, /* extend. */
1072 0, /* extend_arith. */
1073 0, /* bfi. */
1074 0, /* bfx. */
1075 0, /* clz. */
1076 0, /* rev. */
1077 0, /* non_exec. */
1078 true /* non_exec_costs_exec. */
1081 /* MULT SImode */
1083 COSTS_N_INSNS (1), /* simple. */
1084 COSTS_N_INSNS (1), /* flag_setting. */
1085 COSTS_N_INSNS (1), /* extend. */
1086 COSTS_N_INSNS (1), /* add. */
1087 COSTS_N_INSNS (1), /* extend_add. */
1088 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1090 /* MULT DImode */
1092 0, /* simple (N/A). */
1093 0, /* flag_setting (N/A). */
1094 COSTS_N_INSNS (2), /* extend. */
1095 0, /* add (N/A). */
1096 COSTS_N_INSNS (2), /* extend_add. */
1097 0 /* idiv (N/A). */
1100 /* LD/ST */
1102 COSTS_N_INSNS (1), /* load. */
1103 COSTS_N_INSNS (1), /* load_sign_extend. */
1104 COSTS_N_INSNS (1), /* ldrd. */
1105 COSTS_N_INSNS (1), /* ldm_1st. */
1106 1, /* ldm_regs_per_insn_1st. */
1107 2, /* ldm_regs_per_insn_subsequent. */
1108 COSTS_N_INSNS (1), /* loadf. */
1109 COSTS_N_INSNS (1), /* loadd. */
1110 COSTS_N_INSNS (1), /* load_unaligned. */
1111 COSTS_N_INSNS (1), /* store. */
1112 COSTS_N_INSNS (1), /* strd. */
1113 COSTS_N_INSNS (1), /* stm_1st. */
1114 1, /* stm_regs_per_insn_1st. */
1115 2, /* stm_regs_per_insn_subsequent. */
1116 COSTS_N_INSNS (1), /* storef. */
1117 COSTS_N_INSNS (1), /* stored. */
1118 COSTS_N_INSNS (1), /* store_unaligned. */
1119 COSTS_N_INSNS (1), /* loadv. */
1120 COSTS_N_INSNS (1) /* storev. */
1123 /* FP SFmode */
1125 COSTS_N_INSNS (36), /* div. */
1126 COSTS_N_INSNS (11), /* mult. */
1127 COSTS_N_INSNS (20), /* mult_addsub. */
1128 COSTS_N_INSNS (30), /* fma. */
1129 COSTS_N_INSNS (9), /* addsub. */
1130 COSTS_N_INSNS (3), /* fpconst. */
1131 COSTS_N_INSNS (3), /* neg. */
1132 COSTS_N_INSNS (6), /* compare. */
1133 COSTS_N_INSNS (4), /* widen. */
1134 COSTS_N_INSNS (4), /* narrow. */
1135 COSTS_N_INSNS (8), /* toint. */
1136 COSTS_N_INSNS (8), /* fromint. */
1137 COSTS_N_INSNS (8) /* roundint. */
1139 /* FP DFmode */
1141 COSTS_N_INSNS (64), /* div. */
1142 COSTS_N_INSNS (16), /* mult. */
1143 COSTS_N_INSNS (25), /* mult_addsub. */
1144 COSTS_N_INSNS (30), /* fma. */
1145 COSTS_N_INSNS (9), /* addsub. */
1146 COSTS_N_INSNS (3), /* fpconst. */
1147 COSTS_N_INSNS (3), /* neg. */
1148 COSTS_N_INSNS (6), /* compare. */
1149 COSTS_N_INSNS (6), /* widen. */
1150 COSTS_N_INSNS (6), /* narrow. */
1151 COSTS_N_INSNS (8), /* toint. */
1152 COSTS_N_INSNS (8), /* fromint. */
1153 COSTS_N_INSNS (8) /* roundint. */
1156 /* Vector */
1158 COSTS_N_INSNS (1) /* alu. */
1162 const struct cpu_cost_table cortexa5_extra_costs =
1164 /* ALU */
1166 0, /* arith. */
1167 0, /* logical. */
1168 COSTS_N_INSNS (1), /* shift. */
1169 COSTS_N_INSNS (1), /* shift_reg. */
1170 COSTS_N_INSNS (1), /* arith_shift. */
1171 COSTS_N_INSNS (1), /* arith_shift_reg. */
1172 COSTS_N_INSNS (1), /* log_shift. */
1173 COSTS_N_INSNS (1), /* log_shift_reg. */
1174 COSTS_N_INSNS (1), /* extend. */
1175 COSTS_N_INSNS (1), /* extend_arith. */
1176 COSTS_N_INSNS (1), /* bfi. */
1177 COSTS_N_INSNS (1), /* bfx. */
1178 COSTS_N_INSNS (1), /* clz. */
1179 COSTS_N_INSNS (1), /* rev. */
1180 0, /* non_exec. */
1181 true /* non_exec_costs_exec. */
1185 /* MULT SImode */
1187 0, /* simple. */
1188 COSTS_N_INSNS (1), /* flag_setting. */
1189 COSTS_N_INSNS (1), /* extend. */
1190 COSTS_N_INSNS (1), /* add. */
1191 COSTS_N_INSNS (1), /* extend_add. */
1192 COSTS_N_INSNS (7) /* idiv. */
1194 /* MULT DImode */
1196 0, /* simple (N/A). */
1197 0, /* flag_setting (N/A). */
1198 COSTS_N_INSNS (1), /* extend. */
1199 0, /* add. */
1200 COSTS_N_INSNS (2), /* extend_add. */
1201 0 /* idiv (N/A). */
1204 /* LD/ST */
1206 COSTS_N_INSNS (1), /* load. */
1207 COSTS_N_INSNS (1), /* load_sign_extend. */
1208 COSTS_N_INSNS (6), /* ldrd. */
1209 COSTS_N_INSNS (1), /* ldm_1st. */
1210 1, /* ldm_regs_per_insn_1st. */
1211 2, /* ldm_regs_per_insn_subsequent. */
1212 COSTS_N_INSNS (2), /* loadf. */
1213 COSTS_N_INSNS (4), /* loadd. */
1214 COSTS_N_INSNS (1), /* load_unaligned. */
1215 COSTS_N_INSNS (1), /* store. */
1216 COSTS_N_INSNS (3), /* strd. */
1217 COSTS_N_INSNS (1), /* stm_1st. */
1218 1, /* stm_regs_per_insn_1st. */
1219 2, /* stm_regs_per_insn_subsequent. */
1220 COSTS_N_INSNS (2), /* storef. */
1221 COSTS_N_INSNS (2), /* stored. */
1222 COSTS_N_INSNS (1), /* store_unaligned. */
1223 COSTS_N_INSNS (1), /* loadv. */
1224 COSTS_N_INSNS (1) /* storev. */
1227 /* FP SFmode */
1229 COSTS_N_INSNS (15), /* div. */
1230 COSTS_N_INSNS (3), /* mult. */
1231 COSTS_N_INSNS (7), /* mult_addsub. */
1232 COSTS_N_INSNS (7), /* fma. */
1233 COSTS_N_INSNS (3), /* addsub. */
1234 COSTS_N_INSNS (3), /* fpconst. */
1235 COSTS_N_INSNS (3), /* neg. */
1236 COSTS_N_INSNS (3), /* compare. */
1237 COSTS_N_INSNS (3), /* widen. */
1238 COSTS_N_INSNS (3), /* narrow. */
1239 COSTS_N_INSNS (3), /* toint. */
1240 COSTS_N_INSNS (3), /* fromint. */
1241 COSTS_N_INSNS (3) /* roundint. */
1243 /* FP DFmode */
1245 COSTS_N_INSNS (30), /* div. */
1246 COSTS_N_INSNS (6), /* mult. */
1247 COSTS_N_INSNS (10), /* mult_addsub. */
1248 COSTS_N_INSNS (7), /* fma. */
1249 COSTS_N_INSNS (3), /* addsub. */
1250 COSTS_N_INSNS (3), /* fpconst. */
1251 COSTS_N_INSNS (3), /* neg. */
1252 COSTS_N_INSNS (3), /* compare. */
1253 COSTS_N_INSNS (3), /* widen. */
1254 COSTS_N_INSNS (3), /* narrow. */
1255 COSTS_N_INSNS (3), /* toint. */
1256 COSTS_N_INSNS (3), /* fromint. */
1257 COSTS_N_INSNS (3) /* roundint. */
1260 /* Vector */
1262 COSTS_N_INSNS (1) /* alu. */
1267 const struct cpu_cost_table cortexa7_extra_costs =
1269 /* ALU */
1271 0, /* arith. */
1272 0, /* logical. */
1273 COSTS_N_INSNS (1), /* shift. */
1274 COSTS_N_INSNS (1), /* shift_reg. */
1275 COSTS_N_INSNS (1), /* arith_shift. */
1276 COSTS_N_INSNS (1), /* arith_shift_reg. */
1277 COSTS_N_INSNS (1), /* log_shift. */
1278 COSTS_N_INSNS (1), /* log_shift_reg. */
1279 COSTS_N_INSNS (1), /* extend. */
1280 COSTS_N_INSNS (1), /* extend_arith. */
1281 COSTS_N_INSNS (1), /* bfi. */
1282 COSTS_N_INSNS (1), /* bfx. */
1283 COSTS_N_INSNS (1), /* clz. */
1284 COSTS_N_INSNS (1), /* rev. */
1285 0, /* non_exec. */
1286 true /* non_exec_costs_exec. */
1290 /* MULT SImode */
1292 0, /* simple. */
1293 COSTS_N_INSNS (1), /* flag_setting. */
1294 COSTS_N_INSNS (1), /* extend. */
1295 COSTS_N_INSNS (1), /* add. */
1296 COSTS_N_INSNS (1), /* extend_add. */
1297 COSTS_N_INSNS (7) /* idiv. */
1299 /* MULT DImode */
1301 0, /* simple (N/A). */
1302 0, /* flag_setting (N/A). */
1303 COSTS_N_INSNS (1), /* extend. */
1304 0, /* add. */
1305 COSTS_N_INSNS (2), /* extend_add. */
1306 0 /* idiv (N/A). */
1309 /* LD/ST */
1311 COSTS_N_INSNS (1), /* load. */
1312 COSTS_N_INSNS (1), /* load_sign_extend. */
1313 COSTS_N_INSNS (3), /* ldrd. */
1314 COSTS_N_INSNS (1), /* ldm_1st. */
1315 1, /* ldm_regs_per_insn_1st. */
1316 2, /* ldm_regs_per_insn_subsequent. */
1317 COSTS_N_INSNS (2), /* loadf. */
1318 COSTS_N_INSNS (2), /* loadd. */
1319 COSTS_N_INSNS (1), /* load_unaligned. */
1320 COSTS_N_INSNS (1), /* store. */
1321 COSTS_N_INSNS (3), /* strd. */
1322 COSTS_N_INSNS (1), /* stm_1st. */
1323 1, /* stm_regs_per_insn_1st. */
1324 2, /* stm_regs_per_insn_subsequent. */
1325 COSTS_N_INSNS (2), /* storef. */
1326 COSTS_N_INSNS (2), /* stored. */
1327 COSTS_N_INSNS (1), /* store_unaligned. */
1328 COSTS_N_INSNS (1), /* loadv. */
1329 COSTS_N_INSNS (1) /* storev. */
1332 /* FP SFmode */
1334 COSTS_N_INSNS (15), /* div. */
1335 COSTS_N_INSNS (3), /* mult. */
1336 COSTS_N_INSNS (7), /* mult_addsub. */
1337 COSTS_N_INSNS (7), /* fma. */
1338 COSTS_N_INSNS (3), /* addsub. */
1339 COSTS_N_INSNS (3), /* fpconst. */
1340 COSTS_N_INSNS (3), /* neg. */
1341 COSTS_N_INSNS (3), /* compare. */
1342 COSTS_N_INSNS (3), /* widen. */
1343 COSTS_N_INSNS (3), /* narrow. */
1344 COSTS_N_INSNS (3), /* toint. */
1345 COSTS_N_INSNS (3), /* fromint. */
1346 COSTS_N_INSNS (3) /* roundint. */
1348 /* FP DFmode */
1350 COSTS_N_INSNS (30), /* div. */
1351 COSTS_N_INSNS (6), /* mult. */
1352 COSTS_N_INSNS (10), /* mult_addsub. */
1353 COSTS_N_INSNS (7), /* fma. */
1354 COSTS_N_INSNS (3), /* addsub. */
1355 COSTS_N_INSNS (3), /* fpconst. */
1356 COSTS_N_INSNS (3), /* neg. */
1357 COSTS_N_INSNS (3), /* compare. */
1358 COSTS_N_INSNS (3), /* widen. */
1359 COSTS_N_INSNS (3), /* narrow. */
1360 COSTS_N_INSNS (3), /* toint. */
1361 COSTS_N_INSNS (3), /* fromint. */
1362 COSTS_N_INSNS (3) /* roundint. */
1365 /* Vector */
1367 COSTS_N_INSNS (1) /* alu. */
1371 const struct cpu_cost_table cortexa12_extra_costs =
1373 /* ALU */
1375 0, /* arith. */
1376 0, /* logical. */
1377 0, /* shift. */
1378 COSTS_N_INSNS (1), /* shift_reg. */
1379 COSTS_N_INSNS (1), /* arith_shift. */
1380 COSTS_N_INSNS (1), /* arith_shift_reg. */
1381 COSTS_N_INSNS (1), /* log_shift. */
1382 COSTS_N_INSNS (1), /* log_shift_reg. */
1383 0, /* extend. */
1384 COSTS_N_INSNS (1), /* extend_arith. */
1385 0, /* bfi. */
1386 COSTS_N_INSNS (1), /* bfx. */
1387 COSTS_N_INSNS (1), /* clz. */
1388 COSTS_N_INSNS (1), /* rev. */
1389 0, /* non_exec. */
1390 true /* non_exec_costs_exec. */
1392 /* MULT SImode */
1395 COSTS_N_INSNS (2), /* simple. */
1396 COSTS_N_INSNS (3), /* flag_setting. */
1397 COSTS_N_INSNS (2), /* extend. */
1398 COSTS_N_INSNS (3), /* add. */
1399 COSTS_N_INSNS (2), /* extend_add. */
1400 COSTS_N_INSNS (18) /* idiv. */
1402 /* MULT DImode */
1404 0, /* simple (N/A). */
1405 0, /* flag_setting (N/A). */
1406 COSTS_N_INSNS (3), /* extend. */
1407 0, /* add (N/A). */
1408 COSTS_N_INSNS (3), /* extend_add. */
1409 0 /* idiv (N/A). */
1412 /* LD/ST */
1414 COSTS_N_INSNS (3), /* load. */
1415 COSTS_N_INSNS (3), /* load_sign_extend. */
1416 COSTS_N_INSNS (3), /* ldrd. */
1417 COSTS_N_INSNS (3), /* ldm_1st. */
1418 1, /* ldm_regs_per_insn_1st. */
1419 2, /* ldm_regs_per_insn_subsequent. */
1420 COSTS_N_INSNS (3), /* loadf. */
1421 COSTS_N_INSNS (3), /* loadd. */
1422 0, /* load_unaligned. */
1423 0, /* store. */
1424 0, /* strd. */
1425 0, /* stm_1st. */
1426 1, /* stm_regs_per_insn_1st. */
1427 2, /* stm_regs_per_insn_subsequent. */
1428 COSTS_N_INSNS (2), /* storef. */
1429 COSTS_N_INSNS (2), /* stored. */
1430 0, /* store_unaligned. */
1431 COSTS_N_INSNS (1), /* loadv. */
1432 COSTS_N_INSNS (1) /* storev. */
1435 /* FP SFmode */
1437 COSTS_N_INSNS (17), /* div. */
1438 COSTS_N_INSNS (4), /* mult. */
1439 COSTS_N_INSNS (8), /* mult_addsub. */
1440 COSTS_N_INSNS (8), /* fma. */
1441 COSTS_N_INSNS (4), /* addsub. */
1442 COSTS_N_INSNS (2), /* fpconst. */
1443 COSTS_N_INSNS (2), /* neg. */
1444 COSTS_N_INSNS (2), /* compare. */
1445 COSTS_N_INSNS (4), /* widen. */
1446 COSTS_N_INSNS (4), /* narrow. */
1447 COSTS_N_INSNS (4), /* toint. */
1448 COSTS_N_INSNS (4), /* fromint. */
1449 COSTS_N_INSNS (4) /* roundint. */
1451 /* FP DFmode */
1453 COSTS_N_INSNS (31), /* div. */
1454 COSTS_N_INSNS (4), /* mult. */
1455 COSTS_N_INSNS (8), /* mult_addsub. */
1456 COSTS_N_INSNS (8), /* fma. */
1457 COSTS_N_INSNS (4), /* addsub. */
1458 COSTS_N_INSNS (2), /* fpconst. */
1459 COSTS_N_INSNS (2), /* neg. */
1460 COSTS_N_INSNS (2), /* compare. */
1461 COSTS_N_INSNS (4), /* widen. */
1462 COSTS_N_INSNS (4), /* narrow. */
1463 COSTS_N_INSNS (4), /* toint. */
1464 COSTS_N_INSNS (4), /* fromint. */
1465 COSTS_N_INSNS (4) /* roundint. */
1468 /* Vector */
1470 COSTS_N_INSNS (1) /* alu. */
1474 const struct cpu_cost_table cortexa15_extra_costs =
1476 /* ALU */
1478 0, /* arith. */
1479 0, /* logical. */
1480 0, /* shift. */
1481 0, /* shift_reg. */
1482 COSTS_N_INSNS (1), /* arith_shift. */
1483 COSTS_N_INSNS (1), /* arith_shift_reg. */
1484 COSTS_N_INSNS (1), /* log_shift. */
1485 COSTS_N_INSNS (1), /* log_shift_reg. */
1486 0, /* extend. */
1487 COSTS_N_INSNS (1), /* extend_arith. */
1488 COSTS_N_INSNS (1), /* bfi. */
1489 0, /* bfx. */
1490 0, /* clz. */
1491 0, /* rev. */
1492 0, /* non_exec. */
1493 true /* non_exec_costs_exec. */
1495 /* MULT SImode */
1498 COSTS_N_INSNS (2), /* simple. */
1499 COSTS_N_INSNS (3), /* flag_setting. */
1500 COSTS_N_INSNS (2), /* extend. */
1501 COSTS_N_INSNS (2), /* add. */
1502 COSTS_N_INSNS (2), /* extend_add. */
1503 COSTS_N_INSNS (18) /* idiv. */
1505 /* MULT DImode */
1507 0, /* simple (N/A). */
1508 0, /* flag_setting (N/A). */
1509 COSTS_N_INSNS (3), /* extend. */
1510 0, /* add (N/A). */
1511 COSTS_N_INSNS (3), /* extend_add. */
1512 0 /* idiv (N/A). */
1515 /* LD/ST */
1517 COSTS_N_INSNS (3), /* load. */
1518 COSTS_N_INSNS (3), /* load_sign_extend. */
1519 COSTS_N_INSNS (3), /* ldrd. */
1520 COSTS_N_INSNS (4), /* ldm_1st. */
1521 1, /* ldm_regs_per_insn_1st. */
1522 2, /* ldm_regs_per_insn_subsequent. */
1523 COSTS_N_INSNS (4), /* loadf. */
1524 COSTS_N_INSNS (4), /* loadd. */
1525 0, /* load_unaligned. */
1526 0, /* store. */
1527 0, /* strd. */
1528 COSTS_N_INSNS (1), /* stm_1st. */
1529 1, /* stm_regs_per_insn_1st. */
1530 2, /* stm_regs_per_insn_subsequent. */
1531 0, /* storef. */
1532 0, /* stored. */
1533 0, /* store_unaligned. */
1534 COSTS_N_INSNS (1), /* loadv. */
1535 COSTS_N_INSNS (1) /* storev. */
1538 /* FP SFmode */
1540 COSTS_N_INSNS (17), /* div. */
1541 COSTS_N_INSNS (4), /* mult. */
1542 COSTS_N_INSNS (8), /* mult_addsub. */
1543 COSTS_N_INSNS (8), /* fma. */
1544 COSTS_N_INSNS (4), /* addsub. */
1545 COSTS_N_INSNS (2), /* fpconst. */
1546 COSTS_N_INSNS (2), /* neg. */
1547 COSTS_N_INSNS (5), /* compare. */
1548 COSTS_N_INSNS (4), /* widen. */
1549 COSTS_N_INSNS (4), /* narrow. */
1550 COSTS_N_INSNS (4), /* toint. */
1551 COSTS_N_INSNS (4), /* fromint. */
1552 COSTS_N_INSNS (4) /* roundint. */
1554 /* FP DFmode */
1556 COSTS_N_INSNS (31), /* div. */
1557 COSTS_N_INSNS (4), /* mult. */
1558 COSTS_N_INSNS (8), /* mult_addsub. */
1559 COSTS_N_INSNS (8), /* fma. */
1560 COSTS_N_INSNS (4), /* addsub. */
1561 COSTS_N_INSNS (2), /* fpconst. */
1562 COSTS_N_INSNS (2), /* neg. */
1563 COSTS_N_INSNS (2), /* compare. */
1564 COSTS_N_INSNS (4), /* widen. */
1565 COSTS_N_INSNS (4), /* narrow. */
1566 COSTS_N_INSNS (4), /* toint. */
1567 COSTS_N_INSNS (4), /* fromint. */
1568 COSTS_N_INSNS (4) /* roundint. */
1571 /* Vector */
1573 COSTS_N_INSNS (1) /* alu. */
1577 const struct cpu_cost_table v7m_extra_costs =
1579 /* ALU */
1581 0, /* arith. */
1582 0, /* logical. */
1583 0, /* shift. */
1584 0, /* shift_reg. */
1585 0, /* arith_shift. */
1586 COSTS_N_INSNS (1), /* arith_shift_reg. */
1587 0, /* log_shift. */
1588 COSTS_N_INSNS (1), /* log_shift_reg. */
1589 0, /* extend. */
1590 COSTS_N_INSNS (1), /* extend_arith. */
1591 0, /* bfi. */
1592 0, /* bfx. */
1593 0, /* clz. */
1594 0, /* rev. */
1595 COSTS_N_INSNS (1), /* non_exec. */
1596 false /* non_exec_costs_exec. */
1599 /* MULT SImode */
1601 COSTS_N_INSNS (1), /* simple. */
1602 COSTS_N_INSNS (1), /* flag_setting. */
1603 COSTS_N_INSNS (2), /* extend. */
1604 COSTS_N_INSNS (1), /* add. */
1605 COSTS_N_INSNS (3), /* extend_add. */
1606 COSTS_N_INSNS (8) /* idiv. */
1608 /* MULT DImode */
1610 0, /* simple (N/A). */
1611 0, /* flag_setting (N/A). */
1612 COSTS_N_INSNS (2), /* extend. */
1613 0, /* add (N/A). */
1614 COSTS_N_INSNS (3), /* extend_add. */
1615 0 /* idiv (N/A). */
1618 /* LD/ST */
1620 COSTS_N_INSNS (2), /* load. */
1621 0, /* load_sign_extend. */
1622 COSTS_N_INSNS (3), /* ldrd. */
1623 COSTS_N_INSNS (2), /* ldm_1st. */
1624 1, /* ldm_regs_per_insn_1st. */
1625 1, /* ldm_regs_per_insn_subsequent. */
1626 COSTS_N_INSNS (2), /* loadf. */
1627 COSTS_N_INSNS (3), /* loadd. */
1628 COSTS_N_INSNS (1), /* load_unaligned. */
1629 COSTS_N_INSNS (2), /* store. */
1630 COSTS_N_INSNS (3), /* strd. */
1631 COSTS_N_INSNS (2), /* stm_1st. */
1632 1, /* stm_regs_per_insn_1st. */
1633 1, /* stm_regs_per_insn_subsequent. */
1634 COSTS_N_INSNS (2), /* storef. */
1635 COSTS_N_INSNS (3), /* stored. */
1636 COSTS_N_INSNS (1), /* store_unaligned. */
1637 COSTS_N_INSNS (1), /* loadv. */
1638 COSTS_N_INSNS (1) /* storev. */
1641 /* FP SFmode */
1643 COSTS_N_INSNS (7), /* div. */
1644 COSTS_N_INSNS (2), /* mult. */
1645 COSTS_N_INSNS (5), /* mult_addsub. */
1646 COSTS_N_INSNS (3), /* fma. */
1647 COSTS_N_INSNS (1), /* addsub. */
1648 0, /* fpconst. */
1649 0, /* neg. */
1650 0, /* compare. */
1651 0, /* widen. */
1652 0, /* narrow. */
1653 0, /* toint. */
1654 0, /* fromint. */
1655 0 /* roundint. */
1657 /* FP DFmode */
1659 COSTS_N_INSNS (15), /* div. */
1660 COSTS_N_INSNS (5), /* mult. */
1661 COSTS_N_INSNS (7), /* mult_addsub. */
1662 COSTS_N_INSNS (7), /* fma. */
1663 COSTS_N_INSNS (3), /* addsub. */
1664 0, /* fpconst. */
1665 0, /* neg. */
1666 0, /* compare. */
1667 0, /* widen. */
1668 0, /* narrow. */
1669 0, /* toint. */
1670 0, /* fromint. */
1671 0 /* roundint. */
1674 /* Vector */
1676 COSTS_N_INSNS (1) /* alu. */
1680 const struct tune_params arm_slowmul_tune =
1682 arm_slowmul_rtx_costs,
1683 NULL, /* Insn extra costs. */
1684 NULL, /* Sched adj cost. */
1685 arm_default_branch_cost,
1686 &arm_default_vec_cost,
1687 3, /* Constant limit. */
1688 5, /* Max cond insns. */
1689 8, /* Memset max inline. */
1690 1, /* Issue rate. */
1691 ARM_PREFETCH_NOT_BENEFICIAL,
1692 tune_params::PREF_CONST_POOL_TRUE,
1693 tune_params::PREF_LDRD_FALSE,
1694 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1695 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1696 tune_params::DISPARAGE_FLAGS_NEITHER,
1697 tune_params::PREF_NEON_64_FALSE,
1698 tune_params::PREF_NEON_STRINGOPS_FALSE,
1699 tune_params::FUSE_NOTHING,
1700 tune_params::SCHED_AUTOPREF_OFF
1703 const struct tune_params arm_fastmul_tune =
1705 arm_fastmul_rtx_costs,
1706 NULL, /* Insn extra costs. */
1707 NULL, /* Sched adj cost. */
1708 arm_default_branch_cost,
1709 &arm_default_vec_cost,
1710 1, /* Constant limit. */
1711 5, /* Max cond insns. */
1712 8, /* Memset max inline. */
1713 1, /* Issue rate. */
1714 ARM_PREFETCH_NOT_BENEFICIAL,
1715 tune_params::PREF_CONST_POOL_TRUE,
1716 tune_params::PREF_LDRD_FALSE,
1717 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1718 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1719 tune_params::DISPARAGE_FLAGS_NEITHER,
1720 tune_params::PREF_NEON_64_FALSE,
1721 tune_params::PREF_NEON_STRINGOPS_FALSE,
1722 tune_params::FUSE_NOTHING,
1723 tune_params::SCHED_AUTOPREF_OFF
1726 /* StrongARM has early execution of branches, so a sequence that is worth
1727 skipping is shorter. Set max_insns_skipped to a lower value. */
1729 const struct tune_params arm_strongarm_tune =
1731 arm_fastmul_rtx_costs,
1732 NULL, /* Insn extra costs. */
1733 NULL, /* Sched adj cost. */
1734 arm_default_branch_cost,
1735 &arm_default_vec_cost,
1736 1, /* Constant limit. */
1737 3, /* Max cond insns. */
1738 8, /* Memset max inline. */
1739 1, /* Issue rate. */
1740 ARM_PREFETCH_NOT_BENEFICIAL,
1741 tune_params::PREF_CONST_POOL_TRUE,
1742 tune_params::PREF_LDRD_FALSE,
1743 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1744 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1745 tune_params::DISPARAGE_FLAGS_NEITHER,
1746 tune_params::PREF_NEON_64_FALSE,
1747 tune_params::PREF_NEON_STRINGOPS_FALSE,
1748 tune_params::FUSE_NOTHING,
1749 tune_params::SCHED_AUTOPREF_OFF
1752 const struct tune_params arm_xscale_tune =
1754 arm_xscale_rtx_costs,
1755 NULL, /* Insn extra costs. */
1756 xscale_sched_adjust_cost,
1757 arm_default_branch_cost,
1758 &arm_default_vec_cost,
1759 2, /* Constant limit. */
1760 3, /* Max cond insns. */
1761 8, /* Memset max inline. */
1762 1, /* Issue rate. */
1763 ARM_PREFETCH_NOT_BENEFICIAL,
1764 tune_params::PREF_CONST_POOL_TRUE,
1765 tune_params::PREF_LDRD_FALSE,
1766 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1767 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1768 tune_params::DISPARAGE_FLAGS_NEITHER,
1769 tune_params::PREF_NEON_64_FALSE,
1770 tune_params::PREF_NEON_STRINGOPS_FALSE,
1771 tune_params::FUSE_NOTHING,
1772 tune_params::SCHED_AUTOPREF_OFF
1775 const struct tune_params arm_9e_tune =
1777 arm_9e_rtx_costs,
1778 NULL, /* Insn extra costs. */
1779 NULL, /* Sched adj cost. */
1780 arm_default_branch_cost,
1781 &arm_default_vec_cost,
1782 1, /* Constant limit. */
1783 5, /* Max cond insns. */
1784 8, /* Memset max inline. */
1785 1, /* Issue rate. */
1786 ARM_PREFETCH_NOT_BENEFICIAL,
1787 tune_params::PREF_CONST_POOL_TRUE,
1788 tune_params::PREF_LDRD_FALSE,
1789 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1790 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1791 tune_params::DISPARAGE_FLAGS_NEITHER,
1792 tune_params::PREF_NEON_64_FALSE,
1793 tune_params::PREF_NEON_STRINGOPS_FALSE,
1794 tune_params::FUSE_NOTHING,
1795 tune_params::SCHED_AUTOPREF_OFF
1798 const struct tune_params arm_marvell_pj4_tune =
1800 arm_9e_rtx_costs,
1801 NULL, /* Insn extra costs. */
1802 NULL, /* Sched adj cost. */
1803 arm_default_branch_cost,
1804 &arm_default_vec_cost,
1805 1, /* Constant limit. */
1806 5, /* Max cond insns. */
1807 8, /* Memset max inline. */
1808 2, /* Issue rate. */
1809 ARM_PREFETCH_NOT_BENEFICIAL,
1810 tune_params::PREF_CONST_POOL_TRUE,
1811 tune_params::PREF_LDRD_FALSE,
1812 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1813 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1814 tune_params::DISPARAGE_FLAGS_NEITHER,
1815 tune_params::PREF_NEON_64_FALSE,
1816 tune_params::PREF_NEON_STRINGOPS_FALSE,
1817 tune_params::FUSE_NOTHING,
1818 tune_params::SCHED_AUTOPREF_OFF
1821 const struct tune_params arm_v6t2_tune =
1823 arm_9e_rtx_costs,
1824 NULL, /* Insn extra costs. */
1825 NULL, /* Sched adj cost. */
1826 arm_default_branch_cost,
1827 &arm_default_vec_cost,
1828 1, /* Constant limit. */
1829 5, /* Max cond insns. */
1830 8, /* Memset max inline. */
1831 1, /* Issue rate. */
1832 ARM_PREFETCH_NOT_BENEFICIAL,
1833 tune_params::PREF_CONST_POOL_FALSE,
1834 tune_params::PREF_LDRD_FALSE,
1835 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1836 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1837 tune_params::DISPARAGE_FLAGS_NEITHER,
1838 tune_params::PREF_NEON_64_FALSE,
1839 tune_params::PREF_NEON_STRINGOPS_FALSE,
1840 tune_params::FUSE_NOTHING,
1841 tune_params::SCHED_AUTOPREF_OFF
1845 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1846 const struct tune_params arm_cortex_tune =
1848 arm_9e_rtx_costs,
1849 &generic_extra_costs,
1850 NULL, /* Sched adj cost. */
1851 arm_default_branch_cost,
1852 &arm_default_vec_cost,
1853 1, /* Constant limit. */
1854 5, /* Max cond insns. */
1855 8, /* Memset max inline. */
1856 2, /* Issue rate. */
1857 ARM_PREFETCH_NOT_BENEFICIAL,
1858 tune_params::PREF_CONST_POOL_FALSE,
1859 tune_params::PREF_LDRD_FALSE,
1860 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1862 tune_params::DISPARAGE_FLAGS_NEITHER,
1863 tune_params::PREF_NEON_64_FALSE,
1864 tune_params::PREF_NEON_STRINGOPS_FALSE,
1865 tune_params::FUSE_NOTHING,
1866 tune_params::SCHED_AUTOPREF_OFF
1869 const struct tune_params arm_cortex_a8_tune =
1871 arm_9e_rtx_costs,
1872 &cortexa8_extra_costs,
1873 NULL, /* Sched adj cost. */
1874 arm_default_branch_cost,
1875 &arm_default_vec_cost,
1876 1, /* Constant limit. */
1877 5, /* Max cond insns. */
1878 8, /* Memset max inline. */
1879 2, /* Issue rate. */
1880 ARM_PREFETCH_NOT_BENEFICIAL,
1881 tune_params::PREF_CONST_POOL_FALSE,
1882 tune_params::PREF_LDRD_FALSE,
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1885 tune_params::DISPARAGE_FLAGS_NEITHER,
1886 tune_params::PREF_NEON_64_FALSE,
1887 tune_params::PREF_NEON_STRINGOPS_TRUE,
1888 tune_params::FUSE_NOTHING,
1889 tune_params::SCHED_AUTOPREF_OFF
1892 const struct tune_params arm_cortex_a7_tune =
1894 arm_9e_rtx_costs,
1895 &cortexa7_extra_costs,
1896 NULL, /* Sched adj cost. */
1897 arm_default_branch_cost,
1898 &arm_default_vec_cost,
1899 1, /* Constant limit. */
1900 5, /* Max cond insns. */
1901 8, /* Memset max inline. */
1902 2, /* Issue rate. */
1903 ARM_PREFETCH_NOT_BENEFICIAL,
1904 tune_params::PREF_CONST_POOL_FALSE,
1905 tune_params::PREF_LDRD_FALSE,
1906 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1907 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1908 tune_params::DISPARAGE_FLAGS_NEITHER,
1909 tune_params::PREF_NEON_64_FALSE,
1910 tune_params::PREF_NEON_STRINGOPS_TRUE,
1911 tune_params::FUSE_NOTHING,
1912 tune_params::SCHED_AUTOPREF_OFF
1915 const struct tune_params arm_cortex_a15_tune =
1917 arm_9e_rtx_costs,
1918 &cortexa15_extra_costs,
1919 NULL, /* Sched adj cost. */
1920 arm_default_branch_cost,
1921 &arm_default_vec_cost,
1922 1, /* Constant limit. */
1923 2, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 3, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 tune_params::PREF_CONST_POOL_FALSE,
1928 tune_params::PREF_LDRD_TRUE,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_ALL,
1932 tune_params::PREF_NEON_64_FALSE,
1933 tune_params::PREF_NEON_STRINGOPS_TRUE,
1934 tune_params::FUSE_NOTHING,
1935 tune_params::SCHED_AUTOPREF_FULL
1938 const struct tune_params arm_cortex_a35_tune =
1940 arm_9e_rtx_costs,
1941 &cortexa53_extra_costs,
1942 NULL, /* Sched adj cost. */
1943 arm_default_branch_cost,
1944 &arm_default_vec_cost,
1945 1, /* Constant limit. */
1946 5, /* Max cond insns. */
1947 8, /* Memset max inline. */
1948 1, /* Issue rate. */
1949 ARM_PREFETCH_NOT_BENEFICIAL,
1950 tune_params::PREF_CONST_POOL_FALSE,
1951 tune_params::PREF_LDRD_FALSE,
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1953 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1954 tune_params::DISPARAGE_FLAGS_NEITHER,
1955 tune_params::PREF_NEON_64_FALSE,
1956 tune_params::PREF_NEON_STRINGOPS_TRUE,
1957 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1958 tune_params::SCHED_AUTOPREF_OFF
1961 const struct tune_params arm_cortex_a53_tune =
1963 arm_9e_rtx_costs,
1964 &cortexa53_extra_costs,
1965 NULL, /* Sched adj cost. */
1966 arm_default_branch_cost,
1967 &arm_default_vec_cost,
1968 1, /* Constant limit. */
1969 5, /* Max cond insns. */
1970 8, /* Memset max inline. */
1971 2, /* Issue rate. */
1972 ARM_PREFETCH_NOT_BENEFICIAL,
1973 tune_params::PREF_CONST_POOL_FALSE,
1974 tune_params::PREF_LDRD_FALSE,
1975 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1976 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1977 tune_params::DISPARAGE_FLAGS_NEITHER,
1978 tune_params::PREF_NEON_64_FALSE,
1979 tune_params::PREF_NEON_STRINGOPS_TRUE,
1980 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
1981 tune_params::SCHED_AUTOPREF_OFF
1984 const struct tune_params arm_cortex_a57_tune =
1986 arm_9e_rtx_costs,
1987 &cortexa57_extra_costs,
1988 NULL, /* Sched adj cost. */
1989 arm_default_branch_cost,
1990 &arm_default_vec_cost,
1991 1, /* Constant limit. */
1992 2, /* Max cond insns. */
1993 8, /* Memset max inline. */
1994 3, /* Issue rate. */
1995 ARM_PREFETCH_NOT_BENEFICIAL,
1996 tune_params::PREF_CONST_POOL_FALSE,
1997 tune_params::PREF_LDRD_TRUE,
1998 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1999 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2000 tune_params::DISPARAGE_FLAGS_ALL,
2001 tune_params::PREF_NEON_64_FALSE,
2002 tune_params::PREF_NEON_STRINGOPS_TRUE,
2003 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2004 tune_params::SCHED_AUTOPREF_FULL
2007 const struct tune_params arm_exynosm1_tune =
2009 arm_9e_rtx_costs,
2010 &exynosm1_extra_costs,
2011 NULL, /* Sched adj cost. */
2012 arm_default_branch_cost,
2013 &arm_default_vec_cost,
2014 1, /* Constant limit. */
2015 2, /* Max cond insns. */
2016 8, /* Memset max inline. */
2017 3, /* Issue rate. */
2018 ARM_PREFETCH_NOT_BENEFICIAL,
2019 tune_params::PREF_CONST_POOL_FALSE,
2020 tune_params::PREF_LDRD_TRUE,
2021 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2022 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2023 tune_params::DISPARAGE_FLAGS_ALL,
2024 tune_params::PREF_NEON_64_FALSE,
2025 tune_params::PREF_NEON_STRINGOPS_TRUE,
2026 tune_params::FUSE_NOTHING,
2027 tune_params::SCHED_AUTOPREF_OFF
2030 const struct tune_params arm_xgene1_tune =
2032 arm_9e_rtx_costs,
2033 &xgene1_extra_costs,
2034 NULL, /* Sched adj cost. */
2035 arm_default_branch_cost,
2036 &arm_default_vec_cost,
2037 1, /* Constant limit. */
2038 2, /* Max cond insns. */
2039 32, /* Memset max inline. */
2040 4, /* Issue rate. */
2041 ARM_PREFETCH_NOT_BENEFICIAL,
2042 tune_params::PREF_CONST_POOL_FALSE,
2043 tune_params::PREF_LDRD_TRUE,
2044 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2045 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2046 tune_params::DISPARAGE_FLAGS_ALL,
2047 tune_params::PREF_NEON_64_FALSE,
2048 tune_params::PREF_NEON_STRINGOPS_FALSE,
2049 tune_params::FUSE_NOTHING,
2050 tune_params::SCHED_AUTOPREF_OFF
2053 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2054 less appealing. Set max_insns_skipped to a low value. */
2056 const struct tune_params arm_cortex_a5_tune =
2058 arm_9e_rtx_costs,
2059 &cortexa5_extra_costs,
2060 NULL, /* Sched adj cost. */
2061 arm_cortex_a5_branch_cost,
2062 &arm_default_vec_cost,
2063 1, /* Constant limit. */
2064 1, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 2, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL,
2068 tune_params::PREF_CONST_POOL_FALSE,
2069 tune_params::PREF_LDRD_FALSE,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_NEITHER,
2073 tune_params::PREF_NEON_64_FALSE,
2074 tune_params::PREF_NEON_STRINGOPS_TRUE,
2075 tune_params::FUSE_NOTHING,
2076 tune_params::SCHED_AUTOPREF_OFF
2079 const struct tune_params arm_cortex_a9_tune =
2081 arm_9e_rtx_costs,
2082 &cortexa9_extra_costs,
2083 cortex_a9_sched_adjust_cost,
2084 arm_default_branch_cost,
2085 &arm_default_vec_cost,
2086 1, /* Constant limit. */
2087 5, /* Max cond insns. */
2088 8, /* Memset max inline. */
2089 2, /* Issue rate. */
2090 ARM_PREFETCH_BENEFICIAL(4,32,32),
2091 tune_params::PREF_CONST_POOL_FALSE,
2092 tune_params::PREF_LDRD_FALSE,
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2094 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2095 tune_params::DISPARAGE_FLAGS_NEITHER,
2096 tune_params::PREF_NEON_64_FALSE,
2097 tune_params::PREF_NEON_STRINGOPS_FALSE,
2098 tune_params::FUSE_NOTHING,
2099 tune_params::SCHED_AUTOPREF_OFF
2102 const struct tune_params arm_cortex_a12_tune =
2104 arm_9e_rtx_costs,
2105 &cortexa12_extra_costs,
2106 NULL, /* Sched adj cost. */
2107 arm_default_branch_cost,
2108 &arm_default_vec_cost, /* Vectorizer costs. */
2109 1, /* Constant limit. */
2110 2, /* Max cond insns. */
2111 8, /* Memset max inline. */
2112 2, /* Issue rate. */
2113 ARM_PREFETCH_NOT_BENEFICIAL,
2114 tune_params::PREF_CONST_POOL_FALSE,
2115 tune_params::PREF_LDRD_TRUE,
2116 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2117 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2118 tune_params::DISPARAGE_FLAGS_ALL,
2119 tune_params::PREF_NEON_64_FALSE,
2120 tune_params::PREF_NEON_STRINGOPS_TRUE,
2121 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2122 tune_params::SCHED_AUTOPREF_OFF
2125 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT each take a
2126 single cycle to execute. An LDR from the constant pool also takes two cycles
2127 to execute, but mildly increases pipelining opportunity (consecutive
2128 loads/stores can be pipelined together, saving one cycle), and may also
2129 improve icache utilisation. Hence we prefer the constant pool for such
2130 processors. */
2132 const struct tune_params arm_v7m_tune =
2134 arm_9e_rtx_costs,
2135 &v7m_extra_costs,
2136 NULL, /* Sched adj cost. */
2137 arm_cortex_m_branch_cost,
2138 &arm_default_vec_cost,
2139 1, /* Constant limit. */
2140 2, /* Max cond insns. */
2141 8, /* Memset max inline. */
2142 1, /* Issue rate. */
2143 ARM_PREFETCH_NOT_BENEFICIAL,
2144 tune_params::PREF_CONST_POOL_TRUE,
2145 tune_params::PREF_LDRD_FALSE,
2146 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2147 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2148 tune_params::DISPARAGE_FLAGS_NEITHER,
2149 tune_params::PREF_NEON_64_FALSE,
2150 tune_params::PREF_NEON_STRINGOPS_FALSE,
2151 tune_params::FUSE_NOTHING,
2152 tune_params::SCHED_AUTOPREF_OFF
2155 /* Cortex-M7 tuning. */
2157 const struct tune_params arm_cortex_m7_tune =
2159 arm_9e_rtx_costs,
2160 &v7m_extra_costs,
2161 NULL, /* Sched adj cost. */
2162 arm_cortex_m7_branch_cost,
2163 &arm_default_vec_cost,
2164 0, /* Constant limit. */
2165 1, /* Max cond insns. */
2166 8, /* Memset max inline. */
2167 2, /* Issue rate. */
2168 ARM_PREFETCH_NOT_BENEFICIAL,
2169 tune_params::PREF_CONST_POOL_TRUE,
2170 tune_params::PREF_LDRD_FALSE,
2171 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2172 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2173 tune_params::DISPARAGE_FLAGS_NEITHER,
2174 tune_params::PREF_NEON_64_FALSE,
2175 tune_params::PREF_NEON_STRINGOPS_FALSE,
2176 tune_params::FUSE_NOTHING,
2177 tune_params::SCHED_AUTOPREF_OFF
2180 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2181 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2182 const struct tune_params arm_v6m_tune =
2184 arm_9e_rtx_costs,
2185 NULL, /* Insn extra costs. */
2186 NULL, /* Sched adj cost. */
2187 arm_default_branch_cost,
2188 &arm_default_vec_cost, /* Vectorizer costs. */
2189 1, /* Constant limit. */
2190 5, /* Max cond insns. */
2191 8, /* Memset max inline. */
2192 1, /* Issue rate. */
2193 ARM_PREFETCH_NOT_BENEFICIAL,
2194 tune_params::PREF_CONST_POOL_FALSE,
2195 tune_params::PREF_LDRD_FALSE,
2196 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2197 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2198 tune_params::DISPARAGE_FLAGS_NEITHER,
2199 tune_params::PREF_NEON_64_FALSE,
2200 tune_params::PREF_NEON_STRINGOPS_FALSE,
2201 tune_params::FUSE_NOTHING,
2202 tune_params::SCHED_AUTOPREF_OFF
2205 const struct tune_params arm_fa726te_tune =
2207 arm_9e_rtx_costs,
2208 NULL, /* Insn extra costs. */
2209 fa726te_sched_adjust_cost,
2210 arm_default_branch_cost,
2211 &arm_default_vec_cost,
2212 1, /* Constant limit. */
2213 5, /* Max cond insns. */
2214 8, /* Memset max inline. */
2215 2, /* Issue rate. */
2216 ARM_PREFETCH_NOT_BENEFICIAL,
2217 tune_params::PREF_CONST_POOL_TRUE,
2218 tune_params::PREF_LDRD_FALSE,
2219 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2220 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2221 tune_params::DISPARAGE_FLAGS_NEITHER,
2222 tune_params::PREF_NEON_64_FALSE,
2223 tune_params::PREF_NEON_STRINGOPS_FALSE,
2224 tune_params::FUSE_NOTHING,
2225 tune_params::SCHED_AUTOPREF_OFF
2229 /* Not all of these give usefully different compilation alternatives,
2230 but there is no simple way of generalizing them. */
2231 static const struct processors all_cores[] =
2233 /* ARM Cores */
2234 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2235 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2236 FLAGS, &arm_##COSTS##_tune},
2237 #include "arm-cores.def"
2238 #undef ARM_CORE
2239 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
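/* A minimal sketch of the X-macro idiom used above, with hypothetical file
   and entry names (the real rows live in arm-cores.def): each includer
   defines ARM_CORE to expand every row into the table shape it needs, then
   re-includes the .def file and undefines the macro.

       // hypothetical-cores.def
       ARM_CORE ("examplecore", examplecore, examplecore, 7A, FLAGS, cortex)

       // building a table of names from the same rows
       #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) NAME,
       static const char *const example_core_names[] = {
       #include "hypothetical-cores.def"
       };
       #undef ARM_CORE
   */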
2242 static const struct processors all_architectures[] =
2244 /* ARM Architectures */
2245 /* We don't specify tuning costs here as they will be figured out
2246 from the core. */
2248 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2249 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2250 #include "arm-arches.def"
2251 #undef ARM_ARCH
2252 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
2256 /* These are populated as command-line arguments are processed, or NULL
2257 if not specified. */
2258 static const struct processors *arm_selected_arch;
2259 static const struct processors *arm_selected_cpu;
2260 static const struct processors *arm_selected_tune;
2262 /* The name of the preprocessor macro to define for this architecture. */
2264 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2266 /* Available values for -mfpu=. */
2268 const struct arm_fpu_desc all_fpus[] =
2270 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, FEATURES) \
2271 { NAME, MODEL, REV, VFP_REGS, FEATURES },
2272 #include "arm-fpus.def"
2273 #undef ARM_FPU
2276 /* Supported TLS relocations. */
2278 enum tls_reloc {
2279 TLS_GD32,
2280 TLS_LDM32,
2281 TLS_LDO32,
2282 TLS_IE32,
2283 TLS_LE32,
2284 TLS_DESCSEQ /* GNU scheme */
2287 /* The maximum number of insns to be used when loading a constant. */
2288 inline static int
2289 arm_constant_limit (bool size_p)
2291 return size_p ? 1 : current_tune->constant_limit;
2294 /* Emit an insn that's a simple single-set. Both the operands must be known
2295 to be valid. */
2296 inline static rtx_insn *
2297 emit_set_insn (rtx x, rtx y)
2299 return emit_insn (gen_rtx_SET (x, y));
2302 /* Return the number of bits set in VALUE. */
2303 static unsigned
2304 bit_count (unsigned long value)
2306 unsigned long count = 0;
2308 while (value)
2310 count++;
2311 value &= value - 1; /* Clear the least-significant set bit. */
2314 return count;
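/* Worked example (not in the original source) of the value &= value - 1 step
   above, for value = 0b101100 (44):

       0b101100 & 0b101011 = 0b101000   count = 1
       0b101000 & 0b100111 = 0b100000   count = 2
       0b100000 & 0b011111 = 0b000000   count = 3

   Each iteration clears exactly one set bit, so the loop executes once per
   set bit rather than once per bit position.  */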
2317 /* Return the number of features in feature-set SET. */
2318 static unsigned
2319 feature_count (const arm_feature_set * set)
2321 return (bit_count (ARM_FSET_CPU1 (*set))
2322 + bit_count (ARM_FSET_CPU2 (*set)));
2325 typedef struct
2327 machine_mode mode;
2328 const char *name;
2329 } arm_fixed_mode_set;
2331 /* A small helper for setting fixed-point libfuncs. */
2333 static void
2334 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2335 const char *funcname, const char *modename,
2336 int num_suffix)
2338 char buffer[50];
2340 if (num_suffix == 0)
2341 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2342 else
2343 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2345 set_optab_libfunc (optable, mode, buffer);
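/* For illustration (derived from the sprintf formats above, not a new API):
   the initialisation loop further down passes e.g. "add", "qq" and 3 for
   add_optab on QQmode, which yields the libfunc name "__gnu_addqq3"; with a
   num_suffix of 0 the trailing number is simply omitted.  A standalone
   sketch of the same construction:

       char buffer[50];
       sprintf (buffer, "__gnu_%s%s%d", "add", "qq", 3);
       // buffer now holds "__gnu_addqq3"
   */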
2348 static void
2349 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2350 machine_mode from, const char *funcname,
2351 const char *toname, const char *fromname)
2353 char buffer[50];
2354 const char *maybe_suffix_2 = "";
2356 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2357 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2358 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2359 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2360 maybe_suffix_2 = "2";
2362 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2363 maybe_suffix_2);
2365 set_conv_libfunc (optable, to, from, buffer);
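/* For illustration (derived from the code above): a conversion between two
   signed fract modes, say QQmode -> HQmode, meets all three conditions and
   so fract_optab gets the name "__gnu_fractqqhq2", while QQmode -> SImode
   mixes fixed-point and integer modes, drops the suffix, and becomes
   "__gnu_fractqqsi".  */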
2368 /* Set up library functions unique to ARM. */
2370 static void
2371 arm_init_libfuncs (void)
2373 /* For Linux, we have access to kernel support for atomic operations. */
2374 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2375 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2377 /* There are no special library functions unless we are using the
2378 ARM BPABI. */
2379 if (!TARGET_BPABI)
2380 return;
2382 /* The functions below are described in Section 4 of the "Run-Time
2383 ABI for the ARM architecture", Version 1.0. */
2385 /* Double-precision floating-point arithmetic. Table 2. */
2386 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2387 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2388 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2389 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2390 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
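/* Illustrative sketch (an assumption about typical soft-float code
   generation, not text from this file): once add_optab for DFmode points at
   "__aeabi_dadd", a plain C addition that cannot be done in hardware, e.g.

       double example_dadd (double a, double b) { return a + b; }

   is emitted as a call to __aeabi_dadd.  */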
2392 /* Double-precision comparisons. Table 3. */
2393 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2394 set_optab_libfunc (ne_optab, DFmode, NULL);
2395 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2396 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2397 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2398 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2399 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
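/* Note for illustration: the run-time ABI defines no "not equal" helper, so
   ne_optab is left NULL above and inequality is synthesized from the EQ
   libcall, conceptually as !__aeabi_dcmpeq (a, b) for a != b.  */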
2401 /* Single-precision floating-point arithmetic. Table 4. */
2402 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2403 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2404 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2405 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2406 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2408 /* Single-precision comparisons. Table 5. */
2409 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2410 set_optab_libfunc (ne_optab, SFmode, NULL);
2411 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2412 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2413 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2414 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2415 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2417 /* Floating-point to integer conversions. Table 6. */
2418 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2419 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2420 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2421 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2422 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2423 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2424 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2425 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2427 /* Conversions between floating types. Table 7. */
2428 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2429 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2431 /* Integer to floating-point conversions. Table 8. */
2432 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2433 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2434 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2435 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2436 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2437 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2438 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2439 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2441 /* Long long. Table 9. */
2442 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2443 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2444 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2445 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2446 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2447 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2448 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2449 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2451 /* Integer (32/32->32) division. \S 4.3.1. */
2452 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2453 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2455 /* The divmod functions are designed so that they can be used for
2456 plain division, even though they return both the quotient and the
2457 remainder. The quotient is returned in the usual location (i.e.,
2458 r0 for SImode, {r0, r1} for DImode), just as would be expected
2459 for an ordinary division routine. Because the AAPCS calling
2460 conventions already treat all of { r0, r1, r2, r3 } as
2461 call-clobbered registers, there is no need to tell the compiler
2462 explicitly that those registers are clobbered by these
2463 routines. */
2464 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2465 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2467 /* For SImode division the ABI provides div-without-mod routines,
2468 which are faster. */
2469 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2470 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2472 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2473 divmod libcalls instead. */
2474 set_optab_libfunc (smod_optab, DImode, NULL);
2475 set_optab_libfunc (umod_optab, DImode, NULL);
2476 set_optab_libfunc (smod_optab, SImode, NULL);
2477 set_optab_libfunc (umod_optab, SImode, NULL);
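/* Illustrative sketch (assumed typical code generation, not from this file):
   because the quotient comes back in the ordinary return register, a plain
   division can call the divmod routine and ignore the remainder, while code
   needing both values gets them from a single call:

       int quotient (int a, int b)  { return a / b; }          // __aeabi_idiv
       int both (int a, int b)      { return a / b + a % b; }  // one __aeabi_idivmod
   */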
2479 /* Half-precision float operations. The compiler handles all operations
2480 with NULL libfuncs by converting to SFmode. */
2481 switch (arm_fp16_format)
2483 case ARM_FP16_FORMAT_IEEE:
2484 case ARM_FP16_FORMAT_ALTERNATIVE:
2486 /* Conversions. */
2487 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2488 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2489 ? "__gnu_f2h_ieee"
2490 : "__gnu_f2h_alternative"));
2491 set_conv_libfunc (sext_optab, SFmode, HFmode,
2492 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2493 ? "__gnu_h2f_ieee"
2494 : "__gnu_h2f_alternative"));
2496 /* Arithmetic. */
2497 set_optab_libfunc (add_optab, HFmode, NULL);
2498 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2499 set_optab_libfunc (smul_optab, HFmode, NULL);
2500 set_optab_libfunc (neg_optab, HFmode, NULL);
2501 set_optab_libfunc (sub_optab, HFmode, NULL);
2503 /* Comparisons. */
2504 set_optab_libfunc (eq_optab, HFmode, NULL);
2505 set_optab_libfunc (ne_optab, HFmode, NULL);
2506 set_optab_libfunc (lt_optab, HFmode, NULL);
2507 set_optab_libfunc (le_optab, HFmode, NULL);
2508 set_optab_libfunc (ge_optab, HFmode, NULL);
2509 set_optab_libfunc (gt_optab, HFmode, NULL);
2510 set_optab_libfunc (unord_optab, HFmode, NULL);
2511 break;
2513 default:
2514 break;
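/* Illustrative sketch (assumption about the resulting code, not text from
   this file): with every HFmode optab left NULL, __fp16 arithmetic is done
   by widening to SFmode and narrowing the result, using the __gnu_h2f_* /
   __gnu_f2h_* helpers registered above when no hardware conversion exists:

       void hadd (const __fp16 *a, const __fp16 *b, __fp16 *out)
       {
         *out = *a + *b;   // computed as (float)*a + (float)*b, then narrowed
       }
   */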
2517 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2519 const arm_fixed_mode_set fixed_arith_modes[] =
2521 { QQmode, "qq" },
2522 { UQQmode, "uqq" },
2523 { HQmode, "hq" },
2524 { UHQmode, "uhq" },
2525 { SQmode, "sq" },
2526 { USQmode, "usq" },
2527 { DQmode, "dq" },
2528 { UDQmode, "udq" },
2529 { TQmode, "tq" },
2530 { UTQmode, "utq" },
2531 { HAmode, "ha" },
2532 { UHAmode, "uha" },
2533 { SAmode, "sa" },
2534 { USAmode, "usa" },
2535 { DAmode, "da" },
2536 { UDAmode, "uda" },
2537 { TAmode, "ta" },
2538 { UTAmode, "uta" }
2540 const arm_fixed_mode_set fixed_conv_modes[] =
2542 { QQmode, "qq" },
2543 { UQQmode, "uqq" },
2544 { HQmode, "hq" },
2545 { UHQmode, "uhq" },
2546 { SQmode, "sq" },
2547 { USQmode, "usq" },
2548 { DQmode, "dq" },
2549 { UDQmode, "udq" },
2550 { TQmode, "tq" },
2551 { UTQmode, "utq" },
2552 { HAmode, "ha" },
2553 { UHAmode, "uha" },
2554 { SAmode, "sa" },
2555 { USAmode, "usa" },
2556 { DAmode, "da" },
2557 { UDAmode, "uda" },
2558 { TAmode, "ta" },
2559 { UTAmode, "uta" },
2560 { QImode, "qi" },
2561 { HImode, "hi" },
2562 { SImode, "si" },
2563 { DImode, "di" },
2564 { TImode, "ti" },
2565 { SFmode, "sf" },
2566 { DFmode, "df" }
2568 unsigned int i, j;
2570 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2572 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2573 "add", fixed_arith_modes[i].name, 3);
2574 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2575 "ssadd", fixed_arith_modes[i].name, 3);
2576 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2577 "usadd", fixed_arith_modes[i].name, 3);
2578 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2579 "sub", fixed_arith_modes[i].name, 3);
2580 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2581 "sssub", fixed_arith_modes[i].name, 3);
2582 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2583 "ussub", fixed_arith_modes[i].name, 3);
2584 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2585 "mul", fixed_arith_modes[i].name, 3);
2586 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2587 "ssmul", fixed_arith_modes[i].name, 3);
2588 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2589 "usmul", fixed_arith_modes[i].name, 3);
2590 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2591 "div", fixed_arith_modes[i].name, 3);
2592 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2593 "udiv", fixed_arith_modes[i].name, 3);
2594 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2595 "ssdiv", fixed_arith_modes[i].name, 3);
2596 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2597 "usdiv", fixed_arith_modes[i].name, 3);
2598 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2599 "neg", fixed_arith_modes[i].name, 2);
2600 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2601 "ssneg", fixed_arith_modes[i].name, 2);
2602 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2603 "usneg", fixed_arith_modes[i].name, 2);
2604 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2605 "ashl", fixed_arith_modes[i].name, 3);
2606 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2607 "ashr", fixed_arith_modes[i].name, 3);
2608 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2609 "lshr", fixed_arith_modes[i].name, 3);
2610 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2611 "ssashl", fixed_arith_modes[i].name, 3);
2612 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2613 "usashl", fixed_arith_modes[i].name, 3);
2614 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2615 "cmp", fixed_arith_modes[i].name, 2);
2618 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2619 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2621 if (i == j
2622 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2623 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2624 continue;
2626 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2627 fixed_conv_modes[j].mode, "fract",
2628 fixed_conv_modes[i].name,
2629 fixed_conv_modes[j].name);
2630 arm_set_fixed_conv_libfunc (satfract_optab,
2631 fixed_conv_modes[i].mode,
2632 fixed_conv_modes[j].mode, "satfract",
2633 fixed_conv_modes[i].name,
2634 fixed_conv_modes[j].name);
2635 arm_set_fixed_conv_libfunc (fractuns_optab,
2636 fixed_conv_modes[i].mode,
2637 fixed_conv_modes[j].mode, "fractuns",
2638 fixed_conv_modes[i].name,
2639 fixed_conv_modes[j].name);
2640 arm_set_fixed_conv_libfunc (satfractuns_optab,
2641 fixed_conv_modes[i].mode,
2642 fixed_conv_modes[j].mode, "satfractuns",
2643 fixed_conv_modes[i].name,
2644 fixed_conv_modes[j].name);
2648 if (TARGET_AAPCS_BASED)
2649 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2652 /* On AAPCS systems, this is the "struct __va_list". */
2653 static GTY(()) tree va_list_type;
2655 /* Return the type to use as __builtin_va_list. */
2656 static tree
2657 arm_build_builtin_va_list (void)
2659 tree va_list_name;
2660 tree ap_field;
2662 if (!TARGET_AAPCS_BASED)
2663 return std_build_builtin_va_list ();
2665 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2666 defined as:
2668 struct __va_list
2670 void *__ap;
2673 The C Library ABI further reinforces this definition in \S
2674 4.1.
2676 We must follow this definition exactly. The structure tag
2677 name is visible in C++ mangled names, and thus forms a part
2678 of the ABI. The field name may be used by people who
2679 #include <stdarg.h>. */
2680 /* Create the type. */
2681 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2682 /* Give it the required name. */
2683 va_list_name = build_decl (BUILTINS_LOCATION,
2684 TYPE_DECL,
2685 get_identifier ("__va_list"),
2686 va_list_type);
2687 DECL_ARTIFICIAL (va_list_name) = 1;
2688 TYPE_NAME (va_list_type) = va_list_name;
2689 TYPE_STUB_DECL (va_list_type) = va_list_name;
2690 /* Create the __ap field. */
2691 ap_field = build_decl (BUILTINS_LOCATION,
2692 FIELD_DECL,
2693 get_identifier ("__ap"),
2694 ptr_type_node);
2695 DECL_ARTIFICIAL (ap_field) = 1;
2696 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2697 TYPE_FIELDS (va_list_type) = ap_field;
2698 /* Compute its layout. */
2699 layout_type (va_list_type);
2701 return va_list_type;
2704 /* Return an expression of type "void *" pointing to the next
2705 available argument in a variable-argument list. VALIST is the
2706 user-level va_list object, of type __builtin_va_list. */
2707 static tree
2708 arm_extract_valist_ptr (tree valist)
2710 if (TREE_TYPE (valist) == error_mark_node)
2711 return error_mark_node;
2713 /* On an AAPCS target, the pointer is stored within "struct
2714 va_list". */
2715 if (TARGET_AAPCS_BASED)
2717 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2718 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2719 valist, ap_field, NULL_TREE);
2722 return valist;
2725 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2726 static void
2727 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2729 valist = arm_extract_valist_ptr (valist);
2730 std_expand_builtin_va_start (valist, nextarg);
2733 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2734 static tree
2735 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2736 gimple_seq *post_p)
2738 valist = arm_extract_valist_ptr (valist);
2739 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2742 /* Check any incompatible options that the user has specified. */
2743 static void
2744 arm_option_check_internal (struct gcc_options *opts)
2746 int flags = opts->x_target_flags;
2747 const struct arm_fpu_desc *fpu_desc = &all_fpus[opts->x_arm_fpu_index];
2749 /* iWMMXt and NEON are incompatible. */
2750 if (TARGET_IWMMXT && TARGET_VFP
2751 && ARM_FPU_FSET_HAS (fpu_desc->features, FPU_FL_NEON))
2752 error ("iWMMXt and NEON are incompatible");
2754 /* Make sure that the processor choice does not conflict with any of the
2755 other command line choices. */
2756 if (TARGET_ARM_P (flags) && !ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM))
2757 error ("target CPU does not support ARM mode");
2759 /* TARGET_BACKTRACE calls leaf_function_p, which would crash if called
2760 from here, where no function is currently being compiled. */
2761 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2762 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2764 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2765 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2767 /* If this target is normally configured to use APCS frames, warn if they
2768 are turned off and debugging is turned on. */
2769 if (TARGET_ARM_P (flags)
2770 && write_symbols != NO_DEBUG
2771 && !TARGET_APCS_FRAME
2772 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2773 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2775 /* iWMMXt unsupported under Thumb mode. */
2776 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2777 error ("iWMMXt unsupported under Thumb mode");
2779 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2780 error ("can not use -mtp=cp15 with 16-bit Thumb");
2782 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2784 error ("RTP PIC is incompatible with Thumb");
2785 flag_pic = 0;
2788 /* We only support -mslow-flash-data on armv7-m targets. */
2789 if (target_slow_flash_data
2790 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2791 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2792 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2795 /* Recompute the global settings depending on target attribute options. */
2797 static void
2798 arm_option_params_internal (void)
2800 /* If we are not using the default (ARM mode) section anchor offset
2801 ranges, then set the correct ranges now. */
2802 if (TARGET_THUMB1)
2804 /* Thumb-1 LDR instructions cannot have negative offsets.
2805 Permissible positive offset ranges are 5-bit (for byte loads),
2806 6-bit (for halfword loads), or 7-bit (for word loads).
2807 Empirical results suggest a 7-bit anchor range gives the best
2808 overall code size. */
2809 targetm.min_anchor_offset = 0;
2810 targetm.max_anchor_offset = 127;
2812 else if (TARGET_THUMB2)
2814 /* The minimum is set such that the total size of the block
2815 for a particular anchor is 248 + 1 + 4095 bytes, which is
2816 divisible by eight, ensuring natural spacing of anchors. */
2817 targetm.min_anchor_offset = -248;
2818 targetm.max_anchor_offset = 4095;
2820 else
2822 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2823 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
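/* Arithmetic behind the ranges above (for illustration): a Thumb-1 LDR uses
   a 5-bit immediate scaled by the access size, giving byte offsets 0-31 for
   byte loads, 0-62 for halfword loads and 0-124 for word loads, so 127 is a
   natural upper bound.  For Thumb-2, 248 + 1 + 4095 = 4344 = 8 * 543, which
   keeps the anchored block a multiple of eight bytes as noted above.  */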
2826 if (optimize_size)
2828 /* If optimizing for size, bump the number of instructions that we
2829 are prepared to conditionally execute (even on a StrongARM). */
2830 max_insns_skipped = 6;
2832 /* For THUMB2, we limit the conditional sequence to one IT block. */
2833 if (TARGET_THUMB2)
2834 max_insns_skipped = arm_restrict_it ? 1 : 4;
2836 else
2837 /* When -mrestrict-it is in use, tone down the if-conversion. */
2838 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2839 ? 1 : current_tune->max_insns_skipped;
2842 /* True if -mflip-thumb should next add an attribute for the default
2843 mode, false if it should next add an attribute for the opposite mode. */
2844 static GTY(()) bool thumb_flipper;
2846 /* Options after initial target override. */
2847 static GTY(()) tree init_optimize;
2849 static void
2850 arm_override_options_after_change_1 (struct gcc_options *opts)
2852 if (opts->x_align_functions <= 0)
2853 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2854 && opts->x_optimize_size ? 2 : 4;
2857 /* Implement targetm.override_options_after_change. */
2859 static void
2860 arm_override_options_after_change (void)
2862 arm_override_options_after_change_1 (&global_options);
2865 /* Reset options between modes that the user has specified. */
2866 static void
2867 arm_option_override_internal (struct gcc_options *opts,
2868 struct gcc_options *opts_set)
2870 arm_override_options_after_change_1 (opts);
2872 if (TARGET_INTERWORK && !ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB))
2874 /* The default is to enable interworking, so this warning message would
2875 be confusing to users who have just compiled with, e.g., -march=armv3. */
2876 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2877 opts->x_target_flags &= ~MASK_INTERWORK;
2880 if (TARGET_THUMB_P (opts->x_target_flags)
2881 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB)))
2883 warning (0, "target CPU does not support THUMB instructions");
2884 opts->x_target_flags &= ~MASK_THUMB;
2887 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2889 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2890 opts->x_target_flags &= ~MASK_APCS_FRAME;
2893 /* Callee super interworking implies thumb interworking. Adding
2894 this to the flags here simplifies the logic elsewhere. */
2895 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2896 opts->x_target_flags |= MASK_INTERWORK;
2898 /* Need to remember initial values so combinations of options like
2899 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2900 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2902 if (! opts_set->x_arm_restrict_it)
2903 opts->x_arm_restrict_it = arm_arch8;
2905 if (!TARGET_THUMB2_P (opts->x_target_flags))
2906 opts->x_arm_restrict_it = 0;
2908 /* Enable -munaligned-access by default for
2909 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2910 i.e. Thumb2 and ARM state only.
2911 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2912 - ARMv8 architecture-based processors.
2914 Disable -munaligned-access by default for
2915 - all pre-ARMv6 architecture-based processors
2916 - ARMv6-M architecture-based processors. */
2918 if (! opts_set->x_unaligned_access)
2920 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2921 && arm_arch6 && (arm_arch_notm || arm_arch7));
2923 else if (opts->x_unaligned_access == 1
2924 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2926 warning (0, "target CPU does not support unaligned accesses");
2927 opts->x_unaligned_access = 0;
2930 /* Don't warn since it's on by default in -O2. */
2931 if (TARGET_THUMB1_P (opts->x_target_flags))
2932 opts->x_flag_schedule_insns = 0;
2933 else
2934 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
2936 /* Disable shrink-wrap when optimizing function for size, since it tends to
2937 generate additional returns. */
2938 if (optimize_function_for_size_p (cfun)
2939 && TARGET_THUMB2_P (opts->x_target_flags))
2940 opts->x_flag_shrink_wrap = false;
2941 else
2942 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
2944 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2945 - epilogue_insns - does not accurately model the corresponding insns
2946 emitted in the asm file. In particular, see the comment in thumb_exit
2947 'Find out how many of the (return) argument registers we can corrupt'.
2948 As a consequence, the epilogue may clobber registers without fipa-ra
2949 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
2950 TODO: Accurately model clobbers for epilogue_insns and reenable
2951 fipa-ra. */
2952 if (TARGET_THUMB1_P (opts->x_target_flags))
2953 opts->x_flag_ipa_ra = 0;
2954 else
2955 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
2957 /* Thumb2 inline assembly code should always use unified syntax.
2958 This will apply to ARM and Thumb1 eventually. */
2959 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
2961 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
2962 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
2963 #endif
2966 /* Fix up any incompatible options that the user has specified. */
2967 static void
2968 arm_option_override (void)
2970 arm_selected_arch = NULL;
2971 arm_selected_cpu = NULL;
2972 arm_selected_tune = NULL;
2974 if (global_options_set.x_arm_arch_option)
2975 arm_selected_arch = &all_architectures[arm_arch_option];
2977 if (global_options_set.x_arm_cpu_option)
2979 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2980 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2983 if (global_options_set.x_arm_tune_option)
2984 arm_selected_tune = &all_cores[(int) arm_tune_option];
2986 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2987 SUBTARGET_OVERRIDE_OPTIONS;
2988 #endif
2990 if (arm_selected_arch)
2992 if (arm_selected_cpu)
2994 const arm_feature_set tuning_flags = ARM_FSET_MAKE_CPU1 (FL_TUNE);
2995 arm_feature_set selected_flags;
2996 ARM_FSET_XOR (selected_flags, arm_selected_cpu->flags,
2997 arm_selected_arch->flags);
2998 ARM_FSET_EXCLUDE (selected_flags, selected_flags, tuning_flags);
2999 /* Check for conflict between mcpu and march. */
3000 if (!ARM_FSET_IS_EMPTY (selected_flags))
3002 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3003 arm_selected_cpu->name, arm_selected_arch->name);
3004 /* -march wins for code generation.
3005 -mcpu wins for default tuning. */
3006 if (!arm_selected_tune)
3007 arm_selected_tune = arm_selected_cpu;
3009 arm_selected_cpu = arm_selected_arch;
3011 else
3012 /* -mcpu wins. */
3013 arm_selected_arch = NULL;
3015 else
3016 /* Pick a CPU based on the architecture. */
3017 arm_selected_cpu = arm_selected_arch;
3020 /* If the user did not specify a processor, choose one for them. */
3021 if (!arm_selected_cpu)
3023 const struct processors * sel;
3024 arm_feature_set sought = ARM_FSET_EMPTY;
3026 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3027 if (!arm_selected_cpu->name)
3029 #ifdef SUBTARGET_CPU_DEFAULT
3030 /* Use the subtarget default CPU if none was specified by
3031 configure. */
3032 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
3033 #endif
3034 /* Default to ARM6. */
3035 if (!arm_selected_cpu->name)
3036 arm_selected_cpu = &all_cores[arm6];
3039 sel = arm_selected_cpu;
3040 insn_flags = sel->flags;
3042 /* Now check to see if the user has specified some command line
3043 switches that require certain abilities from the CPU. */
3045 if (TARGET_INTERWORK || TARGET_THUMB)
3047 ARM_FSET_ADD_CPU1 (sought, FL_THUMB);
3048 ARM_FSET_ADD_CPU1 (sought, FL_MODE32);
3050 /* There are no ARM processors that support both APCS-26 and
3051 interworking. Therefore we force FL_MODE26 to be removed
3052 from insn_flags here (if it was set), so that the search
3053 below will always be able to find a compatible processor. */
3054 ARM_FSET_DEL_CPU1 (insn_flags, FL_MODE26);
3057 if (!ARM_FSET_IS_EMPTY (sought)
3058 && !(ARM_FSET_CPU_SUBSET (sought, insn_flags)))
3060 /* Try to locate a CPU type that supports all of the abilities
3061 of the default CPU, plus the extra abilities requested by
3062 the user. */
3063 for (sel = all_cores; sel->name != NULL; sel++)
3064 if (ARM_FSET_CPU_SUBSET (sought, sel->flags))
3065 break;
3067 if (sel->name == NULL)
3069 unsigned current_bit_count = 0;
3070 const struct processors * best_fit = NULL;
3072 /* Ideally we would like to issue an error message here
3073 saying that it was not possible to find a CPU compatible
3074 with the default CPU, but which also supports the command
3075 line options specified by the programmer, and so they
3076 ought to use the -mcpu=<name> command line option to
3077 override the default CPU type.
3079 If we cannot find a cpu that has both the
3080 characteristics of the default cpu and the given
3081 command line options, we scan the array again looking
3082 for a best match. */
3083 for (sel = all_cores; sel->name != NULL; sel++)
3085 arm_feature_set required = ARM_FSET_EMPTY;
3086 ARM_FSET_UNION (required, sought, insn_flags);
3087 if (ARM_FSET_CPU_SUBSET (required, sel->flags))
3089 unsigned count;
3090 arm_feature_set flags;
3091 ARM_FSET_INTER (flags, sel->flags, insn_flags);
3092 count = feature_count (&flags);
3094 if (count >= current_bit_count)
3096 best_fit = sel;
3097 current_bit_count = count;
3101 gcc_assert (best_fit);
3102 sel = best_fit;
3105 arm_selected_cpu = sel;
3109 gcc_assert (arm_selected_cpu);
3110 /* The selected cpu may be an architecture, so look up tuning by core ID. */
3111 if (!arm_selected_tune)
3112 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3114 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
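/* Example (follows from the format string above): an -march=armv7-a
   selection has the arch field "7A", so arm_arch_name becomes
   "__ARM_ARCH_7A__", the macro later defined for the preprocessor.  */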
3115 insn_flags = arm_selected_cpu->flags;
3116 arm_base_arch = arm_selected_cpu->base_arch;
3118 arm_tune = arm_selected_tune->core;
3119 tune_flags = arm_selected_tune->flags;
3120 current_tune = arm_selected_tune->tune;
3122 /* TBD: Dwarf info for apcs frame is not handled yet. */
3123 if (TARGET_APCS_FRAME)
3124 flag_shrink_wrap = false;
3126 /* BPABI targets use linker tricks to allow interworking on cores
3127 without thumb support. */
3128 if (TARGET_INTERWORK
3129 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB) || TARGET_BPABI))
3131 warning (0, "target CPU does not support interworking" );
3132 target_flags &= ~MASK_INTERWORK;
3135 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3137 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3138 target_flags |= MASK_APCS_FRAME;
3141 if (TARGET_POKE_FUNCTION_NAME)
3142 target_flags |= MASK_APCS_FRAME;
3144 if (TARGET_APCS_REENT && flag_pic)
3145 error ("-fpic and -mapcs-reent are incompatible");
3147 if (TARGET_APCS_REENT)
3148 warning (0, "APCS reentrant code not supported. Ignored");
3150 if (TARGET_APCS_FLOAT)
3151 warning (0, "passing floating point arguments in fp regs not yet supported");
3153 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3154 arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M);
3155 arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4);
3156 arm_arch4t = arm_arch4 && (ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB));
3157 arm_arch5 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5);
3158 arm_arch5e = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5E);
3159 arm_arch6 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6);
3160 arm_arch6k = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6K);
3161 arm_arch6kz = arm_arch6k && ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6KZ);
3162 arm_arch_notm = ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM);
3163 arm_arch6m = arm_arch6 && !arm_arch_notm;
3164 arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7);
3165 arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM);
3166 arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8);
3167 arm_arch8_1 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_1);
3168 arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2);
3169 arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE);
3171 arm_ld_sched = ARM_FSET_HAS_CPU1 (tune_flags, FL_LDSCHED);
3172 arm_tune_strongarm = ARM_FSET_HAS_CPU1 (tune_flags, FL_STRONG);
3173 arm_tune_wbuf = ARM_FSET_HAS_CPU1 (tune_flags, FL_WBUF);
3174 arm_tune_xscale = ARM_FSET_HAS_CPU1 (tune_flags, FL_XSCALE);
3175 arm_arch_iwmmxt = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT);
3176 arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2);
3177 arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV);
3178 arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV);
3179 arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
3180 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
3181 arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
3182 arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL);
3184 /* V5 code we generate is completely interworking capable, so we turn off
3185 TARGET_INTERWORK here to avoid many tests later on. */
3187 /* XXX However, we must pass the right pre-processor defines to CPP
3188 or GLD can get confused. This is a hack. */
3189 if (TARGET_INTERWORK)
3190 arm_cpp_interwork = 1;
3192 if (arm_arch5)
3193 target_flags &= ~MASK_INTERWORK;
3195 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3196 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3198 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3199 error ("iwmmxt abi requires an iwmmxt capable cpu");
3201 if (!global_options_set.x_arm_fpu_index)
3203 const char *target_fpu_name;
3204 bool ok;
3206 #ifdef FPUTYPE_DEFAULT
3207 target_fpu_name = FPUTYPE_DEFAULT;
3208 #else
3209 target_fpu_name = "vfp";
3210 #endif
3212 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
3213 CL_TARGET);
3214 gcc_assert (ok);
3217 /* If soft-float is specified then don't use FPU. */
3218 if (TARGET_SOFT_FLOAT)
3219 arm_fpu_attr = FPU_NONE;
3220 else if (TARGET_VFP)
3221 arm_fpu_attr = FPU_VFP;
3222 else
3223 gcc_unreachable();
3225 if (TARGET_AAPCS_BASED)
3227 if (TARGET_CALLER_INTERWORKING)
3228 error ("AAPCS does not support -mcaller-super-interworking");
3229 else
3230 if (TARGET_CALLEE_INTERWORKING)
3231 error ("AAPCS does not support -mcallee-super-interworking");
3234 /* __fp16 support currently assumes the core has ldrh. */
3235 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3236 sorry ("__fp16 and no ldrh");
3238 if (TARGET_AAPCS_BASED)
3240 if (arm_abi == ARM_ABI_IWMMXT)
3241 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3242 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3243 && TARGET_HARD_FLOAT
3244 && TARGET_VFP)
3245 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3246 else
3247 arm_pcs_default = ARM_PCS_AAPCS;
3249 else
3251 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
3252 sorry ("-mfloat-abi=hard and VFP");
3254 if (arm_abi == ARM_ABI_APCS)
3255 arm_pcs_default = ARM_PCS_APCS;
3256 else
3257 arm_pcs_default = ARM_PCS_ATPCS;
3260 /* For arm2/3 there is no need to do any scheduling if we are doing
3261 software floating-point. */
3262 if (TARGET_SOFT_FLOAT && !ARM_FSET_HAS_CPU1 (tune_flags, FL_MODE32))
3263 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3265 /* Use the cp15 method if it is available. */
3266 if (target_thread_pointer == TP_AUTO)
3268 if (arm_arch6k && !TARGET_THUMB1)
3269 target_thread_pointer = TP_CP15;
3270 else
3271 target_thread_pointer = TP_SOFT;
3274 /* Override the default structure alignment for AAPCS ABI. */
3275 if (!global_options_set.x_arm_structure_size_boundary)
3277 if (TARGET_AAPCS_BASED)
3278 arm_structure_size_boundary = 8;
3280 else
3282 if (arm_structure_size_boundary != 8
3283 && arm_structure_size_boundary != 32
3284 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3286 if (ARM_DOUBLEWORD_ALIGN)
3287 warning (0,
3288 "structure size boundary can only be set to 8, 32 or 64");
3289 else
3290 warning (0, "structure size boundary can only be set to 8 or 32");
3291 arm_structure_size_boundary
3292 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3296 /* If stack checking is disabled, we can use r10 as the PIC register,
3297 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3298 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3300 if (TARGET_VXWORKS_RTP)
3301 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3302 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3305 if (flag_pic && TARGET_VXWORKS_RTP)
3306 arm_pic_register = 9;
3308 if (arm_pic_register_string != NULL)
3310 int pic_register = decode_reg_name (arm_pic_register_string);
3312 if (!flag_pic)
3313 warning (0, "-mpic-register= is useless without -fpic");
3315 /* Prevent the user from choosing an obviously stupid PIC register. */
3316 else if (pic_register < 0 || call_used_regs[pic_register]
3317 || pic_register == HARD_FRAME_POINTER_REGNUM
3318 || pic_register == STACK_POINTER_REGNUM
3319 || pic_register >= PC_REGNUM
3320 || (TARGET_VXWORKS_RTP
3321 && (unsigned int) pic_register != arm_pic_register))
3322 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3323 else
3324 arm_pic_register = pic_register;
3327 if (TARGET_VXWORKS_RTP
3328 && !global_options_set.x_arm_pic_data_is_text_relative)
3329 arm_pic_data_is_text_relative = 0;
3331 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3332 if (fix_cm3_ldrd == 2)
3334 if (arm_selected_cpu->core == cortexm3)
3335 fix_cm3_ldrd = 1;
3336 else
3337 fix_cm3_ldrd = 0;
3340 /* Hot/Cold partitioning is not currently supported, since we can't
3341 handle literal pool placement in that case. */
3342 if (flag_reorder_blocks_and_partition)
3344 inform (input_location,
3345 "-freorder-blocks-and-partition not supported on this architecture");
3346 flag_reorder_blocks_and_partition = 0;
3347 flag_reorder_blocks = 1;
3350 if (flag_pic)
3351 /* Hoisting PIC address calculations more aggressively provides a small,
3352 but measurable, size reduction for PIC code. Therefore, we decrease
3353 the bar for unrestricted expression hoisting to the cost of PIC address
3354 calculation, which is 2 instructions. */
3355 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3356 global_options.x_param_values,
3357 global_options_set.x_param_values);
3359 /* ARM EABI defaults to strict volatile bitfields. */
3360 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3361 && abi_version_at_least(2))
3362 flag_strict_volatile_bitfields = 1;
3364   /* Enable software prefetching at -O3 for CPUs that have prefetch, and where
3365      we have deemed it beneficial (signified by setting
3366      prefetch.num_slots to 1 or more).  */
3367 if (flag_prefetch_loop_arrays < 0
3368 && HAVE_prefetch
3369 && optimize >= 3
3370 && current_tune->prefetch.num_slots > 0)
3371 flag_prefetch_loop_arrays = 1;
3373   /* Set up parameters to be used in the prefetching algorithm.  Do not
3374 override the defaults unless we are tuning for a core we have
3375 researched values for. */
3376 if (current_tune->prefetch.num_slots > 0)
3377 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3378 current_tune->prefetch.num_slots,
3379 global_options.x_param_values,
3380 global_options_set.x_param_values);
3381 if (current_tune->prefetch.l1_cache_line_size >= 0)
3382 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3383 current_tune->prefetch.l1_cache_line_size,
3384 global_options.x_param_values,
3385 global_options_set.x_param_values);
3386 if (current_tune->prefetch.l1_cache_size >= 0)
3387 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3388 current_tune->prefetch.l1_cache_size,
3389 global_options.x_param_values,
3390 global_options_set.x_param_values);
3392   /* Use Neon rather than core registers to perform 64-bit
3393      operations.  */
3394 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3395 if (use_neon_for_64bits == 1)
3396 prefer_neon_for_64bits = true;
3398 /* Use the alternative scheduling-pressure algorithm by default. */
3399 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3400 global_options.x_param_values,
3401 global_options_set.x_param_values);
3403   /* Look through the ready list and all of the queue for instructions
3404      relevant to the L2 auto-prefetcher.  */
3405 int param_sched_autopref_queue_depth;
3407 switch (current_tune->sched_autopref)
3409 case tune_params::SCHED_AUTOPREF_OFF:
3410 param_sched_autopref_queue_depth = -1;
3411 break;
3413 case tune_params::SCHED_AUTOPREF_RANK:
3414 param_sched_autopref_queue_depth = 0;
3415 break;
3417 case tune_params::SCHED_AUTOPREF_FULL:
3418 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3419 break;
3421 default:
3422 gcc_unreachable ();
3425 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3426 param_sched_autopref_queue_depth,
3427 global_options.x_param_values,
3428 global_options_set.x_param_values);
3430 /* Currently, for slow flash data, we just disable literal pools. */
3431 if (target_slow_flash_data)
3432 arm_disable_literal_pool = true;
3434   /* Disable scheduling fusion by default if the target is not an armv7
3435      processor or does not prefer ldrd/strd.  */
3436 if (flag_schedule_fusion == 2
3437 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3438 flag_schedule_fusion = 0;
3440   /* Need to remember initial options before they are overridden.  */
3441 init_optimize = build_optimization_node (&global_options);
3443 arm_option_override_internal (&global_options, &global_options_set);
3444 arm_option_check_internal (&global_options);
3445 arm_option_params_internal ();
3447 /* Register global variables with the garbage collector. */
3448 arm_add_gc_roots ();
3450   /* Save the initial options in case the user does function-specific
3451      options or #pragma target.  */
3452 target_option_default_node = target_option_current_node
3453 = build_target_option_node (&global_options);
3455 /* Init initial mode for testing. */
3456 thumb_flipper = TARGET_THUMB;
3459 static void
3460 arm_add_gc_roots (void)
3462 gcc_obstack_init(&minipool_obstack);
3463 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3466 /* A table of known ARM exception types.
3467 For use with the interrupt function attribute. */
3469 typedef struct
3471 const char *const arg;
3472 const unsigned long return_value;
3474 isr_attribute_arg;
3476 static const isr_attribute_arg isr_attribute_args [] =
3478 { "IRQ", ARM_FT_ISR },
3479 { "irq", ARM_FT_ISR },
3480 { "FIQ", ARM_FT_FIQ },
3481 { "fiq", ARM_FT_FIQ },
3482 { "ABORT", ARM_FT_ISR },
3483 { "abort", ARM_FT_ISR },
3484 { "ABORT", ARM_FT_ISR },
3485 { "abort", ARM_FT_ISR },
3486 { "UNDEF", ARM_FT_EXCEPTION },
3487 { "undef", ARM_FT_EXCEPTION },
3488 { "SWI", ARM_FT_EXCEPTION },
3489 { "swi", ARM_FT_EXCEPTION },
3490 { NULL, ARM_FT_NORMAL }
3493 /* Returns the (interrupt) function type of the current
3494 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3496 static unsigned long
3497 arm_isr_value (tree argument)
3499 const isr_attribute_arg * ptr;
3500 const char * arg;
3502 if (!arm_arch_notm)
3503 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3505 /* No argument - default to IRQ. */
3506 if (argument == NULL_TREE)
3507 return ARM_FT_ISR;
3509 /* Get the value of the argument. */
3510 if (TREE_VALUE (argument) == NULL_TREE
3511 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3512 return ARM_FT_UNKNOWN;
3514 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3516 /* Check it against the list of known arguments. */
3517 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3518 if (streq (arg, ptr->arg))
3519 return ptr->return_value;
3521 /* An unrecognized interrupt type. */
3522 return ARM_FT_UNKNOWN;
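/* Illustrative example (not part of the original source): the table and
   arm_isr_value above implement the "isr"/"interrupt" function attribute.
   A user marks a handler like so:

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   and with no argument the attribute defaults to ARM_FT_ISR, as handled
   at the top of arm_isr_value.  */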
3525 /* Computes the type of the current function. */
3527 static unsigned long
3528 arm_compute_func_type (void)
3530 unsigned long type = ARM_FT_UNKNOWN;
3531 tree a;
3532 tree attr;
3534 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3536 /* Decide if the current function is volatile. Such functions
3537 never return, and many memory cycles can be saved by not storing
3538 register values that will never be needed again. This optimization
3539 was added to speed up context switching in a kernel application. */
3540 if (optimize > 0
3541 && (TREE_NOTHROW (current_function_decl)
3542 || !(flag_unwind_tables
3543 || (flag_exceptions
3544 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3545 && TREE_THIS_VOLATILE (current_function_decl))
3546 type |= ARM_FT_VOLATILE;
3548 if (cfun->static_chain_decl != NULL)
3549 type |= ARM_FT_NESTED;
3551 attr = DECL_ATTRIBUTES (current_function_decl);
3553 a = lookup_attribute ("naked", attr);
3554 if (a != NULL_TREE)
3555 type |= ARM_FT_NAKED;
3557 a = lookup_attribute ("isr", attr);
3558 if (a == NULL_TREE)
3559 a = lookup_attribute ("interrupt", attr);
3561 if (a == NULL_TREE)
3562 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3563 else
3564 type |= arm_isr_value (TREE_VALUE (a));
3566 return type;
3569 /* Returns the type of the current function. */
3571 unsigned long
3572 arm_current_func_type (void)
3574 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3575 cfun->machine->func_type = arm_compute_func_type ();
3577 return cfun->machine->func_type;
3580 bool
3581 arm_allocate_stack_slots_for_args (void)
3583 /* Naked functions should not allocate stack slots for arguments. */
3584 return !IS_NAKED (arm_current_func_type ());
3587 static bool
3588 arm_warn_func_return (tree decl)
3590 /* Naked functions are implemented entirely in assembly, including the
3591 return sequence, so suppress warnings about this. */
3592 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
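/* Illustrative example (not part of the original source): a "naked"
   function gets no compiler-generated prologue or epilogue, so the body
   must be written entirely in basic asm, e.g.:

     void __attribute__ ((naked)) reset_vector (void)
     {
       __asm__ ("mov r0, #0\n\tbx lr");
     }

   arm_warn_func_return above keeps the missing-return warning quiet for
   such functions because the return sequence is the user's
   responsibility.  */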
3596 /* Output assembler code for a block containing the constant parts
3597 of a trampoline, leaving space for the variable parts.
3599 On the ARM, (if r8 is the static chain regnum, and remembering that
3600 referencing pc adds an offset of 8) the trampoline looks like:
3601 ldr r8, [pc, #0]
3602 ldr pc, [pc]
3603 .word static chain value
3604 .word function's address
3605 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3607 static void
3608 arm_asm_trampoline_template (FILE *f)
3610 fprintf (f, "\t.syntax unified\n");
3612 if (TARGET_ARM)
3614 fprintf (f, "\t.arm\n");
3615 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3616 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3618 else if (TARGET_THUMB2)
3620 fprintf (f, "\t.thumb\n");
3621       /* The Thumb-2 trampoline is similar to the ARM implementation.
3622 	 Unlike 16-bit Thumb, we enter the stub in Thumb mode.  */
3623 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3624 STATIC_CHAIN_REGNUM, PC_REGNUM);
3625 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3627 else
3629 ASM_OUTPUT_ALIGN (f, 2);
3630 fprintf (f, "\t.code\t16\n");
3631 fprintf (f, ".Ltrampoline_start:\n");
3632 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3633 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3634 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3635 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3636 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3637 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3639 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3640 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3643 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3645 static void
3646 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3648 rtx fnaddr, mem, a_tramp;
3650 emit_block_move (m_tramp, assemble_trampoline_template (),
3651 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3653 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3654 emit_move_insn (mem, chain_value);
3656 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3657 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3658 emit_move_insn (mem, fnaddr);
3660 a_tramp = XEXP (m_tramp, 0);
3661 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3662 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3663 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
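/* Illustrative example (not part of the original source): trampolines are
   only materialised when the address of a GNU C nested function escapes,
   e.g.:

     int outer (int x)
     {
       int inner (int y) { return x + y; }
       int (*fp) (int) = inner;          <- forces a stack trampoline
       return fp (1);
     }

   arm_trampoline_init above copies the constant template onto the stack,
   patches in the static chain value and the address of "inner", and ends
   with the __clear_cache call so the freshly written code is visible to
   the instruction fetch path.  */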
3666 /* Thumb trampolines should be entered in thumb mode, so set
3667 the bottom bit of the address. */
3669 static rtx
3670 arm_trampoline_adjust_address (rtx addr)
3672 if (TARGET_THUMB)
3673 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3674 NULL, 0, OPTAB_LIB_WIDEN);
3675 return addr;
3678 /* Return 1 if it is possible to return using a single instruction.
3679 If SIBLING is non-null, this is a test for a return before a sibling
3680 call. SIBLING is the call insn, so we can examine its register usage. */
3683 use_return_insn (int iscond, rtx sibling)
3685 int regno;
3686 unsigned int func_type;
3687 unsigned long saved_int_regs;
3688 unsigned HOST_WIDE_INT stack_adjust;
3689 arm_stack_offsets *offsets;
3691 /* Never use a return instruction before reload has run. */
3692 if (!reload_completed)
3693 return 0;
3695 func_type = arm_current_func_type ();
3697 /* Naked, volatile and stack alignment functions need special
3698 consideration. */
3699 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3700 return 0;
3702 /* So do interrupt functions that use the frame pointer and Thumb
3703 interrupt functions. */
3704 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3705 return 0;
3707 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3708 && !optimize_function_for_size_p (cfun))
3709 return 0;
3711 offsets = arm_get_frame_offsets ();
3712 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3714 /* As do variadic functions. */
3715 if (crtl->args.pretend_args_size
3716 || cfun->machine->uses_anonymous_args
3717 /* Or if the function calls __builtin_eh_return () */
3718 || crtl->calls_eh_return
3719 /* Or if the function calls alloca */
3720 || cfun->calls_alloca
3721 /* Or if there is a stack adjustment. However, if the stack pointer
3722 is saved on the stack, we can use a pre-incrementing stack load. */
3723 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3724 && stack_adjust == 4))
3725 /* Or if the static chain register was saved above the frame, under the
3726 assumption that the stack pointer isn't saved on the stack. */
3727 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3728 && arm_compute_static_chain_stack_bytes() != 0))
3729 return 0;
3731 saved_int_regs = offsets->saved_regs_mask;
3733 /* Unfortunately, the insn
3735 ldmib sp, {..., sp, ...}
3737 triggers a bug on most SA-110 based devices, such that the stack
3738 pointer won't be correctly restored if the instruction takes a
3739 page fault. We work around this problem by popping r3 along with
3740 the other registers, since that is never slower than executing
3741 another instruction.
3743 We test for !arm_arch5 here, because code for any architecture
3744 less than this could potentially be run on one of the buggy
3745 chips. */
3746 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3748 /* Validate that r3 is a call-clobbered register (always true in
3749 the default abi) ... */
3750 if (!call_used_regs[3])
3751 return 0;
3753 /* ... that it isn't being used for a return value ... */
3754 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3755 return 0;
3757 /* ... or for a tail-call argument ... */
3758 if (sibling)
3760 gcc_assert (CALL_P (sibling));
3762 if (find_regno_fusage (sibling, USE, 3))
3763 return 0;
3766 /* ... and that there are no call-saved registers in r0-r2
3767 (always true in the default ABI). */
3768 if (saved_int_regs & 0x7)
3769 return 0;
3772 /* Can't be done if interworking with Thumb, and any registers have been
3773 stacked. */
3774 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3775 return 0;
3777 /* On StrongARM, conditional returns are expensive if they aren't
3778 taken and multiple registers have been stacked. */
3779 if (iscond && arm_tune_strongarm)
3781 /* Conditional return when just the LR is stored is a simple
3782 	 conditional-load instruction; that's not expensive.  */
3783 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3784 return 0;
3786 if (flag_pic
3787 && arm_pic_register != INVALID_REGNUM
3788 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3789 return 0;
3792 /* If there are saved registers but the LR isn't saved, then we need
3793 two instructions for the return. */
3794 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3795 return 0;
3797 /* Can't be done if any of the VFP regs are pushed,
3798 since this also requires an insn. */
3799 if (TARGET_HARD_FLOAT && TARGET_VFP)
3800 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3801 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3802 return 0;
3804 if (TARGET_REALLY_IWMMXT)
3805 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3806 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3807 return 0;
3809 return 1;
3812 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3813 shrink-wrapping if possible. This is the case if we need to emit a
3814 prologue, which we can test by looking at the offsets. */
3815 bool
3816 use_simple_return_p (void)
3818 arm_stack_offsets *offsets;
3820 offsets = arm_get_frame_offsets ();
3821 return offsets->outgoing_args != 0;
3824 /* Return TRUE if int I is a valid immediate ARM constant. */
3827 const_ok_for_arm (HOST_WIDE_INT i)
3829 int lowbit;
3831 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3832 be all zero, or all one. */
3833 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3834 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3835 != ((~(unsigned HOST_WIDE_INT) 0)
3836 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3837 return FALSE;
3839 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3841 /* Fast return for 0 and small values. We must do this for zero, since
3842 the code below can't handle that one case. */
3843 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3844 return TRUE;
3846 /* Get the number of trailing zeros. */
3847 lowbit = ffs((int) i) - 1;
3849 /* Only even shifts are allowed in ARM mode so round down to the
3850 nearest even number. */
3851 if (TARGET_ARM)
3852 lowbit &= ~1;
3854 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3855 return TRUE;
3857 if (TARGET_ARM)
3859 /* Allow rotated constants in ARM mode. */
3860 if (lowbit <= 4
3861 && ((i & ~0xc000003f) == 0
3862 || (i & ~0xf000000f) == 0
3863 || (i & ~0xfc000003) == 0))
3864 return TRUE;
3866 else
3868 HOST_WIDE_INT v;
3870 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3871 v = i & 0xff;
3872 v |= v << 16;
3873 if (i == v || i == (v | (v << 8)))
3874 return TRUE;
3876 /* Allow repeated pattern 0xXY00XY00. */
3877 v = i & 0xff00;
3878 v |= v << 16;
3879 if (i == v)
3880 return TRUE;
3883 return FALSE;
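/* Illustrative sketch (not part of the original source): the ARM-mode rule
   checked above can be restated as "an 8-bit value rotated right by an
   even amount"; rotating the candidate LEFT by each even amount and
   testing for <= 0xff is an equivalent check.  A stand-alone version,
   assuming <stdint.h>:

     static int
     arm_imm_ok_sketch (uint32_t x)
     {
       for (int rot = 0; rot < 32; rot += 2)
         {
           uint32_t v = (x << rot) | (rot ? x >> (32 - rot) : 0);
           if (v <= 0xff)
             return 1;
         }
       return 0;
     }

   For example 0xff000000, 0x000003fc and 0xf000000f are all accepted,
   while 0x00000101 is not (its set bits do not fit in one 8-bit window,
   even allowing wrap-around).  Thumb-2 additionally accepts the
   replicated byte patterns handled in the else branch above.  */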
3886 /* Return true if I is a valid constant for the operation CODE. */
3888 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3890 if (const_ok_for_arm (i))
3891 return 1;
3893 switch (code)
3895 case SET:
3896 /* See if we can use movw. */
3897 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3898 return 1;
3899 else
3900 /* Otherwise, try mvn. */
3901 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3903 case PLUS:
3904 /* See if we can use addw or subw. */
3905 if (TARGET_THUMB2
3906 && ((i & 0xfffff000) == 0
3907 || ((-i) & 0xfffff000) == 0))
3908 return 1;
3909 /* else fall through. */
3911 case COMPARE:
3912 case EQ:
3913 case NE:
3914 case GT:
3915 case LE:
3916 case LT:
3917 case GE:
3918 case GEU:
3919 case LTU:
3920 case GTU:
3921 case LEU:
3922 case UNORDERED:
3923 case ORDERED:
3924 case UNEQ:
3925 case UNGE:
3926 case UNLT:
3927 case UNGT:
3928 case UNLE:
3929 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3931 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3932 case XOR:
3933 return 0;
3935 case IOR:
3936 if (TARGET_THUMB2)
3937 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3938 return 0;
3940 case AND:
3941 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3943 default:
3944 gcc_unreachable ();
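/* Illustrative note (not part of the original source): the negated and
   inverted forms accepted above correspond to the instruction selection
   made later, e.g. (typical ARM-state output):

     int f (int x) { return x + (-1); }      ->  sub  r0, r0, #1
     int g (int x) { return x & ~0xff; }     ->  bic  r0, r0, #255
     int h (void)  { return ~0x3f; }         ->  mvn  r0, #63

   so a constant is "ok" for PLUS when its negation is encodable, and for
   AND (or SET via mvn) when its bitwise inverse is encodable.  */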
3948 /* Return true if I is a valid DImode constant for the operation CODE.  */
3950 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3952 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3953 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3954 rtx hi = GEN_INT (hi_val);
3955 rtx lo = GEN_INT (lo_val);
3957 if (TARGET_THUMB1)
3958 return 0;
3960 switch (code)
3962 case AND:
3963 case IOR:
3964 case XOR:
3965 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3966 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3967 case PLUS:
3968 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3970 default:
3971 return 0;
3975 /* Emit a sequence of insns to handle a large constant.
3976 CODE is the code of the operation required, it can be any of SET, PLUS,
3977 IOR, AND, XOR, MINUS;
3978 MODE is the mode in which the operation is being performed;
3979 VAL is the integer to operate on;
3980 SOURCE is the other operand (a register, or a null-pointer for SET);
3981 SUBTARGETS means it is safe to create scratch registers if that will
3982 either produce a simpler sequence, or we will want to cse the values.
3983 Return value is the number of insns emitted. */
3985 /* ??? Tweak this for thumb2. */
3987 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3988 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3990 rtx cond;
3992 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3993 cond = COND_EXEC_TEST (PATTERN (insn));
3994 else
3995 cond = NULL_RTX;
3997 if (subtargets || code == SET
3998 || (REG_P (target) && REG_P (source)
3999 && REGNO (target) != REGNO (source)))
4001 /* After arm_reorg has been called, we can't fix up expensive
4002 constants by pushing them into memory so we must synthesize
4003 them in-line, regardless of the cost. This is only likely to
4004 be more costly on chips that have load delay slots and we are
4005 compiling without running the scheduler (so no splitting
4006 occurred before the final instruction emission).
4008 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4010 if (!cfun->machine->after_arm_reorg
4011 && !cond
4012 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4013 1, 0)
4014 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4015 + (code != SET))))
4017 if (code == SET)
4019 	  /* Currently SET is the only monadic operation for CODE; all
4020 	     the rest are dyadic.  */
4021 if (TARGET_USE_MOVT)
4022 arm_emit_movpair (target, GEN_INT (val));
4023 else
4024 emit_set_insn (target, GEN_INT (val));
4026 return 1;
4028 else
4030 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4032 if (TARGET_USE_MOVT)
4033 arm_emit_movpair (temp, GEN_INT (val));
4034 else
4035 emit_set_insn (temp, GEN_INT (val));
4037 /* For MINUS, the value is subtracted from, since we never
4038 have subtraction of a constant. */
4039 if (code == MINUS)
4040 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4041 else
4042 emit_set_insn (target,
4043 gen_rtx_fmt_ee (code, mode, source, temp));
4044 return 2;
4049 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
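/* Illustrative example (not part of the original source): on a core with
   MOVW/MOVT (TARGET_USE_MOVT), a SET of an arbitrary constant such as
   0x12345678 is emitted by arm_emit_movpair as two halfword moves:

     movw  r0, #0x5678
     movt  r0, #0x1234

   Older cores instead go through arm_gen_constant below, which builds the
   value from 8-bit rotated immediates, or, before arm_reorg has run, the
   constant may be placed in a literal pool instead, as the comment above
   explains.  */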
4053 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4054    ARM/Thumb-2 immediates and add up to VAL.
4055    The function return value gives the number of insns required.  */
4056 static int
4057 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4058 struct four_ints *return_sequence)
4060 int best_consecutive_zeros = 0;
4061 int i;
4062 int best_start = 0;
4063 int insns1, insns2;
4064 struct four_ints tmp_sequence;
4066 /* If we aren't targeting ARM, the best place to start is always at
4067 the bottom, otherwise look more closely. */
4068 if (TARGET_ARM)
4070 for (i = 0; i < 32; i += 2)
4072 int consecutive_zeros = 0;
4074 if (!(val & (3 << i)))
4076 while ((i < 32) && !(val & (3 << i)))
4078 consecutive_zeros += 2;
4079 i += 2;
4081 if (consecutive_zeros > best_consecutive_zeros)
4083 best_consecutive_zeros = consecutive_zeros;
4084 best_start = i - consecutive_zeros;
4086 i -= 2;
4091 /* So long as it won't require any more insns to do so, it's
4092 desirable to emit a small constant (in bits 0...9) in the last
4093 insn. This way there is more chance that it can be combined with
4094 a later addressing insn to form a pre-indexed load or store
4095 operation. Consider:
4097 *((volatile int *)0xe0000100) = 1;
4098 *((volatile int *)0xe0000110) = 2;
4100 We want this to wind up as:
4102 mov rA, #0xe0000000
4103 mov rB, #1
4104 str rB, [rA, #0x100]
4105 mov rB, #2
4106 str rB, [rA, #0x110]
4108 rather than having to synthesize both large constants from scratch.
4110 Therefore, we calculate how many insns would be required to emit
4111 the constant starting from `best_start', and also starting from
4112 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4113 yield a shorter sequence, we may as well use zero. */
4114 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4115 if (best_start != 0
4116 && ((HOST_WIDE_INT_1U << best_start) < val))
4118 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4119 if (insns2 <= insns1)
4121 *return_sequence = tmp_sequence;
4122 insns1 = insns2;
4126 return insns1;
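/* Illustrative example (not part of the original source): for
   VAL == 0x00f000ff in ARM mode the routine above finds two immediates,
   0x000000ff and 0x00f00000 (each an 8-bit value rotated by an even
   amount), so a SET costs two insns, e.g.:

     mov  r0, #0x000000ff
     add  r0, r0, #0x00f00000

   The two pieces are disjoint, so ADD and ORR are interchangeable here;
   arm_gen_constant rewrites CODE from SET to PLUS after the first
   insn.  */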
4129 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4130 static int
4131 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4132 struct four_ints *return_sequence, int i)
4134 int remainder = val & 0xffffffff;
4135 int insns = 0;
4137 /* Try and find a way of doing the job in either two or three
4138 instructions.
4140 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4141 location. We start at position I. This may be the MSB, or
4142      optimal_immediate_sequence may have positioned it at the largest block
4143 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4144 wrapping around to the top of the word when we drop off the bottom.
4145 In the worst case this code should produce no more than four insns.
4147 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4148 constants, shifted to any arbitrary location. We should always start
4149 at the MSB. */
4152 int end;
4153 unsigned int b1, b2, b3, b4;
4154 unsigned HOST_WIDE_INT result;
4155 int loc;
4157 gcc_assert (insns < 4);
4159 if (i <= 0)
4160 i += 32;
4162 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4163 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4165 loc = i;
4166 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4167 /* We can use addw/subw for the last 12 bits. */
4168 result = remainder;
4169 else
4171 /* Use an 8-bit shifted/rotated immediate. */
4172 end = i - 8;
4173 if (end < 0)
4174 end += 32;
4175 result = remainder & ((0x0ff << end)
4176 | ((i < end) ? (0xff >> (32 - end))
4177 : 0));
4178 i -= 8;
4181 else
4183 	  /* ARM allows rotates by a multiple of two.  Thumb-2 allows
4184 arbitrary shifts. */
4185 i -= TARGET_ARM ? 2 : 1;
4186 continue;
4189 /* Next, see if we can do a better job with a thumb2 replicated
4190 constant.
4192 We do it this way around to catch the cases like 0x01F001E0 where
4193 two 8-bit immediates would work, but a replicated constant would
4194 make it worse.
4196 TODO: 16-bit constants that don't clear all the bits, but still win.
4197 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4198 if (TARGET_THUMB2)
4200 b1 = (remainder & 0xff000000) >> 24;
4201 b2 = (remainder & 0x00ff0000) >> 16;
4202 b3 = (remainder & 0x0000ff00) >> 8;
4203 b4 = remainder & 0xff;
4205 if (loc > 24)
4207 /* The 8-bit immediate already found clears b1 (and maybe b2),
4208 but must leave b3 and b4 alone. */
4210 /* First try to find a 32-bit replicated constant that clears
4211 almost everything. We can assume that we can't do it in one,
4212 or else we wouldn't be here. */
4213 unsigned int tmp = b1 & b2 & b3 & b4;
4214 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4215 + (tmp << 24);
4216 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4217 + (tmp == b3) + (tmp == b4);
4218 if (tmp
4219 && (matching_bytes >= 3
4220 || (matching_bytes == 2
4221 && const_ok_for_op (remainder & ~tmp2, code))))
4223 /* At least 3 of the bytes match, and the fourth has at
4224 least as many bits set, or two of the bytes match
4225 and it will only require one more insn to finish. */
4226 result = tmp2;
4227 i = tmp != b1 ? 32
4228 : tmp != b2 ? 24
4229 : tmp != b3 ? 16
4230 : 8;
4233 /* Second, try to find a 16-bit replicated constant that can
4234 leave three of the bytes clear. If b2 or b4 is already
4235 zero, then we can. If the 8-bit from above would not
4236 clear b2 anyway, then we still win. */
4237 else if (b1 == b3 && (!b2 || !b4
4238 || (remainder & 0x00ff0000 & ~result)))
4240 result = remainder & 0xff00ff00;
4241 i = 24;
4244 else if (loc > 16)
4246 /* The 8-bit immediate already found clears b2 (and maybe b3)
4247 	     and we don't get here unless b1 is already clear, but it will
4248 leave b4 unchanged. */
4250 /* If we can clear b2 and b4 at once, then we win, since the
4251 8-bits couldn't possibly reach that far. */
4252 if (b2 == b4)
4254 result = remainder & 0x00ff00ff;
4255 i = 16;
4260 return_sequence->i[insns++] = result;
4261 remainder &= ~result;
4263 if (code == SET || code == MINUS)
4264 code = PLUS;
4266 while (remainder);
4268 return insns;
4271 /* Emit an instruction with the indicated PATTERN. If COND is
4272 non-NULL, conditionalize the execution of the instruction on COND
4273 being true. */
4275 static void
4276 emit_constant_insn (rtx cond, rtx pattern)
4278 if (cond)
4279 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4280 emit_insn (pattern);
4283 /* As above, but extra parameter GENERATE which, if clear, suppresses
4284 RTL generation. */
4286 static int
4287 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4288 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4289 int subtargets, int generate)
4291 int can_invert = 0;
4292 int can_negate = 0;
4293 int final_invert = 0;
4294 int i;
4295 int set_sign_bit_copies = 0;
4296 int clear_sign_bit_copies = 0;
4297 int clear_zero_bit_copies = 0;
4298 int set_zero_bit_copies = 0;
4299 int insns = 0, neg_insns, inv_insns;
4300 unsigned HOST_WIDE_INT temp1, temp2;
4301 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4302 struct four_ints *immediates;
4303 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4305 /* Find out which operations are safe for a given CODE. Also do a quick
4306 check for degenerate cases; these can occur when DImode operations
4307 are split. */
4308 switch (code)
4310 case SET:
4311 can_invert = 1;
4312 break;
4314 case PLUS:
4315 can_negate = 1;
4316 break;
4318 case IOR:
4319 if (remainder == 0xffffffff)
4321 if (generate)
4322 emit_constant_insn (cond,
4323 gen_rtx_SET (target,
4324 GEN_INT (ARM_SIGN_EXTEND (val))));
4325 return 1;
4328 if (remainder == 0)
4330 if (reload_completed && rtx_equal_p (target, source))
4331 return 0;
4333 if (generate)
4334 emit_constant_insn (cond, gen_rtx_SET (target, source));
4335 return 1;
4337 break;
4339 case AND:
4340 if (remainder == 0)
4342 if (generate)
4343 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4344 return 1;
4346 if (remainder == 0xffffffff)
4348 if (reload_completed && rtx_equal_p (target, source))
4349 return 0;
4350 if (generate)
4351 emit_constant_insn (cond, gen_rtx_SET (target, source));
4352 return 1;
4354 can_invert = 1;
4355 break;
4357 case XOR:
4358 if (remainder == 0)
4360 if (reload_completed && rtx_equal_p (target, source))
4361 return 0;
4362 if (generate)
4363 emit_constant_insn (cond, gen_rtx_SET (target, source));
4364 return 1;
4367 if (remainder == 0xffffffff)
4369 if (generate)
4370 emit_constant_insn (cond,
4371 gen_rtx_SET (target,
4372 gen_rtx_NOT (mode, source)));
4373 return 1;
4375 final_invert = 1;
4376 break;
4378 case MINUS:
4379 /* We treat MINUS as (val - source), since (source - val) is always
4380 passed as (source + (-val)). */
4381 if (remainder == 0)
4383 if (generate)
4384 emit_constant_insn (cond,
4385 gen_rtx_SET (target,
4386 gen_rtx_NEG (mode, source)));
4387 return 1;
4389 if (const_ok_for_arm (val))
4391 if (generate)
4392 emit_constant_insn (cond,
4393 gen_rtx_SET (target,
4394 gen_rtx_MINUS (mode, GEN_INT (val),
4395 source)));
4396 return 1;
4399 break;
4401 default:
4402 gcc_unreachable ();
4405 /* If we can do it in one insn get out quickly. */
4406 if (const_ok_for_op (val, code))
4408 if (generate)
4409 emit_constant_insn (cond,
4410 gen_rtx_SET (target,
4411 (source
4412 ? gen_rtx_fmt_ee (code, mode, source,
4413 GEN_INT (val))
4414 : GEN_INT (val))));
4415 return 1;
4418 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4419 insn. */
4420 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4421 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4423 if (generate)
4425 if (mode == SImode && i == 16)
4426 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4427 smaller insn. */
4428 emit_constant_insn (cond,
4429 gen_zero_extendhisi2
4430 (target, gen_lowpart (HImode, source)));
4431 else
4432 	/* The extzv pattern only supports SImode, but we can coerce the operands
4433 into that mode. */
4434 emit_constant_insn (cond,
4435 gen_extzv_t2 (gen_lowpart (SImode, target),
4436 gen_lowpart (SImode, source),
4437 GEN_INT (i), const0_rtx));
4440 return 1;
4443 /* Calculate a few attributes that may be useful for specific
4444 optimizations. */
4445 /* Count number of leading zeros. */
4446 for (i = 31; i >= 0; i--)
4448 if ((remainder & (1 << i)) == 0)
4449 clear_sign_bit_copies++;
4450 else
4451 break;
4454 /* Count number of leading 1's. */
4455 for (i = 31; i >= 0; i--)
4457 if ((remainder & (1 << i)) != 0)
4458 set_sign_bit_copies++;
4459 else
4460 break;
4463   /* Count number of trailing zeros.  */
4464 for (i = 0; i <= 31; i++)
4466 if ((remainder & (1 << i)) == 0)
4467 clear_zero_bit_copies++;
4468 else
4469 break;
4472 /* Count number of trailing 1's. */
4473 for (i = 0; i <= 31; i++)
4475 if ((remainder & (1 << i)) != 0)
4476 set_zero_bit_copies++;
4477 else
4478 break;
4481 switch (code)
4483 case SET:
4484 /* See if we can do this by sign_extending a constant that is known
4485 	 to be negative.  This is a good way of doing it, since the shift
4486 may well merge into a subsequent insn. */
4487 if (set_sign_bit_copies > 1)
4489 if (const_ok_for_arm
4490 (temp1 = ARM_SIGN_EXTEND (remainder
4491 << (set_sign_bit_copies - 1))))
4493 if (generate)
4495 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4496 emit_constant_insn (cond,
4497 gen_rtx_SET (new_src, GEN_INT (temp1)));
4498 emit_constant_insn (cond,
4499 gen_ashrsi3 (target, new_src,
4500 GEN_INT (set_sign_bit_copies - 1)));
4502 return 2;
4504 	  /* For an inverted constant, we will need to set the low bits;
4505 these will be shifted out of harm's way. */
4506 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4507 if (const_ok_for_arm (~temp1))
4509 if (generate)
4511 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4512 emit_constant_insn (cond,
4513 gen_rtx_SET (new_src, GEN_INT (temp1)));
4514 emit_constant_insn (cond,
4515 gen_ashrsi3 (target, new_src,
4516 GEN_INT (set_sign_bit_copies - 1)));
4518 return 2;
4522 /* See if we can calculate the value as the difference between two
4523 valid immediates. */
4524 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4526 int topshift = clear_sign_bit_copies & ~1;
4528 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4529 & (0xff000000 >> topshift));
4531 /* If temp1 is zero, then that means the 9 most significant
4532 bits of remainder were 1 and we've caused it to overflow.
4533 When topshift is 0 we don't need to do anything since we
4534 can borrow from 'bit 32'. */
4535 if (temp1 == 0 && topshift != 0)
4536 temp1 = 0x80000000 >> (topshift - 1);
4538 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4540 if (const_ok_for_arm (temp2))
4542 if (generate)
4544 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4545 emit_constant_insn (cond,
4546 gen_rtx_SET (new_src, GEN_INT (temp1)));
4547 emit_constant_insn (cond,
4548 gen_addsi3 (target, new_src,
4549 GEN_INT (-temp2)));
4552 return 2;
4556 /* See if we can generate this by setting the bottom (or the top)
4557 16 bits, and then shifting these into the other half of the
4558 word. We only look for the simplest cases, to do more would cost
4559 too much. Be careful, however, not to generate this when the
4560 alternative would take fewer insns. */
4561 if (val & 0xffff0000)
4563 temp1 = remainder & 0xffff0000;
4564 temp2 = remainder & 0x0000ffff;
4566 /* Overlaps outside this range are best done using other methods. */
4567 for (i = 9; i < 24; i++)
4569 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4570 && !const_ok_for_arm (temp2))
4572 rtx new_src = (subtargets
4573 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4574 : target);
4575 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4576 source, subtargets, generate);
4577 source = new_src;
4578 if (generate)
4579 emit_constant_insn
4580 (cond,
4581 gen_rtx_SET
4582 (target,
4583 gen_rtx_IOR (mode,
4584 gen_rtx_ASHIFT (mode, source,
4585 GEN_INT (i)),
4586 source)));
4587 return insns + 1;
4591 /* Don't duplicate cases already considered. */
4592 for (i = 17; i < 24; i++)
4594 if (((temp1 | (temp1 >> i)) == remainder)
4595 && !const_ok_for_arm (temp1))
4597 rtx new_src = (subtargets
4598 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4599 : target);
4600 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4601 source, subtargets, generate);
4602 source = new_src;
4603 if (generate)
4604 emit_constant_insn
4605 (cond,
4606 gen_rtx_SET (target,
4607 gen_rtx_IOR
4608 (mode,
4609 gen_rtx_LSHIFTRT (mode, source,
4610 GEN_INT (i)),
4611 source)));
4612 return insns + 1;
4616 break;
4618 case IOR:
4619 case XOR:
4620 /* If we have IOR or XOR, and the constant can be loaded in a
4621 single instruction, and we can find a temporary to put it in,
4622 then this can be done in two instructions instead of 3-4. */
4623 if (subtargets
4624 /* TARGET can't be NULL if SUBTARGETS is 0 */
4625 || (reload_completed && !reg_mentioned_p (target, source)))
4627 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4629 if (generate)
4631 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4633 emit_constant_insn (cond,
4634 gen_rtx_SET (sub, GEN_INT (val)));
4635 emit_constant_insn (cond,
4636 gen_rtx_SET (target,
4637 gen_rtx_fmt_ee (code, mode,
4638 source, sub)));
4640 return 2;
4644 if (code == XOR)
4645 break;
4647 /* Convert.
4648 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4649 and the remainder 0s for e.g. 0xfff00000)
4650 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4652 This can be done in 2 instructions by using shifts with mov or mvn.
4653 e.g. for
4654 x = x | 0xfff00000;
4655 we generate.
4656 mvn r0, r0, asl #12
4657 mvn r0, r0, lsr #12 */
4658 if (set_sign_bit_copies > 8
4659 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4661 if (generate)
4663 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4664 rtx shift = GEN_INT (set_sign_bit_copies);
4666 emit_constant_insn
4667 (cond,
4668 gen_rtx_SET (sub,
4669 gen_rtx_NOT (mode,
4670 gen_rtx_ASHIFT (mode,
4671 source,
4672 shift))));
4673 emit_constant_insn
4674 (cond,
4675 gen_rtx_SET (target,
4676 gen_rtx_NOT (mode,
4677 gen_rtx_LSHIFTRT (mode, sub,
4678 shift))));
4680 return 2;
4683 /* Convert
4684 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4686 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4688 For eg. r0 = r0 | 0xfff
4689 mvn r0, r0, lsr #12
4690 mvn r0, r0, asl #12
4693 if (set_zero_bit_copies > 8
4694 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4696 if (generate)
4698 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4699 rtx shift = GEN_INT (set_zero_bit_copies);
4701 emit_constant_insn
4702 (cond,
4703 gen_rtx_SET (sub,
4704 gen_rtx_NOT (mode,
4705 gen_rtx_LSHIFTRT (mode,
4706 source,
4707 shift))));
4708 emit_constant_insn
4709 (cond,
4710 gen_rtx_SET (target,
4711 gen_rtx_NOT (mode,
4712 gen_rtx_ASHIFT (mode, sub,
4713 shift))));
4715 return 2;
4718 /* This will never be reached for Thumb2 because orn is a valid
4719 instruction. This is for Thumb1 and the ARM 32 bit cases.
4721 x = y | constant (such that ~constant is a valid constant)
4722 Transform this to
4723 x = ~(~y & ~constant).
4725 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4727 if (generate)
4729 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4730 emit_constant_insn (cond,
4731 gen_rtx_SET (sub,
4732 gen_rtx_NOT (mode, source)));
4733 source = sub;
4734 if (subtargets)
4735 sub = gen_reg_rtx (mode);
4736 emit_constant_insn (cond,
4737 gen_rtx_SET (sub,
4738 gen_rtx_AND (mode, source,
4739 GEN_INT (temp1))));
4740 emit_constant_insn (cond,
4741 gen_rtx_SET (target,
4742 gen_rtx_NOT (mode, sub)));
4744 return 3;
4746 break;
4748 case AND:
4749       /* See if two shifts will do 2 or more insns' worth of work.  */
4750 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4752 HOST_WIDE_INT shift_mask = ((0xffffffff
4753 << (32 - clear_sign_bit_copies))
4754 & 0xffffffff);
4756 if ((remainder | shift_mask) != 0xffffffff)
4758 HOST_WIDE_INT new_val
4759 = ARM_SIGN_EXTEND (remainder | shift_mask);
4761 if (generate)
4763 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4764 insns = arm_gen_constant (AND, SImode, cond, new_val,
4765 new_src, source, subtargets, 1);
4766 source = new_src;
4768 else
4770 rtx targ = subtargets ? NULL_RTX : target;
4771 insns = arm_gen_constant (AND, mode, cond, new_val,
4772 targ, source, subtargets, 0);
4776 if (generate)
4778 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4779 rtx shift = GEN_INT (clear_sign_bit_copies);
4781 emit_insn (gen_ashlsi3 (new_src, source, shift));
4782 emit_insn (gen_lshrsi3 (target, new_src, shift));
4785 return insns + 2;
4788 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4790 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4792 if ((remainder | shift_mask) != 0xffffffff)
4794 HOST_WIDE_INT new_val
4795 = ARM_SIGN_EXTEND (remainder | shift_mask);
4796 if (generate)
4798 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4800 insns = arm_gen_constant (AND, mode, cond, new_val,
4801 new_src, source, subtargets, 1);
4802 source = new_src;
4804 else
4806 rtx targ = subtargets ? NULL_RTX : target;
4808 insns = arm_gen_constant (AND, mode, cond, new_val,
4809 targ, source, subtargets, 0);
4813 if (generate)
4815 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4816 rtx shift = GEN_INT (clear_zero_bit_copies);
4818 emit_insn (gen_lshrsi3 (new_src, source, shift));
4819 emit_insn (gen_ashlsi3 (target, new_src, shift));
4822 return insns + 2;
4825 break;
4827 default:
4828 break;
4831 /* Calculate what the instruction sequences would be if we generated it
4832 normally, negated, or inverted. */
4833 if (code == AND)
4834 /* AND cannot be split into multiple insns, so invert and use BIC. */
4835 insns = 99;
4836 else
4837 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4839 if (can_negate)
4840 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4841 &neg_immediates);
4842 else
4843 neg_insns = 99;
4845 if (can_invert || final_invert)
4846 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4847 &inv_immediates);
4848 else
4849 inv_insns = 99;
4851 immediates = &pos_immediates;
4853 /* Is the negated immediate sequence more efficient? */
4854 if (neg_insns < insns && neg_insns <= inv_insns)
4856 insns = neg_insns;
4857 immediates = &neg_immediates;
4859 else
4860 can_negate = 0;
4862 /* Is the inverted immediate sequence more efficient?
4863 We must allow for an extra NOT instruction for XOR operations, although
4864 there is some chance that the final 'mvn' will get optimized later. */
4865 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4867 insns = inv_insns;
4868 immediates = &inv_immediates;
4870 else
4872 can_invert = 0;
4873 final_invert = 0;
4876 /* Now output the chosen sequence as instructions. */
4877 if (generate)
4879 for (i = 0; i < insns; i++)
4881 rtx new_src, temp1_rtx;
4883 temp1 = immediates->i[i];
4885 if (code == SET || code == MINUS)
4886 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4887 else if ((final_invert || i < (insns - 1)) && subtargets)
4888 new_src = gen_reg_rtx (mode);
4889 else
4890 new_src = target;
4892 if (can_invert)
4893 temp1 = ~temp1;
4894 else if (can_negate)
4895 temp1 = -temp1;
4897 temp1 = trunc_int_for_mode (temp1, mode);
4898 temp1_rtx = GEN_INT (temp1);
4900 if (code == SET)
4902 else if (code == MINUS)
4903 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4904 else
4905 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4907 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4908 source = new_src;
4910 if (code == SET)
4912 can_negate = can_invert;
4913 can_invert = 0;
4914 code = PLUS;
4916 else if (code == MINUS)
4917 code = PLUS;
4921 if (final_invert)
4923 if (generate)
4924 emit_constant_insn (cond, gen_rtx_SET (target,
4925 gen_rtx_NOT (mode, source)));
4926 insns++;
4929 return insns;
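/* Illustrative example (not part of the original source): the invert
   bookkeeping above is what turns a multi-piece AND into BICs.  In ARM
   state, x & 0xff00ff00 cannot be done with AND immediates, but the
   inverse 0x00ff00ff splits into two valid immediates, giving:

     bic  r0, r0, #0x000000ff
     bic  r0, r0, #0x00ff0000

   (In Thumb-2 state 0x00ff00ff is a replicated-byte constant, so a single
   BIC suffices and this path is not reached.)  */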
4932 /* Canonicalize a comparison so that we are more likely to recognize it.
4933 This can be done for a few constant compares, where we can make the
4934 immediate value easier to load. */
4936 static void
4937 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4938 bool op0_preserve_value)
4940 machine_mode mode;
4941 unsigned HOST_WIDE_INT i, maxval;
4943 mode = GET_MODE (*op0);
4944 if (mode == VOIDmode)
4945 mode = GET_MODE (*op1);
4947 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
4949 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4950 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4951 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4952 for GTU/LEU in Thumb mode. */
4953 if (mode == DImode)
4956 if (*code == GT || *code == LE
4957 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4959 /* Missing comparison. First try to use an available
4960 comparison. */
4961 if (CONST_INT_P (*op1))
4963 i = INTVAL (*op1);
4964 switch (*code)
4966 case GT:
4967 case LE:
4968 if (i != maxval
4969 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4971 *op1 = GEN_INT (i + 1);
4972 *code = *code == GT ? GE : LT;
4973 return;
4975 break;
4976 case GTU:
4977 case LEU:
4978 if (i != ~((unsigned HOST_WIDE_INT) 0)
4979 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4981 *op1 = GEN_INT (i + 1);
4982 *code = *code == GTU ? GEU : LTU;
4983 return;
4985 break;
4986 default:
4987 gcc_unreachable ();
4991 /* If that did not work, reverse the condition. */
4992 if (!op0_preserve_value)
4994 std::swap (*op0, *op1);
4995 *code = (int)swap_condition ((enum rtx_code)*code);
4998 return;
5001 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5002 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5003 to facilitate possible combining with a cmp into 'ands'. */
5004 if (mode == SImode
5005 && GET_CODE (*op0) == ZERO_EXTEND
5006 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5007 && GET_MODE (XEXP (*op0, 0)) == QImode
5008 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5009 && subreg_lowpart_p (XEXP (*op0, 0))
5010 && *op1 == const0_rtx)
5011 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5012 GEN_INT (255));
5014 /* Comparisons smaller than DImode. Only adjust comparisons against
5015 an out-of-range constant. */
5016 if (!CONST_INT_P (*op1)
5017 || const_ok_for_arm (INTVAL (*op1))
5018 || const_ok_for_arm (- INTVAL (*op1)))
5019 return;
5021 i = INTVAL (*op1);
5023 switch (*code)
5025 case EQ:
5026 case NE:
5027 return;
5029 case GT:
5030 case LE:
5031 if (i != maxval
5032 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5034 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5035 *code = *code == GT ? GE : LT;
5036 return;
5038 break;
5040 case GE:
5041 case LT:
5042 if (i != ~maxval
5043 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5045 *op1 = GEN_INT (i - 1);
5046 *code = *code == GE ? GT : LE;
5047 return;
5049 break;
5051 case GTU:
5052 case LEU:
5053 if (i != ~((unsigned HOST_WIDE_INT) 0)
5054 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5056 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5057 *code = *code == GTU ? GEU : LTU;
5058 return;
5060 break;
5062 case GEU:
5063 case LTU:
5064 if (i != 0
5065 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5067 *op1 = GEN_INT (i - 1);
5068 *code = *code == GEU ? GTU : LEU;
5069 return;
5071 break;
5073 default:
5074 gcc_unreachable ();
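/* Illustrative example (not part of the original source): a comparison
   such as (x > 511) uses a constant that is not a valid ARM immediate
   (and neither is -511), so the GT case above rewrites it as (x >= 512),
   i.e.

     cmp  r0, #512
     bge  .Ltaken

   because 512 is encodable while 511 is not.  */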
5079 /* Define how to find the value returned by a function. */
5081 static rtx
5082 arm_function_value(const_tree type, const_tree func,
5083 bool outgoing ATTRIBUTE_UNUSED)
5085 machine_mode mode;
5086 int unsignedp ATTRIBUTE_UNUSED;
5087 rtx r ATTRIBUTE_UNUSED;
5089 mode = TYPE_MODE (type);
5091 if (TARGET_AAPCS_BASED)
5092 return aapcs_allocate_return_reg (mode, type, func);
5094 /* Promote integer types. */
5095 if (INTEGRAL_TYPE_P (type))
5096 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5098   /* Promote small structs returned in a register to full-word size
5099 for big-endian AAPCS. */
5100 if (arm_return_in_msb (type))
5102 HOST_WIDE_INT size = int_size_in_bytes (type);
5103 if (size % UNITS_PER_WORD != 0)
5105 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5106 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5110 return arm_libcall_value_1 (mode);
5113 /* libcall hashtable helpers. */
5115 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5117 static inline hashval_t hash (const rtx_def *);
5118 static inline bool equal (const rtx_def *, const rtx_def *);
5119 static inline void remove (rtx_def *);
5122 inline bool
5123 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5125 return rtx_equal_p (p1, p2);
5128 inline hashval_t
5129 libcall_hasher::hash (const rtx_def *p1)
5131 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5134 typedef hash_table<libcall_hasher> libcall_table_type;
5136 static void
5137 add_libcall (libcall_table_type *htab, rtx libcall)
5139 *htab->find_slot (libcall, INSERT) = libcall;
5142 static bool
5143 arm_libcall_uses_aapcs_base (const_rtx libcall)
5145 static bool init_done = false;
5146 static libcall_table_type *libcall_htab = NULL;
5148 if (!init_done)
5150 init_done = true;
5152 libcall_htab = new libcall_table_type (31);
5153 add_libcall (libcall_htab,
5154 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5155 add_libcall (libcall_htab,
5156 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5157 add_libcall (libcall_htab,
5158 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5159 add_libcall (libcall_htab,
5160 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5162 add_libcall (libcall_htab,
5163 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5164 add_libcall (libcall_htab,
5165 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5166 add_libcall (libcall_htab,
5167 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5168 add_libcall (libcall_htab,
5169 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5171 add_libcall (libcall_htab,
5172 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5173 add_libcall (libcall_htab,
5174 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5175 add_libcall (libcall_htab,
5176 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5177 add_libcall (libcall_htab,
5178 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5179 add_libcall (libcall_htab,
5180 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5181 add_libcall (libcall_htab,
5182 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5183 add_libcall (libcall_htab,
5184 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5185 add_libcall (libcall_htab,
5186 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5188 /* Values from double-precision helper functions are returned in core
5189 registers if the selected core only supports single-precision
5190 arithmetic, even if we are using the hard-float ABI. The same is
5191 true for single-precision helpers, but we will never be using the
5192 hard-float ABI on a CPU which doesn't support single-precision
5193 operations in hardware. */
5194 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5195 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5196 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5197 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5198 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5199 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5200 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5201 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5202 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5203 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5204 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5205 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5206 SFmode));
5207 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5208 DFmode));
5211 return libcall && libcall_htab->find (libcall) != NULL;
5214 static rtx
5215 arm_libcall_value_1 (machine_mode mode)
5217 if (TARGET_AAPCS_BASED)
5218 return aapcs_libcall_value (mode);
5219 else if (TARGET_IWMMXT_ABI
5220 && arm_vector_mode_supported_p (mode))
5221 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5222 else
5223 return gen_rtx_REG (mode, ARG_REGISTER (1));
5226 /* Define how to find the value returned by a library function
5227 assuming the value has mode MODE. */
5229 static rtx
5230 arm_libcall_value (machine_mode mode, const_rtx libcall)
5232 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5233 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5235 /* The following libcalls return their result in integer registers,
5236 even though they return a floating point value. */
5237 if (arm_libcall_uses_aapcs_base (libcall))
5238 return gen_rtx_REG (mode, ARG_REGISTER(1));
5242 return arm_libcall_value_1 (mode);
5245 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5247 static bool
5248 arm_function_value_regno_p (const unsigned int regno)
5250 if (regno == ARG_REGISTER (1)
5251 || (TARGET_32BIT
5252 && TARGET_AAPCS_BASED
5253 && TARGET_VFP
5254 && TARGET_HARD_FLOAT
5255 && regno == FIRST_VFP_REGNUM)
5256 || (TARGET_IWMMXT_ABI
5257 && regno == FIRST_IWMMXT_REGNUM))
5258 return true;
5260 return false;
5263 /* Determine the amount of memory needed to store the possible return
5264 registers of an untyped call. */
5266 arm_apply_result_size (void)
5268 int size = 16;
5270 if (TARGET_32BIT)
5272 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5273 size += 32;
5274 if (TARGET_IWMMXT_ABI)
5275 size += 8;
5278 return size;
5281 /* Decide whether TYPE should be returned in memory (true)
5282 or in a register (false). FNTYPE is the type of the function making
5283 the call. */
5284 static bool
5285 arm_return_in_memory (const_tree type, const_tree fntype)
5287 HOST_WIDE_INT size;
5289 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5291 if (TARGET_AAPCS_BASED)
5293       /* Simple, non-aggregate types (i.e. not including vectors and
5294 complex) are always returned in a register (or registers).
5295 We don't care about which register here, so we can short-cut
5296 some of the detail. */
5297 if (!AGGREGATE_TYPE_P (type)
5298 && TREE_CODE (type) != VECTOR_TYPE
5299 && TREE_CODE (type) != COMPLEX_TYPE)
5300 return false;
5302 /* Any return value that is no larger than one word can be
5303 returned in r0. */
5304 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5305 return false;
5307 /* Check any available co-processors to see if they accept the
5308 type as a register candidate (VFP, for example, can return
5309 some aggregates in consecutive registers). These aren't
5310 available if the call is variadic. */
5311 if (aapcs_select_return_coproc (type, fntype) >= 0)
5312 return false;
5314 /* Vector values should be returned using ARM registers, not
5315 memory (unless they're over 16 bytes, which will break since
5316 we only have four call-clobbered registers to play with). */
5317 if (TREE_CODE (type) == VECTOR_TYPE)
5318 return (size < 0 || size > (4 * UNITS_PER_WORD));
5320 /* The rest go in memory. */
5321 return true;
5324 if (TREE_CODE (type) == VECTOR_TYPE)
5325 return (size < 0 || size > (4 * UNITS_PER_WORD));
5327 if (!AGGREGATE_TYPE_P (type) &&
5328 (TREE_CODE (type) != VECTOR_TYPE))
5329 /* All simple types are returned in registers. */
5330 return false;
5332 if (arm_abi != ARM_ABI_APCS)
5334 /* ATPCS and later return aggregate types in memory only if they are
5335 larger than a word (or are variable size). */
5336 return (size < 0 || size > UNITS_PER_WORD);
5339 /* For the arm-wince targets we choose to be compatible with Microsoft's
5340 ARM and Thumb compilers, which always return aggregates in memory. */
5341 #ifndef ARM_WINCE
5342 /* All structures/unions bigger than one word are returned in memory.
5343 Also catch the case where int_size_in_bytes returns -1. In this case
5344 the aggregate is either huge or of variable size, and in either case
5345 we will want to return it via memory and not in a register. */
5346 if (size < 0 || size > UNITS_PER_WORD)
5347 return true;
5349 if (TREE_CODE (type) == RECORD_TYPE)
5351 tree field;
5353 /* For a struct the APCS says that we only return in a register
5354 if the type is 'integer like' and every addressable element
5355 has an offset of zero. For practical purposes this means
5356 that the structure can have at most one non bit-field element
5357 and that this element must be the first one in the structure. */
5359 /* Find the first field, ignoring non FIELD_DECL things which will
5360 have been created by C++. */
5361 for (field = TYPE_FIELDS (type);
5362 field && TREE_CODE (field) != FIELD_DECL;
5363 field = DECL_CHAIN (field))
5364 continue;
5366 if (field == NULL)
5367 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5369 /* Check that the first field is valid for returning in a register. */
5371 /* ... Floats are not allowed */
5372 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5373 return true;
5375 /* ... Aggregates that are not themselves valid for returning in
5376 a register are not allowed. */
5377 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5378 return true;
5380 /* Now check the remaining fields, if any. Only bitfields are allowed,
5381 since they are not addressable. */
5382 for (field = DECL_CHAIN (field);
5383 field;
5384 field = DECL_CHAIN (field))
5386 if (TREE_CODE (field) != FIELD_DECL)
5387 continue;
5389 if (!DECL_BIT_FIELD_TYPE (field))
5390 return true;
5393 return false;
5396 if (TREE_CODE (type) == UNION_TYPE)
5398 tree field;
5400 /* Unions can be returned in registers if every element is
5401 integral, or can be returned in an integer register. */
5402 for (field = TYPE_FIELDS (type);
5403 field;
5404 field = DECL_CHAIN (field))
5406 if (TREE_CODE (field) != FIELD_DECL)
5407 continue;
5409 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5410 return true;
5412 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5413 return true;
5416 return false;
5418 #endif /* not ARM_WINCE */
5420 /* Return all other types in memory. */
5421 return true;
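/* Editorial illustration (hypothetical user types, not part of GCC): a rough
   sketch of how the AAPCS branch above classifies a few return types.  */
#if 0
int i;                                  /* <= UNITS_PER_WORD: returned in r0.  */
struct two_d { double a, b; };          /* HFA: VFP registers under the
                                           hard-float variant, otherwise memory.  */
struct five_i { int v[5]; };            /* larger than a word, no co-processor
                                           candidate: returned in memory.  */
typedef int v2si_t __attribute__ ((vector_size (8)));  /* vector <= 16 bytes:
                                                           returned in registers.  */
#endif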
5424 const struct pcs_attribute_arg
5426 const char *arg;
5427 enum arm_pcs value;
5428 } pcs_attribute_args[] =
5430 {"aapcs", ARM_PCS_AAPCS},
5431 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5432 #if 0
5433 /* We could recognize these, but changes would be needed elsewhere
5434 * to implement them. */
5435 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5436 {"atpcs", ARM_PCS_ATPCS},
5437 {"apcs", ARM_PCS_APCS},
5438 #endif
5439 {NULL, ARM_PCS_UNKNOWN}
5442 static enum arm_pcs
5443 arm_pcs_from_attribute (tree attr)
5445 const struct pcs_attribute_arg *ptr;
5446 const char *arg;
5448 /* Get the value of the argument. */
5449 if (TREE_VALUE (attr) == NULL_TREE
5450 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5451 return ARM_PCS_UNKNOWN;
5453 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5455 /* Check it against the list of known arguments. */
5456 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5457 if (streq (arg, ptr->arg))
5458 return ptr->value;
5460 /* An unrecognized PCS variant.  */
5461 return ARM_PCS_UNKNOWN;
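/* Editorial illustration (hypothetical user code): the attribute argument is
   matched against the table above; unrecognized strings yield ARM_PCS_UNKNOWN
   and the attribute is later diagnosed and ignored.  */
#if 0
double scale (double) __attribute__ ((pcs ("aapcs-vfp")));   /* ARM_PCS_AAPCS_VFP */
void plain (void) __attribute__ ((pcs ("aapcs")));           /* ARM_PCS_AAPCS     */
void oops (void) __attribute__ ((pcs ("fastcall")));         /* ARM_PCS_UNKNOWN   */
#endif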
5464 /* Get the PCS variant to use for this call. TYPE is the function's type
5465 specification, DECL is the specific declaration. DECL may be null if
5466 the call could be indirect or if this is a library call. */
5467 static enum arm_pcs
5468 arm_get_pcs_model (const_tree type, const_tree decl)
5470 bool user_convention = false;
5471 enum arm_pcs user_pcs = arm_pcs_default;
5472 tree attr;
5474 gcc_assert (type);
5476 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5477 if (attr)
5479 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5480 user_convention = true;
5483 if (TARGET_AAPCS_BASED)
5485 /* Detect varargs functions. These always use the base rules
5486 (no argument is ever a candidate for a co-processor
5487 register). */
5488 bool base_rules = stdarg_p (type);
5490 if (user_convention)
5492 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5493 sorry ("non-AAPCS derived PCS variant");
5494 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5495 error ("variadic functions must use the base AAPCS variant");
5498 if (base_rules)
5499 return ARM_PCS_AAPCS;
5500 else if (user_convention)
5501 return user_pcs;
5502 else if (decl && flag_unit_at_a_time)
5504 /* Local functions never leak outside this compilation unit,
5505 so we are free to use whatever conventions are
5506 appropriate. */
5507 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5508 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5509 if (i && i->local)
5510 return ARM_PCS_AAPCS_LOCAL;
5513 else if (user_convention && user_pcs != arm_pcs_default)
5514 sorry ("PCS variant");
5516 /* For everything else we use the target's default. */
5517 return arm_pcs_default;
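/* Editorial illustration (hypothetical user code): on an AAPCS target a
   variadic function must use the base variant, so the second declaration
   trips the error issued above.  */
#if 0
float dot3 (float, float, float) __attribute__ ((pcs ("aapcs-vfp")));  /* OK    */
int log_msg (const char *, ...) __attribute__ ((pcs ("aapcs-vfp")));   /* error */
#endif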
5521 static void
5522 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5523 const_tree fntype ATTRIBUTE_UNUSED,
5524 rtx libcall ATTRIBUTE_UNUSED,
5525 const_tree fndecl ATTRIBUTE_UNUSED)
5527 /* Record the unallocated VFP registers. */
5528 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5529 pcum->aapcs_vfp_reg_alloc = 0;
5532 /* Walk down the type tree of TYPE counting consecutive base elements.
5533 If *MODEP is VOIDmode, then set it to the first valid floating point
5534 type. If a non-floating point type is found, or if a floating point
5535 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5536 otherwise return the count in the sub-tree. */
5537 static int
5538 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5540 machine_mode mode;
5541 HOST_WIDE_INT size;
5543 switch (TREE_CODE (type))
5545 case REAL_TYPE:
5546 mode = TYPE_MODE (type);
5547 if (mode != DFmode && mode != SFmode && mode != HFmode)
5548 return -1;
5550 if (*modep == VOIDmode)
5551 *modep = mode;
5553 if (*modep == mode)
5554 return 1;
5556 break;
5558 case COMPLEX_TYPE:
5559 mode = TYPE_MODE (TREE_TYPE (type));
5560 if (mode != DFmode && mode != SFmode)
5561 return -1;
5563 if (*modep == VOIDmode)
5564 *modep = mode;
5566 if (*modep == mode)
5567 return 2;
5569 break;
5571 case VECTOR_TYPE:
5572 /* Use V2SImode and V4SImode as representatives of all 64-bit
5573 and 128-bit vector types, whether or not those modes are
5574 supported with the present options. */
5575 size = int_size_in_bytes (type);
5576 switch (size)
5578 case 8:
5579 mode = V2SImode;
5580 break;
5581 case 16:
5582 mode = V4SImode;
5583 break;
5584 default:
5585 return -1;
5588 if (*modep == VOIDmode)
5589 *modep = mode;
5591 /* Vector modes are considered to be opaque: two vectors are
5592 equivalent for the purposes of being homogeneous aggregates
5593 if they are the same size. */
5594 if (*modep == mode)
5595 return 1;
5597 break;
5599 case ARRAY_TYPE:
5601 int count;
5602 tree index = TYPE_DOMAIN (type);
5604 /* Can't handle incomplete types nor sizes that are not
5605 fixed. */
5606 if (!COMPLETE_TYPE_P (type)
5607 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5608 return -1;
5610 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5611 if (count == -1
5612 || !index
5613 || !TYPE_MAX_VALUE (index)
5614 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5615 || !TYPE_MIN_VALUE (index)
5616 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5617 || count < 0)
5618 return -1;
5620 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5621 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5623 /* There must be no padding. */
5624 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5625 return -1;
5627 return count;
5630 case RECORD_TYPE:
5632 int count = 0;
5633 int sub_count;
5634 tree field;
5636 /* Can't handle incomplete types nor sizes that are not
5637 fixed. */
5638 if (!COMPLETE_TYPE_P (type)
5639 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5640 return -1;
5642 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5644 if (TREE_CODE (field) != FIELD_DECL)
5645 continue;
5647 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5648 if (sub_count < 0)
5649 return -1;
5650 count += sub_count;
5653 /* There must be no padding. */
5654 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5655 return -1;
5657 return count;
5660 case UNION_TYPE:
5661 case QUAL_UNION_TYPE:
5663 /* These aren't very interesting except in a degenerate case. */
5664 int count = 0;
5665 int sub_count;
5666 tree field;
5668 /* Can't handle incomplete types nor sizes that are not
5669 fixed. */
5670 if (!COMPLETE_TYPE_P (type)
5671 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5672 return -1;
5674 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5676 if (TREE_CODE (field) != FIELD_DECL)
5677 continue;
5679 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5680 if (sub_count < 0)
5681 return -1;
5682 count = count > sub_count ? count : sub_count;
5685 /* There must be no padding. */
5686 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5687 return -1;
5689 return count;
5692 default:
5693 break;
5696 return -1;
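/* Editorial illustration (hypothetical user types): the first two are valid
   homogeneous aggregates for the walk above, the third is not.  */
#if 0
struct hfa_d3 { double a, b, c; };      /* count 3, *modep == DFmode     */
struct hfa_s4 { float v[4]; };          /* count 4, *modep == SFmode     */
struct mixed  { float f; double d; };   /* mixed base types: returns -1  */
#endif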
5699 /* Return true if PCS_VARIANT should use VFP registers. */
5700 static bool
5701 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5703 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5705 static bool seen_thumb1_vfp = false;
5707 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5709 sorry ("Thumb-1 hard-float VFP ABI");
5710 /* sorry() is not immediately fatal, so only display this once. */
5711 seen_thumb1_vfp = true;
5714 return true;
5717 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5718 return false;
5720 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
5721 && (TARGET_VFP_DOUBLE || !is_double));
5724 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5725 suitable for passing or returning in VFP registers for the PCS
5726 variant selected. If it is, then *BASE_MODE is updated to contain
5727 a machine mode describing each element of the argument's type and
5728 *COUNT to hold the number of such elements. */
5729 static bool
5730 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5731 machine_mode mode, const_tree type,
5732 machine_mode *base_mode, int *count)
5734 machine_mode new_mode = VOIDmode;
5736 /* If we have the type information, prefer that to working things
5737 out from the mode. */
5738 if (type)
5740 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5742 if (ag_count > 0 && ag_count <= 4)
5743 *count = ag_count;
5744 else
5745 return false;
5747 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5748 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5749 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5751 *count = 1;
5752 new_mode = mode;
5754 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5756 *count = 2;
5757 new_mode = (mode == DCmode ? DFmode : SFmode);
5759 else
5760 return false;
5763 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5764 return false;
5766 *base_mode = new_mode;
5767 return true;
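/* Editorial note: for example, a double _Complex argument (DCmode) takes the
   mode-only path above and yields *count == 2 with *base_mode == DFmode,
   while a struct of three doubles goes through the type walk and yields
   *count == 3; an aggregate of more than four elements is rejected.  */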
5770 static bool
5771 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5772 machine_mode mode, const_tree type)
5774 int count ATTRIBUTE_UNUSED;
5775 machine_mode ag_mode ATTRIBUTE_UNUSED;
5777 if (!use_vfp_abi (pcs_variant, false))
5778 return false;
5779 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5780 &ag_mode, &count);
5783 static bool
5784 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5785 const_tree type)
5787 if (!use_vfp_abi (pcum->pcs_variant, false))
5788 return false;
5790 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5791 &pcum->aapcs_vfp_rmode,
5792 &pcum->aapcs_vfp_rcount);
5795 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
5796 for the behaviour of this function. */
5798 static bool
5799 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5800 const_tree type ATTRIBUTE_UNUSED)
5802 int rmode_size
5803 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
5804 int shift = rmode_size / GET_MODE_SIZE (SFmode);
5805 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5806 int regno;
5808 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5809 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5811 pcum->aapcs_vfp_reg_alloc = mask << regno;
5812 if (mode == BLKmode
5813 || (mode == TImode && ! TARGET_NEON)
5814 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5816 int i;
5817 int rcount = pcum->aapcs_vfp_rcount;
5818 int rshift = shift;
5819 machine_mode rmode = pcum->aapcs_vfp_rmode;
5820 rtx par;
5821 if (!TARGET_NEON)
5823 /* Avoid using unsupported vector modes. */
5824 if (rmode == V2SImode)
5825 rmode = DImode;
5826 else if (rmode == V4SImode)
5828 rmode = DImode;
5829 rcount *= 2;
5830 rshift /= 2;
5833 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5834 for (i = 0; i < rcount; i++)
5836 rtx tmp = gen_rtx_REG (rmode,
5837 FIRST_VFP_REGNUM + regno + i * rshift);
5838 tmp = gen_rtx_EXPR_LIST
5839 (VOIDmode, tmp,
5840 GEN_INT (i * GET_MODE_SIZE (rmode)));
5841 XVECEXP (par, 0, i) = tmp;
5844 pcum->aapcs_reg = par;
5846 else
5847 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5848 return true;
5850 return false;
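/* Editorial worked example: for a homogeneous aggregate of three doubles,
   aapcs_vfp_rmode is DFmode, so rmode_size == 8, shift == 2 and
   mask == (1 << 6) - 1, i.e. six consecutive S registers.  The loop then
   probes s0, s2, s4, ... and takes the first D-aligned block whose bits are
   all still set in aapcs_vfp_regs_free.  */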
5853 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
5854 comment there for the behaviour of this function. */
5856 static rtx
5857 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5858 machine_mode mode,
5859 const_tree type ATTRIBUTE_UNUSED)
5861 if (!use_vfp_abi (pcs_variant, false))
5862 return NULL;
5864 if (mode == BLKmode
5865 || (GET_MODE_CLASS (mode) == MODE_INT
5866 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
5867 && !TARGET_NEON))
5869 int count;
5870 machine_mode ag_mode;
5871 int i;
5872 rtx par;
5873 int shift;
5875 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5876 &ag_mode, &count);
5878 if (!TARGET_NEON)
5880 if (ag_mode == V2SImode)
5881 ag_mode = DImode;
5882 else if (ag_mode == V4SImode)
5884 ag_mode = DImode;
5885 count *= 2;
5888 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
5889 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5890 for (i = 0; i < count; i++)
5892 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5893 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5894 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5895 XVECEXP (par, 0, i) = tmp;
5898 return par;
5901 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5904 static void
5905 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5906 machine_mode mode ATTRIBUTE_UNUSED,
5907 const_tree type ATTRIBUTE_UNUSED)
5909 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5910 pcum->aapcs_vfp_reg_alloc = 0;
5911 return;
5914 #define AAPCS_CP(X) \
5916 aapcs_ ## X ## _cum_init, \
5917 aapcs_ ## X ## _is_call_candidate, \
5918 aapcs_ ## X ## _allocate, \
5919 aapcs_ ## X ## _is_return_candidate, \
5920 aapcs_ ## X ## _allocate_return_reg, \
5921 aapcs_ ## X ## _advance \
5924 /* Table of co-processors that can be used to pass arguments in
5925 registers.  Ideally no argument should be a candidate for more than
5926 one co-processor table entry, but the table is processed in order
5927 and stops after the first match. If that entry then fails to put
5928 the argument into a co-processor register, the argument will go on
5929 the stack. */
5930 static struct
5932 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5933 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5935 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5936 BLKmode) is a candidate for this co-processor's registers; this
5937 function should ignore any position-dependent state in
5938 CUMULATIVE_ARGS and only use call-type dependent information. */
5939 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5941 /* Return true if the argument does get a co-processor register; it
5942 should set aapcs_reg to an RTX of the register allocated as is
5943 required for a return from FUNCTION_ARG. */
5944 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5946 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
5947 be returned in this co-processor's registers. */
5948 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5950 /* Allocate and return an RTX element to hold the return type of a call. This
5951 routine must not fail and will only be called if is_return_candidate
5952 returned true with the same parameters. */
5953 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5955 /* Finish processing this argument and prepare to start processing
5956 the next one. */
5957 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5958 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5960 AAPCS_CP(vfp)
5963 #undef AAPCS_CP
5965 static int
5966 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5967 const_tree type)
5969 int i;
5971 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5972 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5973 return i;
5975 return -1;
5978 static int
5979 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5981 /* We aren't passed a decl, so we can't check that a call is local.
5982 However, it isn't clear that that would be a win anyway, since it
5983 might limit some tail-calling opportunities. */
5984 enum arm_pcs pcs_variant;
5986 if (fntype)
5988 const_tree fndecl = NULL_TREE;
5990 if (TREE_CODE (fntype) == FUNCTION_DECL)
5992 fndecl = fntype;
5993 fntype = TREE_TYPE (fntype);
5996 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5998 else
5999 pcs_variant = arm_pcs_default;
6001 if (pcs_variant != ARM_PCS_AAPCS)
6003 int i;
6005 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6006 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6007 TYPE_MODE (type),
6008 type))
6009 return i;
6011 return -1;
6014 static rtx
6015 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6016 const_tree fntype)
6018 /* We aren't passed a decl, so we can't check that a call is local.
6019 However, it isn't clear that that would be a win anyway, since it
6020 might limit some tail-calling opportunities. */
6021 enum arm_pcs pcs_variant;
6022 int unsignedp ATTRIBUTE_UNUSED;
6024 if (fntype)
6026 const_tree fndecl = NULL_TREE;
6028 if (TREE_CODE (fntype) == FUNCTION_DECL)
6030 fndecl = fntype;
6031 fntype = TREE_TYPE (fntype);
6034 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6036 else
6037 pcs_variant = arm_pcs_default;
6039 /* Promote integer types. */
6040 if (type && INTEGRAL_TYPE_P (type))
6041 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6043 if (pcs_variant != ARM_PCS_AAPCS)
6045 int i;
6047 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6048 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6049 type))
6050 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6051 mode, type);
6054 /* Promotes small structs returned in a register to full-word size
6055 for big-endian AAPCS. */
6056 if (type && arm_return_in_msb (type))
6058 HOST_WIDE_INT size = int_size_in_bytes (type);
6059 if (size % UNITS_PER_WORD != 0)
6061 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6062 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6066 return gen_rtx_REG (mode, R0_REGNUM);
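/* Editorial note: the MSB promotion above means that, on a big-endian AAPCS
   target, a 3-byte structure result has its size rounded up to a full word
   and is returned in SImode, so the value sits at the most significant end
   of r0.  */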
6069 static rtx
6070 aapcs_libcall_value (machine_mode mode)
6072 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6073 && GET_MODE_SIZE (mode) <= 4)
6074 mode = SImode;
6076 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6079 /* Lay out a function argument using the AAPCS rules. The rule
6080 numbers referred to here are those in the AAPCS. */
6081 static void
6082 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6083 const_tree type, bool named)
6085 int nregs, nregs2;
6086 int ncrn;
6088 /* We only need to do this once per argument. */
6089 if (pcum->aapcs_arg_processed)
6090 return;
6092 pcum->aapcs_arg_processed = true;
6094 /* Special case: if named is false then we are handling an incoming
6095 anonymous argument which is on the stack. */
6096 if (!named)
6097 return;
6099 /* Is this a potential co-processor register candidate? */
6100 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6102 int slot = aapcs_select_call_coproc (pcum, mode, type);
6103 pcum->aapcs_cprc_slot = slot;
6105 /* We don't have to apply any of the rules from part B of the
6106 preparation phase, these are handled elsewhere in the
6107 compiler. */
6109 if (slot >= 0)
6111 /* A Co-processor register candidate goes either in its own
6112 class of registers or on the stack. */
6113 if (!pcum->aapcs_cprc_failed[slot])
6115 /* C1.cp - Try to allocate the argument to co-processor
6116 registers. */
6117 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6118 return;
6120 /* C2.cp - Put the argument on the stack and note that we
6121 can't assign any more candidates in this slot. We also
6122 need to note that we have allocated stack space, so that
6123 we won't later try to split a non-cprc candidate between
6124 core registers and the stack. */
6125 pcum->aapcs_cprc_failed[slot] = true;
6126 pcum->can_split = false;
6129 /* We didn't get a register, so this argument goes on the
6130 stack. */
6131 gcc_assert (pcum->can_split == false);
6132 return;
6136 /* C3 - For double-word aligned arguments, round the NCRN up to the
6137 next even number. */
6138 ncrn = pcum->aapcs_ncrn;
6139 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6140 ncrn++;
6142 nregs = ARM_NUM_REGS2 (mode, type);
6144 /* Sigh, this test should really assert that nregs > 0, but a GCC
6145 extension allows empty structs and then gives them empty size; it
6146 then allows such a structure to be passed by value. For some of
6147 the code below we have to pretend that such an argument has
6148 non-zero size so that we 'locate' it correctly either in
6149 registers or on the stack. */
6150 gcc_assert (nregs >= 0);
6152 nregs2 = nregs ? nregs : 1;
6154 /* C4 - Argument fits entirely in core registers. */
6155 if (ncrn + nregs2 <= NUM_ARG_REGS)
6157 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6158 pcum->aapcs_next_ncrn = ncrn + nregs;
6159 return;
6162 /* C5 - Some core registers left and there are no arguments already
6163 on the stack: split this argument between the remaining core
6164 registers and the stack. */
6165 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6167 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6168 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6169 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6170 return;
6173 /* C6 - NCRN is set to 4. */
6174 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6176 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6177 return;
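/* Editorial worked example for the base (soft-float) AAPCS: in
   f (int a, double b), 'a' takes r0; 'b' needs doubleword alignment, so C3
   rounds the NCRN up to 2 and C4 places it in r2/r3.  In
   f (int a, int b, struct { int x, y, z; } c), 'c' no longer fits, so C5
   splits it: r2/r3 hold its first 8 bytes (aapcs_partial == 8) and the
   final word goes on the stack.  */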
6180 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6181 for a call to a function whose data type is FNTYPE.
6182 For a library call, FNTYPE is NULL. */
6183 void
6184 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6185 rtx libname,
6186 tree fndecl ATTRIBUTE_UNUSED)
6188 /* Determine the calling convention to use. */
6189 if (fntype)
6190 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6191 else
6192 pcum->pcs_variant = arm_pcs_default;
6194 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6196 if (arm_libcall_uses_aapcs_base (libname))
6197 pcum->pcs_variant = ARM_PCS_AAPCS;
6199 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6200 pcum->aapcs_reg = NULL_RTX;
6201 pcum->aapcs_partial = 0;
6202 pcum->aapcs_arg_processed = false;
6203 pcum->aapcs_cprc_slot = -1;
6204 pcum->can_split = true;
6206 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6208 int i;
6210 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6212 pcum->aapcs_cprc_failed[i] = false;
6213 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6216 return;
6219 /* Legacy ABIs */
6221 /* On the ARM, the offset starts at 0. */
6222 pcum->nregs = 0;
6223 pcum->iwmmxt_nregs = 0;
6224 pcum->can_split = true;
6226 /* Varargs vectors are treated the same as long long.
6227 named_count avoids having to change the way arm handles 'named'.  */
6228 pcum->named_count = 0;
6229 pcum->nargs = 0;
6231 if (TARGET_REALLY_IWMMXT && fntype)
6233 tree fn_arg;
6235 for (fn_arg = TYPE_ARG_TYPES (fntype);
6236 fn_arg;
6237 fn_arg = TREE_CHAIN (fn_arg))
6238 pcum->named_count += 1;
6240 if (! pcum->named_count)
6241 pcum->named_count = INT_MAX;
6245 /* Return true if mode/type need doubleword alignment. */
6246 static bool
6247 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6249 if (!type)
6250 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6252 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6253 if (!AGGREGATE_TYPE_P (type))
6254 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6256 /* Array types: Use member alignment of element type. */
6257 if (TREE_CODE (type) == ARRAY_TYPE)
6258 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6260 /* Record/aggregate types: Use greatest member alignment of any member. */
6261 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6262 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6263 return true;
6265 return false;
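/* Editorial examples: on AAPCS targets, long long, double and
   struct { double d; int i; } all report doubleword alignment here, whereas
   int, pointers and struct { char c[8]; } do not.  */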
6269 /* Determine where to put an argument to a function.
6270 Value is zero to push the argument on the stack,
6271 or a hard register in which to store the argument.
6273 MODE is the argument's machine mode.
6274 TYPE is the data type of the argument (as a tree).
6275 This is null for libcalls where that information may
6276 not be available.
6277 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6278 the preceding args and about the function being called.
6279 NAMED is nonzero if this argument is a named parameter
6280 (otherwise it is an extra parameter matching an ellipsis).
6282 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6283 other arguments are passed on the stack. If (NAMED == 0) (which happens
6284 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6285 defined), say it is passed on the stack (function_prologue will
6286 indeed make it be passed on the stack if necessary). */
6288 static rtx
6289 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6290 const_tree type, bool named)
6292 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6293 int nregs;
6295 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6296 a call insn (op3 of a call_value insn). */
6297 if (mode == VOIDmode)
6298 return const0_rtx;
6300 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6302 aapcs_layout_arg (pcum, mode, type, named);
6303 return pcum->aapcs_reg;
6306 /* Varargs vectors are treated the same as long long.
6307 named_count avoids having to change the way arm handles 'named'.  */
6308 if (TARGET_IWMMXT_ABI
6309 && arm_vector_mode_supported_p (mode)
6310 && pcum->named_count > pcum->nargs + 1)
6312 if (pcum->iwmmxt_nregs <= 9)
6313 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6314 else
6316 pcum->can_split = false;
6317 return NULL_RTX;
6321 /* Put doubleword aligned quantities in even register pairs. */
6322 if (pcum->nregs & 1
6323 && ARM_DOUBLEWORD_ALIGN
6324 && arm_needs_doubleword_align (mode, type))
6325 pcum->nregs++;
6327 /* Only allow splitting an arg between regs and memory if all preceding
6328 args were allocated to regs. For args passed by reference we only count
6329 the reference pointer. */
6330 if (pcum->can_split)
6331 nregs = 1;
6332 else
6333 nregs = ARM_NUM_REGS2 (mode, type);
6335 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6336 return NULL_RTX;
6338 return gen_rtx_REG (mode, pcum->nregs);
6341 static unsigned int
6342 arm_function_arg_boundary (machine_mode mode, const_tree type)
6344 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6345 ? DOUBLEWORD_ALIGNMENT
6346 : PARM_BOUNDARY);
6349 static int
6350 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6351 tree type, bool named)
6353 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6354 int nregs = pcum->nregs;
6356 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6358 aapcs_layout_arg (pcum, mode, type, named);
6359 return pcum->aapcs_partial;
6362 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6363 return 0;
6365 if (NUM_ARG_REGS > nregs
6366 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6367 && pcum->can_split)
6368 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6370 return 0;
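/* Editorial worked example for the legacy path: with two argument registers
   already used (nregs == 2), a 12-byte structure needs three more words;
   NUM_ARG_REGS (4) is greater than 2 but less than 2 + 3, so the function
   reports (4 - 2) * UNITS_PER_WORD == 8 bytes passed in r2/r3, with the
   remaining word going on the stack.  */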
6373 /* Update the data in PCUM to advance over an argument
6374 of mode MODE and data type TYPE.
6375 (TYPE is null for libcalls where that information may not be available.) */
6377 static void
6378 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6379 const_tree type, bool named)
6381 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6383 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6385 aapcs_layout_arg (pcum, mode, type, named);
6387 if (pcum->aapcs_cprc_slot >= 0)
6389 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6390 type);
6391 pcum->aapcs_cprc_slot = -1;
6394 /* Generic stuff. */
6395 pcum->aapcs_arg_processed = false;
6396 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6397 pcum->aapcs_reg = NULL_RTX;
6398 pcum->aapcs_partial = 0;
6400 else
6402 pcum->nargs += 1;
6403 if (arm_vector_mode_supported_p (mode)
6404 && pcum->named_count > pcum->nargs
6405 && TARGET_IWMMXT_ABI)
6406 pcum->iwmmxt_nregs += 1;
6407 else
6408 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6412 /* Variable sized types are passed by reference. This is a GCC
6413 extension to the ARM ABI. */
6415 static bool
6416 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6417 machine_mode mode ATTRIBUTE_UNUSED,
6418 const_tree type, bool named ATTRIBUTE_UNUSED)
6420 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6423 /* Encode the current state of the #pragma [no_]long_calls. */
6424 typedef enum
6426 OFF, /* No #pragma [no_]long_calls is in effect. */
6427 LONG, /* #pragma long_calls is in effect. */
6428 SHORT /* #pragma no_long_calls is in effect. */
6429 } arm_pragma_enum;
6431 static arm_pragma_enum arm_pragma_long_calls = OFF;
6433 void
6434 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6436 arm_pragma_long_calls = LONG;
6439 void
6440 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6442 arm_pragma_long_calls = SHORT;
6445 void
6446 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6448 arm_pragma_long_calls = OFF;
6451 /* Handle an attribute requiring a FUNCTION_DECL;
6452 arguments as in struct attribute_spec.handler. */
6453 static tree
6454 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6455 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6457 if (TREE_CODE (*node) != FUNCTION_DECL)
6459 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6460 name);
6461 *no_add_attrs = true;
6464 return NULL_TREE;
6467 /* Handle an "interrupt" or "isr" attribute;
6468 arguments as in struct attribute_spec.handler. */
6469 static tree
6470 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6471 bool *no_add_attrs)
6473 if (DECL_P (*node))
6475 if (TREE_CODE (*node) != FUNCTION_DECL)
6477 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6478 name);
6479 *no_add_attrs = true;
6481 /* FIXME: the argument if any is checked for type attributes;
6482 should it be checked for decl ones? */
6484 else
6486 if (TREE_CODE (*node) == FUNCTION_TYPE
6487 || TREE_CODE (*node) == METHOD_TYPE)
6489 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6491 warning (OPT_Wattributes, "%qE attribute ignored",
6492 name);
6493 *no_add_attrs = true;
6496 else if (TREE_CODE (*node) == POINTER_TYPE
6497 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6498 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6499 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6501 *node = build_variant_type_copy (*node);
6502 TREE_TYPE (*node) = build_type_attribute_variant
6503 (TREE_TYPE (*node),
6504 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6505 *no_add_attrs = true;
6507 else
6509 /* Possibly pass this attribute on from the type to a decl. */
6510 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6511 | (int) ATTR_FLAG_FUNCTION_NEXT
6512 | (int) ATTR_FLAG_ARRAY_NEXT))
6514 *no_add_attrs = true;
6515 return tree_cons (name, args, NULL_TREE);
6517 else
6519 warning (OPT_Wattributes, "%qE attribute ignored",
6520 name);
6525 return NULL_TREE;
6528 /* Handle a "pcs" attribute; arguments as in struct
6529 attribute_spec.handler. */
6530 static tree
6531 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6532 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6534 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6536 warning (OPT_Wattributes, "%qE attribute ignored", name);
6537 *no_add_attrs = true;
6539 return NULL_TREE;
6542 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6543 /* Handle the "notshared" attribute. This attribute is another way of
6544 requesting hidden visibility. ARM's compiler supports
6545 "__declspec(notshared)"; we support the same thing via an
6546 attribute. */
6548 static tree
6549 arm_handle_notshared_attribute (tree *node,
6550 tree name ATTRIBUTE_UNUSED,
6551 tree args ATTRIBUTE_UNUSED,
6552 int flags ATTRIBUTE_UNUSED,
6553 bool *no_add_attrs)
6555 tree decl = TYPE_NAME (*node);
6557 if (decl)
6559 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6560 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6561 *no_add_attrs = false;
6563 return NULL_TREE;
6565 #endif
6567 /* Return 0 if the attributes for two types are incompatible, 1 if they
6568 are compatible, and 2 if they are nearly compatible (which causes a
6569 warning to be generated). */
6570 static int
6571 arm_comp_type_attributes (const_tree type1, const_tree type2)
6573 int l1, l2, s1, s2;
6575 /* Check for mismatch of non-default calling convention. */
6576 if (TREE_CODE (type1) != FUNCTION_TYPE)
6577 return 1;
6579 /* Check for mismatched call attributes. */
6580 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6581 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6582 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6583 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6585 /* Only bother to check if an attribute is defined. */
6586 if (l1 | l2 | s1 | s2)
6588 /* If one type has an attribute, the other must have the same attribute. */
6589 if ((l1 != l2) || (s1 != s2))
6590 return 0;
6592 /* Disallow mixed attributes. */
6593 if ((l1 & s2) || (l2 & s1))
6594 return 0;
6597 /* Check for mismatched ISR attribute. */
6598 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6599 if (! l1)
6600 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6601 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6602 if (! l2)
6603 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6604 if (l1 != l2)
6605 return 0;
6607 return 1;
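/* Editorial illustration (hypothetical user code): these two function types
   carry conflicting call attributes, so the check above returns 0.  */
#if 0
typedef void near_fn (void) __attribute__ ((short_call));
typedef void far_fn (void) __attribute__ ((long_call));
#endif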
6610 /* Assigns default attributes to newly defined type. This is used to
6611 set short_call/long_call attributes for function types of
6612 functions defined inside corresponding #pragma scopes. */
6613 static void
6614 arm_set_default_type_attributes (tree type)
6616 /* Add __attribute__ ((long_call)) to all functions, when
6617 inside #pragma long_calls or __attribute__ ((short_call)),
6618 when inside #pragma no_long_calls. */
6619 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6621 tree type_attr_list, attr_name;
6622 type_attr_list = TYPE_ATTRIBUTES (type);
6624 if (arm_pragma_long_calls == LONG)
6625 attr_name = get_identifier ("long_call");
6626 else if (arm_pragma_long_calls == SHORT)
6627 attr_name = get_identifier ("short_call");
6628 else
6629 return;
6631 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6632 TYPE_ATTRIBUTES (type) = type_attr_list;
6636 /* Return true if DECL is known to be linked into section SECTION. */
6638 static bool
6639 arm_function_in_section_p (tree decl, section *section)
6641 /* We can only be certain about the prevailing symbol definition. */
6642 if (!decl_binds_to_current_def_p (decl))
6643 return false;
6645 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6646 if (!DECL_SECTION_NAME (decl))
6648 /* Make sure that we will not create a unique section for DECL. */
6649 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6650 return false;
6653 return function_section (decl) == section;
6656 /* Return nonzero if a 32-bit "long_call" should be generated for
6657 a call from the current function to DECL. We generate a long_call
6658 if the function:
6660 a. has an __attribute__ ((long_call))
6661 or b. is within the scope of a #pragma long_calls
6662 or c. the -mlong-calls command line switch has been specified
6664 However we do not generate a long call if the function:
6666 d. has an __attribute__ ((short_call))
6667 or e. is inside the scope of a #pragma no_long_calls
6668 or f. is defined in the same section as the current function. */
6670 bool
6671 arm_is_long_call_p (tree decl)
6673 tree attrs;
6675 if (!decl)
6676 return TARGET_LONG_CALLS;
6678 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6679 if (lookup_attribute ("short_call", attrs))
6680 return false;
6682 /* For "f", be conservative, and only cater for cases in which the
6683 whole of the current function is placed in the same section. */
6684 if (!flag_reorder_blocks_and_partition
6685 && TREE_CODE (decl) == FUNCTION_DECL
6686 && arm_function_in_section_p (decl, current_function_section ()))
6687 return false;
6689 if (lookup_attribute ("long_call", attrs))
6690 return true;
6692 return TARGET_LONG_CALLS;
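/* Editorial illustration (hypothetical user code) of cases (a) and (b)
   above; the -mlong-calls switch covers case (c) for everything else.  */
#if 0
extern void far_away (void) __attribute__ ((long_call));   /* case (a) */
#pragma long_calls
extern void also_far (void);                                /* case (b) */
#pragma long_calls_off
#endif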
6695 /* Return nonzero if it is ok to make a tail-call to DECL. */
6696 static bool
6697 arm_function_ok_for_sibcall (tree decl, tree exp)
6699 unsigned long func_type;
6701 if (cfun->machine->sibcall_blocked)
6702 return false;
6704 /* Never tailcall something if we are generating code for Thumb-1. */
6705 if (TARGET_THUMB1)
6706 return false;
6708 /* The PIC register is live on entry to VxWorks PLT entries, so we
6709 must make the call before restoring the PIC register. */
6710 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6711 return false;
6713 /* If we are interworking and the function is not declared static
6714 then we can't tail-call it unless we know that it exists in this
6715 compilation unit (since it might be a Thumb routine). */
6716 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6717 && !TREE_ASM_WRITTEN (decl))
6718 return false;
6720 func_type = arm_current_func_type ();
6721 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6722 if (IS_INTERRUPT (func_type))
6723 return false;
6725 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6727 /* Check that the return value locations are the same. For
6728 example that we aren't returning a value from the sibling in
6729 a VFP register but then need to transfer it to a core
6730 register. */
6731 rtx a, b;
6732 tree decl_or_type = decl;
6734 /* If it is an indirect function pointer, get the function type. */
6735 if (!decl)
6736 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
6738 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
6739 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6740 cfun->decl, false);
6741 if (!rtx_equal_p (a, b))
6742 return false;
6745 /* Never tailcall if function may be called with a misaligned SP. */
6746 if (IS_STACKALIGN (func_type))
6747 return false;
6749 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6750 references should become a NOP. Don't convert such calls into
6751 sibling calls. */
6752 if (TARGET_AAPCS_BASED
6753 && arm_abi == ARM_ABI_AAPCS
6754 && decl
6755 && DECL_WEAK (decl))
6756 return false;
6758 /* Everything else is ok. */
6759 return true;
6763 /* Addressing mode support functions. */
6765 /* Return nonzero if X is a legitimate immediate operand when compiling
6766 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6768 legitimate_pic_operand_p (rtx x)
6770 if (GET_CODE (x) == SYMBOL_REF
6771 || (GET_CODE (x) == CONST
6772 && GET_CODE (XEXP (x, 0)) == PLUS
6773 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6774 return 0;
6776 return 1;
6779 /* Record that the current function needs a PIC register. Initialize
6780 cfun->machine->pic_reg if we have not already done so. */
6782 static void
6783 require_pic_register (void)
6785 /* A lot of the logic here is made obscure by the fact that this
6786 routine gets called as part of the rtx cost estimation process.
6787 We don't want those calls to affect any assumptions about the real
6788 function; and further, we can't call entry_of_function() until we
6789 start the real expansion process. */
6790 if (!crtl->uses_pic_offset_table)
6792 gcc_assert (can_create_pseudo_p ());
6793 if (arm_pic_register != INVALID_REGNUM
6794 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6796 if (!cfun->machine->pic_reg)
6797 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6799 /* Play games to avoid marking the function as needing pic
6800 if we are being called as part of the cost-estimation
6801 process. */
6802 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6803 crtl->uses_pic_offset_table = 1;
6805 else
6807 rtx_insn *seq, *insn;
6809 if (!cfun->machine->pic_reg)
6810 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6812 /* Play games to avoid marking the function as needing pic
6813 if we are being called as part of the cost-estimation
6814 process. */
6815 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6817 crtl->uses_pic_offset_table = 1;
6818 start_sequence ();
6820 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6821 && arm_pic_register > LAST_LO_REGNUM)
6822 emit_move_insn (cfun->machine->pic_reg,
6823 gen_rtx_REG (Pmode, arm_pic_register));
6824 else
6825 arm_load_pic_register (0UL);
6827 seq = get_insns ();
6828 end_sequence ();
6830 for (insn = seq; insn; insn = NEXT_INSN (insn))
6831 if (INSN_P (insn))
6832 INSN_LOCATION (insn) = prologue_location;
6834 /* We can be called during expansion of PHI nodes, where
6835 we can't yet emit instructions directly in the final
6836 insn stream. Queue the insns on the entry edge, they will
6837 be committed after everything else is expanded. */
6838 insert_insn_on_edge (seq,
6839 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6846 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6848 if (GET_CODE (orig) == SYMBOL_REF
6849 || GET_CODE (orig) == LABEL_REF)
6851 rtx insn;
6853 if (reg == 0)
6855 gcc_assert (can_create_pseudo_p ());
6856 reg = gen_reg_rtx (Pmode);
6859 /* VxWorks does not impose a fixed gap between segments; the run-time
6860 gap can be different from the object-file gap. We therefore can't
6861 use GOTOFF unless we are absolutely sure that the symbol is in the
6862 same segment as the GOT. Unfortunately, the flexibility of linker
6863 scripts means that we can't be sure of that in general, so assume
6864 that GOTOFF is never valid on VxWorks. */
6865 if ((GET_CODE (orig) == LABEL_REF
6866 || (GET_CODE (orig) == SYMBOL_REF
6867 && SYMBOL_REF_LOCAL_P (orig)))
6868 && NEED_GOT_RELOC
6869 && arm_pic_data_is_text_relative)
6870 insn = arm_pic_static_addr (orig, reg);
6871 else
6873 rtx pat;
6874 rtx mem;
6876 /* If this function doesn't have a pic register, create one now. */
6877 require_pic_register ();
6879 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6881 /* Make the MEM as close to a constant as possible. */
6882 mem = SET_SRC (pat);
6883 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6884 MEM_READONLY_P (mem) = 1;
6885 MEM_NOTRAP_P (mem) = 1;
6887 insn = emit_insn (pat);
6890 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6891 by loop. */
6892 set_unique_reg_note (insn, REG_EQUAL, orig);
6894 return reg;
6896 else if (GET_CODE (orig) == CONST)
6898 rtx base, offset;
6900 if (GET_CODE (XEXP (orig, 0)) == PLUS
6901 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6902 return orig;
6904 /* Handle the case where we have: const (UNSPEC_TLS). */
6905 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6906 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6907 return orig;
6909 /* Handle the case where we have:
6910 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6911 CONST_INT. */
6912 if (GET_CODE (XEXP (orig, 0)) == PLUS
6913 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6914 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6916 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6917 return orig;
6920 if (reg == 0)
6922 gcc_assert (can_create_pseudo_p ());
6923 reg = gen_reg_rtx (Pmode);
6926 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6928 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6929 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6930 base == reg ? 0 : reg);
6932 if (CONST_INT_P (offset))
6934 /* The base register doesn't really matter, we only want to
6935 test the index for the appropriate mode. */
6936 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6938 gcc_assert (can_create_pseudo_p ());
6939 offset = force_reg (Pmode, offset);
6942 if (CONST_INT_P (offset))
6943 return plus_constant (Pmode, base, INTVAL (offset));
6946 if (GET_MODE_SIZE (mode) > 4
6947 && (GET_MODE_CLASS (mode) == MODE_INT
6948 || TARGET_SOFT_FLOAT))
6950 emit_insn (gen_addsi3 (reg, base, offset));
6951 return reg;
6954 return gen_rtx_PLUS (Pmode, base, offset);
6957 return orig;
6961 /* Find a spare register to use during the prolog of a function. */
6963 static int
6964 thumb_find_work_register (unsigned long pushed_regs_mask)
6966 int reg;
6968 /* Check the argument registers first as these are call-used. The
6969 register allocation order means that sometimes r3 might be used
6970 but earlier argument registers might not, so check them all. */
6971 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6972 if (!df_regs_ever_live_p (reg))
6973 return reg;
6975 /* Before going on to check the call-saved registers we can try a couple
6976 more ways of deducing that r3 is available. The first is when we are
6977 pushing anonymous arguments onto the stack and we have less than 4
6978 registers worth of fixed arguments(*). In this case r3 will be part of
6979 the variable argument list and so we can be sure that it will be
6980 pushed right at the start of the function. Hence it will be available
6981 for the rest of the prologue.
6982 (*): ie crtl->args.pretend_args_size is greater than 0. */
6983 if (cfun->machine->uses_anonymous_args
6984 && crtl->args.pretend_args_size > 0)
6985 return LAST_ARG_REGNUM;
6987 /* The other case is when we have fixed arguments but less than 4 registers
6988 worth. In this case r3 might be used in the body of the function, but
6989 it is not being used to convey an argument into the function. In theory
6990 we could just check crtl->args.size to see how many bytes are
6991 being passed in argument registers, but it seems that it is unreliable.
6992 Sometimes it will have the value 0 when in fact arguments are being
6993 passed. (See testcase execute/20021111-1.c for an example). So we also
6994 check the args_info.nregs field as well. The problem with this field is
6995 that it makes no allowances for arguments that are passed to the
6996 function but which are not used. Hence we could miss an opportunity
6997 when a function has an unused argument in r3. But it is better to be
6998 safe than to be sorry. */
6999 if (! cfun->machine->uses_anonymous_args
7000 && crtl->args.size >= 0
7001 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7002 && (TARGET_AAPCS_BASED
7003 ? crtl->args.info.aapcs_ncrn < 4
7004 : crtl->args.info.nregs < 4))
7005 return LAST_ARG_REGNUM;
7007 /* Otherwise look for a call-saved register that is going to be pushed. */
7008 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7009 if (pushed_regs_mask & (1 << reg))
7010 return reg;
7012 if (TARGET_THUMB2)
7014 /* Thumb-2 can use high regs. */
7015 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7016 if (pushed_regs_mask & (1 << reg))
7017 return reg;
7019 /* Something went wrong - thumb_compute_save_reg_mask()
7020 should have arranged for a suitable register to be pushed. */
7021 gcc_unreachable ();
7024 static GTY(()) int pic_labelno;
7026 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7027 low register. */
7029 void
7030 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7032 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7034 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7035 return;
7037 gcc_assert (flag_pic);
7039 pic_reg = cfun->machine->pic_reg;
7040 if (TARGET_VXWORKS_RTP)
7042 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7043 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7044 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7046 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7048 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7049 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7051 else
7053 /* We use an UNSPEC rather than a LABEL_REF because this label
7054 never appears in the code stream. */
7056 labelno = GEN_INT (pic_labelno++);
7057 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7058 l1 = gen_rtx_CONST (VOIDmode, l1);
7060 /* On the ARM the PC register contains 'dot + 8' at the time of the
7061 addition, on the Thumb it is 'dot + 4'. */
7062 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7063 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7064 UNSPEC_GOTSYM_OFF);
7065 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7067 if (TARGET_32BIT)
7069 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7071 else /* TARGET_THUMB1 */
7073 if (arm_pic_register != INVALID_REGNUM
7074 && REGNO (pic_reg) > LAST_LO_REGNUM)
7076 /* We will have pushed the pic register, so we should always be
7077 able to find a work register. */
7078 pic_tmp = gen_rtx_REG (SImode,
7079 thumb_find_work_register (saved_regs));
7080 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7081 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7082 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7084 else if (arm_pic_register != INVALID_REGNUM
7085 && arm_pic_register > LAST_LO_REGNUM
7086 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7088 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7089 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7090 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7092 else
7093 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7097 /* Need to emit this whether or not we obey regdecls,
7098 since setjmp/longjmp can cause life info to screw up. */
7099 emit_use (pic_reg);
7102 /* Generate code to load the address of a static var when flag_pic is set. */
7103 static rtx
7104 arm_pic_static_addr (rtx orig, rtx reg)
7106 rtx l1, labelno, offset_rtx, insn;
7108 gcc_assert (flag_pic);
7110 /* We use an UNSPEC rather than a LABEL_REF because this label
7111 never appears in the code stream. */
7112 labelno = GEN_INT (pic_labelno++);
7113 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7114 l1 = gen_rtx_CONST (VOIDmode, l1);
7116 /* On the ARM the PC register contains 'dot + 8' at the time of the
7117 addition, on the Thumb it is 'dot + 4'. */
7118 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7119 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7120 UNSPEC_SYMBOL_OFFSET);
7121 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7123 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7124 return insn;
7127 /* Return nonzero if X is valid as an ARM state addressing register. */
7128 static int
7129 arm_address_register_rtx_p (rtx x, int strict_p)
7131 int regno;
7133 if (!REG_P (x))
7134 return 0;
7136 regno = REGNO (x);
7138 if (strict_p)
7139 return ARM_REGNO_OK_FOR_BASE_P (regno);
7141 return (regno <= LAST_ARM_REGNUM
7142 || regno >= FIRST_PSEUDO_REGISTER
7143 || regno == FRAME_POINTER_REGNUM
7144 || regno == ARG_POINTER_REGNUM);
7147 /* Return TRUE if this rtx is the difference of a symbol and a label,
7148 and will reduce to a PC-relative relocation in the object file.
7149 Expressions like this can be left alone when generating PIC, rather
7150 than forced through the GOT. */
7151 static int
7152 pcrel_constant_p (rtx x)
7154 if (GET_CODE (x) == MINUS)
7155 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7157 return FALSE;
7160 /* Return true if X will surely end up in an index register after next
7161 splitting pass. */
7162 static bool
7163 will_be_in_index_register (const_rtx x)
7165 /* arm.md: calculate_pic_address will split this into a register. */
7166 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7169 /* Return nonzero if X is a valid ARM state address operand. */
7171 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7172 int strict_p)
7174 bool use_ldrd;
7175 enum rtx_code code = GET_CODE (x);
7177 if (arm_address_register_rtx_p (x, strict_p))
7178 return 1;
7180 use_ldrd = (TARGET_LDRD
7181 && (mode == DImode
7182 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7184 if (code == POST_INC || code == PRE_DEC
7185 || ((code == PRE_INC || code == POST_DEC)
7186 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7187 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7189 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7190 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7191 && GET_CODE (XEXP (x, 1)) == PLUS
7192 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7194 rtx addend = XEXP (XEXP (x, 1), 1);
7196 /* Don't allow ldrd post increment by register because it's hard
7197 to fixup invalid register choices. */
7198 if (use_ldrd
7199 && GET_CODE (x) == POST_MODIFY
7200 && REG_P (addend))
7201 return 0;
7203 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7204 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7207 /* After reload constants split into minipools will have addresses
7208 from a LABEL_REF. */
7209 else if (reload_completed
7210 && (code == LABEL_REF
7211 || (code == CONST
7212 && GET_CODE (XEXP (x, 0)) == PLUS
7213 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7214 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7215 return 1;
7217 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7218 return 0;
7220 else if (code == PLUS)
7222 rtx xop0 = XEXP (x, 0);
7223 rtx xop1 = XEXP (x, 1);
7225 return ((arm_address_register_rtx_p (xop0, strict_p)
7226 && ((CONST_INT_P (xop1)
7227 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7228 || (!strict_p && will_be_in_index_register (xop1))))
7229 || (arm_address_register_rtx_p (xop1, strict_p)
7230 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7233 #if 0
7234 /* Reload currently can't handle MINUS, so disable this for now */
7235 else if (GET_CODE (x) == MINUS)
7237 rtx xop0 = XEXP (x, 0);
7238 rtx xop1 = XEXP (x, 1);
7240 return (arm_address_register_rtx_p (xop0, strict_p)
7241 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7243 #endif
7245 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7246 && code == SYMBOL_REF
7247 && CONSTANT_POOL_ADDRESS_P (x)
7248 && ! (flag_pic
7249 && symbol_mentioned_p (get_pool_constant (x))
7250 && ! pcrel_constant_p (get_pool_constant (x))))
7251 return 1;
7253 return 0;
7256 /* Return nonzero if X is a valid Thumb-2 address operand. */
7257 static int
7258 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7260 bool use_ldrd;
7261 enum rtx_code code = GET_CODE (x);
7263 if (arm_address_register_rtx_p (x, strict_p))
7264 return 1;
7266 use_ldrd = (TARGET_LDRD
7267 && (mode == DImode
7268 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7270 if (code == POST_INC || code == PRE_DEC
7271 || ((code == PRE_INC || code == POST_DEC)
7272 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7273 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7275 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7276 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7277 && GET_CODE (XEXP (x, 1)) == PLUS
7278 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7280 /* Thumb-2 only has autoincrement by constant. */
7281 rtx addend = XEXP (XEXP (x, 1), 1);
7282 HOST_WIDE_INT offset;
7284 if (!CONST_INT_P (addend))
7285 return 0;
7287 offset = INTVAL (addend);
7288 if (GET_MODE_SIZE (mode) <= 4)
7289 return (offset > -256 && offset < 256);
7291 return (use_ldrd && offset > -1024 && offset < 1024
7292 && (offset & 3) == 0);
7295 /* After reload constants split into minipools will have addresses
7296 from a LABEL_REF. */
7297 else if (reload_completed
7298 && (code == LABEL_REF
7299 || (code == CONST
7300 && GET_CODE (XEXP (x, 0)) == PLUS
7301 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7302 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7303 return 1;
7305 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7306 return 0;
7308 else if (code == PLUS)
7310 rtx xop0 = XEXP (x, 0);
7311 rtx xop1 = XEXP (x, 1);
7313 return ((arm_address_register_rtx_p (xop0, strict_p)
7314 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7315 || (!strict_p && will_be_in_index_register (xop1))))
7316 || (arm_address_register_rtx_p (xop1, strict_p)
7317 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7320 /* Normally we can assign constant values to target registers without
7321 the help of a constant pool.  But in some cases we have to use the
7322 constant pool, for example:
7323 1) assigning a label to a register.
7324 2) sign-extending an 8-bit value to 32 bits and assigning it to a register.
7326 A constant pool access of the form:
7327 (set (reg r0) (mem (symbol_ref (".LC0"))))
7328 will cause the use of the literal pool (later, in function arm_reorg).
7329 So here we mark such a form as invalid, and the compiler will then
7330 adjust it into:
7331 (set (reg r0) (symbol_ref (".LC0")))
7332 (set (reg r0) (mem (reg r0))).
7333 No extra register is required, and (mem (reg r0)) won't cause the use
7334 of literal pools. */
7335 else if (arm_disable_literal_pool && code == SYMBOL_REF
7336 && CONSTANT_POOL_ADDRESS_P (x))
7337 return 0;
7339 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7340 && code == SYMBOL_REF
7341 && CONSTANT_POOL_ADDRESS_P (x)
7342 && ! (flag_pic
7343 && symbol_mentioned_p (get_pool_constant (x))
7344 && ! pcrel_constant_p (get_pool_constant (x))))
7345 return 1;
7347 return 0;
7350 /* Return nonzero if INDEX is valid for an address index operand in
7351 ARM state. */
7352 static int
7353 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7354 int strict_p)
7356 HOST_WIDE_INT range;
7357 enum rtx_code code = GET_CODE (index);
7359 /* Standard coprocessor addressing modes. */
7360 if (TARGET_HARD_FLOAT
7361 && TARGET_VFP
7362 && (mode == SFmode || mode == DFmode))
7363 return (code == CONST_INT && INTVAL (index) < 1024
7364 && INTVAL (index) > -1024
7365 && (INTVAL (index) & 3) == 0);
7367 /* For quad modes, we restrict the constant offset to be slightly less
7368 than what the instruction format permits. We do this because for
7369 quad mode moves, we will actually decompose them into two separate
7370 double-mode reads or writes. INDEX must therefore be a valid
7371 (double-mode) offset and so should INDEX+8. */
7372 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7373 return (code == CONST_INT
7374 && INTVAL (index) < 1016
7375 && INTVAL (index) > -1024
7376 && (INTVAL (index) & 3) == 0);
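/* Editor's note (not from the original source): a worked example of the
   quad-mode limit above.  An offset of 1012 is split into double-mode
   accesses at 1012 and 1020, both inside the (-1024, 1024) range used for
   the double-register modes below; an offset of 1016 would place the
   second access at 1024, out of range, hence the stricter "< 1016" bound.  */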
7378 /* We have no such constraint on double mode offsets, so we permit the
7379 full range of the instruction format. */
7380 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7381 return (code == CONST_INT
7382 && INTVAL (index) < 1024
7383 && INTVAL (index) > -1024
7384 && (INTVAL (index) & 3) == 0);
7386 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7387 return (code == CONST_INT
7388 && INTVAL (index) < 1024
7389 && INTVAL (index) > -1024
7390 && (INTVAL (index) & 3) == 0);
7392 if (arm_address_register_rtx_p (index, strict_p)
7393 && (GET_MODE_SIZE (mode) <= 4))
7394 return 1;
7396 if (mode == DImode || mode == DFmode)
7398 if (code == CONST_INT)
7400 HOST_WIDE_INT val = INTVAL (index);
7402 if (TARGET_LDRD)
7403 return val > -256 && val < 256;
7404 else
7405 return val > -4096 && val < 4092;
7408 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
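/* Editor's note (not from the original source): without LDRD a DImode or
   DFmode access is performed as two word loads at offsets VAL and VAL+4,
   and each must fit the 12-bit LDR immediate (at most 4095), so VAL is
   limited to 4091 by the "< 4092" test above.  LDRD itself only has an
   8-bit immediate, hence the narrower (-256, 256) range.  */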
7411 if (GET_MODE_SIZE (mode) <= 4
7412 && ! (arm_arch4
7413 && (mode == HImode
7414 || mode == HFmode
7415 || (mode == QImode && outer == SIGN_EXTEND))))
7417 if (code == MULT)
7419 rtx xiop0 = XEXP (index, 0);
7420 rtx xiop1 = XEXP (index, 1);
7422 return ((arm_address_register_rtx_p (xiop0, strict_p)
7423 && power_of_two_operand (xiop1, SImode))
7424 || (arm_address_register_rtx_p (xiop1, strict_p)
7425 && power_of_two_operand (xiop0, SImode)));
7427 else if (code == LSHIFTRT || code == ASHIFTRT
7428 || code == ASHIFT || code == ROTATERT)
7430 rtx op = XEXP (index, 1);
7432 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7433 && CONST_INT_P (op)
7434 && INTVAL (op) > 0
7435 && INTVAL (op) <= 31);
7439 /* For ARM v4 we may be doing a sign-extend operation during the
7440 load. */
7441 if (arm_arch4)
7443 if (mode == HImode
7444 || mode == HFmode
7445 || (outer == SIGN_EXTEND && mode == QImode))
7446 range = 256;
7447 else
7448 range = 4096;
7450 else
7451 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
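/* Editor's note (not from the original source): on ARMv4 and later,
   halfword and signed-byte loads (LDRH/LDRSH/LDRSB) use an 8-bit
   immediate, hence the +-255 range, while LDR/LDRB/STR/STRB use a 12-bit
   immediate, hence +-4095.  Pre-v4 halfword accesses may be synthesized
   from byte accesses, so the slightly smaller +-4094 range presumably
   leaves room for an access to the second byte.  */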
7453 return (code == CONST_INT
7454 && INTVAL (index) < range
7455 && INTVAL (index) > -range);
7458 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
7459 index operand, i.e. 1, 2, 4 or 8. */
7460 static bool
7461 thumb2_index_mul_operand (rtx op)
7463 HOST_WIDE_INT val;
7465 if (!CONST_INT_P (op))
7466 return false;
7468 val = INTVAL (op);
7469 return (val == 1 || val == 2 || val == 4 || val == 8);
7472 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7473 static int
7474 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7476 enum rtx_code code = GET_CODE (index);
7478 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7479 /* Standard coprocessor addressing modes. */
7480 if (TARGET_HARD_FLOAT
7481 && TARGET_VFP
7482 && (mode == SFmode || mode == DFmode))
7483 return (code == CONST_INT && INTVAL (index) < 1024
7484 /* Thumb-2 allows only a > -256 index range for its core register
7485 load/stores. Since we allow SF/DF in core registers, we have
7486 to use the intersection between -256~4096 (core) and -1024~1024
7487 (coprocessor). */
7488 && INTVAL (index) > -256
7489 && (INTVAL (index) & 3) == 0);
7491 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7493 /* For DImode assume values will usually live in core regs
7494 and only allow LDRD addressing modes. */
7495 if (!TARGET_LDRD || mode != DImode)
7496 return (code == CONST_INT
7497 && INTVAL (index) < 1024
7498 && INTVAL (index) > -1024
7499 && (INTVAL (index) & 3) == 0);
7502 /* For quad modes, we restrict the constant offset to be slightly less
7503 than what the instruction format permits. We do this because for
7504 quad mode moves, we will actually decompose them into two separate
7505 double-mode reads or writes. INDEX must therefore be a valid
7506 (double-mode) offset and so should INDEX+8. */
7507 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7508 return (code == CONST_INT
7509 && INTVAL (index) < 1016
7510 && INTVAL (index) > -1024
7511 && (INTVAL (index) & 3) == 0);
7513 /* We have no such constraint on double mode offsets, so we permit the
7514 full range of the instruction format. */
7515 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7516 return (code == CONST_INT
7517 && INTVAL (index) < 1024
7518 && INTVAL (index) > -1024
7519 && (INTVAL (index) & 3) == 0);
7521 if (arm_address_register_rtx_p (index, strict_p)
7522 && (GET_MODE_SIZE (mode) <= 4))
7523 return 1;
7525 if (mode == DImode || mode == DFmode)
7527 if (code == CONST_INT)
7529 HOST_WIDE_INT val = INTVAL (index);
7530 /* ??? Can we assume ldrd for thumb2? */
7531 /* Thumb-2 ldrd only has reg+const addressing modes. */
7532 /* ldrd supports offsets of +-1020.
7533 However the ldr fallback does not. */
7534 return val > -256 && val < 256 && (val & 3) == 0;
7536 else
7537 return 0;
7540 if (code == MULT)
7542 rtx xiop0 = XEXP (index, 0);
7543 rtx xiop1 = XEXP (index, 1);
7545 return ((arm_address_register_rtx_p (xiop0, strict_p)
7546 && thumb2_index_mul_operand (xiop1))
7547 || (arm_address_register_rtx_p (xiop1, strict_p)
7548 && thumb2_index_mul_operand (xiop0)));
7550 else if (code == ASHIFT)
7552 rtx op = XEXP (index, 1);
7554 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7555 && CONST_INT_P (op)
7556 && INTVAL (op) > 0
7557 && INTVAL (op) <= 3);
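/* Editor's note (not from the original source): the fallback below matches
   the Thumb-2 LDR/STR immediate forms, which allow a 12-bit positive
   offset (up to 4095) but only an 8-bit negative offset (down to -255),
   hence the asymmetric range.  */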
7560 return (code == CONST_INT
7561 && INTVAL (index) < 4096
7562 && INTVAL (index) > -256);
7565 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7566 static int
7567 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7569 int regno;
7571 if (!REG_P (x))
7572 return 0;
7574 regno = REGNO (x);
7576 if (strict_p)
7577 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7579 return (regno <= LAST_LO_REGNUM
7580 || regno > LAST_VIRTUAL_REGISTER
7581 || regno == FRAME_POINTER_REGNUM
7582 || (GET_MODE_SIZE (mode) >= 4
7583 && (regno == STACK_POINTER_REGNUM
7584 || regno >= FIRST_PSEUDO_REGISTER
7585 || x == hard_frame_pointer_rtx
7586 || x == arg_pointer_rtx)));
7589 /* Return nonzero if x is a legitimate index register. This is the case
7590 for any base register that can access a QImode object. */
7591 inline static int
7592 thumb1_index_register_rtx_p (rtx x, int strict_p)
7594 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7597 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7599 The AP may be eliminated to either the SP or the FP, so we use the
7600 least common denominator, e.g. SImode, and offsets from 0 to 64.
7602 ??? Verify whether the above is the right approach.
7604 ??? Also, the FP may be eliminated to the SP, so perhaps that
7605 needs special handling also.
7607 ??? Look at how the mips16 port solves this problem. It probably uses
7608 better ways to solve some of these problems.
7610 Although it is not incorrect, we don't accept QImode and HImode
7611 addresses based on the frame pointer or arg pointer until the
7612 reload pass starts. This is so that eliminating such addresses
7613 into stack-based ones won't produce impossible code. */
7615 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7617 /* ??? Not clear if this is right. Experiment. */
7618 if (GET_MODE_SIZE (mode) < 4
7619 && !(reload_in_progress || reload_completed)
7620 && (reg_mentioned_p (frame_pointer_rtx, x)
7621 || reg_mentioned_p (arg_pointer_rtx, x)
7622 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7623 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7624 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7625 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7626 return 0;
7628 /* Accept any base register. SP only in SImode or larger. */
7629 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7630 return 1;
7632 /* This is PC relative data before arm_reorg runs. */
7633 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7634 && GET_CODE (x) == SYMBOL_REF
7635 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7636 return 1;
7638 /* This is PC relative data after arm_reorg runs. */
7639 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7640 && reload_completed
7641 && (GET_CODE (x) == LABEL_REF
7642 || (GET_CODE (x) == CONST
7643 && GET_CODE (XEXP (x, 0)) == PLUS
7644 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7645 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7646 return 1;
7648 /* Post-inc indexing only supported for SImode and larger. */
7649 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7650 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7651 return 1;
7653 else if (GET_CODE (x) == PLUS)
7655 /* REG+REG address can be any two index registers. */
7656 /* We disallow FRAME+REG addressing since we know that FRAME
7657 will be replaced with STACK, and SP relative addressing only
7658 permits SP+OFFSET. */
7659 if (GET_MODE_SIZE (mode) <= 4
7660 && XEXP (x, 0) != frame_pointer_rtx
7661 && XEXP (x, 1) != frame_pointer_rtx
7662 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7663 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7664 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7665 return 1;
7667 /* REG+const has 5-7 bit offset for non-SP registers. */
7668 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7669 || XEXP (x, 0) == arg_pointer_rtx)
7670 && CONST_INT_P (XEXP (x, 1))
7671 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7672 return 1;
7674 /* REG+const has a 10-bit offset for SP, but only SImode and
7675 larger are supported. */
7676 /* ??? Should probably check for DI/DFmode overflow here
7677 just like GO_IF_LEGITIMATE_OFFSET does. */
7678 else if (REG_P (XEXP (x, 0))
7679 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7680 && GET_MODE_SIZE (mode) >= 4
7681 && CONST_INT_P (XEXP (x, 1))
7682 && INTVAL (XEXP (x, 1)) >= 0
7683 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7684 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7685 return 1;
7687 else if (REG_P (XEXP (x, 0))
7688 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7689 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7690 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7691 && REGNO (XEXP (x, 0))
7692 <= LAST_VIRTUAL_POINTER_REGISTER))
7693 && GET_MODE_SIZE (mode) >= 4
7694 && CONST_INT_P (XEXP (x, 1))
7695 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7696 return 1;
7699 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7700 && GET_MODE_SIZE (mode) == 4
7701 && GET_CODE (x) == SYMBOL_REF
7702 && CONSTANT_POOL_ADDRESS_P (x)
7703 && ! (flag_pic
7704 && symbol_mentioned_p (get_pool_constant (x))
7705 && ! pcrel_constant_p (get_pool_constant (x))))
7706 return 1;
7708 return 0;
7711 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7712 instruction of mode MODE. */
7714 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7716 switch (GET_MODE_SIZE (mode))
7718 case 1:
7719 return val >= 0 && val < 32;
7721 case 2:
7722 return val >= 0 && val < 64 && (val & 1) == 0;
7724 default:
7725 return (val >= 0
7726 && (val + GET_MODE_SIZE (mode)) <= 128
7727 && (val & 3) == 0);
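/* Editor's note (not from the original source): the limits above correspond
   to the Thumb-1 5-bit immediate offset, scaled by the access size: 0-31
   for byte accesses, 0-62 (even) for halfwords, and 0-124 (word-aligned)
   for words; the VAL + GET_MODE_SIZE test also keeps the last word of a
   multi-word access within range.  */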
7731 bool
7732 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7734 if (TARGET_ARM)
7735 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7736 else if (TARGET_THUMB2)
7737 return thumb2_legitimate_address_p (mode, x, strict_p);
7738 else /* if (TARGET_THUMB1) */
7739 return thumb1_legitimate_address_p (mode, x, strict_p);
7742 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7744 Given an rtx X being reloaded into a reg required to be
7745 in class CLASS, return the class of reg to actually use.
7746 In general this is just CLASS, but for the Thumb core registers and
7747 immediate constants we prefer a LO_REGS class or a subset. */
7749 static reg_class_t
7750 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7752 if (TARGET_32BIT)
7753 return rclass;
7754 else
7756 if (rclass == GENERAL_REGS)
7757 return LO_REGS;
7758 else
7759 return rclass;
7763 /* Build the SYMBOL_REF for __tls_get_addr. */
7765 static GTY(()) rtx tls_get_addr_libfunc;
7767 static rtx
7768 get_tls_get_addr (void)
7770 if (!tls_get_addr_libfunc)
7771 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7772 return tls_get_addr_libfunc;
7776 arm_load_tp (rtx target)
7778 if (!target)
7779 target = gen_reg_rtx (SImode);
7781 if (TARGET_HARD_TP)
7783 /* Can return in any reg. */
7784 emit_insn (gen_load_tp_hard (target));
7786 else
7788 /* Always returned in r0. Immediately copy the result into a pseudo,
7789 otherwise other uses of r0 (e.g. setting up function arguments) may
7790 clobber the value. */
7792 rtx tmp;
7794 emit_insn (gen_load_tp_soft ());
7796 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7797 emit_move_insn (target, tmp);
7799 return target;
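/* Editor's note (not from the original source): with a hardware thread
   register (TARGET_HARD_TP) the load_tp_hard pattern reads the thread
   pointer from the CP15 thread ID register directly into any register;
   otherwise the load_tp_soft pattern calls the __aeabi_read_tp helper,
   which returns the value in r0, so it is copied out immediately as the
   comment above explains.  */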
7802 static rtx
7803 load_tls_operand (rtx x, rtx reg)
7805 rtx tmp;
7807 if (reg == NULL_RTX)
7808 reg = gen_reg_rtx (SImode);
7810 tmp = gen_rtx_CONST (SImode, x);
7812 emit_move_insn (reg, tmp);
7814 return reg;
7817 static rtx
7818 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7820 rtx insns, label, labelno, sum;
7822 gcc_assert (reloc != TLS_DESCSEQ);
7823 start_sequence ();
7825 labelno = GEN_INT (pic_labelno++);
7826 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7827 label = gen_rtx_CONST (VOIDmode, label);
7829 sum = gen_rtx_UNSPEC (Pmode,
7830 gen_rtvec (4, x, GEN_INT (reloc), label,
7831 GEN_INT (TARGET_ARM ? 8 : 4)),
7832 UNSPEC_TLS);
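/* Editor's note (not from the original source): the final operand of the
   UNSPEC_TLS above encodes the distance from the PC-reading add
   instruction to the label: 8 in ARM state and 4 in Thumb state, matching
   the architectural offset of the PC when it is read as an operand.  */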
7833 reg = load_tls_operand (sum, reg);
7835 if (TARGET_ARM)
7836 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7837 else
7838 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7840 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7841 LCT_PURE, /* LCT_CONST? */
7842 Pmode, 1, reg, Pmode);
7844 insns = get_insns ();
7845 end_sequence ();
7847 return insns;
7850 static rtx
7851 arm_tls_descseq_addr (rtx x, rtx reg)
7853 rtx labelno = GEN_INT (pic_labelno++);
7854 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7855 rtx sum = gen_rtx_UNSPEC (Pmode,
7856 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7857 gen_rtx_CONST (VOIDmode, label),
7858 GEN_INT (!TARGET_ARM)),
7859 UNSPEC_TLS);
7860 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7862 emit_insn (gen_tlscall (x, labelno));
7863 if (!reg)
7864 reg = gen_reg_rtx (SImode);
7865 else
7866 gcc_assert (REGNO (reg) != R0_REGNUM);
7868 emit_move_insn (reg, reg0);
7870 return reg;
7874 legitimize_tls_address (rtx x, rtx reg)
7876 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7877 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7879 switch (model)
7881 case TLS_MODEL_GLOBAL_DYNAMIC:
7882 if (TARGET_GNU2_TLS)
7884 reg = arm_tls_descseq_addr (x, reg);
7886 tp = arm_load_tp (NULL_RTX);
7888 dest = gen_rtx_PLUS (Pmode, tp, reg);
7890 else
7892 /* Original scheme */
7893 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7894 dest = gen_reg_rtx (Pmode);
7895 emit_libcall_block (insns, dest, ret, x);
7897 return dest;
7899 case TLS_MODEL_LOCAL_DYNAMIC:
7900 if (TARGET_GNU2_TLS)
7902 reg = arm_tls_descseq_addr (x, reg);
7904 tp = arm_load_tp (NULL_RTX);
7906 dest = gen_rtx_PLUS (Pmode, tp, reg);
7908 else
7910 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7912 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7913 share the LDM result with other LD model accesses. */
7914 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7915 UNSPEC_TLS);
7916 dest = gen_reg_rtx (Pmode);
7917 emit_libcall_block (insns, dest, ret, eqv);
7919 /* Load the addend. */
7920 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7921 GEN_INT (TLS_LDO32)),
7922 UNSPEC_TLS);
7923 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7924 dest = gen_rtx_PLUS (Pmode, dest, addend);
7926 return dest;
7928 case TLS_MODEL_INITIAL_EXEC:
7929 labelno = GEN_INT (pic_labelno++);
7930 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7931 label = gen_rtx_CONST (VOIDmode, label);
7932 sum = gen_rtx_UNSPEC (Pmode,
7933 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7934 GEN_INT (TARGET_ARM ? 8 : 4)),
7935 UNSPEC_TLS);
7936 reg = load_tls_operand (sum, reg);
7938 if (TARGET_ARM)
7939 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7940 else if (TARGET_THUMB2)
7941 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7942 else
7944 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7945 emit_move_insn (reg, gen_const_mem (SImode, reg));
7948 tp = arm_load_tp (NULL_RTX);
7950 return gen_rtx_PLUS (Pmode, tp, reg);
7952 case TLS_MODEL_LOCAL_EXEC:
7953 tp = arm_load_tp (NULL_RTX);
7955 reg = gen_rtx_UNSPEC (Pmode,
7956 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7957 UNSPEC_TLS);
7958 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7960 return gen_rtx_PLUS (Pmode, tp, reg);
7962 default:
7963 abort ();
7967 /* Try machine-dependent ways of modifying an illegitimate address
7968 to be legitimate. If we find one, return the new, valid address. */
7970 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7972 if (arm_tls_referenced_p (x))
7974 rtx addend = NULL;
7976 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7978 addend = XEXP (XEXP (x, 0), 1);
7979 x = XEXP (XEXP (x, 0), 0);
7982 if (GET_CODE (x) != SYMBOL_REF)
7983 return x;
7985 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7987 x = legitimize_tls_address (x, NULL_RTX);
7989 if (addend)
7991 x = gen_rtx_PLUS (SImode, x, addend);
7992 orig_x = x;
7994 else
7995 return x;
7998 if (!TARGET_ARM)
8000 /* TODO: legitimize_address for Thumb2. */
8001 if (TARGET_THUMB2)
8002 return x;
8003 return thumb_legitimize_address (x, orig_x, mode);
8006 if (GET_CODE (x) == PLUS)
8008 rtx xop0 = XEXP (x, 0);
8009 rtx xop1 = XEXP (x, 1);
8011 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8012 xop0 = force_reg (SImode, xop0);
8014 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8015 && !symbol_mentioned_p (xop1))
8016 xop1 = force_reg (SImode, xop1);
8018 if (ARM_BASE_REGISTER_RTX_P (xop0)
8019 && CONST_INT_P (xop1))
8021 HOST_WIDE_INT n, low_n;
8022 rtx base_reg, val;
8023 n = INTVAL (xop1);
8025 /* VFP addressing modes actually allow greater offsets, but for
8026 now we just stick with the lowest common denominator. */
8027 if (mode == DImode
8028 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
8030 low_n = n & 0x0f;
8031 n &= ~0x0f;
8032 if (low_n > 4)
8034 n += 16;
8035 low_n -= 16;
8038 else
8040 low_n = ((mode) == TImode ? 0
8041 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8042 n -= low_n;
8045 base_reg = gen_reg_rtx (SImode);
8046 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8047 emit_move_insn (base_reg, val);
8048 x = plus_constant (Pmode, base_reg, low_n);
8050 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8051 x = gen_rtx_PLUS (SImode, xop0, xop1);
8054 /* XXX We don't allow MINUS any more -- see comment in
8055 arm_legitimate_address_outer_p (). */
8056 else if (GET_CODE (x) == MINUS)
8058 rtx xop0 = XEXP (x, 0);
8059 rtx xop1 = XEXP (x, 1);
8061 if (CONSTANT_P (xop0))
8062 xop0 = force_reg (SImode, xop0);
8064 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8065 xop1 = force_reg (SImode, xop1);
8067 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8068 x = gen_rtx_MINUS (SImode, xop0, xop1);
8071 /* Make sure to take full advantage of the pre-indexed addressing mode
8072 with absolute addresses, which often allows the base register to
8073 be factored out across multiple adjacent memory references, and might
8074 even allow the minipool to be avoided entirely. */
8075 else if (CONST_INT_P (x) && optimize > 0)
8077 unsigned int bits;
8078 HOST_WIDE_INT mask, base, index;
8079 rtx base_reg;
8081 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8082 use an 8-bit index. So let's use a 12-bit index for SImode only and
8083 hope that arm_gen_constant will enable ldrb to use more bits. */
8084 bits = (mode == SImode) ? 12 : 8;
8085 mask = (1 << bits) - 1;
8086 base = INTVAL (x) & ~mask;
8087 index = INTVAL (x) & mask;
8088 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8090 /* It'll most probably be more efficient to generate the base
8091 with more bits set and use a negative index instead. */
8092 base |= mask;
8093 index -= mask;
8095 base_reg = force_reg (SImode, GEN_INT (base));
8096 x = plus_constant (Pmode, base_reg, index);
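/* Editor's note (not from the original source): e.g. for a hypothetical
   SImode access to absolute address 0x12345678, BITS is 12, so the address
   is rewritten as a base of 0x12345000 (forced into a register, which
   nearby references can then share) plus a 12-bit index of 0x678.  When
   the base would need too many set bits, the code above instead sets the
   low offset bits of the base and uses a negative index so that
   arm_gen_constant may need fewer instructions for the base.  */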
8099 if (flag_pic)
8101 /* We need to find and carefully transform any SYMBOL and LABEL
8102 references; so go back to the original address expression. */
8103 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8105 if (new_x != orig_x)
8106 x = new_x;
8109 return x;
8113 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8114 to be legitimate. If we find one, return the new, valid address. */
8116 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8118 if (GET_CODE (x) == PLUS
8119 && CONST_INT_P (XEXP (x, 1))
8120 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8121 || INTVAL (XEXP (x, 1)) < 0))
8123 rtx xop0 = XEXP (x, 0);
8124 rtx xop1 = XEXP (x, 1);
8125 HOST_WIDE_INT offset = INTVAL (xop1);
8127 /* Try and fold the offset into a biasing of the base register and
8128 then offsetting that. Don't do this when optimizing for space
8129 since it can cause too many CSEs. */
8130 if (optimize_size && offset >= 0
8131 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8133 HOST_WIDE_INT delta;
8135 if (offset >= 256)
8136 delta = offset - (256 - GET_MODE_SIZE (mode));
8137 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8138 delta = 31 * GET_MODE_SIZE (mode);
8139 else
8140 delta = offset & (~31 * GET_MODE_SIZE (mode));
8142 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8143 NULL_RTX);
8144 x = plus_constant (Pmode, xop0, delta);
8146 else if (offset < 0 && offset > -256)
8147 /* Small negative offsets are best done with a subtract before the
8148 dereference, since forcing these into a register normally takes two
8149 instructions. */
8150 x = force_operand (x, NULL_RTX);
8151 else
8153 /* For the remaining cases, force the constant into a register. */
8154 xop1 = force_reg (SImode, xop1);
8155 x = gen_rtx_PLUS (SImode, xop0, xop1);
8158 else if (GET_CODE (x) == PLUS
8159 && s_register_operand (XEXP (x, 1), SImode)
8160 && !s_register_operand (XEXP (x, 0), SImode))
8162 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8164 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8167 if (flag_pic)
8169 /* We need to find and carefully transform any SYMBOL and LABEL
8170 references; so go back to the original address expression. */
8171 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8173 if (new_x != orig_x)
8174 x = new_x;
8177 return x;
8180 /* Return TRUE if X contains any TLS symbol references. */
8182 bool
8183 arm_tls_referenced_p (rtx x)
8185 if (! TARGET_HAVE_TLS)
8186 return false;
8188 subrtx_iterator::array_type array;
8189 FOR_EACH_SUBRTX (iter, array, x, ALL)
8191 const_rtx x = *iter;
8192 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8193 return true;
8195 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8196 TLS offsets, not real symbol references. */
8197 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8198 iter.skip_subrtxes ();
8200 return false;
8203 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8205 On the ARM, allow any integer (invalid ones are removed later by insn
8206 patterns), nice doubles and symbol_refs which refer to the function's
8207 constant pool XXX.
8209 When generating PIC, allow anything. */
8211 static bool
8212 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8214 return flag_pic || !label_mentioned_p (x);
8217 static bool
8218 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8220 return (CONST_INT_P (x)
8221 || CONST_DOUBLE_P (x)
8222 || CONSTANT_ADDRESS_P (x)
8223 || flag_pic);
8226 static bool
8227 arm_legitimate_constant_p (machine_mode mode, rtx x)
8229 return (!arm_cannot_force_const_mem (mode, x)
8230 && (TARGET_32BIT
8231 ? arm_legitimate_constant_p_1 (mode, x)
8232 : thumb_legitimate_constant_p (mode, x)));
8235 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8237 static bool
8238 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8240 rtx base, offset;
8242 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8244 split_const (x, &base, &offset);
8245 if (GET_CODE (base) == SYMBOL_REF
8246 && !offset_within_block_p (base, INTVAL (offset)))
8247 return true;
8249 return arm_tls_referenced_p (x);
8252 #define REG_OR_SUBREG_REG(X) \
8253 (REG_P (X) \
8254 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8256 #define REG_OR_SUBREG_RTX(X) \
8257 (REG_P (X) ? (X) : SUBREG_REG (X))
8259 static inline int
8260 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8262 machine_mode mode = GET_MODE (x);
8263 int total, words;
8265 switch (code)
8267 case ASHIFT:
8268 case ASHIFTRT:
8269 case LSHIFTRT:
8270 case ROTATERT:
8271 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8273 case PLUS:
8274 case MINUS:
8275 case COMPARE:
8276 case NEG:
8277 case NOT:
8278 return COSTS_N_INSNS (1);
8280 case MULT:
8281 if (CONST_INT_P (XEXP (x, 1)))
8283 int cycles = 0;
8284 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8286 while (i)
8288 i >>= 2;
8289 cycles++;
8291 return COSTS_N_INSNS (2) + cycles;
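/* Editor's note (not from the original source): the loop above counts one
   cycle for every two bits of the constant multiplier, a rough model of an
   early-terminating multiplier, on top of the two-insn base cost.  */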
8293 return COSTS_N_INSNS (1) + 16;
8295 case SET:
8296 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8297 the mode. */
8298 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8299 return (COSTS_N_INSNS (words)
8300 + 4 * ((MEM_P (SET_SRC (x)))
8301 + MEM_P (SET_DEST (x))));
8303 case CONST_INT:
8304 if (outer == SET)
8306 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8307 return 0;
8308 if (thumb_shiftable_const (INTVAL (x)))
8309 return COSTS_N_INSNS (2);
8310 return COSTS_N_INSNS (3);
8312 else if ((outer == PLUS || outer == COMPARE)
8313 && INTVAL (x) < 256 && INTVAL (x) > -256)
8314 return 0;
8315 else if ((outer == IOR || outer == XOR || outer == AND)
8316 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8317 return COSTS_N_INSNS (1);
8318 else if (outer == AND)
8320 int i;
8321 /* This duplicates the tests in the andsi3 expander. */
8322 for (i = 9; i <= 31; i++)
8323 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8324 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8325 return COSTS_N_INSNS (2);
8327 else if (outer == ASHIFT || outer == ASHIFTRT
8328 || outer == LSHIFTRT)
8329 return 0;
8330 return COSTS_N_INSNS (2);
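/* Editor's note (not from the original source): the outer == AND loop
   above accepts constants of the form (1 << i) - 1 (a mask of the low I
   bits) or the complement of such a mask, which the andsi3 expander can
   synthesize as a pair of shifts, hence the two-insn cost.  */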
8332 case CONST:
8333 case CONST_DOUBLE:
8334 case LABEL_REF:
8335 case SYMBOL_REF:
8336 return COSTS_N_INSNS (3);
8338 case UDIV:
8339 case UMOD:
8340 case DIV:
8341 case MOD:
8342 return 100;
8344 case TRUNCATE:
8345 return 99;
8347 case AND:
8348 case XOR:
8349 case IOR:
8350 /* XXX guess. */
8351 return 8;
8353 case MEM:
8354 /* XXX another guess. */
8355 /* Memory costs quite a lot for the first word, but subsequent words
8356 load at the equivalent of a single insn each. */
8357 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8358 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8359 ? 4 : 0));
8361 case IF_THEN_ELSE:
8362 /* XXX a guess. */
8363 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8364 return 14;
8365 return 2;
8367 case SIGN_EXTEND:
8368 case ZERO_EXTEND:
8369 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8370 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8372 if (mode == SImode)
8373 return total;
8375 if (arm_arch6)
8376 return total + COSTS_N_INSNS (1);
8378 /* Assume a two-shift sequence. Increase the cost slightly so
8379 we prefer actual shifts over an extend operation. */
8380 return total + 1 + COSTS_N_INSNS (2);
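/* Editor's note (not from the original source): on ARMv6 and later a
   single extend instruction (e.g. sxtb/uxth) suffices, costing one extra
   insn; otherwise the extension is a two-shift sequence, and the extra +1
   biases the cost so that explicit shifts are preferred over forming an
   extend, as the comment above says.  */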
8382 default:
8383 return 99;
8387 static inline bool
8388 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8390 machine_mode mode = GET_MODE (x);
8391 enum rtx_code subcode;
8392 rtx operand;
8393 enum rtx_code code = GET_CODE (x);
8394 *total = 0;
8396 switch (code)
8398 case MEM:
8399 /* Memory costs quite a lot for the first word, but subsequent words
8400 load at the equivalent of a single insn each. */
8401 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8402 return true;
8404 case DIV:
8405 case MOD:
8406 case UDIV:
8407 case UMOD:
8408 if (TARGET_HARD_FLOAT && mode == SFmode)
8409 *total = COSTS_N_INSNS (2);
8410 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8411 *total = COSTS_N_INSNS (4);
8412 else
8413 *total = COSTS_N_INSNS (20);
8414 return false;
8416 case ROTATE:
8417 if (REG_P (XEXP (x, 1)))
8418 *total = COSTS_N_INSNS (1); /* Need to subtract from 32. */
8419 else if (!CONST_INT_P (XEXP (x, 1)))
8420 *total = rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8422 /* Fall through */
8423 case ROTATERT:
8424 if (mode != SImode)
8426 *total += COSTS_N_INSNS (4);
8427 return true;
8430 /* Fall through */
8431 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8432 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8433 if (mode == DImode)
8435 *total += COSTS_N_INSNS (3);
8436 return true;
8439 *total += COSTS_N_INSNS (1);
8440 /* Increase the cost of complex shifts because they aren't any faster,
8441 and they reduce dual-issue opportunities. */
8442 if (arm_tune_cortex_a9
8443 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8444 ++*total;
8446 return true;
8448 case MINUS:
8449 if (mode == DImode)
8451 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8452 if (CONST_INT_P (XEXP (x, 0))
8453 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8455 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8456 return true;
8459 if (CONST_INT_P (XEXP (x, 1))
8460 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8462 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8463 return true;
8466 return false;
8469 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8471 if (TARGET_HARD_FLOAT
8472 && (mode == SFmode
8473 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8475 *total = COSTS_N_INSNS (1);
8476 if (CONST_DOUBLE_P (XEXP (x, 0))
8477 && arm_const_double_rtx (XEXP (x, 0)))
8479 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8480 return true;
8483 if (CONST_DOUBLE_P (XEXP (x, 1))
8484 && arm_const_double_rtx (XEXP (x, 1)))
8486 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8487 return true;
8490 return false;
8492 *total = COSTS_N_INSNS (20);
8493 return false;
8496 *total = COSTS_N_INSNS (1);
8497 if (CONST_INT_P (XEXP (x, 0))
8498 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8500 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8501 return true;
8504 subcode = GET_CODE (XEXP (x, 1));
8505 if (subcode == ASHIFT || subcode == ASHIFTRT
8506 || subcode == LSHIFTRT
8507 || subcode == ROTATE || subcode == ROTATERT)
8509 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8510 *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
8511 return true;
8514 /* A shift as a part of RSB costs no more than RSB itself. */
8515 if (GET_CODE (XEXP (x, 0)) == MULT
8516 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8518 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, speed);
8519 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8520 return true;
8523 if (subcode == MULT
8524 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8526 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8527 *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
8528 return true;
8531 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8532 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8534 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
8535 0, speed);
8536 if (REG_P (XEXP (XEXP (x, 1), 0))
8537 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8538 *total += COSTS_N_INSNS (1);
8540 return true;
8543 /* Fall through */
8545 case PLUS:
8546 if (code == PLUS && arm_arch6 && mode == SImode
8547 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8548 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8550 *total = COSTS_N_INSNS (1);
8551 *total += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
8552 GET_CODE (XEXP (x, 0)), 0, speed);
8553 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8554 return true;
8557 /* MLA: All arguments must be registers. We filter out
8558 multiplication by a power of two, so that we fall through to
8559 the code below. */
8560 if (GET_CODE (XEXP (x, 0)) == MULT
8561 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8563 /* The cost comes from the cost of the multiply. */
8564 return false;
8567 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8569 if (TARGET_HARD_FLOAT
8570 && (mode == SFmode
8571 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8573 *total = COSTS_N_INSNS (1);
8574 if (CONST_DOUBLE_P (XEXP (x, 1))
8575 && arm_const_double_rtx (XEXP (x, 1)))
8577 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8578 return true;
8581 return false;
8584 *total = COSTS_N_INSNS (20);
8585 return false;
8588 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8589 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8591 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), mode, code,
8592 1, speed);
8593 if (REG_P (XEXP (XEXP (x, 0), 0))
8594 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8595 *total += COSTS_N_INSNS (1);
8596 return true;
8599 /* Fall through */
8601 case AND: case XOR: case IOR:
8603 /* Normally the frame registers will be split into reg+const during
8604 reload, so it is a bad idea to combine them with other instructions,
8605 since then they might not be moved outside of loops. As a compromise
8606 we allow integration with ops that have a constant as their second
8607 operand. */
8608 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8609 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8610 && !CONST_INT_P (XEXP (x, 1)))
8611 *total = COSTS_N_INSNS (1);
8613 if (mode == DImode)
8615 *total += COSTS_N_INSNS (2);
8616 if (CONST_INT_P (XEXP (x, 1))
8617 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8619 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8620 return true;
8623 return false;
8626 *total += COSTS_N_INSNS (1);
8627 if (CONST_INT_P (XEXP (x, 1))
8628 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8630 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8631 return true;
8633 subcode = GET_CODE (XEXP (x, 0));
8634 if (subcode == ASHIFT || subcode == ASHIFTRT
8635 || subcode == LSHIFTRT
8636 || subcode == ROTATE || subcode == ROTATERT)
8638 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8639 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8640 return true;
8643 if (subcode == MULT
8644 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8646 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8647 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8648 return true;
8651 if (subcode == UMIN || subcode == UMAX
8652 || subcode == SMIN || subcode == SMAX)
8654 *total = COSTS_N_INSNS (3);
8655 return true;
8658 return false;
8660 case MULT:
8661 /* This should have been handled by the CPU specific routines. */
8662 gcc_unreachable ();
8664 case TRUNCATE:
8665 if (arm_arch3m && mode == SImode
8666 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8667 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8668 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8669 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8670 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8671 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8673 *total = rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, LSHIFTRT,
8674 0, speed);
8675 return true;
8677 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8678 return false;
8680 case NEG:
8681 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8683 if (TARGET_HARD_FLOAT
8684 && (mode == SFmode
8685 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8687 *total = COSTS_N_INSNS (1);
8688 return false;
8690 *total = COSTS_N_INSNS (2);
8691 return false;
8694 /* Fall through */
8695 case NOT:
8696 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8697 if (mode == SImode && code == NOT)
8699 subcode = GET_CODE (XEXP (x, 0));
8700 if (subcode == ASHIFT || subcode == ASHIFTRT
8701 || subcode == LSHIFTRT
8702 || subcode == ROTATE || subcode == ROTATERT
8703 || (subcode == MULT
8704 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8706 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode,
8707 0, speed);
8708 /* Register shifts cost an extra cycle. */
8709 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8710 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8711 mode, subcode,
8712 1, speed);
8713 return true;
8717 return false;
8719 case IF_THEN_ELSE:
8720 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8722 *total = COSTS_N_INSNS (4);
8723 return true;
8726 operand = XEXP (x, 0);
8728 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8729 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8730 && REG_P (XEXP (operand, 0))
8731 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8732 *total += COSTS_N_INSNS (1);
8733 *total += rtx_cost (XEXP (x, 1), VOIDmode, code, 1, speed);
8734 *total += rtx_cost (XEXP (x, 2), VOIDmode, code, 2, speed);
8735 return true;
8737 case NE:
8738 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8740 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
8741 0, speed);
8742 return true;
8744 goto scc_insn;
8746 case GE:
8747 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8748 && mode == SImode && XEXP (x, 1) == const0_rtx)
8750 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
8751 0, speed);
8752 return true;
8754 goto scc_insn;
8756 case LT:
8757 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8758 && mode == SImode && XEXP (x, 1) == const0_rtx)
8760 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
8761 0, speed);
8762 return true;
8764 goto scc_insn;
8766 case EQ:
8767 case GT:
8768 case LE:
8769 case GEU:
8770 case LTU:
8771 case GTU:
8772 case LEU:
8773 case UNORDERED:
8774 case ORDERED:
8775 case UNEQ:
8776 case UNGE:
8777 case UNLT:
8778 case UNGT:
8779 case UNLE:
8780 scc_insn:
8781 /* SCC insns. Where the comparison has already been performed,
8782 they cost 2 instructions. Otherwise they need an additional
8783 comparison before them. */
8784 *total = COSTS_N_INSNS (2);
8785 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8787 return true;
8790 /* Fall through */
8791 case COMPARE:
8792 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8794 *total = 0;
8795 return true;
8798 *total += COSTS_N_INSNS (1);
8799 if (CONST_INT_P (XEXP (x, 1))
8800 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8802 *total += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed);
8803 return true;
8806 subcode = GET_CODE (XEXP (x, 0));
8807 if (subcode == ASHIFT || subcode == ASHIFTRT
8808 || subcode == LSHIFTRT
8809 || subcode == ROTATE || subcode == ROTATERT)
8811 mode = GET_MODE (XEXP (x, 0));
8812 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8813 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8814 return true;
8817 if (subcode == MULT
8818 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8820 mode = GET_MODE (XEXP (x, 0));
8821 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8822 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8823 return true;
8826 return false;
8828 case UMIN:
8829 case UMAX:
8830 case SMIN:
8831 case SMAX:
8832 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8833 if (!CONST_INT_P (XEXP (x, 1))
8834 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8835 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8836 return true;
8838 case ABS:
8839 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8841 if (TARGET_HARD_FLOAT
8842 && (mode == SFmode
8843 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8845 *total = COSTS_N_INSNS (1);
8846 return false;
8848 *total = COSTS_N_INSNS (20);
8849 return false;
8851 *total = COSTS_N_INSNS (1);
8852 if (mode == DImode)
8853 *total += COSTS_N_INSNS (3);
8854 return false;
8856 case SIGN_EXTEND:
8857 case ZERO_EXTEND:
8858 *total = 0;
8859 if (GET_MODE_CLASS (mode) == MODE_INT)
8861 rtx op = XEXP (x, 0);
8862 machine_mode opmode = GET_MODE (op);
8864 if (mode == DImode)
8865 *total += COSTS_N_INSNS (1);
8867 if (opmode != SImode)
8869 if (MEM_P (op))
8871 /* If !arm_arch4, we use one of the extendhisi2_mem
8872 or movhi_bytes patterns for HImode. For a QImode
8873 sign extension, we first zero-extend from memory
8874 and then perform a shift sequence. */
8875 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8876 *total += COSTS_N_INSNS (2);
8878 else if (arm_arch6)
8879 *total += COSTS_N_INSNS (1);
8881 /* We don't have the necessary insn, so we need to perform some
8882 other operation. */
8883 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8884 /* An and with constant 255. */
8885 *total += COSTS_N_INSNS (1);
8886 else
8887 /* A shift sequence. Increase costs slightly to avoid
8888 combining two shifts into an extend operation. */
8889 *total += COSTS_N_INSNS (2) + 1;
8892 return false;
8895 switch (GET_MODE (XEXP (x, 0)))
8897 case V8QImode:
8898 case V4HImode:
8899 case V2SImode:
8900 case V4QImode:
8901 case V2HImode:
8902 *total = COSTS_N_INSNS (1);
8903 return false;
8905 default:
8906 gcc_unreachable ();
8908 gcc_unreachable ();
8910 case ZERO_EXTRACT:
8911 case SIGN_EXTRACT:
8912 mode = GET_MODE (XEXP (x, 0));
8913 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8914 return true;
8916 case CONST_INT:
8917 if (const_ok_for_arm (INTVAL (x))
8918 || const_ok_for_arm (~INTVAL (x)))
8919 *total = COSTS_N_INSNS (1);
8920 else
8921 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8922 INTVAL (x), NULL_RTX,
8923 NULL_RTX, 0, 0));
8924 return true;
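/* Editor's note (not from the original source): const_ok_for_arm accepts
   the ARM data-processing immediate form (an 8-bit value rotated right by
   an even amount); the complement is also cheap via MVN.  Anything else is
   costed at the number of instructions arm_gen_constant reports it would
   take to synthesize the value.  */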
8926 case CONST:
8927 case LABEL_REF:
8928 case SYMBOL_REF:
8929 *total = COSTS_N_INSNS (3);
8930 return true;
8932 case HIGH:
8933 *total = COSTS_N_INSNS (1);
8934 return true;
8936 case LO_SUM:
8937 *total = COSTS_N_INSNS (1);
8938 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8939 return true;
8941 case CONST_DOUBLE:
8942 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8943 && (mode == SFmode || !TARGET_VFP_SINGLE))
8944 *total = COSTS_N_INSNS (1);
8945 else
8946 *total = COSTS_N_INSNS (4);
8947 return true;
8949 case SET:
8950 /* The vec_extract patterns accept memory operands that require an
8951 address reload. Account for the cost of that reload to give the
8952 auto-inc-dec pass an incentive to try to replace them. */
8953 if (TARGET_NEON && MEM_P (SET_DEST (x))
8954 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8956 mode = GET_MODE (SET_DEST (x));
8957 *total = rtx_cost (SET_DEST (x), mode, code, 0, speed);
8958 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8959 *total += COSTS_N_INSNS (1);
8960 return true;
8962 /* Likewise for the vec_set patterns. */
8963 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8964 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8965 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8967 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8968 mode = GET_MODE (SET_DEST (x));
8969 *total = rtx_cost (mem, mode, code, 0, speed);
8970 if (!neon_vector_mem_operand (mem, 2, true))
8971 *total += COSTS_N_INSNS (1);
8972 return true;
8974 return false;
8976 case UNSPEC:
8977 /* We cost this as high as our memory costs to allow this to
8978 be hoisted out of loops. */
8979 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8981 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8983 return true;
8985 case CONST_VECTOR:
8986 if (TARGET_NEON
8987 && TARGET_HARD_FLOAT
8988 && outer == SET
8989 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8990 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8991 *total = COSTS_N_INSNS (1);
8992 else
8993 *total = COSTS_N_INSNS (4);
8994 return true;
8996 default:
8997 *total = COSTS_N_INSNS (4);
8998 return false;
9002 /* Estimate the size cost of Thumb-1 instructions.
9003 For now most of the code is copied from thumb1_rtx_costs. We need more
9004 fine-grained tuning when we have more related test cases. */
9005 static inline int
9006 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9008 machine_mode mode = GET_MODE (x);
9009 int words;
9011 switch (code)
9013 case ASHIFT:
9014 case ASHIFTRT:
9015 case LSHIFTRT:
9016 case ROTATERT:
9017 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9019 case PLUS:
9020 case MINUS:
9021 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9022 patterns defined by RTL expansion, especially for the expansion of
9023 multiplication. */
9024 if ((GET_CODE (XEXP (x, 0)) == MULT
9025 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9026 || (GET_CODE (XEXP (x, 1)) == MULT
9027 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9028 return COSTS_N_INSNS (2);
9029 /* Fall through on purpose for a normal RTX. */
9030 case COMPARE:
9031 case NEG:
9032 case NOT:
9033 return COSTS_N_INSNS (1);
9035 case MULT:
9036 if (CONST_INT_P (XEXP (x, 1)))
9038 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
9039 into a register first. */
9040 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9041 /* For the targets which have a very small and high-latency multiply
9042 unit, we prefer to synthesize the mult with up to 5 instructions,
9043 giving a good balance between size and performance. */
9044 if (arm_arch6m && arm_m_profile_small_mul)
9045 return COSTS_N_INSNS (5);
9046 else
9047 return COSTS_N_INSNS (1) + const_size;
9049 return COSTS_N_INSNS (1);
9051 case SET:
9052 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9053 the mode. */
9054 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9055 return COSTS_N_INSNS (words)
9056 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
9057 || satisfies_constraint_K (SET_SRC (x))
9058 /* thumb1_movdi_insn. */
9059 || ((words > 1) && MEM_P (SET_SRC (x))));
9061 case CONST_INT:
9062 if (outer == SET)
9064 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
9065 return COSTS_N_INSNS (1);
9066 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9067 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9068 return COSTS_N_INSNS (2);
9069 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9070 if (thumb_shiftable_const (INTVAL (x)))
9071 return COSTS_N_INSNS (2);
9072 return COSTS_N_INSNS (3);
9074 else if ((outer == PLUS || outer == COMPARE)
9075 && INTVAL (x) < 256 && INTVAL (x) > -256)
9076 return 0;
9077 else if ((outer == IOR || outer == XOR || outer == AND)
9078 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9079 return COSTS_N_INSNS (1);
9080 else if (outer == AND)
9082 int i;
9083 /* This duplicates the tests in the andsi3 expander. */
9084 for (i = 9; i <= 31; i++)
9085 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9086 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9087 return COSTS_N_INSNS (2);
9089 else if (outer == ASHIFT || outer == ASHIFTRT
9090 || outer == LSHIFTRT)
9091 return 0;
9092 return COSTS_N_INSNS (2);
9094 case CONST:
9095 case CONST_DOUBLE:
9096 case LABEL_REF:
9097 case SYMBOL_REF:
9098 return COSTS_N_INSNS (3);
9100 case UDIV:
9101 case UMOD:
9102 case DIV:
9103 case MOD:
9104 return 100;
9106 case TRUNCATE:
9107 return 99;
9109 case AND:
9110 case XOR:
9111 case IOR:
9112 return COSTS_N_INSNS (1);
9114 case MEM:
9115 return (COSTS_N_INSNS (1)
9116 + COSTS_N_INSNS (1)
9117 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9118 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9119 ? COSTS_N_INSNS (1) : 0));
9121 case IF_THEN_ELSE:
9122 /* XXX a guess. */
9123 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9124 return 14;
9125 return 2;
9127 case ZERO_EXTEND:
9128 /* XXX still guessing. */
9129 switch (GET_MODE (XEXP (x, 0)))
9131 case QImode:
9132 return (1 + (mode == DImode ? 4 : 0)
9133 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9135 case HImode:
9136 return (4 + (mode == DImode ? 4 : 0)
9137 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9139 case SImode:
9140 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9142 default:
9143 return 99;
9146 default:
9147 return 99;
9151 /* RTX costs when optimizing for size. */
9152 static bool
9153 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9154 int *total)
9156 machine_mode mode = GET_MODE (x);
9157 if (TARGET_THUMB1)
9159 *total = thumb1_size_rtx_costs (x, code, outer_code);
9160 return true;
9163 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9164 switch (code)
9166 case MEM:
9167 /* A memory access costs 1 insn if the mode is small or the address is
9168 a single register; otherwise it costs one insn per word. */
9169 if (REG_P (XEXP (x, 0)))
9170 *total = COSTS_N_INSNS (1);
9171 else if (flag_pic
9172 && GET_CODE (XEXP (x, 0)) == PLUS
9173 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9174 /* This will be split into two instructions.
9175 See arm.md:calculate_pic_address. */
9176 *total = COSTS_N_INSNS (2);
9177 else
9178 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9179 return true;
9181 case DIV:
9182 case MOD:
9183 case UDIV:
9184 case UMOD:
9185 /* Needs a libcall, so it costs about this. */
9186 *total = COSTS_N_INSNS (2);
9187 return false;
9189 case ROTATE:
9190 if (mode == SImode && REG_P (XEXP (x, 1)))
9192 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
9193 0, false);
9194 return true;
9196 /* Fall through */
9197 case ROTATERT:
9198 case ASHIFT:
9199 case LSHIFTRT:
9200 case ASHIFTRT:
9201 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9203 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), mode, code,
9204 0, false);
9205 return true;
9207 else if (mode == SImode)
9209 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
9210 0, false);
9211 /* Slightly disparage register shifts, but not by much. */
9212 if (!CONST_INT_P (XEXP (x, 1)))
9213 *total += 1 + rtx_cost (XEXP (x, 1), mode, code, 1, false);
9214 return true;
9217 /* Needs a libcall. */
9218 *total = COSTS_N_INSNS (2);
9219 return false;
9221 case MINUS:
9222 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9223 && (mode == SFmode || !TARGET_VFP_SINGLE))
9225 *total = COSTS_N_INSNS (1);
9226 return false;
9229 if (mode == SImode)
9231 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9232 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9234 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9235 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9236 || subcode1 == ROTATE || subcode1 == ROTATERT
9237 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9238 || subcode1 == ASHIFTRT)
9240 /* It's just the cost of the two operands. */
9241 *total = 0;
9242 return false;
9245 *total = COSTS_N_INSNS (1);
9246 return false;
9249 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9250 return false;
9252 case PLUS:
9253 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9254 && (mode == SFmode || !TARGET_VFP_SINGLE))
9256 *total = COSTS_N_INSNS (1);
9257 return false;
9260 /* A shift as a part of ADD costs nothing. */
9261 if (GET_CODE (XEXP (x, 0)) == MULT
9262 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9264 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9265 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, false);
9266 *total += rtx_cost (XEXP (x, 1), mode, code, 1, false);
9267 return true;
9270 /* Fall through */
9271 case AND: case XOR: case IOR:
9272 if (mode == SImode)
9274 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9276 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9277 || subcode == LSHIFTRT || subcode == ASHIFTRT
9278 || (code == AND && subcode == NOT))
9280 /* It's just the cost of the two operands. */
9281 *total = 0;
9282 return false;
9286 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9287 return false;
9289 case MULT:
9290 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9291 return false;
9293 case NEG:
9294 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9295 && (mode == SFmode || !TARGET_VFP_SINGLE))
9297 *total = COSTS_N_INSNS (1);
9298 return false;
9301 /* Fall through */
9302 case NOT:
9303 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9305 return false;
9307 case IF_THEN_ELSE:
9308 *total = 0;
9309 return false;
9311 case COMPARE:
9312 if (cc_register (XEXP (x, 0), VOIDmode))
9313 * total = 0;
9314 else
9315 *total = COSTS_N_INSNS (1);
9316 return false;
9318 case ABS:
9319 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9320 && (mode == SFmode || !TARGET_VFP_SINGLE))
9321 *total = COSTS_N_INSNS (1);
9322 else
9323 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9324 return false;
9326 case SIGN_EXTEND:
9327 case ZERO_EXTEND:
9328 return arm_rtx_costs_1 (x, outer_code, total, 0);
9330 case CONST_INT:
9331 if (const_ok_for_arm (INTVAL (x)))
9332 /* A multiplication by a constant requires another instruction
9333 to load the constant to a register. */
9334 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9335 ? 1 : 0);
9336 else if (const_ok_for_arm (~INTVAL (x)))
9337 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9338 else if (const_ok_for_arm (-INTVAL (x)))
9340 if (outer_code == COMPARE || outer_code == PLUS
9341 || outer_code == MINUS)
9342 *total = 0;
9343 else
9344 *total = COSTS_N_INSNS (1);
9346 else
9347 *total = COSTS_N_INSNS (2);
9348 return true;
9350 case CONST:
9351 case LABEL_REF:
9352 case SYMBOL_REF:
9353 *total = COSTS_N_INSNS (2);
9354 return true;
9356 case CONST_DOUBLE:
9357 *total = COSTS_N_INSNS (4);
9358 return true;
9360 case CONST_VECTOR:
9361 if (TARGET_NEON
9362 && TARGET_HARD_FLOAT
9363 && outer_code == SET
9364 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9365 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9366 *total = COSTS_N_INSNS (1);
9367 else
9368 *total = COSTS_N_INSNS (4);
9369 return true;
9371 case HIGH:
9372 case LO_SUM:
9373 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9374 cost of these slightly. */
9375 *total = COSTS_N_INSNS (1) + 1;
9376 return true;
9378 case SET:
9379 return false;
9381 default:
9382 if (mode != VOIDmode)
9383 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9384 else
9385 *total = COSTS_N_INSNS (4); /* Who knows? */
9386 return false;
9390 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9391 operand, then return the operand that is being shifted. If the shift
9392 is not by a constant, then set SHIFT_REG to point to the operand.
9393 Return NULL if OP is not a shifter operand. */
9394 static rtx
9395 shifter_op_p (rtx op, rtx *shift_reg)
9397 enum rtx_code code = GET_CODE (op);
9399 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9400 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9401 return XEXP (op, 0);
9402 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9403 return XEXP (op, 0);
9404 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9405 || code == ASHIFTRT)
9407 if (!CONST_INT_P (XEXP (op, 1)))
9408 *shift_reg = XEXP (op, 1);
9409 return XEXP (op, 0);
9412 return NULL;
9415 static bool
9416 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9418 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9419 rtx_code code = GET_CODE (x);
9420 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9422 switch (XINT (x, 1))
9424 case UNSPEC_UNALIGNED_LOAD:
9425 /* We can only do unaligned loads into the integer unit, and we can't
9426 use LDM or LDRD. */
9427 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9428 if (speed_p)
9429 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9430 + extra_cost->ldst.load_unaligned);
9432 #ifdef NOT_YET
9433 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9434 ADDR_SPACE_GENERIC, speed_p);
9435 #endif
9436 return true;
9438 case UNSPEC_UNALIGNED_STORE:
9439 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9440 if (speed_p)
9441 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9442 + extra_cost->ldst.store_unaligned);
9444 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9445 #ifdef NOT_YET
9446 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9447 ADDR_SPACE_GENERIC, speed_p);
9448 #endif
9449 return true;
9451 case UNSPEC_VRINTZ:
9452 case UNSPEC_VRINTP:
9453 case UNSPEC_VRINTM:
9454 case UNSPEC_VRINTR:
9455 case UNSPEC_VRINTX:
9456 case UNSPEC_VRINTA:
9457 if (speed_p)
9458 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9460 return true;
9461 default:
9462 *cost = COSTS_N_INSNS (2);
9463 break;
9465 return true;
9468 /* Cost of a libcall. We assume one insn per argument, an amount for the
9469 call (one insn for -Os) and then one for processing the result. */
9470 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
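/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and COSTS_N_INSNS (4) when optimizing for size.  */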
9472 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9473 do \
9475 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9476 if (shift_op != NULL \
9477 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9479 if (shift_reg) \
9481 if (speed_p) \
9482 *cost += extra_cost->alu.arith_shift_reg; \
9483 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9484 ASHIFT, 1, speed_p); \
9486 else if (speed_p) \
9487 *cost += extra_cost->alu.arith_shift; \
9489 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9490 ASHIFT, 0, speed_p) \
9491 + rtx_cost (XEXP (x, 1 - IDX), \
9492 GET_MODE (shift_op), \
9493 OP, 1, speed_p)); \
9494 return true; \
9497 while (0);
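/* Note that the macro above returns from the enclosing cost function as soon
   as it finds a suitable shift operand; it is only used in the narrow-mode
   PLUS and MINUS cases below.  */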
9499 /* RTX costs. Make an estimate of the cost of executing the operation
9500 X, which is contained within an operation with code OUTER_CODE.
9501 SPEED_P indicates whether the cost desired is the performance cost,
9502 or the size cost. The estimate is stored in COST and the return
9503 value is TRUE if the cost calculation is final, or FALSE if the
9504 caller should recurse through the operands of X to add additional
9505 costs.
9507 We currently make no attempt to model the size savings of Thumb-2
9508 16-bit instructions. At the normal points in compilation where
9509 this code is called we have no measure of whether the condition
9510 flags are live or not, and thus no realistic way to determine what
9511 the size will eventually be. */
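/* Illustrative note: a FALSE return (e.g. a plain SImode PLUS of two
   registers) lets the caller recurse and add the operand costs itself,
   whereas cases that already fold in their operand costs (such as the MLA
   and shift-and-operate patterns below) return TRUE.  */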
9512 static bool
9513 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9514 const struct cpu_cost_table *extra_cost,
9515 int *cost, bool speed_p)
9517 machine_mode mode = GET_MODE (x);
9519 *cost = COSTS_N_INSNS (1);
9521 if (TARGET_THUMB1)
9523 if (speed_p)
9524 *cost = thumb1_rtx_costs (x, code, outer_code);
9525 else
9526 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9527 return true;
9530 switch (code)
9532 case SET:
9533 *cost = 0;
9534 /* SET RTXs don't have a mode so we get it from the destination. */
9535 mode = GET_MODE (SET_DEST (x));
9537 if (REG_P (SET_SRC (x))
9538 && REG_P (SET_DEST (x)))
9540 /* Assume that most copies can be done with a single insn,
9541 unless we don't have HW FP, in which case everything
9542 larger than word mode will require two insns. */
9543 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9544 && GET_MODE_SIZE (mode) > 4)
9545 || mode == DImode)
9546 ? 2 : 1);
9547 /* Conditional register moves can be encoded
9548 in 16 bits in Thumb mode. */
9549 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9550 *cost >>= 1;
9552 return true;
9555 if (CONST_INT_P (SET_SRC (x)))
9557 /* Handle CONST_INT here, since the value doesn't have a mode
9558 and we would otherwise be unable to work out the true cost. */
9559 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9560 0, speed_p);
9561 outer_code = SET;
9562 /* Slightly lower the cost of setting a core reg to a constant.
9563 This helps break up chains and allows for better scheduling. */
9564 if (REG_P (SET_DEST (x))
9565 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9566 *cost -= 1;
9567 x = SET_SRC (x);
9568 /* Immediate moves with an immediate in the range [0, 255] can be
9569 encoded in 16 bits in Thumb mode. */
9570 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9571 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9572 *cost >>= 1;
9573 goto const_int_cost;
9576 return false;
9578 case MEM:
9579 /* A memory access costs 1 insn if the mode is small, or the address is
9580 a single register, otherwise it costs one insn per word. */
9581 if (REG_P (XEXP (x, 0)))
9582 *cost = COSTS_N_INSNS (1);
9583 else if (flag_pic
9584 && GET_CODE (XEXP (x, 0)) == PLUS
9585 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9586 /* This will be split into two instructions.
9587 See arm.md:calculate_pic_address. */
9588 *cost = COSTS_N_INSNS (2);
9589 else
9590 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9592 /* For speed optimizations, add the costs of the address and
9593 accessing memory. */
9594 if (speed_p)
9595 #ifdef NOT_YET
9596 *cost += (extra_cost->ldst.load
9597 + arm_address_cost (XEXP (x, 0), mode,
9598 ADDR_SPACE_GENERIC, speed_p));
9599 #else
9600 *cost += extra_cost->ldst.load;
9601 #endif
9602 return true;
9604 case PARALLEL:
9606 /* Calculations of LDM costs are complex. We assume an initial cost
9607 (ldm_1st) which will load the number of registers mentioned in
9608 ldm_regs_per_insn_1st registers; then each additional
9609 ldm_regs_per_insn_subsequent registers cost one more insn. The
9610 formula for N regs is thus:
9612 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9613 + ldm_regs_per_insn_subsequent - 1)
9614 / ldm_regs_per_insn_subsequent).
9616 Additional costs may also be added for addressing. A similar
9617 formula is used for STM. */
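/* Illustrative example of the formula above (the parameters are
   tuning-dependent): with ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 1, an 8-register LDM costs
   ldm_1st + COSTS_N_INSNS (6).  */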
9619 bool is_ldm = load_multiple_operation (x, SImode);
9620 bool is_stm = store_multiple_operation (x, SImode);
9622 if (is_ldm || is_stm)
9624 if (speed_p)
9626 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9627 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9628 ? extra_cost->ldst.ldm_regs_per_insn_1st
9629 : extra_cost->ldst.stm_regs_per_insn_1st;
9630 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9631 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9632 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9634 *cost += regs_per_insn_1st
9635 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9636 + regs_per_insn_sub - 1)
9637 / regs_per_insn_sub);
9638 return true;
9642 return false;
9644 case DIV:
9645 case UDIV:
9646 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9647 && (mode == SFmode || !TARGET_VFP_SINGLE))
9648 *cost += COSTS_N_INSNS (speed_p
9649 ? extra_cost->fp[mode != SFmode].div : 0);
9650 else if (mode == SImode && TARGET_IDIV)
9651 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9652 else
9653 *cost = LIBCALL_COST (2);
9654 return false; /* All arguments must be in registers. */
9656 case MOD:
9657 /* MOD by a power of 2 can be expanded as:
9658 rsbs r1, r0, #0
9659 and r0, r0, #(n - 1)
9660 and r1, r1, #(n - 1)
9661 rsbpl r0, r1, #0. */
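/* That expansion is four instructions; the base cost set on entry to this
   function already covers one, so three more are added below (plus the
   per-insn extra costs when optimizing for speed).  */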
9662 if (CONST_INT_P (XEXP (x, 1))
9663 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9664 && mode == SImode)
9666 *cost += COSTS_N_INSNS (3);
9668 if (speed_p)
9669 *cost += 2 * extra_cost->alu.logical
9670 + extra_cost->alu.arith;
9671 return true;
9674 /* Fall-through. */
9675 case UMOD:
9676 *cost = LIBCALL_COST (2);
9677 return false; /* All arguments must be in registers. */
9679 case ROTATE:
9680 if (mode == SImode && REG_P (XEXP (x, 1)))
9682 *cost += (COSTS_N_INSNS (1)
9683 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9684 if (speed_p)
9685 *cost += extra_cost->alu.shift_reg;
9686 return true;
9688 /* Fall through */
9689 case ROTATERT:
9690 case ASHIFT:
9691 case LSHIFTRT:
9692 case ASHIFTRT:
9693 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9695 *cost += (COSTS_N_INSNS (2)
9696 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9697 if (speed_p)
9698 *cost += 2 * extra_cost->alu.shift;
9699 return true;
9701 else if (mode == SImode)
9703 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9704 /* Slightly disparage register shifts at -Os, but not by much. */
9705 if (!CONST_INT_P (XEXP (x, 1)))
9706 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9707 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9708 return true;
9710 else if (GET_MODE_CLASS (mode) == MODE_INT
9711 && GET_MODE_SIZE (mode) < 4)
9713 if (code == ASHIFT)
9715 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9716 /* Slightly disparage register shifts at -Os, but not by
9717 much. */
9718 if (!CONST_INT_P (XEXP (x, 1)))
9719 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9720 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9722 else if (code == LSHIFTRT || code == ASHIFTRT)
9724 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9726 /* Can use SBFX/UBFX. */
9727 if (speed_p)
9728 *cost += extra_cost->alu.bfx;
9729 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9731 else
9733 *cost += COSTS_N_INSNS (1);
9734 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9735 if (speed_p)
9737 if (CONST_INT_P (XEXP (x, 1)))
9738 *cost += 2 * extra_cost->alu.shift;
9739 else
9740 *cost += (extra_cost->alu.shift
9741 + extra_cost->alu.shift_reg);
9743 else
9744 /* Slightly disparage register shifts. */
9745 *cost += !CONST_INT_P (XEXP (x, 1));
9748 else /* Rotates. */
9750 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9751 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9752 if (speed_p)
9754 if (CONST_INT_P (XEXP (x, 1)))
9755 *cost += (2 * extra_cost->alu.shift
9756 + extra_cost->alu.log_shift);
9757 else
9758 *cost += (extra_cost->alu.shift
9759 + extra_cost->alu.shift_reg
9760 + extra_cost->alu.log_shift_reg);
9763 return true;
9766 *cost = LIBCALL_COST (2);
9767 return false;
9769 case BSWAP:
9770 if (arm_arch6)
9772 if (mode == SImode)
9774 if (speed_p)
9775 *cost += extra_cost->alu.rev;
9777 return false;
9780 else
9782 /* No rev instruction available. Look at arm_legacy_rev
9783 and thumb_legacy_rev for the form of RTL used then. */
9784 if (TARGET_THUMB)
9786 *cost += COSTS_N_INSNS (9);
9788 if (speed_p)
9790 *cost += 6 * extra_cost->alu.shift;
9791 *cost += 3 * extra_cost->alu.logical;
9794 else
9796 *cost += COSTS_N_INSNS (4);
9798 if (speed_p)
9800 *cost += 2 * extra_cost->alu.shift;
9801 *cost += extra_cost->alu.arith_shift;
9802 *cost += 2 * extra_cost->alu.logical;
9805 return true;
9807 return false;
9809 case MINUS:
9810 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9811 && (mode == SFmode || !TARGET_VFP_SINGLE))
9813 if (GET_CODE (XEXP (x, 0)) == MULT
9814 || GET_CODE (XEXP (x, 1)) == MULT)
9816 rtx mul_op0, mul_op1, sub_op;
9818 if (speed_p)
9819 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9821 if (GET_CODE (XEXP (x, 0)) == MULT)
9823 mul_op0 = XEXP (XEXP (x, 0), 0);
9824 mul_op1 = XEXP (XEXP (x, 0), 1);
9825 sub_op = XEXP (x, 1);
9827 else
9829 mul_op0 = XEXP (XEXP (x, 1), 0);
9830 mul_op1 = XEXP (XEXP (x, 1), 1);
9831 sub_op = XEXP (x, 0);
9834 /* The first operand of the multiply may be optionally
9835 negated. */
9836 if (GET_CODE (mul_op0) == NEG)
9837 mul_op0 = XEXP (mul_op0, 0);
9839 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9840 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9841 + rtx_cost (sub_op, mode, code, 0, speed_p));
9843 return true;
9846 if (speed_p)
9847 *cost += extra_cost->fp[mode != SFmode].addsub;
9848 return false;
9851 if (mode == SImode)
9853 rtx shift_by_reg = NULL;
9854 rtx shift_op;
9855 rtx non_shift_op;
9857 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9858 if (shift_op == NULL)
9860 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9861 non_shift_op = XEXP (x, 0);
9863 else
9864 non_shift_op = XEXP (x, 1);
9866 if (shift_op != NULL)
9868 if (shift_by_reg != NULL)
9870 if (speed_p)
9871 *cost += extra_cost->alu.arith_shift_reg;
9872 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9874 else if (speed_p)
9875 *cost += extra_cost->alu.arith_shift;
9877 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9878 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9879 return true;
9882 if (arm_arch_thumb2
9883 && GET_CODE (XEXP (x, 1)) == MULT)
9885 /* MLS. */
9886 if (speed_p)
9887 *cost += extra_cost->mult[0].add;
9888 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9889 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9890 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9891 return true;
9894 if (CONST_INT_P (XEXP (x, 0)))
9896 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9897 INTVAL (XEXP (x, 0)), NULL_RTX,
9898 NULL_RTX, 1, 0);
9899 *cost = COSTS_N_INSNS (insns);
9900 if (speed_p)
9901 *cost += insns * extra_cost->alu.arith;
9902 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9903 return true;
9905 else if (speed_p)
9906 *cost += extra_cost->alu.arith;
9908 return false;
9911 if (GET_MODE_CLASS (mode) == MODE_INT
9912 && GET_MODE_SIZE (mode) < 4)
9914 rtx shift_op, shift_reg;
9915 shift_reg = NULL;
9917 /* We check both sides of the MINUS for shifter operands since,
9918 unlike PLUS, it's not commutative. */
9920 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9921 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9923 /* Slightly disparage, as we might need to widen the result. */
9924 *cost += 1;
9925 if (speed_p)
9926 *cost += extra_cost->alu.arith;
9928 if (CONST_INT_P (XEXP (x, 0)))
9930 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9931 return true;
9934 return false;
9937 if (mode == DImode)
9939 *cost += COSTS_N_INSNS (1);
9941 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9943 rtx op1 = XEXP (x, 1);
9945 if (speed_p)
9946 *cost += 2 * extra_cost->alu.arith;
9948 if (GET_CODE (op1) == ZERO_EXTEND)
9949 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9950 0, speed_p);
9951 else
9952 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9953 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9954 0, speed_p);
9955 return true;
9957 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9959 if (speed_p)
9960 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9961 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9962 0, speed_p)
9963 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9964 return true;
9966 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9967 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9969 if (speed_p)
9970 *cost += (extra_cost->alu.arith
9971 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9972 ? extra_cost->alu.arith
9973 : extra_cost->alu.arith_shift));
9974 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9975 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9976 GET_CODE (XEXP (x, 1)), 0, speed_p));
9977 return true;
9980 if (speed_p)
9981 *cost += 2 * extra_cost->alu.arith;
9982 return false;
9985 /* Vector mode? */
9987 *cost = LIBCALL_COST (2);
9988 return false;
9990 case PLUS:
9991 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9992 && (mode == SFmode || !TARGET_VFP_SINGLE))
9994 if (GET_CODE (XEXP (x, 0)) == MULT)
9996 rtx mul_op0, mul_op1, add_op;
9998 if (speed_p)
9999 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10001 mul_op0 = XEXP (XEXP (x, 0), 0);
10002 mul_op1 = XEXP (XEXP (x, 0), 1);
10003 add_op = XEXP (x, 1);
10005 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10006 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10007 + rtx_cost (add_op, mode, code, 0, speed_p));
10009 return true;
10012 if (speed_p)
10013 *cost += extra_cost->fp[mode != SFmode].addsub;
10014 return false;
10016 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10018 *cost = LIBCALL_COST (2);
10019 return false;
10022 /* Narrow modes can be synthesized in SImode, but the range
10023 of useful sub-operations is limited. Check for shift operations
10024 on one of the operands. Only left shifts can be used in the
10025 narrow modes. */
10026 if (GET_MODE_CLASS (mode) == MODE_INT
10027 && GET_MODE_SIZE (mode) < 4)
10029 rtx shift_op, shift_reg;
10030 shift_reg = NULL;
10032 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
10034 if (CONST_INT_P (XEXP (x, 1)))
10036 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10037 INTVAL (XEXP (x, 1)), NULL_RTX,
10038 NULL_RTX, 1, 0);
10039 *cost = COSTS_N_INSNS (insns);
10040 if (speed_p)
10041 *cost += insns * extra_cost->alu.arith;
10042 /* Slightly penalize a narrow operation as the result may
10043 need widening. */
10044 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10045 return true;
10048 /* Slightly penalize a narrow operation as the result may
10049 need widening. */
10050 *cost += 1;
10051 if (speed_p)
10052 *cost += extra_cost->alu.arith;
10054 return false;
10057 if (mode == SImode)
10059 rtx shift_op, shift_reg;
10061 if (TARGET_INT_SIMD
10062 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10063 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10065 /* UXTA[BH] or SXTA[BH]. */
10066 if (speed_p)
10067 *cost += extra_cost->alu.extend_arith;
10068 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10069 0, speed_p)
10070 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10071 return true;
10074 shift_reg = NULL;
10075 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10076 if (shift_op != NULL)
10078 if (shift_reg)
10080 if (speed_p)
10081 *cost += extra_cost->alu.arith_shift_reg;
10082 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10084 else if (speed_p)
10085 *cost += extra_cost->alu.arith_shift;
10087 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10088 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10089 return true;
10091 if (GET_CODE (XEXP (x, 0)) == MULT)
10093 rtx mul_op = XEXP (x, 0);
10095 if (TARGET_DSP_MULTIPLY
10096 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10097 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10098 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10099 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10100 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10101 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10102 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10103 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10104 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10105 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10106 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10107 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10108 == 16))))))
10110 /* SMLA[BT][BT]. */
10111 if (speed_p)
10112 *cost += extra_cost->mult[0].extend_add;
10113 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10114 SIGN_EXTEND, 0, speed_p)
10115 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10116 SIGN_EXTEND, 0, speed_p)
10117 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10118 return true;
10121 if (speed_p)
10122 *cost += extra_cost->mult[0].add;
10123 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10124 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10125 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10126 return true;
10128 if (CONST_INT_P (XEXP (x, 1)))
10130 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10131 INTVAL (XEXP (x, 1)), NULL_RTX,
10132 NULL_RTX, 1, 0);
10133 *cost = COSTS_N_INSNS (insns);
10134 if (speed_p)
10135 *cost += insns * extra_cost->alu.arith;
10136 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10137 return true;
10139 else if (speed_p)
10140 *cost += extra_cost->alu.arith;
10142 return false;
10145 if (mode == DImode)
10147 if (arm_arch3m
10148 && GET_CODE (XEXP (x, 0)) == MULT
10149 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10150 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10151 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10152 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10154 if (speed_p)
10155 *cost += extra_cost->mult[1].extend_add;
10156 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10157 ZERO_EXTEND, 0, speed_p)
10158 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10159 ZERO_EXTEND, 0, speed_p)
10160 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10161 return true;
10164 *cost += COSTS_N_INSNS (1);
10166 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10167 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10169 if (speed_p)
10170 *cost += (extra_cost->alu.arith
10171 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10172 ? extra_cost->alu.arith
10173 : extra_cost->alu.arith_shift));
10175 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10176 0, speed_p)
10177 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10178 return true;
10181 if (speed_p)
10182 *cost += 2 * extra_cost->alu.arith;
10183 return false;
10186 /* Vector mode? */
10187 *cost = LIBCALL_COST (2);
10188 return false;
10189 case IOR:
10190 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10192 if (speed_p)
10193 *cost += extra_cost->alu.rev;
10195 return true;
10197 /* Fall through. */
10198 case AND: case XOR:
10199 if (mode == SImode)
10201 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10202 rtx op0 = XEXP (x, 0);
10203 rtx shift_op, shift_reg;
10205 if (subcode == NOT
10206 && (code == AND
10207 || (code == IOR && TARGET_THUMB2)))
10208 op0 = XEXP (op0, 0);
10210 shift_reg = NULL;
10211 shift_op = shifter_op_p (op0, &shift_reg);
10212 if (shift_op != NULL)
10214 if (shift_reg)
10216 if (speed_p)
10217 *cost += extra_cost->alu.log_shift_reg;
10218 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10220 else if (speed_p)
10221 *cost += extra_cost->alu.log_shift;
10223 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10224 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10225 return true;
10228 if (CONST_INT_P (XEXP (x, 1)))
10230 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10231 INTVAL (XEXP (x, 1)), NULL_RTX,
10232 NULL_RTX, 1, 0);
10234 *cost = COSTS_N_INSNS (insns);
10235 if (speed_p)
10236 *cost += insns * extra_cost->alu.logical;
10237 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10238 return true;
10241 if (speed_p)
10242 *cost += extra_cost->alu.logical;
10243 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10244 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10245 return true;
10248 if (mode == DImode)
10250 rtx op0 = XEXP (x, 0);
10251 enum rtx_code subcode = GET_CODE (op0);
10253 *cost += COSTS_N_INSNS (1);
10255 if (subcode == NOT
10256 && (code == AND
10257 || (code == IOR && TARGET_THUMB2)))
10258 op0 = XEXP (op0, 0);
10260 if (GET_CODE (op0) == ZERO_EXTEND)
10262 if (speed_p)
10263 *cost += 2 * extra_cost->alu.logical;
10265 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10266 0, speed_p)
10267 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10268 return true;
10270 else if (GET_CODE (op0) == SIGN_EXTEND)
10272 if (speed_p)
10273 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10275 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10276 0, speed_p)
10277 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10278 return true;
10281 if (speed_p)
10282 *cost += 2 * extra_cost->alu.logical;
10284 return true;
10286 /* Vector mode? */
10288 *cost = LIBCALL_COST (2);
10289 return false;
10291 case MULT:
10292 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10293 && (mode == SFmode || !TARGET_VFP_SINGLE))
10295 rtx op0 = XEXP (x, 0);
10297 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10298 op0 = XEXP (op0, 0);
10300 if (speed_p)
10301 *cost += extra_cost->fp[mode != SFmode].mult;
10303 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10304 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10305 return true;
10307 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10309 *cost = LIBCALL_COST (2);
10310 return false;
10313 if (mode == SImode)
10315 if (TARGET_DSP_MULTIPLY
10316 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10317 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10318 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10319 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10320 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10321 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10322 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10323 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10324 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10325 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10326 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10327 && (INTVAL (XEXP (XEXP (x, 1), 1))
10328 == 16))))))
10330 /* SMUL[TB][TB]. */
10331 if (speed_p)
10332 *cost += extra_cost->mult[0].extend;
10333 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10334 SIGN_EXTEND, 0, speed_p);
10335 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10336 SIGN_EXTEND, 1, speed_p);
10337 return true;
10339 if (speed_p)
10340 *cost += extra_cost->mult[0].simple;
10341 return false;
10344 if (mode == DImode)
10346 if (arm_arch3m
10347 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10348 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10349 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10350 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10352 if (speed_p)
10353 *cost += extra_cost->mult[1].extend;
10354 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10355 ZERO_EXTEND, 0, speed_p)
10356 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10357 ZERO_EXTEND, 0, speed_p));
10358 return true;
10361 *cost = LIBCALL_COST (2);
10362 return false;
10365 /* Vector mode? */
10366 *cost = LIBCALL_COST (2);
10367 return false;
10369 case NEG:
10370 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10371 && (mode == SFmode || !TARGET_VFP_SINGLE))
10373 if (GET_CODE (XEXP (x, 0)) == MULT)
10375 /* VNMUL. */
10376 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10377 return true;
10380 if (speed_p)
10381 *cost += extra_cost->fp[mode != SFmode].neg;
10383 return false;
10385 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10387 *cost = LIBCALL_COST (1);
10388 return false;
10391 if (mode == SImode)
10393 if (GET_CODE (XEXP (x, 0)) == ABS)
10395 *cost += COSTS_N_INSNS (1);
10396 /* Assume the non-flag-changing variant. */
10397 if (speed_p)
10398 *cost += (extra_cost->alu.log_shift
10399 + extra_cost->alu.arith_shift);
10400 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10401 return true;
10404 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10405 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10407 *cost += COSTS_N_INSNS (1);
10408 /* No extra cost for MOV imm and MVN imm. */
10409 /* If the comparison op is using the flags, there's no further
10410 cost, otherwise we need to add the cost of the comparison. */
10411 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10412 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10413 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10415 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10416 *cost += (COSTS_N_INSNS (1)
10417 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10418 0, speed_p)
10419 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10420 1, speed_p));
10421 if (speed_p)
10422 *cost += extra_cost->alu.arith;
10424 return true;
10427 if (speed_p)
10428 *cost += extra_cost->alu.arith;
10429 return false;
10432 if (GET_MODE_CLASS (mode) == MODE_INT
10433 && GET_MODE_SIZE (mode) < 4)
10435 /* Slightly disparage, as we might need an extend operation. */
10436 *cost += 1;
10437 if (speed_p)
10438 *cost += extra_cost->alu.arith;
10439 return false;
10442 if (mode == DImode)
10444 *cost += COSTS_N_INSNS (1);
10445 if (speed_p)
10446 *cost += 2 * extra_cost->alu.arith;
10447 return false;
10450 /* Vector mode? */
10451 *cost = LIBCALL_COST (1);
10452 return false;
10454 case NOT:
10455 if (mode == SImode)
10457 rtx shift_op;
10458 rtx shift_reg = NULL;
10460 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10462 if (shift_op)
10464 if (shift_reg != NULL)
10466 if (speed_p)
10467 *cost += extra_cost->alu.log_shift_reg;
10468 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10470 else if (speed_p)
10471 *cost += extra_cost->alu.log_shift;
10472 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10473 return true;
10476 if (speed_p)
10477 *cost += extra_cost->alu.logical;
10478 return false;
10480 if (mode == DImode)
10482 *cost += COSTS_N_INSNS (1);
10483 return false;
10486 /* Vector mode? */
10488 *cost += LIBCALL_COST (1);
10489 return false;
10491 case IF_THEN_ELSE:
10493 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10495 *cost += COSTS_N_INSNS (3);
10496 return true;
10498 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10499 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10501 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10502 /* Assume that if one arm of the if_then_else is a register,
10503 that it will be tied with the result and eliminate the
10504 conditional insn. */
10505 if (REG_P (XEXP (x, 1)))
10506 *cost += op2cost;
10507 else if (REG_P (XEXP (x, 2)))
10508 *cost += op1cost;
10509 else
10511 if (speed_p)
10513 if (extra_cost->alu.non_exec_costs_exec)
10514 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10515 else
10516 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10518 else
10519 *cost += op1cost + op2cost;
10522 return true;
10524 case COMPARE:
10525 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10526 *cost = 0;
10527 else
10529 machine_mode op0mode;
10530 /* We'll mostly assume that the cost of a compare is the cost of the
10531 LHS. However, there are some notable exceptions. */
10533 /* Floating point compares are never done as side-effects. */
10534 op0mode = GET_MODE (XEXP (x, 0));
10535 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10536 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10538 if (speed_p)
10539 *cost += extra_cost->fp[op0mode != SFmode].compare;
10541 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10543 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10544 return true;
10547 return false;
10549 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10551 *cost = LIBCALL_COST (2);
10552 return false;
10555 /* DImode compares normally take two insns. */
10556 if (op0mode == DImode)
10558 *cost += COSTS_N_INSNS (1);
10559 if (speed_p)
10560 *cost += 2 * extra_cost->alu.arith;
10561 return false;
10564 if (op0mode == SImode)
10566 rtx shift_op;
10567 rtx shift_reg;
10569 if (XEXP (x, 1) == const0_rtx
10570 && !(REG_P (XEXP (x, 0))
10571 || (GET_CODE (XEXP (x, 0)) == SUBREG
10572 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10574 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10576 /* Multiply operations that set the flags are often
10577 significantly more expensive. */
10578 if (speed_p
10579 && GET_CODE (XEXP (x, 0)) == MULT
10580 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10581 *cost += extra_cost->mult[0].flag_setting;
10583 if (speed_p
10584 && GET_CODE (XEXP (x, 0)) == PLUS
10585 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10586 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10587 0), 1), mode))
10588 *cost += extra_cost->mult[0].flag_setting;
10589 return true;
10592 shift_reg = NULL;
10593 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10594 if (shift_op != NULL)
10596 if (shift_reg != NULL)
10598 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10599 1, speed_p);
10600 if (speed_p)
10601 *cost += extra_cost->alu.arith_shift_reg;
10603 else if (speed_p)
10604 *cost += extra_cost->alu.arith_shift;
10605 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10606 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10607 return true;
10610 if (speed_p)
10611 *cost += extra_cost->alu.arith;
10612 if (CONST_INT_P (XEXP (x, 1))
10613 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10615 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10616 return true;
10618 return false;
10621 /* Vector mode? */
10623 *cost = LIBCALL_COST (2);
10624 return false;
10626 return true;
10628 case EQ:
10629 case NE:
10630 case LT:
10631 case LE:
10632 case GT:
10633 case GE:
10634 case LTU:
10635 case LEU:
10636 case GEU:
10637 case GTU:
10638 case ORDERED:
10639 case UNORDERED:
10640 case UNEQ:
10641 case UNLE:
10642 case UNLT:
10643 case UNGE:
10644 case UNGT:
10645 case LTGT:
10646 if (outer_code == SET)
10648 /* Is it a store-flag operation? */
10649 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10650 && XEXP (x, 1) == const0_rtx)
10652 /* Thumb also needs an IT insn. */
10653 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10654 return true;
10656 if (XEXP (x, 1) == const0_rtx)
10658 switch (code)
10660 case LT:
10661 /* LSR Rd, Rn, #31. */
10662 if (speed_p)
10663 *cost += extra_cost->alu.shift;
10664 break;
10666 case EQ:
10667 /* RSBS T1, Rn, #0
10668 ADC Rd, Rn, T1. */
10670 case NE:
10671 /* SUBS T1, Rn, #1
10672 SBC Rd, Rn, T1. */
10673 *cost += COSTS_N_INSNS (1);
10674 break;
10676 case LE:
10677 /* RSBS T1, Rn, Rn, LSR #31
10678 ADC Rd, Rn, T1. */
10679 *cost += COSTS_N_INSNS (1);
10680 if (speed_p)
10681 *cost += extra_cost->alu.arith_shift;
10682 break;
10684 case GT:
10685 /* RSB Rd, Rn, Rn, ASR #1
10686 LSR Rd, Rd, #31. */
10687 *cost += COSTS_N_INSNS (1);
10688 if (speed_p)
10689 *cost += (extra_cost->alu.arith_shift
10690 + extra_cost->alu.shift);
10691 break;
10693 case GE:
10694 /* ASR Rd, Rn, #31
10695 ADD Rd, Rn, #1. */
10696 *cost += COSTS_N_INSNS (1);
10697 if (speed_p)
10698 *cost += extra_cost->alu.shift;
10699 break;
10701 default:
10702 /* Remaining cases are either meaningless or would take
10703 three insns anyway. */
10704 *cost = COSTS_N_INSNS (3);
10705 break;
10707 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10708 return true;
10710 else
10712 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10713 if (CONST_INT_P (XEXP (x, 1))
10714 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10716 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10717 return true;
10720 return false;
10723 /* Not directly inside a set. If it involves the condition code
10724 register it must be the condition for a branch, cond_exec or
10725 I_T_E operation. Since the comparison is performed elsewhere
10726 this is just the control part which has no additional
10727 cost. */
10728 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10729 && XEXP (x, 1) == const0_rtx)
10731 *cost = 0;
10732 return true;
10734 return false;
10736 case ABS:
10737 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10738 && (mode == SFmode || !TARGET_VFP_SINGLE))
10740 if (speed_p)
10741 *cost += extra_cost->fp[mode != SFmode].neg;
10743 return false;
10745 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10747 *cost = LIBCALL_COST (1);
10748 return false;
10751 if (mode == SImode)
10753 if (speed_p)
10754 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10755 return false;
10757 /* Vector mode? */
10758 *cost = LIBCALL_COST (1);
10759 return false;
10761 case SIGN_EXTEND:
10762 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10763 && MEM_P (XEXP (x, 0)))
10765 if (mode == DImode)
10766 *cost += COSTS_N_INSNS (1);
10768 if (!speed_p)
10769 return true;
10771 if (GET_MODE (XEXP (x, 0)) == SImode)
10772 *cost += extra_cost->ldst.load;
10773 else
10774 *cost += extra_cost->ldst.load_sign_extend;
10776 if (mode == DImode)
10777 *cost += extra_cost->alu.shift;
10779 return true;
10782 /* Widening from less than 32-bits requires an extend operation. */
10783 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10785 /* We have SXTB/SXTH. */
10786 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10787 if (speed_p)
10788 *cost += extra_cost->alu.extend;
10790 else if (GET_MODE (XEXP (x, 0)) != SImode)
10792 /* Needs two shifts. */
10793 *cost += COSTS_N_INSNS (1);
10794 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10795 if (speed_p)
10796 *cost += 2 * extra_cost->alu.shift;
10799 /* Widening beyond 32-bits requires one more insn. */
10800 if (mode == DImode)
10802 *cost += COSTS_N_INSNS (1);
10803 if (speed_p)
10804 *cost += extra_cost->alu.shift;
10807 return true;
10809 case ZERO_EXTEND:
10810 if ((arm_arch4
10811 || GET_MODE (XEXP (x, 0)) == SImode
10812 || GET_MODE (XEXP (x, 0)) == QImode)
10813 && MEM_P (XEXP (x, 0)))
10815 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10817 if (mode == DImode)
10818 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10820 return true;
10823 /* Widening from less than 32-bits requires an extend operation. */
10824 if (GET_MODE (XEXP (x, 0)) == QImode)
10826 /* UXTB can be a shorter instruction in Thumb2, but it might
10827 be slower than the AND Rd, Rn, #255 alternative. When
10828 optimizing for speed it should never be slower to use
10829 AND, and we don't really model 16-bit vs 32-bit insns
10830 here. */
10831 if (speed_p)
10832 *cost += extra_cost->alu.logical;
10834 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10836 /* We have UXTB/UXTH. */
10837 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10838 if (speed_p)
10839 *cost += extra_cost->alu.extend;
10841 else if (GET_MODE (XEXP (x, 0)) != SImode)
10843 /* Needs two shifts. It's marginally preferable to use
10844 shifts rather than two BIC instructions as the second
10845 shift may merge with a subsequent insn as a shifter
10846 op. */
10847 *cost = COSTS_N_INSNS (2);
10848 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10849 if (speed_p)
10850 *cost += 2 * extra_cost->alu.shift;
10853 /* Widening beyond 32-bits requires one more insn. */
10854 if (mode == DImode)
10856 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10859 return true;
10861 case CONST_INT:
10862 *cost = 0;
10863 /* CONST_INT has no mode, so we cannot tell for sure how many
10864 insns are really going to be needed. The best we can do is
10865 look at the value passed. If it fits in SImode, then assume
10866 that's the mode it will be used for. Otherwise assume it
10867 will be used in DImode. */
10868 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10869 mode = SImode;
10870 else
10871 mode = DImode;
10873 /* Avoid blowing up in arm_gen_constant (). */
10874 if (!(outer_code == PLUS
10875 || outer_code == AND
10876 || outer_code == IOR
10877 || outer_code == XOR
10878 || outer_code == MINUS))
10879 outer_code = SET;
10881 const_int_cost:
10882 if (mode == SImode)
10884 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10885 INTVAL (x), NULL, NULL,
10886 0, 0));
10887 /* Extra costs? */
10889 else
10891 *cost += COSTS_N_INSNS (arm_gen_constant
10892 (outer_code, SImode, NULL,
10893 trunc_int_for_mode (INTVAL (x), SImode),
10894 NULL, NULL, 0, 0)
10895 + arm_gen_constant (outer_code, SImode, NULL,
10896 INTVAL (x) >> 32, NULL,
10897 NULL, 0, 0));
10898 /* Extra costs? */
10901 return true;
10903 case CONST:
10904 case LABEL_REF:
10905 case SYMBOL_REF:
10906 if (speed_p)
10908 if (arm_arch_thumb2 && !flag_pic)
10909 *cost += COSTS_N_INSNS (1);
10910 else
10911 *cost += extra_cost->ldst.load;
10913 else
10914 *cost += COSTS_N_INSNS (1);
10916 if (flag_pic)
10918 *cost += COSTS_N_INSNS (1);
10919 if (speed_p)
10920 *cost += extra_cost->alu.arith;
10923 return true;
10925 case CONST_FIXED:
10926 *cost = COSTS_N_INSNS (4);
10927 /* Fixme. */
10928 return true;
10930 case CONST_DOUBLE:
10931 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10932 && (mode == SFmode || !TARGET_VFP_SINGLE))
10934 if (vfp3_const_double_rtx (x))
10936 if (speed_p)
10937 *cost += extra_cost->fp[mode == DFmode].fpconst;
10938 return true;
10941 if (speed_p)
10943 if (mode == DFmode)
10944 *cost += extra_cost->ldst.loadd;
10945 else
10946 *cost += extra_cost->ldst.loadf;
10948 else
10949 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10951 return true;
10953 *cost = COSTS_N_INSNS (4);
10954 return true;
10956 case CONST_VECTOR:
10957 /* Fixme. */
10958 if (TARGET_NEON
10959 && TARGET_HARD_FLOAT
10960 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10961 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10962 *cost = COSTS_N_INSNS (1);
10963 else
10964 *cost = COSTS_N_INSNS (4);
10965 return true;
10967 case HIGH:
10968 case LO_SUM:
10969 /* When optimizing for size, we prefer constant pool entries to
10970 MOVW/MOVT pairs, so bump the cost of these slightly. */
10971 if (!speed_p)
10972 *cost += 1;
10973 return true;
10975 case CLZ:
10976 if (speed_p)
10977 *cost += extra_cost->alu.clz;
10978 return false;
10980 case SMIN:
10981 if (XEXP (x, 1) == const0_rtx)
10983 if (speed_p)
10984 *cost += extra_cost->alu.log_shift;
10985 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10986 return true;
10988 /* Fall through. */
10989 case SMAX:
10990 case UMIN:
10991 case UMAX:
10992 *cost += COSTS_N_INSNS (1);
10993 return false;
10995 case TRUNCATE:
10996 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10997 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10998 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10999 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11000 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11001 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11002 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11003 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11004 == ZERO_EXTEND))))
11006 if (speed_p)
11007 *cost += extra_cost->mult[1].extend;
11008 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11009 ZERO_EXTEND, 0, speed_p)
11010 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11011 ZERO_EXTEND, 0, speed_p));
11012 return true;
11014 *cost = LIBCALL_COST (1);
11015 return false;
11017 case UNSPEC_VOLATILE:
11018 case UNSPEC:
11019 return arm_unspec_cost (x, outer_code, speed_p, cost);
11021 case PC:
11022 /* Reading the PC is like reading any other register. Writing it
11023 is more expensive, but we take that into account elsewhere. */
11024 *cost = 0;
11025 return true;
11027 case ZERO_EXTRACT:
11028 /* TODO: Simple zero_extract of bottom bits using AND. */
11029 /* Fall through. */
11030 case SIGN_EXTRACT:
11031 if (arm_arch6
11032 && mode == SImode
11033 && CONST_INT_P (XEXP (x, 1))
11034 && CONST_INT_P (XEXP (x, 2)))
11036 if (speed_p)
11037 *cost += extra_cost->alu.bfx;
11038 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11039 return true;
11041 /* Without UBFX/SBFX, need to resort to shift operations. */
11042 *cost += COSTS_N_INSNS (1);
11043 if (speed_p)
11044 *cost += 2 * extra_cost->alu.shift;
11045 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11046 return true;
11048 case FLOAT_EXTEND:
11049 if (TARGET_HARD_FLOAT)
11051 if (speed_p)
11052 *cost += extra_cost->fp[mode == DFmode].widen;
11053 if (!TARGET_FPU_ARMV8
11054 && GET_MODE (XEXP (x, 0)) == HFmode)
11056 /* Pre v8, widening HF->DF is a two-step process, first
11057 widening to SFmode. */
11058 *cost += COSTS_N_INSNS (1);
11059 if (speed_p)
11060 *cost += extra_cost->fp[0].widen;
11062 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11063 return true;
11066 *cost = LIBCALL_COST (1);
11067 return false;
11069 case FLOAT_TRUNCATE:
11070 if (TARGET_HARD_FLOAT)
11072 if (speed_p)
11073 *cost += extra_cost->fp[mode == DFmode].narrow;
11074 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11075 return true;
11076 /* Vector modes? */
11078 *cost = LIBCALL_COST (1);
11079 return false;
11081 case FMA:
11082 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11084 rtx op0 = XEXP (x, 0);
11085 rtx op1 = XEXP (x, 1);
11086 rtx op2 = XEXP (x, 2);
11089 /* vfms or vfnma. */
11090 if (GET_CODE (op0) == NEG)
11091 op0 = XEXP (op0, 0);
11093 /* vfnms or vfnma. */
11094 if (GET_CODE (op2) == NEG)
11095 op2 = XEXP (op2, 0);
11097 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11098 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11099 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11101 if (speed_p)
11102 *cost += extra_cost->fp[mode == DFmode].fma;
11104 return true;
11107 *cost = LIBCALL_COST (3);
11108 return false;
11110 case FIX:
11111 case UNSIGNED_FIX:
11112 if (TARGET_HARD_FLOAT)
11114 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11115 a vcvt fixed-point conversion. */
11116 if (code == FIX && mode == SImode
11117 && GET_CODE (XEXP (x, 0)) == FIX
11118 && GET_MODE (XEXP (x, 0)) == SFmode
11119 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11120 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11121 > 0)
11123 if (speed_p)
11124 *cost += extra_cost->fp[0].toint;
11126 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11127 code, 0, speed_p);
11128 return true;
11131 if (GET_MODE_CLASS (mode) == MODE_INT)
11133 mode = GET_MODE (XEXP (x, 0));
11134 if (speed_p)
11135 *cost += extra_cost->fp[mode == DFmode].toint;
11136 /* Strip off the 'cost' of rounding towards zero. */
11137 if (GET_CODE (XEXP (x, 0)) == FIX)
11138 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11139 0, speed_p);
11140 else
11141 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11142 /* ??? Increase the cost to deal with transferring from
11143 FP -> CORE registers? */
11144 return true;
11146 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11147 && TARGET_FPU_ARMV8)
11149 if (speed_p)
11150 *cost += extra_cost->fp[mode == DFmode].roundint;
11151 return false;
11153 /* Vector costs? */
11155 *cost = LIBCALL_COST (1);
11156 return false;
11158 case FLOAT:
11159 case UNSIGNED_FLOAT:
11160 if (TARGET_HARD_FLOAT)
11162 /* ??? Increase the cost to deal with transferring from CORE
11163 -> FP registers? */
11164 if (speed_p)
11165 *cost += extra_cost->fp[mode == DFmode].fromint;
11166 return false;
11168 *cost = LIBCALL_COST (1);
11169 return false;
11171 case CALL:
11172 return true;
11174 case ASM_OPERANDS:
11176 /* Just a guess. Guess number of instructions in the asm
11177 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11178 though (see PR60663). */
11179 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11180 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11182 *cost = COSTS_N_INSNS (asm_length + num_operands);
11183 return true;
11185 default:
11186 if (mode != VOIDmode)
11187 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11188 else
11189 *cost = COSTS_N_INSNS (4); /* Who knows? */
11190 return false;
11194 #undef HANDLE_NARROW_SHIFT_ARITH
11196 /* Target hook for RTX costs.  Dispatches to the size or speed implementation appropriate for the current tuning. */
11197 static bool
11198 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11199 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11201 bool result;
11202 int code = GET_CODE (x);
11204 if (TARGET_OLD_RTX_COSTS
11205 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11207 /* Old way. (Deprecated.) */
11208 if (!speed)
11209 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11210 (enum rtx_code) outer_code, total);
11211 else
11212 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11213 (enum rtx_code) outer_code, total,
11214 speed);
11216 else
11218 /* New way. */
11219 if (current_tune->insn_extra_cost)
11220 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11221 (enum rtx_code) outer_code,
11222 current_tune->insn_extra_cost,
11223 total, speed);
11224 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11225 && current_tune->insn_extra_cost == NULL */
11226 else
11227 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11228 (enum rtx_code) outer_code,
11229 &generic_extra_costs, total, speed);
11232 if (dump_file && (dump_flags & TDF_DETAILS))
11234 print_rtl_single (dump_file, x);
11235 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11236 *total, result ? "final" : "partial");
11238 return result;
11241 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11242 supported on any "slowmul" cores, so it can be ignored. */
11244 static bool
11245 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11246 int *total, bool speed)
11248 machine_mode mode = GET_MODE (x);
11250 if (TARGET_THUMB)
11252 *total = thumb1_rtx_costs (x, code, outer_code);
11253 return true;
11256 switch (code)
11258 case MULT:
11259 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11260 || mode == DImode)
11262 *total = COSTS_N_INSNS (20);
11263 return false;
11266 if (CONST_INT_P (XEXP (x, 1)))
11268 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11269 & (unsigned HOST_WIDE_INT) 0xffffffff);
11270 int cost, const_ok = const_ok_for_arm (i);
11271 int j, booth_unit_size;
11273 /* Tune as appropriate. */
11274 cost = const_ok ? 4 : 8;
11275 booth_unit_size = 2;
11276 for (j = 0; i && j < 32; j += booth_unit_size)
11278 i >>= booth_unit_size;
11279 cost++;
11282 *total = COSTS_N_INSNS (cost);
11283 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
11284 return true;
11287 *total = COSTS_N_INSNS (20);
11288 return false;
11290 default:
11291 return arm_rtx_costs_1 (x, outer_code, total, speed);
11296 /* RTX cost for cores with a fast multiply unit (M variants). */
11298 static bool
11299 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11300 int *total, bool speed)
11302 machine_mode mode = GET_MODE (x);
11304 if (TARGET_THUMB1)
11306 *total = thumb1_rtx_costs (x, code, outer_code);
11307 return true;
11310 /* ??? should thumb2 use different costs? */
11311 switch (code)
11313 case MULT:
11314 /* There is no point basing this on the tuning, since it is always the
11315 fast variant if it exists at all. */
11316 if (mode == DImode
11317 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11318 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11319 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11321 *total = COSTS_N_INSNS (2);
11322 return false;
11326 if (mode == DImode)
11328 *total = COSTS_N_INSNS (5);
11329 return false;
11332 if (CONST_INT_P (XEXP (x, 1)))
11334 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11335 & (unsigned HOST_WIDE_INT) 0xffffffff);
11336 int cost, const_ok = const_ok_for_arm (i);
11337 int j, booth_unit_size;
11339 /* Tune as appropriate. */
11340 cost = const_ok ? 4 : 8;
11341 booth_unit_size = 8;
11342 for (j = 0; i && j < 32; j += booth_unit_size)
11344 i >>= booth_unit_size;
11345 cost++;
11348 *total = COSTS_N_INSNS (cost);
11349 return false;
11352 if (mode == SImode)
11354 *total = COSTS_N_INSNS (4);
11355 return false;
11358 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11360 if (TARGET_HARD_FLOAT
11361 && (mode == SFmode
11362 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11364 *total = COSTS_N_INSNS (1);
11365 return false;
11369 /* Requires a lib call */
11370 *total = COSTS_N_INSNS (20);
11371 return false;
11373 default:
11374 return arm_rtx_costs_1 (x, outer_code, total, speed);
11379 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11380 so it can be ignored. */
11382 static bool
11383 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11384 int *total, bool speed)
11386 machine_mode mode = GET_MODE (x);
11388 if (TARGET_THUMB)
11390 *total = thumb1_rtx_costs (x, code, outer_code);
11391 return true;
11394 switch (code)
11396 case COMPARE:
11397 if (GET_CODE (XEXP (x, 0)) != MULT)
11398 return arm_rtx_costs_1 (x, outer_code, total, speed);
11400 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11401 will stall until the multiplication is complete. */
11402 *total = COSTS_N_INSNS (3);
11403 return false;
11405 case MULT:
11406 /* There is no point basing this on the tuning, since it is always the
11407 fast variant if it exists at all. */
11408 if (mode == DImode
11409 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11410 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11411 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11413 *total = COSTS_N_INSNS (2);
11414 return false;
11418 if (mode == DImode)
11420 *total = COSTS_N_INSNS (5);
11421 return false;
11424 if (CONST_INT_P (XEXP (x, 1)))
11426 /* If operand 1 is a constant we can more accurately
11427 calculate the cost of the multiply. The multiplier can
11428 retire 15 bits on the first cycle and a further 12 on the
11429 second. We do, of course, have to load the constant into
11430 a register first. */
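/* Illustrative reading of the masks used below: any bits set above the low
   15 (i & 0xffff8000) cost one extra cycle, and any set above the low 27
   (i & 0xf8000000) cost one more, matching the 15-then-12-bit retirement
   described above.  */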
11431 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11432 /* There's a general overhead of one cycle. */
11433 int cost = 1;
11434 unsigned HOST_WIDE_INT masked_const;
11436 if (i & 0x80000000)
11437 i = ~i;
11439 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11441 masked_const = i & 0xffff8000;
11442 if (masked_const != 0)
11444 cost++;
11445 masked_const = i & 0xf8000000;
11446 if (masked_const != 0)
11447 cost++;
11449 *total = COSTS_N_INSNS (cost);
11450 return false;
11453 if (mode == SImode)
11455 *total = COSTS_N_INSNS (3);
11456 return false;
11459 /* Requires a lib call */
11460 *total = COSTS_N_INSNS (20);
11461 return false;
11463 default:
11464 return arm_rtx_costs_1 (x, outer_code, total, speed);
11469 /* RTX costs for 9e (and later) cores. */
11471 static bool
11472 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11473 int *total, bool speed)
11475 machine_mode mode = GET_MODE (x);
11477 if (TARGET_THUMB1)
11479 switch (code)
11481 case MULT:
11482 /* Small multiply: 32 cycles for an integer multiply inst. */
11483 if (arm_arch6m && arm_m_profile_small_mul)
11484 *total = COSTS_N_INSNS (32);
11485 else
11486 *total = COSTS_N_INSNS (3);
11487 return true;
11489 default:
11490 *total = thumb1_rtx_costs (x, code, outer_code);
11491 return true;
11495 switch (code)
11497 case MULT:
11498 /* There is no point basing this on the tuning, since it is always the
11499 fast variant if it exists at all. */
11500 if (mode == DImode
11501 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11502 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11503 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11505 *total = COSTS_N_INSNS (2);
11506 return false;
11510 if (mode == DImode)
11512 *total = COSTS_N_INSNS (5);
11513 return false;
11516 if (mode == SImode)
11518 *total = COSTS_N_INSNS (2);
11519 return false;
11522 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11524 if (TARGET_HARD_FLOAT
11525 && (mode == SFmode
11526 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11528 *total = COSTS_N_INSNS (1);
11529 return false;
11533 *total = COSTS_N_INSNS (20);
11534 return false;
11536 default:
11537 return arm_rtx_costs_1 (x, outer_code, total, speed);
11540 /* All address computations that can be done are free, but rtx cost returns
11541 the same for practically all of them. So we weight the different types
11542 of address here in the order (most pref first):
11543 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
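/* The values returned below (0, 2, 3, 4, 6 and 10) are only meaningful
   relative to one another: lower means more preferred.  */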
11544 static inline int
11545 arm_arm_address_cost (rtx x)
11547 enum rtx_code c = GET_CODE (x);
11549 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11550 return 0;
11551 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11552 return 10;
11554 if (c == PLUS)
11556 if (CONST_INT_P (XEXP (x, 1)))
11557 return 2;
11559 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11560 return 3;
11562 return 4;
11565 return 6;
11568 static inline int
11569 arm_thumb_address_cost (rtx x)
11571 enum rtx_code c = GET_CODE (x);
11573 if (c == REG)
11574 return 1;
11575 if (c == PLUS
11576 && REG_P (XEXP (x, 0))
11577 && CONST_INT_P (XEXP (x, 1)))
11578 return 1;
11580 return 2;
11583 static int
11584 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11585 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11587 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11590 /* Adjust cost hook for XScale. */
11591 static bool
11592 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11594 /* Some true dependencies can have a higher cost depending
11595 on precisely how certain input operands are used. */
11596 if (REG_NOTE_KIND (link) == 0
11597 && recog_memoized (insn) >= 0
11598 && recog_memoized (dep) >= 0)
11600 int shift_opnum = get_attr_shift (insn);
11601 enum attr_type attr_type = get_attr_type (dep);
11603 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11604 operand for INSN. If we have a shifted input operand and the
11605 instruction we depend on is another ALU instruction, then we may
11606 have to account for an additional stall. */
11607 if (shift_opnum != 0
11608 && (attr_type == TYPE_ALU_SHIFT_IMM
11609 || attr_type == TYPE_ALUS_SHIFT_IMM
11610 || attr_type == TYPE_LOGIC_SHIFT_IMM
11611 || attr_type == TYPE_LOGICS_SHIFT_IMM
11612 || attr_type == TYPE_ALU_SHIFT_REG
11613 || attr_type == TYPE_ALUS_SHIFT_REG
11614 || attr_type == TYPE_LOGIC_SHIFT_REG
11615 || attr_type == TYPE_LOGICS_SHIFT_REG
11616 || attr_type == TYPE_MOV_SHIFT
11617 || attr_type == TYPE_MVN_SHIFT
11618 || attr_type == TYPE_MOV_SHIFT_REG
11619 || attr_type == TYPE_MVN_SHIFT_REG))
11621 rtx shifted_operand;
11622 int opno;
11624 /* Get the shifted operand. */
11625 extract_insn (insn);
11626 shifted_operand = recog_data.operand[shift_opnum];
11628 /* Iterate over all the operands in DEP. If we write an operand
11629 that overlaps with SHIFTED_OPERAND, then we have to increase the
11630 cost of this dependency. */
11631 extract_insn (dep);
11632 preprocess_constraints (dep);
11633 for (opno = 0; opno < recog_data.n_operands; opno++)
11635 /* We can ignore strict inputs. */
11636 if (recog_data.operand_type[opno] == OP_IN)
11637 continue;
11639 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11640 shifted_operand))
11642 *cost = 2;
11643 return false;
11648 return true;
11651 /* Adjust cost hook for Cortex A9. */
11652 static bool
11653 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11655 switch (REG_NOTE_KIND (link))
11657 case REG_DEP_ANTI:
11658 *cost = 0;
11659 return false;
11661 case REG_DEP_TRUE:
11662 case REG_DEP_OUTPUT:
11663 if (recog_memoized (insn) >= 0
11664 && recog_memoized (dep) >= 0)
11666 if (GET_CODE (PATTERN (insn)) == SET)
11668 if (GET_MODE_CLASS
11669 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11670 || GET_MODE_CLASS
11671 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11673 enum attr_type attr_type_insn = get_attr_type (insn);
11674 enum attr_type attr_type_dep = get_attr_type (dep);
11676 /* By default all dependencies of the form
11677 s0 = s0 <op> s1
11678 s0 = s0 <op> s2
11679 have an extra latency of 1 cycle because
11680 of the input and output dependency in this
11681 case. However this gets modeled as a true
11682 dependency and hence all these checks. */
11683 if (REG_P (SET_DEST (PATTERN (insn)))
11684 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11686 /* FMACS is a special case where the dependent
11687 instruction can be issued 3 cycles before
11688 the normal latency in case of an output
11689 dependency. */
11690 if ((attr_type_insn == TYPE_FMACS
11691 || attr_type_insn == TYPE_FMACD)
11692 && (attr_type_dep == TYPE_FMACS
11693 || attr_type_dep == TYPE_FMACD))
11695 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11696 *cost = insn_default_latency (dep) - 3;
11697 else
11698 *cost = insn_default_latency (dep);
11699 return false;
11701 else
11703 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11704 *cost = insn_default_latency (dep) + 1;
11705 else
11706 *cost = insn_default_latency (dep);
11708 return false;
11713 break;
11715 default:
11716 gcc_unreachable ();
11719 return true;
11722 /* Adjust cost hook for FA726TE. */
11723 static bool
11724 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11726 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11727 followed by a predicated one) carries a penalty of 3. */
11728 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11729 && recog_memoized (insn) >= 0
11730 && recog_memoized (dep) >= 0
11731 && get_attr_conds (dep) == CONDS_SET)
11733 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11734 if (get_attr_conds (insn) == CONDS_USE
11735 && get_attr_type (insn) != TYPE_BRANCH)
11737 *cost = 3;
11738 return false;
11741 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11742 || get_attr_conds (insn) == CONDS_USE)
11744 *cost = 0;
11745 return false;
11749 return true;
11752 /* Implement TARGET_REGISTER_MOVE_COST.
11754 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11755 such a move is typically more expensive than a single memory access. We set
11756 the cost to less than two memory accesses so that floating
11757 point to integer conversion does not go through memory. */
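/* Illustrative arithmetic (based on the values used below): on 32-bit
   targets arm_memory_move_cost returns 10, so the VFP<->core cost of 15
   keeps a register-register transfer cheaper than the 20 units that two
   memory accesses would cost.  */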
11760 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11761 reg_class_t from, reg_class_t to)
11763 if (TARGET_32BIT)
11765 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11766 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11767 return 15;
11768 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11769 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11770 return 4;
11771 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11772 return 20;
11773 else
11774 return 2;
11776 else
11778 if (from == HI_REGS || to == HI_REGS)
11779 return 4;
11780 else
11781 return 2;
11785 /* Implement TARGET_MEMORY_MOVE_COST. */
11788 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11789 bool in ATTRIBUTE_UNUSED)
11791 if (TARGET_32BIT)
11792 return 10;
11793 else
11795 if (GET_MODE_SIZE (mode) < 4)
11796 return 8;
11797 else
11798 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11802 /* Vectorizer cost model implementation. */
11804 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11805 static int
11806 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11807 tree vectype,
11808 int misalign ATTRIBUTE_UNUSED)
11810 unsigned elements;
11812 switch (type_of_cost)
11814 case scalar_stmt:
11815 return current_tune->vec_costs->scalar_stmt_cost;
11817 case scalar_load:
11818 return current_tune->vec_costs->scalar_load_cost;
11820 case scalar_store:
11821 return current_tune->vec_costs->scalar_store_cost;
11823 case vector_stmt:
11824 return current_tune->vec_costs->vec_stmt_cost;
11826 case vector_load:
11827 return current_tune->vec_costs->vec_align_load_cost;
11829 case vector_store:
11830 return current_tune->vec_costs->vec_store_cost;
11832 case vec_to_scalar:
11833 return current_tune->vec_costs->vec_to_scalar_cost;
11835 case scalar_to_vec:
11836 return current_tune->vec_costs->scalar_to_vec_cost;
11838 case unaligned_load:
11839 return current_tune->vec_costs->vec_unalign_load_cost;
11841 case unaligned_store:
11842 return current_tune->vec_costs->vec_unalign_store_cost;
11844 case cond_branch_taken:
11845 return current_tune->vec_costs->cond_taken_branch_cost;
11847 case cond_branch_not_taken:
11848 return current_tune->vec_costs->cond_not_taken_branch_cost;
11850 case vec_perm:
11851 case vec_promote_demote:
11852 return current_tune->vec_costs->vec_stmt_cost;
11854 case vec_construct:
11855 elements = TYPE_VECTOR_SUBPARTS (vectype);
11856 return elements / 2 + 1;
11858 default:
11859 gcc_unreachable ();
11863 /* Implement targetm.vectorize.add_stmt_cost. */
11865 static unsigned
11866 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11867 struct _stmt_vec_info *stmt_info, int misalign,
11868 enum vect_cost_model_location where)
11870 unsigned *cost = (unsigned *) data;
11871 unsigned retval = 0;
11873 if (flag_vect_cost_model)
11875 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11876 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11878 /* Statements in an inner loop relative to the loop being
11879 vectorized are weighted more heavily. The value here is
11880 arbitrary and could potentially be improved with analysis. */
11881 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11882 count *= 50; /* FIXME. */
11884 retval = (unsigned) (count * stmt_cost);
11885 cost[where] += retval;
11888 return retval;
11891 /* Return true if and only if this insn can dual-issue only as older. */
11892 static bool
11893 cortexa7_older_only (rtx_insn *insn)
11895 if (recog_memoized (insn) < 0)
11896 return false;
11898 switch (get_attr_type (insn))
11900 case TYPE_ALU_DSP_REG:
11901 case TYPE_ALU_SREG:
11902 case TYPE_ALUS_SREG:
11903 case TYPE_LOGIC_REG:
11904 case TYPE_LOGICS_REG:
11905 case TYPE_ADC_REG:
11906 case TYPE_ADCS_REG:
11907 case TYPE_ADR:
11908 case TYPE_BFM:
11909 case TYPE_REV:
11910 case TYPE_MVN_REG:
11911 case TYPE_SHIFT_IMM:
11912 case TYPE_SHIFT_REG:
11913 case TYPE_LOAD_BYTE:
11914 case TYPE_LOAD1:
11915 case TYPE_STORE1:
11916 case TYPE_FFARITHS:
11917 case TYPE_FADDS:
11918 case TYPE_FFARITHD:
11919 case TYPE_FADDD:
11920 case TYPE_FMOV:
11921 case TYPE_F_CVT:
11922 case TYPE_FCMPS:
11923 case TYPE_FCMPD:
11924 case TYPE_FCONSTS:
11925 case TYPE_FCONSTD:
11926 case TYPE_FMULS:
11927 case TYPE_FMACS:
11928 case TYPE_FMULD:
11929 case TYPE_FMACD:
11930 case TYPE_FDIVS:
11931 case TYPE_FDIVD:
11932 case TYPE_F_MRC:
11933 case TYPE_F_MRRC:
11934 case TYPE_F_FLAG:
11935 case TYPE_F_LOADS:
11936 case TYPE_F_STORES:
11937 return true;
11938 default:
11939 return false;
11943 /* Return true if and only if this insn can dual-issue as younger. */
11944 static bool
11945 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11947 if (recog_memoized (insn) < 0)
11949 if (verbose > 5)
11950 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11951 return false;
11954 switch (get_attr_type (insn))
11956 case TYPE_ALU_IMM:
11957 case TYPE_ALUS_IMM:
11958 case TYPE_LOGIC_IMM:
11959 case TYPE_LOGICS_IMM:
11960 case TYPE_EXTEND:
11961 case TYPE_MVN_IMM:
11962 case TYPE_MOV_IMM:
11963 case TYPE_MOV_REG:
11964 case TYPE_MOV_SHIFT:
11965 case TYPE_MOV_SHIFT_REG:
11966 case TYPE_BRANCH:
11967 case TYPE_CALL:
11968 return true;
11969 default:
11970 return false;
11975 /* Look for an instruction that can dual issue only as an older
11976 instruction, and move it in front of any instructions that can
11977 dual-issue as younger, while preserving the relative order of all
11978 other instructions in the ready list. This is a heuristic to help
11979 dual-issue in later cycles, by postponing issue of more flexible
11980 instructions. This heuristic may affect dual issue opportunities
11981 in the current cycle. */
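/* Illustrative example (not taken from any particular benchmark): if the
   head of the ready list is a register-to-register MOV (which may issue as
   the younger insn of a pair) and an ADD of three registers (older-only)
   sits further down, the ADD is moved up so that it issues ahead of the
   MOV.  */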
11982 static void
11983 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11984 int *n_readyp, int clock)
11986 int i;
11987 int first_older_only = -1, first_younger = -1;
11989 if (verbose > 5)
11990 fprintf (file,
11991 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11992 clock,
11993 *n_readyp);
11995 /* Traverse the ready list from the head (the instruction to issue
11996 first), looking for the first instruction that can issue as
11997 younger and the first instruction that can dual-issue only as
11998 older. */
11999 for (i = *n_readyp - 1; i >= 0; i--)
12001 rtx_insn *insn = ready[i];
12002 if (cortexa7_older_only (insn))
12004 first_older_only = i;
12005 if (verbose > 5)
12006 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12007 break;
12009 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12010 first_younger = i;
12013 /* Nothing to reorder because either no younger insn found or insn
12014 that can dual-issue only as older appears before any insn that
12015 can dual-issue as younger. */
12016 if (first_younger == -1)
12018 if (verbose > 5)
12019 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12020 return;
12023 /* Nothing to reorder because no older-only insn in the ready list. */
12024 if (first_older_only == -1)
12026 if (verbose > 5)
12027 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12028 return;
12031 /* Move first_older_only insn before first_younger. */
12032 if (verbose > 5)
12033 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12034 INSN_UID(ready [first_older_only]),
12035 INSN_UID(ready [first_younger]));
12036 rtx_insn *first_older_only_insn = ready [first_older_only];
12037 for (i = first_older_only; i < first_younger; i++)
12039 ready[i] = ready[i+1];
12042 ready[i] = first_older_only_insn;
12043 return;
12046 /* Implement TARGET_SCHED_REORDER. */
12047 static int
12048 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12049 int clock)
12051 switch (arm_tune)
12053 case cortexa7:
12054 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12055 break;
12056 default:
12057 /* Do nothing for other cores. */
12058 break;
12061 return arm_issue_rate ();
12064 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12065 It corrects the value of COST based on the relationship between
12066 INSN and DEP through the dependence LINK. It returns the new
12067 value. There is a per-core adjust_cost hook to adjust scheduler costs
12068 and the per-core hook can choose to completely override the generic
12069 adjust_cost function. Only put bits of code into arm_adjust_cost that
12070 are common across all cores. */
12071 static int
12072 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
12074 rtx i_pat, d_pat;
12076 /* When generating Thumb-1 code, we want to place flag-setting operations
12077 close to a conditional branch which depends on them, so that we can
12078 omit the comparison. */
12079 if (TARGET_THUMB1
12080 && REG_NOTE_KIND (link) == 0
12081 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12082 && recog_memoized (dep) >= 0
12083 && get_attr_conds (dep) == CONDS_SET)
12084 return 0;
12086 if (current_tune->sched_adjust_cost != NULL)
12088 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
12089 return cost;
12092 /* XXX Is this strictly true? */
12093 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
12094 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
12095 return 0;
12097 /* Call insns don't incur a stall, even if they follow a load. */
12098 if (REG_NOTE_KIND (link) == 0
12099 && CALL_P (insn))
12100 return 1;
12102 if ((i_pat = single_set (insn)) != NULL
12103 && MEM_P (SET_SRC (i_pat))
12104 && (d_pat = single_set (dep)) != NULL
12105 && MEM_P (SET_DEST (d_pat)))
12107 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12108 /* This is a load after a store; there is no conflict if the load reads
12109 from a cached area. Assume that loads from the stack and from the
12110 constant pool are cached, and that others will miss. This is a
12111 hack. */
12113 if ((GET_CODE (src_mem) == SYMBOL_REF
12114 && CONSTANT_POOL_ADDRESS_P (src_mem))
12115 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12116 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12117 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12118 return 1;
12121 return cost;
12125 arm_max_conditional_execute (void)
12127 return max_insns_skipped;
12130 static int
12131 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12133 if (TARGET_32BIT)
12134 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12135 else
12136 return (optimize > 0) ? 2 : 0;
12139 static int
12140 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12142 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12145 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12146 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12147 sequences of non-executed instructions in IT blocks probably take the same
12148 amount of time as executed instructions (and the IT instruction itself takes
12149 space in icache). This function was experimentally determined to give good
12150 results on a popular embedded benchmark. */
12152 static int
12153 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12155 return (TARGET_32BIT && speed_p) ? 1
12156 : arm_default_branch_cost (speed_p, predictable_p);
12159 static int
12160 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12162 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12165 static bool fp_consts_inited = false;
12167 static REAL_VALUE_TYPE value_fp0;
12169 static void
12170 init_fp_table (void)
12172 REAL_VALUE_TYPE r;
12174 r = REAL_VALUE_ATOF ("0", DFmode);
12175 value_fp0 = r;
12176 fp_consts_inited = true;
12179 /* Return TRUE if rtx X is a valid immediate FP constant. */
12181 arm_const_double_rtx (rtx x)
12183 const REAL_VALUE_TYPE *r;
12185 if (!fp_consts_inited)
12186 init_fp_table ();
12188 r = CONST_DOUBLE_REAL_VALUE (x);
12189 if (REAL_VALUE_MINUS_ZERO (*r))
12190 return 0;
12192 if (real_equal (r, &value_fp0))
12193 return 1;
12195 return 0;
12198 /* VFPv3 has a fairly wide range of representable immediates, formed from
12199 "quarter-precision" floating-point values. These can be evaluated using this
12200 formula (with ^ for exponentiation):
12202 -1^s * n * 2^-r
12204 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12205 16 <= n <= 31 and 0 <= r <= 7.
12207 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12209 - A (most-significant) is the sign bit.
12210 - BCD are the exponent (encoded as r XOR 3).
12211 - EFGH are the mantissa (encoded as n - 16).
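/* Worked example (illustration only): 1.0 = 16 * 2^-4, i.e. n = 16 and
   r = 4, so the 8-bit index is (0 << 7) | ((4 ^ 3) << 4) | (16 - 16), or
   0x70.  Similarly 31.0 = 31 * 2^0 encodes as 0x3f.  */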
12214 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12215 fconst[sd] instruction, or -1 if X isn't suitable. */
12216 static int
12217 vfp3_const_double_index (rtx x)
12219 REAL_VALUE_TYPE r, m;
12220 int sign, exponent;
12221 unsigned HOST_WIDE_INT mantissa, mant_hi;
12222 unsigned HOST_WIDE_INT mask;
12223 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12224 bool fail;
12226 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12227 return -1;
12229 r = *CONST_DOUBLE_REAL_VALUE (x);
12231 /* We can't represent these things, so detect them first. */
12232 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12233 return -1;
12235 /* Extract sign, exponent and mantissa. */
12236 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12237 r = real_value_abs (&r);
12238 exponent = REAL_EXP (&r);
12239 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12240 highest (sign) bit, with a fixed binary point at bit point_pos.
12241 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12242 bits for the mantissa, this may fail (low bits would be lost). */
12243 real_ldexp (&m, &r, point_pos - exponent);
12244 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12245 mantissa = w.elt (0);
12246 mant_hi = w.elt (1);
12248 /* If there are bits set in the low part of the mantissa, we can't
12249 represent this value. */
12250 if (mantissa != 0)
12251 return -1;
12253 /* Now make it so that mantissa contains the most-significant bits, and move
12254 the point_pos to indicate that the least-significant bits have been
12255 discarded. */
12256 point_pos -= HOST_BITS_PER_WIDE_INT;
12257 mantissa = mant_hi;
12259 /* We can permit four significant bits of mantissa only, plus a high bit
12260 which is always 1. */
12261 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12262 if ((mantissa & mask) != 0)
12263 return -1;
12265 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12266 mantissa >>= point_pos - 5;
12268 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12269 floating-point immediate zero with Neon using an integer-zero load, but
12270 that case is handled elsewhere.) */
12271 if (mantissa == 0)
12272 return -1;
12274 gcc_assert (mantissa >= 16 && mantissa <= 31);
12276 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12277 normalized significands are in the range [1, 2). (Our mantissa is shifted
12278 left 4 places at this point relative to normalized IEEE754 values). GCC
12279 internally uses [0.5, 1) (see real.c), so the exponent returned from
12280 REAL_EXP must be altered. */
12281 exponent = 5 - exponent;
12283 if (exponent < 0 || exponent > 7)
12284 return -1;
12286 /* Sign, mantissa and exponent are now in the correct form to plug into the
12287 formula described in the comment above. */
12288 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12291 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12293 vfp3_const_double_rtx (rtx x)
12295 if (!TARGET_VFP3)
12296 return 0;
12298 return vfp3_const_double_index (x) != -1;
12301 /* Recognize immediates which can be used in various Neon instructions. Legal
12302 immediates are described by the following table (for VMVN variants, the
12303 bitwise inverse of the constant shown is recognized. In either case, VMOV
12304 is output and the correct instruction to use for a given constant is chosen
12305 by the assembler). The constant shown is replicated across all elements of
12306 the destination vector.
12308 insn elems variant constant (binary)
12309 ---- ----- ------- -----------------
12310 vmov i32 0 00000000 00000000 00000000 abcdefgh
12311 vmov i32 1 00000000 00000000 abcdefgh 00000000
12312 vmov i32 2 00000000 abcdefgh 00000000 00000000
12313 vmov i32 3 abcdefgh 00000000 00000000 00000000
12314 vmov i16 4 00000000 abcdefgh
12315 vmov i16 5 abcdefgh 00000000
12316 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12317 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12318 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12319 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12320 vmvn i16 10 00000000 abcdefgh
12321 vmvn i16 11 abcdefgh 00000000
12322 vmov i32 12 00000000 00000000 abcdefgh 11111111
12323 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12324 vmov i32 14 00000000 abcdefgh 11111111 11111111
12325 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12326 vmov i8 16 abcdefgh
12327 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12328 eeeeeeee ffffffff gggggggg hhhhhhhh
12329 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12330 vmov f32 19 00000000 00000000 00000000 00000000
12332 For case 18, B = !b. Representable values are exactly those accepted by
12333 vfp3_const_double_index, but are output as floating-point numbers rather
12334 than indices.
12336 For case 19, we will change it to vmov.i32 when assembling.
12338 Variants 0-5 (inclusive) may also be used as immediates for the second
12339 operand of VORR/VBIC instructions.
12341 The INVERSE argument causes the bitwise inverse of the given operand to be
12342 recognized instead (used for recognizing legal immediates for the VAND/VORN
12343 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12344 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12345 output, rather than the real insns vbic/vorr).
12347 INVERSE makes no difference to the recognition of float vectors.
12349 The return value is the variant of immediate as shown in the above table, or
12350 -1 if the given value doesn't match any of the listed patterns.
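/* For example (illustrative): a V4SImode constant whose elements all equal
   0x00ab0000 matches variant 2 above (vmov.i32 with the nonzero byte in
   bits 16-23), while one whose elements all equal 0x000000ff matches
   variant 0.  */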
12352 static int
12353 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12354 rtx *modconst, int *elementwidth)
12356 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12357 matches = 1; \
12358 for (i = 0; i < idx; i += (STRIDE)) \
12359 if (!(TEST)) \
12360 matches = 0; \
12361 if (matches) \
12363 immtype = (CLASS); \
12364 elsize = (ELSIZE); \
12365 break; \
12368 unsigned int i, elsize = 0, idx = 0, n_elts;
12369 unsigned int innersize;
12370 unsigned char bytes[16];
12371 int immtype = -1, matches;
12372 unsigned int invmask = inverse ? 0xff : 0;
12373 bool vector = GET_CODE (op) == CONST_VECTOR;
12375 if (vector)
12376 n_elts = CONST_VECTOR_NUNITS (op);
12377 else
12379 n_elts = 1;
12380 if (mode == VOIDmode)
12381 mode = DImode;
12384 innersize = GET_MODE_UNIT_SIZE (mode);
12386 /* Vectors of float constants. */
12387 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12389 rtx el0 = CONST_VECTOR_ELT (op, 0);
12390 const REAL_VALUE_TYPE *r0;
12392 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12393 return -1;
12395 /* FP16 vectors cannot be represented. */
12396 if (GET_MODE_INNER (mode) == HFmode)
12397 return -1;
12399 r0 = CONST_DOUBLE_REAL_VALUE (el0);
12401 for (i = 1; i < n_elts; i++)
12403 rtx elt = CONST_VECTOR_ELT (op, i);
12404 if (!real_equal (r0, CONST_DOUBLE_REAL_VALUE (elt)))
12405 return -1;
12408 if (modconst)
12409 *modconst = CONST_VECTOR_ELT (op, 0);
12411 if (elementwidth)
12412 *elementwidth = 0;
12414 if (el0 == CONST0_RTX (GET_MODE (el0)))
12415 return 19;
12416 else
12417 return 18;
12420 /* Splat vector constant out into a byte vector. */
12421 for (i = 0; i < n_elts; i++)
12423 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12424 unsigned HOST_WIDE_INT elpart;
12426 gcc_assert (CONST_INT_P (el));
12427 elpart = INTVAL (el);
12429 for (unsigned int byte = 0; byte < innersize; byte++)
12431 bytes[idx++] = (elpart & 0xff) ^ invmask;
12432 elpart >>= BITS_PER_UNIT;
12436 /* Sanity check. */
12437 gcc_assert (idx == GET_MODE_SIZE (mode));
12441 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12442 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12444 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12445 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12447 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12448 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12450 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12451 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12453 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12455 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12457 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12458 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12460 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12461 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12463 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12464 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12466 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12467 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12469 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12471 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12473 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12474 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12476 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12477 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12479 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12480 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12482 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12483 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12485 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12487 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12488 && bytes[i] == bytes[(i + 8) % idx]);
12490 while (0);
12492 if (immtype == -1)
12493 return -1;
12495 if (elementwidth)
12496 *elementwidth = elsize;
12498 if (modconst)
12500 unsigned HOST_WIDE_INT imm = 0;
12502 /* Un-invert bytes of recognized vector, if necessary. */
12503 if (invmask != 0)
12504 for (i = 0; i < idx; i++)
12505 bytes[i] ^= invmask;
12507 if (immtype == 17)
12509 /* FIXME: Broken on 32-bit H_W_I hosts. */
12510 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12512 for (i = 0; i < 8; i++)
12513 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12514 << (i * BITS_PER_UNIT);
12516 *modconst = GEN_INT (imm);
12518 else
12520 unsigned HOST_WIDE_INT imm = 0;
12522 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12523 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12525 *modconst = GEN_INT (imm);
12529 return immtype;
12530 #undef CHECK
12533 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12534 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12535 float elements), and a modified constant (whatever should be output for a
12536 VMOV) in *MODCONST. */
12539 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12540 rtx *modconst, int *elementwidth)
12542 rtx tmpconst;
12543 int tmpwidth;
12544 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12546 if (retval == -1)
12547 return 0;
12549 if (modconst)
12550 *modconst = tmpconst;
12552 if (elementwidth)
12553 *elementwidth = tmpwidth;
12555 return 1;
12558 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12559 the immediate is valid, write a constant suitable for using as an operand
12560 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12561 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12564 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12565 rtx *modconst, int *elementwidth)
12567 rtx tmpconst;
12568 int tmpwidth;
12569 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12571 if (retval < 0 || retval > 5)
12572 return 0;
12574 if (modconst)
12575 *modconst = tmpconst;
12577 if (elementwidth)
12578 *elementwidth = tmpwidth;
12580 return 1;
12583 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12584 the immediate is valid, write a constant suitable for using as an operand
12585 to VSHR/VSHL to *MODCONST and the corresponding element width to
12586 *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left or a right shift,
12587 since the two have different limits. */
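/* Example (derived from the checks below): for a V4SImode operand a
   replicated right-shift count must lie in 1..32 and a left-shift count in
   0..31; anything else is rejected.  */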
12590 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12591 rtx *modconst, int *elementwidth,
12592 bool isleftshift)
12594 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12595 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12596 unsigned HOST_WIDE_INT last_elt = 0;
12597 unsigned HOST_WIDE_INT maxshift;
12599 /* Split vector constant out into a byte vector. */
12600 for (i = 0; i < n_elts; i++)
12602 rtx el = CONST_VECTOR_ELT (op, i);
12603 unsigned HOST_WIDE_INT elpart;
12605 if (CONST_INT_P (el))
12606 elpart = INTVAL (el);
12607 else if (CONST_DOUBLE_P (el))
12608 return 0;
12609 else
12610 gcc_unreachable ();
12612 if (i != 0 && elpart != last_elt)
12613 return 0;
12615 last_elt = elpart;
12618 /* Shift less than element size. */
12619 maxshift = innersize * 8;
12621 if (isleftshift)
12623 /* Left shift immediate value can be from 0 to <size>-1. */
12624 if (last_elt >= maxshift)
12625 return 0;
12627 else
12629 /* Right shift immediate value can be from 1 to <size>. */
12630 if (last_elt == 0 || last_elt > maxshift)
12631 return 0;
12634 if (elementwidth)
12635 *elementwidth = innersize * 8;
12637 if (modconst)
12638 *modconst = CONST_VECTOR_ELT (op, 0);
12640 return 1;
12643 /* Return a string suitable for output of Neon immediate logic operation
12644 MNEM. */
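/* The returned template looks like, for example, "vbic.i16\t%P0, %2" for a
   doubleword destination or "vorr.i32\t%q0, %2" for a quadword one
   (example strings only; the element width comes from
   neon_immediate_valid_for_logic).  */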
12646 char *
12647 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12648 int inverse, int quad)
12650 int width, is_valid;
12651 static char templ[40];
12653 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12655 gcc_assert (is_valid != 0);
12657 if (quad)
12658 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12659 else
12660 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12662 return templ;
12665 /* Return a string suitable for output of Neon immediate shift operation
12666 (VSHR or VSHL) MNEM. */
12668 char *
12669 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12670 machine_mode mode, int quad,
12671 bool isleftshift)
12673 int width, is_valid;
12674 static char templ[40];
12676 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12677 gcc_assert (is_valid != 0);
12679 if (quad)
12680 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12681 else
12682 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12684 return templ;
12687 /* Output a sequence of pairwise operations to implement a reduction.
12688 NOTE: We do "too much work" here, because pairwise operations work on two
12689 registers-worth of operands in one go. Unfortunately we don't think those
12690 extra calculations can be exploited to do the full operation in fewer steps.
12691 Although all vector elements of the result but the first are ignored, we
12692 actually calculate the same result in each of the elements. An alternative
12693 such as initially loading a vector with zero to use as each of the second
12694 operands would use up an additional register and take an extra instruction,
12695 for no particular gain. */
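/* As a concrete case (illustrative): reducing a V4SImode vector takes two
   pairwise steps (PARTS = 4, so the loop runs for i = 2 and i = 1), with
   the second step writing its result directly into OP0.  */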
12697 void
12698 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12699 rtx (*reduc) (rtx, rtx, rtx))
12701 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12702 rtx tmpsum = op1;
12704 for (i = parts / 2; i >= 1; i /= 2)
12706 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12707 emit_insn (reduc (dest, tmpsum, tmpsum));
12708 tmpsum = dest;
12712 /* If VALS is a vector constant that can be loaded into a register
12713 using VDUP, generate instructions to do so and return an RTX to
12714 assign to the register. Otherwise return NULL_RTX. */
12716 static rtx
12717 neon_vdup_constant (rtx vals)
12719 machine_mode mode = GET_MODE (vals);
12720 machine_mode inner_mode = GET_MODE_INNER (mode);
12721 rtx x;
12723 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12724 return NULL_RTX;
12726 if (!const_vec_duplicate_p (vals, &x))
12727 /* The elements are not all the same. We could handle repeating
12728 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12729 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12730 vdup.i16). */
12731 return NULL_RTX;
12733 /* We can load this constant by using VDUP and a constant in a
12734 single ARM register. This will be cheaper than a vector
12735 load. */
12737 x = copy_to_mode_reg (inner_mode, x);
12738 return gen_rtx_VEC_DUPLICATE (mode, x);
12741 /* Generate code to load VALS, which is a PARALLEL containing only
12742 constants (for vec_init) or CONST_VECTOR, efficiently into a
12743 register. Returns an RTX to copy into the register, or NULL_RTX
12744 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12747 neon_make_constant (rtx vals)
12749 machine_mode mode = GET_MODE (vals);
12750 rtx target;
12751 rtx const_vec = NULL_RTX;
12752 int n_elts = GET_MODE_NUNITS (mode);
12753 int n_const = 0;
12754 int i;
12756 if (GET_CODE (vals) == CONST_VECTOR)
12757 const_vec = vals;
12758 else if (GET_CODE (vals) == PARALLEL)
12760 /* A CONST_VECTOR must contain only CONST_INTs and
12761 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12762 Only store valid constants in a CONST_VECTOR. */
12763 for (i = 0; i < n_elts; ++i)
12765 rtx x = XVECEXP (vals, 0, i);
12766 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12767 n_const++;
12769 if (n_const == n_elts)
12770 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12772 else
12773 gcc_unreachable ();
12775 if (const_vec != NULL
12776 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12777 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12778 return const_vec;
12779 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12780 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12781 pipeline cycle; creating the constant takes one or two ARM
12782 pipeline cycles. */
12783 return target;
12784 else if (const_vec != NULL_RTX)
12785 /* Load from constant pool. On Cortex-A8 this takes two cycles
12786 (for either double or quad vectors). We can not take advantage
12787 of single-cycle VLD1 because we need a PC-relative addressing
12788 mode. */
12789 return const_vec;
12790 else
12791 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12792 We can not construct an initializer. */
12793 return NULL_RTX;
12796 /* Initialize vector TARGET to VALS. */
12798 void
12799 neon_expand_vector_init (rtx target, rtx vals)
12801 machine_mode mode = GET_MODE (target);
12802 machine_mode inner_mode = GET_MODE_INNER (mode);
12803 int n_elts = GET_MODE_NUNITS (mode);
12804 int n_var = 0, one_var = -1;
12805 bool all_same = true;
12806 rtx x, mem;
12807 int i;
12809 for (i = 0; i < n_elts; ++i)
12811 x = XVECEXP (vals, 0, i);
12812 if (!CONSTANT_P (x))
12813 ++n_var, one_var = i;
12815 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12816 all_same = false;
12819 if (n_var == 0)
12821 rtx constant = neon_make_constant (vals);
12822 if (constant != NULL_RTX)
12824 emit_move_insn (target, constant);
12825 return;
12829 /* Splat a single non-constant element if we can. */
12830 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12832 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12833 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12834 return;
12837 /* One field is non-constant. Load constant then overwrite varying
12838 field. This is more efficient than using the stack. */
12839 if (n_var == 1)
12841 rtx copy = copy_rtx (vals);
12842 rtx index = GEN_INT (one_var);
12844 /* Load constant part of vector, substitute neighboring value for
12845 varying element. */
12846 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12847 neon_expand_vector_init (target, copy);
12849 /* Insert variable. */
12850 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12851 switch (mode)
12853 case V8QImode:
12854 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12855 break;
12856 case V16QImode:
12857 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12858 break;
12859 case V4HImode:
12860 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12861 break;
12862 case V8HImode:
12863 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12864 break;
12865 case V2SImode:
12866 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12867 break;
12868 case V4SImode:
12869 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12870 break;
12871 case V2SFmode:
12872 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12873 break;
12874 case V4SFmode:
12875 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12876 break;
12877 case V2DImode:
12878 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12879 break;
12880 default:
12881 gcc_unreachable ();
12883 return;
12886 /* Construct the vector in memory one field at a time
12887 and load the whole vector. */
12888 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12889 for (i = 0; i < n_elts; i++)
12890 emit_move_insn (adjust_address_nv (mem, inner_mode,
12891 i * GET_MODE_SIZE (inner_mode)),
12892 XVECEXP (vals, 0, i));
12893 emit_move_insn (target, mem);
12896 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12897 an error if it doesn't. EXP indicates the source location, which includes the
12898 inlining history for intrinsics. */
12900 static void
12901 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12902 const_tree exp, const char *desc)
12904 HOST_WIDE_INT lane;
12906 gcc_assert (CONST_INT_P (operand));
12908 lane = INTVAL (operand);
12910 if (lane < low || lane >= high)
12912 if (exp)
12913 error ("%K%s %wd out of range %wd - %wd",
12914 exp, desc, lane, low, high - 1);
12915 else
12916 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12920 /* Bounds-check lanes. */
12922 void
12923 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12924 const_tree exp)
12926 bounds_check (operand, low, high, exp, "lane");
12929 /* Bounds-check constants. */
12931 void
12932 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12934 bounds_check (operand, low, high, NULL_TREE, "constant");
12937 HOST_WIDE_INT
12938 neon_element_bits (machine_mode mode)
12940 return GET_MODE_UNIT_BITSIZE (mode);
12944 /* Predicates for `match_operand' and `match_operator'. */
12946 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12947 WB is true if full writeback address modes are allowed and is false
12948 if limited writeback address modes (POST_INC and PRE_DEC) are
12949 allowed. */
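/* Addresses accepted here include (see the checks below) a plain register,
   POST_INC/PRE_DEC writeback, and (plus (reg) (const_int N)) where N is a
   multiple of 4 in the range -1020..1020.  */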
12952 arm_coproc_mem_operand (rtx op, bool wb)
12954 rtx ind;
12956 /* Reject eliminable registers. */
12957 if (! (reload_in_progress || reload_completed || lra_in_progress)
12958 && ( reg_mentioned_p (frame_pointer_rtx, op)
12959 || reg_mentioned_p (arg_pointer_rtx, op)
12960 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12961 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12962 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12963 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12964 return FALSE;
12966 /* Constants are converted into offsets from labels. */
12967 if (!MEM_P (op))
12968 return FALSE;
12970 ind = XEXP (op, 0);
12972 if (reload_completed
12973 && (GET_CODE (ind) == LABEL_REF
12974 || (GET_CODE (ind) == CONST
12975 && GET_CODE (XEXP (ind, 0)) == PLUS
12976 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12977 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12978 return TRUE;
12980 /* Match: (mem (reg)). */
12981 if (REG_P (ind))
12982 return arm_address_register_rtx_p (ind, 0);
12984 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12985 acceptable in any case (subject to verification by
12986 arm_address_register_rtx_p). We need WB to be true to accept
12987 PRE_INC and POST_DEC. */
12988 if (GET_CODE (ind) == POST_INC
12989 || GET_CODE (ind) == PRE_DEC
12990 || (wb
12991 && (GET_CODE (ind) == PRE_INC
12992 || GET_CODE (ind) == POST_DEC)))
12993 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12995 if (wb
12996 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12997 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12998 && GET_CODE (XEXP (ind, 1)) == PLUS
12999 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13000 ind = XEXP (ind, 1);
13002 /* Match:
13003 (plus (reg)
13004 (const)). */
13005 if (GET_CODE (ind) == PLUS
13006 && REG_P (XEXP (ind, 0))
13007 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13008 && CONST_INT_P (XEXP (ind, 1))
13009 && INTVAL (XEXP (ind, 1)) > -1024
13010 && INTVAL (XEXP (ind, 1)) < 1024
13011 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13012 return TRUE;
13014 return FALSE;
13017 /* Return TRUE if OP is a memory operand which we can load or store a vector
13018 to/from. TYPE is one of the following values:
13019 0 - Vector load/store (vldr)
13020 1 - Core registers (ldm)
13021 2 - Element/structure loads (vld1)
13024 neon_vector_mem_operand (rtx op, int type, bool strict)
13026 rtx ind;
13028 /* Reject eliminable registers. */
13029 if (strict && ! (reload_in_progress || reload_completed)
13030 && (reg_mentioned_p (frame_pointer_rtx, op)
13031 || reg_mentioned_p (arg_pointer_rtx, op)
13032 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13033 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13034 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13035 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13036 return FALSE;
13038 /* Constants are converted into offsets from labels. */
13039 if (!MEM_P (op))
13040 return FALSE;
13042 ind = XEXP (op, 0);
13044 if (reload_completed
13045 && (GET_CODE (ind) == LABEL_REF
13046 || (GET_CODE (ind) == CONST
13047 && GET_CODE (XEXP (ind, 0)) == PLUS
13048 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13049 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13050 return TRUE;
13052 /* Match: (mem (reg)). */
13053 if (REG_P (ind))
13054 return arm_address_register_rtx_p (ind, 0);
13056 /* Allow post-increment with Neon registers. */
13057 if ((type != 1 && GET_CODE (ind) == POST_INC)
13058 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13059 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13061 /* Allow post-increment by register for VLDn. */
13062 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13063 && GET_CODE (XEXP (ind, 1)) == PLUS
13064 && REG_P (XEXP (XEXP (ind, 1), 1)))
13065 return true;
13067 /* Match:
13068 (plus (reg)
13069 (const)). */
13070 if (type == 0
13071 && GET_CODE (ind) == PLUS
13072 && REG_P (XEXP (ind, 0))
13073 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13074 && CONST_INT_P (XEXP (ind, 1))
13075 && INTVAL (XEXP (ind, 1)) > -1024
13076 /* For quad modes, we restrict the constant offset to be slightly less
13077 than what the instruction format permits. We have no such constraint
13078 on double mode offsets. (This must match arm_legitimate_index_p.) */
13079 && (INTVAL (XEXP (ind, 1))
13080 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13081 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13082 return TRUE;
13084 return FALSE;
13087 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13088 type. */
13090 neon_struct_mem_operand (rtx op)
13092 rtx ind;
13094 /* Reject eliminable registers. */
13095 if (! (reload_in_progress || reload_completed)
13096 && ( reg_mentioned_p (frame_pointer_rtx, op)
13097 || reg_mentioned_p (arg_pointer_rtx, op)
13098 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13099 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13100 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13101 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13102 return FALSE;
13104 /* Constants are converted into offsets from labels. */
13105 if (!MEM_P (op))
13106 return FALSE;
13108 ind = XEXP (op, 0);
13110 if (reload_completed
13111 && (GET_CODE (ind) == LABEL_REF
13112 || (GET_CODE (ind) == CONST
13113 && GET_CODE (XEXP (ind, 0)) == PLUS
13114 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13115 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13116 return TRUE;
13118 /* Match: (mem (reg)). */
13119 if (REG_P (ind))
13120 return arm_address_register_rtx_p (ind, 0);
13122 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13123 if (GET_CODE (ind) == POST_INC
13124 || GET_CODE (ind) == PRE_DEC)
13125 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13127 return FALSE;
13130 /* Return true if X is a register that will be eliminated later on. */
13132 arm_eliminable_register (rtx x)
13134 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13135 || REGNO (x) == ARG_POINTER_REGNUM
13136 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13137 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13140 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13141 coprocessor registers. Otherwise return NO_REGS. */
13143 enum reg_class
13144 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13146 if (mode == HFmode)
13148 if (!TARGET_NEON_FP16)
13149 return GENERAL_REGS;
13150 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13151 return NO_REGS;
13152 return GENERAL_REGS;
13155 /* The neon move patterns handle all legitimate vector and struct
13156 addresses. */
13157 if (TARGET_NEON
13158 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13159 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13160 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13161 || VALID_NEON_STRUCT_MODE (mode)))
13162 return NO_REGS;
13164 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13165 return NO_REGS;
13167 return GENERAL_REGS;
13170 /* Values which must be returned in the most-significant end of the return
13171 register. */
13173 static bool
13174 arm_return_in_msb (const_tree valtype)
13176 return (TARGET_AAPCS_BASED
13177 && BYTES_BIG_ENDIAN
13178 && (AGGREGATE_TYPE_P (valtype)
13179 || TREE_CODE (valtype) == COMPLEX_TYPE
13180 || FIXED_POINT_TYPE_P (valtype)));
13183 /* Return TRUE if X references a SYMBOL_REF. */
13185 symbol_mentioned_p (rtx x)
13187 const char * fmt;
13188 int i;
13190 if (GET_CODE (x) == SYMBOL_REF)
13191 return 1;
13193 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13194 are constant offsets, not symbols. */
13195 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13196 return 0;
13198 fmt = GET_RTX_FORMAT (GET_CODE (x));
13200 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13202 if (fmt[i] == 'E')
13204 int j;
13206 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13207 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13208 return 1;
13210 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13211 return 1;
13214 return 0;
13217 /* Return TRUE if X references a LABEL_REF. */
13219 label_mentioned_p (rtx x)
13221 const char * fmt;
13222 int i;
13224 if (GET_CODE (x) == LABEL_REF)
13225 return 1;
13227 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13228 instruction, but they are constant offsets, not symbols. */
13229 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13230 return 0;
13232 fmt = GET_RTX_FORMAT (GET_CODE (x));
13233 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13235 if (fmt[i] == 'E')
13237 int j;
13239 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13240 if (label_mentioned_p (XVECEXP (x, i, j)))
13241 return 1;
13243 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13244 return 1;
13247 return 0;
13251 tls_mentioned_p (rtx x)
13253 switch (GET_CODE (x))
13255 case CONST:
13256 return tls_mentioned_p (XEXP (x, 0));
13258 case UNSPEC:
13259 if (XINT (x, 1) == UNSPEC_TLS)
13260 return 1;
13262 default:
13263 return 0;
13267 /* Must not copy any rtx that uses a pc-relative address.
13268 Also, disallow copying of load-exclusive instructions that
13269 may appear after splitting of compare-and-swap-style operations
13270 so as to prevent those loops from being transformed away from their
13271 canonical forms (see PR 69904). */
13273 static bool
13274 arm_cannot_copy_insn_p (rtx_insn *insn)
13276 /* The tls call insn cannot be copied, as it is paired with a data
13277 word. */
13278 if (recog_memoized (insn) == CODE_FOR_tlscall)
13279 return true;
13281 subrtx_iterator::array_type array;
13282 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13284 const_rtx x = *iter;
13285 if (GET_CODE (x) == UNSPEC
13286 && (XINT (x, 1) == UNSPEC_PIC_BASE
13287 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13288 return true;
13291 rtx set = single_set (insn);
13292 if (set)
13294 rtx src = SET_SRC (set);
13295 if (GET_CODE (src) == ZERO_EXTEND)
13296 src = XEXP (src, 0);
13298 /* Catch the load-exclusive and load-acquire operations. */
13299 if (GET_CODE (src) == UNSPEC_VOLATILE
13300 && (XINT (src, 1) == VUNSPEC_LL
13301 || XINT (src, 1) == VUNSPEC_LAX))
13302 return true;
13304 return false;
13307 enum rtx_code
13308 minmax_code (rtx x)
13310 enum rtx_code code = GET_CODE (x);
13312 switch (code)
13314 case SMAX:
13315 return GE;
13316 case SMIN:
13317 return LE;
13318 case UMIN:
13319 return LEU;
13320 case UMAX:
13321 return GEU;
13322 default:
13323 gcc_unreachable ();
13327 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13329 bool
13330 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13331 int *mask, bool *signed_sat)
13333 /* The high bound must be a power of two minus one. */
13334 int log = exact_log2 (INTVAL (hi_bound) + 1);
13335 if (log == -1)
13336 return false;
13338 /* The low bound is either zero (for usat) or one less than the
13339 negation of the high bound (for ssat). */
13340 if (INTVAL (lo_bound) == 0)
13342 if (mask)
13343 *mask = log;
13344 if (signed_sat)
13345 *signed_sat = false;
13347 return true;
13350 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13352 if (mask)
13353 *mask = log + 1;
13354 if (signed_sat)
13355 *signed_sat = true;
13357 return true;
13360 return false;
13363 /* Return 1 if memory locations are adjacent. */
13365 adjacent_mem_locations (rtx a, rtx b)
13367 /* We don't guarantee to preserve the order of these memory refs. */
13368 if (volatile_refs_p (a) || volatile_refs_p (b))
13369 return 0;
13371 if ((REG_P (XEXP (a, 0))
13372 || (GET_CODE (XEXP (a, 0)) == PLUS
13373 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13374 && (REG_P (XEXP (b, 0))
13375 || (GET_CODE (XEXP (b, 0)) == PLUS
13376 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13378 HOST_WIDE_INT val0 = 0, val1 = 0;
13379 rtx reg0, reg1;
13380 int val_diff;
13382 if (GET_CODE (XEXP (a, 0)) == PLUS)
13384 reg0 = XEXP (XEXP (a, 0), 0);
13385 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13387 else
13388 reg0 = XEXP (a, 0);
13390 if (GET_CODE (XEXP (b, 0)) == PLUS)
13392 reg1 = XEXP (XEXP (b, 0), 0);
13393 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13395 else
13396 reg1 = XEXP (b, 0);
13398 /* Don't accept any offset that will require multiple
13399 instructions to handle, since this would cause the
13400 arith_adjacentmem pattern to output an overlong sequence. */
13401 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13402 return 0;
13404 /* Don't allow an eliminable register: register elimination can make
13405 the offset too large. */
13406 if (arm_eliminable_register (reg0))
13407 return 0;
13409 val_diff = val1 - val0;
13411 if (arm_ld_sched)
13413 /* If the target has load delay slots, then there's no benefit
13414 to using an ldm instruction unless the offset is zero and
13415 we are optimizing for size. */
13416 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13417 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13418 && (val_diff == 4 || val_diff == -4));
13421 return ((REGNO (reg0) == REGNO (reg1))
13422 && (val_diff == 4 || val_diff == -4));
13425 return 0;
13428 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13429 for load operations, false for store operations. CONSECUTIVE is true
13430 if the register numbers in the operation must be consecutive in the register
13431 bank. RETURN_PC is true if the value is to be loaded into the PC.
13432 The pattern we are trying to match for load is:
13433 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13434 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13437 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13439 where
13440 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13441 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13442 3. If consecutive is TRUE, then for kth register being loaded,
13443 REGNO (R_dk) = REGNO (R_d0) + k.
13444 The pattern for store is similar. */
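/* A rough sketch (for illustration, not copied from the .md patterns):
   "ldmia r0, {r4, r5}" would be represented as
     [(set (reg:SI r4) (mem:SI (reg:SI r0)))
      (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))]
   which satisfies conditions 1 and 2 above.  */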
13445 bool
13446 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13447 bool consecutive, bool return_pc)
13449 HOST_WIDE_INT count = XVECLEN (op, 0);
13450 rtx reg, mem, addr;
13451 unsigned regno;
13452 unsigned first_regno;
13453 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13454 rtx elt;
13455 bool addr_reg_in_reglist = false;
13456 bool update = false;
13457 int reg_increment;
13458 int offset_adj;
13459 int regs_per_val;
13461 /* If not in SImode, then registers must be consecutive
13462 (e.g., VLDM instructions for DFmode). */
13463 gcc_assert ((mode == SImode) || consecutive);
13464 /* Setting return_pc for stores is illegal. */
13465 gcc_assert (!return_pc || load);
13467 /* Set up the increments and the regs per val based on the mode. */
13468 reg_increment = GET_MODE_SIZE (mode);
13469 regs_per_val = reg_increment / 4;
13470 offset_adj = return_pc ? 1 : 0;
13472 if (count <= 1
13473 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13474 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13475 return false;
13477 /* Check if this is a write-back. */
13478 elt = XVECEXP (op, 0, offset_adj);
13479 if (GET_CODE (SET_SRC (elt)) == PLUS)
13481 i++;
13482 base = 1;
13483 update = true;
13485 /* The offset adjustment must be the number of registers being
13486 popped times the size of a single register. */
13487 if (!REG_P (SET_DEST (elt))
13488 || !REG_P (XEXP (SET_SRC (elt), 0))
13489 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13490 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13491 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13492 ((count - 1 - offset_adj) * reg_increment))
13493 return false;
13496 i = i + offset_adj;
13497 base = base + offset_adj;
13498 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13499 success depends on the type: VLDM can do just one reg,
13500 LDM must do at least two. */
13501 if ((count <= i) && (mode == SImode))
13502 return false;
13504 elt = XVECEXP (op, 0, i - 1);
13505 if (GET_CODE (elt) != SET)
13506 return false;
13508 if (load)
13510 reg = SET_DEST (elt);
13511 mem = SET_SRC (elt);
13513 else
13515 reg = SET_SRC (elt);
13516 mem = SET_DEST (elt);
13519 if (!REG_P (reg) || !MEM_P (mem))
13520 return false;
13522 regno = REGNO (reg);
13523 first_regno = regno;
13524 addr = XEXP (mem, 0);
13525 if (GET_CODE (addr) == PLUS)
13527 if (!CONST_INT_P (XEXP (addr, 1)))
13528 return false;
13530 offset = INTVAL (XEXP (addr, 1));
13531 addr = XEXP (addr, 0);
13534 if (!REG_P (addr))
13535 return false;
13537 /* Don't allow SP to be loaded unless it is also the base register. It
13538 guarantees that SP is reset correctly when an LDM instruction
13539 is interrupted. Otherwise, we might end up with a corrupt stack. */
13540 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13541 return false;
13543 for (; i < count; i++)
13545 elt = XVECEXP (op, 0, i);
13546 if (GET_CODE (elt) != SET)
13547 return false;
13549 if (load)
13551 reg = SET_DEST (elt);
13552 mem = SET_SRC (elt);
13554 else
13556 reg = SET_SRC (elt);
13557 mem = SET_DEST (elt);
13560 if (!REG_P (reg)
13561 || GET_MODE (reg) != mode
13562 || REGNO (reg) <= regno
13563 || (consecutive
13564 && (REGNO (reg) !=
13565 (unsigned int) (first_regno + regs_per_val * (i - base))))
13566 /* Don't allow SP to be loaded unless it is also the base register. It
13567 guarantees that SP is reset correctly when an LDM instruction
13568 is interrupted. Otherwise, we might end up with a corrupt stack. */
13569 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13570 || !MEM_P (mem)
13571 || GET_MODE (mem) != mode
13572 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13573 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13574 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13575 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13576 offset + (i - base) * reg_increment))
13577 && (!REG_P (XEXP (mem, 0))
13578 || offset + (i - base) * reg_increment != 0)))
13579 return false;
13581 regno = REGNO (reg);
13582 if (regno == REGNO (addr))
13583 addr_reg_in_reglist = true;
13586 if (load)
13588 if (update && addr_reg_in_reglist)
13589 return false;
13591 /* For Thumb-1, the address register is always modified - either by write-back
13592 or by explicit load. If the pattern does not describe an update,
13593 then the address register must be in the list of loaded registers. */
13594 if (TARGET_THUMB1)
13595 return update || addr_reg_in_reglist;
13598 return true;
13601 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13602 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13603 instruction. ADD_OFFSET is nonzero if the base address register needs
13604 to be modified with an add instruction before we can use it. */
13606 static bool
13607 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13608 int nops, HOST_WIDE_INT add_offset)
13610 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13611 if the offset isn't small enough. The reason 2 ldrs are faster
13612 is because these ARMs are able to do more than one cache access
13613 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13614 whilst the ARM8 has a double bandwidth cache. This means that
13615 these cores can do both an instruction fetch and a data fetch in
13616 a single cycle, so the trick of calculating the address into a
13617 scratch register (one of the result regs) and then doing a load
13618 multiple actually becomes slower (and no smaller in code size).
13619 That is the transformation
13621 ldr rd1, [rbase + offset]
13622 ldr rd2, [rbase + offset + 4]
13624 to
13626 add rd1, rbase, offset
13627 ldmia rd1, {rd1, rd2}
13629 produces worse code -- '3 cycles + any stalls on rd2' instead of
13630 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13631 access per cycle, the first sequence could never complete in less
13632 than 6 cycles, whereas the ldm sequence would only take 5 and
13633 would make better use of sequential accesses if not hitting the
13634 cache.
13636 We cheat here and test 'arm_ld_sched' which we currently know to
13637 only be true for the ARM8, ARM9 and StrongARM. If this ever
13638 changes, then the test below needs to be reworked. */
13639 if (nops == 2 && arm_ld_sched && add_offset != 0)
13640 return false;
13642 /* XScale has load-store double instructions, but they have stricter
13643 alignment requirements than load-store multiple, so we cannot
13644 use them.
13646 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13647 the pipeline until completion.
13649 NREGS CYCLES
13650 1 3
13651 2 4
13652 3 5
13653 4 6
13655 An ldr instruction takes 1-3 cycles, but does not block the
13656 pipeline.
13658 NREGS CYCLES
13659 1 1-3
13660 2 2-6
13661 3 3-9
13662 4 4-12
13664 Best case ldr will always win. However, the more ldr instructions
13665 we issue, the less likely we are to be able to schedule them well.
13666 Using ldr instructions also increases code size.
13668 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13669 for counts of 3 or 4 regs. */
13670 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13671 return false;
13672 return true;
13675 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13676 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13677 an array ORDER which describes the sequence to use when accessing the
13678 offsets that produces an ascending order. In this sequence, each
13679 offset must be larger by exactly 4 than the previous one. ORDER[0]
13680 must have been filled in with the lowest offset by the caller.
13681 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13682 we use to verify that ORDER produces an ascending order of registers.
13683 Return true if it was possible to construct such an order, false if
13684 not. */
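/* A worked example with assumed values: for NOPS == 4 and
   UNSORTED_OFFSETS == {8, 0, 4, 12}, the caller presets ORDER[0] = 1
   (the index of the lowest offset, 0).  This routine then builds
   ORDER == {1, 2, 0, 3}, i.e. offsets 0, 4, 8, 12.  With
   UNSORTED_REGS == {5, 1, 3, 7} that order also passes the register
   check, since the registers visited are 1, 3, 5, 7, strictly ascending.
   An offset list such as {0, 4, 12, 16} fails, because no element is
   exactly 4 above 4.  */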
13686 static bool
13687 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13688 int *unsorted_regs)
13690 int i;
13691 for (i = 1; i < nops; i++)
13693 int j;
13695 order[i] = order[i - 1];
13696 for (j = 0; j < nops; j++)
13697 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13699 /* We must find exactly one offset that is higher than the
13700 previous one by 4. */
13701 if (order[i] != order[i - 1])
13702 return false;
13703 order[i] = j;
13705 if (order[i] == order[i - 1])
13706 return false;
13707 /* The register numbers must be ascending. */
13708 if (unsorted_regs != NULL
13709 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13710 return false;
13712 return true;
13715 /* Used to determine in a peephole whether a sequence of load
13716 instructions can be changed into a load-multiple instruction.
13717 NOPS is the number of separate load instructions we are examining. The
13718 first NOPS entries in OPERANDS are the destination registers, the
13719 next NOPS entries are memory operands. If this function is
13720 successful, *BASE is set to the common base register of the memory
13721 accesses; *LOAD_OFFSET is set to the first memory location's offset
13722 from that base register.
13723 REGS is an array filled in with the destination register numbers.
13724 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13725 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13726 the sequence of registers in REGS matches the loads from ascending memory
13727 locations, and the function verifies that the register numbers are
13728 themselves ascending. If CHECK_REGS is false, the register numbers
13729 are stored in the order they are found in the operands. */
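/* A hypothetical example (assuming a 32-bit, non-Thumb-1 target): for the
   three loads

     ldr r5, [r0, #4]    ldr r4, [r0]    ldr r6, [r0, #8]

   OPERANDS holds {r5, r4, r6, [r0, #4], [r0], [r0, #8]}.  With CHECK_REGS
   true the function fills REGS = {4, 5, 6}, sets *BASE = 0 (r0) and
   *LOAD_OFFSET = 0, and returns 1, i.e. the ldmia form.  */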
13730 static int
13731 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13732 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13734 int unsorted_regs[MAX_LDM_STM_OPS];
13735 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13736 int order[MAX_LDM_STM_OPS];
13737 rtx base_reg_rtx = NULL;
13738 int base_reg = -1;
13739 int i, ldm_case;
13741 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13742 easily extended if required. */
13743 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13745 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13747 /* Loop over the operands and check that the memory references are
13748 suitable (i.e. immediate offsets from the same base register). At
13749 the same time, extract the target register, and the memory
13750 offsets. */
13751 for (i = 0; i < nops; i++)
13753 rtx reg;
13754 rtx offset;
13756 /* Convert a subreg of a mem into the mem itself. */
13757 if (GET_CODE (operands[nops + i]) == SUBREG)
13758 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13760 gcc_assert (MEM_P (operands[nops + i]));
13762 /* Don't reorder volatile memory references; it doesn't seem worth
13763 looking for the case where the order is ok anyway. */
13764 if (MEM_VOLATILE_P (operands[nops + i]))
13765 return 0;
13767 offset = const0_rtx;
13769 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13770 || (GET_CODE (reg) == SUBREG
13771 && REG_P (reg = SUBREG_REG (reg))))
13772 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13773 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13774 || (GET_CODE (reg) == SUBREG
13775 && REG_P (reg = SUBREG_REG (reg))))
13776 && (CONST_INT_P (offset
13777 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13779 if (i == 0)
13781 base_reg = REGNO (reg);
13782 base_reg_rtx = reg;
13783 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13784 return 0;
13786 else if (base_reg != (int) REGNO (reg))
13787 /* Not addressed from the same base register. */
13788 return 0;
13790 unsorted_regs[i] = (REG_P (operands[i])
13791 ? REGNO (operands[i])
13792 : REGNO (SUBREG_REG (operands[i])));
13794 /* If it isn't an integer register, or if it overwrites the
13795 base register but isn't the last insn in the list, then
13796 we can't do this. */
13797 if (unsorted_regs[i] < 0
13798 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13799 || unsorted_regs[i] > 14
13800 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13801 return 0;
13803 /* Don't allow SP to be loaded unless it is also the base
13804 register. It guarantees that SP is reset correctly when
13805 an LDM instruction is interrupted. Otherwise, we might
13806 end up with a corrupt stack. */
13807 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13808 return 0;
13810 unsorted_offsets[i] = INTVAL (offset);
13811 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13812 order[0] = i;
13814 else
13815 /* Not a suitable memory address. */
13816 return 0;
13819 /* All the useful information has now been extracted from the
13820 operands into unsorted_regs and unsorted_offsets; additionally,
13821 order[0] has been set to the lowest offset in the list. Sort
13822 the offsets into order, verifying that they are adjacent, and
13823 check that the register numbers are ascending. */
13824 if (!compute_offset_order (nops, unsorted_offsets, order,
13825 check_regs ? unsorted_regs : NULL))
13826 return 0;
13828 if (saved_order)
13829 memcpy (saved_order, order, sizeof order);
13831 if (base)
13833 *base = base_reg;
13835 for (i = 0; i < nops; i++)
13836 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13838 *load_offset = unsorted_offsets[order[0]];
13841 if (TARGET_THUMB1
13842 && !peep2_reg_dead_p (nops, base_reg_rtx))
13843 return 0;
13845 if (unsorted_offsets[order[0]] == 0)
13846 ldm_case = 1; /* ldmia */
13847 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13848 ldm_case = 2; /* ldmib */
13849 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13850 ldm_case = 3; /* ldmda */
13851 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13852 ldm_case = 4; /* ldmdb */
13853 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13854 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13855 ldm_case = 5;
13856 else
13857 return 0;
13859 if (!multiple_operation_profitable_p (false, nops,
13860 ldm_case == 5
13861 ? unsorted_offsets[order[0]] : 0))
13862 return 0;
13864 return ldm_case;
13867 /* Used to determine in a peephole whether a sequence of store instructions can
13868 be changed into a store-multiple instruction.
13869 NOPS is the number of separate store instructions we are examining.
13870 NOPS_TOTAL is the total number of instructions recognized by the peephole
13871 pattern.
13872 The first NOPS entries in OPERANDS are the source registers, the next
13873 NOPS entries are memory operands. If this function is successful, *BASE is
13874 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13875 to the first memory location's offset from that base register. REGS is an
13876 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13877 likewise filled with the corresponding rtx's.
13878 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13879 numbers to an ascending order of stores.
13880 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13881 from ascending memory locations, and the function verifies that the register
13882 numbers are themselves ascending. If CHECK_REGS is false, the register
13883 numbers are stored in the order they are found in the operands. */
13884 static int
13885 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13886 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13887 HOST_WIDE_INT *load_offset, bool check_regs)
13889 int unsorted_regs[MAX_LDM_STM_OPS];
13890 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13891 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13892 int order[MAX_LDM_STM_OPS];
13893 int base_reg = -1;
13894 rtx base_reg_rtx = NULL;
13895 int i, stm_case;
13897 /* Write back of base register is currently only supported for Thumb 1. */
13898 int base_writeback = TARGET_THUMB1;
13900 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13901 easily extended if required. */
13902 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13904 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13906 /* Loop over the operands and check that the memory references are
13907 suitable (i.e. immediate offsets from the same base register). At
13908 the same time, extract the target register, and the memory
13909 offsets. */
13910 for (i = 0; i < nops; i++)
13912 rtx reg;
13913 rtx offset;
13915 /* Convert a subreg of a mem into the mem itself. */
13916 if (GET_CODE (operands[nops + i]) == SUBREG)
13917 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13919 gcc_assert (MEM_P (operands[nops + i]));
13921 /* Don't reorder volatile memory references; it doesn't seem worth
13922 looking for the case where the order is ok anyway. */
13923 if (MEM_VOLATILE_P (operands[nops + i]))
13924 return 0;
13926 offset = const0_rtx;
13928 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13929 || (GET_CODE (reg) == SUBREG
13930 && REG_P (reg = SUBREG_REG (reg))))
13931 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13932 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13933 || (GET_CODE (reg) == SUBREG
13934 && REG_P (reg = SUBREG_REG (reg))))
13935 && (CONST_INT_P (offset
13936 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13938 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13939 ? operands[i] : SUBREG_REG (operands[i]));
13940 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13942 if (i == 0)
13944 base_reg = REGNO (reg);
13945 base_reg_rtx = reg;
13946 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13947 return 0;
13949 else if (base_reg != (int) REGNO (reg))
13950 /* Not addressed from the same base register. */
13951 return 0;
13953 /* If it isn't an integer register, then we can't do this. */
13954 if (unsorted_regs[i] < 0
13955 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13956 /* The effects are unpredictable if the base register is
13957 both updated and stored. */
13958 || (base_writeback && unsorted_regs[i] == base_reg)
13959 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13960 || unsorted_regs[i] > 14)
13961 return 0;
13963 unsorted_offsets[i] = INTVAL (offset);
13964 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13965 order[0] = i;
13967 else
13968 /* Not a suitable memory address. */
13969 return 0;
13972 /* All the useful information has now been extracted from the
13973 operands into unsorted_regs and unsorted_offsets; additionally,
13974 order[0] has been set to the lowest offset in the list. Sort
13975 the offsets into order, verifying that they are adjacent, and
13976 check that the register numbers are ascending. */
13977 if (!compute_offset_order (nops, unsorted_offsets, order,
13978 check_regs ? unsorted_regs : NULL))
13979 return 0;
13981 if (saved_order)
13982 memcpy (saved_order, order, sizeof order);
13984 if (base)
13986 *base = base_reg;
13988 for (i = 0; i < nops; i++)
13990 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13991 if (reg_rtxs)
13992 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13995 *load_offset = unsorted_offsets[order[0]];
13998 if (TARGET_THUMB1
13999 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14000 return 0;
14002 if (unsorted_offsets[order[0]] == 0)
14003 stm_case = 1; /* stmia */
14004 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14005 stm_case = 2; /* stmib */
14006 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14007 stm_case = 3; /* stmda */
14008 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14009 stm_case = 4; /* stmdb */
14010 else
14011 return 0;
14013 if (!multiple_operation_profitable_p (false, nops, 0))
14014 return 0;
14016 return stm_case;
14019 /* Routines for use in generating RTL. */
14021 /* Generate a load-multiple instruction. COUNT is the number of loads in
14022 the instruction; REGS and MEMS are arrays containing the operands.
14023 BASEREG is the base register to be used in addressing the memory operands.
14024 WBACK_OFFSET is nonzero if the instruction should update the base
14025 register. */
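/* An illustrative sketch with assumed arguments: called with COUNT == 2,
   REGS == {4, 5}, MEMS describing [r0] and [r0, #4] and WBACK_OFFSET == 8,
   the multiple-operation path builds

     (parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (const_int 8)))
                (set (reg:SI 4) (mem:SI (reg:SI 0)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))])

   i.e. an ldmia with write-back; when WBACK_OFFSET is 0 the base-update
   element is simply omitted.  When the operation is not deemed profitable,
   individual moves are emitted instead.  */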
14027 static rtx
14028 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14029 HOST_WIDE_INT wback_offset)
14031 int i = 0, j;
14032 rtx result;
14034 if (!multiple_operation_profitable_p (false, count, 0))
14036 rtx seq;
14038 start_sequence ();
14040 for (i = 0; i < count; i++)
14041 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14043 if (wback_offset != 0)
14044 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14046 seq = get_insns ();
14047 end_sequence ();
14049 return seq;
14052 result = gen_rtx_PARALLEL (VOIDmode,
14053 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14054 if (wback_offset != 0)
14056 XVECEXP (result, 0, 0)
14057 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14058 i = 1;
14059 count++;
14062 for (j = 0; i < count; i++, j++)
14063 XVECEXP (result, 0, i)
14064 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14066 return result;
14069 /* Generate a store-multiple instruction. COUNT is the number of stores in
14070 the instruction; REGS and MEMS are arrays containing the operands.
14071 BASEREG is the base register to be used in addressing the memory operands.
14072 WBACK_OFFSET is nonzero if the instruction should update the base
14073 register. */
14075 static rtx
14076 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14077 HOST_WIDE_INT wback_offset)
14079 int i = 0, j;
14080 rtx result;
14082 if (GET_CODE (basereg) == PLUS)
14083 basereg = XEXP (basereg, 0);
14085 if (!multiple_operation_profitable_p (false, count, 0))
14087 rtx seq;
14089 start_sequence ();
14091 for (i = 0; i < count; i++)
14092 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14094 if (wback_offset != 0)
14095 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14097 seq = get_insns ();
14098 end_sequence ();
14100 return seq;
14103 result = gen_rtx_PARALLEL (VOIDmode,
14104 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14105 if (wback_offset != 0)
14107 XVECEXP (result, 0, 0)
14108 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14109 i = 1;
14110 count++;
14113 for (j = 0; i < count; i++, j++)
14114 XVECEXP (result, 0, i)
14115 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14117 return result;
14120 /* Generate either a load-multiple or a store-multiple instruction. This
14121 function can be used in situations where we can start with a single MEM
14122 rtx and adjust its address upwards.
14123 COUNT is the number of operations in the instruction, not counting a
14124 possible update of the base register. REGS is an array containing the
14125 register operands.
14126 BASEREG is the base register to be used in addressing the memory operands,
14127 which are constructed from BASEMEM.
14128 WRITE_BACK specifies whether the generated instruction should include an
14129 update of the base register.
14130 OFFSETP is used to pass an offset to and from this function; this offset
14131 is not used when constructing the address (instead BASEMEM should have an
14132 appropriate offset in its address); it is used only for setting
14133 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14135 static rtx
14136 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14137 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14139 rtx mems[MAX_LDM_STM_OPS];
14140 HOST_WIDE_INT offset = *offsetp;
14141 int i;
14143 gcc_assert (count <= MAX_LDM_STM_OPS);
14145 if (GET_CODE (basereg) == PLUS)
14146 basereg = XEXP (basereg, 0);
14148 for (i = 0; i < count; i++)
14150 rtx addr = plus_constant (Pmode, basereg, i * 4);
14151 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14152 offset += 4;
14155 if (write_back)
14156 *offsetp = offset;
14158 if (is_load)
14159 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14160 write_back ? 4 * count : 0);
14161 else
14162 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14163 write_back ? 4 * count : 0);
14166 rtx
14167 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14168 rtx basemem, HOST_WIDE_INT *offsetp)
14170 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14171 offsetp);
14174 rtx
14175 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14176 rtx basemem, HOST_WIDE_INT *offsetp)
14178 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14179 offsetp);
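/* A usage sketch with hypothetical values, mirroring the callers further
   down in this file:

     int regs[3] = {4, 5, 6};
     HOST_WIDE_INT off = 0;
     emit_insn (arm_gen_load_multiple (regs, 3, src, TRUE, srcbase, &off));

   emits a three-register load with a write-back of 12 applied to SRC and
   leaves OFF == 12, ready for addressing any tail bytes; this is how
   arm_block_move_unaligned_straight drives these helpers.  */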
14182 /* Called from a peephole2 expander to turn a sequence of loads into an
14183 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14184 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14185 is true if we can reorder the registers because they are used commutatively
14186 subsequently.
14187 Returns true iff we could generate a new instruction. */
14189 bool
14190 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14192 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14193 rtx mems[MAX_LDM_STM_OPS];
14194 int i, j, base_reg;
14195 rtx base_reg_rtx;
14196 HOST_WIDE_INT offset;
14197 int write_back = FALSE;
14198 int ldm_case;
14199 rtx addr;
14201 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14202 &base_reg, &offset, !sort_regs);
14204 if (ldm_case == 0)
14205 return false;
14207 if (sort_regs)
14208 for (i = 0; i < nops - 1; i++)
14209 for (j = i + 1; j < nops; j++)
14210 if (regs[i] > regs[j])
14212 int t = regs[i];
14213 regs[i] = regs[j];
14214 regs[j] = t;
14216 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14218 if (TARGET_THUMB1)
14220 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14221 gcc_assert (ldm_case == 1 || ldm_case == 5);
14222 write_back = TRUE;
14225 if (ldm_case == 5)
14227 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14228 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14229 offset = 0;
14230 if (!TARGET_THUMB1)
14232 base_reg = regs[0];
14233 base_reg_rtx = newbase;
14237 for (i = 0; i < nops; i++)
14239 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14240 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14241 SImode, addr, 0);
14243 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14244 write_back ? offset + i * 4 : 0));
14245 return true;
14248 /* Called from a peephole2 expander to turn a sequence of stores into an
14249 STM instruction. OPERANDS are the operands found by the peephole matcher;
14250 NOPS indicates how many separate stores we are trying to combine.
14251 Returns true iff we could generate a new instruction. */
14253 bool
14254 gen_stm_seq (rtx *operands, int nops)
14256 int i;
14257 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14258 rtx mems[MAX_LDM_STM_OPS];
14259 int base_reg;
14260 rtx base_reg_rtx;
14261 HOST_WIDE_INT offset;
14262 int write_back = FALSE;
14263 int stm_case;
14264 rtx addr;
14265 bool base_reg_dies;
14267 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14268 mem_order, &base_reg, &offset, true);
14270 if (stm_case == 0)
14271 return false;
14273 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14275 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14276 if (TARGET_THUMB1)
14278 gcc_assert (base_reg_dies);
14279 write_back = TRUE;
14282 if (stm_case == 5)
14284 gcc_assert (base_reg_dies);
14285 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14286 offset = 0;
14289 addr = plus_constant (Pmode, base_reg_rtx, offset);
14291 for (i = 0; i < nops; i++)
14293 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14294 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14295 SImode, addr, 0);
14297 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14298 write_back ? offset + i * 4 : 0));
14299 return true;
14302 /* Called from a peephole2 expander to turn a sequence of stores that are
14303 preceded by constant loads into an STM instruction. OPERANDS are the
14304 operands found by the peephole matcher; NOPS indicates how many
14305 separate stores we are trying to combine; there are 2 * NOPS
14306 instructions in the peephole.
14307 Returns true iff we could generate a new instruction. */
14309 bool
14310 gen_const_stm_seq (rtx *operands, int nops)
14312 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14313 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14314 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14315 rtx mems[MAX_LDM_STM_OPS];
14316 int base_reg;
14317 rtx base_reg_rtx;
14318 HOST_WIDE_INT offset;
14319 int write_back = FALSE;
14320 int stm_case;
14321 rtx addr;
14322 bool base_reg_dies;
14323 int i, j;
14324 HARD_REG_SET allocated;
14326 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14327 mem_order, &base_reg, &offset, false);
14329 if (stm_case == 0)
14330 return false;
14332 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14334 /* If the same register is used more than once, try to find a free
14335 register. */
14336 CLEAR_HARD_REG_SET (allocated);
14337 for (i = 0; i < nops; i++)
14339 for (j = i + 1; j < nops; j++)
14340 if (regs[i] == regs[j])
14342 rtx t = peep2_find_free_register (0, nops * 2,
14343 TARGET_THUMB1 ? "l" : "r",
14344 SImode, &allocated);
14345 if (t == NULL_RTX)
14346 return false;
14347 reg_rtxs[i] = t;
14348 regs[i] = REGNO (t);
14352 /* Compute an ordering that maps the register numbers to an ascending
14353 sequence. */
14354 reg_order[0] = 0;
14355 for (i = 0; i < nops; i++)
14356 if (regs[i] < regs[reg_order[0]])
14357 reg_order[0] = i;
14359 for (i = 1; i < nops; i++)
14361 int this_order = reg_order[i - 1];
14362 for (j = 0; j < nops; j++)
14363 if (regs[j] > regs[reg_order[i - 1]]
14364 && (this_order == reg_order[i - 1]
14365 || regs[j] < regs[this_order]))
14366 this_order = j;
14367 reg_order[i] = this_order;
14370 /* Ensure that registers that must be live after the instruction end
14371 up with the correct value. */
14372 for (i = 0; i < nops; i++)
14374 int this_order = reg_order[i];
14375 if ((this_order != mem_order[i]
14376 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14377 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14378 return false;
14381 /* Load the constants. */
14382 for (i = 0; i < nops; i++)
14384 rtx op = operands[2 * nops + mem_order[i]];
14385 sorted_regs[i] = regs[reg_order[i]];
14386 emit_move_insn (reg_rtxs[reg_order[i]], op);
14389 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14391 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14392 if (TARGET_THUMB1)
14394 gcc_assert (base_reg_dies);
14395 write_back = TRUE;
14398 if (stm_case == 5)
14400 gcc_assert (base_reg_dies);
14401 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14402 offset = 0;
14405 addr = plus_constant (Pmode, base_reg_rtx, offset);
14407 for (i = 0; i < nops; i++)
14409 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14410 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14411 SImode, addr, 0);
14413 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14414 write_back ? offset + i * 4 : 0));
14415 return true;
14418 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14419 unaligned copies on processors which support unaligned semantics for those
14420 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14421 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14422 An interleave factor of 1 (the minimum) will perform no interleaving.
14423 Load/store multiple are used for aligned addresses where possible. */
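/* Roughly, and only as an illustration (register names assumed): with
   INTERLEAVE_FACTOR == 2, a word-aligned source and an unaligned
   destination, each 8-byte chunk is copied along the lines of

     ldmia  r_src!, {r0, r1}
     str    r0, [r_dst]        @ unaligned store
     str    r1, [r_dst, #4]    @ unaligned store

   with any leftover words, halfword and final byte handled separately
   after the main chunk loop.  */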
14425 static void
14426 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14427 HOST_WIDE_INT length,
14428 unsigned int interleave_factor)
14430 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14431 int *regnos = XALLOCAVEC (int, interleave_factor);
14432 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14433 HOST_WIDE_INT i, j;
14434 HOST_WIDE_INT remaining = length, words;
14435 rtx halfword_tmp = NULL, byte_tmp = NULL;
14436 rtx dst, src;
14437 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14438 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14439 HOST_WIDE_INT srcoffset, dstoffset;
14440 HOST_WIDE_INT src_autoinc, dst_autoinc;
14441 rtx mem, addr;
14443 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14445 /* Use hard registers if we have aligned source or destination so we can use
14446 load/store multiple with contiguous registers. */
14447 if (dst_aligned || src_aligned)
14448 for (i = 0; i < interleave_factor; i++)
14449 regs[i] = gen_rtx_REG (SImode, i);
14450 else
14451 for (i = 0; i < interleave_factor; i++)
14452 regs[i] = gen_reg_rtx (SImode);
14454 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14455 src = copy_addr_to_reg (XEXP (srcbase, 0));
14457 srcoffset = dstoffset = 0;
14459 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14460 For copying the last bytes we want to subtract this offset again. */
14461 src_autoinc = dst_autoinc = 0;
14463 for (i = 0; i < interleave_factor; i++)
14464 regnos[i] = i;
14466 /* Copy BLOCK_SIZE_BYTES chunks. */
14468 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14470 /* Load words. */
14471 if (src_aligned && interleave_factor > 1)
14473 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14474 TRUE, srcbase, &srcoffset));
14475 src_autoinc += UNITS_PER_WORD * interleave_factor;
14477 else
14479 for (j = 0; j < interleave_factor; j++)
14481 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14482 - src_autoinc));
14483 mem = adjust_automodify_address (srcbase, SImode, addr,
14484 srcoffset + j * UNITS_PER_WORD);
14485 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14487 srcoffset += block_size_bytes;
14490 /* Store words. */
14491 if (dst_aligned && interleave_factor > 1)
14493 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14494 TRUE, dstbase, &dstoffset));
14495 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14497 else
14499 for (j = 0; j < interleave_factor; j++)
14501 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14502 - dst_autoinc));
14503 mem = adjust_automodify_address (dstbase, SImode, addr,
14504 dstoffset + j * UNITS_PER_WORD);
14505 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14507 dstoffset += block_size_bytes;
14510 remaining -= block_size_bytes;
14513 /* Copy any whole words left (note these aren't interleaved with any
14514 subsequent halfword/byte load/stores in the interests of simplicity). */
14516 words = remaining / UNITS_PER_WORD;
14518 gcc_assert (words < interleave_factor);
14520 if (src_aligned && words > 1)
14522 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14523 &srcoffset));
14524 src_autoinc += UNITS_PER_WORD * words;
14526 else
14528 for (j = 0; j < words; j++)
14530 addr = plus_constant (Pmode, src,
14531 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14532 mem = adjust_automodify_address (srcbase, SImode, addr,
14533 srcoffset + j * UNITS_PER_WORD);
14534 if (src_aligned)
14535 emit_move_insn (regs[j], mem);
14536 else
14537 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14539 srcoffset += words * UNITS_PER_WORD;
14542 if (dst_aligned && words > 1)
14544 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14545 &dstoffset));
14546 dst_autoinc += words * UNITS_PER_WORD;
14548 else
14550 for (j = 0; j < words; j++)
14552 addr = plus_constant (Pmode, dst,
14553 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14554 mem = adjust_automodify_address (dstbase, SImode, addr,
14555 dstoffset + j * UNITS_PER_WORD);
14556 if (dst_aligned)
14557 emit_move_insn (mem, regs[j]);
14558 else
14559 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14561 dstoffset += words * UNITS_PER_WORD;
14564 remaining -= words * UNITS_PER_WORD;
14566 gcc_assert (remaining < 4);
14568 /* Copy a halfword if necessary. */
14570 if (remaining >= 2)
14572 halfword_tmp = gen_reg_rtx (SImode);
14574 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14575 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14576 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14578 /* Either write out immediately, or delay until we've loaded the last
14579 byte, depending on interleave factor. */
14580 if (interleave_factor == 1)
14582 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14583 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14584 emit_insn (gen_unaligned_storehi (mem,
14585 gen_lowpart (HImode, halfword_tmp)));
14586 halfword_tmp = NULL;
14587 dstoffset += 2;
14590 remaining -= 2;
14591 srcoffset += 2;
14594 gcc_assert (remaining < 2);
14596 /* Copy last byte. */
14598 if ((remaining & 1) != 0)
14600 byte_tmp = gen_reg_rtx (SImode);
14602 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14603 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14604 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14606 if (interleave_factor == 1)
14608 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14609 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14610 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14611 byte_tmp = NULL;
14612 dstoffset++;
14615 remaining--;
14616 srcoffset++;
14619 /* Store last halfword if we haven't done so already. */
14621 if (halfword_tmp)
14623 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14624 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14625 emit_insn (gen_unaligned_storehi (mem,
14626 gen_lowpart (HImode, halfword_tmp)));
14627 dstoffset += 2;
14630 /* Likewise for last byte. */
14632 if (byte_tmp)
14634 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14635 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14636 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14637 dstoffset++;
14640 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14643 /* From mips_adjust_block_mem:
14645 Helper function for doing a loop-based block operation on memory
14646 reference MEM. Each iteration of the loop will operate on LENGTH
14647 bytes of MEM.
14649 Create a new base register for use within the loop and point it to
14650 the start of MEM. Create a new memory reference that uses this
14651 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14653 static void
14654 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14655 rtx *loop_mem)
14657 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14659 /* Although the new mem does not refer to a known location,
14660 it does keep up to LENGTH bytes of alignment. */
14661 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14662 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14665 /* From mips_block_move_loop:
14667 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14668 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14669 the memory regions do not overlap. */
14671 static void
14672 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14673 unsigned int interleave_factor,
14674 HOST_WIDE_INT bytes_per_iter)
14676 rtx src_reg, dest_reg, final_src, test;
14677 HOST_WIDE_INT leftover;
14679 leftover = length % bytes_per_iter;
14680 length -= leftover;
14682 /* Create registers and memory references for use within the loop. */
14683 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14684 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14686 /* Calculate the value that SRC_REG should have after the last iteration of
14687 the loop. */
14688 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14689 0, 0, OPTAB_WIDEN);
14691 /* Emit the start of the loop. */
14692 rtx_code_label *label = gen_label_rtx ();
14693 emit_label (label);
14695 /* Emit the loop body. */
14696 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14697 interleave_factor);
14699 /* Move on to the next block. */
14700 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14701 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14703 /* Emit the loop condition. */
14704 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14705 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14707 /* Mop up any left-over bytes. */
14708 if (leftover)
14709 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14712 /* Emit a block move when either the source or destination is unaligned (not
14713 aligned to a four-byte boundary). This may need further tuning depending on
14714 core type, optimize_size setting, etc. */
14716 static int
14717 arm_movmemqi_unaligned (rtx *operands)
14719 HOST_WIDE_INT length = INTVAL (operands[2]);
14721 if (optimize_size)
14723 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14724 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14725 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14726 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14727 or dst_aligned though: allow more interleaving in those cases since the
14728 resulting code can be smaller. */
14729 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14730 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14732 if (length > 12)
14733 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14734 interleave_factor, bytes_per_iter);
14735 else
14736 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14737 interleave_factor);
14739 else
14741 /* Note that the loop created by arm_block_move_unaligned_loop may be
14742 subject to loop unrolling, which makes tuning this condition a little
14743 redundant. */
14744 if (length > 32)
14745 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14746 else
14747 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14750 return 1;
14753 int
14754 arm_gen_movmemqi (rtx *operands)
14756 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14757 HOST_WIDE_INT srcoffset, dstoffset;
14758 int i;
14759 rtx src, dst, srcbase, dstbase;
14760 rtx part_bytes_reg = NULL;
14761 rtx mem;
14763 if (!CONST_INT_P (operands[2])
14764 || !CONST_INT_P (operands[3])
14765 || INTVAL (operands[2]) > 64)
14766 return 0;
14768 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14769 return arm_movmemqi_unaligned (operands);
14771 if (INTVAL (operands[3]) & 3)
14772 return 0;
14774 dstbase = operands[0];
14775 srcbase = operands[1];
14777 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14778 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14780 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14781 out_words_to_go = INTVAL (operands[2]) / 4;
14782 last_bytes = INTVAL (operands[2]) & 3;
14783 dstoffset = srcoffset = 0;
14785 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14786 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14788 for (i = 0; in_words_to_go >= 2; i+=4)
14790 if (in_words_to_go > 4)
14791 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14792 TRUE, srcbase, &srcoffset));
14793 else
14794 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14795 src, FALSE, srcbase,
14796 &srcoffset));
14798 if (out_words_to_go)
14800 if (out_words_to_go > 4)
14801 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14802 TRUE, dstbase, &dstoffset));
14803 else if (out_words_to_go != 1)
14804 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14805 out_words_to_go, dst,
14806 (last_bytes == 0
14807 ? FALSE : TRUE),
14808 dstbase, &dstoffset));
14809 else
14811 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14812 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14813 if (last_bytes != 0)
14815 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14816 dstoffset += 4;
14821 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14822 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14825 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14826 if (out_words_to_go)
14828 rtx sreg;
14830 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14831 sreg = copy_to_reg (mem);
14833 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14834 emit_move_insn (mem, sreg);
14835 in_words_to_go--;
14837 gcc_assert (!in_words_to_go); /* Sanity check */
14840 if (in_words_to_go)
14842 gcc_assert (in_words_to_go > 0);
14844 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14845 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14848 gcc_assert (!last_bytes || part_bytes_reg);
14850 if (BYTES_BIG_ENDIAN && last_bytes)
14852 rtx tmp = gen_reg_rtx (SImode);
14854 /* The bytes we want are in the top end of the word. */
14855 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14856 GEN_INT (8 * (4 - last_bytes))));
14857 part_bytes_reg = tmp;
14859 while (last_bytes)
14861 mem = adjust_automodify_address (dstbase, QImode,
14862 plus_constant (Pmode, dst,
14863 last_bytes - 1),
14864 dstoffset + last_bytes - 1);
14865 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14867 if (--last_bytes)
14869 tmp = gen_reg_rtx (SImode);
14870 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14871 part_bytes_reg = tmp;
14876 else
14878 if (last_bytes > 1)
14880 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14881 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14882 last_bytes -= 2;
14883 if (last_bytes)
14885 rtx tmp = gen_reg_rtx (SImode);
14886 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14887 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14888 part_bytes_reg = tmp;
14889 dstoffset += 2;
14893 if (last_bytes)
14895 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14896 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14900 return 1;
14903 /* Helper for gen_movmem_ldrd_strd. Increase the address of the memory rtx
14904 MEM by the size of its mode. */
14905 inline static rtx
14906 next_consecutive_mem (rtx mem)
14908 machine_mode mode = GET_MODE (mem);
14909 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14910 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14912 return adjust_automodify_address (mem, mode, addr, offset);
14915 /* Copy using LDRD/STRD instructions whenever possible.
14916 Returns true upon success. */
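/* An illustrative sketch (register numbers assumed): for a 16-byte copy with
   both buffers known to be word-aligned, the loop below emits two DImode
   register moves, which can later become ldrd/strd pairs, roughly

     ldrd  r4, r5, [r_src]        strd  r4, r5, [r_dst]
     ldrd  r4, r5, [r_src, #8]    strd  r4, r5, [r_dst, #8]

   whereas a misaligned side falls back to pairs of unaligned ldr/str for
   each 8-byte chunk.  */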
14917 bool
14918 gen_movmem_ldrd_strd (rtx *operands)
14920 unsigned HOST_WIDE_INT len;
14921 HOST_WIDE_INT align;
14922 rtx src, dst, base;
14923 rtx reg0;
14924 bool src_aligned, dst_aligned;
14925 bool src_volatile, dst_volatile;
14927 gcc_assert (CONST_INT_P (operands[2]));
14928 gcc_assert (CONST_INT_P (operands[3]));
14930 len = UINTVAL (operands[2]);
14931 if (len > 64)
14932 return false;
14934 /* Maximum alignment we can assume for both src and dst buffers. */
14935 align = INTVAL (operands[3]);
14937 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14938 return false;
14940 /* Place src and dst addresses in registers
14941 and update the corresponding mem rtx. */
14942 dst = operands[0];
14943 dst_volatile = MEM_VOLATILE_P (dst);
14944 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14945 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14946 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14948 src = operands[1];
14949 src_volatile = MEM_VOLATILE_P (src);
14950 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14951 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14952 src = adjust_automodify_address (src, VOIDmode, base, 0);
14954 if (!unaligned_access && !(src_aligned && dst_aligned))
14955 return false;
14957 if (src_volatile || dst_volatile)
14958 return false;
14960 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14961 if (!(dst_aligned || src_aligned))
14962 return arm_gen_movmemqi (operands);
14964 /* If either src or dst is unaligned, we'll be accessing it as pairs
14965 of unaligned SImode accesses. Otherwise we can generate DImode
14966 ldrd/strd instructions. */
14967 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14968 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14970 while (len >= 8)
14972 len -= 8;
14973 reg0 = gen_reg_rtx (DImode);
14974 rtx low_reg = NULL_RTX;
14975 rtx hi_reg = NULL_RTX;
14977 if (!src_aligned || !dst_aligned)
14979 low_reg = gen_lowpart (SImode, reg0);
14980 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14982 if (src_aligned)
14983 emit_move_insn (reg0, src);
14984 else
14986 emit_insn (gen_unaligned_loadsi (low_reg, src));
14987 src = next_consecutive_mem (src);
14988 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14991 if (dst_aligned)
14992 emit_move_insn (dst, reg0);
14993 else
14995 emit_insn (gen_unaligned_storesi (dst, low_reg));
14996 dst = next_consecutive_mem (dst);
14997 emit_insn (gen_unaligned_storesi (dst, hi_reg));
15000 src = next_consecutive_mem (src);
15001 dst = next_consecutive_mem (dst);
15004 gcc_assert (len < 8);
15005 if (len >= 4)
15007 /* At least a word but less than a double-word left to copy. Copy one word. */
15008 reg0 = gen_reg_rtx (SImode);
15009 src = adjust_address (src, SImode, 0);
15010 dst = adjust_address (dst, SImode, 0);
15011 if (src_aligned)
15012 emit_move_insn (reg0, src);
15013 else
15014 emit_insn (gen_unaligned_loadsi (reg0, src));
15016 if (dst_aligned)
15017 emit_move_insn (dst, reg0);
15018 else
15019 emit_insn (gen_unaligned_storesi (dst, reg0));
15021 src = next_consecutive_mem (src);
15022 dst = next_consecutive_mem (dst);
15023 len -= 4;
15026 if (len == 0)
15027 return true;
15029 /* Copy the remaining bytes. */
15030 if (len >= 2)
15032 dst = adjust_address (dst, HImode, 0);
15033 src = adjust_address (src, HImode, 0);
15034 reg0 = gen_reg_rtx (SImode);
15035 if (src_aligned)
15036 emit_insn (gen_zero_extendhisi2 (reg0, src));
15037 else
15038 emit_insn (gen_unaligned_loadhiu (reg0, src));
15040 if (dst_aligned)
15041 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15042 else
15043 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15045 src = next_consecutive_mem (src);
15046 dst = next_consecutive_mem (dst);
15047 if (len == 2)
15048 return true;
15051 dst = adjust_address (dst, QImode, 0);
15052 src = adjust_address (src, QImode, 0);
15053 reg0 = gen_reg_rtx (QImode);
15054 emit_move_insn (reg0, src);
15055 emit_move_insn (dst, reg0);
15056 return true;
15059 /* Select a dominance comparison mode if possible for a test of the general
15060 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15061 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15062 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15063 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15064 In all cases OP will be either EQ or NE, but we don't need to know which
15065 here. If we are unable to support a dominance comparison we return
15066 CCmode. This will then fail to match for the RTL expressions that
15067 generate this call. */
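/* An assumed example: for a combined test such as (a == 0 || b <= 0), X is
   the EQ comparison, Y the LE comparison and COND_OR == DOM_CC_X_OR_Y.
   EQ dominates LE (any flags state that satisfies EQ also satisfies LE),
   so the code below returns CC_DLEmode and both tests can be evaluated by
   a single conditional-compare sequence.  */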
15068 machine_mode
15069 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15071 enum rtx_code cond1, cond2;
15072 int swapped = 0;
15074 /* Currently we will probably get the wrong result if the individual
15075 comparisons are not simple. This also ensures that it is safe to
15076 reverse a comparison if necessary. */
15077 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15078 != CCmode)
15079 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15080 != CCmode))
15081 return CCmode;
15083 /* The if_then_else variant of this tests the second condition if the
15084 first passes, but is true if the first fails. Reverse the first
15085 condition to get a true "inclusive-or" expression. */
15086 if (cond_or == DOM_CC_NX_OR_Y)
15087 cond1 = reverse_condition (cond1);
15089 /* If the comparisons are not equal, and one doesn't dominate the other,
15090 then we can't do this. */
15091 if (cond1 != cond2
15092 && !comparison_dominates_p (cond1, cond2)
15093 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15094 return CCmode;
15096 if (swapped)
15097 std::swap (cond1, cond2);
15099 switch (cond1)
15101 case EQ:
15102 if (cond_or == DOM_CC_X_AND_Y)
15103 return CC_DEQmode;
15105 switch (cond2)
15107 case EQ: return CC_DEQmode;
15108 case LE: return CC_DLEmode;
15109 case LEU: return CC_DLEUmode;
15110 case GE: return CC_DGEmode;
15111 case GEU: return CC_DGEUmode;
15112 default: gcc_unreachable ();
15115 case LT:
15116 if (cond_or == DOM_CC_X_AND_Y)
15117 return CC_DLTmode;
15119 switch (cond2)
15121 case LT:
15122 return CC_DLTmode;
15123 case LE:
15124 return CC_DLEmode;
15125 case NE:
15126 return CC_DNEmode;
15127 default:
15128 gcc_unreachable ();
15131 case GT:
15132 if (cond_or == DOM_CC_X_AND_Y)
15133 return CC_DGTmode;
15135 switch (cond2)
15137 case GT:
15138 return CC_DGTmode;
15139 case GE:
15140 return CC_DGEmode;
15141 case NE:
15142 return CC_DNEmode;
15143 default:
15144 gcc_unreachable ();
15147 case LTU:
15148 if (cond_or == DOM_CC_X_AND_Y)
15149 return CC_DLTUmode;
15151 switch (cond2)
15153 case LTU:
15154 return CC_DLTUmode;
15155 case LEU:
15156 return CC_DLEUmode;
15157 case NE:
15158 return CC_DNEmode;
15159 default:
15160 gcc_unreachable ();
15163 case GTU:
15164 if (cond_or == DOM_CC_X_AND_Y)
15165 return CC_DGTUmode;
15167 switch (cond2)
15169 case GTU:
15170 return CC_DGTUmode;
15171 case GEU:
15172 return CC_DGEUmode;
15173 case NE:
15174 return CC_DNEmode;
15175 default:
15176 gcc_unreachable ();
15179 /* The remaining cases only occur when both comparisons are the
15180 same. */
15181 case NE:
15182 gcc_assert (cond1 == cond2);
15183 return CC_DNEmode;
15185 case LE:
15186 gcc_assert (cond1 == cond2);
15187 return CC_DLEmode;
15189 case GE:
15190 gcc_assert (cond1 == cond2);
15191 return CC_DGEmode;
15193 case LEU:
15194 gcc_assert (cond1 == cond2);
15195 return CC_DLEUmode;
15197 case GEU:
15198 gcc_assert (cond1 == cond2);
15199 return CC_DGEUmode;
15201 default:
15202 gcc_unreachable ();
15206 machine_mode
15207 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15209 /* All floating point compares return CCFP if it is an equality
15210 comparison, and CCFPE otherwise. */
15211 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15213 switch (op)
15215 case EQ:
15216 case NE:
15217 case UNORDERED:
15218 case ORDERED:
15219 case UNLT:
15220 case UNLE:
15221 case UNGT:
15222 case UNGE:
15223 case UNEQ:
15224 case LTGT:
15225 return CCFPmode;
15227 case LT:
15228 case LE:
15229 case GT:
15230 case GE:
15231 return CCFPEmode;
15233 default:
15234 gcc_unreachable ();
15238 /* A compare with a shifted operand. Because of canonicalization, the
15239 comparison will have to be swapped when we emit the assembler. */
15240 if (GET_MODE (y) == SImode
15241 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15242 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15243 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15244 || GET_CODE (x) == ROTATERT))
15245 return CC_SWPmode;
15247 /* This operation is performed swapped, but since we only rely on the Z
15248 flag we don't need an additional mode. */
15249 if (GET_MODE (y) == SImode
15250 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15251 && GET_CODE (x) == NEG
15252 && (op == EQ || op == NE))
15253 return CC_Zmode;
15255 /* This is a special case that is used by combine to allow a
15256 comparison of a shifted byte load to be split into a zero-extend
15257 followed by a comparison of the shifted integer (only valid for
15258 equalities and unsigned inequalities). */
15259 if (GET_MODE (x) == SImode
15260 && GET_CODE (x) == ASHIFT
15261 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15262 && GET_CODE (XEXP (x, 0)) == SUBREG
15263 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15264 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15265 && (op == EQ || op == NE
15266 || op == GEU || op == GTU || op == LTU || op == LEU)
15267 && CONST_INT_P (y))
15268 return CC_Zmode;
15270 /* A construct for a conditional compare: if the false arm contains
15271 0, then both conditions must be true; otherwise either condition
15272 must be true. Not all conditions are possible, so CCmode is
15273 returned if it can't be done. */
15274 if (GET_CODE (x) == IF_THEN_ELSE
15275 && (XEXP (x, 2) == const0_rtx
15276 || XEXP (x, 2) == const1_rtx)
15277 && COMPARISON_P (XEXP (x, 0))
15278 && COMPARISON_P (XEXP (x, 1)))
15279 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15280 INTVAL (XEXP (x, 2)));
15282 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15283 if (GET_CODE (x) == AND
15284 && (op == EQ || op == NE)
15285 && COMPARISON_P (XEXP (x, 0))
15286 && COMPARISON_P (XEXP (x, 1)))
15287 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15288 DOM_CC_X_AND_Y);
15290 if (GET_CODE (x) == IOR
15291 && (op == EQ || op == NE)
15292 && COMPARISON_P (XEXP (x, 0))
15293 && COMPARISON_P (XEXP (x, 1)))
15294 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15295 DOM_CC_X_OR_Y);
15297 /* An operation (on Thumb) where we want to test for a single bit.
15298 This is done by shifting that bit up into the top bit of a
15299 scratch register; we can then branch on the sign bit. */
15300 if (TARGET_THUMB1
15301 && GET_MODE (x) == SImode
15302 && (op == EQ || op == NE)
15303 && GET_CODE (x) == ZERO_EXTRACT
15304 && XEXP (x, 1) == const1_rtx)
15305 return CC_Nmode;
15307 /* An operation that sets the condition codes as a side-effect; the
15308 V flag is not set correctly, so we can only use comparisons where
15309 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15310 instead.) */
15311 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15312 if (GET_MODE (x) == SImode
15313 && y == const0_rtx
15314 && (op == EQ || op == NE || op == LT || op == GE)
15315 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15316 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15317 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15318 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15319 || GET_CODE (x) == LSHIFTRT
15320 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15321 || GET_CODE (x) == ROTATERT
15322 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15323 return CC_NOOVmode;
15325 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15326 return CC_Zmode;
15328 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15329 && GET_CODE (x) == PLUS
15330 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15331 return CC_Cmode;
15333 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15335 switch (op)
15337 case EQ:
15338 case NE:
15339 /* A DImode comparison against zero can be implemented by
15340 or'ing the two halves together. */
15341 if (y == const0_rtx)
15342 return CC_Zmode;
15344 /* We can do an equality test in three Thumb instructions. */
15345 if (!TARGET_32BIT)
15346 return CC_Zmode;
15348 /* FALLTHROUGH */
15350 case LTU:
15351 case LEU:
15352 case GTU:
15353 case GEU:
15354 /* DImode unsigned comparisons can be implemented by cmp +
15355 cmpeq without a scratch register. Not worth doing in
15356 Thumb-2. */
15357 if (TARGET_32BIT)
15358 return CC_CZmode;
15360 /* FALLTHROUGH */
15362 case LT:
15363 case LE:
15364 case GT:
15365 case GE:
15366 /* DImode signed and unsigned comparisons can be implemented
15367 by cmp + sbcs with a scratch register, but that does not
15368 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15369 gcc_assert (op != EQ && op != NE);
15370 return CC_NCVmode;
15372 default:
15373 gcc_unreachable ();
15377 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15378 return GET_MODE (x);
15380 return CCmode;
15383 /* X and Y are two things to compare using CODE. Emit the compare insn and
15384 return the rtx for register 0 in the proper mode. FP means this is a
15385 floating point compare: I don't think that it is needed on the arm. */
15386 rtx
15387 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15389 machine_mode mode;
15390 rtx cc_reg;
15391 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15393 /* We might have X as a constant, Y as a register because of the predicates
15394 used for cmpdi. If so, force X to a register here. */
15395 if (dimode_comparison && !REG_P (x))
15396 x = force_reg (DImode, x);
15398 mode = SELECT_CC_MODE (code, x, y);
15399 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15401 if (dimode_comparison
15402 && mode != CC_CZmode)
15404 rtx clobber, set;
15406 /* To compare two non-zero values for equality, XOR them and
15407 then compare against zero. Not used for ARM mode; there
15408 CC_CZmode is cheaper. */
15409 if (mode == CC_Zmode && y != const0_rtx)
15411 gcc_assert (!reload_completed);
15412 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15413 y = const0_rtx;
15416 /* A scratch register is required. */
15417 if (reload_completed)
15418 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15419 else
15420 scratch = gen_rtx_SCRATCH (SImode);
15422 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15423 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15424 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15426 else
15427 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15429 return cc_reg;
15432 /* Generate a sequence of insns that will generate the correct return
15433 address mask depending on the physical architecture that the program
15434 is running on. */
15435 rtx
15436 arm_gen_return_addr_mask (void)
15438 rtx reg = gen_reg_rtx (Pmode);
15440 emit_insn (gen_return_addr_mask (reg));
15441 return reg;
15444 void
15445 arm_reload_in_hi (rtx *operands)
15447 rtx ref = operands[1];
15448 rtx base, scratch;
15449 HOST_WIDE_INT offset = 0;
15451 if (GET_CODE (ref) == SUBREG)
15453 offset = SUBREG_BYTE (ref);
15454 ref = SUBREG_REG (ref);
15457 if (REG_P (ref))
15459 /* We have a pseudo which has been spilt onto the stack; there
15460 are two cases here: the first where there is a simple
15461 stack-slot replacement and a second where the stack-slot is
15462 out of range, or is used as a subreg. */
15463 if (reg_equiv_mem (REGNO (ref)))
15465 ref = reg_equiv_mem (REGNO (ref));
15466 base = find_replacement (&XEXP (ref, 0));
15468 else
15469 /* The slot is out of range, or was dressed up in a SUBREG. */
15470 base = reg_equiv_address (REGNO (ref));
15472 /* PR 62554: If there is no equivalent memory location then just move
15473 the value as an SImode register move. This happens when the target
15474 architecture variant does not have an HImode register move. */
15475 if (base == NULL)
15477 gcc_assert (REG_P (operands[0]));
15478 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15479 gen_rtx_SUBREG (SImode, ref, 0)));
15480 return;
15483 else
15484 base = find_replacement (&XEXP (ref, 0));
15486 /* Handle the case where the address is too complex to be offset by 1. */
15487 if (GET_CODE (base) == MINUS
15488 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15490 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15492 emit_set_insn (base_plus, base);
15493 base = base_plus;
15495 else if (GET_CODE (base) == PLUS)
15497 /* The addend must be CONST_INT, or we would have dealt with it above. */
15498 HOST_WIDE_INT hi, lo;
15500 offset += INTVAL (XEXP (base, 1));
15501 base = XEXP (base, 0);
15503 /* Rework the address into a legal sequence of insns. */
15504 /* Valid range for lo is -4095 -> 4095 */
15505 lo = (offset >= 0
15506 ? (offset & 0xfff)
15507 : -((-offset) & 0xfff));
15509 /* Corner case: if lo is the max offset then we would be out of range
15510 once we have added the additional 1 below, so bump the msb into the
15511 pre-loading insn(s). */
15512 if (lo == 4095)
15513 lo &= 0x7ff;
15515 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15516 ^ (HOST_WIDE_INT) 0x80000000)
15517 - (HOST_WIDE_INT) 0x80000000);
15519 gcc_assert (hi + lo == offset);
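/* Illustrative numbers: offset = 5000 gives lo = 904 and hi = 4096, so the
   addsi3 below adds 4096 to the base and the two byte loads then use
   offsets 904 and 905, both within the +/-4095 ldrb range.  The lo == 4095
   clamp above keeps lo + 1 reachable too: offset = 4095 becomes lo = 2047,
   hi = 2048.  */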
15521 if (hi != 0)
15523 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15525 /* Get the base address; addsi3 knows how to handle constants
15526 that require more than one insn. */
15527 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15528 base = base_plus;
15529 offset = lo;
15533 /* Operands[2] may overlap operands[0] (though it won't overlap
15534 operands[1]); that's why we asked for a DImode reg -- so we can
15535 use the half that does not overlap. */
15536 if (REGNO (operands[2]) == REGNO (operands[0]))
15537 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15538 else
15539 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15541 emit_insn (gen_zero_extendqisi2 (scratch,
15542 gen_rtx_MEM (QImode,
15543 plus_constant (Pmode, base,
15544 offset))));
15545 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15546 gen_rtx_MEM (QImode,
15547 plus_constant (Pmode, base,
15548 offset + 1))));
15549 if (!BYTES_BIG_ENDIAN)
15550 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15551 gen_rtx_IOR (SImode,
15552 gen_rtx_ASHIFT
15553 (SImode,
15554 gen_rtx_SUBREG (SImode, operands[0], 0),
15555 GEN_INT (8)),
15556 scratch));
15557 else
15558 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15559 gen_rtx_IOR (SImode,
15560 gen_rtx_ASHIFT (SImode, scratch,
15561 GEN_INT (8)),
15562 gen_rtx_SUBREG (SImode, operands[0], 0)));
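/* On a little-endian target the sequence emitted above by arm_reload_in_hi
   therefore amounts to (schematically, with rD the output, rS the scratch
   and rB the reworked base -- the names are purely illustrative):

       ldrb    rS, [rB, #offset]       @ low byte
       ldrb    rD, [rB, #offset + 1]   @ high byte
       orr     rD, rS, rD, lsl #8

   On big-endian targets the byte at the lower address is the high byte, so
   the final OR shifts the scratch register instead.  */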
15565 /* Handle storing a half-word to memory during reload by synthesizing as two
15566 byte stores. Take care not to clobber the input values until after we
15567 have moved them somewhere safe. This code assumes that if the DImode
15568 scratch in operands[2] overlaps either the input value or output address
15569 in some way, then that value must die in this insn (we absolutely need
15570 two scratch registers for some corner cases). */
15571 void
15572 arm_reload_out_hi (rtx *operands)
15574 rtx ref = operands[0];
15575 rtx outval = operands[1];
15576 rtx base, scratch;
15577 HOST_WIDE_INT offset = 0;
15579 if (GET_CODE (ref) == SUBREG)
15581 offset = SUBREG_BYTE (ref);
15582 ref = SUBREG_REG (ref);
15585 if (REG_P (ref))
15587 /* We have a pseudo which has been spilt onto the stack; there
15588 are two cases here: the first where there is a simple
15589 stack-slot replacement and a second where the stack-slot is
15590 out of range, or is used as a subreg. */
15591 if (reg_equiv_mem (REGNO (ref)))
15593 ref = reg_equiv_mem (REGNO (ref));
15594 base = find_replacement (&XEXP (ref, 0));
15596 else
15597 /* The slot is out of range, or was dressed up in a SUBREG. */
15598 base = reg_equiv_address (REGNO (ref));
15600 /* PR 62254: If there is no equivalent memory location then just move
15601 the value as an SImode register move. This happens when the target
15602 architecture variant does not have an HImode register move. */
15603 if (base == NULL)
15605 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15607 if (REG_P (outval))
15609 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15610 gen_rtx_SUBREG (SImode, outval, 0)));
15612 else /* SUBREG_P (outval) */
15614 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15615 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15616 SUBREG_REG (outval)));
15617 else
15618 /* FIXME: Handle other cases ? */
15619 gcc_unreachable ();
15621 return;
15624 else
15625 base = find_replacement (&XEXP (ref, 0));
15627 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15629 /* Handle the case where the address is too complex to be offset by 1. */
15630 if (GET_CODE (base) == MINUS
15631 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15633 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15635 /* Be careful not to destroy OUTVAL. */
15636 if (reg_overlap_mentioned_p (base_plus, outval))
15638 /* Updating base_plus might destroy outval, see if we can
15639 swap the scratch and base_plus. */
15640 if (!reg_overlap_mentioned_p (scratch, outval))
15641 std::swap (scratch, base_plus);
15642 else
15644 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15646 /* Be conservative and copy OUTVAL into the scratch now,
15647 this should only be necessary if outval is a subreg
15648 of something larger than a word. */
15649 /* XXX Might this clobber base? I can't see how it can,
15650 since scratch is known to overlap with OUTVAL, and
15651 must be wider than a word. */
15652 emit_insn (gen_movhi (scratch_hi, outval));
15653 outval = scratch_hi;
15657 emit_set_insn (base_plus, base);
15658 base = base_plus;
15660 else if (GET_CODE (base) == PLUS)
15662 /* The addend must be CONST_INT, or we would have dealt with it above. */
15663 HOST_WIDE_INT hi, lo;
15665 offset += INTVAL (XEXP (base, 1));
15666 base = XEXP (base, 0);
15668 /* Rework the address into a legal sequence of insns. */
15669 /* Valid range for lo is -4095 -> 4095 */
15670 lo = (offset >= 0
15671 ? (offset & 0xfff)
15672 : -((-offset) & 0xfff));
15674 /* Corner case: if lo is the max offset then we would be out of range
15675 once we have added the additional 1 below, so bump the msb into the
15676 pre-loading insn(s). */
15677 if (lo == 4095)
15678 lo &= 0x7ff;
15680 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15681 ^ (HOST_WIDE_INT) 0x80000000)
15682 - (HOST_WIDE_INT) 0x80000000);
15684 gcc_assert (hi + lo == offset);
15686 if (hi != 0)
15688 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15690 /* Be careful not to destroy OUTVAL. */
15691 if (reg_overlap_mentioned_p (base_plus, outval))
15693 /* Updating base_plus might destroy outval, see if we
15694 can swap the scratch and base_plus. */
15695 if (!reg_overlap_mentioned_p (scratch, outval))
15696 std::swap (scratch, base_plus);
15697 else
15699 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15701 /* Be conservative and copy outval into scratch now,
15702 this should only be necessary if outval is a
15703 subreg of something larger than a word. */
15704 /* XXX Might this clobber base? I can't see how it
15705 can, since scratch is known to overlap with
15706 outval. */
15707 emit_insn (gen_movhi (scratch_hi, outval));
15708 outval = scratch_hi;
15712 /* Get the base address; addsi3 knows how to handle constants
15713 that require more than one insn. */
15714 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15715 base = base_plus;
15716 offset = lo;
15720 if (BYTES_BIG_ENDIAN)
15722 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15723 plus_constant (Pmode, base,
15724 offset + 1)),
15725 gen_lowpart (QImode, outval)));
15726 emit_insn (gen_lshrsi3 (scratch,
15727 gen_rtx_SUBREG (SImode, outval, 0),
15728 GEN_INT (8)));
15729 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15730 offset)),
15731 gen_lowpart (QImode, scratch)));
15733 else
15735 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15736 offset)),
15737 gen_lowpart (QImode, outval)));
15738 emit_insn (gen_lshrsi3 (scratch,
15739 gen_rtx_SUBREG (SImode, outval, 0),
15740 GEN_INT (8)));
15741 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15742 plus_constant (Pmode, base,
15743 offset + 1)),
15744 gen_lowpart (QImode, scratch)));
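/* Schematically, the little-endian branch above stores a halfword held in
   rV through the scratch register rS as:

       strb    rV, [rB, #offset]       @ low byte
       lsr     rS, rV, #8
       strb    rS, [rB, #offset + 1]   @ high byte

   The big-endian branch writes the same two bytes with the addresses
   swapped.  Register names here are purely illustrative.  */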
15748 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15749 (padded to the size of a word) should be passed in a register. */
15751 static bool
15752 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15754 if (TARGET_AAPCS_BASED)
15755 return must_pass_in_stack_var_size (mode, type);
15756 else
15757 return must_pass_in_stack_var_size_or_pad (mode, type);
15761 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15762 Return true if an argument passed on the stack should be padded upwards,
15763 i.e. if the least-significant byte has useful data.
15764 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15765 aggregate types are placed in the lowest memory address. */
15767 bool
15768 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15770 if (!TARGET_AAPCS_BASED)
15771 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15773 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15774 return false;
15776 return true;
15780 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15781 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15782 register has useful data, and return the opposite if the most
15783 significant byte does. */
15785 bool
15786 arm_pad_reg_upward (machine_mode mode,
15787 tree type, int first ATTRIBUTE_UNUSED)
15789 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15791 /* For AAPCS, small aggregates, small fixed-point types,
15792 and small complex types are always padded upwards. */
15793 if (type)
15795 if ((AGGREGATE_TYPE_P (type)
15796 || TREE_CODE (type) == COMPLEX_TYPE
15797 || FIXED_POINT_TYPE_P (type))
15798 && int_size_in_bytes (type) <= 4)
15799 return true;
15801 else
15803 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15804 && GET_MODE_SIZE (mode) <= 4)
15805 return true;
15809 /* Otherwise, use default padding. */
15810 return !BYTES_BIG_ENDIAN;
15813 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15814 assuming that the address in the base register is word aligned. */
15815 bool
15816 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15818 HOST_WIDE_INT max_offset;
15820 /* Offset must be a multiple of 4 in Thumb mode. */
15821 if (TARGET_THUMB2 && ((offset & 3) != 0))
15822 return false;
15824 if (TARGET_THUMB2)
15825 max_offset = 1020;
15826 else if (TARGET_ARM)
15827 max_offset = 255;
15828 else
15829 return false;
15831 return ((offset <= max_offset) && (offset >= -max_offset));
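/* For example, in Thumb-2 state an offset of 1020 is accepted while 1022
   (not a multiple of 4) and 1024 (out of range) are not; in ARM state any
   offset in [-255, 255] is accepted.  */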
15834 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15835 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15836 Assumes that the address in the base register RN is word aligned. Pattern
15837 guarantees that both memory accesses use the same base register,
15838 the offsets are constants within the range, and the gap between the offsets is 4.
15839 If reload is complete then check that registers are legal. WBACK indicates whether
15840 address is updated. LOAD indicates whether memory access is load or store. */
15841 bool
15842 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15843 bool wback, bool load)
15845 unsigned int t, t2, n;
15847 if (!reload_completed)
15848 return true;
15850 if (!offset_ok_for_ldrd_strd (offset))
15851 return false;
15853 t = REGNO (rt);
15854 t2 = REGNO (rt2);
15855 n = REGNO (rn);
15857 if ((TARGET_THUMB2)
15858 && ((wback && (n == t || n == t2))
15859 || (t == SP_REGNUM)
15860 || (t == PC_REGNUM)
15861 || (t2 == SP_REGNUM)
15862 || (t2 == PC_REGNUM)
15863 || (!load && (n == PC_REGNUM))
15864 || (load && (t == t2))
15865 /* Triggers Cortex-M3 LDRD errata. */
15866 || (!wback && load && fix_cm3_ldrd && (n == t))))
15867 return false;
15869 if ((TARGET_ARM)
15870 && ((wback && (n == t || n == t2))
15871 || (t2 == PC_REGNUM)
15872 || (t % 2 != 0) /* First destination register is not even. */
15873 || (t2 != t + 1)
15874 /* PC can be used as a base register (for offset addressing only),
15875 but it is deprecated. */
15876 || (n == PC_REGNUM)))
15877 return false;
15879 return true;
15882 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15883 operand MEM's address contains an immediate offset from the base
15884 register and has no side effects, in which case it sets BASE and
15885 OFFSET accordingly. */
15886 static bool
15887 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15889 rtx addr;
15891 gcc_assert (base != NULL && offset != NULL);
15893 /* TODO: Handle more general memory operand patterns, such as
15894 PRE_DEC and PRE_INC. */
15896 if (side_effects_p (mem))
15897 return false;
15899 /* Can't deal with subregs. */
15900 if (GET_CODE (mem) == SUBREG)
15901 return false;
15903 gcc_assert (MEM_P (mem));
15905 *offset = const0_rtx;
15907 addr = XEXP (mem, 0);
15909 /* If addr isn't valid for DImode, then we can't handle it. */
15910 if (!arm_legitimate_address_p (DImode, addr,
15911 reload_in_progress || reload_completed))
15912 return false;
15914 if (REG_P (addr))
15916 *base = addr;
15917 return true;
15919 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15921 *base = XEXP (addr, 0);
15922 *offset = XEXP (addr, 1);
15923 return (REG_P (*base) && CONST_INT_P (*offset));
15926 return false;
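/* For instance, an address of the form (plus (reg rN) (const_int 8)) yields
   *base = rN and *offset = 8, a bare (reg rN) yields *offset = const0_rtx,
   and auto-increment addresses are rejected by the side_effects_p check
   above.  */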
15929 /* Called from a peephole2 to replace two word-size accesses with a
15930 single LDRD/STRD instruction. Returns true iff we can generate a
15931 new instruction sequence. That is, both accesses use the same base
15932 register and the gap between constant offsets is 4. This function
15933 may reorder its operands to match ldrd/strd RTL templates.
15934 OPERANDS are the operands found by the peephole matcher;
15935 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15936 corresponding memory operands. LOAD indicates whether the access
15937 is load or store. CONST_STORE indicates a store of constant
15938 integer values held in OPERANDS[4,5] and assumes that the pattern
15939 is 4 insns long, for the purpose of checking dead registers.
15940 COMMUTE indicates that register operands may be reordered. */
15941 bool
15942 gen_operands_ldrd_strd (rtx *operands, bool load,
15943 bool const_store, bool commute)
15945 int nops = 2;
15946 HOST_WIDE_INT offsets[2], offset;
15947 rtx base = NULL_RTX;
15948 rtx cur_base, cur_offset, tmp;
15949 int i, gap;
15950 HARD_REG_SET regset;
15952 gcc_assert (!const_store || !load);
15953 /* Check that the memory references are immediate offsets from the
15954 same base register. Extract the base register, the destination
15955 registers, and the corresponding memory offsets. */
15956 for (i = 0; i < nops; i++)
15958 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15959 return false;
15961 if (i == 0)
15962 base = cur_base;
15963 else if (REGNO (base) != REGNO (cur_base))
15964 return false;
15966 offsets[i] = INTVAL (cur_offset);
15967 if (GET_CODE (operands[i]) == SUBREG)
15969 tmp = SUBREG_REG (operands[i]);
15970 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15971 operands[i] = tmp;
15975 /* Make sure there is no dependency between the individual loads. */
15976 if (load && REGNO (operands[0]) == REGNO (base))
15977 return false; /* RAW */
15979 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15980 return false; /* WAW */
15982 /* If the same input register is used in both stores
15983 when storing different constants, try to find a free register.
15984 For example, the code
15985 mov r0, 0
15986 str r0, [r2]
15987 mov r0, 1
15988 str r0, [r2, #4]
15989 can be transformed into
15990 mov r1, 0
15991 mov r0, 1
15992 strd r1, r0, [r2]
15993 in Thumb mode assuming that r1 is free.
15994 For ARM mode do the same but only if the starting register
15995 can be made to be even. */
15996 if (const_store
15997 && REGNO (operands[0]) == REGNO (operands[1])
15998 && INTVAL (operands[4]) != INTVAL (operands[5]))
16000 if (TARGET_THUMB2)
16002 CLEAR_HARD_REG_SET (regset);
16003 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16004 if (tmp == NULL_RTX)
16005 return false;
16007 /* Use the new register in the first load to ensure that
16008 if the original input register is not dead after peephole,
16009 then it will have the correct constant value. */
16010 operands[0] = tmp;
16012 else if (TARGET_ARM)
16014 int regno = REGNO (operands[0]);
16015 if (!peep2_reg_dead_p (4, operands[0]))
16017 /* When the input register is even and is not dead after the
16018 pattern, it has to hold the second constant but we cannot
16019 form a legal STRD in ARM mode with this register as the second
16020 register. */
16021 if (regno % 2 == 0)
16022 return false;
16024 /* Is regno-1 free? */
16025 SET_HARD_REG_SET (regset);
16026 CLEAR_HARD_REG_BIT(regset, regno - 1);
16027 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16028 if (tmp == NULL_RTX)
16029 return false;
16031 operands[0] = tmp;
16033 else
16035 /* Find a DImode register. */
16036 CLEAR_HARD_REG_SET (regset);
16037 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16038 if (tmp != NULL_RTX)
16040 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16041 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16043 else
16045 /* Can we use the input register to form a DI register? */
16046 SET_HARD_REG_SET (regset);
16047 CLEAR_HARD_REG_BIT(regset,
16048 regno % 2 == 0 ? regno + 1 : regno - 1);
16049 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16050 if (tmp == NULL_RTX)
16051 return false;
16052 operands[regno % 2 == 1 ? 0 : 1] = tmp;
16056 gcc_assert (operands[0] != NULL_RTX);
16057 gcc_assert (operands[1] != NULL_RTX);
16058 gcc_assert (REGNO (operands[0]) % 2 == 0);
16059 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
16063 /* Make sure the instructions are ordered with lower memory access first. */
16064 if (offsets[0] > offsets[1])
16066 gap = offsets[0] - offsets[1];
16067 offset = offsets[1];
16069 /* Swap the instructions such that lower memory is accessed first. */
16070 std::swap (operands[0], operands[1]);
16071 std::swap (operands[2], operands[3]);
16072 if (const_store)
16073 std::swap (operands[4], operands[5]);
16075 else
16077 gap = offsets[1] - offsets[0];
16078 offset = offsets[0];
16081 /* Make sure accesses are to consecutive memory locations. */
16082 if (gap != 4)
16083 return false;
16085 /* Make sure we generate legal instructions. */
16086 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16087 false, load))
16088 return true;
16090 /* In Thumb state, where registers are almost unconstrained, there
16091 is little hope of fixing it. */
16092 if (TARGET_THUMB2)
16093 return false;
16095 if (load && commute)
16097 /* Try reordering registers. */
16098 std::swap (operands[0], operands[1]);
16099 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16100 false, load))
16101 return true;
16104 if (const_store)
16106 /* If input registers are dead after this pattern, they can be
16107 reordered or replaced by other registers that are free in the
16108 current pattern. */
16109 if (!peep2_reg_dead_p (4, operands[0])
16110 || !peep2_reg_dead_p (4, operands[1]))
16111 return false;
16113 /* Try to reorder the input registers. */
16114 /* For example, the code
16115 mov r0, 0
16116 mov r1, 1
16117 str r1, [r2]
16118 str r0, [r2, #4]
16119 can be transformed into
16120 mov r1, 0
16121 mov r0, 1
16122 strd r0, [r2]
16124 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16125 false, false))
16127 std::swap (operands[0], operands[1]);
16128 return true;
16131 /* Try to find a free DI register. */
16132 CLEAR_HARD_REG_SET (regset);
16133 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16134 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16135 while (true)
16137 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16138 if (tmp == NULL_RTX)
16139 return false;
16141 /* DREG must be an even-numbered register in DImode.
16142 Split it into SI registers. */
16143 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16144 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16145 gcc_assert (operands[0] != NULL_RTX);
16146 gcc_assert (operands[1] != NULL_RTX);
16147 gcc_assert (REGNO (operands[0]) % 2 == 0);
16148 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16150 return (operands_ok_ldrd_strd (operands[0], operands[1],
16151 base, offset,
16152 false, load));
16156 return false;
16162 /* Print a symbolic form of X to the debug file, F. */
16163 static void
16164 arm_print_value (FILE *f, rtx x)
16166 switch (GET_CODE (x))
16168 case CONST_INT:
16169 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16170 return;
16172 case CONST_DOUBLE:
16173 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16174 return;
16176 case CONST_VECTOR:
16178 int i;
16180 fprintf (f, "<");
16181 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16183 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16184 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16185 fputc (',', f);
16187 fprintf (f, ">");
16189 return;
16191 case CONST_STRING:
16192 fprintf (f, "\"%s\"", XSTR (x, 0));
16193 return;
16195 case SYMBOL_REF:
16196 fprintf (f, "`%s'", XSTR (x, 0));
16197 return;
16199 case LABEL_REF:
16200 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16201 return;
16203 case CONST:
16204 arm_print_value (f, XEXP (x, 0));
16205 return;
16207 case PLUS:
16208 arm_print_value (f, XEXP (x, 0));
16209 fprintf (f, "+");
16210 arm_print_value (f, XEXP (x, 1));
16211 return;
16213 case PC:
16214 fprintf (f, "pc");
16215 return;
16217 default:
16218 fprintf (f, "????");
16219 return;
16223 /* Routines for manipulation of the constant pool. */
16225 /* Arm instructions cannot load a large constant directly into a
16226 register; they have to come from a pc relative load. The constant
16227 must therefore be placed in the addressable range of the pc
16228 relative load. Depending on the precise pc relative load
16229 instruction the range is somewhere between 256 bytes and 4k. This
16230 means that we often have to dump a constant inside a function, and
16231 generate code to branch around it.
16233 It is important to minimize this, since the branches will slow
16234 things down and make the code larger.
16236 Normally we can hide the table after an existing unconditional
16237 branch so that there is no interruption of the flow, but in the
16238 worst case the code looks like this:
16240 ldr rn, L1
16242 b L2
16243 align
16244 L1: .long value
16248 ldr rn, L3
16250 b L4
16251 align
16252 L3: .long value
16256 We fix this by performing a scan after scheduling, which notices
16257 which instructions need to have their operands fetched from the
16258 constant table and builds the table.
16260 The algorithm starts by building a table of all the constants that
16261 need fixing up and all the natural barriers in the function (places
16262 where a constant table can be dropped without breaking the flow).
16263 For each fixup we note how far the pc-relative replacement will be
16264 able to reach and the offset of the instruction into the function.
16266 Having built the table we then group the fixes together to form
16267 tables that are as large as possible (subject to addressing
16268 constraints) and emit each table of constants after the last
16269 barrier that is within range of all the instructions in the group.
16270 If a group does not contain a barrier, then we forcibly create one
16271 by inserting a jump instruction into the flow. Once the table has
16272 been inserted, the insns are then modified to reference the
16273 relevant entry in the pool.
16275 Possible enhancements to the algorithm (not implemented) are:
16277 1) For some processors and object formats, there may be benefit in
16278 aligning the pools to the start of cache lines; this alignment
16279 would need to be taken into account when calculating addressability
16280 of a pool. */
16282 /* These typedefs are located at the start of this file, so that
16283 they can be used in the prototypes there. This comment is to
16284 remind readers of that fact so that the following structures
16285 can be understood more easily.
16287 typedef struct minipool_node Mnode;
16288 typedef struct minipool_fixup Mfix; */
16290 struct minipool_node
16292 /* Doubly linked chain of entries. */
16293 Mnode * next;
16294 Mnode * prev;
16295 /* The maximum offset into the code at which this entry can be placed. While
16296 pushing fixes for forward references, all entries are sorted in order
16297 of increasing max_address. */
16298 HOST_WIDE_INT max_address;
16299 /* Similarly for an entry inserted for a backwards ref. */
16300 HOST_WIDE_INT min_address;
16301 /* The number of fixes referencing this entry. This can become zero
16302 if we "unpush" an entry. In this case we ignore the entry when we
16303 come to emit the code. */
16304 int refcount;
16305 /* The offset from the start of the minipool. */
16306 HOST_WIDE_INT offset;
16307 /* The value in the table. */
16308 rtx value;
16309 /* The mode of value. */
16310 machine_mode mode;
16311 /* The size of the value. With iWMMXt enabled
16312 sizes > 4 also imply an alignment of 8 bytes. */
16313 int fix_size;
16316 struct minipool_fixup
16318 Mfix * next;
16319 rtx_insn * insn;
16320 HOST_WIDE_INT address;
16321 rtx * loc;
16322 machine_mode mode;
16323 int fix_size;
16324 rtx value;
16325 Mnode * minipool;
16326 HOST_WIDE_INT forwards;
16327 HOST_WIDE_INT backwards;
16330 /* Fixes less than a word need padding out to a word boundary. */
16331 #define MINIPOOL_FIX_SIZE(mode) \
16332 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
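/* So MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode) both
   evaluate to 4, while MINIPOOL_FIX_SIZE (DImode) evaluates to 8.  */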
16334 static Mnode * minipool_vector_head;
16335 static Mnode * minipool_vector_tail;
16336 static rtx_code_label *minipool_vector_label;
16337 static int minipool_pad;
16339 /* The linked list of all minipool fixes required for this function. */
16340 Mfix * minipool_fix_head;
16341 Mfix * minipool_fix_tail;
16342 /* The fix entry for the current minipool, once it has been placed. */
16343 Mfix * minipool_barrier;
16345 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16346 #define JUMP_TABLES_IN_TEXT_SECTION 0
16347 #endif
16349 static HOST_WIDE_INT
16350 get_jump_table_size (rtx_jump_table_data *insn)
16352 /* ADDR_VECs only take room if read-only data goes into the text
16353 section. */
16354 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16356 rtx body = PATTERN (insn);
16357 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16358 HOST_WIDE_INT size;
16359 HOST_WIDE_INT modesize;
16361 modesize = GET_MODE_SIZE (GET_MODE (body));
16362 size = modesize * XVECLEN (body, elt);
16363 switch (modesize)
16365 case 1:
16366 /* Round up size of TBB table to a halfword boundary. */
16367 size = (size + 1) & ~HOST_WIDE_INT_1;
16368 break;
16369 case 2:
16370 /* No padding necessary for TBH. */
16371 break;
16372 case 4:
16373 /* Add two bytes for alignment on Thumb. */
16374 if (TARGET_THUMB)
16375 size += 2;
16376 break;
16377 default:
16378 gcc_unreachable ();
16380 return size;
16383 return 0;
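/* Worked example: a QImode ADDR_DIFF_VEC (a TBB table) with five entries
   has modesize 1 and raw size 5, which the rounding above pads to 6 bytes;
   the same five entries in HImode (TBH) occupy exactly 10 bytes, and in
   SImode on Thumb they occupy 20 bytes plus 2 alignment bytes.  */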
16386 /* Return the maximum amount of padding that will be inserted before
16387 label LABEL. */
16389 static HOST_WIDE_INT
16390 get_label_padding (rtx label)
16392 HOST_WIDE_INT align, min_insn_size;
16394 align = 1 << label_to_alignment (label);
16395 min_insn_size = TARGET_THUMB ? 2 : 4;
16396 return align > min_insn_size ? align - min_insn_size : 0;
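/* For example, a label with alignment 2^3 = 8 can be preceded by at most
   8 - 2 = 6 padding bytes on Thumb (minimum insn size 2), or 8 - 4 = 4
   bytes on ARM.  A label whose alignment does not exceed the minimum insn
   size needs no allowance at all.  */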
16399 /* Move a minipool fix MP from its current location to before MAX_MP.
16400 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16401 constraints may need updating. */
16402 static Mnode *
16403 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16404 HOST_WIDE_INT max_address)
16406 /* The code below assumes these are different. */
16407 gcc_assert (mp != max_mp);
16409 if (max_mp == NULL)
16411 if (max_address < mp->max_address)
16412 mp->max_address = max_address;
16414 else
16416 if (max_address > max_mp->max_address - mp->fix_size)
16417 mp->max_address = max_mp->max_address - mp->fix_size;
16418 else
16419 mp->max_address = max_address;
16421 /* Unlink MP from its current position. Since max_mp is non-null,
16422 mp->prev must be non-null. */
16423 mp->prev->next = mp->next;
16424 if (mp->next != NULL)
16425 mp->next->prev = mp->prev;
16426 else
16427 minipool_vector_tail = mp->prev;
16429 /* Re-insert it before MAX_MP. */
16430 mp->next = max_mp;
16431 mp->prev = max_mp->prev;
16432 max_mp->prev = mp;
16434 if (mp->prev != NULL)
16435 mp->prev->next = mp;
16436 else
16437 minipool_vector_head = mp;
16440 /* Save the new entry. */
16441 max_mp = mp;
16443 /* Scan over the preceding entries and adjust their addresses as
16444 required. */
16445 while (mp->prev != NULL
16446 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16448 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16449 mp = mp->prev;
16452 return max_mp;
16455 /* Add a constant to the minipool for a forward reference. Returns the
16456 node added or NULL if the constant will not fit in this pool. */
16457 static Mnode *
16458 add_minipool_forward_ref (Mfix *fix)
16460 /* If set, max_mp is the first pool_entry that has a lower
16461 constraint than the one we are trying to add. */
16462 Mnode * max_mp = NULL;
16463 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16464 Mnode * mp;
16466 /* If the minipool starts before the end of FIX->INSN then this FIX
16467 can not be placed into the current pool. Furthermore, adding the
16468 new constant pool entry may cause the pool to start FIX_SIZE bytes
16469 earlier. */
16470 if (minipool_vector_head &&
16471 (fix->address + get_attr_length (fix->insn)
16472 >= minipool_vector_head->max_address - fix->fix_size))
16473 return NULL;
16475 /* Scan the pool to see if a constant with the same value has
16476 already been added. While we are doing this, also note the
16477 location where we must insert the constant if it doesn't already
16478 exist. */
16479 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16481 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16482 && fix->mode == mp->mode
16483 && (!LABEL_P (fix->value)
16484 || (CODE_LABEL_NUMBER (fix->value)
16485 == CODE_LABEL_NUMBER (mp->value)))
16486 && rtx_equal_p (fix->value, mp->value))
16488 /* More than one fix references this entry. */
16489 mp->refcount++;
16490 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16493 /* Note the insertion point if necessary. */
16494 if (max_mp == NULL
16495 && mp->max_address > max_address)
16496 max_mp = mp;
16498 /* If we are inserting an 8-byte aligned quantity and
16499 we have not already found an insertion point, then
16500 make sure that all such 8-byte aligned quantities are
16501 placed at the start of the pool. */
16502 if (ARM_DOUBLEWORD_ALIGN
16503 && max_mp == NULL
16504 && fix->fix_size >= 8
16505 && mp->fix_size < 8)
16507 max_mp = mp;
16508 max_address = mp->max_address;
16512 /* The value is not currently in the minipool, so we need to create
16513 a new entry for it. If MAX_MP is NULL, the entry will be put on
16514 the end of the list since the placement is less constrained than
16515 any existing entry. Otherwise, we insert the new fix before
16516 MAX_MP and, if necessary, adjust the constraints on the other
16517 entries. */
16518 mp = XNEW (Mnode);
16519 mp->fix_size = fix->fix_size;
16520 mp->mode = fix->mode;
16521 mp->value = fix->value;
16522 mp->refcount = 1;
16523 /* Not yet required for a backwards ref. */
16524 mp->min_address = -65536;
16526 if (max_mp == NULL)
16528 mp->max_address = max_address;
16529 mp->next = NULL;
16530 mp->prev = minipool_vector_tail;
16532 if (mp->prev == NULL)
16534 minipool_vector_head = mp;
16535 minipool_vector_label = gen_label_rtx ();
16537 else
16538 mp->prev->next = mp;
16540 minipool_vector_tail = mp;
16542 else
16544 if (max_address > max_mp->max_address - mp->fix_size)
16545 mp->max_address = max_mp->max_address - mp->fix_size;
16546 else
16547 mp->max_address = max_address;
16549 mp->next = max_mp;
16550 mp->prev = max_mp->prev;
16551 max_mp->prev = mp;
16552 if (mp->prev != NULL)
16553 mp->prev->next = mp;
16554 else
16555 minipool_vector_head = mp;
16558 /* Save the new entry. */
16559 max_mp = mp;
16561 /* Scan over the preceding entries and adjust their addresses as
16562 required. */
16563 while (mp->prev != NULL
16564 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16566 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16567 mp = mp->prev;
16570 return max_mp;
16573 static Mnode *
16574 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16575 HOST_WIDE_INT min_address)
16577 HOST_WIDE_INT offset;
16579 /* The code below assumes these are different. */
16580 gcc_assert (mp != min_mp);
16582 if (min_mp == NULL)
16584 if (min_address > mp->min_address)
16585 mp->min_address = min_address;
16587 else
16589 /* We will adjust this below if it is too loose. */
16590 mp->min_address = min_address;
16592 /* Unlink MP from its current position. Since min_mp is non-null,
16593 mp->next must be non-null. */
16594 mp->next->prev = mp->prev;
16595 if (mp->prev != NULL)
16596 mp->prev->next = mp->next;
16597 else
16598 minipool_vector_head = mp->next;
16600 /* Reinsert it after MIN_MP. */
16601 mp->prev = min_mp;
16602 mp->next = min_mp->next;
16603 min_mp->next = mp;
16604 if (mp->next != NULL)
16605 mp->next->prev = mp;
16606 else
16607 minipool_vector_tail = mp;
16610 min_mp = mp;
16612 offset = 0;
16613 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16615 mp->offset = offset;
16616 if (mp->refcount > 0)
16617 offset += mp->fix_size;
16619 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16620 mp->next->min_address = mp->min_address + mp->fix_size;
16623 return min_mp;
16626 /* Add a constant to the minipool for a backward reference. Returns the
16627 node added or NULL if the constant will not fit in this pool.
16629 Note that the code for insertion for a backwards reference can be
16630 somewhat confusing because the calculated offsets for each fix do
16631 not take into account the size of the pool (which is still under
16632 construction). */
16633 static Mnode *
16634 add_minipool_backward_ref (Mfix *fix)
16636 /* If set, min_mp is the last pool_entry that has a lower constraint
16637 than the one we are trying to add. */
16638 Mnode *min_mp = NULL;
16639 /* This can be negative, since it is only a constraint. */
16640 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16641 Mnode *mp;
16643 /* If we can't reach the current pool from this insn, or if we can't
16644 insert this entry at the end of the pool without pushing other
16645 fixes out of range, then we don't try. This ensures that we
16646 can't fail later on. */
16647 if (min_address >= minipool_barrier->address
16648 || (minipool_vector_tail->min_address + fix->fix_size
16649 >= minipool_barrier->address))
16650 return NULL;
16652 /* Scan the pool to see if a constant with the same value has
16653 already been added. While we are doing this, also note the
16654 location where we must insert the constant if it doesn't already
16655 exist. */
16656 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16658 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16659 && fix->mode == mp->mode
16660 && (!LABEL_P (fix->value)
16661 || (CODE_LABEL_NUMBER (fix->value)
16662 == CODE_LABEL_NUMBER (mp->value)))
16663 && rtx_equal_p (fix->value, mp->value)
16664 /* Check that there is enough slack to move this entry to the
16665 end of the table (this is conservative). */
16666 && (mp->max_address
16667 > (minipool_barrier->address
16668 + minipool_vector_tail->offset
16669 + minipool_vector_tail->fix_size)))
16671 mp->refcount++;
16672 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16675 if (min_mp != NULL)
16676 mp->min_address += fix->fix_size;
16677 else
16679 /* Note the insertion point if necessary. */
16680 if (mp->min_address < min_address)
16682 /* For now, we do not allow the insertion of 8-byte alignment
16683 requiring nodes anywhere but at the start of the pool. */
16684 if (ARM_DOUBLEWORD_ALIGN
16685 && fix->fix_size >= 8 && mp->fix_size < 8)
16686 return NULL;
16687 else
16688 min_mp = mp;
16690 else if (mp->max_address
16691 < minipool_barrier->address + mp->offset + fix->fix_size)
16693 /* Inserting before this entry would push the fix beyond
16694 its maximum address (which can happen if we have
16695 re-located a forwards fix); force the new fix to come
16696 after it. */
16697 if (ARM_DOUBLEWORD_ALIGN
16698 && fix->fix_size >= 8 && mp->fix_size < 8)
16699 return NULL;
16700 else
16702 min_mp = mp;
16703 min_address = mp->min_address + fix->fix_size;
16706 /* Do not insert a non-8-byte aligned quantity before 8-byte
16707 aligned quantities. */
16708 else if (ARM_DOUBLEWORD_ALIGN
16709 && fix->fix_size < 8
16710 && mp->fix_size >= 8)
16712 min_mp = mp;
16713 min_address = mp->min_address + fix->fix_size;
16718 /* We need to create a new entry. */
16719 mp = XNEW (Mnode);
16720 mp->fix_size = fix->fix_size;
16721 mp->mode = fix->mode;
16722 mp->value = fix->value;
16723 mp->refcount = 1;
16724 mp->max_address = minipool_barrier->address + 65536;
16726 mp->min_address = min_address;
16728 if (min_mp == NULL)
16730 mp->prev = NULL;
16731 mp->next = minipool_vector_head;
16733 if (mp->next == NULL)
16735 minipool_vector_tail = mp;
16736 minipool_vector_label = gen_label_rtx ();
16738 else
16739 mp->next->prev = mp;
16741 minipool_vector_head = mp;
16743 else
16745 mp->next = min_mp->next;
16746 mp->prev = min_mp;
16747 min_mp->next = mp;
16749 if (mp->next != NULL)
16750 mp->next->prev = mp;
16751 else
16752 minipool_vector_tail = mp;
16755 /* Save the new entry. */
16756 min_mp = mp;
16758 if (mp->prev)
16759 mp = mp->prev;
16760 else
16761 mp->offset = 0;
16763 /* Scan over the following entries and adjust their offsets. */
16764 while (mp->next != NULL)
16766 if (mp->next->min_address < mp->min_address + mp->fix_size)
16767 mp->next->min_address = mp->min_address + mp->fix_size;
16769 if (mp->refcount)
16770 mp->next->offset = mp->offset + mp->fix_size;
16771 else
16772 mp->next->offset = mp->offset;
16774 mp = mp->next;
16777 return min_mp;
16780 static void
16781 assign_minipool_offsets (Mfix *barrier)
16783 HOST_WIDE_INT offset = 0;
16784 Mnode *mp;
16786 minipool_barrier = barrier;
16788 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16790 mp->offset = offset;
16792 if (mp->refcount > 0)
16793 offset += mp->fix_size;
16797 /* Output the literal table */
16798 static void
16799 dump_minipool (rtx_insn *scan)
16801 Mnode * mp;
16802 Mnode * nmp;
16803 int align64 = 0;
16805 if (ARM_DOUBLEWORD_ALIGN)
16806 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16807 if (mp->refcount > 0 && mp->fix_size >= 8)
16809 align64 = 1;
16810 break;
16813 if (dump_file)
16814 fprintf (dump_file,
16815 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16816 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16818 scan = emit_label_after (gen_label_rtx (), scan);
16819 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16820 scan = emit_label_after (minipool_vector_label, scan);
16822 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16824 if (mp->refcount > 0)
16826 if (dump_file)
16828 fprintf (dump_file,
16829 ";; Offset %u, min %ld, max %ld ",
16830 (unsigned) mp->offset, (unsigned long) mp->min_address,
16831 (unsigned long) mp->max_address);
16832 arm_print_value (dump_file, mp->value);
16833 fputc ('\n', dump_file);
16836 switch (GET_MODE_SIZE (mp->mode))
16838 #ifdef HAVE_consttable_1
16839 case 1:
16840 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16841 break;
16843 #endif
16844 #ifdef HAVE_consttable_2
16845 case 2:
16846 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16847 break;
16849 #endif
16850 #ifdef HAVE_consttable_4
16851 case 4:
16852 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16853 break;
16855 #endif
16856 #ifdef HAVE_consttable_8
16857 case 8:
16858 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16859 break;
16861 #endif
16862 #ifdef HAVE_consttable_16
16863 case 16:
16864 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16865 break;
16867 #endif
16868 default:
16869 gcc_unreachable ();
16873 nmp = mp->next;
16874 free (mp);
16877 minipool_vector_head = minipool_vector_tail = NULL;
16878 scan = emit_insn_after (gen_consttable_end (), scan);
16879 scan = emit_barrier_after (scan);
16882 /* Return the cost of forcibly inserting a barrier after INSN. */
16883 static int
16884 arm_barrier_cost (rtx_insn *insn)
16886 /* Basing the location of the pool on the loop depth is preferable,
16887 but at the moment, the basic block information seems to be
16888 corrupt by this stage of the compilation. */
16889 int base_cost = 50;
16890 rtx_insn *next = next_nonnote_insn (insn);
16892 if (next != NULL && LABEL_P (next))
16893 base_cost -= 20;
16895 switch (GET_CODE (insn))
16897 case CODE_LABEL:
16898 /* It will always be better to place the table before the label, rather
16899 than after it. */
16900 return 50;
16902 case INSN:
16903 case CALL_INSN:
16904 return base_cost;
16906 case JUMP_INSN:
16907 return base_cost - 10;
16909 default:
16910 return base_cost + 10;
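/* As a rough illustration: an ordinary INSN or CALL_INSN that happens to be
   followed by a label costs 50 - 20 = 30, a JUMP_INSN in the same position
   costs 20, and a CODE_LABEL itself always costs 50, so pools gravitate
   towards existing jumps and label boundaries.  */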
16914 /* Find the best place in the insn stream in the range
16915 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16916 Create the barrier by inserting a jump and add a new fix entry for
16917 it. */
16918 static Mfix *
16919 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16921 HOST_WIDE_INT count = 0;
16922 rtx_barrier *barrier;
16923 rtx_insn *from = fix->insn;
16924 /* The instruction after which we will insert the jump. */
16925 rtx_insn *selected = NULL;
16926 int selected_cost;
16927 /* The address at which the jump instruction will be placed. */
16928 HOST_WIDE_INT selected_address;
16929 Mfix * new_fix;
16930 HOST_WIDE_INT max_count = max_address - fix->address;
16931 rtx_code_label *label = gen_label_rtx ();
16933 selected_cost = arm_barrier_cost (from);
16934 selected_address = fix->address;
16936 while (from && count < max_count)
16938 rtx_jump_table_data *tmp;
16939 int new_cost;
16941 /* This code shouldn't have been called if there was a natural barrier
16942 within range. */
16943 gcc_assert (!BARRIER_P (from));
16945 /* Count the length of this insn. This must stay in sync with the
16946 code that pushes minipool fixes. */
16947 if (LABEL_P (from))
16948 count += get_label_padding (from);
16949 else
16950 count += get_attr_length (from);
16952 /* If there is a jump table, add its length. */
16953 if (tablejump_p (from, NULL, &tmp))
16955 count += get_jump_table_size (tmp);
16957 /* Jump tables aren't in a basic block, so base the cost on
16958 the dispatch insn. If we select this location, we will
16959 still put the pool after the table. */
16960 new_cost = arm_barrier_cost (from);
16962 if (count < max_count
16963 && (!selected || new_cost <= selected_cost))
16965 selected = tmp;
16966 selected_cost = new_cost;
16967 selected_address = fix->address + count;
16970 /* Continue after the dispatch table. */
16971 from = NEXT_INSN (tmp);
16972 continue;
16975 new_cost = arm_barrier_cost (from);
16977 if (count < max_count
16978 && (!selected || new_cost <= selected_cost))
16980 selected = from;
16981 selected_cost = new_cost;
16982 selected_address = fix->address + count;
16985 from = NEXT_INSN (from);
16988 /* Make sure that we found a place to insert the jump. */
16989 gcc_assert (selected);
16991 /* Make sure we do not split a call and its corresponding
16992 CALL_ARG_LOCATION note. */
16993 if (CALL_P (selected))
16995 rtx_insn *next = NEXT_INSN (selected);
16996 if (next && NOTE_P (next)
16997 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16998 selected = next;
17001 /* Create a new JUMP_INSN that branches around a barrier. */
17002 from = emit_jump_insn_after (gen_jump (label), selected);
17003 JUMP_LABEL (from) = label;
17004 barrier = emit_barrier_after (from);
17005 emit_label_after (label, barrier);
17007 /* Create a minipool barrier entry for the new barrier. */
17008 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
17009 new_fix->insn = barrier;
17010 new_fix->address = selected_address;
17011 new_fix->next = fix->next;
17012 fix->next = new_fix;
17014 return new_fix;
17017 /* Record that there is a natural barrier in the insn stream at
17018 ADDRESS. */
17019 static void
17020 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
17022 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17024 fix->insn = insn;
17025 fix->address = address;
17027 fix->next = NULL;
17028 if (minipool_fix_head != NULL)
17029 minipool_fix_tail->next = fix;
17030 else
17031 minipool_fix_head = fix;
17033 minipool_fix_tail = fix;
17036 /* Record INSN, which will need fixing up to load a value from the
17037 minipool. ADDRESS is the offset of the insn since the start of the
17038 function; LOC is a pointer to the part of the insn which requires
17039 fixing; VALUE is the constant that must be loaded, which is of type
17040 MODE. */
17041 static void
17042 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
17043 machine_mode mode, rtx value)
17045 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17047 fix->insn = insn;
17048 fix->address = address;
17049 fix->loc = loc;
17050 fix->mode = mode;
17051 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
17052 fix->value = value;
17053 fix->forwards = get_attr_pool_range (insn);
17054 fix->backwards = get_attr_neg_pool_range (insn);
17055 fix->minipool = NULL;
17057 /* If an insn doesn't have a range defined for it, then it isn't
17058 expecting to be reworked by this code. Better to stop now than
17059 to generate duff assembly code. */
17060 gcc_assert (fix->forwards || fix->backwards);
17062 /* If an entry requires 8-byte alignment then assume all constant pools
17063 require 4 bytes of padding. Trying to do this later on a per-pool
17064 basis is awkward because existing pool entries have to be modified. */
17065 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
17066 minipool_pad = 4;
17068 if (dump_file)
17070 fprintf (dump_file,
17071 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17072 GET_MODE_NAME (mode),
17073 INSN_UID (insn), (unsigned long) address,
17074 -1 * (long)fix->backwards, (long)fix->forwards);
17075 arm_print_value (dump_file, fix->value);
17076 fprintf (dump_file, "\n");
17079 /* Add it to the chain of fixes. */
17080 fix->next = NULL;
17082 if (minipool_fix_head != NULL)
17083 minipool_fix_tail->next = fix;
17084 else
17085 minipool_fix_head = fix;
17087 minipool_fix_tail = fix;
17090 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
17091 Returns the number of insns needed, or 99 if we always want to synthesize
17092 the value. */
17093 int
17094 arm_max_const_double_inline_cost ()
17096 /* Let the value get synthesized to avoid the use of literal pools. */
17097 if (arm_disable_literal_pool)
17098 return 99;
17100 return ((optimize_size || arm_ld_sched) ? 3 : 4);
17103 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17104 Returns the number of insns needed, or 99 if we don't know how to
17105 do it. */
17106 int
17107 arm_const_double_inline_cost (rtx val)
17109 rtx lowpart, highpart;
17110 machine_mode mode;
17112 mode = GET_MODE (val);
17114 if (mode == VOIDmode)
17115 mode = DImode;
17117 gcc_assert (GET_MODE_SIZE (mode) == 8);
17119 lowpart = gen_lowpart (SImode, val);
17120 highpart = gen_highpart_mode (SImode, mode, val);
17122 gcc_assert (CONST_INT_P (lowpart));
17123 gcc_assert (CONST_INT_P (highpart));
17125 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17126 NULL_RTX, NULL_RTX, 0, 0)
17127 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17128 NULL_RTX, NULL_RTX, 0, 0));
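/* For example, the DImode constant 0x500000003 splits into the SImode parts
   3 (low) and 5 (high); each is a valid immediate for a single
   data-processing insn, so the cost returned here is 2, comfortably below
   arm_max_const_double_inline_cost.  */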
17131 /* Cost of loading a SImode constant. */
17132 static inline int
17133 arm_const_inline_cost (enum rtx_code code, rtx val)
17135 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17136 NULL_RTX, NULL_RTX, 1, 0);
17139 /* Return true if it is worthwhile to split a 64-bit constant into two
17140 32-bit operations. This is the case if optimizing for size, or
17141 if we have load delay slots, or if one 32-bit part can be done with
17142 a single data operation. */
17143 bool
17144 arm_const_double_by_parts (rtx val)
17146 machine_mode mode = GET_MODE (val);
17147 rtx part;
17149 if (optimize_size || arm_ld_sched)
17150 return true;
17152 if (mode == VOIDmode)
17153 mode = DImode;
17155 part = gen_highpart_mode (SImode, mode, val);
17157 gcc_assert (CONST_INT_P (part));
17159 if (const_ok_for_arm (INTVAL (part))
17160 || const_ok_for_arm (~INTVAL (part)))
17161 return true;
17163 part = gen_lowpart (SImode, val);
17165 gcc_assert (CONST_INT_P (part));
17167 if (const_ok_for_arm (INTVAL (part))
17168 || const_ok_for_arm (~INTVAL (part)))
17169 return true;
17171 return false;
17174 /* Return true if it is possible to inline both the high and low parts
17175 of a 64-bit constant into 32-bit data processing instructions. */
17176 bool
17177 arm_const_double_by_immediates (rtx val)
17179 machine_mode mode = GET_MODE (val);
17180 rtx part;
17182 if (mode == VOIDmode)
17183 mode = DImode;
17185 part = gen_highpart_mode (SImode, mode, val);
17187 gcc_assert (CONST_INT_P (part));
17189 if (!const_ok_for_arm (INTVAL (part)))
17190 return false;
17192 part = gen_lowpart (SImode, val);
17194 gcc_assert (CONST_INT_P (part));
17196 if (!const_ok_for_arm (INTVAL (part)))
17197 return false;
17199 return true;
17202 /* Scan INSN and note any of its operands that need fixing.
17203 If DO_PUSHES is false we do not actually push any of the fixups
17204 needed. */
17205 static void
17206 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17208 int opno;
17210 extract_constrain_insn (insn);
17212 if (recog_data.n_alternatives == 0)
17213 return;
17215 /* Fill in recog_op_alt with information about the constraints of
17216 this insn. */
17217 preprocess_constraints (insn);
17219 const operand_alternative *op_alt = which_op_alt ();
17220 for (opno = 0; opno < recog_data.n_operands; opno++)
17222 /* Things we need to fix can only occur in inputs. */
17223 if (recog_data.operand_type[opno] != OP_IN)
17224 continue;
17226 /* If this alternative is a memory reference, then any mention
17227 of constants in this alternative is really to fool reload
17228 into allowing us to accept one there. We need to fix them up
17229 now so that we output the right code. */
17230 if (op_alt[opno].memory_ok)
17232 rtx op = recog_data.operand[opno];
17234 if (CONSTANT_P (op))
17236 if (do_pushes)
17237 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17238 recog_data.operand_mode[opno], op);
17240 else if (MEM_P (op)
17241 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17242 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17244 if (do_pushes)
17246 rtx cop = avoid_constant_pool_reference (op);
17248 /* Casting the address of something to a mode narrower
17249 than a word can cause avoid_constant_pool_reference()
17250 to return the pool reference itself. That's no good to
17251 us here. Let's just hope that we can use the
17252 constant pool value directly. */
17253 if (op == cop)
17254 cop = get_pool_constant (XEXP (op, 0));
17256 push_minipool_fix (insn, address,
17257 recog_data.operand_loc[opno],
17258 recog_data.operand_mode[opno], cop);
17265 return;
17268 /* Rewrite move insn into subtract of 0 if the condition codes will
17269 be useful in next conditional jump insn. */
17271 static void
17272 thumb1_reorg (void)
17274 basic_block bb;
17276 FOR_EACH_BB_FN (bb, cfun)
17278 rtx dest, src;
17279 rtx cmp, op0, op1, set = NULL;
17280 rtx_insn *prev, *insn = BB_END (bb);
17281 bool insn_clobbered = false;
17283 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17284 insn = PREV_INSN (insn);
17286 /* Find the last cbranchsi4_insn in basic block BB. */
17287 if (insn == BB_HEAD (bb)
17288 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17289 continue;
17291 /* Get the register with which we are comparing. */
17292 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17293 op0 = XEXP (cmp, 0);
17294 op1 = XEXP (cmp, 1);
17296 /* Check that comparison is against ZERO. */
17297 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17298 continue;
17300 /* Find the first flag setting insn before INSN in basic block BB. */
17301 gcc_assert (insn != BB_HEAD (bb));
17302 for (prev = PREV_INSN (insn);
17303 (!insn_clobbered
17304 && prev != BB_HEAD (bb)
17305 && (NOTE_P (prev)
17306 || DEBUG_INSN_P (prev)
17307 || ((set = single_set (prev)) != NULL
17308 && get_attr_conds (prev) == CONDS_NOCOND)));
17309 prev = PREV_INSN (prev))
17311 if (reg_set_p (op0, prev))
17312 insn_clobbered = true;
17315 /* Skip if op0 is clobbered by insn other than prev. */
17316 if (insn_clobbered)
17317 continue;
17319 if (!set)
17320 continue;
17322 dest = SET_DEST (set);
17323 src = SET_SRC (set);
17324 if (!low_register_operand (dest, SImode)
17325 || !low_register_operand (src, SImode))
17326 continue;
17328 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17329 in INSN. Both src and dest of the move insn are checked. */
17330 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17332 dest = copy_rtx (dest);
17333 src = copy_rtx (src);
17334 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17335 PATTERN (prev) = gen_rtx_SET (dest, src);
17336 INSN_CODE (prev) = -1;
17337 /* Set test register in INSN to dest. */
17338 XEXP (cmp, 0) = copy_rtx (dest);
17339 INSN_CODE (insn) = -1;
17344 /* Convert instructions to their cc-clobbering variant if possible, since
17345 that allows us to use smaller encodings. */
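/* Rough example of the size effect (encodings assumed from the Thumb-2
   ISA): outside an IT block a flag-preserving "add r0, r1, r2" needs the
   32-bit add.w encoding, while the flag-setting "adds r0, r1, r2" fits in
   16 bits; adding a CC clobber to the pattern lets the short form be
   chosen when the flags are known to be dead.  */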
17347 static void
17348 thumb2_reorg (void)
17350 basic_block bb;
17351 regset_head live;
17353 INIT_REG_SET (&live);
17355 /* We are freeing block_for_insn in the toplev to keep compatibility
17356 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17357 compute_bb_for_insn ();
17358 df_analyze ();
17360 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17362 FOR_EACH_BB_FN (bb, cfun)
17364 if ((current_tune->disparage_flag_setting_t16_encodings
17365 == tune_params::DISPARAGE_FLAGS_ALL)
17366 && optimize_bb_for_speed_p (bb))
17367 continue;
17369 rtx_insn *insn;
17370 Convert_Action action = SKIP;
17371 Convert_Action action_for_partial_flag_setting
17372 = ((current_tune->disparage_flag_setting_t16_encodings
17373 != tune_params::DISPARAGE_FLAGS_NEITHER)
17374 && optimize_bb_for_speed_p (bb))
17375 ? SKIP : CONV;
17377 COPY_REG_SET (&live, DF_LR_OUT (bb));
17378 df_simulate_initialize_backwards (bb, &live);
17379 FOR_BB_INSNS_REVERSE (bb, insn)
17381 if (NONJUMP_INSN_P (insn)
17382 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17383 && GET_CODE (PATTERN (insn)) == SET)
17385 action = SKIP;
17386 rtx pat = PATTERN (insn);
17387 rtx dst = XEXP (pat, 0);
17388 rtx src = XEXP (pat, 1);
17389 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17391 if (UNARY_P (src) || BINARY_P (src))
17392 op0 = XEXP (src, 0);
17394 if (BINARY_P (src))
17395 op1 = XEXP (src, 1);
17397 if (low_register_operand (dst, SImode))
17399 switch (GET_CODE (src))
17401 case PLUS:
17402 /* Adding two registers and storing the result
17403 in the first source is already a 16-bit
17404 operation. */
17405 if (rtx_equal_p (dst, op0)
17406 && register_operand (op1, SImode))
17407 break;
17409 if (low_register_operand (op0, SImode))
17411 /* ADDS <Rd>,<Rn>,<Rm> */
17412 if (low_register_operand (op1, SImode))
17413 action = CONV;
17414 /* ADDS <Rdn>,#<imm8> */
17415 /* SUBS <Rdn>,#<imm8> */
17416 else if (rtx_equal_p (dst, op0)
17417 && CONST_INT_P (op1)
17418 && IN_RANGE (INTVAL (op1), -255, 255))
17419 action = CONV;
17420 /* ADDS <Rd>,<Rn>,#<imm3> */
17421 /* SUBS <Rd>,<Rn>,#<imm3> */
17422 else if (CONST_INT_P (op1)
17423 && IN_RANGE (INTVAL (op1), -7, 7))
17424 action = CONV;
17426 /* ADCS <Rd>, <Rn> */
17427 else if (GET_CODE (XEXP (src, 0)) == PLUS
17428 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17429 && low_register_operand (XEXP (XEXP (src, 0), 1),
17430 SImode)
17431 && COMPARISON_P (op1)
17432 && cc_register (XEXP (op1, 0), VOIDmode)
17433 && maybe_get_arm_condition_code (op1) == ARM_CS
17434 && XEXP (op1, 1) == const0_rtx)
17435 action = CONV;
17436 break;
17438 case MINUS:
17439 /* RSBS <Rd>,<Rn>,#0
17440 Not handled here: see NEG below. */
17441 /* SUBS <Rd>,<Rn>,#<imm3>
17442 SUBS <Rdn>,#<imm8>
17443 Not handled here: see PLUS above. */
17444 /* SUBS <Rd>,<Rn>,<Rm> */
17445 if (low_register_operand (op0, SImode)
17446 && low_register_operand (op1, SImode))
17447 action = CONV;
17448 break;
17450 case MULT:
17451 /* MULS <Rdm>,<Rn>,<Rdm>
17452 As an exception to the rule, this is only used
17453 when optimizing for size since MULS is slow on all
17454 known implementations. We do not even want to use
17455 MULS in cold code, if optimizing for speed, so we
17456 test the global flag here. */
17457 if (!optimize_size)
17458 break;
17459 /* else fall through. */
17460 case AND:
17461 case IOR:
17462 case XOR:
17463 /* ANDS <Rdn>,<Rm> */
17464 if (rtx_equal_p (dst, op0)
17465 && low_register_operand (op1, SImode))
17466 action = action_for_partial_flag_setting;
17467 else if (rtx_equal_p (dst, op1)
17468 && low_register_operand (op0, SImode))
17469 action = action_for_partial_flag_setting == SKIP
17470 ? SKIP : SWAP_CONV;
17471 break;
17473 case ASHIFTRT:
17474 case ASHIFT:
17475 case LSHIFTRT:
17476 /* ASRS <Rdn>,<Rm> */
17477 /* LSRS <Rdn>,<Rm> */
17478 /* LSLS <Rdn>,<Rm> */
17479 if (rtx_equal_p (dst, op0)
17480 && low_register_operand (op1, SImode))
17481 action = action_for_partial_flag_setting;
17482 /* ASRS <Rd>,<Rm>,#<imm5> */
17483 /* LSRS <Rd>,<Rm>,#<imm5> */
17484 /* LSLS <Rd>,<Rm>,#<imm5> */
17485 else if (low_register_operand (op0, SImode)
17486 && CONST_INT_P (op1)
17487 && IN_RANGE (INTVAL (op1), 0, 31))
17488 action = action_for_partial_flag_setting;
17489 break;
17491 case ROTATERT:
17492 /* RORS <Rdn>,<Rm> */
17493 if (rtx_equal_p (dst, op0)
17494 && low_register_operand (op1, SImode))
17495 action = action_for_partial_flag_setting;
17496 break;
17498 case NOT:
17499 /* MVNS <Rd>,<Rm> */
17500 if (low_register_operand (op0, SImode))
17501 action = action_for_partial_flag_setting;
17502 break;
17504 case NEG:
17505 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17506 if (low_register_operand (op0, SImode))
17507 action = CONV;
17508 break;
17510 case CONST_INT:
17511 /* MOVS <Rd>,#<imm8> */
17512 if (CONST_INT_P (src)
17513 && IN_RANGE (INTVAL (src), 0, 255))
17514 action = action_for_partial_flag_setting;
17515 break;
17517 case REG:
17518 /* MOVS and MOV<c> with registers have different
17519 encodings, so are not relevant here. */
17520 break;
17522 default:
17523 break;
17527 if (action != SKIP)
17529 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17530 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17531 rtvec vec;
17533 if (action == SWAP_CONV)
17535 src = copy_rtx (src);
17536 XEXP (src, 0) = op1;
17537 XEXP (src, 1) = op0;
17538 pat = gen_rtx_SET (dst, src);
17539 vec = gen_rtvec (2, pat, clobber);
17541 else /* action == CONV */
17542 vec = gen_rtvec (2, pat, clobber);
17544 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17545 INSN_CODE (insn) = -1;
17549 if (NONDEBUG_INSN_P (insn))
17550 df_simulate_one_insn_backwards (bb, insn, &live);
17554 CLEAR_REG_SET (&live);
17557 /* GCC puts the pool in the wrong place for ARM, since we can only
17558 load addresses a limited distance around the pc. We do some
17559 special munging to move the constant pool values to the correct
17560 point in the code. */
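/* Hedged illustration of the problem being solved (not code from this
   file): a literal load such as

        ldr     r0, .LCP0
        ...
   .LCP0:
        .word   0x12345678

   only assembles if .LCP0 lies within the instruction's reach (roughly
   +/-4K for an ARM-state ldr), so pool entries must be dumped, and if
   necessary duplicated, near the instructions that reference them.  */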
17561 static void
17562 arm_reorg (void)
17564 rtx_insn *insn;
17565 HOST_WIDE_INT address = 0;
17566 Mfix * fix;
17568 if (TARGET_THUMB1)
17569 thumb1_reorg ();
17570 else if (TARGET_THUMB2)
17571 thumb2_reorg ();
17573 /* Ensure all insns that must be split have been split at this point.
17574 Otherwise, the pool placement code below may compute incorrect
17575 insn lengths. Note that when optimizing, all insns have already
17576 been split at this point. */
17577 if (!optimize)
17578 split_all_insns_noflow ();
17580 minipool_fix_head = minipool_fix_tail = NULL;
17582 /* The first insn must always be a note, or the code below won't
17583 scan it properly. */
17584 insn = get_insns ();
17585 gcc_assert (NOTE_P (insn));
17586 minipool_pad = 0;
17588 /* Scan all the insns and record the operands that will need fixing. */
17589 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17591 if (BARRIER_P (insn))
17592 push_minipool_barrier (insn, address);
17593 else if (INSN_P (insn))
17595 rtx_jump_table_data *table;
17597 note_invalid_constants (insn, address, true);
17598 address += get_attr_length (insn);
17600 /* If the insn is a vector jump, add the size of the table
17601 and skip the table. */
17602 if (tablejump_p (insn, NULL, &table))
17604 address += get_jump_table_size (table);
17605 insn = table;
17608 else if (LABEL_P (insn))
17609 /* Add the worst-case padding due to alignment. We don't add
17610 the _current_ padding because the minipool insertions
17611 themselves might change it. */
17612 address += get_label_padding (insn);
17615 fix = minipool_fix_head;
17617 /* Now scan the fixups and perform the required changes. */
17618 while (fix)
17620 Mfix * ftmp;
17621 Mfix * fdel;
17622 Mfix * last_added_fix;
17623 Mfix * last_barrier = NULL;
17624 Mfix * this_fix;
17626 /* Skip any further barriers before the next fix. */
17627 while (fix && BARRIER_P (fix->insn))
17628 fix = fix->next;
17630 /* No more fixes. */
17631 if (fix == NULL)
17632 break;
17634 last_added_fix = NULL;
17636 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17638 if (BARRIER_P (ftmp->insn))
17640 if (ftmp->address >= minipool_vector_head->max_address)
17641 break;
17643 last_barrier = ftmp;
17645 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17646 break;
17648 last_added_fix = ftmp; /* Keep track of the last fix added. */
17651 /* If we found a barrier, drop back to that; any fixes that we
17652 could have reached but come after the barrier will now go in
17653 the next mini-pool. */
17654 if (last_barrier != NULL)
17656 /* Reduce the refcount for those fixes that won't go into this
17657 pool after all. */
17658 for (fdel = last_barrier->next;
17659 fdel && fdel != ftmp;
17660 fdel = fdel->next)
17662 fdel->minipool->refcount--;
17663 fdel->minipool = NULL;
17666 ftmp = last_barrier;
17668 else
17670 /* ftmp is the first fix that we can't fit into this pool and
17671 there are no natural barriers that we could use. Insert a
17672 new barrier in the code somewhere between the previous
17673 fix and this one, and arrange to jump around it. */
17674 HOST_WIDE_INT max_address;
17676 /* The last item on the list of fixes must be a barrier, so
17677 we can never run off the end of the list of fixes without
17678 last_barrier being set. */
17679 gcc_assert (ftmp);
17681 max_address = minipool_vector_head->max_address;
17682 /* Check that there isn't another fix that is in range that
17683 we couldn't fit into this pool because the pool was
17684 already too large: we need to put the pool before such an
17685 instruction. The pool itself may come just after the
17686 fix because create_fix_barrier also allows space for a
17687 jump instruction. */
17688 if (ftmp->address < max_address)
17689 max_address = ftmp->address + 1;
17691 last_barrier = create_fix_barrier (last_added_fix, max_address);
17694 assign_minipool_offsets (last_barrier);
17696 while (ftmp)
17698 if (!BARRIER_P (ftmp->insn)
17699 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17700 == NULL))
17701 break;
17703 ftmp = ftmp->next;
17706 /* Scan over the fixes we have identified for this pool, fixing them
17707 up and adding the constants to the pool itself. */
17708 for (this_fix = fix; this_fix && ftmp != this_fix;
17709 this_fix = this_fix->next)
17710 if (!BARRIER_P (this_fix->insn))
17712 rtx addr
17713 = plus_constant (Pmode,
17714 gen_rtx_LABEL_REF (VOIDmode,
17715 minipool_vector_label),
17716 this_fix->minipool->offset);
17717 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17720 dump_minipool (last_barrier->insn);
17721 fix = ftmp;
17724 /* From now on we must synthesize any constants that we can't handle
17725 directly. This can happen if the RTL gets split during final
17726 instruction generation. */
17727 cfun->machine->after_arm_reorg = 1;
17729 /* Free the minipool memory. */
17730 obstack_free (&minipool_obstack, minipool_startobj);
17733 /* Routines to output assembly language. */
17735 /* Return string representation of passed in real value. */
17736 static const char *
17737 fp_const_from_val (REAL_VALUE_TYPE *r)
17739 if (!fp_consts_inited)
17740 init_fp_table ();
17742 gcc_assert (real_equal (r, &value_fp0));
17743 return "0";
17746 /* OPERANDS[0] is the entire list of insns that constitute pop,
17747 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17748 is in the list, UPDATE is true iff the list contains explicit
17749 update of base register. */
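/* For instance (illustrative register choices): popping {r4, r5, pc} with
   SP as the base register comes out as "pop {r4, r5, pc}", whereas a
   non-SP base such as r7 with an explicit update is emitted as
   "ldmia r7!, {r4, r5}".  */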
17750 void
17751 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17752 bool update)
17754 int i;
17755 char pattern[100];
17756 int offset;
17757 const char *conditional;
17758 int num_saves = XVECLEN (operands[0], 0);
17759 unsigned int regno;
17760 unsigned int regno_base = REGNO (operands[1]);
17761 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17763 offset = 0;
17764 offset += update ? 1 : 0;
17765 offset += return_pc ? 1 : 0;
17767 /* Is the base register in the list? */
17768 for (i = offset; i < num_saves; i++)
17770 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17771 /* If SP is in the list, then the base register must be SP. */
17772 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17773 /* If base register is in the list, there must be no explicit update. */
17774 if (regno == regno_base)
17775 gcc_assert (!update);
17778 conditional = reverse ? "%?%D0" : "%?%d0";
17779 /* Can't use POP if returning from an interrupt. */
17780 if ((regno_base == SP_REGNUM) && !(interrupt_p && return_pc))
17782 sprintf (pattern, "pop%s\t{", conditional);
17784 else
17786 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17787 It's just a convention; their semantics are identical. */
17788 if (regno_base == SP_REGNUM)
17789 sprintf (pattern, "ldmfd%s\t", conditional);
17790 else if (update)
17791 sprintf (pattern, "ldmia%s\t", conditional);
17792 else
17793 sprintf (pattern, "ldm%s\t", conditional);
17795 strcat (pattern, reg_names[regno_base]);
17796 if (update)
17797 strcat (pattern, "!, {");
17798 else
17799 strcat (pattern, ", {");
17802 /* Output the first destination register. */
17803 strcat (pattern,
17804 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17806 /* Output the rest of the destination registers. */
17807 for (i = offset + 1; i < num_saves; i++)
17809 strcat (pattern, ", ");
17810 strcat (pattern,
17811 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17814 strcat (pattern, "}");
17816 if (interrupt_p && return_pc)
17817 strcat (pattern, "^");
17819 output_asm_insn (pattern, &cond);
17823 /* Output the assembly for a store multiple. */
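/* A hedged example of the output (register numbers invented): with the
   stack pointer as the base this produces something like
   "vpush.64 {d8, d9, d10}", and otherwise "vstmdb.64 r4!, {d8, d9, d10}".  */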
17825 const char *
17826 vfp_output_vstmd (rtx * operands)
17828 char pattern[100];
17829 int p;
17830 int base;
17831 int i;
17832 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17833 ? XEXP (operands[0], 0)
17834 : XEXP (XEXP (operands[0], 0), 0);
17835 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17837 if (push_p)
17838 strcpy (pattern, "vpush%?.64\t{%P1");
17839 else
17840 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17842 p = strlen (pattern);
17844 gcc_assert (REG_P (operands[1]));
17846 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17847 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17849 p += sprintf (&pattern[p], ", d%d", base + i);
17851 strcpy (&pattern[p], "}");
17853 output_asm_insn (pattern, operands);
17854 return "";
17858 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17859 number of bytes pushed. */
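/* Sketch of the arithmetic (assumed call, not from this file): asked to
   save two register pairs, this normally emits one store-multiple of
   2 * 8 = 16 bytes and returns 16; on pre-ARMv6 cores the VFPr1
   workaround below widens that to three pairs, i.e. 24 bytes.  */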
17861 static int
17862 vfp_emit_fstmd (int base_reg, int count)
17864 rtx par;
17865 rtx dwarf;
17866 rtx tmp, reg;
17867 int i;
17869 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17870 register pairs are stored by a store multiple insn. We avoid this
17871 by pushing an extra pair. */
17872 if (count == 2 && !arm_arch6)
17874 if (base_reg == LAST_VFP_REGNUM - 3)
17875 base_reg -= 2;
17876 count++;
17879 /* FSTMD may not store more than 16 doubleword registers at once. Split
17880 larger stores into multiple parts (up to a maximum of two, in
17881 practice). */
17882 if (count > 16)
17884 int saved;
17885 /* NOTE: base_reg is an internal register number, so each D register
17886 counts as 2. */
17887 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17888 saved += vfp_emit_fstmd (base_reg, 16);
17889 return saved;
17892 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17893 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17895 reg = gen_rtx_REG (DFmode, base_reg);
17896 base_reg += 2;
17898 XVECEXP (par, 0, 0)
17899 = gen_rtx_SET (gen_frame_mem
17900 (BLKmode,
17901 gen_rtx_PRE_MODIFY (Pmode,
17902 stack_pointer_rtx,
17903 plus_constant
17904 (Pmode, stack_pointer_rtx,
17905 - (count * 8)))
17907 gen_rtx_UNSPEC (BLKmode,
17908 gen_rtvec (1, reg),
17909 UNSPEC_PUSH_MULT));
17911 tmp = gen_rtx_SET (stack_pointer_rtx,
17912 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17913 RTX_FRAME_RELATED_P (tmp) = 1;
17914 XVECEXP (dwarf, 0, 0) = tmp;
17916 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17917 RTX_FRAME_RELATED_P (tmp) = 1;
17918 XVECEXP (dwarf, 0, 1) = tmp;
17920 for (i = 1; i < count; i++)
17922 reg = gen_rtx_REG (DFmode, base_reg);
17923 base_reg += 2;
17924 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17926 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17927 plus_constant (Pmode,
17928 stack_pointer_rtx,
17929 i * 8)),
17930 reg);
17931 RTX_FRAME_RELATED_P (tmp) = 1;
17932 XVECEXP (dwarf, 0, i + 1) = tmp;
17935 par = emit_insn (par);
17936 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17937 RTX_FRAME_RELATED_P (par) = 1;
17939 return count * 8;
17942 /* Emit a call instruction with pattern PAT. ADDR is the address of
17943 the call target. */
17945 void
17946 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17948 rtx insn;
17950 insn = emit_call_insn (pat);
17952 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17953 If the call might use such an entry, add a use of the PIC register
17954 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17955 if (TARGET_VXWORKS_RTP
17956 && flag_pic
17957 && !sibcall
17958 && GET_CODE (addr) == SYMBOL_REF
17959 && (SYMBOL_REF_DECL (addr)
17960 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17961 : !SYMBOL_REF_LOCAL_P (addr)))
17963 require_pic_register ();
17964 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17967 if (TARGET_AAPCS_BASED)
17969 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17970 linker. We need to add an IP clobber to allow setting
17971 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17972 is not needed since it's a fixed register. */
17973 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17974 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17978 /* Output a 'call' insn. */
17979 const char *
17980 output_call (rtx *operands)
17982 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17984 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17985 if (REGNO (operands[0]) == LR_REGNUM)
17987 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17988 output_asm_insn ("mov%?\t%0, %|lr", operands);
17991 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17993 if (TARGET_INTERWORK || arm_arch4t)
17994 output_asm_insn ("bx%?\t%0", operands);
17995 else
17996 output_asm_insn ("mov%?\t%|pc, %0", operands);
17998 return "";
18001 /* Output a move from arm registers to arm registers of a long double
18002 OPERANDS[0] is the destination.
18003 OPERANDS[1] is the source. */
18004 const char *
18005 output_mov_long_double_arm_from_arm (rtx *operands)
18007 /* We have to be careful here because the two might overlap. */
18008 int dest_start = REGNO (operands[0]);
18009 int src_start = REGNO (operands[1]);
18010 rtx ops[2];
18011 int i;
18013 if (dest_start < src_start)
18015 for (i = 0; i < 3; i++)
18017 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18018 ops[1] = gen_rtx_REG (SImode, src_start + i);
18019 output_asm_insn ("mov%?\t%0, %1", ops);
18022 else
18024 for (i = 2; i >= 0; i--)
18026 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18027 ops[1] = gen_rtx_REG (SImode, src_start + i);
18028 output_asm_insn ("mov%?\t%0, %1", ops);
18032 return "";
18035 void
18036 arm_emit_movpair (rtx dest, rtx src)
18038 rtx insn;
18040 /* If the src is an immediate, simplify it. */
18041 if (CONST_INT_P (src))
18043 HOST_WIDE_INT val = INTVAL (src);
18044 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18045 if ((val >> 16) & 0x0000ffff)
18047 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18048 GEN_INT (16)),
18049 GEN_INT ((val >> 16) & 0x0000ffff));
18050 insn = get_last_insn ();
18051 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18053 return;
18055 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18056 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18057 insn = get_last_insn ();
18058 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18061 /* Output a move between double words. It must be REG<-MEM
18062 or MEM<-REG. */
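/* Illustrative outputs (register names invented): a DImode load from a
   plain register address becomes "ldrd r0, [r2]" when TARGET_LDRD is set,
   or "ldmia r2, {r0, r1}" otherwise; stores mirror this with strd/stm.  */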
18063 const char *
18064 output_move_double (rtx *operands, bool emit, int *count)
18066 enum rtx_code code0 = GET_CODE (operands[0]);
18067 enum rtx_code code1 = GET_CODE (operands[1]);
18068 rtx otherops[3];
18069 if (count)
18070 *count = 1;
18072 /* The only case when this might happen is when
18073 you are looking at the length of a DImode instruction
18074 that has an invalid constant in it. */
18075 if (code0 == REG && code1 != MEM)
18077 gcc_assert (!emit);
18078 *count = 2;
18079 return "";
18082 if (code0 == REG)
18084 unsigned int reg0 = REGNO (operands[0]);
18086 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18088 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18090 switch (GET_CODE (XEXP (operands[1], 0)))
18092 case REG:
18094 if (emit)
18096 if (TARGET_LDRD
18097 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18098 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18099 else
18100 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18102 break;
18104 case PRE_INC:
18105 gcc_assert (TARGET_LDRD);
18106 if (emit)
18107 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18108 break;
18110 case PRE_DEC:
18111 if (emit)
18113 if (TARGET_LDRD)
18114 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18115 else
18116 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18118 break;
18120 case POST_INC:
18121 if (emit)
18123 if (TARGET_LDRD)
18124 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18125 else
18126 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18128 break;
18130 case POST_DEC:
18131 gcc_assert (TARGET_LDRD);
18132 if (emit)
18133 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18134 break;
18136 case PRE_MODIFY:
18137 case POST_MODIFY:
18138 /* Autoincrement addressing modes should never have overlapping
18139 base and destination registers, and overlapping index registers
18140 are already prohibited, so this doesn't need to worry about
18141 fix_cm3_ldrd. */
18142 otherops[0] = operands[0];
18143 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18144 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18146 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18148 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18150 /* Registers overlap so split out the increment. */
18151 if (emit)
18153 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18154 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18156 if (count)
18157 *count = 2;
18159 else
18161 /* Use a single insn if we can.
18162 FIXME: IWMMXT allows offsets larger than ldrd can
18163 handle, fix these up with a pair of ldr. */
18164 if (TARGET_THUMB2
18165 || !CONST_INT_P (otherops[2])
18166 || (INTVAL (otherops[2]) > -256
18167 && INTVAL (otherops[2]) < 256))
18169 if (emit)
18170 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18172 else
18174 if (emit)
18176 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18177 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18179 if (count)
18180 *count = 2;
18185 else
18187 /* Use a single insn if we can.
18188 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18189 fix these up with a pair of ldr. */
18190 if (TARGET_THUMB2
18191 || !CONST_INT_P (otherops[2])
18192 || (INTVAL (otherops[2]) > -256
18193 && INTVAL (otherops[2]) < 256))
18195 if (emit)
18196 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18198 else
18200 if (emit)
18202 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18203 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18205 if (count)
18206 *count = 2;
18209 break;
18211 case LABEL_REF:
18212 case CONST:
18213 /* We might be able to use ldrd %0, %1 here. However the range is
18214 different to ldr/adr, and it is broken on some ARMv7-M
18215 implementations. */
18216 /* Use the second register of the pair to avoid problematic
18217 overlap. */
18218 otherops[1] = operands[1];
18219 if (emit)
18220 output_asm_insn ("adr%?\t%0, %1", otherops);
18221 operands[1] = otherops[0];
18222 if (emit)
18224 if (TARGET_LDRD)
18225 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18226 else
18227 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18230 if (count)
18231 *count = 2;
18232 break;
18234 /* ??? This needs checking for thumb2. */
18235 default:
18236 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18237 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18239 otherops[0] = operands[0];
18240 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18241 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18243 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18245 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18247 switch ((int) INTVAL (otherops[2]))
18249 case -8:
18250 if (emit)
18251 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18252 return "";
18253 case -4:
18254 if (TARGET_THUMB2)
18255 break;
18256 if (emit)
18257 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18258 return "";
18259 case 4:
18260 if (TARGET_THUMB2)
18261 break;
18262 if (emit)
18263 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18264 return "";
18267 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18268 operands[1] = otherops[0];
18269 if (TARGET_LDRD
18270 && (REG_P (otherops[2])
18271 || TARGET_THUMB2
18272 || (CONST_INT_P (otherops[2])
18273 && INTVAL (otherops[2]) > -256
18274 && INTVAL (otherops[2]) < 256)))
18276 if (reg_overlap_mentioned_p (operands[0],
18277 otherops[2]))
18279 /* Swap base and index registers over to
18280 avoid a conflict. */
18281 std::swap (otherops[1], otherops[2]);
18283 /* If both registers conflict, it will usually
18284 have been fixed by a splitter. */
18285 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18286 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18288 if (emit)
18290 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18291 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18293 if (count)
18294 *count = 2;
18296 else
18298 otherops[0] = operands[0];
18299 if (emit)
18300 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18302 return "";
18305 if (CONST_INT_P (otherops[2]))
18307 if (emit)
18309 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18310 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18311 else
18312 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18315 else
18317 if (emit)
18318 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18321 else
18323 if (emit)
18324 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18327 if (count)
18328 *count = 2;
18330 if (TARGET_LDRD)
18331 return "ldrd%?\t%0, [%1]";
18333 return "ldmia%?\t%1, %M0";
18335 else
18337 otherops[1] = adjust_address (operands[1], SImode, 4);
18338 /* Take care of overlapping base/data reg. */
18339 if (reg_mentioned_p (operands[0], operands[1]))
18341 if (emit)
18343 output_asm_insn ("ldr%?\t%0, %1", otherops);
18344 output_asm_insn ("ldr%?\t%0, %1", operands);
18346 if (count)
18347 *count = 2;
18350 else
18352 if (emit)
18354 output_asm_insn ("ldr%?\t%0, %1", operands);
18355 output_asm_insn ("ldr%?\t%0, %1", otherops);
18357 if (count)
18358 *count = 2;
18363 else
18365 /* Constraints should ensure this. */
18366 gcc_assert (code0 == MEM && code1 == REG);
18367 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18368 || (TARGET_ARM && TARGET_LDRD));
18370 switch (GET_CODE (XEXP (operands[0], 0)))
18372 case REG:
18373 if (emit)
18375 if (TARGET_LDRD)
18376 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18377 else
18378 output_asm_insn ("stm%?\t%m0, %M1", operands);
18380 break;
18382 case PRE_INC:
18383 gcc_assert (TARGET_LDRD);
18384 if (emit)
18385 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18386 break;
18388 case PRE_DEC:
18389 if (emit)
18391 if (TARGET_LDRD)
18392 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18393 else
18394 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18396 break;
18398 case POST_INC:
18399 if (emit)
18401 if (TARGET_LDRD)
18402 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18403 else
18404 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18406 break;
18408 case POST_DEC:
18409 gcc_assert (TARGET_LDRD);
18410 if (emit)
18411 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18412 break;
18414 case PRE_MODIFY:
18415 case POST_MODIFY:
18416 otherops[0] = operands[1];
18417 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18418 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18420 /* IWMMXT allows offsets larger than strd can handle,
18421 fix these up with a pair of str. */
18422 if (!TARGET_THUMB2
18423 && CONST_INT_P (otherops[2])
18424 && (INTVAL(otherops[2]) <= -256
18425 || INTVAL(otherops[2]) >= 256))
18427 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18429 if (emit)
18431 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18432 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18434 if (count)
18435 *count = 2;
18437 else
18439 if (emit)
18441 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18442 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18444 if (count)
18445 *count = 2;
18448 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18450 if (emit)
18451 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18453 else
18455 if (emit)
18456 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18458 break;
18460 case PLUS:
18461 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18462 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18464 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18466 case -8:
18467 if (emit)
18468 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18469 return "";
18471 case -4:
18472 if (TARGET_THUMB2)
18473 break;
18474 if (emit)
18475 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18476 return "";
18478 case 4:
18479 if (TARGET_THUMB2)
18480 break;
18481 if (emit)
18482 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18483 return "";
18486 if (TARGET_LDRD
18487 && (REG_P (otherops[2])
18488 || TARGET_THUMB2
18489 || (CONST_INT_P (otherops[2])
18490 && INTVAL (otherops[2]) > -256
18491 && INTVAL (otherops[2]) < 256)))
18493 otherops[0] = operands[1];
18494 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18495 if (emit)
18496 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18497 return "";
18499 /* Fall through */
18501 default:
18502 otherops[0] = adjust_address (operands[0], SImode, 4);
18503 otherops[1] = operands[1];
18504 if (emit)
18506 output_asm_insn ("str%?\t%1, %0", operands);
18507 output_asm_insn ("str%?\t%H1, %0", otherops);
18509 if (count)
18510 *count = 2;
18514 return "";
18517 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18518 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18520 const char *
18521 output_move_quad (rtx *operands)
18523 if (REG_P (operands[0]))
18525 /* Load, or reg->reg move. */
18527 if (MEM_P (operands[1]))
18529 switch (GET_CODE (XEXP (operands[1], 0)))
18531 case REG:
18532 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18533 break;
18535 case LABEL_REF:
18536 case CONST:
18537 output_asm_insn ("adr%?\t%0, %1", operands);
18538 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18539 break;
18541 default:
18542 gcc_unreachable ();
18545 else
18547 rtx ops[2];
18548 int dest, src, i;
18550 gcc_assert (REG_P (operands[1]));
18552 dest = REGNO (operands[0]);
18553 src = REGNO (operands[1]);
18555 /* This seems pretty dumb, but hopefully GCC won't try to do it
18556 very often. */
18557 if (dest < src)
18558 for (i = 0; i < 4; i++)
18560 ops[0] = gen_rtx_REG (SImode, dest + i);
18561 ops[1] = gen_rtx_REG (SImode, src + i);
18562 output_asm_insn ("mov%?\t%0, %1", ops);
18564 else
18565 for (i = 3; i >= 0; i--)
18567 ops[0] = gen_rtx_REG (SImode, dest + i);
18568 ops[1] = gen_rtx_REG (SImode, src + i);
18569 output_asm_insn ("mov%?\t%0, %1", ops);
18573 else
18575 gcc_assert (MEM_P (operands[0]));
18576 gcc_assert (REG_P (operands[1]));
18577 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18579 switch (GET_CODE (XEXP (operands[0], 0)))
18581 case REG:
18582 output_asm_insn ("stm%?\t%m0, %M1", operands);
18583 break;
18585 default:
18586 gcc_unreachable ();
18590 return "";
18593 /* Output a VFP load or store instruction. */
18595 const char *
18596 output_move_vfp (rtx *operands)
18598 rtx reg, mem, addr, ops[2];
18599 int load = REG_P (operands[0]);
18600 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18601 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18602 const char *templ;
18603 char buff[50];
18604 machine_mode mode;
18606 reg = operands[!load];
18607 mem = operands[load];
18609 mode = GET_MODE (reg);
18611 gcc_assert (REG_P (reg));
18612 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18613 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT && TARGET_VFP)
18614 || mode == SFmode
18615 || mode == DFmode
18616 || mode == SImode
18617 || mode == DImode
18618 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18619 gcc_assert (MEM_P (mem));
18621 addr = XEXP (mem, 0);
18623 switch (GET_CODE (addr))
18625 case PRE_DEC:
18626 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18627 ops[0] = XEXP (addr, 0);
18628 ops[1] = reg;
18629 break;
18631 case POST_INC:
18632 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18633 ops[0] = XEXP (addr, 0);
18634 ops[1] = reg;
18635 break;
18637 default:
18638 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18639 ops[0] = reg;
18640 ops[1] = mem;
18641 break;
18644 sprintf (buff, templ,
18645 load ? "ld" : "st",
18646 dp ? "64" : "32",
18647 dp ? "P" : "",
18648 integer_p ? "\t%@ int" : "");
18649 output_asm_insn (buff, ops);
18651 return "";
18654 /* Output a Neon double-word or quad-word load or store, or a load
18655 or store for larger structure modes.
18657 WARNING: The ordering of elements is weird in big-endian mode,
18658 because the EABI requires that vectors stored in memory appear
18659 as though they were stored by a VSTM instruction.
18660 GCC RTL defines element ordering based on in-memory order.
18661 This can be different from the architectural ordering of elements
18662 within a NEON register. The intrinsics defined in arm_neon.h use the
18663 NEON register element ordering, not the GCC RTL element ordering.
18665 For example, the in-memory ordering of a big-endian quadword
18666 vector with 16-bit elements when stored from register pair {d0,d1}
18667 will be (lowest address first, d0[N] is NEON register element N):
18669 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18671 When necessary, quadword registers (dN, dN+1) are moved to ARM
18672 registers from rN in the order:
18674 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18676 So that STM/LDM can be used on vectors in ARM registers, and the
18677 same memory layout will result as if VSTM/VLDM were used.
18679 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18680 possible, which allows use of appropriate alignment tags.
18681 Note that the choice of "64" is independent of the actual vector
18682 element size; this size simply ensures that the behavior is
18683 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18685 Due to limitations of those instructions, use of VST1.64/VLD1.64
18686 is not possible if:
18687 - the address contains PRE_DEC, or
18688 - the mode refers to more than 4 double-word registers
18690 In those cases, it would be possible to replace VSTM/VLDM by a
18691 sequence of instructions; this is not currently implemented since
18692 this is not certain to actually improve performance. */
18694 const char *
18695 output_move_neon (rtx *operands)
18697 rtx reg, mem, addr, ops[2];
18698 int regno, nregs, load = REG_P (operands[0]);
18699 const char *templ;
18700 char buff[50];
18701 machine_mode mode;
18703 reg = operands[!load];
18704 mem = operands[load];
18706 mode = GET_MODE (reg);
18708 gcc_assert (REG_P (reg));
18709 regno = REGNO (reg);
18710 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18711 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18712 || NEON_REGNO_OK_FOR_QUAD (regno));
18713 gcc_assert (VALID_NEON_DREG_MODE (mode)
18714 || VALID_NEON_QREG_MODE (mode)
18715 || VALID_NEON_STRUCT_MODE (mode));
18716 gcc_assert (MEM_P (mem));
18718 addr = XEXP (mem, 0);
18720 /* Strip off const from addresses like (const (plus (...))). */
18721 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18722 addr = XEXP (addr, 0);
18724 switch (GET_CODE (addr))
18726 case POST_INC:
18727 /* We have to use vldm / vstm for too-large modes. */
18728 if (nregs > 4)
18730 templ = "v%smia%%?\t%%0!, %%h1";
18731 ops[0] = XEXP (addr, 0);
18733 else
18735 templ = "v%s1.64\t%%h1, %%A0";
18736 ops[0] = mem;
18738 ops[1] = reg;
18739 break;
18741 case PRE_DEC:
18742 /* We have to use vldm / vstm in this case, since there is no
18743 pre-decrement form of the vld1 / vst1 instructions. */
18744 templ = "v%smdb%%?\t%%0!, %%h1";
18745 ops[0] = XEXP (addr, 0);
18746 ops[1] = reg;
18747 break;
18749 case POST_MODIFY:
18750 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18751 gcc_unreachable ();
18753 case REG:
18754 /* We have to use vldm / vstm for too-large modes. */
18755 if (nregs > 1)
18757 if (nregs > 4)
18758 templ = "v%smia%%?\t%%m0, %%h1";
18759 else
18760 templ = "v%s1.64\t%%h1, %%A0";
18762 ops[0] = mem;
18763 ops[1] = reg;
18764 break;
18766 /* Fall through. */
18767 case LABEL_REF:
18768 case PLUS:
18770 int i;
18771 int overlap = -1;
18772 for (i = 0; i < nregs; i++)
18774 /* We're only using DImode here because it's a convenient size. */
18775 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18776 ops[1] = adjust_address (mem, DImode, 8 * i);
18777 if (reg_overlap_mentioned_p (ops[0], mem))
18779 gcc_assert (overlap == -1);
18780 overlap = i;
18782 else
18784 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18785 output_asm_insn (buff, ops);
18788 if (overlap != -1)
18790 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18791 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18792 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18793 output_asm_insn (buff, ops);
18796 return "";
18799 default:
18800 gcc_unreachable ();
18803 sprintf (buff, templ, load ? "ld" : "st");
18804 output_asm_insn (buff, ops);
18806 return "";
18809 /* Compute and return the length of neon_mov<mode>, where <mode> is
18810 one of VSTRUCT modes: EI, OI, CI or XI. */
18811 int
18812 arm_attr_length_move_neon (rtx_insn *insn)
18814 rtx reg, mem, addr;
18815 int load;
18816 machine_mode mode;
18818 extract_insn_cached (insn);
18820 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18822 mode = GET_MODE (recog_data.operand[0]);
18823 switch (mode)
18825 case EImode:
18826 case OImode:
18827 return 8;
18828 case CImode:
18829 return 12;
18830 case XImode:
18831 return 16;
18832 default:
18833 gcc_unreachable ();
18837 load = REG_P (recog_data.operand[0]);
18838 reg = recog_data.operand[!load];
18839 mem = recog_data.operand[load];
18841 gcc_assert (MEM_P (mem));
18843 mode = GET_MODE (reg);
18844 addr = XEXP (mem, 0);
18846 /* Strip off const from addresses like (const (plus (...))). */
18847 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18848 addr = XEXP (addr, 0);
18850 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18852 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18853 return insns * 4;
18855 else
18856 return 4;
18859 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18860 return zero. */
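/* For example (hypothetical operands): an address of the form [r3] or
   [r3, #12] yields 1, while a register-offset address such as [r3, r2]
   yields 0.  */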
18862 int
18863 arm_address_offset_is_imm (rtx_insn *insn)
18865 rtx mem, addr;
18867 extract_insn_cached (insn);
18869 if (REG_P (recog_data.operand[0]))
18870 return 0;
18872 mem = recog_data.operand[0];
18874 gcc_assert (MEM_P (mem));
18876 addr = XEXP (mem, 0);
18878 if (REG_P (addr)
18879 || (GET_CODE (addr) == PLUS
18880 && REG_P (XEXP (addr, 0))
18881 && CONST_INT_P (XEXP (addr, 1))))
18882 return 1;
18883 else
18884 return 0;
18887 /* Output an ADD r, s, #n where n may be too big for one instruction.
18888 If we are adding zero and the source register is also the destination, output nothing. */
18889 const char *
18890 output_add_immediate (rtx *operands)
18892 HOST_WIDE_INT n = INTVAL (operands[2]);
18894 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18896 if (n < 0)
18897 output_multi_immediate (operands,
18898 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18899 -n);
18900 else
18901 output_multi_immediate (operands,
18902 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18906 return "";
18909 /* Output a multiple immediate operation.
18910 OPERANDS is the vector of operands referred to in the output patterns.
18911 INSTR1 is the output pattern to use for the first constant.
18912 INSTR2 is the output pattern to use for subsequent constants.
18913 IMMED_OP is the index of the constant slot in OPERANDS.
18914 N is the constant value. */
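/* Worked example (illustrative, using the patterns passed in by
   output_add_immediate above): N = 0x10001 has no single 8-bit rotated
   encoding, so it is split into two instructions,
   "add r0, r1, #1" followed by "add r0, r0, #65536".  */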
18915 static const char *
18916 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18917 int immed_op, HOST_WIDE_INT n)
18919 #if HOST_BITS_PER_WIDE_INT > 32
18920 n &= 0xffffffff;
18921 #endif
18923 if (n == 0)
18925 /* Quick and easy output. */
18926 operands[immed_op] = const0_rtx;
18927 output_asm_insn (instr1, operands);
18929 else
18931 int i;
18932 const char * instr = instr1;
18934 /* Note that n is never zero here (which would give no output). */
18935 for (i = 0; i < 32; i += 2)
18937 if (n & (3 << i))
18939 operands[immed_op] = GEN_INT (n & (255 << i));
18940 output_asm_insn (instr, operands);
18941 instr = instr2;
18942 i += 6;
18947 return "";
18950 /* Return the name of a shifter operation. */
18951 static const char *
18952 arm_shift_nmem(enum rtx_code code)
18954 switch (code)
18956 case ASHIFT:
18957 return ARM_LSL_NAME;
18959 case ASHIFTRT:
18960 return "asr";
18962 case LSHIFTRT:
18963 return "lsr";
18965 case ROTATERT:
18966 return "ror";
18968 default:
18969 abort();
18973 /* Return the appropriate ARM instruction for the operation code.
18974 The returned result should not be overwritten. OP is the rtx of the
18975 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18976 was shifted. */
18977 const char *
18978 arithmetic_instr (rtx op, int shift_first_arg)
18980 switch (GET_CODE (op))
18982 case PLUS:
18983 return "add";
18985 case MINUS:
18986 return shift_first_arg ? "rsb" : "sub";
18988 case IOR:
18989 return "orr";
18991 case XOR:
18992 return "eor";
18994 case AND:
18995 return "and";
18997 case ASHIFT:
18998 case ASHIFTRT:
18999 case LSHIFTRT:
19000 case ROTATERT:
19001 return arm_shift_nmem(GET_CODE(op));
19003 default:
19004 gcc_unreachable ();
19008 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19009 for the operation code. The returned result should not be overwritten.
19010 OP is the rtx of the shift.
19011 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19012 shift amount if the shift is by a constant. */
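/* Hedged examples: for (mult x 8) this returns the LSL mnemonic with
   *AMOUNTP set to 3; for (ashiftrt x (reg)) it returns "asr" with
   *AMOUNTP set to -1.  */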
19013 static const char *
19014 shift_op (rtx op, HOST_WIDE_INT *amountp)
19016 const char * mnem;
19017 enum rtx_code code = GET_CODE (op);
19019 switch (code)
19021 case ROTATE:
19022 if (!CONST_INT_P (XEXP (op, 1)))
19024 output_operand_lossage ("invalid shift operand");
19025 return NULL;
19028 code = ROTATERT;
19029 *amountp = 32 - INTVAL (XEXP (op, 1));
19030 mnem = "ror";
19031 break;
19033 case ASHIFT:
19034 case ASHIFTRT:
19035 case LSHIFTRT:
19036 case ROTATERT:
19037 mnem = arm_shift_nmem(code);
19038 if (CONST_INT_P (XEXP (op, 1)))
19040 *amountp = INTVAL (XEXP (op, 1));
19042 else if (REG_P (XEXP (op, 1)))
19044 *amountp = -1;
19045 return mnem;
19047 else
19049 output_operand_lossage ("invalid shift operand");
19050 return NULL;
19052 break;
19054 case MULT:
19055 /* We never have to worry about the amount being other than a
19056 power of 2, since this case can never be reloaded from a reg. */
19057 if (!CONST_INT_P (XEXP (op, 1)))
19059 output_operand_lossage ("invalid shift operand");
19060 return NULL;
19063 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19065 /* Amount must be a power of two. */
19066 if (*amountp & (*amountp - 1))
19068 output_operand_lossage ("invalid shift operand");
19069 return NULL;
19072 *amountp = int_log2 (*amountp);
19073 return ARM_LSL_NAME;
19075 default:
19076 output_operand_lossage ("invalid shift operand");
19077 return NULL;
19080 /* This is not 100% correct, but follows from the desire to merge
19081 multiplication by a power of 2 with the recognizer for a
19082 shift. >=32 is not a valid shift for "lsl", so we must try to
19083 output a shift that produces the correct arithmetical result.
19084 Using lsr #32 is identical except for the fact that the carry bit
19085 is not set correctly if we set the flags; but we never use the
19086 carry bit from such an operation, so we can ignore that. */
19087 if (code == ROTATERT)
19088 /* Rotate is just modulo 32. */
19089 *amountp &= 31;
19090 else if (*amountp != (*amountp & 31))
19092 if (code == ASHIFT)
19093 mnem = "lsr";
19094 *amountp = 32;
19097 /* Shifts of 0 are no-ops. */
19098 if (*amountp == 0)
19099 return NULL;
19101 return mnem;
19104 /* Obtain the shift from the POWER of two. */
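/* E.g. int_log2 (8) returns 3; callers are expected to pass a power of
   two, as the MULT handling in shift_op above checks.  */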
19106 static HOST_WIDE_INT
19107 int_log2 (HOST_WIDE_INT power)
19109 HOST_WIDE_INT shift = 0;
19111 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
19113 gcc_assert (shift <= 31);
19114 shift++;
19117 return shift;
19120 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19121 because /bin/as is horribly restrictive. The judgement about
19122 whether or not each character is 'printable' (and can be output as
19123 is) or not (and must be printed with an octal escape) must be made
19124 with reference to the *host* character set -- the situation is
19125 similar to that discussed in the comments above pp_c_char in
19126 c-pretty-print.c. */
19128 #define MAX_ASCII_LEN 51
19130 void
19131 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19133 int i;
19134 int len_so_far = 0;
19136 fputs ("\t.ascii\t\"", stream);
19138 for (i = 0; i < len; i++)
19140 int c = p[i];
19142 if (len_so_far >= MAX_ASCII_LEN)
19144 fputs ("\"\n\t.ascii\t\"", stream);
19145 len_so_far = 0;
19148 if (ISPRINT (c))
19150 if (c == '\\' || c == '\"')
19152 putc ('\\', stream);
19153 len_so_far++;
19155 putc (c, stream);
19156 len_so_far++;
19158 else
19160 fprintf (stream, "\\%03o", c);
19161 len_so_far += 4;
19165 fputs ("\"\n", stream);
19168 /* Whether a register is callee saved or not. This is necessary because, when
19169 optimizing for size on Thumb-1 targets, high registers are marked as caller
19170 saved even though they are callee saved, in order to avoid using them. */
19171 #define callee_saved_reg_p(reg) \
19172 (!call_used_regs[reg] \
19173 || (TARGET_THUMB1 && optimize_size \
19174 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19176 /* Compute the register save mask for registers 0 through 12
19177 inclusive. This code is used by arm_compute_save_reg_mask. */
19179 static unsigned long
19180 arm_compute_save_reg0_reg12_mask (void)
19182 unsigned long func_type = arm_current_func_type ();
19183 unsigned long save_reg_mask = 0;
19184 unsigned int reg;
19186 if (IS_INTERRUPT (func_type))
19188 unsigned int max_reg;
19189 /* Interrupt functions must not corrupt any registers,
19190 even call clobbered ones. If this is a leaf function
19191 we can just examine the registers used by the RTL, but
19192 otherwise we have to assume that whatever function is
19193 called might clobber anything, and so we have to save
19194 all the call-clobbered registers as well. */
19195 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19196 /* FIQ handlers have registers r8 - r12 banked, so
19197 we only need to check r0 - r7. Normal ISRs only
19198 bank r14 and r15, so we must check up to r12.
19199 r13 is the stack pointer which is always preserved,
19200 so we do not need to consider it here. */
19201 max_reg = 7;
19202 else
19203 max_reg = 12;
19205 for (reg = 0; reg <= max_reg; reg++)
19206 if (df_regs_ever_live_p (reg)
19207 || (! crtl->is_leaf && call_used_regs[reg]))
19208 save_reg_mask |= (1 << reg);
19210 /* Also save the pic base register if necessary. */
19211 if (flag_pic
19212 && !TARGET_SINGLE_PIC_BASE
19213 && arm_pic_register != INVALID_REGNUM
19214 && crtl->uses_pic_offset_table)
19215 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19217 else if (IS_VOLATILE(func_type))
19219 /* For noreturn functions we historically omitted register saves
19220 altogether. However this really messes up debugging. As a
19221 compromise save just the frame pointers. Combined with the link
19222 register saved elsewhere this should be sufficient to get
19223 a backtrace. */
19224 if (frame_pointer_needed)
19225 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19226 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19227 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19228 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19229 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19231 else
19233 /* In the normal case we only need to save those registers
19234 which are call saved and which are used by this function. */
19235 for (reg = 0; reg <= 11; reg++)
19236 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19237 save_reg_mask |= (1 << reg);
19239 /* Handle the frame pointer as a special case. */
19240 if (frame_pointer_needed)
19241 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19243 /* If we aren't loading the PIC register,
19244 don't stack it even though it may be live. */
19245 if (flag_pic
19246 && !TARGET_SINGLE_PIC_BASE
19247 && arm_pic_register != INVALID_REGNUM
19248 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19249 || crtl->uses_pic_offset_table))
19250 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19252 /* The prologue will copy SP into R0, so save it. */
19253 if (IS_STACKALIGN (func_type))
19254 save_reg_mask |= 1;
19257 /* Save registers so the exception handler can modify them. */
19258 if (crtl->calls_eh_return)
19260 unsigned int i;
19262 for (i = 0; ; i++)
19264 reg = EH_RETURN_DATA_REGNO (i);
19265 if (reg == INVALID_REGNUM)
19266 break;
19267 save_reg_mask |= 1 << reg;
19271 return save_reg_mask;
19274 /* Return true if r3 is live at the start of the function. */
19276 static bool
19277 arm_r3_live_at_start_p (void)
19279 /* Just look at cfg info, which is still close enough to correct at this
19280 point. This gives false positives for broken functions that might use
19281 uninitialized data that happens to be allocated in r3, but who cares? */
19282 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19285 /* Compute the number of bytes used to store the static chain register on the
19286 stack, above the stack frame. We need to know this accurately to get the
19287 alignment of the rest of the stack frame correct. */
19289 static int
19290 arm_compute_static_chain_stack_bytes (void)
19292 /* See the defining assertion in arm_expand_prologue. */
19293 if (IS_NESTED (arm_current_func_type ())
19294 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19295 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19296 && !df_regs_ever_live_p (LR_REGNUM)))
19297 && arm_r3_live_at_start_p ()
19298 && crtl->args.pretend_args_size == 0)
19299 return 4;
19301 return 0;
19304 /* Compute a bit mask of which registers need to be
19305 saved on the stack for the current function.
19306 This is used by arm_get_frame_offsets, which may add extra registers. */
19308 static unsigned long
19309 arm_compute_save_reg_mask (void)
19311 unsigned int save_reg_mask = 0;
19312 unsigned long func_type = arm_current_func_type ();
19313 unsigned int reg;
19315 if (IS_NAKED (func_type))
19316 /* This should never really happen. */
19317 return 0;
19319 /* If we are creating a stack frame, then we must save the frame pointer,
19320 IP (which will hold the old stack pointer), LR and the PC. */
19321 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19322 save_reg_mask |=
19323 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19324 | (1 << IP_REGNUM)
19325 | (1 << LR_REGNUM)
19326 | (1 << PC_REGNUM);
19328 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19330 /* Decide if we need to save the link register.
19331 Interrupt routines have their own banked link register,
19332 so they never need to save it.
19333 Otherwise if we do not use the link register we do not need to save
19334 it. If we are pushing other registers onto the stack however, we
19335 can save an instruction in the epilogue by pushing the link register
19336 now and then popping it back into the PC. This incurs extra memory
19337 accesses though, so we only do it when optimizing for size, and only
19338 if we know that we will not need a fancy return sequence. */
19339 if (df_regs_ever_live_p (LR_REGNUM)
19340 || (save_reg_mask
19341 && optimize_size
19342 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19343 && !crtl->tail_call_emit
19344 && !crtl->calls_eh_return))
19345 save_reg_mask |= 1 << LR_REGNUM;
19347 if (cfun->machine->lr_save_eliminated)
19348 save_reg_mask &= ~ (1 << LR_REGNUM);
19350 if (TARGET_REALLY_IWMMXT
19351 && ((bit_count (save_reg_mask)
19352 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19353 arm_compute_static_chain_stack_bytes())
19354 ) % 2) != 0)
19356 /* The total number of registers that are going to be pushed
19357 onto the stack is odd. We need to ensure that the stack
19358 is 64-bit aligned before we start to save iWMMXt registers,
19359 and also before we start to create locals. (A local variable
19360 might be a double or long long which we will load/store using
19361 an iWMMXt instruction). Therefore we need to push another
19362 ARM register, so that the stack will be 64-bit aligned. We
19363 try to avoid using the arg registers (r0 - r3) as they might be
19364 used to pass values in a tail call. */
19365 for (reg = 4; reg <= 12; reg++)
19366 if ((save_reg_mask & (1 << reg)) == 0)
19367 break;
19369 if (reg <= 12)
19370 save_reg_mask |= (1 << reg);
19371 else
19373 cfun->machine->sibcall_blocked = 1;
19374 save_reg_mask |= (1 << 3);
19378 /* We may need to push an additional register for use initializing the
19379 PIC base register. */
19380 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19381 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19383 reg = thumb_find_work_register (1 << 4);
19384 if (!call_used_regs[reg])
19385 save_reg_mask |= (1 << reg);
19388 return save_reg_mask;
19391 /* Compute a bit mask of which registers need to be
19392 saved on the stack for the current function. */
19393 static unsigned long
19394 thumb1_compute_save_reg_mask (void)
19396 unsigned long mask;
19397 unsigned reg;
19399 mask = 0;
19400 for (reg = 0; reg < 12; reg ++)
19401 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19402 mask |= 1 << reg;
19404 if (flag_pic
19405 && !TARGET_SINGLE_PIC_BASE
19406 && arm_pic_register != INVALID_REGNUM
19407 && crtl->uses_pic_offset_table)
19408 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19410 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19411 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19412 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19414 /* LR will also be pushed if any lo regs are pushed. */
19415 if (mask & 0xff || thumb_force_lr_save ())
19416 mask |= (1 << LR_REGNUM);
19418 /* Make sure we have a low work register if we need one.
19419 We will need one if we are going to push a high register,
19420 but we are not currently intending to push a low register. */
19421 if ((mask & 0xff) == 0
19422 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19424 /* Use thumb_find_work_register to choose which register
19425 we will use. If the register is live then we will
19426 have to push it. Use LAST_LO_REGNUM as our fallback
19427 choice for the register to select. */
19428 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19429 /* Make sure the register returned by thumb_find_work_register is
19430 not part of the return value. */
19431 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19432 reg = LAST_LO_REGNUM;
19434 if (callee_saved_reg_p (reg))
19435 mask |= 1 << reg;
19438 /* The 504 below is 8 bytes less than 512 because there are two possible
19439 alignment words. We can't tell here if they will be present or not so we
19440 have to play it safe and assume that they are. */
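/* A hedged aside, not stated in the original sources: the 512-byte
threshold matches the reach of the Thumb-1 "sub sp, #imm" encoding,
which can subtract at most 508 bytes in one instruction, so larger
frames need a spare low register to hold the stack decrement. */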
19441 if ((CALLER_INTERWORKING_SLOT_SIZE +
19442 ROUND_UP_WORD (get_frame_size ()) +
19443 crtl->outgoing_args_size) >= 504)
19445 /* This is the same as the code in thumb1_expand_prologue() which
19446 determines which register to use for stack decrement. */
19447 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19448 if (mask & (1 << reg))
19449 break;
19451 if (reg > LAST_LO_REGNUM)
19453 /* Make sure we have a register available for stack decrement. */
19454 mask |= 1 << LAST_LO_REGNUM;
19458 return mask;
19462 /* Return the number of bytes required to save VFP registers. */
19463 static int
19464 arm_get_vfp_saved_size (void)
19466 unsigned int regno;
19467 int count;
19468 int saved;
19470 saved = 0;
19471 /* Space for saved VFP registers. */
19472 if (TARGET_HARD_FLOAT && TARGET_VFP)
19474 count = 0;
19475 for (regno = FIRST_VFP_REGNUM;
19476 regno < LAST_VFP_REGNUM;
19477 regno += 2)
19479 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19480 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19482 if (count > 0)
19484 /* Workaround ARM10 VFPr1 bug. */
19485 if (count == 2 && !arm_arch6)
19486 count++;
19487 saved += count * 8;
19489 count = 0;
19491 else
19492 count++;
19494 if (count > 0)
19496 if (count == 2 && !arm_arch6)
19497 count++;
19498 saved += count * 8;
19501 return saved;
19505 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19506 everything bar the final return instruction. If simple_return is true,
19507 then do not output epilogue, because it has already been emitted in RTL. */
19508 const char *
19509 output_return_instruction (rtx operand, bool really_return, bool reverse,
19510 bool simple_return)
19512 char conditional[10];
19513 char instr[100];
19514 unsigned reg;
19515 unsigned long live_regs_mask;
19516 unsigned long func_type;
19517 arm_stack_offsets *offsets;
19519 func_type = arm_current_func_type ();
19521 if (IS_NAKED (func_type))
19522 return "";
19524 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19526 /* If this function was declared non-returning, and we have
19527 found a tail call, then we have to trust that the called
19528 function won't return. */
19529 if (really_return)
19531 rtx ops[2];
19533 /* Otherwise, trap an attempted return by aborting. */
19534 ops[0] = operand;
19535 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19536 : "abort");
19537 assemble_external_libcall (ops[1]);
19538 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19541 return "";
19544 gcc_assert (!cfun->calls_alloca || really_return);
19546 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19548 cfun->machine->return_used_this_function = 1;
19550 offsets = arm_get_frame_offsets ();
19551 live_regs_mask = offsets->saved_regs_mask;
19553 if (!simple_return && live_regs_mask)
19555 const char * return_reg;
19557 /* If we do not have any special requirements for function exit
19558 (e.g. interworking) then we can load the return address
19559 directly into the PC. Otherwise we must load it into LR. */
19560 if (really_return
19561 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19562 return_reg = reg_names[PC_REGNUM];
19563 else
19564 return_reg = reg_names[LR_REGNUM];
19566 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19568 /* There are three possible reasons for the IP register
19569 being saved. 1) a stack frame was created, in which case
19570 IP contains the old stack pointer, or 2) an ISR routine
19571 corrupted it, or 3) it was saved to align the stack on
19572 iWMMXt. In case 1, restore IP into SP, otherwise just
19573 restore IP. */
19574 if (frame_pointer_needed)
19576 live_regs_mask &= ~ (1 << IP_REGNUM);
19577 live_regs_mask |= (1 << SP_REGNUM);
19579 else
19580 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19583 /* On some ARM architectures it is faster to use LDR rather than
19584 LDM to load a single register. On other architectures, the
19585 cost is the same. In 26 bit mode, or for exception handlers,
19586 we have to use LDM to load the PC so that the CPSR is also
19587 restored. */
19588 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19589 if (live_regs_mask == (1U << reg))
19590 break;
19592 if (reg <= LAST_ARM_REGNUM
19593 && (reg != LR_REGNUM
19594 || ! really_return
19595 || ! IS_INTERRUPT (func_type)))
19597 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19598 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19600 else
19602 char *p;
19603 int first = 1;
19605 /* Generate the load multiple instruction to restore the
19606 registers. Note we can get here, even if
19607 frame_pointer_needed is true, but only if sp already
19608 points to the base of the saved core registers. */
19609 if (live_regs_mask & (1 << SP_REGNUM))
19611 unsigned HOST_WIDE_INT stack_adjust;
19613 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19614 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19616 if (stack_adjust && arm_arch5 && TARGET_ARM)
19617 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19618 else
19620 /* If we can't use ldmib (SA110 bug),
19621 then try to pop r3 instead. */
19622 if (stack_adjust)
19623 live_regs_mask |= 1 << 3;
19625 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19628 /* For interrupt returns we have to use an LDM rather than
19629 a POP so that we can use the exception return variant. */
19630 else if (IS_INTERRUPT (func_type))
19631 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19632 else
19633 sprintf (instr, "pop%s\t{", conditional);
19635 p = instr + strlen (instr);
19637 for (reg = 0; reg <= SP_REGNUM; reg++)
19638 if (live_regs_mask & (1 << reg))
19640 int l = strlen (reg_names[reg]);
19642 if (first)
19643 first = 0;
19644 else
19646 memcpy (p, ", ", 2);
19647 p += 2;
19650 memcpy (p, "%|", 2);
19651 memcpy (p + 2, reg_names[reg], l);
19652 p += l + 2;
19655 if (live_regs_mask & (1 << LR_REGNUM))
19657 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19658 /* If returning from an interrupt, restore the CPSR. */
19659 if (IS_INTERRUPT (func_type))
19660 strcat (p, "^");
19662 else
19663 strcpy (p, "}");
19666 output_asm_insn (instr, & operand);
19668 /* See if we need to generate an extra instruction to
19669 perform the actual function return. */
19670 if (really_return
19671 && func_type != ARM_FT_INTERWORKED
19672 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19674 /* The return has already been handled
19675 by loading the LR into the PC. */
19676 return "";
19680 if (really_return)
19682 switch ((int) ARM_FUNC_TYPE (func_type))
19684 case ARM_FT_ISR:
19685 case ARM_FT_FIQ:
19686 /* ??? This is wrong for unified assembly syntax. */
19687 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19688 break;
19690 case ARM_FT_INTERWORKED:
19691 gcc_assert (arm_arch5 || arm_arch4t);
19692 sprintf (instr, "bx%s\t%%|lr", conditional);
19693 break;
19695 case ARM_FT_EXCEPTION:
19696 /* ??? This is wrong for unified assembly syntax. */
19697 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19698 break;
19700 default:
19701 /* Use bx if it's available. */
19702 if (arm_arch5 || arm_arch4t)
19703 sprintf (instr, "bx%s\t%%|lr", conditional);
19704 else
19705 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19706 break;
19709 output_asm_insn (instr, & operand);
19712 return "";
19715 /* Write the function name into the code section, directly preceding
19716 the function prologue.
19718 Code will be output similar to this:
19719 t0
19720 .ascii "arm_poke_function_name", 0
19721 .align
19722 t1
19723 .word 0xff000000 + (t1 - t0)
19724 arm_poke_function_name
19725 mov ip, sp
19726 stmfd sp!, {fp, ip, lr, pc}
19727 sub fp, ip, #4
19729 When performing a stack backtrace, code can inspect the value
19730 of 'pc' stored at 'fp' + 0. If the trace function then looks
19731 at location pc - 12 and the top 8 bits are set, then we know
19732 that there is a function name embedded immediately preceding this
19733 location, whose length is given by ((pc[-3]) & ~0xff000000).
19735 We assume that pc is declared as a pointer to an unsigned long.
19737 It is of no benefit to output the function name if we are assembling
19738 a leaf function. These function types will not contain a stack
19739 backtrace structure, therefore it is not possible to determine the
19740 function name. */
19741 void
19742 arm_poke_function_name (FILE *stream, const char *name)
19744 unsigned long alignlength;
19745 unsigned long length;
19746 rtx x;
19748 length = strlen (name) + 1;
19749 alignlength = ROUND_UP_WORD (length);
19751 ASM_OUTPUT_ASCII (stream, name, length);
19752 ASM_OUTPUT_ALIGN (stream, 2);
19753 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19754 assemble_aligned_integer (UNITS_PER_WORD, x);
19757 /* Place some comments into the assembler stream
19758 describing the current function. */
19759 static void
19760 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19762 unsigned long func_type;
19764 /* ??? Do we want to print some of the below anyway? */
19765 if (TARGET_THUMB1)
19766 return;
19768 /* Sanity check. */
19769 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19771 func_type = arm_current_func_type ();
19773 switch ((int) ARM_FUNC_TYPE (func_type))
19775 default:
19776 case ARM_FT_NORMAL:
19777 break;
19778 case ARM_FT_INTERWORKED:
19779 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19780 break;
19781 case ARM_FT_ISR:
19782 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19783 break;
19784 case ARM_FT_FIQ:
19785 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19786 break;
19787 case ARM_FT_EXCEPTION:
19788 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19789 break;
19792 if (IS_NAKED (func_type))
19793 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19795 if (IS_VOLATILE (func_type))
19796 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19798 if (IS_NESTED (func_type))
19799 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19800 if (IS_STACKALIGN (func_type))
19801 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19803 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19804 crtl->args.size,
19805 crtl->args.pretend_args_size, frame_size);
19807 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19808 frame_pointer_needed,
19809 cfun->machine->uses_anonymous_args);
19811 if (cfun->machine->lr_save_eliminated)
19812 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19814 if (crtl->calls_eh_return)
19815 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19819 static void
19820 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19821 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19823 arm_stack_offsets *offsets;
19825 if (TARGET_THUMB1)
19827 int regno;
19829 /* Emit any call-via-reg trampolines that are needed for v4t support
19830 of call_reg and call_value_reg type insns. */
19831 for (regno = 0; regno < LR_REGNUM; regno++)
19833 rtx label = cfun->machine->call_via[regno];
19835 if (label != NULL)
19837 switch_to_section (function_section (current_function_decl));
19838 targetm.asm_out.internal_label (asm_out_file, "L",
19839 CODE_LABEL_NUMBER (label));
19840 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19844 /* ??? Probably not safe to set this here, since it assumes that a
19845 function will be emitted as assembly immediately after we generate
19846 RTL for it. This does not happen for inline functions. */
19847 cfun->machine->return_used_this_function = 0;
19849 else /* TARGET_32BIT */
19851 /* We need to take into account any stack-frame rounding. */
19852 offsets = arm_get_frame_offsets ();
19854 gcc_assert (!use_return_insn (FALSE, NULL)
19855 || (cfun->machine->return_used_this_function != 0)
19856 || offsets->saved_regs == offsets->outgoing_args
19857 || frame_pointer_needed);
19861 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19862 STR and STRD. If an even number of registers is being pushed, an
19863 STRD pattern is created for each register pair. If an
19864 odd number of registers is pushed, emit an initial STR followed by
19865 as many STRD instructions as are needed. This works best when the
19866 stack is initially 64-bit aligned (the normal case), since it
19867 ensures that each STRD is also 64-bit aligned. */
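/* A sketch of the generated sequences, offered only as an illustration
(assuming SP is 64-bit aligned on entry):
mask = {r4, r5, r6, r7}: strd r4, r5, [sp, #-16]! ; strd r6, r7, [sp, #8]
mask = {r4, r5, r6}: str r4, [sp, #-12]! ; strd r5, r6, [sp, #4]
The first store allocates the whole area with writeback; the remaining
pairs use plain offset addressing. */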
19868 static void
19869 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19871 int num_regs = 0;
19872 int i;
19873 int regno;
19874 rtx par = NULL_RTX;
19875 rtx dwarf = NULL_RTX;
19876 rtx tmp;
19877 bool first = true;
19879 num_regs = bit_count (saved_regs_mask);
19881 /* Must be at least one register to save, and can't save SP or PC. */
19882 gcc_assert (num_regs > 0 && num_regs <= 14);
19883 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19884 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19886 /* Create sequence for DWARF info. All the frame-related data for
19887 debugging is held in this wrapper. */
19888 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19890 /* Describe the stack adjustment. */
19891 tmp = gen_rtx_SET (stack_pointer_rtx,
19892 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19893 RTX_FRAME_RELATED_P (tmp) = 1;
19894 XVECEXP (dwarf, 0, 0) = tmp;
19896 /* Find the first register. */
19897 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19900 i = 0;
19902 /* If there's an odd number of registers to push, start off by
19903 pushing a single register. This ensures that subsequent strd
19904 operations are dword aligned (assuming that SP was originally
19905 64-bit aligned). */
19906 if ((num_regs & 1) != 0)
19908 rtx reg, mem, insn;
19910 reg = gen_rtx_REG (SImode, regno);
19911 if (num_regs == 1)
19912 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19913 stack_pointer_rtx));
19914 else
19915 mem = gen_frame_mem (Pmode,
19916 gen_rtx_PRE_MODIFY
19917 (Pmode, stack_pointer_rtx,
19918 plus_constant (Pmode, stack_pointer_rtx,
19919 -4 * num_regs)));
19921 tmp = gen_rtx_SET (mem, reg);
19922 RTX_FRAME_RELATED_P (tmp) = 1;
19923 insn = emit_insn (tmp);
19924 RTX_FRAME_RELATED_P (insn) = 1;
19925 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19926 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19927 RTX_FRAME_RELATED_P (tmp) = 1;
19928 i++;
19929 regno++;
19930 XVECEXP (dwarf, 0, i) = tmp;
19931 first = false;
19934 while (i < num_regs)
19935 if (saved_regs_mask & (1 << regno))
19937 rtx reg1, reg2, mem1, mem2;
19938 rtx tmp0, tmp1, tmp2;
19939 int regno2;
19941 /* Find the register to pair with this one. */
19942 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19943 regno2++)
19946 reg1 = gen_rtx_REG (SImode, regno);
19947 reg2 = gen_rtx_REG (SImode, regno2);
19949 if (first)
19951 rtx insn;
19953 first = false;
19954 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19955 stack_pointer_rtx,
19956 -4 * num_regs));
19957 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19958 stack_pointer_rtx,
19959 -4 * (num_regs - 1)));
19960 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19961 plus_constant (Pmode, stack_pointer_rtx,
19962 -4 * (num_regs)));
19963 tmp1 = gen_rtx_SET (mem1, reg1);
19964 tmp2 = gen_rtx_SET (mem2, reg2);
19965 RTX_FRAME_RELATED_P (tmp0) = 1;
19966 RTX_FRAME_RELATED_P (tmp1) = 1;
19967 RTX_FRAME_RELATED_P (tmp2) = 1;
19968 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19969 XVECEXP (par, 0, 0) = tmp0;
19970 XVECEXP (par, 0, 1) = tmp1;
19971 XVECEXP (par, 0, 2) = tmp2;
19972 insn = emit_insn (par);
19973 RTX_FRAME_RELATED_P (insn) = 1;
19974 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19976 else
19978 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19979 stack_pointer_rtx,
19980 4 * i));
19981 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19982 stack_pointer_rtx,
19983 4 * (i + 1)));
19984 tmp1 = gen_rtx_SET (mem1, reg1);
19985 tmp2 = gen_rtx_SET (mem2, reg2);
19986 RTX_FRAME_RELATED_P (tmp1) = 1;
19987 RTX_FRAME_RELATED_P (tmp2) = 1;
19988 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19989 XVECEXP (par, 0, 0) = tmp1;
19990 XVECEXP (par, 0, 1) = tmp2;
19991 emit_insn (par);
19994 /* Create unwind information. This is an approximation. */
19995 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19996 plus_constant (Pmode,
19997 stack_pointer_rtx,
19998 4 * i)),
19999 reg1);
20000 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20001 plus_constant (Pmode,
20002 stack_pointer_rtx,
20003 4 * (i + 1))),
20004 reg2);
20006 RTX_FRAME_RELATED_P (tmp1) = 1;
20007 RTX_FRAME_RELATED_P (tmp2) = 1;
20008 XVECEXP (dwarf, 0, i + 1) = tmp1;
20009 XVECEXP (dwarf, 0, i + 2) = tmp2;
20010 i += 2;
20011 regno = regno2 + 1;
20013 else
20014 regno++;
20016 return;
20019 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20020 whenever possible, otherwise it emits single-word stores. The first store
20021 also allocates stack space for all saved registers, using writeback with
20022 post-addressing mode. All other stores use offset addressing. If no STRD
20023 can be emitted, this function emits a sequence of single-word stores,
20024 and not an STM as before, because single-word stores provide more
20025 scheduling freedom and can be turned into an STM by peephole optimizations. */
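/* A hedged sketch of the intended output (illustrative only). ARM-mode
STRD needs an even/odd consecutive register pair, so for
mask = {r4, r5, r7} this emits:
strd r4, r5, [sp, #-12]! @ the pair also allocates the whole area
str r7, [sp, #8] @ single-word store
which a later peephole pass may still fuse into an STM. */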
20026 static void
20027 arm_emit_strd_push (unsigned long saved_regs_mask)
20029 int num_regs = 0;
20030 int i, j, dwarf_index = 0;
20031 int offset = 0;
20032 rtx dwarf = NULL_RTX;
20033 rtx insn = NULL_RTX;
20034 rtx tmp, mem;
20036 /* TODO: More efficient code could be emitted by changing the
20037 layout, e.g., first push all pairs that can use STRD to keep the
20038 stack aligned, and then push all other registers. */
20039 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20040 if (saved_regs_mask & (1 << i))
20041 num_regs++;
20043 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20044 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20045 gcc_assert (num_regs > 0);
20047 /* Create sequence for DWARF info. */
20048 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20050 /* For dwarf info, we generate explicit stack update. */
20051 tmp = gen_rtx_SET (stack_pointer_rtx,
20052 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20053 RTX_FRAME_RELATED_P (tmp) = 1;
20054 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20056 /* Save registers. */
20057 offset = - 4 * num_regs;
20058 j = 0;
20059 while (j <= LAST_ARM_REGNUM)
20060 if (saved_regs_mask & (1 << j))
20062 if ((j % 2 == 0)
20063 && (saved_regs_mask & (1 << (j + 1))))
20065 /* Current register and previous register form register pair for
20066 which STRD can be generated. */
20067 if (offset < 0)
20069 /* Allocate stack space for all saved registers. */
20070 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20071 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20072 mem = gen_frame_mem (DImode, tmp);
20073 offset = 0;
20075 else if (offset > 0)
20076 mem = gen_frame_mem (DImode,
20077 plus_constant (Pmode,
20078 stack_pointer_rtx,
20079 offset));
20080 else
20081 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20083 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20084 RTX_FRAME_RELATED_P (tmp) = 1;
20085 tmp = emit_insn (tmp);
20087 /* Record the first store insn. */
20088 if (dwarf_index == 1)
20089 insn = tmp;
20091 /* Generate dwarf info. */
20092 mem = gen_frame_mem (SImode,
20093 plus_constant (Pmode,
20094 stack_pointer_rtx,
20095 offset));
20096 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20097 RTX_FRAME_RELATED_P (tmp) = 1;
20098 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20100 mem = gen_frame_mem (SImode,
20101 plus_constant (Pmode,
20102 stack_pointer_rtx,
20103 offset + 4));
20104 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20105 RTX_FRAME_RELATED_P (tmp) = 1;
20106 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20108 offset += 8;
20109 j += 2;
20111 else
20113 /* Emit a single word store. */
20114 if (offset < 0)
20116 /* Allocate stack space for all saved registers. */
20117 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20118 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20119 mem = gen_frame_mem (SImode, tmp);
20120 offset = 0;
20122 else if (offset > 0)
20123 mem = gen_frame_mem (SImode,
20124 plus_constant (Pmode,
20125 stack_pointer_rtx,
20126 offset));
20127 else
20128 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20130 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20131 RTX_FRAME_RELATED_P (tmp) = 1;
20132 tmp = emit_insn (tmp);
20134 /* Record the first store insn. */
20135 if (dwarf_index == 1)
20136 insn = tmp;
20138 /* Generate dwarf info. */
20139 mem = gen_frame_mem (SImode,
20140 plus_constant(Pmode,
20141 stack_pointer_rtx,
20142 offset));
20143 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20144 RTX_FRAME_RELATED_P (tmp) = 1;
20145 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20147 offset += 4;
20148 j += 1;
20151 else
20152 j++;
20154 /* Attach dwarf info to the first insn we generate. */
20155 gcc_assert (insn != NULL_RTX);
20156 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20157 RTX_FRAME_RELATED_P (insn) = 1;
20160 /* Generate and emit an insn that we will recognize as a push_multi.
20161 Unfortunately, since this insn does not reflect very well the actual
20162 semantics of the operation, we need to annotate the insn for the benefit
20163 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20164 MASK for registers that should be annotated for DWARF2 frame unwind
20165 information. */
20166 static rtx
20167 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20169 int num_regs = 0;
20170 int num_dwarf_regs = 0;
20171 int i, j;
20172 rtx par;
20173 rtx dwarf;
20174 int dwarf_par_index;
20175 rtx tmp, reg;
20177 /* We don't record the PC in the dwarf frame information. */
20178 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20180 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20182 if (mask & (1 << i))
20183 num_regs++;
20184 if (dwarf_regs_mask & (1 << i))
20185 num_dwarf_regs++;
20188 gcc_assert (num_regs && num_regs <= 16);
20189 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20191 /* For the body of the insn we are going to generate an UNSPEC in
20192 parallel with several USEs. This allows the insn to be recognized
20193 by the push_multi pattern in the arm.md file.
20195 The body of the insn looks something like this:
20197 (parallel [
20198 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20199 (const_int:SI <num>)))
20200 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20201 (use (reg:SI XX))
20202 (use (reg:SI YY))
20206 For the frame note however, we try to be more explicit and actually
20207 show each register being stored into the stack frame, plus a (single)
20208 decrement of the stack pointer. We do it this way in order to be
20209 friendly to the stack unwinding code, which only wants to see a single
20210 stack decrement per instruction. The RTL we generate for the note looks
20211 something like this:
20213 (sequence [
20214 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20215 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20216 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20217 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20221 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20222 instead we'd have a parallel expression detailing all
20223 the stores to the various memory addresses so that debug
20224 information is more up-to-date. Remember however while writing
20225 this to take care of the constraints with the push instruction.
20227 Note also that this has to be taken care of for the VFP registers.
20229 For more see PR43399. */
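/* Purely for illustration (not from the original sources): the PARALLEL
built below is matched by the push_multi pattern and printed as a single
store-multiple, e.g. "push {r4, r5, lr}" (or an "stmfd sp!, {...}" form),
while the SEQUENCE attached as the frame note spells out the same effect
store by store for the unwinder. */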
20231 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20232 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20233 dwarf_par_index = 1;
20235 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20237 if (mask & (1 << i))
20239 reg = gen_rtx_REG (SImode, i);
20241 XVECEXP (par, 0, 0)
20242 = gen_rtx_SET (gen_frame_mem
20243 (BLKmode,
20244 gen_rtx_PRE_MODIFY (Pmode,
20245 stack_pointer_rtx,
20246 plus_constant
20247 (Pmode, stack_pointer_rtx,
20248 -4 * num_regs))
20250 gen_rtx_UNSPEC (BLKmode,
20251 gen_rtvec (1, reg),
20252 UNSPEC_PUSH_MULT));
20254 if (dwarf_regs_mask & (1 << i))
20256 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20257 reg);
20258 RTX_FRAME_RELATED_P (tmp) = 1;
20259 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20262 break;
20266 for (j = 1, i++; j < num_regs; i++)
20268 if (mask & (1 << i))
20270 reg = gen_rtx_REG (SImode, i);
20272 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20274 if (dwarf_regs_mask & (1 << i))
20277 = gen_rtx_SET (gen_frame_mem
20278 (SImode,
20279 plus_constant (Pmode, stack_pointer_rtx,
20280 4 * j)),
20281 reg);
20282 RTX_FRAME_RELATED_P (tmp) = 1;
20283 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20286 j++;
20290 par = emit_insn (par);
20292 tmp = gen_rtx_SET (stack_pointer_rtx,
20293 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20294 RTX_FRAME_RELATED_P (tmp) = 1;
20295 XVECEXP (dwarf, 0, 0) = tmp;
20297 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20299 return par;
20302 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20303 SIZE is the offset to be adjusted.
20304 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20305 static void
20306 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20308 rtx dwarf;
20310 RTX_FRAME_RELATED_P (insn) = 1;
20311 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20312 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20315 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20316 SAVED_REGS_MASK shows which registers need to be restored.
20318 Unfortunately, since this insn does not reflect very well the actual
20319 semantics of the operation, we need to annotate the insn for the benefit
20320 of DWARF2 frame unwind information. */
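/* Illustrative example only: for SAVED_REGS_MASK = {r4, r5, pc} the
parallel built below corresponds to "pop {r4, r5, pc}", i.e. one load
per register plus the explicit SP increment, with REG_CFA_RESTORE notes
attached for r4 and r5 but not for PC. */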
20321 static void
20322 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20324 int num_regs = 0;
20325 int i, j;
20326 rtx par;
20327 rtx dwarf = NULL_RTX;
20328 rtx tmp, reg;
20329 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20330 int offset_adj;
20331 int emit_update;
20333 offset_adj = return_in_pc ? 1 : 0;
20334 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20335 if (saved_regs_mask & (1 << i))
20336 num_regs++;
20338 gcc_assert (num_regs && num_regs <= 16);
20340 /* If SP is in reglist, then we don't emit SP update insn. */
20341 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20343 /* The parallel needs to hold num_regs SETs
20344 and one SET for the stack update. */
20345 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20347 if (return_in_pc)
20348 XVECEXP (par, 0, 0) = ret_rtx;
20350 if (emit_update)
20352 /* Increment the stack pointer, based on there being
20353 num_regs 4-byte registers to restore. */
20354 tmp = gen_rtx_SET (stack_pointer_rtx,
20355 plus_constant (Pmode,
20356 stack_pointer_rtx,
20357 4 * num_regs));
20358 RTX_FRAME_RELATED_P (tmp) = 1;
20359 XVECEXP (par, 0, offset_adj) = tmp;
20362 /* Now restore every reg, which may include PC. */
20363 for (j = 0, i = 0; j < num_regs; i++)
20364 if (saved_regs_mask & (1 << i))
20366 reg = gen_rtx_REG (SImode, i);
20367 if ((num_regs == 1) && emit_update && !return_in_pc)
20369 /* Emit single load with writeback. */
20370 tmp = gen_frame_mem (SImode,
20371 gen_rtx_POST_INC (Pmode,
20372 stack_pointer_rtx));
20373 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20374 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20375 return;
20378 tmp = gen_rtx_SET (reg,
20379 gen_frame_mem
20380 (SImode,
20381 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20382 RTX_FRAME_RELATED_P (tmp) = 1;
20383 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20385 /* We need to maintain a sequence for DWARF info too. As dwarf info
20386 should not have PC, skip PC. */
20387 if (i != PC_REGNUM)
20388 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20390 j++;
20393 if (return_in_pc)
20394 par = emit_jump_insn (par);
20395 else
20396 par = emit_insn (par);
20398 REG_NOTES (par) = dwarf;
20399 if (!return_in_pc)
20400 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20401 stack_pointer_rtx, stack_pointer_rtx);
20404 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20405 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20407 Unfortunately, since this insn does not reflect very well the actual
20408 semantics of the operation, we need to annotate the insn for the benefit
20409 of DWARF2 frame unwind information. */
20410 static void
20411 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20413 int i, j;
20414 rtx par;
20415 rtx dwarf = NULL_RTX;
20416 rtx tmp, reg;
20418 gcc_assert (num_regs && num_regs <= 32);
20420 /* Workaround ARM10 VFPr1 bug. */
20421 if (num_regs == 2 && !arm_arch6)
20423 if (first_reg == 15)
20424 first_reg--;
20426 num_regs++;
20429 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20430 there could be up to 32 D-registers to restore.
20431 If there are more than 16 D-registers, make two recursive calls,
20432 each of which emits one pop_multi instruction. */
20433 if (num_regs > 16)
20435 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20436 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20437 return;
20440 /* The parallel needs to hold num_regs SETs
20441 and one SET for the stack update. */
20442 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20444 /* Increment the stack pointer, based on there being
20445 num_regs 8-byte registers to restore. */
20446 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20447 RTX_FRAME_RELATED_P (tmp) = 1;
20448 XVECEXP (par, 0, 0) = tmp;
20450 /* Now show every reg that will be restored, using a SET for each. */
20451 for (j = 0, i=first_reg; j < num_regs; i += 2)
20453 reg = gen_rtx_REG (DFmode, i);
20455 tmp = gen_rtx_SET (reg,
20456 gen_frame_mem
20457 (DFmode,
20458 plus_constant (Pmode, base_reg, 8 * j)));
20459 RTX_FRAME_RELATED_P (tmp) = 1;
20460 XVECEXP (par, 0, j + 1) = tmp;
20462 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20464 j++;
20467 par = emit_insn (par);
20468 REG_NOTES (par) = dwarf;
20470 /* Make sure the CFA is not left defined in terms of IP_REGNUM, to allow unwinding from FP. */
20471 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20473 RTX_FRAME_RELATED_P (par) = 1;
20474 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20476 else
20477 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20478 base_reg, base_reg);
20481 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20482 even number of registers is being popped, LDRD patterns are created for
20483 all register pairs. If an odd number of registers is popped, the last register is
20484 loaded using an LDR pattern. */
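/* A sketch of the expected sequences (illustrative only):
mask = {r4, r5, r6, r7}: ldrd r4, r5, [sp] ; ldrd r6, r7, [sp, #8] ;
add sp, sp, #16
mask = {r4, r5, r6, r7, pc}: the same two LDRDs and ADD, followed by
"ldr pc, [sp], #4" which performs the return. */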
20485 static void
20486 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20488 int num_regs = 0;
20489 int i, j;
20490 rtx par = NULL_RTX;
20491 rtx dwarf = NULL_RTX;
20492 rtx tmp, reg, tmp1;
20493 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20495 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20496 if (saved_regs_mask & (1 << i))
20497 num_regs++;
20499 gcc_assert (num_regs && num_regs <= 16);
20501 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20502 to be popped. So, if num_regs is even, now it will become odd,
20503 and we can generate pop with PC. If num_regs is odd, it will be
20504 even now, and ldr with return can be generated for PC. */
20505 if (return_in_pc)
20506 num_regs--;
20508 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20510 /* Var j iterates over all the registers to gather all the registers in
20511 saved_regs_mask. Var i gives the index of saved registers in the stack frame.
20512 A PARALLEL RTX of a register pair is created here, so that the pattern for
20513 LDRD can be matched. As PC is always the last register to be popped, and
20514 we have already decremented num_regs if PC is in the mask, we don't have to
20515 worry about PC in this loop. */
20516 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20517 if (saved_regs_mask & (1 << j))
20519 /* Create RTX for memory load. */
20520 reg = gen_rtx_REG (SImode, j);
20521 tmp = gen_rtx_SET (reg,
20522 gen_frame_mem (SImode,
20523 plus_constant (Pmode,
20524 stack_pointer_rtx, 4 * i)));
20525 RTX_FRAME_RELATED_P (tmp) = 1;
20527 if (i % 2 == 0)
20529 /* When saved-register index (i) is even, the RTX to be emitted is
20530 yet to be created. Hence create it first. The LDRD pattern we
20531 are generating is :
20532 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20533 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20534 where target registers need not be consecutive. */
20535 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20536 dwarf = NULL_RTX;
20539 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20540 added as the 0th element; if i is odd, reg_i is added as the 1st element
20541 of the LDRD pattern shown above. */
20542 XVECEXP (par, 0, (i % 2)) = tmp;
20543 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20545 if ((i % 2) == 1)
20547 /* When saved-register index (i) is odd, RTXs for both the registers
20548 to be loaded have been generated in the LDRD pattern given above, and the
20549 pattern can be emitted now. */
20550 par = emit_insn (par);
20551 REG_NOTES (par) = dwarf;
20552 RTX_FRAME_RELATED_P (par) = 1;
20555 i++;
20558 /* If num_regs is odd and return_in_pc is false, or num_regs is even and
20559 return_in_pc is true, the last register is popped using LDR. It can be
20560 PC as well. Hence, adjust the stack first and then use LDR with
20561 post-increment. */
20563 /* Increment the stack pointer, based on there being
20564 num_regs 4-byte registers to restore. */
20565 tmp = gen_rtx_SET (stack_pointer_rtx,
20566 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20567 RTX_FRAME_RELATED_P (tmp) = 1;
20568 tmp = emit_insn (tmp);
20569 if (!return_in_pc)
20571 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20572 stack_pointer_rtx, stack_pointer_rtx);
20575 dwarf = NULL_RTX;
20577 if (((num_regs % 2) == 1 && !return_in_pc)
20578 || ((num_regs % 2) == 0 && return_in_pc))
20580 /* Scan for the single register to be popped. Skip until the saved
20581 register is found. */
20582 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20584 /* Gen LDR with post increment here. */
20585 tmp1 = gen_rtx_MEM (SImode,
20586 gen_rtx_POST_INC (SImode,
20587 stack_pointer_rtx));
20588 set_mem_alias_set (tmp1, get_frame_alias_set ());
20590 reg = gen_rtx_REG (SImode, j);
20591 tmp = gen_rtx_SET (reg, tmp1);
20592 RTX_FRAME_RELATED_P (tmp) = 1;
20593 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20595 if (return_in_pc)
20597 /* If return_in_pc, j must be PC_REGNUM. */
20598 gcc_assert (j == PC_REGNUM);
20599 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20600 XVECEXP (par, 0, 0) = ret_rtx;
20601 XVECEXP (par, 0, 1) = tmp;
20602 par = emit_jump_insn (par);
20604 else
20606 par = emit_insn (tmp);
20607 REG_NOTES (par) = dwarf;
20608 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20609 stack_pointer_rtx, stack_pointer_rtx);
20613 else if ((num_regs % 2) == 1 && return_in_pc)
20615 /* There are 2 registers to be popped. So, generate the pattern
20616 pop_multiple_with_stack_update_and_return to pop in PC. */
20617 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20620 return;
20623 /* LDRD in ARM mode needs consecutive registers as operands. This function
20624 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20625 offset addressing and then generates one separate stack update. This provides
20626 more scheduling freedom, compared to writeback on every load. However,
20627 if the function returns using load into PC directly
20628 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20629 before the last load. TODO: Add a peephole optimization to recognize
20630 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20631 peephole optimization to merge the load at stack-offset zero
20632 with the stack update instruction using load with writeback
20633 in post-index addressing mode. */
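/* For illustration only (not from the original sources): with
SAVED_REGS_MASK = {r4, r5, r7} this emits
ldrd r4, r5, [sp] ; ldr r7, [sp, #8] ; add sp, sp, #12
and if PC is also in the mask, a final "ldr pc, [sp], #4" doubles as
the return. */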
20634 static void
20635 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20637 int j = 0;
20638 int offset = 0;
20639 rtx par = NULL_RTX;
20640 rtx dwarf = NULL_RTX;
20641 rtx tmp, mem;
20643 /* Restore saved registers. */
20644 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20645 j = 0;
20646 while (j <= LAST_ARM_REGNUM)
20647 if (saved_regs_mask & (1 << j))
20649 if ((j % 2) == 0
20650 && (saved_regs_mask & (1 << (j + 1)))
20651 && (j + 1) != PC_REGNUM)
20653 /* Current register and next register form register pair for which
20654 LDRD can be generated. PC is always the last register popped, and
20655 we handle it separately. */
20656 if (offset > 0)
20657 mem = gen_frame_mem (DImode,
20658 plus_constant (Pmode,
20659 stack_pointer_rtx,
20660 offset));
20661 else
20662 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20664 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20665 tmp = emit_insn (tmp);
20666 RTX_FRAME_RELATED_P (tmp) = 1;
20668 /* Generate dwarf info. */
20670 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20671 gen_rtx_REG (SImode, j),
20672 NULL_RTX);
20673 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20674 gen_rtx_REG (SImode, j + 1),
20675 dwarf);
20677 REG_NOTES (tmp) = dwarf;
20679 offset += 8;
20680 j += 2;
20682 else if (j != PC_REGNUM)
20684 /* Emit a single word load. */
20685 if (offset > 0)
20686 mem = gen_frame_mem (SImode,
20687 plus_constant (Pmode,
20688 stack_pointer_rtx,
20689 offset));
20690 else
20691 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20693 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20694 tmp = emit_insn (tmp);
20695 RTX_FRAME_RELATED_P (tmp) = 1;
20697 /* Generate dwarf info. */
20698 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20699 gen_rtx_REG (SImode, j),
20700 NULL_RTX);
20702 offset += 4;
20703 j += 1;
20705 else /* j == PC_REGNUM */
20706 j++;
20708 else
20709 j++;
20711 /* Update the stack. */
20712 if (offset > 0)
20714 tmp = gen_rtx_SET (stack_pointer_rtx,
20715 plus_constant (Pmode,
20716 stack_pointer_rtx,
20717 offset));
20718 tmp = emit_insn (tmp);
20719 arm_add_cfa_adjust_cfa_note (tmp, offset,
20720 stack_pointer_rtx, stack_pointer_rtx);
20721 offset = 0;
20724 if (saved_regs_mask & (1 << PC_REGNUM))
20726 /* Only PC is to be popped. */
20727 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20728 XVECEXP (par, 0, 0) = ret_rtx;
20729 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20730 gen_frame_mem (SImode,
20731 gen_rtx_POST_INC (SImode,
20732 stack_pointer_rtx)));
20733 RTX_FRAME_RELATED_P (tmp) = 1;
20734 XVECEXP (par, 0, 1) = tmp;
20735 par = emit_jump_insn (par);
20737 /* Generate dwarf info. */
20738 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20739 gen_rtx_REG (SImode, PC_REGNUM),
20740 NULL_RTX);
20741 REG_NOTES (par) = dwarf;
20742 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20743 stack_pointer_rtx, stack_pointer_rtx);
20747 /* Calculate the size of the return value that is passed in registers. */
20748 static unsigned
20749 arm_size_return_regs (void)
20751 machine_mode mode;
20753 if (crtl->return_rtx != 0)
20754 mode = GET_MODE (crtl->return_rtx);
20755 else
20756 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20758 return GET_MODE_SIZE (mode);
20761 /* Return true if the current function needs to save/restore LR. */
20762 static bool
20763 thumb_force_lr_save (void)
20765 return !cfun->machine->lr_save_eliminated
20766 && (!leaf_function_p ()
20767 || thumb_far_jump_used_p ()
20768 || df_regs_ever_live_p (LR_REGNUM));
20771 /* We do not know if r3 will be available because
20772 an indirect tail call is happening in this
20773 particular case. */
20774 static bool
20775 is_indirect_tailcall_p (rtx call)
20777 rtx pat = PATTERN (call);
20779 /* Indirect tail call. */
20780 pat = XVECEXP (pat, 0, 0);
20781 if (GET_CODE (pat) == SET)
20782 pat = SET_SRC (pat);
20784 pat = XEXP (XEXP (pat, 0), 0);
20785 return REG_P (pat);
20788 /* Return true if r3 is used by any of the tail call insns in the
20789 current function. */
20790 static bool
20791 any_sibcall_could_use_r3 (void)
20793 edge_iterator ei;
20794 edge e;
20796 if (!crtl->tail_call_emit)
20797 return false;
20798 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20799 if (e->flags & EDGE_SIBCALL)
20801 rtx call = BB_END (e->src);
20802 if (!CALL_P (call))
20803 call = prev_nonnote_nondebug_insn (call);
20804 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20805 if (find_regno_fusage (call, USE, 3)
20806 || is_indirect_tailcall_p (call))
20807 return true;
20809 return false;
20813 /* Compute the distance from register FROM to register TO.
20814 These can be the arg pointer (26), the soft frame pointer (25),
20815 the stack pointer (13) or the hard frame pointer (11).
20816 In thumb mode r7 is used as the soft frame pointer, if needed.
20817 Typical stack layout looks like this:
20819 old stack pointer -> | |
20820 ----
20821 | | \
20822 | | saved arguments for
20823 | | vararg functions
20824 | | /
20826 hard FP & arg pointer -> | | \
20827 | | stack
20828 | | frame
20829 | | /
20831 | | \
20832 | | call saved
20833 | | registers
20834 soft frame pointer -> | | /
20836 | | \
20837 | | local
20838 | | variables
20839 locals base pointer -> | | /
20841 | | \
20842 | | outgoing
20843 | | arguments
20844 current stack pointer -> | | /
20847 For a given function some or all of these stack components
20848 may not be needed, giving rise to the possibility of
20849 eliminating some of the registers.
20851 The values returned by this function must reflect the behavior
20852 of arm_expand_prologue() and arm_compute_save_reg_mask().
20854 The sign of the number returned reflects the direction of stack
20855 growth, so the values are positive for all eliminations except
20856 from the soft frame pointer to the hard frame pointer.
20858 SFP may point just inside the local variables block to ensure correct
20859 alignment. */
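/* A worked example, offered only as an illustration (assuming AAPCS
doubleword alignment, no interworking slot and no static chain): an ARM
function that saves {r4, lr}, has 8 bytes of locals and no outgoing
arguments gets saved_args = 0, saved_regs = 8, soft_frame = 8,
locals_base = 16 and outgoing_args = 16, so the elimination offset from
the arg pointer to the stack pointer below is 16 - (0 + 4) = 12. */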
20862 /* Calculate stack offsets. These are used to calculate register elimination
20863 offsets and in prologue/epilogue code. Also calculates which registers
20864 should be saved. */
20866 static arm_stack_offsets *
20867 arm_get_frame_offsets (void)
20869 struct arm_stack_offsets *offsets;
20870 unsigned long func_type;
20871 int leaf;
20872 int saved;
20873 int core_saved;
20874 HOST_WIDE_INT frame_size;
20875 int i;
20877 offsets = &cfun->machine->stack_offsets;
20879 /* We need to know if we are a leaf function. Unfortunately, it
20880 is possible to be called after start_sequence has been called,
20881 which causes get_insns to return the insns for the sequence,
20882 not the function, which will cause leaf_function_p to return
20883 the incorrect result.
19885 We only need to know about leaf functions once reload has completed, and the
20886 frame size cannot be changed after that time, so we can safely
20887 use the cached value. */
20889 if (reload_completed)
20890 return offsets;
20892 /* Initially this is the size of the local variables. It will be translated
20893 into an offset once we have determined the size of preceding data. */
20894 frame_size = ROUND_UP_WORD (get_frame_size ());
20896 leaf = leaf_function_p ();
20898 /* Space for variadic functions. */
20899 offsets->saved_args = crtl->args.pretend_args_size;
20901 /* In Thumb mode this is incorrect, but never used. */
20902 offsets->frame
20903 = (offsets->saved_args
20904 + arm_compute_static_chain_stack_bytes ()
20905 + (frame_pointer_needed ? 4 : 0));
20907 if (TARGET_32BIT)
20909 unsigned int regno;
20911 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20912 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20913 saved = core_saved;
20915 /* We know that SP will be doubleword aligned on entry, and we must
20916 preserve that condition at any subroutine call. We also require the
20917 soft frame pointer to be doubleword aligned. */
20919 if (TARGET_REALLY_IWMMXT)
20921 /* Check for the call-saved iWMMXt registers. */
20922 for (regno = FIRST_IWMMXT_REGNUM;
20923 regno <= LAST_IWMMXT_REGNUM;
20924 regno++)
20925 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20926 saved += 8;
20929 func_type = arm_current_func_type ();
20930 /* Space for saved VFP registers. */
20931 if (! IS_VOLATILE (func_type)
20932 && TARGET_HARD_FLOAT && TARGET_VFP)
20933 saved += arm_get_vfp_saved_size ();
20935 else /* TARGET_THUMB1 */
20937 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20938 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20939 saved = core_saved;
20940 if (TARGET_BACKTRACE)
20941 saved += 16;
20944 /* Saved registers include the stack frame. */
20945 offsets->saved_regs
20946 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20947 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20949 /* A leaf function does not need any stack alignment if it has nothing
20950 on the stack. */
20951 if (leaf && frame_size == 0
20952 /* However if it calls alloca(), we have a dynamically allocated
20953 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20954 && ! cfun->calls_alloca)
20956 offsets->outgoing_args = offsets->soft_frame;
20957 offsets->locals_base = offsets->soft_frame;
20958 return offsets;
20961 /* Ensure SFP has the correct alignment. */
20962 if (ARM_DOUBLEWORD_ALIGN
20963 && (offsets->soft_frame & 7))
20965 offsets->soft_frame += 4;
20966 /* Try to align stack by pushing an extra reg. Don't bother doing this
20967 when there is a stack frame as the alignment will be rolled into
20968 the normal stack adjustment. */
20969 if (frame_size + crtl->outgoing_args_size == 0)
20971 int reg = -1;
20973 /* Register r3 is caller-saved. Normally it does not need to be
20974 saved on entry by the prologue. However if we choose to save
20975 it for padding then we may confuse the compiler into thinking
20976 a prologue sequence is required when in fact it is not. This
20977 will occur when shrink-wrapping if r3 is used as a scratch
20978 register and there are no other callee-saved writes.
20980 This situation can be avoided when other callee-saved registers
20981 are available and r3 is not mandatory if we choose a callee-saved
20982 register for padding. */
20983 bool prefer_callee_reg_p = false;
20985 /* If it is safe to use r3, then do so. This sometimes
20986 generates better code on Thumb-2 by avoiding the need to
20987 use 32-bit push/pop instructions. */
20988 if (! any_sibcall_could_use_r3 ()
20989 && arm_size_return_regs () <= 12
20990 && (offsets->saved_regs_mask & (1 << 3)) == 0
20991 && (TARGET_THUMB2
20992 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20994 reg = 3;
20995 if (!TARGET_THUMB2)
20996 prefer_callee_reg_p = true;
20998 if (reg == -1
20999 || prefer_callee_reg_p)
21001 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21003 /* Avoid fixed registers; they may be changed at
21004 arbitrary times so it's unsafe to restore them
21005 during the epilogue. */
21006 if (!fixed_regs[i]
21007 && (offsets->saved_regs_mask & (1 << i)) == 0)
21009 reg = i;
21010 break;
21015 if (reg != -1)
21017 offsets->saved_regs += 4;
21018 offsets->saved_regs_mask |= (1 << reg);
21023 offsets->locals_base = offsets->soft_frame + frame_size;
21024 offsets->outgoing_args = (offsets->locals_base
21025 + crtl->outgoing_args_size);
21027 if (ARM_DOUBLEWORD_ALIGN)
21029 /* Ensure SP remains doubleword aligned. */
21030 if (offsets->outgoing_args & 7)
21031 offsets->outgoing_args += 4;
21032 gcc_assert (!(offsets->outgoing_args & 7));
21035 return offsets;
21039 /* Calculate the relative offsets for the different stack pointers. Positive
21040 offsets are in the direction of stack growth. */
21042 HOST_WIDE_INT
21043 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21045 arm_stack_offsets *offsets;
21047 offsets = arm_get_frame_offsets ();
21049 /* OK, now we have enough information to compute the distances.
21050 There must be an entry in these switch tables for each pair
21051 of registers in ELIMINABLE_REGS, even if some of the entries
21052 seem to be redundant or useless. */
21053 switch (from)
21055 case ARG_POINTER_REGNUM:
21056 switch (to)
21058 case THUMB_HARD_FRAME_POINTER_REGNUM:
21059 return 0;
21061 case FRAME_POINTER_REGNUM:
21062 /* This is the reverse of the soft frame pointer
21063 to hard frame pointer elimination below. */
21064 return offsets->soft_frame - offsets->saved_args;
21066 case ARM_HARD_FRAME_POINTER_REGNUM:
21067 /* This is only non-zero in the case where the static chain register
21068 is stored above the frame. */
21069 return offsets->frame - offsets->saved_args - 4;
21071 case STACK_POINTER_REGNUM:
21072 /* If nothing has been pushed on the stack at all
21073 then this will return -4. This *is* correct! */
21074 return offsets->outgoing_args - (offsets->saved_args + 4);
21076 default:
21077 gcc_unreachable ();
21079 gcc_unreachable ();
21081 case FRAME_POINTER_REGNUM:
21082 switch (to)
21084 case THUMB_HARD_FRAME_POINTER_REGNUM:
21085 return 0;
21087 case ARM_HARD_FRAME_POINTER_REGNUM:
21088 /* The hard frame pointer points to the top entry in the
21089 stack frame. The soft frame pointer to the bottom entry
21090 in the stack frame. If there is no stack frame at all,
21091 then they are identical. */
21093 return offsets->frame - offsets->soft_frame;
21095 case STACK_POINTER_REGNUM:
21096 return offsets->outgoing_args - offsets->soft_frame;
21098 default:
21099 gcc_unreachable ();
21101 gcc_unreachable ();
21103 default:
21104 /* You cannot eliminate from the stack pointer.
21105 In theory you could eliminate from the hard frame
21106 pointer to the stack pointer, but this will never
21107 happen, since if a stack frame is not needed the
21108 hard frame pointer will never be used. */
21109 gcc_unreachable ();
21113 /* Given FROM and TO register numbers, say whether this elimination is
21114 allowed. Frame pointer elimination is automatically handled.
21116 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21117 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21118 pointer, we must eliminate FRAME_POINTER_REGNUM into
21119 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21120 ARG_POINTER_REGNUM. */
21122 bool
21123 arm_can_eliminate (const int from, const int to)
21125 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21126 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21127 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21128 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21129 true);
21132 /* Emit RTL to save coprocessor registers on function entry. Returns the
21133 number of bytes pushed. */
21135 static int
21136 arm_save_coproc_regs(void)
21138 int saved_size = 0;
21139 unsigned reg;
21140 unsigned start_reg;
21141 rtx insn;
21143 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21144 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21146 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21147 insn = gen_rtx_MEM (V2SImode, insn);
21148 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21149 RTX_FRAME_RELATED_P (insn) = 1;
21150 saved_size += 8;
21153 if (TARGET_HARD_FLOAT && TARGET_VFP)
21155 start_reg = FIRST_VFP_REGNUM;
21157 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21159 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21160 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21162 if (start_reg != reg)
21163 saved_size += vfp_emit_fstmd (start_reg,
21164 (reg - start_reg) / 2);
21165 start_reg = reg + 2;
21168 if (start_reg != reg)
21169 saved_size += vfp_emit_fstmd (start_reg,
21170 (reg - start_reg) / 2);
21172 return saved_size;
21176 /* Set the Thumb frame pointer from the stack pointer. */
21178 static void
21179 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21181 HOST_WIDE_INT amount;
21182 rtx insn, dwarf;
21184 amount = offsets->outgoing_args - offsets->locals_base;
21185 if (amount < 1024)
21186 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21187 stack_pointer_rtx, GEN_INT (amount)));
21188 else
21190 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21191 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21192 expects the first two operands to be the same. */
21193 if (TARGET_THUMB2)
21195 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21196 stack_pointer_rtx,
21197 hard_frame_pointer_rtx));
21199 else
21201 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21202 hard_frame_pointer_rtx,
21203 stack_pointer_rtx));
21205 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21206 plus_constant (Pmode, stack_pointer_rtx, amount));
21207 RTX_FRAME_RELATED_P (dwarf) = 1;
21208 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21211 RTX_FRAME_RELATED_P (insn) = 1;
21214 struct scratch_reg {
21215 rtx reg;
21216 bool saved;
21219 /* Return a short-lived scratch register for use as a 2nd scratch register on
21220 function entry after the registers are saved in the prologue. This register
21221 must be released by means of release_scratch_register_on_entry. IP is not
21222 considered since it is always used as the 1st scratch register if available.
21224 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21225 mask of live registers. */
21227 static void
21228 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21229 unsigned long live_regs)
21231 int regno = -1;
21233 sr->saved = false;
21235 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21236 regno = LR_REGNUM;
21237 else
21239 unsigned int i;
21241 for (i = 4; i < 11; i++)
21242 if (regno1 != i && (live_regs & (1 << i)) != 0)
21244 regno = i;
21245 break;
21248 if (regno < 0)
21250 /* If IP is used as the 1st scratch register for a nested function,
21251 then either r3 wasn't available or is used to preserve IP. */
21252 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21253 regno1 = 3;
21254 regno = (regno1 == 3 ? 2 : 3);
21255 sr->saved
21256 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21257 regno);
21261 sr->reg = gen_rtx_REG (SImode, regno);
21262 if (sr->saved)
21264 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21265 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21266 rtx x = gen_rtx_SET (stack_pointer_rtx,
21267 plus_constant (Pmode, stack_pointer_rtx, -4));
21268 RTX_FRAME_RELATED_P (insn) = 1;
21269 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21273 /* Release a scratch register obtained from the preceding function. */
21275 static void
21276 release_scratch_register_on_entry (struct scratch_reg *sr)
21278 if (sr->saved)
21280 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21281 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21282 rtx x = gen_rtx_SET (stack_pointer_rtx,
21283 plus_constant (Pmode, stack_pointer_rtx, 4));
21284 RTX_FRAME_RELATED_P (insn) = 1;
21285 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21289 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21291 #if PROBE_INTERVAL > 4096
21292 #error Cannot use indexed addressing mode for stack probing
21293 #endif
21295 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21296 inclusive. These are offsets from the current stack pointer. REGNO1
21297 is the index number of the 1st scratch register and LIVE_REGS is the
21298 mask of live registers. */
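/* A hedged numerical sketch (not from the original sources), using the
default PROBE_INTERVAL of 4096: FIRST = 0 and SIZE = 8192 probes the
words at sp - 4096 and sp - 8192 with the unrolled code below, while
anything larger than 5 * PROBE_INTERVAL falls through to the run-time
loop emitted via gen_probe_stack_range. */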
21300 static void
21301 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21302 unsigned int regno1, unsigned long live_regs)
21304 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21306 /* See if we have a constant small number of probes to generate. If so,
21307 that's the easy case. */
21308 if (size <= PROBE_INTERVAL)
21310 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21311 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21312 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21315 /* The run-time loop is made up of 10 insns in the generic case while the
21316 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21317 else if (size <= 5 * PROBE_INTERVAL)
21319 HOST_WIDE_INT i, rem;
21321 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21322 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21323 emit_stack_probe (reg1);
21325 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21326 it exceeds SIZE. If only two probes are needed, this will not
21327 generate any code. Then probe at FIRST + SIZE. */
21328 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21330 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21331 emit_stack_probe (reg1);
21334 rem = size - (i - PROBE_INTERVAL);
21335 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21337 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21338 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21340 else
21341 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21344 /* Otherwise, do the same as above, but in a loop. Note that we must be
21345 extra careful with variables wrapping around because we might be at
21346 the very top (or the very bottom) of the address space and we have
21347 to be able to handle this case properly; in particular, we use an
21348 equality test for the loop condition. */
21349 else
21351 HOST_WIDE_INT rounded_size;
21352 struct scratch_reg sr;
21354 get_scratch_register_on_entry (&sr, regno1, live_regs);
21356 emit_move_insn (reg1, GEN_INT (first));
21359 /* Step 1: round SIZE to the previous multiple of the interval. */
21361 rounded_size = size & -PROBE_INTERVAL;
21362 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21365 /* Step 2: compute initial and final value of the loop counter. */
21367 /* TEST_ADDR = SP + FIRST. */
21368 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21370 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21371 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21374 /* Step 3: the loop
21378 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21379 probe at TEST_ADDR
21381 while (TEST_ADDR != LAST_ADDR)
21383 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21384 until it is equal to ROUNDED_SIZE. */
21386 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21389 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21390 that SIZE is equal to ROUNDED_SIZE. */
21392 if (size != rounded_size)
21394 HOST_WIDE_INT rem = size - rounded_size;
21396 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21398 emit_set_insn (sr.reg,
21399 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21400 emit_stack_probe (plus_constant (Pmode, sr.reg,
21401 PROBE_INTERVAL - rem));
21403 else
21404 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21407 release_scratch_register_on_entry (&sr);
21410 /* Make sure nothing is scheduled before we are done. */
21411 emit_insn (gen_blockage ());
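/* A worked example of the unrolled case above, assuming the default
   4096-byte PROBE_INTERVAL: for FIRST == 12288 and SIZE == 10000 the
   probes land 16384, 20480 and 22288 bytes below the incoming stack
   pointer, i.e. at FIRST + PROBE_INTERVAL, FIRST + 2 * PROBE_INTERVAL
   and finally FIRST + SIZE.  */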
21414 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21415 absolute addresses. */
21417 const char *
21418 output_probe_stack_range (rtx reg1, rtx reg2)
21420 static int labelno = 0;
21421 char loop_lab[32];
21422 rtx xops[2];
21424 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21426 /* Loop. */
21427 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21429 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21430 xops[0] = reg1;
21431 xops[1] = GEN_INT (PROBE_INTERVAL);
21432 output_asm_insn ("sub\t%0, %0, %1", xops);
21434 /* Probe at TEST_ADDR. */
21435 output_asm_insn ("str\tr0, [%0, #0]", xops);
21437 /* Test if TEST_ADDR == LAST_ADDR. */
21438 xops[1] = reg2;
21439 output_asm_insn ("cmp\t%0, %1", xops);
21441 /* Branch. */
21442 fputs ("\tbne\t", asm_out_file);
21443 assemble_name_raw (asm_out_file, loop_lab);
21444 fputc ('\n', asm_out_file);
21446 return "";
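/* The loop emitted above looks roughly like this, where the register
   numbers are whatever the caller picked and #4096 assumes the default
   probe interval:

	.LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0  */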
21449 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21450 function. */
21451 void
21452 arm_expand_prologue (void)
21454 rtx amount;
21455 rtx insn;
21456 rtx ip_rtx;
21457 unsigned long live_regs_mask;
21458 unsigned long func_type;
21459 int fp_offset = 0;
21460 int saved_pretend_args = 0;
21461 int saved_regs = 0;
21462 unsigned HOST_WIDE_INT args_to_push;
21463 HOST_WIDE_INT size;
21464 arm_stack_offsets *offsets;
21465 bool clobber_ip;
21467 func_type = arm_current_func_type ();
21469 /* Naked functions don't have prologues. */
21470 if (IS_NAKED (func_type))
21472 if (flag_stack_usage_info)
21473 current_function_static_stack_size = 0;
21474 return;
21477 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21478 args_to_push = crtl->args.pretend_args_size;
21480 /* Compute which registers we will have to save onto the stack. */
21481 offsets = arm_get_frame_offsets ();
21482 live_regs_mask = offsets->saved_regs_mask;
21484 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21486 if (IS_STACKALIGN (func_type))
21488 rtx r0, r1;
21490 /* Handle a word-aligned stack pointer. We generate the following:
21492 mov r0, sp
21493 bic r1, r0, #7
21494 mov sp, r1
21495 <save and restore r0 in normal prologue/epilogue>
21496 mov sp, r0
21497 bx lr
21499 The unwinder doesn't need to know about the stack realignment.
21500 Just tell it we saved SP in r0. */
21501 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21503 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21504 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21506 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21507 RTX_FRAME_RELATED_P (insn) = 1;
21508 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21510 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21512 /* ??? The CFA changes here, which may cause GDB to conclude that it
21513 has entered a different function. That said, the unwind info is
21514 correct, individually, before and after this instruction because
21515 we've described the save of SP, which will override the default
21516 handling of SP as restoring from the CFA. */
21517 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21520 /* The static chain register is the same as the IP register. If it is
21521 clobbered when creating the frame, we need to save and restore it. */
21522 clobber_ip = IS_NESTED (func_type)
21523 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21524 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21525 && !df_regs_ever_live_p (LR_REGNUM)
21526 && arm_r3_live_at_start_p ()));
21528 /* Find somewhere to store IP whilst the frame is being created.
21529 We try the following places in order:
21531 1. The last argument register r3 if it is available.
21532 2. A slot on the stack above the frame if there are no
21533 arguments to push onto the stack.
21534 3. Register r3 again, after pushing the argument registers
21535 onto the stack, if this is a varargs function.
21536 4. The last slot on the stack created for the arguments to
21537 push, if this isn't a varargs function.
21539 Note - we only need to tell the dwarf2 backend about the SP
21540 adjustment in the second variant; the static chain register
21541 doesn't need to be unwound, as it doesn't contain a value
21542 inherited from the caller. */
21543 if (clobber_ip)
21545 if (!arm_r3_live_at_start_p ())
21546 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21547 else if (args_to_push == 0)
21549 rtx addr, dwarf;
21551 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21552 saved_regs += 4;
21554 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21555 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21556 fp_offset = 4;
21558 /* Just tell the dwarf backend that we adjusted SP. */
21559 dwarf = gen_rtx_SET (stack_pointer_rtx,
21560 plus_constant (Pmode, stack_pointer_rtx,
21561 -fp_offset));
21562 RTX_FRAME_RELATED_P (insn) = 1;
21563 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21565 else
21567 /* Store the args on the stack. */
21568 if (cfun->machine->uses_anonymous_args)
21570 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21571 (0xf0 >> (args_to_push / 4)) & 0xf);
21572 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21573 saved_pretend_args = 1;
21575 else
21577 rtx addr, dwarf;
21579 if (args_to_push == 4)
21580 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21581 else
21582 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21583 plus_constant (Pmode,
21584 stack_pointer_rtx,
21585 -args_to_push));
21587 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21589 /* Just tell the dwarf backend that we adjusted SP. */
21590 dwarf = gen_rtx_SET (stack_pointer_rtx,
21591 plus_constant (Pmode, stack_pointer_rtx,
21592 -args_to_push));
21593 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21596 RTX_FRAME_RELATED_P (insn) = 1;
21597 fp_offset = args_to_push;
21598 args_to_push = 0;
21602 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21604 if (IS_INTERRUPT (func_type))
21606 /* Interrupt functions must not corrupt any registers.
21607 Creating a frame pointer however, corrupts the IP
21608 register, so we must push it first. */
21609 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21611 /* Do not set RTX_FRAME_RELATED_P on this insn.
21612 The dwarf stack unwinding code only wants to see one
21613 stack decrement per function, and this is not it. If
21614 this instruction is labeled as being part of the frame
21615 creation sequence then dwarf2out_frame_debug_expr will
21616 die when it encounters the assignment of IP to FP
21617 later on, since the use of SP here establishes SP as
21618 the CFA register and not IP.
21620 Anyway this instruction is not really part of the stack
21621 frame creation although it is part of the prologue. */
21624 insn = emit_set_insn (ip_rtx,
21625 plus_constant (Pmode, stack_pointer_rtx,
21626 fp_offset));
21627 RTX_FRAME_RELATED_P (insn) = 1;
21630 if (args_to_push)
21632 /* Push the argument registers, or reserve space for them. */
21633 if (cfun->machine->uses_anonymous_args)
21634 insn = emit_multi_reg_push
21635 ((0xf0 >> (args_to_push / 4)) & 0xf,
21636 (0xf0 >> (args_to_push / 4)) & 0xf);
21637 else
21638 insn = emit_insn
21639 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21640 GEN_INT (- args_to_push)));
21641 RTX_FRAME_RELATED_P (insn) = 1;
21644 /* If this is an interrupt service routine, and the link register
21645 is going to be pushed, and we're not generating the extra
21646 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21647 subtracting four from LR now will mean that the function return
21648 can be done with a single instruction. */
21649 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21650 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21651 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21652 && TARGET_ARM)
21654 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21656 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21659 if (live_regs_mask)
21661 unsigned long dwarf_regs_mask = live_regs_mask;
21663 saved_regs += bit_count (live_regs_mask) * 4;
21664 if (optimize_size && !frame_pointer_needed
21665 && saved_regs == offsets->saved_regs - offsets->saved_args)
21667 /* If no coprocessor registers are being pushed and we don't have
21668 to worry about a frame pointer then push extra registers to
21669 create the stack frame. This is done in a way that does not
21670 alter the frame layout, so is independent of the epilogue. */
21671 int n;
21672 int frame;
21673 n = 0;
21674 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21675 n++;
21676 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21677 if (frame && n * 4 >= frame)
21679 n = frame / 4;
21680 live_regs_mask |= (1 << n) - 1;
21681 saved_regs += frame;
21685 if (TARGET_LDRD
21686 && current_tune->prefer_ldrd_strd
21687 && !optimize_function_for_size_p (cfun))
21689 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21690 if (TARGET_THUMB2)
21691 thumb2_emit_strd_push (live_regs_mask);
21692 else if (TARGET_ARM
21693 && !TARGET_APCS_FRAME
21694 && !IS_INTERRUPT (func_type))
21695 arm_emit_strd_push (live_regs_mask);
21696 else
21698 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21699 RTX_FRAME_RELATED_P (insn) = 1;
21702 else
21704 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21705 RTX_FRAME_RELATED_P (insn) = 1;
21709 if (! IS_VOLATILE (func_type))
21710 saved_regs += arm_save_coproc_regs ();
21712 if (frame_pointer_needed && TARGET_ARM)
21714 /* Create the new frame pointer. */
21715 if (TARGET_APCS_FRAME)
21717 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21718 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21719 RTX_FRAME_RELATED_P (insn) = 1;
21721 else
21723 insn = GEN_INT (saved_regs - (4 + fp_offset));
21724 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21725 stack_pointer_rtx, insn));
21726 RTX_FRAME_RELATED_P (insn) = 1;
21730 size = offsets->outgoing_args - offsets->saved_args;
21731 if (flag_stack_usage_info)
21732 current_function_static_stack_size = size;
21734 /* If this isn't an interrupt service routine and we have a frame, then do
21735 stack checking. We use IP as the first scratch register, except for the
21736 non-APCS nested functions if LR or r3 is available (see clobber_ip).  */
21737 if (!IS_INTERRUPT (func_type)
21738 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21740 unsigned int regno;
21742 if (!IS_NESTED (func_type) || clobber_ip)
21743 regno = IP_REGNUM;
21744 else if (df_regs_ever_live_p (LR_REGNUM))
21745 regno = LR_REGNUM;
21746 else
21747 regno = 3;
21749 if (crtl->is_leaf && !cfun->calls_alloca)
21751 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21752 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21753 size - STACK_CHECK_PROTECT,
21754 regno, live_regs_mask);
21756 else if (size > 0)
21757 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21758 regno, live_regs_mask);
21761 /* Recover the static chain register. */
21762 if (clobber_ip)
21764 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21765 insn = gen_rtx_REG (SImode, 3);
21766 else
21768 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21769 insn = gen_frame_mem (SImode, insn);
21771 emit_set_insn (ip_rtx, insn);
21772 emit_insn (gen_force_register_use (ip_rtx));
21775 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21777 /* This add can produce multiple insns for a large constant, so we
21778 need to get tricky. */
21779 rtx_insn *last = get_last_insn ();
21781 amount = GEN_INT (offsets->saved_args + saved_regs
21782 - offsets->outgoing_args);
21784 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21785 amount));
21788 last = last ? NEXT_INSN (last) : get_insns ();
21789 RTX_FRAME_RELATED_P (last) = 1;
21791 while (last != insn);
21793 /* If the frame pointer is needed, emit a special barrier that
21794 will prevent the scheduler from moving stores to the frame
21795 before the stack adjustment. */
21796 if (frame_pointer_needed)
21797 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21798 hard_frame_pointer_rtx));
21802 if (frame_pointer_needed && TARGET_THUMB2)
21803 thumb_set_frame_pointer (offsets);
21805 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21807 unsigned long mask;
21809 mask = live_regs_mask;
21810 mask &= THUMB2_WORK_REGS;
21811 if (!IS_NESTED (func_type))
21812 mask |= (1 << IP_REGNUM);
21813 arm_load_pic_register (mask);
21816 /* If we are profiling, make sure no instructions are scheduled before
21817 the call to mcount. Similarly if the user has requested no
21818 scheduling in the prolog. Similarly if we want non-call exceptions
21819 using the EABI unwinder, to prevent faulting instructions from being
21820 swapped with a stack adjustment. */
21821 if (crtl->profile || !TARGET_SCHED_PROLOG
21822 || (arm_except_unwind_info (&global_options) == UI_TARGET
21823 && cfun->can_throw_non_call_exceptions))
21824 emit_insn (gen_blockage ());
21826 /* If the link register is being kept alive, with the return address in it,
21827 then make sure that it does not get reused by the ce2 pass. */
21828 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21829 cfun->machine->lr_save_eliminated = 1;
21832 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21833 static void
21834 arm_print_condition (FILE *stream)
21836 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21838 /* Branch conversion is not implemented for Thumb-2. */
21839 if (TARGET_THUMB)
21841 output_operand_lossage ("predicated Thumb instruction");
21842 return;
21844 if (current_insn_predicate != NULL)
21846 output_operand_lossage
21847 ("predicated instruction in conditional sequence");
21848 return;
21851 fputs (arm_condition_codes[arm_current_cc], stream);
21853 else if (current_insn_predicate)
21855 enum arm_cond_code code;
21857 if (TARGET_THUMB1)
21859 output_operand_lossage ("predicated Thumb instruction");
21860 return;
21863 code = get_arm_condition_code (current_insn_predicate);
21864 fputs (arm_condition_codes[code], stream);
21869 /* Globally reserved letters: acln
21870 Punctuation letters currently used: @_|?().!#
21871 Lower case letters currently used: bcdefhimpqtvwxyz
21872 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21873 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21875 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21877 If CODE is 'd', then X is a condition operand and the instruction
21878 should only be executed if the condition is true.
21879 If CODE is 'D', then X is a condition operand and the instruction
21880 should only be executed if the condition is false: however, if the mode
21881 of the comparison is CCFPEmode, then always execute the instruction -- we
21882 do this because in these circumstances !GE does not necessarily imply LT;
21883 in these cases the instruction pattern will take care to make sure that
21884 an instruction containing %d will follow, thereby undoing the effects of
21885 doing this instruction unconditionally.
21886 If CODE is 'N' then X is a floating point operand that must be negated
21887 before output.
21888 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21889 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
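/* A couple of concrete illustrations of the simpler codes: for the
   operand (const_int 10), "%B0" prints -11 (the bitwise inverse,
   sign-extended), "%x0" prints #0xa and "%L0" prints 10 (the low 16
   bits).  */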
21890 static void
21891 arm_print_operand (FILE *stream, rtx x, int code)
21893 switch (code)
21895 case '@':
21896 fputs (ASM_COMMENT_START, stream);
21897 return;
21899 case '_':
21900 fputs (user_label_prefix, stream);
21901 return;
21903 case '|':
21904 fputs (REGISTER_PREFIX, stream);
21905 return;
21907 case '?':
21908 arm_print_condition (stream);
21909 return;
21911 case '.':
21912 /* The current condition code for a condition code setting instruction.
21913 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21914 fputc ('s', stream);
21915 arm_print_condition (stream);
21916 return;
21918 case '!':
21919 /* If the instruction is conditionally executed then print
21920 the current condition code, otherwise print 's'. */
21921 gcc_assert (TARGET_THUMB2);
21922 if (current_insn_predicate)
21923 arm_print_condition (stream);
21924 else
21925 fputc ('s', stream);
21926 break;
21928 /* %# is a "break" sequence. It doesn't output anything, but is used to
21929 separate e.g. operand numbers from following text, if that text consists
21930 of further digits which we don't want to be part of the operand
21931 number. */
21932 case '#':
21933 return;
21935 case 'N':
21937 REAL_VALUE_TYPE r;
21938 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21939 fprintf (stream, "%s", fp_const_from_val (&r));
21941 return;
21943 /* An integer or symbol address without a preceding # sign. */
21944 case 'c':
21945 switch (GET_CODE (x))
21947 case CONST_INT:
21948 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21949 break;
21951 case SYMBOL_REF:
21952 output_addr_const (stream, x);
21953 break;
21955 case CONST:
21956 if (GET_CODE (XEXP (x, 0)) == PLUS
21957 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21959 output_addr_const (stream, x);
21960 break;
21962 /* Fall through. */
21964 default:
21965 output_operand_lossage ("Unsupported operand for code '%c'", code);
21967 return;
21969 /* An integer that we want to print in HEX. */
21970 case 'x':
21971 switch (GET_CODE (x))
21973 case CONST_INT:
21974 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21975 break;
21977 default:
21978 output_operand_lossage ("Unsupported operand for code '%c'", code);
21980 return;
21982 case 'B':
21983 if (CONST_INT_P (x))
21985 HOST_WIDE_INT val;
21986 val = ARM_SIGN_EXTEND (~INTVAL (x));
21987 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21989 else
21991 putc ('~', stream);
21992 output_addr_const (stream, x);
21994 return;
21996 case 'b':
21997 /* Print the log2 of a CONST_INT. */
21999 HOST_WIDE_INT val;
22001 if (!CONST_INT_P (x)
22002 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22003 output_operand_lossage ("Unsupported operand for code '%c'", code);
22004 else
22005 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22007 return;
22009 case 'L':
22010 /* The low 16 bits of an immediate constant. */
22011 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22012 return;
22014 case 'i':
22015 fprintf (stream, "%s", arithmetic_instr (x, 1));
22016 return;
22018 case 'I':
22019 fprintf (stream, "%s", arithmetic_instr (x, 0));
22020 return;
22022 case 'S':
22024 HOST_WIDE_INT val;
22025 const char *shift;
22027 shift = shift_op (x, &val);
22029 if (shift)
22031 fprintf (stream, ", %s ", shift);
22032 if (val == -1)
22033 arm_print_operand (stream, XEXP (x, 1), 0);
22034 else
22035 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22038 return;
22040 /* An explanation of the 'Q', 'R' and 'H' register operands:
22042 In a pair of registers containing a DI or DF value the 'Q'
22043 operand returns the register number of the register containing
22044 the least significant part of the value. The 'R' operand returns
22045 the register number of the register containing the most
22046 significant part of the value.
22048 The 'H' operand returns the higher of the two register numbers.
22049 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22050 same as the 'Q' operand, since the most significant part of the
22051 value is held in the lower number register. The reverse is true
22052 on systems where WORDS_BIG_ENDIAN is false.
22054 The purpose of these operands is to distinguish between cases
22055 where the endian-ness of the values is important (for example
22056 when they are added together), and cases where the endian-ness
22057 is irrelevant, but the order of register operations is important.
22058 For example when loading a value from memory into a register
22059 pair, the endian-ness does not matter. Provided that the value
22060 from the lower memory address is put into the lower numbered
22061 register, and the value from the higher address is put into the
22062 higher numbered register, the load will work regardless of whether
22063 the value being loaded is big-wordian or little-wordian. The
22064 order of the two register loads can matter however, if the address
22065 of the memory location is actually held in one of the registers
22066 being overwritten by the load.
22068 The 'Q' and 'R' constraints are also available for 64-bit
22069 constants. */
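/* As an illustration, on a little-endian target (WORDS_BIG_ENDIAN
   false) with a DImode value in the pair {r2, r3}: %Q prints r2, the
   least significant word, %R prints r3, the most significant word,
   and %H, being simply the higher register number, also prints r3.  */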
22070 case 'Q':
22071 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22073 rtx part = gen_lowpart (SImode, x);
22074 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22075 return;
22078 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22080 output_operand_lossage ("invalid operand for code '%c'", code);
22081 return;
22084 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22085 return;
22087 case 'R':
22088 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22090 machine_mode mode = GET_MODE (x);
22091 rtx part;
22093 if (mode == VOIDmode)
22094 mode = DImode;
22095 part = gen_highpart_mode (SImode, mode, x);
22096 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22097 return;
22100 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22102 output_operand_lossage ("invalid operand for code '%c'", code);
22103 return;
22106 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22107 return;
22109 case 'H':
22110 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22112 output_operand_lossage ("invalid operand for code '%c'", code);
22113 return;
22116 asm_fprintf (stream, "%r", REGNO (x) + 1);
22117 return;
22119 case 'J':
22120 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22122 output_operand_lossage ("invalid operand for code '%c'", code);
22123 return;
22126 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22127 return;
22129 case 'K':
22130 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22132 output_operand_lossage ("invalid operand for code '%c'", code);
22133 return;
22136 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22137 return;
22139 case 'm':
22140 asm_fprintf (stream, "%r",
22141 REG_P (XEXP (x, 0))
22142 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22143 return;
22145 case 'M':
22146 asm_fprintf (stream, "{%r-%r}",
22147 REGNO (x),
22148 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22149 return;
22151 /* Like 'M', but writing doubleword vector registers, for use by Neon
22152 insns. */
22153 case 'h':
22155 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22156 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22157 if (numregs == 1)
22158 asm_fprintf (stream, "{d%d}", regno);
22159 else
22160 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22162 return;
22164 case 'd':
22165 /* CONST_TRUE_RTX means always -- that's the default. */
22166 if (x == const_true_rtx)
22167 return;
22169 if (!COMPARISON_P (x))
22171 output_operand_lossage ("invalid operand for code '%c'", code);
22172 return;
22175 fputs (arm_condition_codes[get_arm_condition_code (x)],
22176 stream);
22177 return;
22179 case 'D':
22180 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22181 want to do that. */
22182 if (x == const_true_rtx)
22184 output_operand_lossage ("instruction never executed");
22185 return;
22187 if (!COMPARISON_P (x))
22189 output_operand_lossage ("invalid operand for code '%c'", code);
22190 return;
22193 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22194 (get_arm_condition_code (x))],
22195 stream);
22196 return;
22198 case 's':
22199 case 'V':
22200 case 'W':
22201 case 'X':
22202 case 'Y':
22203 case 'Z':
22204 /* Former Maverick support, removed after GCC-4.7. */
22205 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22206 return;
22208 case 'U':
22209 if (!REG_P (x)
22210 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22211 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22212 /* Bad value for wCG register number. */
22214 output_operand_lossage ("invalid operand for code '%c'", code);
22215 return;
22218 else
22219 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22220 return;
22222 /* Print an iWMMXt control register name. */
22223 case 'w':
22224 if (!CONST_INT_P (x)
22225 || INTVAL (x) < 0
22226 || INTVAL (x) >= 16)
22227 /* Bad value for wC register number. */
22229 output_operand_lossage ("invalid operand for code '%c'", code);
22230 return;
22233 else
22235 static const char * wc_reg_names [16] =
22237 "wCID", "wCon", "wCSSF", "wCASF",
22238 "wC4", "wC5", "wC6", "wC7",
22239 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22240 "wC12", "wC13", "wC14", "wC15"
22243 fputs (wc_reg_names [INTVAL (x)], stream);
22245 return;
22247 /* Print the high single-precision register of a VFP double-precision
22248 register. */
22249 case 'p':
22251 machine_mode mode = GET_MODE (x);
22252 int regno;
22254 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22256 output_operand_lossage ("invalid operand for code '%c'", code);
22257 return;
22260 regno = REGNO (x);
22261 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22263 output_operand_lossage ("invalid operand for code '%c'", code);
22264 return;
22267 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22269 return;
22271 /* Print a VFP/Neon double precision or quad precision register name. */
22272 case 'P':
22273 case 'q':
22275 machine_mode mode = GET_MODE (x);
22276 int is_quad = (code == 'q');
22277 int regno;
22279 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22281 output_operand_lossage ("invalid operand for code '%c'", code);
22282 return;
22285 if (!REG_P (x)
22286 || !IS_VFP_REGNUM (REGNO (x)))
22288 output_operand_lossage ("invalid operand for code '%c'", code);
22289 return;
22292 regno = REGNO (x);
22293 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22294 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22296 output_operand_lossage ("invalid operand for code '%c'", code);
22297 return;
22300 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22301 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22303 return;
22305 /* These two codes print the low/high doubleword register of a Neon quad
22306 register, respectively. For pair-structure types, they can also print
22307 low/high quadword registers. */
22308 case 'e':
22309 case 'f':
22311 machine_mode mode = GET_MODE (x);
22312 int regno;
22314 if ((GET_MODE_SIZE (mode) != 16
22315 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22317 output_operand_lossage ("invalid operand for code '%c'", code);
22318 return;
22321 regno = REGNO (x);
22322 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22324 output_operand_lossage ("invalid operand for code '%c'", code);
22325 return;
22328 if (GET_MODE_SIZE (mode) == 16)
22329 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22330 + (code == 'f' ? 1 : 0));
22331 else
22332 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22333 + (code == 'f' ? 1 : 0));
22335 return;
22337 /* Print a VFPv3 floating-point constant, represented as an integer
22338 index. */
22339 case 'G':
22341 int index = vfp3_const_double_index (x);
22342 gcc_assert (index != -1);
22343 fprintf (stream, "%d", index);
22345 return;
22347 /* Print bits representing opcode features for Neon.
22349 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22350 and polynomials as unsigned.
22352 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22354 Bit 2 is 1 for rounding functions, 0 otherwise. */
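/* For example, a bits value of 3 (signed, float) makes %T, %F and %t
   below all print 'f', while a value of 2 (unsigned, polynomial)
   selects 'p', 'p' and 'u' respectively.  */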
22356 /* Identify the type as 's', 'u', 'p' or 'f'. */
22357 case 'T':
22359 HOST_WIDE_INT bits = INTVAL (x);
22360 fputc ("uspf"[bits & 3], stream);
22362 return;
22364 /* Likewise, but signed and unsigned integers are both 'i'. */
22365 case 'F':
22367 HOST_WIDE_INT bits = INTVAL (x);
22368 fputc ("iipf"[bits & 3], stream);
22370 return;
22372 /* As for 'T', but emit 'u' instead of 'p'. */
22373 case 't':
22375 HOST_WIDE_INT bits = INTVAL (x);
22376 fputc ("usuf"[bits & 3], stream);
22378 return;
22380 /* Bit 2: rounding (vs none). */
22381 case 'O':
22383 HOST_WIDE_INT bits = INTVAL (x);
22384 fputs ((bits & 4) != 0 ? "r" : "", stream);
22386 return;
22388 /* Memory operand for vld1/vst1 instruction. */
22389 case 'A':
22391 rtx addr;
22392 bool postinc = FALSE;
22393 rtx postinc_reg = NULL;
22394 unsigned align, memsize, align_bits;
22396 gcc_assert (MEM_P (x));
22397 addr = XEXP (x, 0);
22398 if (GET_CODE (addr) == POST_INC)
22400 postinc = 1;
22401 addr = XEXP (addr, 0);
22403 if (GET_CODE (addr) == POST_MODIFY)
22405 postinc_reg = XEXP (XEXP (addr, 1), 1);
22406 addr = XEXP (addr, 0);
22408 asm_fprintf (stream, "[%r", REGNO (addr));
22410 /* We know the alignment of this access, so we can emit a hint in the
22411 instruction (for some alignments) as an aid to the memory subsystem
22412 of the target. */
22413 align = MEM_ALIGN (x) >> 3;
22414 memsize = MEM_SIZE (x);
22416 /* Only certain alignment specifiers are supported by the hardware. */
22417 if (memsize == 32 && (align % 32) == 0)
22418 align_bits = 256;
22419 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22420 align_bits = 128;
22421 else if (memsize >= 8 && (align % 8) == 0)
22422 align_bits = 64;
22423 else
22424 align_bits = 0;
22426 if (align_bits != 0)
22427 asm_fprintf (stream, ":%d", align_bits);
22429 asm_fprintf (stream, "]");
22431 if (postinc)
22432 fputs ("!", stream);
22433 if (postinc_reg)
22434 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22436 return;
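/* For example, a 16-byte vld1/vst1 operand whose MEM_ALIGN is at
   least 128 bits is printed as "[r0:128]" (with whatever base
   register the address actually uses); alignments the hardware cannot
   express as a hint produce a plain "[r0]".  */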
22438 case 'C':
22440 rtx addr;
22442 gcc_assert (MEM_P (x));
22443 addr = XEXP (x, 0);
22444 gcc_assert (REG_P (addr));
22445 asm_fprintf (stream, "[%r]", REGNO (addr));
22447 return;
22449 /* Translate an S register number into a D register number and element index. */
22450 case 'y':
22452 machine_mode mode = GET_MODE (x);
22453 int regno;
22455 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22457 output_operand_lossage ("invalid operand for code '%c'", code);
22458 return;
22461 regno = REGNO (x);
22462 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22464 output_operand_lossage ("invalid operand for code '%c'", code);
22465 return;
22468 regno = regno - FIRST_VFP_REGNUM;
22469 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22471 return;
22473 case 'v':
22474 gcc_assert (CONST_DOUBLE_P (x));
22475 int result;
22476 result = vfp3_const_double_for_fract_bits (x);
22477 if (result == 0)
22478 result = vfp3_const_double_for_bits (x);
22479 fprintf (stream, "#%d", result);
22480 return;
22482 /* Register specifier for vld1.16/vst1.16. Translate the S register
22483 number into a D register number and element index. */
22484 case 'z':
22486 machine_mode mode = GET_MODE (x);
22487 int regno;
22489 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22491 output_operand_lossage ("invalid operand for code '%c'", code);
22492 return;
22495 regno = REGNO (x);
22496 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22498 output_operand_lossage ("invalid operand for code '%c'", code);
22499 return;
22502 regno = regno - FIRST_VFP_REGNUM;
22503 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22505 return;
22507 default:
22508 if (x == 0)
22510 output_operand_lossage ("missing operand");
22511 return;
22514 switch (GET_CODE (x))
22516 case REG:
22517 asm_fprintf (stream, "%r", REGNO (x));
22518 break;
22520 case MEM:
22521 output_address (GET_MODE (x), XEXP (x, 0));
22522 break;
22524 case CONST_DOUBLE:
22526 char fpstr[20];
22527 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22528 sizeof (fpstr), 0, 1);
22529 fprintf (stream, "#%s", fpstr);
22531 break;
22533 default:
22534 gcc_assert (GET_CODE (x) != NEG);
22535 fputc ('#', stream);
22536 if (GET_CODE (x) == HIGH)
22538 fputs (":lower16:", stream);
22539 x = XEXP (x, 0);
22542 output_addr_const (stream, x);
22543 break;
22548 /* Target hook for printing a memory address. */
22549 static void
22550 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22552 if (TARGET_32BIT)
22554 int is_minus = GET_CODE (x) == MINUS;
22556 if (REG_P (x))
22557 asm_fprintf (stream, "[%r]", REGNO (x));
22558 else if (GET_CODE (x) == PLUS || is_minus)
22560 rtx base = XEXP (x, 0);
22561 rtx index = XEXP (x, 1);
22562 HOST_WIDE_INT offset = 0;
22563 if (!REG_P (base)
22564 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22566 /* Ensure that BASE is a register
22567    (one of the two operands must be).
22568    Also ensure that SP is not used as an index register. */
22569 std::swap (base, index);
22571 switch (GET_CODE (index))
22573 case CONST_INT:
22574 offset = INTVAL (index);
22575 if (is_minus)
22576 offset = -offset;
22577 asm_fprintf (stream, "[%r, #%wd]",
22578 REGNO (base), offset);
22579 break;
22581 case REG:
22582 asm_fprintf (stream, "[%r, %s%r]",
22583 REGNO (base), is_minus ? "-" : "",
22584 REGNO (index));
22585 break;
22587 case MULT:
22588 case ASHIFTRT:
22589 case LSHIFTRT:
22590 case ASHIFT:
22591 case ROTATERT:
22593 asm_fprintf (stream, "[%r, %s%r",
22594 REGNO (base), is_minus ? "-" : "",
22595 REGNO (XEXP (index, 0)));
22596 arm_print_operand (stream, index, 'S');
22597 fputs ("]", stream);
22598 break;
22601 default:
22602 gcc_unreachable ();
22605 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22606 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22608 gcc_assert (REG_P (XEXP (x, 0)));
22610 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22611 asm_fprintf (stream, "[%r, #%s%d]!",
22612 REGNO (XEXP (x, 0)),
22613 GET_CODE (x) == PRE_DEC ? "-" : "",
22614 GET_MODE_SIZE (mode));
22615 else
22616 asm_fprintf (stream, "[%r], #%s%d",
22617 REGNO (XEXP (x, 0)),
22618 GET_CODE (x) == POST_DEC ? "-" : "",
22619 GET_MODE_SIZE (mode));
22621 else if (GET_CODE (x) == PRE_MODIFY)
22623 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22624 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22625 asm_fprintf (stream, "#%wd]!",
22626 INTVAL (XEXP (XEXP (x, 1), 1)));
22627 else
22628 asm_fprintf (stream, "%r]!",
22629 REGNO (XEXP (XEXP (x, 1), 1)));
22631 else if (GET_CODE (x) == POST_MODIFY)
22633 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22634 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22635 asm_fprintf (stream, "#%wd",
22636 INTVAL (XEXP (XEXP (x, 1), 1)));
22637 else
22638 asm_fprintf (stream, "%r",
22639 REGNO (XEXP (XEXP (x, 1), 1)));
22641 else output_addr_const (stream, x);
22643 else
22645 if (REG_P (x))
22646 asm_fprintf (stream, "[%r]", REGNO (x));
22647 else if (GET_CODE (x) == POST_INC)
22648 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22649 else if (GET_CODE (x) == PLUS)
22651 gcc_assert (REG_P (XEXP (x, 0)));
22652 if (CONST_INT_P (XEXP (x, 1)))
22653 asm_fprintf (stream, "[%r, #%wd]",
22654 REGNO (XEXP (x, 0)),
22655 INTVAL (XEXP (x, 1)));
22656 else
22657 asm_fprintf (stream, "[%r, %r]",
22658 REGNO (XEXP (x, 0)),
22659 REGNO (XEXP (x, 1)));
22661 else
22662 output_addr_const (stream, x);
22666 /* Target hook for indicating whether a punctuation character for
22667 TARGET_PRINT_OPERAND is valid. */
22668 static bool
22669 arm_print_operand_punct_valid_p (unsigned char code)
22671 return (code == '@' || code == '|' || code == '.'
22672 || code == '(' || code == ')' || code == '#'
22673 || (TARGET_32BIT && (code == '?'))
22674 || (TARGET_THUMB2 && (code == '!'))
22675 || (TARGET_THUMB && (code == '_')));
22678 /* Target hook for assembling integer objects. The ARM version needs to
22679 handle word-sized values specially. */
22680 static bool
22681 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22683 machine_mode mode;
22685 if (size == UNITS_PER_WORD && aligned_p)
22687 fputs ("\t.word\t", asm_out_file);
22688 output_addr_const (asm_out_file, x);
22690 /* Mark symbols as position independent. We only do this in the
22691 .text segment, not in the .data segment. */
22692 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22693 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22695 /* See legitimize_pic_address for an explanation of the
22696 TARGET_VXWORKS_RTP check. */
22697 if (!arm_pic_data_is_text_relative
22698 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22699 fputs ("(GOT)", asm_out_file);
22700 else
22701 fputs ("(GOTOFF)", asm_out_file);
22703 fputc ('\n', asm_out_file);
22704 return true;
22707 mode = GET_MODE (x);
22709 if (arm_vector_mode_supported_p (mode))
22711 int i, units;
22713 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22715 units = CONST_VECTOR_NUNITS (x);
22716 size = GET_MODE_UNIT_SIZE (mode);
22718 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22719 for (i = 0; i < units; i++)
22721 rtx elt = CONST_VECTOR_ELT (x, i);
22722 assemble_integer
22723 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22725 else
22726 for (i = 0; i < units; i++)
22728 rtx elt = CONST_VECTOR_ELT (x, i);
22729 assemble_real
22730 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22731 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22734 return true;
22737 return default_assemble_integer (x, size, aligned_p);
22740 static void
22741 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22743 section *s;
22745 if (!TARGET_AAPCS_BASED)
22747 (is_ctor ?
22748 default_named_section_asm_out_constructor
22749 : default_named_section_asm_out_destructor) (symbol, priority);
22750 return;
22753 /* Put these in the .init_array section, using a special relocation. */
22754 if (priority != DEFAULT_INIT_PRIORITY)
22756 char buf[18];
22757 sprintf (buf, "%s.%.5u",
22758 is_ctor ? ".init_array" : ".fini_array",
22759 priority);
22760 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22762 else if (is_ctor)
22763 s = ctors_section;
22764 else
22765 s = dtors_section;
22767 switch_to_section (s);
22768 assemble_align (POINTER_SIZE);
22769 fputs ("\t.word\t", asm_out_file);
22770 output_addr_const (asm_out_file, symbol);
22771 fputs ("(target1)\n", asm_out_file);
22774 /* Add a function to the list of static constructors. */
22776 static void
22777 arm_elf_asm_constructor (rtx symbol, int priority)
22779 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22782 /* Add a function to the list of static destructors. */
22784 static void
22785 arm_elf_asm_destructor (rtx symbol, int priority)
22787 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22790 /* A finite state machine takes care of noticing whether or not instructions
22791 can be conditionally executed, and thus decrease execution time and code
22792 size by deleting branch instructions. The fsm is controlled by
22793 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22795 /* The states of the fsm controlling condition codes are:
22796 0: normal, do nothing special
22797 1: make ASM_OUTPUT_OPCODE not output this instruction
22798 2: make ASM_OUTPUT_OPCODE not output this instruction
22799 3: make instructions conditional
22800 4: make instructions conditional
22802 State transitions (state->state by whom under condition):
22803 0 -> 1 final_prescan_insn if the `target' is a label
22804 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22805 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22806 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22807 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22808 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22809 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22810 (the target insn is arm_target_insn).
22812 If the jump clobbers the conditions then we use states 2 and 4.
22814 A similar thing can be done with conditional return insns.
22816 XXX In case the `target' is an unconditional branch, this conditionalising
22817 of the instructions always reduces code size, but not always execution
22818 time. But then, I want to reduce the code size to somewhere near what
22819 /bin/cc produces. */
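/* As an illustration of what the fsm buys us, a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
     .L1:

   can instead be emitted as

	cmp	r0, #0
	addne	r1, r1, #1

   saving the branch.  This is only a sketch; the real decision also
   weighs max_insns_skipped and the other conditions checked in
   arm_final_prescan_insn below.  */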
22821 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22822 instructions. When a COND_EXEC instruction is seen the subsequent
22823 instructions are scanned so that multiple conditional instructions can be
22824 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22825 specify the length and true/false mask for the IT block. These will be
22826 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
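/* The Thumb-2 equivalent of the sketch above needs an IT instruction,
   e.g.

	cmp	r0, #0
	it	ne
	addne	r1, r1, #1

   thumb2_asm_output_opcode below builds the it/ite/... prefix from
   arm_condexec_mask and arm_condexec_masklen.  */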
22828 /* Returns the index of the ARM condition code string in
22829 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22830 COMPARISON should be an rtx like `(eq (...) (...))'. */
22832 enum arm_cond_code
22833 maybe_get_arm_condition_code (rtx comparison)
22835 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22836 enum arm_cond_code code;
22837 enum rtx_code comp_code = GET_CODE (comparison);
22839 if (GET_MODE_CLASS (mode) != MODE_CC)
22840 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22841 XEXP (comparison, 1));
22843 switch (mode)
22845 case CC_DNEmode: code = ARM_NE; goto dominance;
22846 case CC_DEQmode: code = ARM_EQ; goto dominance;
22847 case CC_DGEmode: code = ARM_GE; goto dominance;
22848 case CC_DGTmode: code = ARM_GT; goto dominance;
22849 case CC_DLEmode: code = ARM_LE; goto dominance;
22850 case CC_DLTmode: code = ARM_LT; goto dominance;
22851 case CC_DGEUmode: code = ARM_CS; goto dominance;
22852 case CC_DGTUmode: code = ARM_HI; goto dominance;
22853 case CC_DLEUmode: code = ARM_LS; goto dominance;
22854 case CC_DLTUmode: code = ARM_CC;
22856 dominance:
22857 if (comp_code == EQ)
22858 return ARM_INVERSE_CONDITION_CODE (code);
22859 if (comp_code == NE)
22860 return code;
22861 return ARM_NV;
22863 case CC_NOOVmode:
22864 switch (comp_code)
22866 case NE: return ARM_NE;
22867 case EQ: return ARM_EQ;
22868 case GE: return ARM_PL;
22869 case LT: return ARM_MI;
22870 default: return ARM_NV;
22873 case CC_Zmode:
22874 switch (comp_code)
22876 case NE: return ARM_NE;
22877 case EQ: return ARM_EQ;
22878 default: return ARM_NV;
22881 case CC_Nmode:
22882 switch (comp_code)
22884 case NE: return ARM_MI;
22885 case EQ: return ARM_PL;
22886 default: return ARM_NV;
22889 case CCFPEmode:
22890 case CCFPmode:
22891 /* We can handle all cases except UNEQ and LTGT. */
22892 switch (comp_code)
22894 case GE: return ARM_GE;
22895 case GT: return ARM_GT;
22896 case LE: return ARM_LS;
22897 case LT: return ARM_MI;
22898 case NE: return ARM_NE;
22899 case EQ: return ARM_EQ;
22900 case ORDERED: return ARM_VC;
22901 case UNORDERED: return ARM_VS;
22902 case UNLT: return ARM_LT;
22903 case UNLE: return ARM_LE;
22904 case UNGT: return ARM_HI;
22905 case UNGE: return ARM_PL;
22906 /* UNEQ and LTGT do not have a representation. */
22907 case UNEQ: /* Fall through. */
22908 case LTGT: /* Fall through. */
22909 default: return ARM_NV;
22912 case CC_SWPmode:
22913 switch (comp_code)
22915 case NE: return ARM_NE;
22916 case EQ: return ARM_EQ;
22917 case GE: return ARM_LE;
22918 case GT: return ARM_LT;
22919 case LE: return ARM_GE;
22920 case LT: return ARM_GT;
22921 case GEU: return ARM_LS;
22922 case GTU: return ARM_CC;
22923 case LEU: return ARM_CS;
22924 case LTU: return ARM_HI;
22925 default: return ARM_NV;
22928 case CC_Cmode:
22929 switch (comp_code)
22931 case LTU: return ARM_CS;
22932 case GEU: return ARM_CC;
22933 default: return ARM_NV;
22936 case CC_CZmode:
22937 switch (comp_code)
22939 case NE: return ARM_NE;
22940 case EQ: return ARM_EQ;
22941 case GEU: return ARM_CS;
22942 case GTU: return ARM_HI;
22943 case LEU: return ARM_LS;
22944 case LTU: return ARM_CC;
22945 default: return ARM_NV;
22948 case CC_NCVmode:
22949 switch (comp_code)
22951 case GE: return ARM_GE;
22952 case LT: return ARM_LT;
22953 case GEU: return ARM_CS;
22954 case LTU: return ARM_CC;
22955 default: return ARM_NV;
22958 case CCmode:
22959 switch (comp_code)
22961 case NE: return ARM_NE;
22962 case EQ: return ARM_EQ;
22963 case GE: return ARM_GE;
22964 case GT: return ARM_GT;
22965 case LE: return ARM_LE;
22966 case LT: return ARM_LT;
22967 case GEU: return ARM_CS;
22968 case GTU: return ARM_HI;
22969 case LEU: return ARM_LS;
22970 case LTU: return ARM_CC;
22971 default: return ARM_NV;
22974 default: gcc_unreachable ();
22978 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22979 static enum arm_cond_code
22980 get_arm_condition_code (rtx comparison)
22982 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22983 gcc_assert (code != ARM_NV);
22984 return code;
22987 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22988 instructions. */
22989 void
22990 thumb2_final_prescan_insn (rtx_insn *insn)
22992 rtx_insn *first_insn = insn;
22993 rtx body = PATTERN (insn);
22994 rtx predicate;
22995 enum arm_cond_code code;
22996 int n;
22997 int mask;
22998 int max;
23000 /* max_insns_skipped in the tune was already taken into account in the
23001 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
23002 we just emit the IT blocks as large as we can; it does not make sense to
23003 split the IT blocks. */
23004 max = MAX_INSN_PER_IT_BLOCK;
23006 /* Remove the previous insn from the count of insns to be output. */
23007 if (arm_condexec_count)
23008 arm_condexec_count--;
23010 /* Nothing to do if we are already inside a conditional block. */
23011 if (arm_condexec_count)
23012 return;
23014 if (GET_CODE (body) != COND_EXEC)
23015 return;
23017 /* Conditional jumps are implemented directly. */
23018 if (JUMP_P (insn))
23019 return;
23021 predicate = COND_EXEC_TEST (body);
23022 arm_current_cc = get_arm_condition_code (predicate);
23024 n = get_attr_ce_count (insn);
23025 arm_condexec_count = 1;
23026 arm_condexec_mask = (1 << n) - 1;
23027 arm_condexec_masklen = n;
23028 /* See if subsequent instructions can be combined into the same block. */
23029 for (;;)
23031 insn = next_nonnote_insn (insn);
23033 /* Jumping into the middle of an IT block is illegal, so a label or
23034 barrier terminates the block. */
23035 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23036 break;
23038 body = PATTERN (insn);
23039 /* USE and CLOBBER aren't really insns, so just skip them. */
23040 if (GET_CODE (body) == USE
23041 || GET_CODE (body) == CLOBBER)
23042 continue;
23044 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23045 if (GET_CODE (body) != COND_EXEC)
23046 break;
23047 /* Maximum number of conditionally executed instructions in a block. */
23048 n = get_attr_ce_count (insn);
23049 if (arm_condexec_masklen + n > max)
23050 break;
23052 predicate = COND_EXEC_TEST (body);
23053 code = get_arm_condition_code (predicate);
23054 mask = (1 << n) - 1;
23055 if (arm_current_cc == code)
23056 arm_condexec_mask |= (mask << arm_condexec_masklen);
23057 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23058 break;
23060 arm_condexec_count++;
23061 arm_condexec_masklen += n;
23063 /* A jump must be the last instruction in a conditional block. */
23064 if (JUMP_P (insn))
23065 break;
23067 /* Restore recog_data (getting the attributes of other insns can
23068 destroy this array, but final.c assumes that it remains intact
23069 across this call). */
23070 extract_constrain_insn_cached (first_insn);
23073 void
23074 arm_final_prescan_insn (rtx_insn *insn)
23076 /* BODY will hold the body of INSN. */
23077 rtx body = PATTERN (insn);
23079 /* This will be 1 if trying to repeat the trick, and things need to be
23080 reversed if it appears to fail. */
23081 int reverse = 0;
23083 /* If we start with a return insn, we only succeed if we find another one. */
23084 int seeking_return = 0;
23085 enum rtx_code return_code = UNKNOWN;
23087 /* START_INSN will hold the insn from where we start looking. This is the
23088 first insn after the following code_label if REVERSE is true. */
23089 rtx_insn *start_insn = insn;
23091 /* If in state 4, check if the target branch is reached, in order to
23092 change back to state 0. */
23093 if (arm_ccfsm_state == 4)
23095 if (insn == arm_target_insn)
23097 arm_target_insn = NULL;
23098 arm_ccfsm_state = 0;
23100 return;
23103 /* If in state 3, it is possible to repeat the trick, if this insn is an
23104 unconditional branch to a label, and immediately following this branch
23105 is the previous target label which is only used once, and the label this
23106 branch jumps to is not too far off. */
23107 if (arm_ccfsm_state == 3)
23109 if (simplejump_p (insn))
23111 start_insn = next_nonnote_insn (start_insn);
23112 if (BARRIER_P (start_insn))
23114 /* XXX Isn't this always a barrier? */
23115 start_insn = next_nonnote_insn (start_insn);
23117 if (LABEL_P (start_insn)
23118 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23119 && LABEL_NUSES (start_insn) == 1)
23120 reverse = TRUE;
23121 else
23122 return;
23124 else if (ANY_RETURN_P (body))
23126 start_insn = next_nonnote_insn (start_insn);
23127 if (BARRIER_P (start_insn))
23128 start_insn = next_nonnote_insn (start_insn);
23129 if (LABEL_P (start_insn)
23130 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23131 && LABEL_NUSES (start_insn) == 1)
23133 reverse = TRUE;
23134 seeking_return = 1;
23135 return_code = GET_CODE (body);
23137 else
23138 return;
23140 else
23141 return;
23144 gcc_assert (!arm_ccfsm_state || reverse);
23145 if (!JUMP_P (insn))
23146 return;
23148 /* This jump might be paralleled with a clobber of the condition codes;
23149    the jump should always come first. */
23150 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23151 body = XVECEXP (body, 0, 0);
23153 if (reverse
23154 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23155 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23157 int insns_skipped;
23158 int fail = FALSE, succeed = FALSE;
23159 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23160 int then_not_else = TRUE;
23161 rtx_insn *this_insn = start_insn;
23162 rtx label = 0;
23164 /* Register the insn jumped to. */
23165 if (reverse)
23167 if (!seeking_return)
23168 label = XEXP (SET_SRC (body), 0);
23170 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23171 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23172 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23174 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23175 then_not_else = FALSE;
23177 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23179 seeking_return = 1;
23180 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23182 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23184 seeking_return = 1;
23185 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23186 then_not_else = FALSE;
23188 else
23189 gcc_unreachable ();
23191 /* See how many insns this branch skips, and what kind of insns. If all
23192 insns are okay, and the label or unconditional branch to the same
23193 label is not too far away, succeed. */
23194 for (insns_skipped = 0;
23195 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23197 rtx scanbody;
23199 this_insn = next_nonnote_insn (this_insn);
23200 if (!this_insn)
23201 break;
23203 switch (GET_CODE (this_insn))
23205 case CODE_LABEL:
23206 /* Succeed if it is the target label, otherwise fail since
23207 control falls in from somewhere else. */
23208 if (this_insn == label)
23210 arm_ccfsm_state = 1;
23211 succeed = TRUE;
23213 else
23214 fail = TRUE;
23215 break;
23217 case BARRIER:
23218 /* Succeed if the following insn is the target label.
23219 Otherwise fail.
23220 If return insns are used then the last insn in a function
23221 will be a barrier. */
23222 this_insn = next_nonnote_insn (this_insn);
23223 if (this_insn && this_insn == label)
23225 arm_ccfsm_state = 1;
23226 succeed = TRUE;
23228 else
23229 fail = TRUE;
23230 break;
23232 case CALL_INSN:
23233 /* The AAPCS says that conditional calls should not be
23234 used since they make interworking inefficient (the
23235 linker can't transform BL<cond> into BLX). That's
23236 only a problem if the machine has BLX. */
23237 if (arm_arch5)
23239 fail = TRUE;
23240 break;
23243 /* Succeed if the following insn is the target label, or
23244 if the following two insns are a barrier and the
23245 target label. */
23246 this_insn = next_nonnote_insn (this_insn);
23247 if (this_insn && BARRIER_P (this_insn))
23248 this_insn = next_nonnote_insn (this_insn);
23250 if (this_insn && this_insn == label
23251 && insns_skipped < max_insns_skipped)
23253 arm_ccfsm_state = 1;
23254 succeed = TRUE;
23256 else
23257 fail = TRUE;
23258 break;
23260 case JUMP_INSN:
23261 /* If this is an unconditional branch to the same label, succeed.
23262 If it is to another label, do nothing. If it is conditional,
23263 fail. */
23264 /* XXX Probably, the tests for SET and the PC are
23265 unnecessary. */
23267 scanbody = PATTERN (this_insn);
23268 if (GET_CODE (scanbody) == SET
23269 && GET_CODE (SET_DEST (scanbody)) == PC)
23271 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23272 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23274 arm_ccfsm_state = 2;
23275 succeed = TRUE;
23277 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23278 fail = TRUE;
23280 /* Fail if a conditional return is undesirable (e.g. on a
23281 StrongARM), but still allow this if optimizing for size. */
23282 else if (GET_CODE (scanbody) == return_code
23283 && !use_return_insn (TRUE, NULL)
23284 && !optimize_size)
23285 fail = TRUE;
23286 else if (GET_CODE (scanbody) == return_code)
23288 arm_ccfsm_state = 2;
23289 succeed = TRUE;
23291 else if (GET_CODE (scanbody) == PARALLEL)
23293 switch (get_attr_conds (this_insn))
23295 case CONDS_NOCOND:
23296 break;
23297 default:
23298 fail = TRUE;
23299 break;
23302 else
23303 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23305 break;
23307 case INSN:
23308 /* Instructions using or affecting the condition codes make it
23309 fail. */
23310 scanbody = PATTERN (this_insn);
23311 if (!(GET_CODE (scanbody) == SET
23312 || GET_CODE (scanbody) == PARALLEL)
23313 || get_attr_conds (this_insn) != CONDS_NOCOND)
23314 fail = TRUE;
23315 break;
23317 default:
23318 break;
23321 if (succeed)
23323 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23324 arm_target_label = CODE_LABEL_NUMBER (label);
23325 else
23327 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23329 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23331 this_insn = next_nonnote_insn (this_insn);
23332 gcc_assert (!this_insn
23333 || (!BARRIER_P (this_insn)
23334 && !LABEL_P (this_insn)));
23336 if (!this_insn)
23338 /* Oh, dear! we ran off the end.. give up. */
23339 extract_constrain_insn_cached (insn);
23340 arm_ccfsm_state = 0;
23341 arm_target_insn = NULL;
23342 return;
23344 arm_target_insn = this_insn;
23347 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23348 what it was. */
23349 if (!reverse)
23350 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23352 if (reverse || then_not_else)
23353 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23356 /* Restore recog_data (getting the attributes of other insns can
23357 destroy this array, but final.c assumes that it remains intact
23358 across this call. */
23359 extract_constrain_insn_cached (insn);
23363 /* Output IT instructions. */
23364 void
23365 thumb2_asm_output_opcode (FILE * stream)
23367 char buff[5];
23368 int n;
23370 if (arm_condexec_mask)
23372 for (n = 0; n < arm_condexec_masklen; n++)
23373 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23374 buff[n] = 0;
23375 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23376 arm_condition_codes[arm_current_cc]);
23377 arm_condexec_mask = 0;
23381 /* Returns true if REGNO is a valid register
23382 for holding a quantity of type MODE. */
23384 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23386 if (GET_MODE_CLASS (mode) == MODE_CC)
23387 return (regno == CC_REGNUM
23388 || (TARGET_HARD_FLOAT && TARGET_VFP
23389 && regno == VFPCC_REGNUM));
23391 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23392 return false;
23394 if (TARGET_THUMB1)
23395 /* For the Thumb we only allow values bigger than SImode in
23396 registers 0 - 6, so that there is always a second low
23397 register available to hold the upper part of the value.
23398 We probably ought to ensure that the register is the
23399 start of an even numbered register pair. */
23400 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23402 if (TARGET_HARD_FLOAT && TARGET_VFP
23403 && IS_VFP_REGNUM (regno))
23405 if (mode == SFmode || mode == SImode)
23406 return VFP_REGNO_OK_FOR_SINGLE (regno);
23408 if (mode == DFmode)
23409 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23411 if (mode == HFmode)
23412 return VFP_REGNO_OK_FOR_SINGLE (regno);
23414 if (TARGET_NEON)
23415 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23416 || (VALID_NEON_QREG_MODE (mode)
23417 && NEON_REGNO_OK_FOR_QUAD (regno))
23418 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23419 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23420 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23421 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23422 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23424 return FALSE;
23427 if (TARGET_REALLY_IWMMXT)
23429 if (IS_IWMMXT_GR_REGNUM (regno))
23430 return mode == SImode;
23432 if (IS_IWMMXT_REGNUM (regno))
23433 return VALID_IWMMXT_REG_MODE (mode);
23436 /* We allow almost any value to be stored in the general registers.
23437 Restrict doubleword quantities to even register pairs in ARM state
23438 so that we can use ldrd. Do not allow very large Neon structure
23439 opaque modes in general registers; they would use too many. */
23440 if (regno <= LAST_ARM_REGNUM)
23442 if (ARM_NUM_REGS (mode) > 4)
23443 return FALSE;
23445 if (TARGET_THUMB2)
23446 return TRUE;
23448 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23451 if (regno == FRAME_POINTER_REGNUM
23452 || regno == ARG_POINTER_REGNUM)
23453 /* We only allow integers in the fake hard registers. */
23454 return GET_MODE_CLASS (mode) == MODE_INT;
23456 return FALSE;
23459 /* Implement MODES_TIEABLE_P. */
23461 bool
23462 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23464 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23465 return true;
23467 /* We specifically want to allow elements of "structure" modes to
23468 be tieable to the structure. This more general condition allows
23469 other rarer situations too. */
23470 if (TARGET_NEON
23471 && (VALID_NEON_DREG_MODE (mode1)
23472 || VALID_NEON_QREG_MODE (mode1)
23473 || VALID_NEON_STRUCT_MODE (mode1))
23474 && (VALID_NEON_DREG_MODE (mode2)
23475 || VALID_NEON_QREG_MODE (mode2)
23476 || VALID_NEON_STRUCT_MODE (mode2)))
23477 return true;
23479 return false;
23482 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23483 not used in arm mode. */
23485 enum reg_class
23486 arm_regno_class (int regno)
23488 if (regno == PC_REGNUM)
23489 return NO_REGS;
23491 if (TARGET_THUMB1)
23493 if (regno == STACK_POINTER_REGNUM)
23494 return STACK_REG;
23495 if (regno == CC_REGNUM)
23496 return CC_REG;
23497 if (regno < 8)
23498 return LO_REGS;
23499 return HI_REGS;
23502 if (TARGET_THUMB2 && regno < 8)
23503 return LO_REGS;
23505 if ( regno <= LAST_ARM_REGNUM
23506 || regno == FRAME_POINTER_REGNUM
23507 || regno == ARG_POINTER_REGNUM)
23508 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23510 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23511 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23513 if (IS_VFP_REGNUM (regno))
23515 if (regno <= D7_VFP_REGNUM)
23516 return VFP_D0_D7_REGS;
23517 else if (regno <= LAST_LO_VFP_REGNUM)
23518 return VFP_LO_REGS;
23519 else
23520 return VFP_HI_REGS;
23523 if (IS_IWMMXT_REGNUM (regno))
23524 return IWMMXT_REGS;
23526 if (IS_IWMMXT_GR_REGNUM (regno))
23527 return IWMMXT_GR_REGS;
23529 return NO_REGS;
23532 /* Handle a special case when computing the offset
23533 of an argument from the frame pointer. */
23535 arm_debugger_arg_offset (int value, rtx addr)
23537 rtx_insn *insn;
23539 /* We are only interested if dbxout_parms() failed to compute the offset. */
23540 if (value != 0)
23541 return 0;
23543 /* We can only cope with the case where the address is held in a register. */
23544 if (!REG_P (addr))
23545 return 0;
23547 /* If we are using the frame pointer to point at the argument, then
23548 an offset of 0 is correct. */
23549 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23550 return 0;
23552 /* If we are using the stack pointer to point at the
23553 argument, then an offset of 0 is correct. */
23554 /* ??? Check this is consistent with thumb2 frame layout. */
23555 if ((TARGET_THUMB || !frame_pointer_needed)
23556 && REGNO (addr) == SP_REGNUM)
23557 return 0;
23559 /* Oh dear. The argument is pointed to by a register rather
23560 than being held in a register, or being stored at a known
23561 offset from the frame pointer. Since GDB only understands
23562 those two kinds of argument we must translate the address
23563 held in the register into an offset from the frame pointer.
23564 We do this by searching through the insns for the function
23565 looking to see where this register gets its value. If the
23566 register is initialized from the frame pointer plus an offset
23567 then we are in luck and we can continue, otherwise we give up.
23569 This code is exercised by producing debugging information
23570 for a function with arguments like this:
23572 double func (double a, double b, int c, double d) {return d;}
23574 Without this code the stab for parameter 'd' will be set to
23575 an offset of 0 from the frame pointer, rather than 8. */
23577 /* The if() statement says:
23579 If the insn is a normal instruction
23580 and if the insn is setting the value in a register
23581 and if the register being set is the register holding the address of the argument
23582 and if the address is computed by an addition
23583 that involves adding to a register
23584 which is the frame pointer
23585 a constant integer
23587 then... */
23589 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23591 if ( NONJUMP_INSN_P (insn)
23592 && GET_CODE (PATTERN (insn)) == SET
23593 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23594 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23595 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23596 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23597 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23600 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23602 break;
23606 if (value == 0)
23608 debug_rtx (addr);
23609 warning (0, "unable to compute real location of stacked parameter");
23610 value = 8; /* XXX magic hack */
23613 return value;
23616 /* Implement TARGET_PROMOTED_TYPE. */
23618 static tree
23619 arm_promoted_type (const_tree t)
23621 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23622 return float_type_node;
23623 return NULL_TREE;
23626 /* Implement TARGET_CONVERT_TO_TYPE.
23627 Specifically, this hook implements the peculiarity of the ARM
23628 half-precision floating-point C semantics that requires conversions
23629 between __fp16 and double to go through an intermediate conversion to float. */
23631 static tree
23632 arm_convert_to_type (tree type, tree expr)
23634 tree fromtype = TREE_TYPE (expr);
23635 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23636 return NULL_TREE;
23637 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23638 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23639 return convert (type, convert (float_type_node, expr));
23640 return NULL_TREE;
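/* Editorial illustration (not part of the original source): with the hook
   above, a source-level conversion between __fp16 and double is split into
   two steps.  For example

	__fp16 h;
	double d = h;

   is lowered as (double) (float) h, and narrowing a double to __fp16
   likewise goes through float first; any other conversion returns NULL_TREE
   and is left to the default handling.  */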
23643 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23644 This simply adds HFmode as a supported mode; even though we don't
23645 implement arithmetic on this type directly, it's supported by
23646 optabs conversions, much the way the double-word arithmetic is
23647 special-cased in the default hook. */
23649 static bool
23650 arm_scalar_mode_supported_p (machine_mode mode)
23652 if (mode == HFmode)
23653 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23654 else if (ALL_FIXED_POINT_MODE_P (mode))
23655 return true;
23656 else
23657 return default_scalar_mode_supported_p (mode);
23660 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23661 not to early-clobber SRC registers in the process.
23663 We assume that the operands described by SRC and DEST represent a
23664 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23665 number of components into which the copy has been decomposed. */
23666 void
23667 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23669 unsigned int i;
23671 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23672 || REGNO (operands[0]) < REGNO (operands[1]))
23674 for (i = 0; i < count; i++)
23676 operands[2 * i] = dest[i];
23677 operands[2 * i + 1] = src[i];
23680 else
23682 for (i = 0; i < count; i++)
23684 operands[2 * i] = dest[count - i - 1];
23685 operands[2 * i + 1] = src[count - i - 1];
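/* Editorial illustration (not part of the original source): a sketch of why
   the copy direction above matters.  Assuming a two-part copy whose
   destination registers are {d1, d2} and whose sources are {d0, d1}, a
   forward order would emit d1 := d0 and then d2 := d1, clobbering the second
   source before it is read; because REGNO (dest) > REGNO (src) here, the
   loop emits d2 := d1 first and then d1 := d0.  When the ranges do not
   overlap, or the destination starts below the source, the forward order is
   already safe.  */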
23690 /* Split operands into moves from op[1] + op[2] into op[0]. */
23692 void
23693 neon_split_vcombine (rtx operands[3])
23695 unsigned int dest = REGNO (operands[0]);
23696 unsigned int src1 = REGNO (operands[1]);
23697 unsigned int src2 = REGNO (operands[2]);
23698 machine_mode halfmode = GET_MODE (operands[1]);
23699 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23700 rtx destlo, desthi;
23702 if (src1 == dest && src2 == dest + halfregs)
23704 /* No-op move. Can't split to nothing; emit something. */
23705 emit_note (NOTE_INSN_DELETED);
23706 return;
23709 /* Preserve register attributes for variable tracking. */
23710 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23711 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23712 GET_MODE_SIZE (halfmode));
23714 /* Special case of reversed high/low parts. Use VSWP. */
23715 if (src2 == dest && src1 == dest + halfregs)
23717 rtx x = gen_rtx_SET (destlo, operands[1]);
23718 rtx y = gen_rtx_SET (desthi, operands[2]);
23719 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23720 return;
23723 if (!reg_overlap_mentioned_p (operands[2], destlo))
23725 /* Try to avoid unnecessary moves if part of the result
23726 is in the right place already. */
23727 if (src1 != dest)
23728 emit_move_insn (destlo, operands[1]);
23729 if (src2 != dest + halfregs)
23730 emit_move_insn (desthi, operands[2]);
23732 else
23734 if (src2 != dest + halfregs)
23735 emit_move_insn (desthi, operands[2]);
23736 if (src1 != dest)
23737 emit_move_insn (destlo, operands[1]);
23741 /* Return the number (counting from 0) of
23742 the least significant set bit in MASK. */
23744 inline static int
23745 number_of_first_bit_set (unsigned mask)
23747 return ctz_hwi (mask);
23750 /* Like emit_multi_reg_push, but allowing for a different set of
23751 registers to be described as saved. MASK is the set of registers
23752 to be saved; REAL_REGS is the set of registers to be described as
23753 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23755 static rtx_insn *
23756 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23758 unsigned long regno;
23759 rtx par[10], tmp, reg;
23760 rtx_insn *insn;
23761 int i, j;
23763 /* Build the parallel of the registers actually being stored. */
23764 for (i = 0; mask; ++i, mask &= mask - 1)
23766 regno = ctz_hwi (mask);
23767 reg = gen_rtx_REG (SImode, regno);
23769 if (i == 0)
23770 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23771 else
23772 tmp = gen_rtx_USE (VOIDmode, reg);
23774 par[i] = tmp;
23777 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23778 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23779 tmp = gen_frame_mem (BLKmode, tmp);
23780 tmp = gen_rtx_SET (tmp, par[0]);
23781 par[0] = tmp;
23783 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23784 insn = emit_insn (tmp);
23786 /* Always build the stack adjustment note for unwind info. */
23787 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23788 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23789 par[0] = tmp;
23791 /* Build the parallel of the registers recorded as saved for unwind. */
23792 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23794 regno = ctz_hwi (real_regs);
23795 reg = gen_rtx_REG (SImode, regno);
23797 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23798 tmp = gen_frame_mem (SImode, tmp);
23799 tmp = gen_rtx_SET (tmp, reg);
23800 RTX_FRAME_RELATED_P (tmp) = 1;
23801 par[j + 1] = tmp;
23804 if (j == 0)
23805 tmp = par[0];
23806 else
23808 RTX_FRAME_RELATED_P (par[0]) = 1;
23809 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23812 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23814 return insn;
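/* Editorial illustration (not part of the original source): assuming
   MASK == REAL_REGS == {r4, r5, lr}, the code above emits a single

	push	{r4, r5, lr}

   as a PARALLEL whose first element pre-modifies sp by -12, while the
   REG_FRAME_RELATED_EXPR note describes the equivalent effect for the
   unwinder: sp := sp - 12 with r4 saved at [sp], r5 at [sp, #4] and lr at
   [sp, #8].  With REAL_REGS == 0 only the stack adjustment is recorded.  */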
23817 /* Emit code to push or pop registers to or from the stack. F is the
23818 assembly file. MASK is the registers to pop. */
23819 static void
23820 thumb_pop (FILE *f, unsigned long mask)
23822 int regno;
23823 int lo_mask = mask & 0xFF;
23824 int pushed_words = 0;
23826 gcc_assert (mask);
23828 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23830 /* Special case. Do not generate a POP PC statement here, do it in
23831 thumb_exit() */
23832 thumb_exit (f, -1);
23833 return;
23836 fprintf (f, "\tpop\t{");
23838 /* Look at the low registers first. */
23839 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23841 if (lo_mask & 1)
23843 asm_fprintf (f, "%r", regno);
23845 if ((lo_mask & ~1) != 0)
23846 fprintf (f, ", ");
23848 pushed_words++;
23852 if (mask & (1 << PC_REGNUM))
23854 /* Catch popping the PC. */
23855 if (TARGET_INTERWORK || TARGET_BACKTRACE
23856 || crtl->calls_eh_return)
23858 /* The PC is never popped directly; instead
23859 it is popped into r3 and then BX is used. */
23860 fprintf (f, "}\n");
23862 thumb_exit (f, -1);
23864 return;
23866 else
23868 if (mask & 0xFF)
23869 fprintf (f, ", ");
23871 asm_fprintf (f, "%r", PC_REGNUM);
23875 fprintf (f, "}\n");
23878 /* Generate code to return from a thumb function.
23879 If 'reg_containing_return_addr' is -1, then the return address is
23880 actually on the stack, at the stack pointer. */
23881 static void
23882 thumb_exit (FILE *f, int reg_containing_return_addr)
23884 unsigned regs_available_for_popping;
23885 unsigned regs_to_pop;
23886 int pops_needed;
23887 unsigned available;
23888 unsigned required;
23889 machine_mode mode;
23890 int size;
23891 int restore_a4 = FALSE;
23893 /* Compute the registers we need to pop. */
23894 regs_to_pop = 0;
23895 pops_needed = 0;
23897 if (reg_containing_return_addr == -1)
23899 regs_to_pop |= 1 << LR_REGNUM;
23900 ++pops_needed;
23903 if (TARGET_BACKTRACE)
23905 /* Restore the (ARM) frame pointer and stack pointer. */
23906 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23907 pops_needed += 2;
23910 /* If there is nothing to pop then just emit the BX instruction and
23911 return. */
23912 if (pops_needed == 0)
23914 if (crtl->calls_eh_return)
23915 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23917 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23918 return;
23920 /* Otherwise if we are not supporting interworking and we have not created
23921 a backtrace structure and the function was not entered in ARM mode then
23922 just pop the return address straight into the PC. */
23923 else if (!TARGET_INTERWORK
23924 && !TARGET_BACKTRACE
23925 && !is_called_in_ARM_mode (current_function_decl)
23926 && !crtl->calls_eh_return)
23928 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23929 return;
23932 /* Find out how many of the (return) argument registers we can corrupt. */
23933 regs_available_for_popping = 0;
23935 /* If returning via __builtin_eh_return, the bottom three registers
23936 all contain information needed for the return. */
23937 if (crtl->calls_eh_return)
23938 size = 12;
23939 else
23941 /* We can deduce the registers used from the function's
23942 return value. This is more reliable than examining
23943 df_regs_ever_live_p () because that will be set if the register is
23944 ever used in the function, not just if the register is used
23945 to hold a return value. */
23947 if (crtl->return_rtx != 0)
23948 mode = GET_MODE (crtl->return_rtx);
23949 else
23950 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23952 size = GET_MODE_SIZE (mode);
23954 if (size == 0)
23956 /* In a void function we can use any argument register.
23957 In a function that returns a structure on the stack
23958 we can use the second and third argument registers. */
23959 if (mode == VOIDmode)
23960 regs_available_for_popping =
23961 (1 << ARG_REGISTER (1))
23962 | (1 << ARG_REGISTER (2))
23963 | (1 << ARG_REGISTER (3));
23964 else
23965 regs_available_for_popping =
23966 (1 << ARG_REGISTER (2))
23967 | (1 << ARG_REGISTER (3));
23969 else if (size <= 4)
23970 regs_available_for_popping =
23971 (1 << ARG_REGISTER (2))
23972 | (1 << ARG_REGISTER (3));
23973 else if (size <= 8)
23974 regs_available_for_popping =
23975 (1 << ARG_REGISTER (3));
23978 /* Match registers to be popped with registers into which we pop them. */
23979 for (available = regs_available_for_popping,
23980 required = regs_to_pop;
23981 required != 0 && available != 0;
23982 available &= ~(available & - available),
23983 required &= ~(required & - required))
23984 -- pops_needed;
23986 /* If we have any popping registers left over, remove them. */
23987 if (available > 0)
23988 regs_available_for_popping &= ~available;
23990 /* Otherwise if we need another popping register we can use
23991 the fourth argument register. */
23992 else if (pops_needed)
23994 /* If we have not found any free argument registers and
23995 reg a4 contains the return address, we must move it. */
23996 if (regs_available_for_popping == 0
23997 && reg_containing_return_addr == LAST_ARG_REGNUM)
23999 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24000 reg_containing_return_addr = LR_REGNUM;
24002 else if (size > 12)
24004 /* Register a4 is being used to hold part of the return value,
24005 but we have dire need of a free, low register. */
24006 restore_a4 = TRUE;
24008 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24011 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24013 /* The fourth argument register is available. */
24014 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24016 --pops_needed;
24020 /* Pop as many registers as we can. */
24021 thumb_pop (f, regs_available_for_popping);
24023 /* Process the registers we popped. */
24024 if (reg_containing_return_addr == -1)
24026 /* The return address was popped into the lowest numbered register. */
24027 regs_to_pop &= ~(1 << LR_REGNUM);
24029 reg_containing_return_addr =
24030 number_of_first_bit_set (regs_available_for_popping);
24032 /* Remove this register from the mask of available registers, so that
24033 the return address will not be corrupted by further pops. */
24034 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24037 /* If we popped other registers then handle them here. */
24038 if (regs_available_for_popping)
24040 int frame_pointer;
24042 /* Work out which register currently contains the frame pointer. */
24043 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24045 /* Move it into the correct place. */
24046 asm_fprintf (f, "\tmov\t%r, %r\n",
24047 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24049 /* (Temporarily) remove it from the mask of popped registers. */
24050 regs_available_for_popping &= ~(1 << frame_pointer);
24051 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24053 if (regs_available_for_popping)
24055 int stack_pointer;
24057 /* We popped the stack pointer as well;
24058 find the register that contains it. */
24059 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24061 /* Move it into the stack register. */
24062 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24064 /* At this point we have popped all necessary registers, so
24065 do not worry about restoring regs_available_for_popping
24066 to its correct value:
24068 assert (pops_needed == 0)
24069 assert (regs_available_for_popping == (1 << frame_pointer))
24070 assert (regs_to_pop == (1 << STACK_POINTER)) */
24072 else
24074 /* Since we have just moved the popped value into the frame
24075 pointer, the popping register is available for reuse, and
24076 we know that we still have the stack pointer left to pop. */
24077 regs_available_for_popping |= (1 << frame_pointer);
24081 /* If we still have registers left on the stack, but we no longer have
24082 any registers into which we can pop them, then we must move the return
24083 address into the link register and make available the register that
24084 contained it. */
24085 if (regs_available_for_popping == 0 && pops_needed > 0)
24087 regs_available_for_popping |= 1 << reg_containing_return_addr;
24089 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24090 reg_containing_return_addr);
24092 reg_containing_return_addr = LR_REGNUM;
24095 /* If we have registers left on the stack then pop some more.
24096 We know that at most we will want to pop FP and SP. */
24097 if (pops_needed > 0)
24099 int popped_into;
24100 int move_to;
24102 thumb_pop (f, regs_available_for_popping);
24104 /* We have popped either FP or SP.
24105 Move whichever one it is into the correct register. */
24106 popped_into = number_of_first_bit_set (regs_available_for_popping);
24107 move_to = number_of_first_bit_set (regs_to_pop);
24109 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24111 regs_to_pop &= ~(1 << move_to);
24113 --pops_needed;
24116 /* If we still have not popped everything then we must have only
24117 had one register available to us and we are now popping the SP. */
24118 if (pops_needed > 0)
24120 int popped_into;
24122 thumb_pop (f, regs_available_for_popping);
24124 popped_into = number_of_first_bit_set (regs_available_for_popping);
24126 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24128 /* assert (regs_to_pop == (1 << STACK_POINTER))
24129 assert (pops_needed == 1) */
24133 /* If necessary restore the a4 register. */
24134 if (restore_a4)
24136 if (reg_containing_return_addr != LR_REGNUM)
24138 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24139 reg_containing_return_addr = LR_REGNUM;
24142 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24145 if (crtl->calls_eh_return)
24146 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24148 /* Return to caller. */
24149 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24152 /* Scan INSN just before assembler is output for it.
24153 For Thumb-1, we track the status of the condition codes; this
24154 information is used in the cbranchsi4_insn pattern. */
24155 void
24156 thumb1_final_prescan_insn (rtx_insn *insn)
24158 if (flag_print_asm_name)
24159 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24160 INSN_ADDRESSES (INSN_UID (insn)));
24161 /* Don't overwrite the previous setter when we get to a cbranch. */
24162 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24164 enum attr_conds conds;
24166 if (cfun->machine->thumb1_cc_insn)
24168 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24169 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24170 CC_STATUS_INIT;
24172 conds = get_attr_conds (insn);
24173 if (conds == CONDS_SET)
24175 rtx set = single_set (insn);
24176 cfun->machine->thumb1_cc_insn = insn;
24177 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24178 cfun->machine->thumb1_cc_op1 = const0_rtx;
24179 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24180 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24182 rtx src1 = XEXP (SET_SRC (set), 1);
24183 if (src1 == const0_rtx)
24184 cfun->machine->thumb1_cc_mode = CCmode;
24186 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24188 /* Record the src register operand instead of dest because
24189 cprop_hardreg pass propagates src. */
24190 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24193 else if (conds != CONDS_NOCOND)
24194 cfun->machine->thumb1_cc_insn = NULL_RTX;
24197 /* Check if unexpected far jump is used. */
24198 if (cfun->machine->lr_save_eliminated
24199 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24200 internal_error("Unexpected thumb1 far jump");
24204 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24206 unsigned HOST_WIDE_INT mask = 0xff;
24207 int i;
24209 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24210 if (val == 0) /* XXX */
24211 return 0;
24213 for (i = 0; i < 25; i++)
24214 if ((val & (mask << i)) == val)
24215 return 1;
24217 return 0;
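/* Editorial illustration (not part of the original source): the test above
   accepts exactly those values that are an 8-bit constant shifted left by
   0 to 24 bits.  For instance 0x00ff0000 (0xff << 16) and 0x00001fe0
   (0xff << 5) pass, while 0x00000101 spans nine bits and fails, as does 0,
   which is rejected explicitly.  */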
24220 /* Returns nonzero if the current function contains,
24221 or might contain a far jump. */
24222 static int
24223 thumb_far_jump_used_p (void)
24225 rtx_insn *insn;
24226 bool far_jump = false;
24227 unsigned int func_size = 0;
24229 /* This test is only important for leaf functions. */
24230 /* assert (!leaf_function_p ()); */
24232 /* If we have already decided that far jumps may be used,
24233 do not bother checking again, and always return true even if
24234 it turns out that they are not being used. Once we have made
24235 the decision that far jumps are present (and that hence the link
24236 register will be pushed onto the stack) we cannot go back on it. */
24237 if (cfun->machine->far_jump_used)
24238 return 1;
24240 /* If this function is not being called from the prologue/epilogue
24241 generation code then it must be being called from the
24242 INITIAL_ELIMINATION_OFFSET macro. */
24243 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24245 /* In this case we know that we are being asked about the elimination
24246 of the arg pointer register. If that register is not being used,
24247 then there are no arguments on the stack, and we do not have to
24248 worry that a far jump might force the prologue to push the link
24249 register, changing the stack offsets. In this case we can just
24250 return false, since the presence of far jumps in the function will
24251 not affect stack offsets.
24253 If the arg pointer is live (or if it was live, but has now been
24254 eliminated and so set to dead) then we do have to test to see if
24255 the function might contain a far jump. This test can lead to some
24256 false positives, since before reload is completed, the length of
24257 branch instructions is not known, so gcc defaults to returning their
24258 longest length, which in turn sets the far jump attribute to true.
24260 A false positive will not result in bad code being generated, but it
24261 will result in a needless push and pop of the link register. We
24262 hope that this does not occur too often.
24264 If we need doubleword stack alignment this could affect the other
24265 elimination offsets so we can't risk getting it wrong. */
24266 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24267 cfun->machine->arg_pointer_live = 1;
24268 else if (!cfun->machine->arg_pointer_live)
24269 return 0;
24272 /* We should not change far_jump_used during or after reload, as there is
24273 no chance to change stack frame layout. */
24274 if (reload_in_progress || reload_completed)
24275 return 0;
24277 /* Check to see if the function contains a branch
24278 insn with the far jump attribute set. */
24279 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24281 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24283 far_jump = true;
24285 func_size += get_attr_length (insn);
24288 /* Attribute far_jump will always be true for thumb1 before
24289 shorten_branch pass. So checking far_jump attribute before
24290 shorten_branch isn't very useful.
24292 The following heuristic tries to estimate more accurately whether a far jump
24293 will eventually be needed. It is very conservative, as there is
24294 no way to roll back a decision not to use far jumps.
24296 The Thumb-1 long branch offset range is -2048 to 2046. In the worst case
24297 each 2-byte insn is associated with a 4-byte constant pool entry, so using
24298 a function size of 2048/3 as the threshold is conservative enough. */
24299 if (far_jump)
24301 if ((func_size * 3) >= 2048)
24303 /* Record the fact that we have decided that
24304 the function does use far jumps. */
24305 cfun->machine->far_jump_used = 1;
24306 return 1;
24310 return 0;
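/* Editorial illustration (not part of the original source): a worked example
   of the threshold above.  If a branch with the far_jump attribute is seen
   and the summed insn lengths come to 700 bytes, then 700 * 3 = 2100 >= 2048
   and far_jump_used is committed; at 680 bytes, 680 * 3 = 2040 < 2048, so
   the function keeps assuming short branches, since even with a 4-byte
   constant pool entry for every 2-byte insn the code still fits the
   -2048..2046 branch range.  */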
24313 /* Return nonzero if FUNC must be entered in ARM mode. */
24314 static bool
24315 is_called_in_ARM_mode (tree func)
24317 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24319 /* Ignore the problem about functions whose address is taken. */
24320 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24321 return true;
24323 #ifdef ARM_PE
24324 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24325 #else
24326 return false;
24327 #endif
24330 /* Given the stack offsets and register mask in OFFSETS, decide how
24331 many additional registers to push instead of subtracting a constant
24332 from SP. For epilogues the principle is the same except we use pop.
24333 FOR_PROLOGUE indicates which we're generating. */
24334 static int
24335 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24337 HOST_WIDE_INT amount;
24338 unsigned long live_regs_mask = offsets->saved_regs_mask;
24339 /* Extract a mask of the ones we can give to the Thumb's push/pop
24340 instruction. */
24341 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24342 /* Then count how many other high registers will need to be pushed. */
24343 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24344 int n_free, reg_base, size;
24346 if (!for_prologue && frame_pointer_needed)
24347 amount = offsets->locals_base - offsets->saved_regs;
24348 else
24349 amount = offsets->outgoing_args - offsets->saved_regs;
24351 /* If the stack frame size is 512 exactly, we can save one load
24352 instruction, which should make this a win even when optimizing
24353 for speed. */
24354 if (!optimize_size && amount != 512)
24355 return 0;
24357 /* Can't do this if there are high registers to push. */
24358 if (high_regs_pushed != 0)
24359 return 0;
24361 /* Shouldn't do it in the prologue if no registers would normally
24362 be pushed at all. In the epilogue, also allow it if we'll have
24363 a pop insn for the PC. */
24364 if (l_mask == 0
24365 && (for_prologue
24366 || TARGET_BACKTRACE
24367 || (live_regs_mask & 1 << LR_REGNUM) == 0
24368 || TARGET_INTERWORK
24369 || crtl->args.pretend_args_size != 0))
24370 return 0;
24372 /* Don't do this if thumb_expand_prologue wants to emit instructions
24373 between the push and the stack frame allocation. */
24374 if (for_prologue
24375 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24376 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24377 return 0;
24379 reg_base = 0;
24380 n_free = 0;
24381 if (!for_prologue)
24383 size = arm_size_return_regs ();
24384 reg_base = ARM_NUM_INTS (size);
24385 live_regs_mask >>= reg_base;
24388 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24389 && (for_prologue || call_used_regs[reg_base + n_free]))
24391 live_regs_mask >>= 1;
24392 n_free++;
24395 if (n_free == 0)
24396 return 0;
24397 gcc_assert (amount / 4 * 4 == amount);
24399 if (amount >= 512 && (amount - n_free * 4) < 512)
24400 return (amount - 508) / 4;
24401 if (amount <= n_free * 4)
24402 return amount / 4;
24403 return 0;
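/* Editorial illustration (not part of the original source): two cases the
   function above is after.  When optimizing for size with a 16-byte frame
   and four suitable free low registers, 16 <= 4 * 4, so it returns 4 and the
   whole "sub sp, #16" is replaced by pushing four extra registers.  When
   optimizing for speed with a frame of exactly 512 bytes and one free
   register, 512 - 4 = 508 < 512, so it returns 1 and the caller's remaining
   adjustment of 508 fits a single immediate instead of needing a constant
   load.  */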
24406 /* The bits which aren't usefully expanded as rtl. */
24407 const char *
24408 thumb1_unexpanded_epilogue (void)
24410 arm_stack_offsets *offsets;
24411 int regno;
24412 unsigned long live_regs_mask = 0;
24413 int high_regs_pushed = 0;
24414 int extra_pop;
24415 int had_to_push_lr;
24416 int size;
24418 if (cfun->machine->return_used_this_function != 0)
24419 return "";
24421 if (IS_NAKED (arm_current_func_type ()))
24422 return "";
24424 offsets = arm_get_frame_offsets ();
24425 live_regs_mask = offsets->saved_regs_mask;
24426 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24428 /* We can deduce the registers used from the function's return value.
24429 This is more reliable than examining df_regs_ever_live_p () because that
24430 will be set if the register is ever used in the function, not just if
24431 the register is used to hold a return value. */
24432 size = arm_size_return_regs ();
24434 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24435 if (extra_pop > 0)
24437 unsigned long extra_mask = (1 << extra_pop) - 1;
24438 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24441 /* The prologue may have pushed some high registers to use as
24442 work registers. e.g. the testsuite file:
24443 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24444 compiles to produce:
24445 push {r4, r5, r6, r7, lr}
24446 mov r7, r9
24447 mov r6, r8
24448 push {r6, r7}
24449 as part of the prolog. We have to undo that pushing here. */
24451 if (high_regs_pushed)
24453 unsigned long mask = live_regs_mask & 0xff;
24454 int next_hi_reg;
24456 /* The available low registers depend on the size of the value we are
24457 returning. */
24458 if (size <= 12)
24459 mask |= 1 << 3;
24460 if (size <= 8)
24461 mask |= 1 << 2;
24463 if (mask == 0)
24464 /* Oh dear! We have no low registers into which we can pop
24465 high registers! */
24466 internal_error
24467 ("no low registers available for popping high registers");
24469 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24470 if (live_regs_mask & (1 << next_hi_reg))
24471 break;
24473 while (high_regs_pushed)
24475 /* Find lo register(s) into which the high register(s) can
24476 be popped. */
24477 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24479 if (mask & (1 << regno))
24480 high_regs_pushed--;
24481 if (high_regs_pushed == 0)
24482 break;
24485 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24487 /* Pop the values into the low register(s). */
24488 thumb_pop (asm_out_file, mask);
24490 /* Move the value(s) into the high registers. */
24491 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24493 if (mask & (1 << regno))
24495 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24496 regno);
24498 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24499 if (live_regs_mask & (1 << next_hi_reg))
24500 break;
24504 live_regs_mask &= ~0x0f00;
24507 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24508 live_regs_mask &= 0xff;
24510 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24512 /* Pop the return address into the PC. */
24513 if (had_to_push_lr)
24514 live_regs_mask |= 1 << PC_REGNUM;
24516 /* Either no argument registers were pushed or a backtrace
24517 structure was created which includes an adjusted stack
24518 pointer, so just pop everything. */
24519 if (live_regs_mask)
24520 thumb_pop (asm_out_file, live_regs_mask);
24522 /* We have either just popped the return address into the
24523 PC or it was kept in LR for the entire function.
24524 Note that thumb_pop has already called thumb_exit if the
24525 PC was in the list. */
24526 if (!had_to_push_lr)
24527 thumb_exit (asm_out_file, LR_REGNUM);
24529 else
24531 /* Pop everything but the return address. */
24532 if (live_regs_mask)
24533 thumb_pop (asm_out_file, live_regs_mask);
24535 if (had_to_push_lr)
24537 if (size > 12)
24539 /* We have no free low regs, so save one. */
24540 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24541 LAST_ARG_REGNUM);
24544 /* Get the return address into a temporary register. */
24545 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24547 if (size > 12)
24549 /* Move the return address to lr. */
24550 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24551 LAST_ARG_REGNUM);
24552 /* Restore the low register. */
24553 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24554 IP_REGNUM);
24555 regno = LR_REGNUM;
24557 else
24558 regno = LAST_ARG_REGNUM;
24560 else
24561 regno = LR_REGNUM;
24563 /* Remove the argument registers that were pushed onto the stack. */
24564 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24565 SP_REGNUM, SP_REGNUM,
24566 crtl->args.pretend_args_size);
24568 thumb_exit (asm_out_file, regno);
24571 return "";
24574 /* Functions to save and restore machine-specific function data. */
24575 static struct machine_function *
24576 arm_init_machine_status (void)
24578 struct machine_function *machine;
24579 machine = ggc_cleared_alloc<machine_function> ();
24581 #if ARM_FT_UNKNOWN != 0
24582 machine->func_type = ARM_FT_UNKNOWN;
24583 #endif
24584 return machine;
24587 /* Return an RTX indicating where the return address to the
24588 calling function can be found. */
24590 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24592 if (count != 0)
24593 return NULL_RTX;
24595 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24598 /* Do anything needed before RTL is emitted for each function. */
24599 void
24600 arm_init_expanders (void)
24602 /* Arrange to initialize and mark the machine per-function status. */
24603 init_machine_status = arm_init_machine_status;
24605 /* This is to stop the combine pass optimizing away the alignment
24606 adjustment of va_arg. */
24607 /* ??? It is claimed that this should not be necessary. */
24608 if (cfun)
24609 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24612 /* Check that FUNC is called with a different mode. */
24614 bool
24615 arm_change_mode_p (tree func)
24617 if (TREE_CODE (func) != FUNCTION_DECL)
24618 return false;
24620 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24622 if (!callee_tree)
24623 callee_tree = target_option_default_node;
24625 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24626 int flags = callee_opts->x_target_flags;
24628 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24631 /* Like arm_compute_initial_elimination offset. Simpler because there
24632 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24633 to point at the base of the local variables after static stack
24634 space for a function has been allocated. */
24636 HOST_WIDE_INT
24637 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24639 arm_stack_offsets *offsets;
24641 offsets = arm_get_frame_offsets ();
24643 switch (from)
24645 case ARG_POINTER_REGNUM:
24646 switch (to)
24648 case STACK_POINTER_REGNUM:
24649 return offsets->outgoing_args - offsets->saved_args;
24651 case FRAME_POINTER_REGNUM:
24652 return offsets->soft_frame - offsets->saved_args;
24654 case ARM_HARD_FRAME_POINTER_REGNUM:
24655 return offsets->saved_regs - offsets->saved_args;
24657 case THUMB_HARD_FRAME_POINTER_REGNUM:
24658 return offsets->locals_base - offsets->saved_args;
24660 default:
24661 gcc_unreachable ();
24663 break;
24665 case FRAME_POINTER_REGNUM:
24666 switch (to)
24668 case STACK_POINTER_REGNUM:
24669 return offsets->outgoing_args - offsets->soft_frame;
24671 case ARM_HARD_FRAME_POINTER_REGNUM:
24672 return offsets->saved_regs - offsets->soft_frame;
24674 case THUMB_HARD_FRAME_POINTER_REGNUM:
24675 return offsets->locals_base - offsets->soft_frame;
24677 default:
24678 gcc_unreachable ();
24680 break;
24682 default:
24683 gcc_unreachable ();
24687 /* Generate the function's prologue. */
24689 void
24690 thumb1_expand_prologue (void)
24692 rtx_insn *insn;
24694 HOST_WIDE_INT amount;
24695 HOST_WIDE_INT size;
24696 arm_stack_offsets *offsets;
24697 unsigned long func_type;
24698 int regno;
24699 unsigned long live_regs_mask;
24700 unsigned long l_mask;
24701 unsigned high_regs_pushed = 0;
24703 func_type = arm_current_func_type ();
24705 /* Naked functions don't have prologues. */
24706 if (IS_NAKED (func_type))
24708 if (flag_stack_usage_info)
24709 current_function_static_stack_size = 0;
24710 return;
24713 if (IS_INTERRUPT (func_type))
24715 error ("interrupt Service Routines cannot be coded in Thumb mode");
24716 return;
24719 if (is_called_in_ARM_mode (current_function_decl))
24720 emit_insn (gen_prologue_thumb1_interwork ());
24722 offsets = arm_get_frame_offsets ();
24723 live_regs_mask = offsets->saved_regs_mask;
24725 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24726 l_mask = live_regs_mask & 0x40ff;
24727 /* Then count how many other high registers will need to be pushed. */
24728 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24730 if (crtl->args.pretend_args_size)
24732 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24734 if (cfun->machine->uses_anonymous_args)
24736 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24737 unsigned long mask;
24739 mask = 1ul << (LAST_ARG_REGNUM + 1);
24740 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24742 insn = thumb1_emit_multi_reg_push (mask, 0);
24744 else
24746 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24747 stack_pointer_rtx, x));
24749 RTX_FRAME_RELATED_P (insn) = 1;
24752 if (TARGET_BACKTRACE)
24754 HOST_WIDE_INT offset = 0;
24755 unsigned work_register;
24756 rtx work_reg, x, arm_hfp_rtx;
24758 /* We have been asked to create a stack backtrace structure.
24759 The code looks like this:
24761 0 .align 2
24762 0 func:
24763 0 sub SP, #16 Reserve space for 4 registers.
24764 2 push {R7} Push low registers.
24765 4 add R7, SP, #20 Get the stack pointer before the push.
24766 6 str R7, [SP, #8] Store the stack pointer
24767 (before reserving the space).
24768 8 mov R7, PC Get hold of the start of this code + 12.
24769 10 str R7, [SP, #16] Store it.
24770 12 mov R7, FP Get hold of the current frame pointer.
24771 14 str R7, [SP, #4] Store it.
24772 16 mov R7, LR Get hold of the current return address.
24773 18 str R7, [SP, #12] Store it.
24774 20 add R7, SP, #16 Point at the start of the
24775 backtrace structure.
24776 22 mov FP, R7 Put this value into the frame pointer. */
24778 work_register = thumb_find_work_register (live_regs_mask);
24779 work_reg = gen_rtx_REG (SImode, work_register);
24780 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24782 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24783 stack_pointer_rtx, GEN_INT (-16)));
24784 RTX_FRAME_RELATED_P (insn) = 1;
24786 if (l_mask)
24788 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24789 RTX_FRAME_RELATED_P (insn) = 1;
24791 offset = bit_count (l_mask) * UNITS_PER_WORD;
24794 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24795 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24797 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24798 x = gen_frame_mem (SImode, x);
24799 emit_move_insn (x, work_reg);
24801 /* Make sure that the instruction fetching the PC is in the right place
24802 to calculate "start of backtrace creation code + 12". */
24803 /* ??? The stores using the common WORK_REG ought to be enough to
24804 prevent the scheduler from doing anything weird. Failing that
24805 we could always move all of the following into an UNSPEC_VOLATILE. */
24806 if (l_mask)
24808 x = gen_rtx_REG (SImode, PC_REGNUM);
24809 emit_move_insn (work_reg, x);
24811 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24812 x = gen_frame_mem (SImode, x);
24813 emit_move_insn (x, work_reg);
24815 emit_move_insn (work_reg, arm_hfp_rtx);
24817 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24818 x = gen_frame_mem (SImode, x);
24819 emit_move_insn (x, work_reg);
24821 else
24823 emit_move_insn (work_reg, arm_hfp_rtx);
24825 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24826 x = gen_frame_mem (SImode, x);
24827 emit_move_insn (x, work_reg);
24829 x = gen_rtx_REG (SImode, PC_REGNUM);
24830 emit_move_insn (work_reg, x);
24832 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24833 x = gen_frame_mem (SImode, x);
24834 emit_move_insn (x, work_reg);
24837 x = gen_rtx_REG (SImode, LR_REGNUM);
24838 emit_move_insn (work_reg, x);
24840 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24841 x = gen_frame_mem (SImode, x);
24842 emit_move_insn (x, work_reg);
24844 x = GEN_INT (offset + 12);
24845 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24847 emit_move_insn (arm_hfp_rtx, work_reg);
24849 /* Optimization: If we are not pushing any low registers but we are going
24850 to push some high registers then delay our first push. This will just
24851 be a push of LR and we can combine it with the push of the first high
24852 register. */
24853 else if ((l_mask & 0xff) != 0
24854 || (high_regs_pushed == 0 && l_mask))
24856 unsigned long mask = l_mask;
24857 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24858 insn = thumb1_emit_multi_reg_push (mask, mask);
24859 RTX_FRAME_RELATED_P (insn) = 1;
24862 if (high_regs_pushed)
24864 unsigned pushable_regs;
24865 unsigned next_hi_reg;
24866 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24867 : crtl->args.info.nregs;
24868 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24870 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24871 if (live_regs_mask & (1 << next_hi_reg))
24872 break;
24874 /* Here we need to mask out registers used for passing arguments
24875 even if they can be pushed. This is to avoid using them to stash the high
24876 registers. Such a stash could clobber arguments still live in those registers. */
24877 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24879 if (pushable_regs == 0)
24880 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24882 while (high_regs_pushed > 0)
24884 unsigned long real_regs_mask = 0;
24886 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24888 if (pushable_regs & (1 << regno))
24890 emit_move_insn (gen_rtx_REG (SImode, regno),
24891 gen_rtx_REG (SImode, next_hi_reg));
24893 high_regs_pushed --;
24894 real_regs_mask |= (1 << next_hi_reg);
24896 if (high_regs_pushed)
24898 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24899 next_hi_reg --)
24900 if (live_regs_mask & (1 << next_hi_reg))
24901 break;
24903 else
24905 pushable_regs &= ~((1 << regno) - 1);
24906 break;
24911 /* If we had to find a work register and we have not yet
24912 saved the LR then add it to the list of regs to push. */
24913 if (l_mask == (1 << LR_REGNUM))
24915 pushable_regs |= l_mask;
24916 real_regs_mask |= l_mask;
24917 l_mask = 0;
24920 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24921 RTX_FRAME_RELATED_P (insn) = 1;
24925 /* Load the pic register before setting the frame pointer,
24926 so we can use r7 as a temporary work register. */
24927 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24928 arm_load_pic_register (live_regs_mask);
24930 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24931 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24932 stack_pointer_rtx);
24934 size = offsets->outgoing_args - offsets->saved_args;
24935 if (flag_stack_usage_info)
24936 current_function_static_stack_size = size;
24938 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24939 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24940 sorry ("-fstack-check=specific for Thumb-1");
24942 amount = offsets->outgoing_args - offsets->saved_regs;
24943 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24944 if (amount)
24946 if (amount < 512)
24948 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24949 GEN_INT (- amount)));
24950 RTX_FRAME_RELATED_P (insn) = 1;
24952 else
24954 rtx reg, dwarf;
24956 /* The stack decrement is too big for an immediate value in a single
24957 insn. In theory we could issue multiple subtracts, but after
24958 three of them it becomes more space efficient to place the full
24959 value in the constant pool and load into a register. (Also the
24960 ARM debugger really likes to see only one stack decrement per
24961 function). So instead we look for a scratch register into which
24962 we can load the decrement, and then we subtract this from the
24963 stack pointer. Unfortunately on the thumb the only available
24964 scratch registers are the argument registers, and we cannot use
24965 these as they may hold arguments to the function. Instead we
24966 attempt to locate a call preserved register which is used by this
24967 function. If we can find one, then we know that it will have
24968 been pushed at the start of the prologue and so we can corrupt
24969 it now. */
24970 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24971 if (live_regs_mask & (1 << regno))
24972 break;
24974 gcc_assert(regno <= LAST_LO_REGNUM);
24976 reg = gen_rtx_REG (SImode, regno);
24978 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24980 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24981 stack_pointer_rtx, reg));
24983 dwarf = gen_rtx_SET (stack_pointer_rtx,
24984 plus_constant (Pmode, stack_pointer_rtx,
24985 -amount));
24986 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24987 RTX_FRAME_RELATED_P (insn) = 1;
24991 if (frame_pointer_needed)
24992 thumb_set_frame_pointer (offsets);
24994 /* If we are profiling, make sure no instructions are scheduled before
24995 the call to mcount. Similarly if the user has requested no
24996 scheduling in the prolog. Similarly if we want non-call exceptions
24997 using the EABI unwinder, to prevent faulting instructions from being
24998 swapped with a stack adjustment. */
24999 if (crtl->profile || !TARGET_SCHED_PROLOG
25000 || (arm_except_unwind_info (&global_options) == UI_TARGET
25001 && cfun->can_throw_non_call_exceptions))
25002 emit_insn (gen_blockage ());
25004 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25005 if (live_regs_mask & 0xff)
25006 cfun->machine->lr_save_eliminated = 0;
25009 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25010 POP instruction can be generated. LR should be replaced by PC. All
25011 the checks required are already done by USE_RETURN_INSN (). Hence,
25012 all we really need to check here is if single register is to be
25013 returned, or multiple register return. */
25014 void
25015 thumb2_expand_return (bool simple_return)
25017 int i, num_regs;
25018 unsigned long saved_regs_mask;
25019 arm_stack_offsets *offsets;
25021 offsets = arm_get_frame_offsets ();
25022 saved_regs_mask = offsets->saved_regs_mask;
25024 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25025 if (saved_regs_mask & (1 << i))
25026 num_regs++;
25028 if (!simple_return && saved_regs_mask)
25030 if (num_regs == 1)
25032 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25033 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25034 rtx addr = gen_rtx_MEM (SImode,
25035 gen_rtx_POST_INC (SImode,
25036 stack_pointer_rtx));
25037 set_mem_alias_set (addr, get_frame_alias_set ());
25038 XVECEXP (par, 0, 0) = ret_rtx;
25039 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25040 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25041 emit_jump_insn (par);
25043 else
25045 saved_regs_mask &= ~ (1 << LR_REGNUM);
25046 saved_regs_mask |= (1 << PC_REGNUM);
25047 arm_emit_multi_reg_pop (saved_regs_mask);
25050 else
25052 emit_jump_insn (simple_return_rtx);
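/* Editorial illustration (not part of the original source): assuming only LR
   was saved, the single-register path above emits a PARALLEL of a return and
   a load of PC from a post-incremented SP, i.e. a lone "pop {pc}".  With a
   saved mask such as {r4, r5, lr}, the multi-register path swaps LR for PC
   and the pop becomes "pop {r4, r5, pc}".  */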
25056 void
25057 thumb1_expand_epilogue (void)
25059 HOST_WIDE_INT amount;
25060 arm_stack_offsets *offsets;
25061 int regno;
25063 /* Naked functions don't have prologues. */
25064 if (IS_NAKED (arm_current_func_type ()))
25065 return;
25067 offsets = arm_get_frame_offsets ();
25068 amount = offsets->outgoing_args - offsets->saved_regs;
25070 if (frame_pointer_needed)
25072 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25073 amount = offsets->locals_base - offsets->saved_regs;
25075 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25077 gcc_assert (amount >= 0);
25078 if (amount)
25080 emit_insn (gen_blockage ());
25082 if (amount < 512)
25083 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25084 GEN_INT (amount)));
25085 else
25087 /* r3 is always free in the epilogue. */
25088 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25090 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25091 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25095 /* Emit a USE (stack_pointer_rtx), so that
25096 the stack adjustment will not be deleted. */
25097 emit_insn (gen_force_register_use (stack_pointer_rtx));
25099 if (crtl->profile || !TARGET_SCHED_PROLOG)
25100 emit_insn (gen_blockage ());
25102 /* Emit a clobber for each insn that will be restored in the epilogue,
25103 so that flow2 will get register lifetimes correct. */
25104 for (regno = 0; regno < 13; regno++)
25105 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25106 emit_clobber (gen_rtx_REG (SImode, regno));
25108 if (! df_regs_ever_live_p (LR_REGNUM))
25109 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25112 /* Epilogue code for APCS frame. */
25113 static void
25114 arm_expand_epilogue_apcs_frame (bool really_return)
25116 unsigned long func_type;
25117 unsigned long saved_regs_mask;
25118 int num_regs = 0;
25119 int i;
25120 int floats_from_frame = 0;
25121 arm_stack_offsets *offsets;
25123 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25124 func_type = arm_current_func_type ();
25126 /* Get frame offsets for ARM. */
25127 offsets = arm_get_frame_offsets ();
25128 saved_regs_mask = offsets->saved_regs_mask;
25130 /* Find the offset of the floating-point save area in the frame. */
25131 floats_from_frame
25132 = (offsets->saved_args
25133 + arm_compute_static_chain_stack_bytes ()
25134 - offsets->frame);
25136 /* Compute how many core registers saved and how far away the floats are. */
25137 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25138 if (saved_regs_mask & (1 << i))
25140 num_regs++;
25141 floats_from_frame += 4;
25144 if (TARGET_HARD_FLOAT && TARGET_VFP)
25146 int start_reg;
25147 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25149 /* The offset is from IP_REGNUM. */
25150 int saved_size = arm_get_vfp_saved_size ();
25151 if (saved_size > 0)
25153 rtx_insn *insn;
25154 floats_from_frame += saved_size;
25155 insn = emit_insn (gen_addsi3 (ip_rtx,
25156 hard_frame_pointer_rtx,
25157 GEN_INT (-floats_from_frame)));
25158 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25159 ip_rtx, hard_frame_pointer_rtx);
25162 /* Generate VFP register multi-pop. */
25163 start_reg = FIRST_VFP_REGNUM;
25165 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25166 /* Look for a case where a reg does not need restoring. */
25167 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25168 && (!df_regs_ever_live_p (i + 1)
25169 || call_used_regs[i + 1]))
25171 if (start_reg != i)
25172 arm_emit_vfp_multi_reg_pop (start_reg,
25173 (i - start_reg) / 2,
25174 gen_rtx_REG (SImode,
25175 IP_REGNUM));
25176 start_reg = i + 2;
25179 /* Restore the remaining regs that we have discovered (or possibly
25180 even all of them, if the conditional in the for loop never
25181 fired). */
25182 if (start_reg != i)
25183 arm_emit_vfp_multi_reg_pop (start_reg,
25184 (i - start_reg) / 2,
25185 gen_rtx_REG (SImode, IP_REGNUM));
25188 if (TARGET_IWMMXT)
25190 /* The frame pointer is guaranteed to be non-double-word aligned, as
25191 it is set to double-word-aligned old_stack_pointer - 4. */
25192 rtx_insn *insn;
25193 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25195 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25196 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25198 rtx addr = gen_frame_mem (V2SImode,
25199 plus_constant (Pmode, hard_frame_pointer_rtx,
25200 - lrm_count * 4));
25201 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25202 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25203 gen_rtx_REG (V2SImode, i),
25204 NULL_RTX);
25205 lrm_count += 2;
25209 /* saved_regs_mask should contain IP, which holds the old stack pointer
25210 from when the activation record was created.  Since SP and IP are
25211 adjacent registers, we can restore the value directly into SP. */
25212 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25213 saved_regs_mask &= ~(1 << IP_REGNUM);
25214 saved_regs_mask |= (1 << SP_REGNUM);
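	/* Illustrative example: in a typical APCS frame the prologue pushed
	   {fp, ip, lr, pc} with IP holding the caller's stack pointer, so
	   rewriting the mask here makes the multi-register pop below load
	   that stacked value straight into SP instead of IP.  */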
25216 /* There are two registers left in saved_regs_mask - LR and PC. We
25217 only need to restore LR (the return address), but to
25218 save time we can load it directly into PC, unless we need a
25219 special function exit sequence, or we are not really returning. */
25220 if (really_return
25221 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25222 && !crtl->calls_eh_return)
25223 /* Delete LR from the register mask, so that LR on
25224 the stack is loaded into the PC in the register mask. */
25225 saved_regs_mask &= ~(1 << LR_REGNUM);
25226 else
25227 saved_regs_mask &= ~(1 << PC_REGNUM);
25229 num_regs = bit_count (saved_regs_mask);
25230 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25232 rtx_insn *insn;
25233 emit_insn (gen_blockage ());
25234 /* Unwind the stack to just below the saved registers. */
25235 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25236 hard_frame_pointer_rtx,
25237 GEN_INT (- 4 * num_regs)));
25239 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25240 stack_pointer_rtx, hard_frame_pointer_rtx);
25243 arm_emit_multi_reg_pop (saved_regs_mask);
25245 if (IS_INTERRUPT (func_type))
25247 /* Interrupt handlers will have pushed the
25248 IP onto the stack, so restore it now. */
25249 rtx_insn *insn;
25250 rtx addr = gen_rtx_MEM (SImode,
25251 gen_rtx_POST_INC (SImode,
25252 stack_pointer_rtx));
25253 set_mem_alias_set (addr, get_frame_alias_set ());
25254 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25255 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25256 gen_rtx_REG (SImode, IP_REGNUM),
25257 NULL_RTX);
25260 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25261 return;
25263 if (crtl->calls_eh_return)
25264 emit_insn (gen_addsi3 (stack_pointer_rtx,
25265 stack_pointer_rtx,
25266 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25268 if (IS_STACKALIGN (func_type))
25269 /* Restore the original stack pointer. Before prologue, the stack was
25270 realigned and the original stack pointer saved in r0. For details,
25271 see comment in arm_expand_prologue. */
25272 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25274 emit_jump_insn (simple_return_rtx);
25277 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25278 function is not a sibcall. */
25279 void
25280 arm_expand_epilogue (bool really_return)
25282 unsigned long func_type;
25283 unsigned long saved_regs_mask;
25284 int num_regs = 0;
25285 int i;
25286 int amount;
25287 arm_stack_offsets *offsets;
25289 func_type = arm_current_func_type ();
25291 /* Naked functions don't have an epilogue.  Hence, generate a return pattern and
25292 let output_return_instruction take care of any instruction emission. */
25293 if (IS_NAKED (func_type)
25294 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25296 if (really_return)
25297 emit_jump_insn (simple_return_rtx);
25298 return;
25301 /* If we are throwing an exception, then we really must be doing a
25302 return, so we can't tail-call. */
25303 gcc_assert (!crtl->calls_eh_return || really_return);
25305 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25307 arm_expand_epilogue_apcs_frame (really_return);
25308 return;
25311 /* Get frame offsets for ARM. */
25312 offsets = arm_get_frame_offsets ();
25313 saved_regs_mask = offsets->saved_regs_mask;
25314 num_regs = bit_count (saved_regs_mask);
25316 if (frame_pointer_needed)
25318 rtx_insn *insn;
25319 /* Restore stack pointer if necessary. */
25320 if (TARGET_ARM)
25322 /* In ARM mode, the frame pointer points to the first saved register.
25323 Restore the stack pointer to the last saved register. */
25324 amount = offsets->frame - offsets->saved_regs;
25326 /* Force out any pending memory operations that reference stacked data
25327 before stack de-allocation occurs. */
25328 emit_insn (gen_blockage ());
25329 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25330 hard_frame_pointer_rtx,
25331 GEN_INT (amount)));
25332 arm_add_cfa_adjust_cfa_note (insn, amount,
25333 stack_pointer_rtx,
25334 hard_frame_pointer_rtx);
25336 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25337 deleted. */
25338 emit_insn (gen_force_register_use (stack_pointer_rtx));
25340 else
25342 /* In Thumb-2 mode, the frame pointer points to the last saved
25343 register. */
25344 amount = offsets->locals_base - offsets->saved_regs;
25345 if (amount)
25347 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25348 hard_frame_pointer_rtx,
25349 GEN_INT (amount)));
25350 arm_add_cfa_adjust_cfa_note (insn, amount,
25351 hard_frame_pointer_rtx,
25352 hard_frame_pointer_rtx);
25355 /* Force out any pending memory operations that reference stacked data
25356 before stack de-allocation occurs. */
25357 emit_insn (gen_blockage ());
25358 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25359 hard_frame_pointer_rtx));
25360 arm_add_cfa_adjust_cfa_note (insn, 0,
25361 stack_pointer_rtx,
25362 hard_frame_pointer_rtx);
25363 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25364 deleted. */
25365 emit_insn (gen_force_register_use (stack_pointer_rtx));
25368 else
25370 /* Pop off outgoing args and local frame to adjust stack pointer to
25371 last saved register. */
25372 amount = offsets->outgoing_args - offsets->saved_regs;
25373 if (amount)
25375 rtx_insn *tmp;
25376 /* Force out any pending memory operations that reference stacked data
25377 before stack de-allocation occurs. */
25378 emit_insn (gen_blockage ());
25379 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25380 stack_pointer_rtx,
25381 GEN_INT (amount)));
25382 arm_add_cfa_adjust_cfa_note (tmp, amount,
25383 stack_pointer_rtx, stack_pointer_rtx);
25384 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25385 not deleted. */
25386 emit_insn (gen_force_register_use (stack_pointer_rtx));
25390 if (TARGET_HARD_FLOAT && TARGET_VFP)
25392 /* Generate VFP register multi-pop. */
25393 int end_reg = LAST_VFP_REGNUM + 1;
25395 /* Scan the registers in reverse order. We need to match
25396 any groupings made in the prologue and generate matching
25397 vldm operations. The need to match groups is because,
25398 unlike pop, vldm can only do consecutive regs. */
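	/* Illustrative example: if the prologue saved d8, d9 and d11 but not
	   d10, the loop below emits one vldm for {d8-d9} and a separate one
	   for {d11}, since a single vldm cannot skip over d10.  */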
25399 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25400 /* Look for a case where a reg does not need restoring. */
25401 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25402 && (!df_regs_ever_live_p (i + 1)
25403 || call_used_regs[i + 1]))
25405 /* Restore the regs discovered so far (from reg+2 to
25406 end_reg). */
25407 if (end_reg > i + 2)
25408 arm_emit_vfp_multi_reg_pop (i + 2,
25409 (end_reg - (i + 2)) / 2,
25410 stack_pointer_rtx);
25411 end_reg = i;
25414 /* Restore the remaining regs that we have discovered (or possibly
25415 even all of them, if the conditional in the for loop never
25416 fired). */
25417 if (end_reg > i + 2)
25418 arm_emit_vfp_multi_reg_pop (i + 2,
25419 (end_reg - (i + 2)) / 2,
25420 stack_pointer_rtx);
25423 if (TARGET_IWMMXT)
25424 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25425 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25427 rtx_insn *insn;
25428 rtx addr = gen_rtx_MEM (V2SImode,
25429 gen_rtx_POST_INC (SImode,
25430 stack_pointer_rtx));
25431 set_mem_alias_set (addr, get_frame_alias_set ());
25432 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25433 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25434 gen_rtx_REG (V2SImode, i),
25435 NULL_RTX);
25436 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25437 stack_pointer_rtx, stack_pointer_rtx);
25440 if (saved_regs_mask)
25442 rtx insn;
25443 bool return_in_pc = false;
25445 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25446 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25447 && !IS_STACKALIGN (func_type)
25448 && really_return
25449 && crtl->args.pretend_args_size == 0
25450 && saved_regs_mask & (1 << LR_REGNUM)
25451 && !crtl->calls_eh_return)
25453 saved_regs_mask &= ~(1 << LR_REGNUM);
25454 saved_regs_mask |= (1 << PC_REGNUM);
25455 return_in_pc = true;
25458 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25460 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25461 if (saved_regs_mask & (1 << i))
25463 rtx addr = gen_rtx_MEM (SImode,
25464 gen_rtx_POST_INC (SImode,
25465 stack_pointer_rtx));
25466 set_mem_alias_set (addr, get_frame_alias_set ());
25468 if (i == PC_REGNUM)
25470 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25471 XVECEXP (insn, 0, 0) = ret_rtx;
25472 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25473 addr);
25474 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25475 insn = emit_jump_insn (insn);
25477 else
25479 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25480 addr));
25481 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25482 gen_rtx_REG (SImode, i),
25483 NULL_RTX);
25484 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25485 stack_pointer_rtx,
25486 stack_pointer_rtx);
25490 else
25492 if (TARGET_LDRD
25493 && current_tune->prefer_ldrd_strd
25494 && !optimize_function_for_size_p (cfun))
25496 if (TARGET_THUMB2)
25497 thumb2_emit_ldrd_pop (saved_regs_mask);
25498 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25499 arm_emit_ldrd_pop (saved_regs_mask);
25500 else
25501 arm_emit_multi_reg_pop (saved_regs_mask);
25503 else
25504 arm_emit_multi_reg_pop (saved_regs_mask);
25507 if (return_in_pc)
25508 return;
25511 amount
25512 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25513 if (amount)
25515 int i, j;
25516 rtx dwarf = NULL_RTX;
25517 rtx_insn *tmp =
25518 emit_insn (gen_addsi3 (stack_pointer_rtx,
25519 stack_pointer_rtx,
25520 GEN_INT (amount)));
25522 RTX_FRAME_RELATED_P (tmp) = 1;
25524 if (cfun->machine->uses_anonymous_args)
25526 /* Restore pretend args.  Refer to arm_expand_prologue for how the
25527 pretend args were saved on the stack. */
25528 int num_regs = crtl->args.pretend_args_size / 4;
25529 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
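	  /* Illustrative example: with pretend_args_size == 8, num_regs is 2
	     and the mask is (0xf0 >> 2) & 0xf == 0xc, i.e. r2 and r3 -- the
	     last two argument registers, which the prologue pushed as
	     pretend args.  */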
25530 for (j = 0, i = 0; j < num_regs; i++)
25531 if (saved_regs_mask & (1 << i))
25533 rtx reg = gen_rtx_REG (SImode, i);
25534 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25535 j++;
25537 REG_NOTES (tmp) = dwarf;
25539 arm_add_cfa_adjust_cfa_note (tmp, amount,
25540 stack_pointer_rtx, stack_pointer_rtx);
25543 if (!really_return)
25544 return;
25546 if (crtl->calls_eh_return)
25547 emit_insn (gen_addsi3 (stack_pointer_rtx,
25548 stack_pointer_rtx,
25549 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25551 if (IS_STACKALIGN (func_type))
25552 /* Restore the original stack pointer. Before prologue, the stack was
25553 realigned and the original stack pointer saved in r0. For details,
25554 see comment in arm_expand_prologue. */
25555 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25557 emit_jump_insn (simple_return_rtx);
25560 /* Implementation of insn prologue_thumb1_interwork. This is the first
25561 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25563 const char *
25564 thumb1_output_interwork (void)
25566 const char * name;
25567 FILE *f = asm_out_file;
25569 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25570 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25571 == SYMBOL_REF);
25572 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25574 /* Generate code sequence to switch us into Thumb mode. */
25575 /* The .code 32 directive has already been emitted by
25576 ASM_DECLARE_FUNCTION_NAME. */
25577 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25578 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25580 /* Generate a label, so that the debugger will notice the
25581 change in instruction sets. This label is also used by
25582 the assembler to bypass the ARM code when this function
25583 is called from a Thumb encoded function elsewhere in the
25584 same file. Hence the definition of STUB_NAME here must
25585 agree with the definition in gas/config/tc-arm.c. */
25587 #define STUB_NAME ".real_start_of"
25589 fprintf (f, "\t.code\t16\n");
25590 #ifdef ARM_PE
25591 if (arm_dllexport_name_p (name))
25592 name = arm_strip_name_encoding (name);
25593 #endif
25594 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25595 fprintf (f, "\t.thumb_func\n");
25596 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25598 return "";
25601 /* Handle the case of a double word load into a low register from
25602 a computed memory address. The computed address may involve a
25603 register which is overwritten by the load. */
25604 const char *
25605 thumb_load_double_from_address (rtx *operands)
25607 rtx addr;
25608 rtx base;
25609 rtx offset;
25610 rtx arg1;
25611 rtx arg2;
25613 gcc_assert (REG_P (operands[0]));
25614 gcc_assert (MEM_P (operands[1]));
25616 /* Get the memory address. */
25617 addr = XEXP (operands[1], 0);
25619 /* Work out how the memory address is computed. */
25620 switch (GET_CODE (addr))
25622 case REG:
25623 operands[2] = adjust_address (operands[1], SImode, 4);
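      /* Illustrative example: if the destination pair is r2/r3 and the
	 address is held in r2, the high word is loaded first ("ldr r3,
	 [r2, #4]" then "ldr r2, [r2]") so the base register is not
	 clobbered before the second load.  */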
25625 if (REGNO (operands[0]) == REGNO (addr))
25627 output_asm_insn ("ldr\t%H0, %2", operands);
25628 output_asm_insn ("ldr\t%0, %1", operands);
25630 else
25632 output_asm_insn ("ldr\t%0, %1", operands);
25633 output_asm_insn ("ldr\t%H0, %2", operands);
25635 break;
25637 case CONST:
25638 /* Compute <address> + 4 for the high order load. */
25639 operands[2] = adjust_address (operands[1], SImode, 4);
25641 output_asm_insn ("ldr\t%0, %1", operands);
25642 output_asm_insn ("ldr\t%H0, %2", operands);
25643 break;
25645 case PLUS:
25646 arg1 = XEXP (addr, 0);
25647 arg2 = XEXP (addr, 1);
25649 if (CONSTANT_P (arg1))
25650 base = arg2, offset = arg1;
25651 else
25652 base = arg1, offset = arg2;
25654 gcc_assert (REG_P (base));
25656 /* Catch the case of <address> = <reg> + <reg> */
25657 if (REG_P (offset))
25659 int reg_offset = REGNO (offset);
25660 int reg_base = REGNO (base);
25661 int reg_dest = REGNO (operands[0]);
25663 /* Add the base and offset registers together into the
25664 higher destination register. */
25665 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25666 reg_dest + 1, reg_base, reg_offset);
25668 /* Load the lower destination register from the address in
25669 the higher destination register. */
25670 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25671 reg_dest, reg_dest + 1);
25673 /* Load the higher destination register from its own address
25674 plus 4. */
25675 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25676 reg_dest + 1, reg_dest + 1);
25678 else
25680 /* Compute <address> + 4 for the high order load. */
25681 operands[2] = adjust_address (operands[1], SImode, 4);
25683 /* If the computed address is held in the low order register
25684 then load the high order register first, otherwise always
25685 load the low order register first. */
25686 if (REGNO (operands[0]) == REGNO (base))
25688 output_asm_insn ("ldr\t%H0, %2", operands);
25689 output_asm_insn ("ldr\t%0, %1", operands);
25691 else
25693 output_asm_insn ("ldr\t%0, %1", operands);
25694 output_asm_insn ("ldr\t%H0, %2", operands);
25697 break;
25699 case LABEL_REF:
25700 /* With no registers to worry about we can just load the value
25701 directly. */
25702 operands[2] = adjust_address (operands[1], SImode, 4);
25704 output_asm_insn ("ldr\t%H0, %2", operands);
25705 output_asm_insn ("ldr\t%0, %1", operands);
25706 break;
25708 default:
25709 gcc_unreachable ();
25712 return "";
25715 const char *
25716 thumb_output_move_mem_multiple (int n, rtx *operands)
25718 switch (n)
25720 case 2:
25721 if (REGNO (operands[4]) > REGNO (operands[5]))
25722 std::swap (operands[4], operands[5]);
25724 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25725 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25726 break;
25728 case 3:
25729 if (REGNO (operands[4]) > REGNO (operands[5]))
25730 std::swap (operands[4], operands[5]);
25731 if (REGNO (operands[5]) > REGNO (operands[6]))
25732 std::swap (operands[5], operands[6]);
25733 if (REGNO (operands[4]) > REGNO (operands[5]))
25734 std::swap (operands[4], operands[5]);
25736 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25737 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25738 break;
25740 default:
25741 gcc_unreachable ();
25744 return "";
25747 /* Output a call-via instruction for thumb state. */
25748 const char *
25749 thumb_call_via_reg (rtx reg)
25751 int regno = REGNO (reg);
25752 rtx *labelp;
25754 gcc_assert (regno < LR_REGNUM);
25756 /* If we are in the normal text section we can use a single instance
25757 per compilation unit. If we are doing function sections, then we need
25758 an entry per section, since we can't rely on reachability. */
25759 if (in_section == text_section)
25761 thumb_call_reg_needed = 1;
25763 if (thumb_call_via_label[regno] == NULL)
25764 thumb_call_via_label[regno] = gen_label_rtx ();
25765 labelp = thumb_call_via_label + regno;
25767 else
25769 if (cfun->machine->call_via[regno] == NULL)
25770 cfun->machine->call_via[regno] = gen_label_rtx ();
25771 labelp = cfun->machine->call_via + regno;
25774 output_asm_insn ("bl\t%a0", labelp);
25775 return "";
25778 /* Routines for generating rtl. */
25779 void
25780 thumb_expand_movmemqi (rtx *operands)
25782 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25783 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25784 HOST_WIDE_INT len = INTVAL (operands[2]);
25785 HOST_WIDE_INT offset = 0;
25787 while (len >= 12)
25789 emit_insn (gen_movmem12b (out, in, out, in));
25790 len -= 12;
25793 if (len >= 8)
25795 emit_insn (gen_movmem8b (out, in, out, in));
25796 len -= 8;
25799 if (len >= 4)
25801 rtx reg = gen_reg_rtx (SImode);
25802 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25803 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25804 len -= 4;
25805 offset += 4;
25808 if (len >= 2)
25810 rtx reg = gen_reg_rtx (HImode);
25811 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25812 plus_constant (Pmode, in,
25813 offset))));
25814 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25815 offset)),
25816 reg));
25817 len -= 2;
25818 offset += 2;
25821 if (len)
25823 rtx reg = gen_reg_rtx (QImode);
25824 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25825 plus_constant (Pmode, in,
25826 offset))));
25827 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25828 offset)),
25829 reg));
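/* Worked example (illustrative): a 27-byte copy expands to two 12-byte
   block moves followed by one halfword and one byte move, i.e.
   12 + 12 + 2 + 1 == 27 bytes in total.  */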
25833 void
25834 thumb_reload_out_hi (rtx *operands)
25836 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25839 /* Handle reading a half-word from memory during reload. */
25840 void
25841 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25843 gcc_unreachable ();
25846 /* Return the length of a function name prefix
25847 that starts with the character 'c'. */
25848 static int
25849 arm_get_strip_length (int c)
25851 switch (c)
25853 ARM_NAME_ENCODING_LENGTHS
25854 default: return 0;
25858 /* Return a pointer to a function's name with any
25859 and all prefix encodings stripped from it. */
25860 const char *
25861 arm_strip_name_encoding (const char *name)
25863 int skip;
25865 while ((skip = arm_get_strip_length (* name)))
25866 name += skip;
25868 return name;
25871 /* If there is a '*' anywhere in the name's prefix, then
25872 emit the stripped name verbatim, otherwise prepend an
25873 underscore if leading underscores are being used. */
25874 void
25875 arm_asm_output_labelref (FILE *stream, const char *name)
25877 int skip;
25878 int verbatim = 0;
25880 while ((skip = arm_get_strip_length (* name)))
25882 verbatim |= (*name == '*');
25883 name += skip;
25886 if (verbatim)
25887 fputs (name, stream);
25888 else
25889 asm_fprintf (stream, "%U%s", name);
25892 /* This function is used to emit an EABI tag and its associated value.
25893 We emit the numerical value of the tag in case the assembler does not
25894 support textual tags.  (E.g. gas prior to 2.20.)  If requested we include
25895 the tag name in a comment so that anyone reading the assembler output
25896 will know which tag is being set.
25898 This function is not static because arm-c.c needs it too. */
25900 void
25901 arm_emit_eabi_attribute (const char *name, int num, int val)
25903 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25904 if (flag_verbose_asm || flag_debug_asm)
25905 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25906 asm_fprintf (asm_out_file, "\n");
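/* Illustrative example: a call such as
   arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1) emits
   ".eabi_attribute 20, 1", followed by "@ Tag_ABI_FP_denormal" when
   -fverbose-asm is in effect.  */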
25909 /* This function is used to print CPU tuning information as comment
25910 in assembler file. Pointers are not printed for now. */
25912 void
25913 arm_print_tune_info (void)
25915 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25916 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25917 current_tune->constant_limit);
25918 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25919 current_tune->max_insns_skipped);
25920 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
25921 current_tune->prefetch.num_slots);
25922 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
25923 current_tune->prefetch.l1_cache_size);
25924 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25925 current_tune->prefetch.l1_cache_line_size);
25926 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25927 (int) current_tune->prefer_constant_pool);
25928 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25929 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25930 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25931 current_tune->branch_cost (false, false));
25932 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25933 current_tune->branch_cost (false, true));
25934 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25935 current_tune->branch_cost (true, false));
25936 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25937 current_tune->branch_cost (true, true));
25938 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25939 (int) current_tune->prefer_ldrd_strd);
25940 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25941 (int) current_tune->logical_op_non_short_circuit_thumb,
25942 (int) current_tune->logical_op_non_short_circuit_arm);
25943 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25944 (int) current_tune->prefer_neon_for_64bits);
25945 asm_fprintf (asm_out_file,
25946 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25947 (int) current_tune->disparage_flag_setting_t16_encodings);
25948 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25949 (int) current_tune->string_ops_prefer_neon);
25950 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25951 current_tune->max_insns_inline_memset);
25952 asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
25953 current_tune->fusible_ops);
25954 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25955 (int) current_tune->sched_autopref);
25958 static void
25959 arm_file_start (void)
25961 int val;
25963 if (TARGET_BPABI)
25965 if (arm_selected_arch)
25967 /* armv7ve doesn't support any extensions. */
25968 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25970 /* Keep backward compatibility for assemblers
25971 which don't support armv7ve. */
25972 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25973 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25974 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25975 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25976 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25978 else
25980 const char* pos = strchr (arm_selected_arch->name, '+');
25981 if (pos)
25983 char buf[15];
25984 gcc_assert (strlen (arm_selected_arch->name)
25985 <= sizeof (buf) / sizeof (*pos));
25986 strncpy (buf, arm_selected_arch->name,
25987 (pos - arm_selected_arch->name) * sizeof (*pos));
25988 buf[pos - arm_selected_arch->name] = '\0';
25989 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25990 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25992 else
25993 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25996 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25997 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25998 else
26000 const char* truncated_name
26001 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
26002 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
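      /* Illustrative example (the names are examples only): an -march value
	 such as armv8-a+crc is split at the '+' above and emitted as
	 ".arch armv8-a" plus ".arch_extension crc", whereas a plain -mcpu
	 selection produces a single ".cpu" line.  */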
26005 if (print_tune_info)
26006 arm_print_tune_info ();
26008 if (! TARGET_SOFT_FLOAT && TARGET_VFP)
26010 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26011 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26013 if (TARGET_HARD_FLOAT_ABI)
26014 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26017 /* Some of these attributes only apply when the corresponding features
26018 are used. However we don't have any easy way of figuring this out.
26019 Conservatively record the setting that would have been used. */
26021 if (flag_rounding_math)
26022 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26024 if (!flag_unsafe_math_optimizations)
26026 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26027 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26029 if (flag_signaling_nans)
26030 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26032 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26033 flag_finite_math_only ? 1 : 3);
26035 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26036 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26037 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26038 flag_short_enums ? 1 : 2);
26040 /* Tag_ABI_optimization_goals. */
26041 if (optimize_size)
26042 val = 4;
26043 else if (optimize >= 2)
26044 val = 2;
26045 else if (optimize)
26046 val = 1;
26047 else
26048 val = 6;
26049 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
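  /* Illustrative example: an -Os compile records value 4 for this tag,
     -O2 records value 2 and -O0 records value 6, per the mapping above.  */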
26051 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26052 unaligned_access);
26054 if (arm_fp16_format)
26055 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26056 (int) arm_fp16_format);
26058 if (arm_lang_output_object_attributes_hook)
26059 arm_lang_output_object_attributes_hook();
26062 default_file_start ();
26065 static void
26066 arm_file_end (void)
26068 int regno;
26070 if (NEED_INDICATE_EXEC_STACK)
26071 /* Add .note.GNU-stack. */
26072 file_end_indicate_exec_stack ();
26074 if (! thumb_call_reg_needed)
26075 return;
26077 switch_to_section (text_section);
26078 asm_fprintf (asm_out_file, "\t.code 16\n");
26079 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26081 for (regno = 0; regno < LR_REGNUM; regno++)
26083 rtx label = thumb_call_via_label[regno];
26085 if (label != 0)
26087 targetm.asm_out.internal_label (asm_out_file, "L",
26088 CODE_LABEL_NUMBER (label));
26089 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26094 #ifndef ARM_PE
26095 /* Symbols in the text segment can be accessed without indirecting via the
26096 constant pool; it may take an extra binary operation, but this is still
26097 faster than indirecting via memory. Don't do this when not optimizing,
26098 since we won't be calculating all of the offsets necessary to do this
26099 simplification. */
26101 static void
26102 arm_encode_section_info (tree decl, rtx rtl, int first)
26104 if (optimize > 0 && TREE_CONSTANT (decl))
26105 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26107 default_encode_section_info (decl, rtl, first);
26109 #endif /* !ARM_PE */
26111 static void
26112 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26114 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26115 && !strcmp (prefix, "L"))
26117 arm_ccfsm_state = 0;
26118 arm_target_insn = NULL;
26120 default_internal_label (stream, prefix, labelno);
26123 /* Output code to add DELTA to the first argument, and then jump
26124 to FUNCTION. Used for C++ multiple inheritance. */
26126 static void
26127 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26128 HOST_WIDE_INT, tree function)
26130 static int thunk_label = 0;
26131 char label[256];
26132 char labelpc[256];
26133 int mi_delta = delta;
26134 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26135 int shift = 0;
26136 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26137 ? 1 : 0);
26138 if (mi_delta < 0)
26139 mi_delta = - mi_delta;
26141 final_start_function (emit_barrier (), file, 1);
26143 if (TARGET_THUMB1)
26145 int labelno = thunk_label++;
26146 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26147 /* Thunks are entered in ARM mode when available. */
26148 if (TARGET_THUMB1_ONLY)
26150 /* push r3 so we can use it as a temporary. */
26151 /* TODO: Omit this save if r3 is not used. */
26152 fputs ("\tpush {r3}\n", file);
26153 fputs ("\tldr\tr3, ", file);
26155 else
26157 fputs ("\tldr\tr12, ", file);
26159 assemble_name (file, label);
26160 fputc ('\n', file);
26161 if (flag_pic)
26163 /* If we are generating PIC, the ldr instruction below loads
26164 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26165 the address of the add + 8, so we have:
26167 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26168 = target + 1.
26170 Note that we have "+ 1" because some versions of GNU ld
26171 don't set the low bit of the result for R_ARM_REL32
26172 relocations against thumb function symbols.
26173 On ARMv6M this is +4, not +8. */
26174 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26175 assemble_name (file, labelpc);
26176 fputs (":\n", file);
26177 if (TARGET_THUMB1_ONLY)
26179 /* This is 2 insns after the start of the thunk, so we know it
26180 is 4-byte aligned. */
26181 fputs ("\tadd\tr3, pc, r3\n", file);
26182 fputs ("\tmov r12, r3\n", file);
26184 else
26185 fputs ("\tadd\tr12, pc, r12\n", file);
26187 else if (TARGET_THUMB1_ONLY)
26188 fputs ("\tmov r12, r3\n", file);
26190 if (TARGET_THUMB1_ONLY)
26192 if (mi_delta > 255)
26194 fputs ("\tldr\tr3, ", file);
26195 assemble_name (file, label);
26196 fputs ("+4\n", file);
26197 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26198 mi_op, this_regno, this_regno);
26200 else if (mi_delta != 0)
26202 /* Thumb1 unified syntax requires s suffix in instruction name when
26203 one of the operands is immediate. */
26204 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26205 mi_op, this_regno, this_regno,
26206 mi_delta);
26209 else
26211 /* TODO: Use movw/movt for large constants when available. */
26212 while (mi_delta != 0)
26214 if ((mi_delta & (3 << shift)) == 0)
26215 shift += 2;
26216 else
26218 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26219 mi_op, this_regno, this_regno,
26220 mi_delta & (0xff << shift));
26221 mi_delta &= ~(0xff << shift);
26222 shift += 8;
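	  /* Worked example (illustrative): in this branch a delta of 0x1234
	     is materialized as two adds, #0x234 followed by #0x1000, each an
	     8-bit value at an even rotation.  */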
26226 if (TARGET_THUMB1)
26228 if (TARGET_THUMB1_ONLY)
26229 fputs ("\tpop\t{r3}\n", file);
26231 fprintf (file, "\tbx\tr12\n");
26232 ASM_OUTPUT_ALIGN (file, 2);
26233 assemble_name (file, label);
26234 fputs (":\n", file);
26235 if (flag_pic)
26237 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26238 rtx tem = XEXP (DECL_RTL (function), 0);
26239 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26240 pipeline offset is four rather than eight. Adjust the offset
26241 accordingly. */
26242 tem = plus_constant (GET_MODE (tem), tem,
26243 TARGET_THUMB1_ONLY ? -3 : -7);
26244 tem = gen_rtx_MINUS (GET_MODE (tem),
26245 tem,
26246 gen_rtx_SYMBOL_REF (Pmode,
26247 ggc_strdup (labelpc)));
26248 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26250 else
26251 /* Output ".word .LTHUNKn". */
26252 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26254 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26255 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26257 else
26259 fputs ("\tb\t", file);
26260 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26261 if (NEED_PLT_RELOC)
26262 fputs ("(PLT)", file);
26263 fputc ('\n', file);
26266 final_end_function ();
26269 /* MI thunk handling for TARGET_32BIT. */
26271 static void
26272 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26273 HOST_WIDE_INT vcall_offset, tree function)
26275 /* On ARM, this_regno is R0 or R1 depending on
26276 whether the function returns an aggregate or not.
26278 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26279 function)
26280 ? R1_REGNUM : R0_REGNUM);
26282 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26283 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26284 reload_completed = 1;
26285 emit_note (NOTE_INSN_PROLOGUE_END);
26287 /* Add DELTA to THIS_RTX. */
26288 if (delta != 0)
26289 arm_split_constant (PLUS, Pmode, NULL_RTX,
26290 delta, this_rtx, this_rtx, false);
26292 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26293 if (vcall_offset != 0)
26295 /* Load *THIS_RTX. */
26296 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26297 /* Compute *THIS_RTX + VCALL_OFFSET. */
26298 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26299 false);
26300 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26301 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26302 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26305 /* Generate a tail call to the target function. */
26306 if (!TREE_USED (function))
26308 assemble_external (function);
26309 TREE_USED (function) = 1;
26311 rtx funexp = XEXP (DECL_RTL (function), 0);
26312 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26313 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26314 SIBLING_CALL_P (insn) = 1;
26316 insn = get_insns ();
26317 shorten_branches (insn);
26318 final_start_function (insn, file, 1);
26319 final (insn, file, 1);
26320 final_end_function ();
26322 /* Stop pretending this is a post-reload pass. */
26323 reload_completed = 0;
26326 /* Output code to add DELTA to the first argument, and then jump
26327 to FUNCTION. Used for C++ multiple inheritance. */
26329 static void
26330 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26331 HOST_WIDE_INT vcall_offset, tree function)
26333 if (TARGET_32BIT)
26334 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26335 else
26336 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26340 arm_emit_vector_const (FILE *file, rtx x)
26342 int i;
26343 const char * pattern;
26345 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26347 switch (GET_MODE (x))
26349 case V2SImode: pattern = "%08x"; break;
26350 case V4HImode: pattern = "%04x"; break;
26351 case V8QImode: pattern = "%02x"; break;
26352 default: gcc_unreachable ();
26355 fprintf (file, "0x");
26356 for (i = CONST_VECTOR_NUNITS (x); i--;)
26358 rtx element;
26360 element = CONST_VECTOR_ELT (x, i);
26361 fprintf (file, pattern, INTVAL (element));
26364 return 1;
26367 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26368 HFmode constant pool entries are actually loaded with ldr. */
26369 void
26370 arm_emit_fp16_const (rtx c)
26372 long bits;
26374 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26375 if (WORDS_BIG_ENDIAN)
26376 assemble_zeros (2);
26377 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26378 if (!WORDS_BIG_ENDIAN)
26379 assemble_zeros (2);
26382 const char *
26383 arm_output_load_gr (rtx *operands)
26385 rtx reg;
26386 rtx offset;
26387 rtx wcgr;
26388 rtx sum;
26390 if (!MEM_P (operands [1])
26391 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26392 || !REG_P (reg = XEXP (sum, 0))
26393 || !CONST_INT_P (offset = XEXP (sum, 1))
26394 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26395 return "wldrw%?\t%0, %1";
26397 /* Fix up an out-of-range load of a GR register. */
26398 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26399 wcgr = operands[0];
26400 operands[0] = reg;
26401 output_asm_insn ("ldr%?\t%0, %1", operands);
26403 operands[0] = wcgr;
26404 operands[1] = reg;
26405 output_asm_insn ("tmcr%?\t%0, %1", operands);
26406 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26408 return "";
26411 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26413 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26414 named arg and all anonymous args onto the stack.
26415 XXX I know the prologue shouldn't be pushing registers, but it is faster
26416 that way. */
26418 static void
26419 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26420 machine_mode mode,
26421 tree type,
26422 int *pretend_size,
26423 int second_time ATTRIBUTE_UNUSED)
26425 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26426 int nregs;
26428 cfun->machine->uses_anonymous_args = 1;
26429 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26431 nregs = pcum->aapcs_ncrn;
26432 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26433 nregs++;
26435 else
26436 nregs = pcum->nregs;
26438 if (nregs < NUM_ARG_REGS)
26439 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
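  /* Worked example (illustrative): for a variadic function whose only named
     argument is an int, nregs is 1, so *pretend_size becomes
     (NUM_ARG_REGS - 1) * UNITS_PER_WORD, i.e. 12 bytes, and the prologue
     pushes r1-r3 just below the stacked anonymous arguments.  */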
26442 /* We can't rely on the caller doing the proper promotion when
26443 using APCS or ATPCS. */
26445 static bool
26446 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26448 return !TARGET_AAPCS_BASED;
26451 static machine_mode
26452 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26453 machine_mode mode,
26454 int *punsignedp ATTRIBUTE_UNUSED,
26455 const_tree fntype ATTRIBUTE_UNUSED,
26456 int for_return ATTRIBUTE_UNUSED)
26458 if (GET_MODE_CLASS (mode) == MODE_INT
26459 && GET_MODE_SIZE (mode) < 4)
26460 return SImode;
26462 return mode;
26465 /* AAPCS based ABIs use short enums by default. */
26467 static bool
26468 arm_default_short_enums (void)
26470 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26474 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26476 static bool
26477 arm_align_anon_bitfield (void)
26479 return TARGET_AAPCS_BASED;
26483 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26485 static tree
26486 arm_cxx_guard_type (void)
26488 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26492 /* The EABI says test the least significant bit of a guard variable. */
26494 static bool
26495 arm_cxx_guard_mask_bit (void)
26497 return TARGET_AAPCS_BASED;
26501 /* The EABI specifies that all array cookies are 8 bytes long. */
26503 static tree
26504 arm_get_cookie_size (tree type)
26506 tree size;
26508 if (!TARGET_AAPCS_BASED)
26509 return default_cxx_get_cookie_size (type);
26511 size = build_int_cst (sizetype, 8);
26512 return size;
26516 /* The EABI says that array cookies should also contain the element size. */
26518 static bool
26519 arm_cookie_has_size (void)
26521 return TARGET_AAPCS_BASED;
26525 /* The EABI says constructors and destructors should return a pointer to
26526 the object constructed/destroyed. */
26528 static bool
26529 arm_cxx_cdtor_returns_this (void)
26531 return TARGET_AAPCS_BASED;
26534 /* The EABI says that an inline function may never be the key
26535 method. */
26537 static bool
26538 arm_cxx_key_method_may_be_inline (void)
26540 return !TARGET_AAPCS_BASED;
26543 static void
26544 arm_cxx_determine_class_data_visibility (tree decl)
26546 if (!TARGET_AAPCS_BASED
26547 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26548 return;
26550 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26551 is exported. However, on systems without dynamic vague linkage,
26552 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26553 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26554 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26555 else
26556 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26557 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26560 static bool
26561 arm_cxx_class_data_always_comdat (void)
26563 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26564 vague linkage if the class has no key function. */
26565 return !TARGET_AAPCS_BASED;
26569 /* The EABI says __aeabi_atexit should be used to register static
26570 destructors. */
26572 static bool
26573 arm_cxx_use_aeabi_atexit (void)
26575 return TARGET_AAPCS_BASED;
26579 void
26580 arm_set_return_address (rtx source, rtx scratch)
26582 arm_stack_offsets *offsets;
26583 HOST_WIDE_INT delta;
26584 rtx addr;
26585 unsigned long saved_regs;
26587 offsets = arm_get_frame_offsets ();
26588 saved_regs = offsets->saved_regs_mask;
26590 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26591 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26592 else
26594 if (frame_pointer_needed)
26595 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26596 else
26598 /* LR will be the first saved register. */
26599 delta = offsets->outgoing_args - (offsets->frame + 4);
26602 if (delta >= 4096)
26604 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26605 GEN_INT (delta & ~4095)));
26606 addr = scratch;
26607 delta &= 4095;
26609 else
26610 addr = stack_pointer_rtx;
26612 addr = plus_constant (Pmode, addr, delta);
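	  /* Worked example (illustrative): for delta == 5000 the scratch
	     register gets sp + 4096 (5000 & ~4095) and the store below uses
	     an offset of 904 (5000 & 4095), keeping it in range.  */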
26614 /* The store needs to be marked as frame related in order to prevent
26615 DSE from deleting it as dead if it is based on fp. */
26616 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26617 RTX_FRAME_RELATED_P (insn) = 1;
26618 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26623 void
26624 thumb_set_return_address (rtx source, rtx scratch)
26626 arm_stack_offsets *offsets;
26627 HOST_WIDE_INT delta;
26628 HOST_WIDE_INT limit;
26629 int reg;
26630 rtx addr;
26631 unsigned long mask;
26633 emit_use (source);
26635 offsets = arm_get_frame_offsets ();
26636 mask = offsets->saved_regs_mask;
26637 if (mask & (1 << LR_REGNUM))
26639 limit = 1024;
26640 /* Find the saved regs. */
26641 if (frame_pointer_needed)
26643 delta = offsets->soft_frame - offsets->saved_args;
26644 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26645 if (TARGET_THUMB1)
26646 limit = 128;
26648 else
26650 delta = offsets->outgoing_args - offsets->saved_args;
26651 reg = SP_REGNUM;
26653 /* Allow for the stack frame. */
26654 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26655 delta -= 16;
26656 /* The link register is always the first saved register. */
26657 delta -= 4;
26659 /* Construct the address. */
26660 addr = gen_rtx_REG (SImode, reg);
26661 if (delta > limit)
26663 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26664 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26665 addr = scratch;
26667 else
26668 addr = plus_constant (Pmode, addr, delta);
26670 /* The store needs to be marked as frame related in order to prevent
26671 DSE from deleting it as dead if it is based on fp. */
26672 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26673 RTX_FRAME_RELATED_P (insn) = 1;
26674 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26676 else
26677 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26680 /* Implements target hook vector_mode_supported_p. */
26681 bool
26682 arm_vector_mode_supported_p (machine_mode mode)
26684 /* Neon also supports V2SImode, etc. listed in the clause below. */
26685 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26686 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26687 || mode == V2DImode || mode == V8HFmode))
26688 return true;
26690 if ((TARGET_NEON || TARGET_IWMMXT)
26691 && ((mode == V2SImode)
26692 || (mode == V4HImode)
26693 || (mode == V8QImode)))
26694 return true;
26696 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26697 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26698 || mode == V2HAmode))
26699 return true;
26701 return false;
26704 /* Implements target hook array_mode_supported_p. */
26706 static bool
26707 arm_array_mode_supported_p (machine_mode mode,
26708 unsigned HOST_WIDE_INT nelems)
26710 if (TARGET_NEON
26711 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26712 && (nelems >= 2 && nelems <= 4))
26713 return true;
26715 return false;
26718 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26719 registers when autovectorizing for Neon, at least until multiple vector
26720 widths are supported properly by the middle-end. */
26722 static machine_mode
26723 arm_preferred_simd_mode (machine_mode mode)
26725 if (TARGET_NEON)
26726 switch (mode)
26728 case SFmode:
26729 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26730 case SImode:
26731 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26732 case HImode:
26733 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26734 case QImode:
26735 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26736 case DImode:
26737 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26738 return V2DImode;
26739 break;
26741 default:;
26744 if (TARGET_REALLY_IWMMXT)
26745 switch (mode)
26747 case SImode:
26748 return V2SImode;
26749 case HImode:
26750 return V4HImode;
26751 case QImode:
26752 return V8QImode;
26754 default:;
26757 return word_mode;
26760 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26762 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26763 using r0-r4 for function arguments, r7 for the stack frame and don't have
26764 enough left over to do doubleword arithmetic. For Thumb-2 all the
26765 potentially problematic instructions accept high registers so this is not
26766 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26767 that require many low registers. */
26768 static bool
26769 arm_class_likely_spilled_p (reg_class_t rclass)
26771 if ((TARGET_THUMB1 && rclass == LO_REGS)
26772 || rclass == CC_REG)
26773 return true;
26775 return false;
26778 /* Implements target hook small_register_classes_for_mode_p. */
26779 bool
26780 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26782 return TARGET_THUMB1;
26785 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26786 ARM insns and therefore guarantee that the shift count is modulo 256.
26787 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26788 guarantee no particular behavior for out-of-range counts. */
26790 static unsigned HOST_WIDE_INT
26791 arm_shift_truncation_mask (machine_mode mode)
26793 return mode == SImode ? 255 : 0;
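/* Illustrative example: given this mask, the middle-end may assume that an
   SImode shift by 260 behaves like a shift by 4 (260 & 255), while no such
   assumption is made for DImode shift counts.  */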
26797 /* Map internal gcc register numbers to DWARF2 register numbers. */
26799 unsigned int
26800 arm_dbx_register_number (unsigned int regno)
26802 if (regno < 16)
26803 return regno;
26805 if (IS_VFP_REGNUM (regno))
26807 /* See comment in arm_dwarf_register_span. */
26808 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26809 return 64 + regno - FIRST_VFP_REGNUM;
26810 else
26811 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26814 if (IS_IWMMXT_GR_REGNUM (regno))
26815 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26817 if (IS_IWMMXT_REGNUM (regno))
26818 return 112 + regno - FIRST_IWMMXT_REGNUM;
26820 return DWARF_FRAME_REGISTERS;
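/* Illustrative examples: r7 maps to 7, s5 maps to 64 + 5 == 69, and a
   double-only register such as d16 maps to 256 + 16 == 272.  */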
26823 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26824 GCC models them as 64 32-bit registers, so we need to describe this to
26825 the DWARF generation code. Other registers can use the default. */
26826 static rtx
26827 arm_dwarf_register_span (rtx rtl)
26829 machine_mode mode;
26830 unsigned regno;
26831 rtx parts[16];
26832 int nregs;
26833 int i;
26835 regno = REGNO (rtl);
26836 if (!IS_VFP_REGNUM (regno))
26837 return NULL_RTX;
26839 /* XXX FIXME: The EABI defines two VFP register ranges:
26840 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26841 256-287: D0-D31
26842 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26843 corresponding D register. Until GDB supports this, we shall use the
26844 legacy encodings. We also use these encodings for D0-D15 for
26845 compatibility with older debuggers. */
26846 mode = GET_MODE (rtl);
26847 if (GET_MODE_SIZE (mode) < 8)
26848 return NULL_RTX;
26850 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26852 nregs = GET_MODE_SIZE (mode) / 4;
26853 for (i = 0; i < nregs; i += 2)
26854 if (TARGET_BIG_END)
26856 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26857 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26859 else
26861 parts[i] = gen_rtx_REG (SImode, regno + i);
26862 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26865 else
26867 nregs = GET_MODE_SIZE (mode) / 8;
26868 for (i = 0; i < nregs; i++)
26869 parts[i] = gen_rtx_REG (DImode, regno + i);
26872 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
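/* Illustrative example: a DFmode value held in d5 is reported as the pair of
   SImode registers s10 and s11 (swapped when TARGET_BIG_END), whereas
   d16-d31 are described directly as DImode registers.  */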
26875 #if ARM_UNWIND_INFO
26876 /* Emit unwind directives for a store-multiple instruction or stack pointer
26877 push during alignment.
26878 These should only ever be generated by the function prologue code, so
26879 expect them to have a particular form.
26880 The store-multiple instruction sometimes pushes pc as the last register,
26881 although it should not be tracked into unwind information, or for -Os
26882 sometimes pushes some dummy registers before the first register that needs
26883 to be tracked in unwind information; such dummy registers are there just
26884 to avoid separate stack adjustment, and will not be restored in the
26885 epilogue. */
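/* Illustrative example: a prologue push of {r4, r5, lr} is annotated as
   ".save {r4, r5, lr}", a vstmdb of d8-d9 as ".vsave {d8, d9}", and a
   trailing push of pc that only adjusts the stack becomes ".pad #4".  */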
26887 static void
26888 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26890 int i;
26891 HOST_WIDE_INT offset;
26892 HOST_WIDE_INT nregs;
26893 int reg_size;
26894 unsigned reg;
26895 unsigned lastreg;
26896 unsigned padfirst = 0, padlast = 0;
26897 rtx e;
26899 e = XVECEXP (p, 0, 0);
26900 gcc_assert (GET_CODE (e) == SET);
26902 /* First insn will adjust the stack pointer. */
26903 gcc_assert (GET_CODE (e) == SET
26904 && REG_P (SET_DEST (e))
26905 && REGNO (SET_DEST (e)) == SP_REGNUM
26906 && GET_CODE (SET_SRC (e)) == PLUS);
26908 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26909 nregs = XVECLEN (p, 0) - 1;
26910 gcc_assert (nregs);
26912 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26913 if (reg < 16)
26915 /* For -Os dummy registers can be pushed at the beginning to
26916 avoid separate stack pointer adjustment. */
26917 e = XVECEXP (p, 0, 1);
26918 e = XEXP (SET_DEST (e), 0);
26919 if (GET_CODE (e) == PLUS)
26920 padfirst = INTVAL (XEXP (e, 1));
26921 gcc_assert (padfirst == 0 || optimize_size);
26922 /* The function prologue may also push pc, but not annotate it as it is
26923 never restored. We turn this into a stack pointer adjustment. */
26924 e = XVECEXP (p, 0, nregs);
26925 e = XEXP (SET_DEST (e), 0);
26926 if (GET_CODE (e) == PLUS)
26927 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26928 else
26929 padlast = offset - 4;
26930 gcc_assert (padlast == 0 || padlast == 4);
26931 if (padlast == 4)
26932 fprintf (asm_out_file, "\t.pad #4\n");
26933 reg_size = 4;
26934 fprintf (asm_out_file, "\t.save {");
26936 else if (IS_VFP_REGNUM (reg))
26938 reg_size = 8;
26939 fprintf (asm_out_file, "\t.vsave {");
26941 else
26942 /* Unknown register type. */
26943 gcc_unreachable ();
26945 /* If the stack increment doesn't match the size of the saved registers,
26946 something has gone horribly wrong. */
26947 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26949 offset = padfirst;
26950 lastreg = 0;
26951 /* The remaining insns will describe the stores. */
26952 for (i = 1; i <= nregs; i++)
26954 /* Expect (set (mem <addr>) (reg)).
26955 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26956 e = XVECEXP (p, 0, i);
26957 gcc_assert (GET_CODE (e) == SET
26958 && MEM_P (SET_DEST (e))
26959 && REG_P (SET_SRC (e)));
26961 reg = REGNO (SET_SRC (e));
26962 gcc_assert (reg >= lastreg);
26964 if (i != 1)
26965 fprintf (asm_out_file, ", ");
26966 /* We can't use %r for vfp because we need to use the
26967 double precision register names. */
26968 if (IS_VFP_REGNUM (reg))
26969 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26970 else
26971 asm_fprintf (asm_out_file, "%r", reg);
26973 if (flag_checking)
26975 /* Check that the addresses are consecutive. */
26976 e = XEXP (SET_DEST (e), 0);
26977 if (GET_CODE (e) == PLUS)
26978 gcc_assert (REG_P (XEXP (e, 0))
26979 && REGNO (XEXP (e, 0)) == SP_REGNUM
26980 && CONST_INT_P (XEXP (e, 1))
26981 && offset == INTVAL (XEXP (e, 1)));
26982 else
26983 gcc_assert (i == 1
26984 && REG_P (e)
26985 && REGNO (e) == SP_REGNUM);
26986 offset += reg_size;
26989 fprintf (asm_out_file, "}\n");
26990 if (padfirst)
26991 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26994 /* Emit unwind directives for a SET. */
26996 static void
26997 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26999 rtx e0;
27000 rtx e1;
27001 unsigned reg;
27003 e0 = XEXP (p, 0);
27004 e1 = XEXP (p, 1);
27005 switch (GET_CODE (e0))
27007 case MEM:
27008 /* Pushing a single register. */
27009 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27010 || !REG_P (XEXP (XEXP (e0, 0), 0))
27011 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27012 abort ();
27014 asm_fprintf (asm_out_file, "\t.save ");
27015 if (IS_VFP_REGNUM (REGNO (e1)))
27016 asm_fprintf(asm_out_file, "{d%d}\n",
27017 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27018 else
27019 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27020 break;
27022 case REG:
27023 if (REGNO (e0) == SP_REGNUM)
27025 /* A stack increment. */
27026 if (GET_CODE (e1) != PLUS
27027 || !REG_P (XEXP (e1, 0))
27028 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27029 || !CONST_INT_P (XEXP (e1, 1)))
27030 abort ();
27032 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27033 -INTVAL (XEXP (e1, 1)));
27035 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27037 HOST_WIDE_INT offset;
27039 if (GET_CODE (e1) == PLUS)
27041 if (!REG_P (XEXP (e1, 0))
27042 || !CONST_INT_P (XEXP (e1, 1)))
27043 abort ();
27044 reg = REGNO (XEXP (e1, 0));
27045 offset = INTVAL (XEXP (e1, 1));
27046 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27047 HARD_FRAME_POINTER_REGNUM, reg,
27048 offset);
27050 else if (REG_P (e1))
27052 reg = REGNO (e1);
27053 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27054 HARD_FRAME_POINTER_REGNUM, reg);
27056 else
27057 abort ();
27059 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27061 /* Move from sp to reg. */
27062 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27064 else if (GET_CODE (e1) == PLUS
27065 && REG_P (XEXP (e1, 0))
27066 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27067 && CONST_INT_P (XEXP (e1, 1)))
27069 /* Set reg to offset from sp. */
27070 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27071 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27073 else
27074 abort ();
27075 break;
27077 default:
27078 abort ();
27083 /* Emit unwind directives for the given insn. */
27085 static void
27086 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27088 rtx note, pat;
27089 bool handled_one = false;
27091 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27092 return;
27094 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27095 && (TREE_NOTHROW (current_function_decl)
27096 || crtl->all_throwers_are_sibcalls))
27097 return;
27099 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27100 return;
27102 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27104 switch (REG_NOTE_KIND (note))
27106 case REG_FRAME_RELATED_EXPR:
27107 pat = XEXP (note, 0);
27108 goto found;
27110 case REG_CFA_REGISTER:
27111 pat = XEXP (note, 0);
27112 if (pat == NULL)
27114 pat = PATTERN (insn);
27115 if (GET_CODE (pat) == PARALLEL)
27116 pat = XVECEXP (pat, 0, 0);
27119 /* Only emitted for IS_STACKALIGN re-alignment. */
27121 rtx dest, src;
27122 unsigned reg;
27124 src = SET_SRC (pat);
27125 dest = SET_DEST (pat);
27127 gcc_assert (src == stack_pointer_rtx);
27128 reg = REGNO (dest);
27129 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27130 reg + 0x90, reg);
27132 handled_one = true;
27133 break;
27135 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
27136 to get correct dwarf information for shrink-wrap. We should not
27137 emit unwind information for it because these are used either for
27138 pretend arguments or notes to adjust sp and restore registers from
27139 stack. */
27140 case REG_CFA_DEF_CFA:
27141 case REG_CFA_ADJUST_CFA:
27142 case REG_CFA_RESTORE:
27143 return;
27145 case REG_CFA_EXPRESSION:
27146 case REG_CFA_OFFSET:
27147 /* ??? Only handling here what we actually emit. */
27148 gcc_unreachable ();
27150 default:
27151 break;
27154 if (handled_one)
27155 return;
27156 pat = PATTERN (insn);
27157 found:
27159 switch (GET_CODE (pat))
27161 case SET:
27162 arm_unwind_emit_set (asm_out_file, pat);
27163 break;
27165 case SEQUENCE:
27166 /* Store multiple. */
27167 arm_unwind_emit_sequence (asm_out_file, pat);
27168 break;
27170 default:
27171 abort ();
27176 /* Output a reference from a function exception table to the type_info
27177 object X. The EABI specifies that the symbol should be relocated by
27178 an R_ARM_TARGET2 relocation. */
27180 static bool
27181 arm_output_ttype (rtx x)
27183 fputs ("\t.word\t", asm_out_file);
27184 output_addr_const (asm_out_file, x);
27185 /* Use special relocations for symbol references. */
27186 if (!CONST_INT_P (x))
27187 fputs ("(TARGET2)", asm_out_file);
27188 fputc ('\n', asm_out_file);
27190 return TRUE;
27193 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27195 static void
27196 arm_asm_emit_except_personality (rtx personality)
27198 fputs ("\t.personality\t", asm_out_file);
27199 output_addr_const (asm_out_file, personality);
27200 fputc ('\n', asm_out_file);
27203 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27205 static void
27206 arm_asm_init_sections (void)
27208 exception_section = get_unnamed_section (0, output_section_asm_op,
27209 "\t.handlerdata");
27211 #endif /* ARM_UNWIND_INFO */
27213 /* Output unwind directives for the start/end of a function. */
27215 void
27216 arm_output_fn_unwind (FILE * f, bool prologue)
27218 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27219 return;
27221 if (prologue)
27222 fputs ("\t.fnstart\n", f);
27223 else
27225 /* If this function will never be unwound, then mark it as such.
27226 The same condition is used in arm_unwind_emit to suppress
27227 the frame annotations. */
27228 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27229 && (TREE_NOTHROW (current_function_decl)
27230 || crtl->all_throwers_are_sibcalls))
27231 fputs("\t.cantunwind\n", f);
27233 fputs ("\t.fnend\n", f);
27237 static bool
27238 arm_emit_tls_decoration (FILE *fp, rtx x)
27240 enum tls_reloc reloc;
27241 rtx val;
27243 val = XVECEXP (x, 0, 0);
27244 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27246 output_addr_const (fp, val);
27248 switch (reloc)
27250 case TLS_GD32:
27251 fputs ("(tlsgd)", fp);
27252 break;
27253 case TLS_LDM32:
27254 fputs ("(tlsldm)", fp);
27255 break;
27256 case TLS_LDO32:
27257 fputs ("(tlsldo)", fp);
27258 break;
27259 case TLS_IE32:
27260 fputs ("(gottpoff)", fp);
27261 break;
27262 case TLS_LE32:
27263 fputs ("(tpoff)", fp);
27264 break;
27265 case TLS_DESCSEQ:
27266 fputs ("(tlsdesc)", fp);
27267 break;
27268 default:
27269 gcc_unreachable ();
27272 switch (reloc)
27274 case TLS_GD32:
27275 case TLS_LDM32:
27276 case TLS_IE32:
27277 case TLS_DESCSEQ:
27278 fputs (" + (. - ", fp);
27279 output_addr_const (fp, XVECEXP (x, 0, 2));
27280 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27281 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27282 output_addr_const (fp, XVECEXP (x, 0, 3));
27283 fputc (')', fp);
27284 break;
27285 default:
27286 break;
27289 return TRUE;
27292 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27294 static void
27295 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27297 gcc_assert (size == 4);
27298 fputs ("\t.word\t", file);
27299 output_addr_const (file, x);
27300 fputs ("(tlsldo)", file);
27303 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27305 static bool
27306 arm_output_addr_const_extra (FILE *fp, rtx x)
27308 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27309 return arm_emit_tls_decoration (fp, x);
27310 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27312 char label[256];
27313 int labelno = INTVAL (XVECEXP (x, 0, 0));
27315 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27316 assemble_name_raw (fp, label);
27318 return TRUE;
27320 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27322 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27323 if (GOT_PCREL)
27324 fputs ("+.", fp);
27325 fputs ("-(", fp);
27326 output_addr_const (fp, XVECEXP (x, 0, 0));
27327 fputc (')', fp);
27328 return TRUE;
27330 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27332 output_addr_const (fp, XVECEXP (x, 0, 0));
27333 if (GOT_PCREL)
27334 fputs ("+.", fp);
27335 fputs ("-(", fp);
27336 output_addr_const (fp, XVECEXP (x, 0, 1));
27337 fputc (')', fp);
27338 return TRUE;
27340 else if (GET_CODE (x) == CONST_VECTOR)
27341 return arm_emit_vector_const (fp, x);
27343 return FALSE;
27346 /* Output assembly for a shift instruction.
27347 SET_FLAGS determines how the instruction modifies the condition codes.
27348 0 - Do not set condition codes.
27349 1 - Set condition codes.
27350 2 - Use smallest instruction. */
27351 const char *
27352 arm_output_shift (rtx *operands, int set_flags)
27354 char pattern[100];
27355 static const char flag_chars[3] = {'?', '.', '!'};
27356 const char *shift;
27357 HOST_WIDE_INT val;
27358 char c;
27360 c = flag_chars[set_flags];
27361 shift = shift_op(operands[3], &val);
27362 if (shift)
27364 if (val != -1)
27365 operands[2] = GEN_INT(val);
27366 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27368 else
27369 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27371 output_asm_insn (pattern, operands);
27372 return "";
27375 /* Output assembly for a WMMX immediate shift instruction. */
27376 const char *
27377 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27379 int shift = INTVAL (operands[2]);
27380 char templ[50];
27381 machine_mode opmode = GET_MODE (operands[0]);
27383 gcc_assert (shift >= 0);
27385 /* Handle shift values that exceed the maximum for the mode: > 63 (for the
27386 D qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27387 if (((opmode == V4HImode) && (shift > 15))
27388 || ((opmode == V2SImode) && (shift > 31))
27389 || ((opmode == DImode) && (shift > 63)))
27391 if (wror_or_wsra)
27393 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27394 output_asm_insn (templ, operands);
27395 if (opmode == DImode)
27397 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27398 output_asm_insn (templ, operands);
27401 else
27403 /* The destination register will contain all zeros. */
27404 sprintf (templ, "wzero\t%%0");
27405 output_asm_insn (templ, operands);
27407 return "";
27410 if ((opmode == DImode) && (shift > 32))
27412 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27413 output_asm_insn (templ, operands);
27414 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27415 output_asm_insn (templ, operands);
27417 else
27419 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27420 output_asm_insn (templ, operands);
27422 return "";
27425 /* Output assembly for a WMMX tinsr instruction. */
27426 const char *
27427 arm_output_iwmmxt_tinsr (rtx *operands)
27429 int mask = INTVAL (operands[3]);
27430 int i;
27431 char templ[50];
27432 int units = mode_nunits[GET_MODE (operands[0])];
27433 gcc_assert ((mask & (mask - 1)) == 0);
27434 for (i = 0; i < units; ++i)
27436 if ((mask & 0x01) == 1)
27438 break;
27440 mask >>= 1;
27442 gcc_assert (i < units);
27444 switch (GET_MODE (operands[0]))
27446 case V8QImode:
27447 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27448 break;
27449 case V4HImode:
27450 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27451 break;
27452 case V2SImode:
27453 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27454 break;
27455 default:
27456 gcc_unreachable ();
27457 break;
27459 output_asm_insn (templ, operands);
27461 return "";
27464 /* Output a Thumb-1 casesi dispatch sequence. */
27465 const char *
27466 thumb1_output_casesi (rtx *operands)
27468 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27470 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27472 switch (GET_MODE(diff_vec))
27474 case QImode:
27475 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27476 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27477 case HImode:
27478 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27479 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27480 case SImode:
27481 return "bl\t%___gnu_thumb1_case_si";
27482 default:
27483 gcc_unreachable ();
27487 /* Output a Thumb-2 casesi instruction. */
27488 const char *
27489 thumb2_output_casesi (rtx *operands)
27491 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27493 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27495 output_asm_insn ("cmp\t%0, %1", operands);
27496 output_asm_insn ("bhi\t%l3", operands);
27497 switch (GET_MODE(diff_vec))
27499 case QImode:
27500 return "tbb\t[%|pc, %0]";
27501 case HImode:
27502 return "tbh\t[%|pc, %0, lsl #1]";
27503 case SImode:
27504 if (flag_pic)
27506 output_asm_insn ("adr\t%4, %l2", operands);
27507 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27508 output_asm_insn ("add\t%4, %4, %5", operands);
27509 return "bx\t%4";
27511 else
27513 output_asm_insn ("adr\t%4, %l2", operands);
27514 return "ldr\t%|pc, [%4, %0, lsl #2]";
27516 default:
27517 gcc_unreachable ();
27521 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27522 per-core tuning structs. */
27523 static int
27524 arm_issue_rate (void)
27526 return current_tune->issue_rate;
27529 /* Return how many instructions should scheduler lookahead to choose the
27530 best one. */
27531 static int
27532 arm_first_cycle_multipass_dfa_lookahead (void)
27534 int issue_rate = arm_issue_rate ();
27536 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27539 /* Enable modeling of L2 auto-prefetcher. */
27540 static int
27541 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27543 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27546 const char *
27547 arm_mangle_type (const_tree type)
27549 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27550 has to be mangled as if it is in the "std" namespace. */
27551 if (TARGET_AAPCS_BASED
27552 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27553 return "St9__va_list";
27555 /* Half-precision float. */
27556 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27557 return "Dh";
27559 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27560 builtin type. */
27561 if (TYPE_NAME (type) != NULL)
27562 return arm_mangle_builtin_type (type);
27564 /* Use the default mangling. */
27565 return NULL;
27568 /* Order of allocation of core registers for Thumb: this allocation is
27569 written over the corresponding initial entries of the array
27570 initialized with REG_ALLOC_ORDER. We allocate all low registers
27571 first. Saving and restoring a low register is usually cheaper than
27572 using a call-clobbered high register. */
27574 static const int thumb_core_reg_alloc_order[] =
27576 3, 2, 1, 0, 4, 5, 6, 7,
27577 14, 12, 8, 9, 10, 11
27580 /* Adjust register allocation order when compiling for Thumb. */
27582 void
27583 arm_order_regs_for_local_alloc (void)
27585 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27586 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27587 if (TARGET_THUMB)
27588 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27589 sizeof (thumb_core_reg_alloc_order));
27592 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27594 bool
27595 arm_frame_pointer_required (void)
27597 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27598 return true;
27600 /* If the function receives nonlocal gotos, it needs to save the frame
27601 pointer in the nonlocal_goto_save_area object. */
27602 if (cfun->has_nonlocal_label)
27603 return true;
27605 /* The frame pointer is required for non-leaf APCS frames. */
27606 if (TARGET_ARM && TARGET_APCS_FRAME && !leaf_function_p ())
27607 return true;
27609 /* If we are probing the stack in the prologue, we will have a faulting
27610 instruction prior to the stack adjustment and this requires a frame
27611 pointer if we want to catch the exception using the EABI unwinder. */
27612 if (!IS_INTERRUPT (arm_current_func_type ())
27613 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27614 && arm_except_unwind_info (&global_options) == UI_TARGET
27615 && cfun->can_throw_non_call_exceptions)
27617 HOST_WIDE_INT size = get_frame_size ();
27619 /* That's irrelevant if there is no stack adjustment. */
27620 if (size <= 0)
27621 return false;
27623 /* That's relevant only if there is a stack probe. */
27624 if (crtl->is_leaf && !cfun->calls_alloca)
27626 /* We don't have the final size of the frame so adjust. */
27627 size += 32 * UNITS_PER_WORD;
27628 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27629 return true;
27631 else
27632 return true;
27635 return false;
27638 /* Only thumb1 can't support conditional execution, so return true if
27639 the target is not thumb1. */
27640 static bool
27641 arm_have_conditional_execution (void)
27643 return !TARGET_THUMB1;
27646 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27647 static HOST_WIDE_INT
27648 arm_vector_alignment (const_tree type)
27650 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27652 if (TARGET_AAPCS_BASED)
27653 align = MIN (align, 64);
27655 return align;
27658 static unsigned int
27659 arm_autovectorize_vector_sizes (void)
27661 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27664 static bool
27665 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27667 /* Vectors which aren't in packed structures will not be less aligned than
27668 the natural alignment of their element type, so this is safe. */
27669 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27670 return !is_packed;
27672 return default_builtin_vector_alignment_reachable (type, is_packed);
27675 static bool
27676 arm_builtin_support_vector_misalignment (machine_mode mode,
27677 const_tree type, int misalignment,
27678 bool is_packed)
27680 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27682 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27684 if (is_packed)
27685 return align == 1;
27687 /* If the misalignment is unknown, we should be able to handle the access
27688 so long as it is not to a member of a packed data structure. */
27689 if (misalignment == -1)
27690 return true;
27692 /* Return true if the misalignment is a multiple of the natural alignment
27693 of the vector's element type. This is probably always going to be
27694 true in practice, since we've already established that this isn't a
27695 packed access. */
27696 return ((misalignment % align) == 0);
27699 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27700 is_packed);
27703 static void
27704 arm_conditional_register_usage (void)
27706 int regno;
27708 if (TARGET_THUMB1 && optimize_size)
27710 /* When optimizing for size on Thumb-1, it's better not
27711 to use the HI regs, because of the overhead of
27712 stacking them. */
27713 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27714 fixed_regs[regno] = call_used_regs[regno] = 1;
27717 /* The link register can be clobbered by any branch insn,
27718 but we have no way to track that at present, so mark
27719 it as unavailable. */
27720 if (TARGET_THUMB1)
27721 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27723 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27725 /* VFPv3 registers are disabled when earlier VFP
27726 versions are selected due to the definition of
27727 LAST_VFP_REGNUM. */
27728 for (regno = FIRST_VFP_REGNUM;
27729 regno <= LAST_VFP_REGNUM; ++ regno)
27731 fixed_regs[regno] = 0;
27732 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27733 || regno >= FIRST_VFP_REGNUM + 32;
27737 if (TARGET_REALLY_IWMMXT)
27739 regno = FIRST_IWMMXT_GR_REGNUM;
27740 /* The 2002/10/09 revision of the XScale ABI has wCG0
27741 and wCG1 as call-preserved registers. The 2002/11/21
27742 revision changed this so that all wCG registers are
27743 scratch registers. */
27744 for (regno = FIRST_IWMMXT_GR_REGNUM;
27745 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27746 fixed_regs[regno] = 0;
27747 /* The XScale ABI has wR0 - wR9 as scratch registers,
27748 the rest as call-preserved registers. */
27749 for (regno = FIRST_IWMMXT_REGNUM;
27750 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27752 fixed_regs[regno] = 0;
27753 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27757 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27759 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27760 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27762 else if (TARGET_APCS_STACK)
27764 fixed_regs[10] = 1;
27765 call_used_regs[10] = 1;
27767 /* -mcaller-super-interworking reserves r11 for calls to
27768 _interwork_r11_call_via_rN(). Making the register global
27769 is an easy way of ensuring that it remains valid for all
27770 calls. */
27771 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27772 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27774 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27775 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27776 if (TARGET_CALLER_INTERWORKING)
27777 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27779 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27782 static reg_class_t
27783 arm_preferred_rename_class (reg_class_t rclass)
27785 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27786 using GENERAL_REGS. During the register renaming pass we prefer LO_REGS,
27787 so code size can be reduced. */
27788 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27789 return LO_REGS;
27790 else
27791 return NO_REGS;
27794 /* Compute the attribute "length" of insn "*push_multi".
27795 So this function MUST be kept in sync with that insn pattern. */
27797 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27799 int i, regno, hi_reg;
27800 int num_saves = XVECLEN (parallel_op, 0);
27802 /* ARM mode. */
27803 if (TARGET_ARM)
27804 return 4;
27805 /* Thumb1 mode. */
27806 if (TARGET_THUMB1)
27807 return 2;
27809 /* Thumb2 mode. */
27810 regno = REGNO (first_op);
27811 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27812 for (i = 1; i < num_saves && !hi_reg; i++)
27814 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27815 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27818 if (!hi_reg)
27819 return 2;
27820 return 4;
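/* For example, on Thumb-2 a "push {r4, r5, lr}" can use the 16-bit encoding
   (length 2), whereas "push {r4, r8}" needs the 32-bit encoding (length 4)
   because r8 is a high register other than LR.  */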
27823 /* Compute the number of instructions emitted by output_move_double. */
27825 arm_count_output_move_double_insns (rtx *operands)
27827 int count;
27828 rtx ops[2];
27829 /* output_move_double may modify the operands array, so call it
27830 here on a copy of the array. */
27831 ops[0] = operands[0];
27832 ops[1] = operands[1];
27833 output_move_double (ops, false, &count);
27834 return count;
27838 vfp3_const_double_for_fract_bits (rtx operand)
27840 REAL_VALUE_TYPE r0;
27842 if (!CONST_DOUBLE_P (operand))
27843 return 0;
27845 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
27846 if (exact_real_inverse (DFmode, &r0)
27847 && !REAL_VALUE_NEGATIVE (r0))
27849 if (exact_real_truncate (DFmode, &r0))
27851 HOST_WIDE_INT value = real_to_integer (&r0);
27852 value = value & 0xffffffff;
27853 if ((value != 0) && ( (value & (value - 1)) == 0))
27854 return int_log2 (value);
27857 return 0;
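/* As a worked example of the above: for the constant 0.125 the exact inverse
   is 8.0, so this returns int_log2 (8) == 3, i.e. the operand can be treated
   as a fixed-point value with 3 fraction bits; constants whose inverse is not
   an exact power of two yield 0.  */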
27860 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
27861 log2 is in [1, 32], return that log2. Otherwise return -1.
27862 This is used in the patterns for vcvt.s32.f32 floating-point to
27863 fixed-point conversions. */
27866 vfp3_const_double_for_bits (rtx x)
27868 const REAL_VALUE_TYPE *r;
27870 if (!CONST_DOUBLE_P (x))
27871 return -1;
27873 r = CONST_DOUBLE_REAL_VALUE (x);
27875 if (REAL_VALUE_NEGATIVE (*r)
27876 || REAL_VALUE_ISNAN (*r)
27877 || REAL_VALUE_ISINF (*r)
27878 || !real_isinteger (r, SFmode))
27879 return -1;
27881 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
27883 /* The exact_log2 above will have returned -1 if this is
27884 not an exact log2. */
27885 if (!IN_RANGE (hwint, 1, 32))
27886 return -1;
27888 return hwint;
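/* For instance, a CONST_DOUBLE of 32.0 gives exact_log2 (32) == 5, which lies
   in [1, 32] and is returned; 3.0 (not a power of two) or 2^33 (log2 out of
   range) both yield -1.  */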
27892 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27894 static void
27895 arm_pre_atomic_barrier (enum memmodel model)
27897 if (need_atomic_barrier_p (model, true))
27898 emit_insn (gen_memory_barrier ());
27901 static void
27902 arm_post_atomic_barrier (enum memmodel model)
27904 if (need_atomic_barrier_p (model, false))
27905 emit_insn (gen_memory_barrier ());
27908 /* Emit the load-exclusive and store-exclusive instructions.
27909 Use acquire and release versions if necessary. */
27911 static void
27912 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27914 rtx (*gen) (rtx, rtx);
27916 if (acq)
27918 switch (mode)
27920 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27921 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27922 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27923 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27924 default:
27925 gcc_unreachable ();
27928 else
27930 switch (mode)
27932 case QImode: gen = gen_arm_load_exclusiveqi; break;
27933 case HImode: gen = gen_arm_load_exclusivehi; break;
27934 case SImode: gen = gen_arm_load_exclusivesi; break;
27935 case DImode: gen = gen_arm_load_exclusivedi; break;
27936 default:
27937 gcc_unreachable ();
27941 emit_insn (gen (rval, mem));
27944 static void
27945 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27946 rtx mem, bool rel)
27948 rtx (*gen) (rtx, rtx, rtx);
27950 if (rel)
27952 switch (mode)
27954 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27955 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27956 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27957 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27958 default:
27959 gcc_unreachable ();
27962 else
27964 switch (mode)
27966 case QImode: gen = gen_arm_store_exclusiveqi; break;
27967 case HImode: gen = gen_arm_store_exclusivehi; break;
27968 case SImode: gen = gen_arm_store_exclusivesi; break;
27969 case DImode: gen = gen_arm_store_exclusivedi; break;
27970 default:
27971 gcc_unreachable ();
27975 emit_insn (gen (bval, rval, mem));
27978 /* Mark the previous jump instruction as unlikely. */
27980 static void
27981 emit_unlikely_jump (rtx insn)
27983 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27985 insn = emit_jump_insn (insn);
27986 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
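/* REG_BR_PROB_BASE is 10000, so the note above marks the branch as taken with
   probability just under 1%, steering the optimizers away from the retry
   paths of the exclusive load/store loops emitted below.  */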
27989 /* Expand a compare and swap pattern. */
27991 void
27992 arm_expand_compare_and_swap (rtx operands[])
27994 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27995 machine_mode mode;
27996 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27998 bval = operands[0];
27999 rval = operands[1];
28000 mem = operands[2];
28001 oldval = operands[3];
28002 newval = operands[4];
28003 is_weak = operands[5];
28004 mod_s = operands[6];
28005 mod_f = operands[7];
28006 mode = GET_MODE (mem);
28008 /* Normally the succ memory model must be stronger than fail, but in the
28009 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28010 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28012 if (TARGET_HAVE_LDACQ
28013 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28014 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28015 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28017 switch (mode)
28019 case QImode:
28020 case HImode:
28021 /* For narrow modes, we're going to perform the comparison in SImode,
28022 so do the zero-extension now. */
28023 rval = gen_reg_rtx (SImode);
28024 oldval = convert_modes (SImode, mode, oldval, true);
28025 /* FALLTHRU */
28027 case SImode:
28028 /* Force the value into a register if needed. We waited until after
28029 the zero-extension above to do this properly. */
28030 if (!arm_add_operand (oldval, SImode))
28031 oldval = force_reg (SImode, oldval);
28032 break;
28034 case DImode:
28035 if (!cmpdi_operand (oldval, mode))
28036 oldval = force_reg (mode, oldval);
28037 break;
28039 default:
28040 gcc_unreachable ();
28043 switch (mode)
28045 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
28046 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
28047 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
28048 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
28049 default:
28050 gcc_unreachable ();
28053 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28055 if (mode == QImode || mode == HImode)
28056 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28058 /* In all cases, we arrange for success to be signaled by Z set.
28059 This arrangement allows for the boolean result to be used directly
28060 in a subsequent branch, post optimization. */
28061 x = gen_rtx_REG (CCmode, CC_REGNUM);
28062 x = gen_rtx_EQ (SImode, x, const0_rtx);
28063 emit_insn (gen_rtx_SET (bval, x));
28066 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28067 another memory store between the load-exclusive and store-exclusive can
28068 reset the monitor from Exclusive to Open state. This means we must wait
28069 until after reload to split the pattern, lest we get a register spill in
28070 the middle of the atomic sequence. */
28072 void
28073 arm_split_compare_and_swap (rtx operands[])
28075 rtx rval, mem, oldval, newval, scratch;
28076 machine_mode mode;
28077 enum memmodel mod_s, mod_f;
28078 bool is_weak;
28079 rtx_code_label *label1, *label2;
28080 rtx x, cond;
28082 rval = operands[0];
28083 mem = operands[1];
28084 oldval = operands[2];
28085 newval = operands[3];
28086 is_weak = (operands[4] != const0_rtx);
28087 mod_s = memmodel_from_int (INTVAL (operands[5]));
28088 mod_f = memmodel_from_int (INTVAL (operands[6]));
28089 scratch = operands[7];
28090 mode = GET_MODE (mem);
28092 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28094 bool use_acquire = TARGET_HAVE_LDACQ
28095 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28096 || is_mm_release (mod_s));
28098 bool use_release = TARGET_HAVE_LDACQ
28099 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28100 || is_mm_acquire (mod_s));
28102 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28103 a full barrier is emitted after the store-release. */
28104 if (is_armv8_sync)
28105 use_acquire = false;
28107 /* Checks whether a barrier is needed and emits one accordingly. */
28108 if (!(use_acquire || use_release))
28109 arm_pre_atomic_barrier (mod_s);
28111 label1 = NULL;
28112 if (!is_weak)
28114 label1 = gen_label_rtx ();
28115 emit_label (label1);
28117 label2 = gen_label_rtx ();
28119 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28121 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
28122 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28123 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28124 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28125 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28127 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
28129 /* Weak or strong, we want EQ to be true for success, so that we
28130 match the flags that we got from the compare above. */
28131 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28132 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
28133 emit_insn (gen_rtx_SET (cond, x));
28135 if (!is_weak)
28137 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28138 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28139 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
28140 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28143 if (!is_mm_relaxed (mod_f))
28144 emit_label (label2);
28146 /* Checks whether a barrier is needed and emits one accordingly. */
28147 if (is_armv8_sync
28148 || !(use_acquire || use_release))
28149 arm_post_atomic_barrier (mod_s);
28151 if (is_mm_relaxed (mod_f))
28152 emit_label (label2);
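/* On a target without the acquire/release instructions, the strong form of
   the sequence above therefore expands to roughly:
       barrier
     1:  load-exclusive   rval, [mem]
         compare          rval, oldval   ; branch to 2 if not equal
         store-exclusive  scratch, newval, [mem]
         compare          scratch, #0    ; branch back to 1 if not equal
     2:  barrier
   leaving the Z flag set on success, as arranged by the expander above.  */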
28155 void
28156 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28157 rtx value, rtx model_rtx, rtx cond)
28159 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28160 machine_mode mode = GET_MODE (mem);
28161 machine_mode wmode = (mode == DImode ? DImode : SImode);
28162 rtx_code_label *label;
28163 rtx x;
28165 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28167 bool use_acquire = TARGET_HAVE_LDACQ
28168 && !(is_mm_relaxed (model) || is_mm_consume (model)
28169 || is_mm_release (model));
28171 bool use_release = TARGET_HAVE_LDACQ
28172 && !(is_mm_relaxed (model) || is_mm_consume (model)
28173 || is_mm_acquire (model));
28175 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28176 a full barrier is emitted after the store-release. */
28177 if (is_armv8_sync)
28178 use_acquire = false;
28180 /* Checks whether a barrier is needed and emits one accordingly. */
28181 if (!(use_acquire || use_release))
28182 arm_pre_atomic_barrier (model);
28184 label = gen_label_rtx ();
28185 emit_label (label);
28187 if (new_out)
28188 new_out = gen_lowpart (wmode, new_out);
28189 if (old_out)
28190 old_out = gen_lowpart (wmode, old_out);
28191 else
28192 old_out = new_out;
28193 value = simplify_gen_subreg (wmode, value, mode, 0);
28195 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28197 switch (code)
28199 case SET:
28200 new_out = value;
28201 break;
28203 case NOT:
28204 x = gen_rtx_AND (wmode, old_out, value);
28205 emit_insn (gen_rtx_SET (new_out, x));
28206 x = gen_rtx_NOT (wmode, new_out);
28207 emit_insn (gen_rtx_SET (new_out, x));
28208 break;
28210 case MINUS:
28211 if (CONST_INT_P (value))
28213 value = GEN_INT (-INTVAL (value));
28214 code = PLUS;
28216 /* FALLTHRU */
28218 case PLUS:
28219 if (mode == DImode)
28221 /* DImode plus/minus need to clobber flags. */
28222 /* The adddi3 and subdi3 patterns are incorrectly written so that
28223 they require matching operands, even when we could easily support
28224 three operands. Thankfully, this can be fixed up post-splitting,
28225 as the individual add+adc patterns do accept three operands and
28226 post-reload cprop can make these moves go away. */
28227 emit_move_insn (new_out, old_out);
28228 if (code == PLUS)
28229 x = gen_adddi3 (new_out, new_out, value);
28230 else
28231 x = gen_subdi3 (new_out, new_out, value);
28232 emit_insn (x);
28233 break;
28235 /* FALLTHRU */
28237 default:
28238 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28239 emit_insn (gen_rtx_SET (new_out, x));
28240 break;
28243 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28244 use_release);
28246 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28247 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28249 /* Checks whether a barrier is needed and emits one accordingly. */
28250 if (is_armv8_sync
28251 || !(use_acquire || use_release))
28252 arm_post_atomic_barrier (model);
28255 #define MAX_VECT_LEN 16
28257 struct expand_vec_perm_d
28259 rtx target, op0, op1;
28260 unsigned char perm[MAX_VECT_LEN];
28261 machine_mode vmode;
28262 unsigned char nelt;
28263 bool one_vector_p;
28264 bool testing_p;
28267 /* Generate a variable permutation. */
28269 static void
28270 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28272 machine_mode vmode = GET_MODE (target);
28273 bool one_vector_p = rtx_equal_p (op0, op1);
28275 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28276 gcc_checking_assert (GET_MODE (op0) == vmode);
28277 gcc_checking_assert (GET_MODE (op1) == vmode);
28278 gcc_checking_assert (GET_MODE (sel) == vmode);
28279 gcc_checking_assert (TARGET_NEON);
28281 if (one_vector_p)
28283 if (vmode == V8QImode)
28284 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28285 else
28286 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28288 else
28290 rtx pair;
28292 if (vmode == V8QImode)
28294 pair = gen_reg_rtx (V16QImode);
28295 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28296 pair = gen_lowpart (TImode, pair);
28297 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28299 else
28301 pair = gen_reg_rtx (OImode);
28302 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28303 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28308 void
28309 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28311 machine_mode vmode = GET_MODE (target);
28312 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28313 bool one_vector_p = rtx_equal_p (op0, op1);
28314 rtx rmask[MAX_VECT_LEN], mask;
28316 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28317 numbering of elements for big-endian, we must reverse the order. */
28318 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28320 /* The VTBL instruction does not use a modulo index, so we must take care
28321 of that ourselves. */
28322 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28323 for (i = 0; i < nelt; ++i)
28324 rmask[i] = mask;
28325 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28326 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28328 arm_expand_vec_perm_1 (target, op0, op1, sel);
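/* For a single-vector V8QImode permutation, for example, the mask built above
   is 7, so a selector element of 9 is reduced to 1 before the VTBL, giving
   the modulo behaviour that VEC_PERM_EXPR requires but VTBL does not.  */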
28331 /* Map lane ordering between architectural lane order, and GCC lane order,
28332 taking into account ABI. See comment above output_move_neon for details. */
28334 static int
28335 neon_endian_lane_map (machine_mode mode, int lane)
28337 if (BYTES_BIG_ENDIAN)
28339 int nelems = GET_MODE_NUNITS (mode);
28340 /* Reverse lane order. */
28341 lane = (nelems - 1 - lane);
28342 /* Reverse D register order, to match ABI. */
28343 if (GET_MODE_SIZE (mode) == 16)
28344 lane = lane ^ (nelems / 2);
28346 return lane;
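/* Working this through for V4SImode on a big-endian target: lanes 0, 1, 2, 3
   map to 1, 0, 3, 2 (reversed within each 64-bit half of the Q register); on
   little-endian targets the mapping is the identity.  */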
28349 /* Some permutations index into pairs of vectors, this is a helper function
28350 to map indexes into those pairs of vectors. */
28352 static int
28353 neon_pair_endian_lane_map (machine_mode mode, int lane)
28355 int nelem = GET_MODE_NUNITS (mode);
28356 if (BYTES_BIG_ENDIAN)
28357 lane =
28358 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28359 return lane;
28362 /* Generate or test for an insn that supports a constant permutation. */
28364 /* Recognize patterns for the VUZP insns. */
28366 static bool
28367 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28369 unsigned int i, odd, mask, nelt = d->nelt;
28370 rtx out0, out1, in0, in1;
28371 rtx (*gen)(rtx, rtx, rtx, rtx);
28372 int first_elem;
28373 int swap_nelt;
28375 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28376 return false;
28378 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28379 big endian pattern on 64 bit vectors, so we correct for that. */
28380 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28381 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28383 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28385 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28386 odd = 0;
28387 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28388 odd = 1;
28389 else
28390 return false;
28391 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28393 for (i = 0; i < nelt; i++)
28395 unsigned elt =
28396 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28397 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28398 return false;
28401 /* Success! */
28402 if (d->testing_p)
28403 return true;
28405 switch (d->vmode)
28407 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28408 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28409 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28410 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28411 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28412 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28413 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28414 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28415 default:
28416 gcc_unreachable ();
28419 in0 = d->op0;
28420 in1 = d->op1;
28421 if (swap_nelt != 0)
28422 std::swap (in0, in1);
28424 out0 = d->target;
28425 out1 = gen_reg_rtx (d->vmode);
28426 if (odd)
28427 std::swap (out0, out1);
28429 emit_insn (gen (out0, in0, in1, out1));
28430 return true;
28433 /* Recognize patterns for the VZIP insns. */
28435 static bool
28436 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28438 unsigned int i, high, mask, nelt = d->nelt;
28439 rtx out0, out1, in0, in1;
28440 rtx (*gen)(rtx, rtx, rtx, rtx);
28441 int first_elem;
28442 bool is_swapped;
28444 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28445 return false;
28447 is_swapped = BYTES_BIG_ENDIAN;
28449 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28451 high = nelt / 2;
28452 if (first_elem == neon_endian_lane_map (d->vmode, high))
28454 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28455 high = 0;
28456 else
28457 return false;
28458 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28460 for (i = 0; i < nelt / 2; i++)
28462 unsigned elt =
28463 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28464 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28465 != elt)
28466 return false;
28467 elt =
28468 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28469 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28470 != elt)
28471 return false;
28474 /* Success! */
28475 if (d->testing_p)
28476 return true;
28478 switch (d->vmode)
28480 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28481 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28482 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28483 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28484 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28485 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28486 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28487 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28488 default:
28489 gcc_unreachable ();
28492 in0 = d->op0;
28493 in1 = d->op1;
28494 if (is_swapped)
28495 std::swap (in0, in1);
28497 out0 = d->target;
28498 out1 = gen_reg_rtx (d->vmode);
28499 if (high)
28500 std::swap (out0, out1);
28502 emit_insn (gen (out0, in0, in1, out1));
28503 return true;
28506 /* Recognize patterns for the VREV insns. */
28508 static bool
28509 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28511 unsigned int i, j, diff, nelt = d->nelt;
28512 rtx (*gen)(rtx, rtx);
28514 if (!d->one_vector_p)
28515 return false;
28517 diff = d->perm[0];
28518 switch (diff)
28520 case 7:
28521 switch (d->vmode)
28523 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28524 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28525 default:
28526 return false;
28528 break;
28529 case 3:
28530 switch (d->vmode)
28532 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28533 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28534 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28535 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28536 default:
28537 return false;
28539 break;
28540 case 1:
28541 switch (d->vmode)
28543 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28544 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28545 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28546 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28547 case V4SImode: gen = gen_neon_vrev64v4si; break;
28548 case V2SImode: gen = gen_neon_vrev64v2si; break;
28549 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28550 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28551 default:
28552 return false;
28554 break;
28555 default:
28556 return false;
28559 for (i = 0; i < nelt ; i += diff + 1)
28560 for (j = 0; j <= diff; j += 1)
28562 /* This is guaranteed to be true as the value of diff
28563 is 7, 3, 1 and we should have enough elements in the
28564 queue to generate this. Getting a vector mask with a
28565 value of diff other than these values implies that
28566 something is wrong by the time we get here. */
28567 gcc_assert (i + j < nelt);
28568 if (d->perm[i + j] != i + diff - j)
28569 return false;
28572 /* Success! */
28573 if (d->testing_p)
28574 return true;
28576 emit_insn (gen (d->target, d->op0));
28577 return true;
28580 /* Recognize patterns for the VTRN insns. */
28582 static bool
28583 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28585 unsigned int i, odd, mask, nelt = d->nelt;
28586 rtx out0, out1, in0, in1;
28587 rtx (*gen)(rtx, rtx, rtx, rtx);
28589 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28590 return false;
28592 /* Note that these are little-endian tests. Adjust for big-endian later. */
28593 if (d->perm[0] == 0)
28594 odd = 0;
28595 else if (d->perm[0] == 1)
28596 odd = 1;
28597 else
28598 return false;
28599 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28601 for (i = 0; i < nelt; i += 2)
28603 if (d->perm[i] != i + odd)
28604 return false;
28605 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28606 return false;
28609 /* Success! */
28610 if (d->testing_p)
28611 return true;
28613 switch (d->vmode)
28615 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28616 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28617 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28618 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28619 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28620 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28621 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28622 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28623 default:
28624 gcc_unreachable ();
28627 in0 = d->op0;
28628 in1 = d->op1;
28629 if (BYTES_BIG_ENDIAN)
28631 std::swap (in0, in1);
28632 odd = !odd;
28635 out0 = d->target;
28636 out1 = gen_reg_rtx (d->vmode);
28637 if (odd)
28638 std::swap (out0, out1);
28640 emit_insn (gen (out0, in0, in1, out1));
28641 return true;
28644 /* Recognize patterns for the VEXT insns. */
28646 static bool
28647 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28649 unsigned int i, nelt = d->nelt;
28650 rtx (*gen) (rtx, rtx, rtx, rtx);
28651 rtx offset;
28653 unsigned int location;
28655 unsigned int next = d->perm[0] + 1;
28657 /* TODO: Handle GCC's numbering of elements for big-endian. */
28658 if (BYTES_BIG_ENDIAN)
28659 return false;
28661 /* Check if the extracted indexes are increasing by one. */
28662 for (i = 1; i < nelt; next++, i++)
28664 /* If we hit the most significant element of the 2nd vector in
28665 the previous iteration, no need to test further. */
28666 if (next == 2 * nelt)
28667 return false;
28669 /* If we are operating on only one vector: it could be a
28670 rotation. If there are only two elements of size < 64, let
28671 arm_evpc_neon_vrev catch it. */
28672 if (d->one_vector_p && (next == nelt))
28674 if ((nelt == 2) && (d->vmode != V2DImode))
28675 return false;
28676 else
28677 next = 0;
28680 if (d->perm[i] != next)
28681 return false;
28684 location = d->perm[0];
28686 switch (d->vmode)
28688 case V16QImode: gen = gen_neon_vextv16qi; break;
28689 case V8QImode: gen = gen_neon_vextv8qi; break;
28690 case V4HImode: gen = gen_neon_vextv4hi; break;
28691 case V8HImode: gen = gen_neon_vextv8hi; break;
28692 case V2SImode: gen = gen_neon_vextv2si; break;
28693 case V4SImode: gen = gen_neon_vextv4si; break;
28694 case V2SFmode: gen = gen_neon_vextv2sf; break;
28695 case V4SFmode: gen = gen_neon_vextv4sf; break;
28696 case V2DImode: gen = gen_neon_vextv2di; break;
28697 default:
28698 return false;
28701 /* Success! */
28702 if (d->testing_p)
28703 return true;
28705 offset = GEN_INT (location);
28706 emit_insn (gen (d->target, d->op0, d->op1, offset));
28707 return true;
28710 /* The NEON VTBL instruction is a fully variable permutation that's even
28711 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28712 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28713 can do slightly better by expanding this as a constant where we don't
28714 have to apply a mask. */
28716 static bool
28717 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28719 rtx rperm[MAX_VECT_LEN], sel;
28720 machine_mode vmode = d->vmode;
28721 unsigned int i, nelt = d->nelt;
28723 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28724 numbering of elements for big-endian, we must reverse the order. */
28725 if (BYTES_BIG_ENDIAN)
28726 return false;
28728 if (d->testing_p)
28729 return true;
28731 /* Generic code will try constant permutation twice. Once with the
28732 original mode and again with the elements lowered to QImode.
28733 So wait and don't do the selector expansion ourselves. */
28734 if (vmode != V8QImode && vmode != V16QImode)
28735 return false;
28737 for (i = 0; i < nelt; ++i)
28738 rperm[i] = GEN_INT (d->perm[i]);
28739 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28740 sel = force_reg (vmode, sel);
28742 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28743 return true;
28746 static bool
28747 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28749 /* Check if the input mask matches vext before reordering the
28750 operands. */
28751 if (TARGET_NEON)
28752 if (arm_evpc_neon_vext (d))
28753 return true;
28755 /* The pattern matching functions above are written to look for a small
28756 number to begin the sequence (0, 1, N/2). If we begin with an index
28757 from the second operand, we can swap the operands. */
28758 if (d->perm[0] >= d->nelt)
28760 unsigned i, nelt = d->nelt;
28762 for (i = 0; i < nelt; ++i)
28763 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28765 std::swap (d->op0, d->op1);
28768 if (TARGET_NEON)
28770 if (arm_evpc_neon_vuzp (d))
28771 return true;
28772 if (arm_evpc_neon_vzip (d))
28773 return true;
28774 if (arm_evpc_neon_vrev (d))
28775 return true;
28776 if (arm_evpc_neon_vtrn (d))
28777 return true;
28778 return arm_evpc_neon_vtbl (d);
28780 return false;
28783 /* Expand a vec_perm_const pattern. */
28785 bool
28786 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28788 struct expand_vec_perm_d d;
28789 int i, nelt, which;
28791 d.target = target;
28792 d.op0 = op0;
28793 d.op1 = op1;
28795 d.vmode = GET_MODE (target);
28796 gcc_assert (VECTOR_MODE_P (d.vmode));
28797 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28798 d.testing_p = false;
28800 for (i = which = 0; i < nelt; ++i)
28802 rtx e = XVECEXP (sel, 0, i);
28803 int ei = INTVAL (e) & (2 * nelt - 1);
28804 which |= (ei < nelt ? 1 : 2);
28805 d.perm[i] = ei;
28808 switch (which)
28810 default:
28811 gcc_unreachable();
28813 case 3:
28814 d.one_vector_p = false;
28815 if (!rtx_equal_p (op0, op1))
28816 break;
28818 /* The elements of PERM do not suggest that only the first operand
28819 is used, but both operands are identical. Allow easier matching
28820 of the permutation by folding the permutation into the single
28821 input vector. */
28822 /* FALLTHRU */
28823 case 2:
28824 for (i = 0; i < nelt; ++i)
28825 d.perm[i] &= nelt - 1;
28826 d.op0 = op1;
28827 d.one_vector_p = true;
28828 break;
28830 case 1:
28831 d.op1 = op0;
28832 d.one_vector_p = true;
28833 break;
28836 return arm_expand_vec_perm_const_1 (&d);
28839 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28841 static bool
28842 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28843 const unsigned char *sel)
28845 struct expand_vec_perm_d d;
28846 unsigned int i, nelt, which;
28847 bool ret;
28849 d.vmode = vmode;
28850 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28851 d.testing_p = true;
28852 memcpy (d.perm, sel, nelt);
28854 /* Categorize the set of elements in the selector. */
28855 for (i = which = 0; i < nelt; ++i)
28857 unsigned char e = d.perm[i];
28858 gcc_assert (e < 2 * nelt);
28859 which |= (e < nelt ? 1 : 2);
28862 /* For all elements from second vector, fold the elements to first. */
28863 if (which == 2)
28864 for (i = 0; i < nelt; ++i)
28865 d.perm[i] -= nelt;
28867 /* Check whether the mask can be applied to the vector type. */
28868 d.one_vector_p = (which != 3);
28870 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28871 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28872 if (!d.one_vector_p)
28873 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28875 start_sequence ();
28876 ret = arm_expand_vec_perm_const_1 (&d);
28877 end_sequence ();
28879 return ret;
28882 bool
28883 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28885 /* If we are soft float and either have LDRD or the mode fits in a
28886 single word, then all auto increment forms are ok. */
28887 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28888 return true;
28890 switch (code)
28892 /* Post-increment and pre-decrement are supported for all
28893 instruction forms except for vector forms. */
28894 case ARM_POST_INC:
28895 case ARM_PRE_DEC:
28896 if (VECTOR_MODE_P (mode))
28898 if (code != ARM_PRE_DEC)
28899 return true;
28900 else
28901 return false;
28904 return true;
28906 case ARM_POST_DEC:
28907 case ARM_PRE_INC:
28908 /* Without LDRD, if the mode size is greater than
28909 word size there is no point in auto-incrementing
28910 because ldm and stm will not have these forms. */
28911 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28912 return false;
28914 /* Vector and floating point modes do not support
28915 these auto increment forms. */
28916 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28917 return false;
28919 return true;
28921 default:
28922 return false;
28926 return false;
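/* On a hard-float target, for example, ARM_POST_INC of a V2SImode value is
   accepted while ARM_PRE_DEC of the same value is not, and ARM_PRE_INC of
   DFmode is rejected because floating-point modes lack those forms.  */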
28929 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
28930 on ARM, since we know that shifts by negative amounts are no-ops.
28931 Additionally, the default expansion code is not available or suitable
28932 for post-reload insn splits (this can occur when the register allocator
28933 chooses not to do a shift in NEON).
28935 This function is used in both initial expand and post-reload splits, and
28936 handles all kinds of 64-bit shifts.
28938 Input requirements:
28939 - It is safe for the input and output to be the same register, but
28940 early-clobber rules apply for the shift amount and scratch registers.
28941 - Shift by register requires both scratch registers. In all other cases
28942 the scratch registers may be NULL.
28943 - Ashiftrt by a register also clobbers the CC register. */
28944 void
28945 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28946 rtx amount, rtx scratch1, rtx scratch2)
28948 rtx out_high = gen_highpart (SImode, out);
28949 rtx out_low = gen_lowpart (SImode, out);
28950 rtx in_high = gen_highpart (SImode, in);
28951 rtx in_low = gen_lowpart (SImode, in);
28953 /* Terminology:
28954 in = the register pair containing the input value.
28955 out = the destination register pair.
28956 up = the high- or low-part of each pair.
28957 down = the opposite part to "up".
28958 In a shift, we can consider bits to shift from "up"-stream to
28959 "down"-stream, so in a left-shift "up" is the low-part and "down"
28960 is the high-part of each register pair. */
28962 rtx out_up = code == ASHIFT ? out_low : out_high;
28963 rtx out_down = code == ASHIFT ? out_high : out_low;
28964 rtx in_up = code == ASHIFT ? in_low : in_high;
28965 rtx in_down = code == ASHIFT ? in_high : in_low;
28967 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28968 gcc_assert (out
28969 && (REG_P (out) || GET_CODE (out) == SUBREG)
28970 && GET_MODE (out) == DImode);
28971 gcc_assert (in
28972 && (REG_P (in) || GET_CODE (in) == SUBREG)
28973 && GET_MODE (in) == DImode);
28974 gcc_assert (amount
28975 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28976 && GET_MODE (amount) == SImode)
28977 || CONST_INT_P (amount)));
28978 gcc_assert (scratch1 == NULL
28979 || (GET_CODE (scratch1) == SCRATCH)
28980 || (GET_MODE (scratch1) == SImode
28981 && REG_P (scratch1)));
28982 gcc_assert (scratch2 == NULL
28983 || (GET_CODE (scratch2) == SCRATCH)
28984 || (GET_MODE (scratch2) == SImode
28985 && REG_P (scratch2)));
28986 gcc_assert (!REG_P (out) || !REG_P (amount)
28987 || !HARD_REGISTER_P (out)
28988 || (REGNO (out) != REGNO (amount)
28989 && REGNO (out) + 1 != REGNO (amount)));
28991 /* Macros to make following code more readable. */
28992 #define SUB_32(DEST,SRC) \
28993 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28994 #define RSB_32(DEST,SRC) \
28995 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28996 #define SUB_S_32(DEST,SRC) \
28997 gen_addsi3_compare0 ((DEST), (SRC), \
28998 GEN_INT (-32))
28999 #define SET(DEST,SRC) \
29000 gen_rtx_SET ((DEST), (SRC))
29001 #define SHIFT(CODE,SRC,AMOUNT) \
29002 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29003 #define LSHIFT(CODE,SRC,AMOUNT) \
29004 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29005 SImode, (SRC), (AMOUNT))
29006 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29007 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29008 SImode, (SRC), (AMOUNT))
29009 #define ORR(A,B) \
29010 gen_rtx_IOR (SImode, (A), (B))
29011 #define BRANCH(COND,LABEL) \
29012 gen_arm_cond_branch ((LABEL), \
29013 gen_rtx_ ## COND (CCmode, cc_reg, \
29014 const0_rtx), \
29015 cc_reg)
29017 /* Shifts by register and shifts by constant are handled separately. */
29018 if (CONST_INT_P (amount))
29020 /* We have a shift-by-constant. */
29022 /* First, handle out-of-range shift amounts.
29023 In both cases we try to match the result an ARM instruction in a
29024 shift-by-register would give. This helps reduce execution
29025 differences between optimization levels, but it won't stop other
29026 parts of the compiler doing different things. This is "undefined
29027 behavior", in any case. */
29028 if (INTVAL (amount) <= 0)
29029 emit_insn (gen_movdi (out, in));
29030 else if (INTVAL (amount) >= 64)
29032 if (code == ASHIFTRT)
29034 rtx const31_rtx = GEN_INT (31);
29035 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29036 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29038 else
29039 emit_insn (gen_movdi (out, const0_rtx));
29042 /* Now handle valid shifts. */
29043 else if (INTVAL (amount) < 32)
29045 /* Shifts by a constant less than 32. */
29046 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29048 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29049 emit_insn (SET (out_down,
29050 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29051 out_down)));
29052 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29054 else
29056 /* Shifts by a constant greater than 31. */
29057 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29059 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29060 if (code == ASHIFTRT)
29061 emit_insn (gen_ashrsi3 (out_up, in_up,
29062 GEN_INT (31)));
29063 else
29064 emit_insn (SET (out_up, const0_rtx));
29067 else
29069 /* We have a shift-by-register. */
29070 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29072 /* This alternative requires the scratch registers. */
29073 gcc_assert (scratch1 && REG_P (scratch1));
29074 gcc_assert (scratch2 && REG_P (scratch2));
29076 /* We will need the values "amount-32" and "32-amount" later.
29077 Swapping them around now allows the later code to be more general. */
29078 switch (code)
29080 case ASHIFT:
29081 emit_insn (SUB_32 (scratch1, amount));
29082 emit_insn (RSB_32 (scratch2, amount));
29083 break;
29084 case ASHIFTRT:
29085 emit_insn (RSB_32 (scratch1, amount));
29086 /* Also set CC = amount > 32. */
29087 emit_insn (SUB_S_32 (scratch2, amount));
29088 break;
29089 case LSHIFTRT:
29090 emit_insn (RSB_32 (scratch1, amount));
29091 emit_insn (SUB_32 (scratch2, amount));
29092 break;
29093 default:
29094 gcc_unreachable ();
29097 /* Emit code like this:
29099 arithmetic-left:
29100 out_down = in_down << amount;
29101 out_down = (in_up << (amount - 32)) | out_down;
29102 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29103 out_up = in_up << amount;
29105 arithmetic-right:
29106 out_down = in_down >> amount;
29107 out_down = (in_up << (32 - amount)) | out_down;
29108 if (amount < 32)
29109 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29110 out_up = in_up << amount;
29112 logical-right:
29113 out_down = in_down >> amount;
29114 out_down = (in_up << (32 - amount)) | out_down;
29115 if (amount < 32)
29116 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29117 out_up = in_up << amount;
29119 The ARM and Thumb2 variants are the same but implemented slightly
29120 differently. If this were only called during expand we could just
29121 use the Thumb2 case and let combine do the right thing, but this
29122 can also be called from post-reload splitters. */
29124 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29126 if (!TARGET_THUMB2)
29128 /* Emit code for ARM mode. */
29129 emit_insn (SET (out_down,
29130 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29131 if (code == ASHIFTRT)
29133 rtx_code_label *done_label = gen_label_rtx ();
29134 emit_jump_insn (BRANCH (LT, done_label));
29135 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29136 out_down)));
29137 emit_label (done_label);
29139 else
29140 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29141 out_down)));
29143 else
29145 /* Emit code for Thumb2 mode.
29146 Thumb2 can't do shift and or in one insn. */
29147 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29148 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29150 if (code == ASHIFTRT)
29152 rtx_code_label *done_label = gen_label_rtx ();
29153 emit_jump_insn (BRANCH (LT, done_label));
29154 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29155 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29156 emit_label (done_label);
29158 else
29160 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29161 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29165 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29168 #undef SUB_32
29169 #undef RSB_32
29170 #undef SUB_S_32
29171 #undef SET
29172 #undef SHIFT
29173 #undef LSHIFT
29174 #undef REV_LSHIFT
29175 #undef ORR
29176 #undef BRANCH
29179 /* Returns true if the pattern is a valid symbolic address, which is either a
29180 symbol_ref or (symbol_ref + addend).
29182 According to the ARM ELF ABI, the initial addend of a REL-type relocation
29183 applied to a MOVW or MOVT instruction is formed by interpreting the 16-bit
29184 literal field of the instruction as a 16-bit signed value in the range
29185 -32768 <= A < 32768. */
29187 bool
29188 arm_valid_symbolic_address_p (rtx addr)
29190 rtx xop0, xop1 = NULL_RTX;
29191 rtx tmp = addr;
29193 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29194 return true;
29196 /* (const (plus: symbol_ref const_int)) */
29197 if (GET_CODE (addr) == CONST)
29198 tmp = XEXP (addr, 0);
29200 if (GET_CODE (tmp) == PLUS)
29202 xop0 = XEXP (tmp, 0);
29203 xop1 = XEXP (tmp, 1);
29205 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29206 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29209 return false;
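/* Examples (illustrative): (symbol_ref "x") and
   (const (plus (symbol_ref "x") (const_int 16))) are accepted; an addend of
   0x8000 is rejected because it falls outside [-0x8000, 0x7fff].  */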
29212 /* Return true if *COMPARISON is a valid comparison operation, and put
29213    the operands into a form that is valid for it. */
29214 bool
29215 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29217 enum rtx_code code = GET_CODE (*comparison);
29218 int code_int;
29219 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29220 ? GET_MODE (*op2) : GET_MODE (*op1);
29222 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29224 if (code == UNEQ || code == LTGT)
29225 return false;
29227 code_int = (int)code;
29228 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29229 PUT_CODE (*comparison, (enum rtx_code)code_int);
29231 switch (mode)
29233 case SImode:
29234 if (!arm_add_operand (*op1, mode))
29235 *op1 = force_reg (mode, *op1);
29236 if (!arm_add_operand (*op2, mode))
29237 *op2 = force_reg (mode, *op2);
29238 return true;
29240 case DImode:
29241 if (!cmpdi_operand (*op1, mode))
29242 *op1 = force_reg (mode, *op1);
29243 if (!cmpdi_operand (*op2, mode))
29244 *op2 = force_reg (mode, *op2);
29245 return true;
29247 case SFmode:
29248 case DFmode:
29249 if (!arm_float_compare_operand (*op1, mode))
29250 *op1 = force_reg (mode, *op1);
29251 if (!arm_float_compare_operand (*op2, mode))
29252 *op2 = force_reg (mode, *op2);
29253 return true;
29254 default:
29255 break;
29258 return false;
29262 /* Maximum number of instructions to set block of memory. */
29263 static int
29264 arm_block_set_max_insns (void)
29266 if (optimize_function_for_size_p (cfun))
29267 return 4;
29268 else
29269 return current_tune->max_insns_inline_memset;
29272 /* Return TRUE if it's profitable to set block of memory for
29273 non-vectorized case. VAL is the value to set the memory
29274 with. LENGTH is the number of bytes to set. ALIGN is the
29275 alignment of the destination memory in bytes. UNALIGNED_P
29276 is TRUE if we can only set the memory with instructions
29277 meeting alignment requirements. USE_STRD_P is TRUE if we
29278 can use strd to set the memory. */
29279 static bool
29280 arm_block_set_non_vect_profit_p (rtx val,
29281 unsigned HOST_WIDE_INT length,
29282 unsigned HOST_WIDE_INT align,
29283 bool unaligned_p, bool use_strd_p)
29285 int num = 0;
29286 /* For 0-7 leftover bytes, this table gives the minimum number of
29287    strb/strh/str instructions needed to store them. */
29288 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29290 if (unaligned_p)
29292 num = arm_const_inline_cost (SET, val);
29293 num += length / align + length % align;
29295 else if (use_strd_p)
29297 num = arm_const_double_inline_cost (val);
29298 num += (length >> 3) + leftover[length & 7];
29300 else
29302 num = arm_const_inline_cost (SET, val);
29303 num += (length >> 2) + leftover[length & 3];
29306 /* We may be able to combine last pair STRH/STRB into a single STR
29307 by shifting one byte back. */
29308 if (unaligned_access && length > 3 && (length & 3) == 3)
29309 num--;
29311 return (num <= arm_block_set_max_insns ());
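/* Worked example, purely illustrative: length == 15, align == 4, plain word
   stores (unaligned_p and use_strd_p both false):
     num = arm_const_inline_cost (SET, val)   constant-loading cost
         + (15 >> 2)                          three word stores
         + leftover[15 & 3]                   i.e. leftover[3] == 2
   and, when unaligned access is available, the final STRH/STRB pair counts
   as a single STR, so one instruction is subtracted again.  */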
29314 /* Return TRUE if it's profitable to set block of memory for
29315 vectorized case. LENGTH is the number of bytes to set.
29316 ALIGN is the alignment of destination memory in bytes.
29317 MODE is the vector mode used to set the memory. */
29318 static bool
29319 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29320 unsigned HOST_WIDE_INT align,
29321 machine_mode mode)
29323 int num;
29324 bool unaligned_p = ((align & 3) != 0);
29325 unsigned int nelt = GET_MODE_NUNITS (mode);
29327 /* Instruction loading constant value. */
29328 num = 1;
29329 /* Instructions storing the memory. */
29330 num += (length + nelt - 1) / nelt;
29331 /* Instruction adjusting the address expression. The address only
29332    needs adjusting when the block is 4-byte aligned but the leftover
29333    bytes can only be stored with a misaligned store instruction. */
29334 if (!unaligned_p && (length & 3) != 0)
29335 num++;
29337 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29338 if (!unaligned_p && mode == V16QImode)
29339 num--;
29341 return (num <= arm_block_set_max_insns ());
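/* Worked example, purely illustrative: length == 23, align == 4,
   mode == V16QImode:
     1 insn to load the constant vector
   + 2 stores (ceiling of 23 / 16)
   + 1 address adjustment (23 & 3 != 0 in the aligned case)
   - 1 because the first 16 bytes go out via vst1 with no adjustment
   = 3, which is then compared against arm_block_set_max_insns ().  */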
29344 /* Set a block of memory using vectorization instructions for the
29345 unaligned case. We fill the first LENGTH bytes of the memory
29346 area starting from DSTBASE with byte constant VALUE. ALIGN is
29347 the alignment requirement of memory. Return TRUE if succeeded. */
29348 static bool
29349 arm_block_set_unaligned_vect (rtx dstbase,
29350 unsigned HOST_WIDE_INT length,
29351 unsigned HOST_WIDE_INT value,
29352 unsigned HOST_WIDE_INT align)
29354 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29355 rtx dst, mem;
29356 rtx val_elt, val_vec, reg;
29357 rtx rval[MAX_VECT_LEN];
29358 rtx (*gen_func) (rtx, rtx);
29359 machine_mode mode;
29360 unsigned HOST_WIDE_INT v = value;
29361 unsigned int offset = 0;
29362 gcc_assert ((align & 0x3) != 0);
29363 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29364 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29365 if (length >= nelt_v16)
29367 mode = V16QImode;
29368 gen_func = gen_movmisalignv16qi;
29370 else
29372 mode = V8QImode;
29373 gen_func = gen_movmisalignv8qi;
29375 nelt_mode = GET_MODE_NUNITS (mode);
29376 gcc_assert (length >= nelt_mode);
29377 /* Skip if it isn't profitable. */
29378 if (!arm_block_set_vect_profit_p (length, align, mode))
29379 return false;
29381 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29382 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29384 v = sext_hwi (v, BITS_PER_WORD);
29385 val_elt = GEN_INT (v);
29386 for (j = 0; j < nelt_mode; j++)
29387 rval[j] = val_elt;
29389 reg = gen_reg_rtx (mode);
29390 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29391 /* Emit instruction loading the constant value. */
29392 emit_move_insn (reg, val_vec);
29394 /* Handle nelt_mode bytes in a vector. */
29395 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29397 emit_insn ((*gen_func) (mem, reg));
29398 if (i + 2 * nelt_mode <= length)
29400 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29401 offset += nelt_mode;
29402 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29406 /* If nelt_v8 or more bytes are left over, we must be in
29407    V16QImode. */
29408 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29410 /* Handle (8, 16) bytes leftover. */
29411 if (i + nelt_v8 < length)
29413 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29414 offset += length - i;
29415 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29417 /* We are shifting bytes back, set the alignment accordingly. */
29418 if ((length & 1) != 0 && align >= 2)
29419 set_mem_align (mem, BITS_PER_UNIT);
29421 emit_insn (gen_movmisalignv16qi (mem, reg));
29423 /* Handle (0, 8] bytes leftover. */
29424 else if (i < length && i + nelt_v8 >= length)
29426 if (mode == V16QImode)
29427 reg = gen_lowpart (V8QImode, reg);
29429 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29430 + (nelt_mode - nelt_v8))));
29431 offset += (length - i) + (nelt_mode - nelt_v8);
29432 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29434 /* We are shifting bytes back, set the alignment accordingly. */
29435 if ((length & 1) != 0 && align >= 2)
29436 set_mem_align (mem, BITS_PER_UNIT);
29438 emit_insn (gen_movmisalignv8qi (mem, reg));
29441 return true;
29444 /* Set a block of memory using vectorization instructions for the
29445 aligned case. We fill the first LENGTH bytes of the memory area
29446 starting from DSTBASE with byte constant VALUE. ALIGN is the
29447 alignment requirement of memory. Return TRUE if succeeded. */
29448 static bool
29449 arm_block_set_aligned_vect (rtx dstbase,
29450 unsigned HOST_WIDE_INT length,
29451 unsigned HOST_WIDE_INT value,
29452 unsigned HOST_WIDE_INT align)
29454 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29455 rtx dst, addr, mem;
29456 rtx val_elt, val_vec, reg;
29457 rtx rval[MAX_VECT_LEN];
29458 machine_mode mode;
29459 unsigned HOST_WIDE_INT v = value;
29460 unsigned int offset = 0;
29462 gcc_assert ((align & 0x3) == 0);
29463 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29464 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29465 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29466 mode = V16QImode;
29467 else
29468 mode = V8QImode;
29470 nelt_mode = GET_MODE_NUNITS (mode);
29471 gcc_assert (length >= nelt_mode);
29472 /* Skip if it isn't profitable. */
29473 if (!arm_block_set_vect_profit_p (length, align, mode))
29474 return false;
29476 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29478 v = sext_hwi (v, BITS_PER_WORD);
29479 val_elt = GEN_INT (v);
29480 for (j = 0; j < nelt_mode; j++)
29481 rval[j] = val_elt;
29483 reg = gen_reg_rtx (mode);
29484 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29485 /* Emit instruction loading the constant value. */
29486 emit_move_insn (reg, val_vec);
29488 i = 0;
29489 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29490 if (mode == V16QImode)
29492 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29493 emit_insn (gen_movmisalignv16qi (mem, reg));
29494 i += nelt_mode;
29495 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29496 if (i + nelt_v8 < length && i + nelt_v16 > length)
29498 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29499 offset += length - nelt_mode;
29500 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29501 /* We are shifting bytes back, set the alignment accordingly. */
29502 if ((length & 0x3) == 0)
29503 set_mem_align (mem, BITS_PER_UNIT * 4);
29504 else if ((length & 0x1) == 0)
29505 set_mem_align (mem, BITS_PER_UNIT * 2);
29506 else
29507 set_mem_align (mem, BITS_PER_UNIT);
29509 emit_insn (gen_movmisalignv16qi (mem, reg));
29510 return true;
29512 /* Fall through for bytes leftover. */
29513 mode = V8QImode;
29514 nelt_mode = GET_MODE_NUNITS (mode);
29515 reg = gen_lowpart (V8QImode, reg);
29518 /* Handle 8 bytes in a vector. */
29519 for (; (i + nelt_mode <= length); i += nelt_mode)
29521 addr = plus_constant (Pmode, dst, i);
29522 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29523 emit_move_insn (mem, reg);
29526 /* Handle single word leftover by shifting 4 bytes back. We can
29527 use aligned access for this case. */
29528 if (i + UNITS_PER_WORD == length)
29530 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29531 offset += i - UNITS_PER_WORD;
29532 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29533 /* We are shifting 4 bytes back, set the alignment accordingly. */
29534 if (align > UNITS_PER_WORD)
29535 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29537 emit_move_insn (mem, reg);
29539 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29540 We have to use unaligned access for this case. */
29541 else if (i < length)
29543 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29544 offset += length - nelt_mode;
29545 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29546 /* We are shifting bytes back, set the alignment accordingly. */
29547 if ((length & 1) == 0)
29548 set_mem_align (mem, BITS_PER_UNIT * 2);
29549 else
29550 set_mem_align (mem, BITS_PER_UNIT);
29552 emit_insn (gen_movmisalignv8qi (mem, reg));
29555 return true;
29558 /* Set a block of memory using plain strh/strb instructions, using
29559    only the instructions that ALIGN permits on the processor. We fill the
29560 first LENGTH bytes of the memory area starting from DSTBASE
29561 with byte constant VALUE. ALIGN is the alignment requirement
29562 of memory. */
29563 static bool
29564 arm_block_set_unaligned_non_vect (rtx dstbase,
29565 unsigned HOST_WIDE_INT length,
29566 unsigned HOST_WIDE_INT value,
29567 unsigned HOST_WIDE_INT align)
29569 unsigned int i;
29570 rtx dst, addr, mem;
29571 rtx val_exp, val_reg, reg;
29572 machine_mode mode;
29573 HOST_WIDE_INT v = value;
29575 gcc_assert (align == 1 || align == 2);
29577 if (align == 2)
29578 v |= (value << BITS_PER_UNIT);
29580 v = sext_hwi (v, BITS_PER_WORD);
29581 val_exp = GEN_INT (v);
29582 /* Skip if it isn't profitable. */
29583 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29584 align, true, false))
29585 return false;
29587 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29588 mode = (align == 2 ? HImode : QImode);
29589 val_reg = force_reg (SImode, val_exp);
29590 reg = gen_lowpart (mode, val_reg);
29592 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29594 addr = plus_constant (Pmode, dst, i);
29595 mem = adjust_automodify_address (dstbase, mode, addr, i);
29596 emit_move_insn (mem, reg);
29599 /* Handle single byte leftover. */
29600 if (i + 1 == length)
29602 reg = gen_lowpart (QImode, val_reg);
29603 addr = plus_constant (Pmode, dst, i);
29604 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29605 emit_move_insn (mem, reg);
29606 i++;
29609 gcc_assert (i == length);
29610 return true;
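/* Illustrative example: length == 7, align == 2, value == 0x41.  V becomes
   0x4141, so the loop emits three HImode (strh) stores for bytes 0-5 and the
   tail code emits one QImode (strb) store for byte 6.  */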
29613 /* Set a block of memory using plain strd/str/strh/strb instructions,
29614 to permit unaligned copies on processors which support unaligned
29615 semantics for those instructions. We fill the first LENGTH bytes
29616 of the memory area starting from DSTBASE with byte constant VALUE.
29617 ALIGN is the alignment requirement of memory. */
29618 static bool
29619 arm_block_set_aligned_non_vect (rtx dstbase,
29620 unsigned HOST_WIDE_INT length,
29621 unsigned HOST_WIDE_INT value,
29622 unsigned HOST_WIDE_INT align)
29624 unsigned int i;
29625 rtx dst, addr, mem;
29626 rtx val_exp, val_reg, reg;
29627 unsigned HOST_WIDE_INT v;
29628 bool use_strd_p;
29630 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29631 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29633 v = (value | (value << 8) | (value << 16) | (value << 24));
29634 if (length < UNITS_PER_WORD)
29635 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29637 if (use_strd_p)
29638 v |= (v << BITS_PER_WORD);
29639 else
29640 v = sext_hwi (v, BITS_PER_WORD);
29642 val_exp = GEN_INT (v);
29643 /* Skip if it isn't profitable. */
29644 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29645 align, false, use_strd_p))
29647 if (!use_strd_p)
29648 return false;
29650 /* Try without strd. */
29651 v = (v >> BITS_PER_WORD);
29652 v = sext_hwi (v, BITS_PER_WORD);
29653 val_exp = GEN_INT (v);
29654 use_strd_p = false;
29655 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29656 align, false, use_strd_p))
29657 return false;
29660 i = 0;
29661 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29662 /* Handle double words using strd if possible. */
29663 if (use_strd_p)
29665 val_reg = force_reg (DImode, val_exp);
29666 reg = val_reg;
29667 for (; (i + 8 <= length); i += 8)
29669 addr = plus_constant (Pmode, dst, i);
29670 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29671 emit_move_insn (mem, reg);
29674 else
29675 val_reg = force_reg (SImode, val_exp);
29677 /* Handle words. */
29678 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29679 for (; (i + 4 <= length); i += 4)
29681 addr = plus_constant (Pmode, dst, i);
29682 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29683 if ((align & 3) == 0)
29684 emit_move_insn (mem, reg);
29685 else
29686 emit_insn (gen_unaligned_storesi (mem, reg));
29689 /* Merge last pair of STRH and STRB into a STR if possible. */
29690 if (unaligned_access && i > 0 && (i + 3) == length)
29692 addr = plus_constant (Pmode, dst, i - 1);
29693 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29694 /* We are shifting one byte back, set the alignment accordingly. */
29695 if ((align & 1) == 0)
29696 set_mem_align (mem, BITS_PER_UNIT);
29698 /* Most likely this is an unaligned access, and we can't tell at
29699 compilation time. */
29700 emit_insn (gen_unaligned_storesi (mem, reg));
29701 return true;
29704 /* Handle half word leftover. */
29705 if (i + 2 <= length)
29707 reg = gen_lowpart (HImode, val_reg);
29708 addr = plus_constant (Pmode, dst, i);
29709 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29710 if ((align & 1) == 0)
29711 emit_move_insn (mem, reg);
29712 else
29713 emit_insn (gen_unaligned_storehi (mem, reg));
29715 i += 2;
29718 /* Handle single byte leftover. */
29719 if (i + 1 == length)
29721 reg = gen_lowpart (QImode, val_reg);
29722 addr = plus_constant (Pmode, dst, i);
29723 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29724 emit_move_insn (mem, reg);
29727 return true;
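/* Illustrative example: length == 16, align == 8, value == 0 on a core with
   TARGET_LDRD and prefer_ldrd_strd: use_strd_p is true and the DImode loop
   emits two strd stores covering all 16 bytes, so the word, halfword and
   byte tails are all skipped.  */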
29730 /* Set a block of memory using vectorization instructions for both
29731 aligned and unaligned cases. We fill the first LENGTH bytes of
29732 the memory area starting from DSTBASE with byte constant VALUE.
29733 ALIGN is the alignment requirement of memory. */
29734 static bool
29735 arm_block_set_vect (rtx dstbase,
29736 unsigned HOST_WIDE_INT length,
29737 unsigned HOST_WIDE_INT value,
29738 unsigned HOST_WIDE_INT align)
29740 /* Check whether we need to use unaligned store instruction. */
29741 if (((align & 3) != 0 || (length & 3) != 0)
29742 /* Check whether unaligned store instruction is available. */
29743 && (!unaligned_access || BYTES_BIG_ENDIAN))
29744 return false;
29746 if ((align & 3) == 0)
29747 return arm_block_set_aligned_vect (dstbase, length, value, align);
29748 else
29749 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29752 /* Expand a string store (memset) operation. First we try to do it using
29753    vectorization instructions, then fall back to ARM unaligned access and
29754    double-word stores if profitable. OPERANDS[0] is the destination,
29755    OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29756    initialize the memory with, OPERANDS[3] is the known alignment of the
29757    destination. */
29758 bool
29759 arm_gen_setmem (rtx *operands)
29761 rtx dstbase = operands[0];
29762 unsigned HOST_WIDE_INT length;
29763 unsigned HOST_WIDE_INT value;
29764 unsigned HOST_WIDE_INT align;
29766 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29767 return false;
29769 length = UINTVAL (operands[1]);
29770 if (length > 64)
29771 return false;
29773 value = (UINTVAL (operands[2]) & 0xFF);
29774 align = UINTVAL (operands[3]);
29775 if (TARGET_NEON && length >= 8
29776 && current_tune->string_ops_prefer_neon
29777 && arm_block_set_vect (dstbase, length, value, align))
29778 return true;
29780 if (!unaligned_access && (align & 3) != 0)
29781 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29783 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
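/* Illustrative note: this is called from the machine description's setmem
   expander; e.g. a memset (p, 0x41, 15) with P known to be 4-byte aligned
   would arrive here with OPERANDS[1] == 15, OPERANDS[2] == 0x41 and
   OPERANDS[3] == 4 (example values only).  */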
29787 static bool
29788 arm_macro_fusion_p (void)
29790 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
29794 static bool
29795 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29797 rtx set_dest;
29798 rtx prev_set = single_set (prev);
29799 rtx curr_set = single_set (curr);
29801 if (!prev_set
29802 || !curr_set)
29803 return false;
29805 if (any_condjump_p (curr))
29806 return false;
29808 if (!arm_macro_fusion_p ())
29809 return false;
29811 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
29812 && aarch_crypto_can_dual_issue (prev, curr))
29813 return true;
29815 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT)
29817 /* We are trying to fuse
29818 movw imm / movt imm
29819 instructions as a group that gets scheduled together. */
29821 set_dest = SET_DEST (curr_set);
29823 if (GET_MODE (set_dest) != SImode)
29824 return false;
29826 /* We are trying to match:
29827 prev (movw) == (set (reg r0) (const_int imm16))
29828 curr (movt) == (set (zero_extract (reg r0)
29829 (const_int 16)
29830 (const_int 16))
29831 (const_int imm16_1))
29833 prev (movw) == (set (reg r1)
29834 (high (symbol_ref ("SYM"))))
29835 curr (movt) == (set (reg r0)
29836 (lo_sum (reg r1)
29837 (symbol_ref ("SYM")))) */
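/* In assembly terms the fusable pair looks like (illustrative):
     movw  r0, #:lower16:SYM     @ or  movw r0, #imm16
     movt  r0, #:upper16:SYM     @ or  movt r0, #imm16_1  */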
29838 if (GET_CODE (set_dest) == ZERO_EXTRACT)
29840 if (CONST_INT_P (SET_SRC (curr_set))
29841 && CONST_INT_P (SET_SRC (prev_set))
29842 && REG_P (XEXP (set_dest, 0))
29843 && REG_P (SET_DEST (prev_set))
29844 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29845 return true;
29847 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29848 && REG_P (SET_DEST (curr_set))
29849 && REG_P (SET_DEST (prev_set))
29850 && GET_CODE (SET_SRC (prev_set)) == HIGH
29851 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29852 return true;
29854 return false;
29857 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29859 static unsigned HOST_WIDE_INT
29860 arm_asan_shadow_offset (void)
29862 return HOST_WIDE_INT_1U << 29;
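/* HOST_WIDE_INT_1U << 29 is 0x20000000, the default AddressSanitizer shadow
   offset for 32-bit targets.  */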
29866 /* This is a temporary fix for PR60655. Ideally we need
29867 to handle most of these cases in the generic part but
29868 currently we reject minus (..) (sym_ref). We try to
29869 ameliorate the case with minus (sym_ref1) (sym_ref2)
29870 where they are in the same section. */
29872 static bool
29873 arm_const_not_ok_for_debug_p (rtx p)
29875 tree decl_op0 = NULL;
29876 tree decl_op1 = NULL;
29878 if (GET_CODE (p) == MINUS)
29880 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29882 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29883 if (decl_op1
29884 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29885 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29887 if ((TREE_CODE (decl_op1) == VAR_DECL
29888 || TREE_CODE (decl_op1) == CONST_DECL)
29889 && (TREE_CODE (decl_op0) == VAR_DECL
29890 || TREE_CODE (decl_op0) == CONST_DECL))
29891 return (get_variable_section (decl_op1, false)
29892 != get_variable_section (decl_op0, false));
29894 if (TREE_CODE (decl_op1) == LABEL_DECL
29895 && TREE_CODE (decl_op0) == LABEL_DECL)
29896 return (DECL_CONTEXT (decl_op1)
29897 != DECL_CONTEXT (decl_op0));
29900 return true;
29904 return false;
29907 /* Return TRUE if X is a reference to a value in a constant pool. */
29908 extern bool
29909 arm_is_constant_pool_ref (rtx x)
29911 return (MEM_P (x)
29912 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29913 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29916 /* Remember the last target of arm_set_current_function. */
29917 static GTY(()) tree arm_previous_fndecl;
29919 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
29921 void
29922 save_restore_target_globals (tree new_tree)
29924 /* If we have a previous state, use it. */
29925 if (TREE_TARGET_GLOBALS (new_tree))
29926 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
29927 else if (new_tree == target_option_default_node)
29928 restore_target_globals (&default_target_globals);
29929 else
29931 /* Call target_reinit and save the state for TARGET_GLOBALS. */
29932 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
29935 arm_option_params_internal ();
29938 /* Invalidate arm_previous_fndecl. */
29940 void
29941 arm_reset_previous_fndecl (void)
29943 arm_previous_fndecl = NULL_TREE;
29946 /* Establish appropriate back-end context for processing the function
29947 FNDECL. The argument might be NULL to indicate processing at top
29948 level, outside of any function scope. */
29950 static void
29951 arm_set_current_function (tree fndecl)
29953 if (!fndecl || fndecl == arm_previous_fndecl)
29954 return;
29956 tree old_tree = (arm_previous_fndecl
29957 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
29958 : NULL_TREE);
29960 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
29962 /* If current function has no attributes but previous one did,
29963 use the default node. */
29964 if (! new_tree && old_tree)
29965 new_tree = target_option_default_node;
29967 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
29968 the default have been handled by save_restore_target_globals from
29969 arm_pragma_target_parse. */
29970 if (old_tree == new_tree)
29971 return;
29973 arm_previous_fndecl = fndecl;
29975 /* First set the target options. */
29976 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
29978 save_restore_target_globals (new_tree);
29981 /* Implement TARGET_OPTION_PRINT. */
29983 static void
29984 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
29986 int flags = ptr->x_target_flags;
29987 const struct arm_fpu_desc *fpu_desc = &all_fpus[ptr->x_arm_fpu_index];
29989 fprintf (file, "%*sselected arch %s\n", indent, "",
29990 TARGET_THUMB2_P (flags) ? "thumb2" :
29991 TARGET_THUMB_P (flags) ? "thumb1" :
29992 "arm");
29994 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_desc->name);
29997 /* Hook to determine if one function can safely inline another. */
29999 static bool
30000 arm_can_inline_p (tree caller, tree callee)
30002 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30003 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30005 struct cl_target_option *caller_opts
30006 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30007 : target_option_default_node);
30009 struct cl_target_option *callee_opts
30010 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30011 : target_option_default_node);
30013 const struct arm_fpu_desc *caller_fpu
30014 = &all_fpus[caller_opts->x_arm_fpu_index];
30015 const struct arm_fpu_desc *callee_fpu
30016 = &all_fpus[callee_opts->x_arm_fpu_index];
30018 /* Callee's fpu features should be a subset of the caller's. */
30019 if ((caller_fpu->features & callee_fpu->features) != callee_fpu->features)
30020 return false;
30022 /* Need same model and regs. */
30023 if (callee_fpu->model != caller_fpu->model
30024 || callee_fpu->regs != caller_fpu->regs)
30025 return false;
30027 /* OK to inline between different modes.
30028    Functions with mode-specific instructions, e.g. using asm,
30029    must be explicitly protected with noinline. */
30030 return true;
30033 /* Hook to fix function's alignment affected by target attribute. */
30035 static void
30036 arm_relayout_function (tree fndecl)
30038 if (DECL_USER_ALIGN (fndecl))
30039 return;
30041 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30043 if (!callee_tree)
30044 callee_tree = target_option_default_node;
30046 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30047 SET_DECL_ALIGN (fndecl, FUNCTION_BOUNDARY_P (opts->x_target_flags));
30050 /* Inner function to process attribute((target(...))): take an argument
30051    and set the current options from it. If we have a list, recursively
30052    process each entry in the list. */
30054 static bool
30055 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30057 if (TREE_CODE (args) == TREE_LIST)
30059 bool ret = true;
30061 for (; args; args = TREE_CHAIN (args))
30062 if (TREE_VALUE (args)
30063 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30064 ret = false;
30065 return ret;
30068 else if (TREE_CODE (args) != STRING_CST)
30070 error ("attribute %<target%> argument not a string");
30071 return false;
30074 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30075 char *q;
30077 while ((q = strtok (argstr, ",")) != NULL)
30079 while (ISSPACE (*q)) ++q;
30081 argstr = NULL;
30082 if (!strncmp (q, "thumb", 5))
30083 opts->x_target_flags |= MASK_THUMB;
30085 else if (!strncmp (q, "arm", 3))
30086 opts->x_target_flags &= ~MASK_THUMB;
30088 else if (!strncmp (q, "fpu=", 4))
30090 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30091 &opts->x_arm_fpu_index, CL_TARGET))
30093 error ("invalid fpu for attribute(target(\"%s\"))", q);
30094 return false;
30097 else
30099 error ("attribute(target(\"%s\")) is unknown", q);
30100 return false;
30103 arm_option_check_internal (opts);
30106 return true;
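/* Source-level forms this parser accepts look like (illustrative):
     int foo (void) __attribute__ ((target ("thumb")));
     int bar (void) __attribute__ ((target ("arm,fpu=vfpv3-d16")));
   anything else is rejected with an error.  */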
30109 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30111 tree
30112 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30113 struct gcc_options *opts_set)
30115 if (!arm_valid_target_attribute_rec (args, opts))
30116 return NULL_TREE;
30118 /* Do any overrides, such as global options arch=xxx. */
30119 arm_option_override_internal (opts, opts_set);
30121 return build_target_option_node (opts);
30124 static void
30125 add_attribute (const char * mode, tree *attributes)
30127 size_t len = strlen (mode);
30128 tree value = build_string (len, mode);
30130 TREE_TYPE (value) = build_array_type (char_type_node,
30131 build_index_type (size_int (len)));
30133 *attributes = tree_cons (get_identifier ("target"),
30134 build_tree_list (NULL_TREE, value),
30135 *attributes);
30138 /* For testing. Insert thumb or arm modes alternately on functions. */
30140 static void
30141 arm_insert_attributes (tree fndecl, tree * attributes)
30143 const char *mode;
30145 if (! TARGET_FLIP_THUMB)
30146 return;
30148 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30149 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30150 return;
30152 /* Nested definitions must inherit mode. */
30153 if (current_function_decl)
30155 mode = TARGET_THUMB ? "thumb" : "arm";
30156 add_attribute (mode, attributes);
30157 return;
30160 /* If there is already a setting don't change it. */
30161 if (lookup_attribute ("target", *attributes) != NULL)
30162 return;
30164 mode = thumb_flipper ? "thumb" : "arm";
30165 add_attribute (mode, attributes);
30167 thumb_flipper = !thumb_flipper;
30170 /* Hook to validate attribute((target("string"))). */
30172 static bool
30173 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30174 tree args, int ARG_UNUSED (flags))
30176 bool ret = true;
30177 struct gcc_options func_options;
30178 tree cur_tree, new_optimize;
30179 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30181 /* Get the optimization options of the current function. */
30182 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30184 /* If the function changed the optimization levels as well as setting target
30185 options, start with the optimizations specified. */
30186 if (!func_optimize)
30187 func_optimize = optimization_default_node;
30189 /* Init func_options. */
30190 memset (&func_options, 0, sizeof (func_options));
30191 init_options_struct (&func_options, NULL);
30192 lang_hooks.init_options_struct (&func_options);
30194 /* Initialize func_options to the defaults. */
30195 cl_optimization_restore (&func_options,
30196 TREE_OPTIMIZATION (func_optimize));
30198 cl_target_option_restore (&func_options,
30199 TREE_TARGET_OPTION (target_option_default_node));
30201 /* Set func_options flags with new target mode. */
30202 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30203 &global_options_set);
30205 if (cur_tree == NULL_TREE)
30206 ret = false;
30208 new_optimize = build_optimization_node (&func_options);
30210 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30212 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30214 finalize_options_struct (&func_options);
30216 return ret;
30219 void
30220 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30223 fprintf (stream, "\t.syntax unified\n");
30225 if (TARGET_THUMB)
30227 if (is_called_in_ARM_mode (decl)
30228 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30229 && cfun->is_thunk))
30230 fprintf (stream, "\t.code 32\n");
30231 else if (TARGET_THUMB1)
30232 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30233 else
30234 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30236 else
30237 fprintf (stream, "\t.arm\n");
30239 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30240 TARGET_SOFT_FLOAT ? "softvfp" : TARGET_FPU_NAME);
30242 if (TARGET_POKE_FUNCTION_NAME)
30243 arm_poke_function_name (stream, (const char *) name);
30246 /* If MEM is in the form of [base+offset], extract the two parts
30247 of address and set to BASE and OFFSET, otherwise return false
30248 after clearing BASE and OFFSET. */
30250 static bool
30251 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30253 rtx addr;
30255 gcc_assert (MEM_P (mem));
30257 addr = XEXP (mem, 0);
30259 /* Strip off const from addresses like (const (addr)). */
30260 if (GET_CODE (addr) == CONST)
30261 addr = XEXP (addr, 0);
30263 if (GET_CODE (addr) == REG)
30265 *base = addr;
30266 *offset = const0_rtx;
30267 return true;
30270 if (GET_CODE (addr) == PLUS
30271 && GET_CODE (XEXP (addr, 0)) == REG
30272 && CONST_INT_P (XEXP (addr, 1)))
30274 *base = XEXP (addr, 0);
30275 *offset = XEXP (addr, 1);
30276 return true;
30279 *base = NULL_RTX;
30280 *offset = NULL_RTX;
30282 return false;
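/* Example (illustrative): for (mem (plus (reg r3) (const_int 8))) this sets
   *BASE to (reg r3) and *OFFSET to (const_int 8); for (mem (reg r3)) the
   offset is const0_rtx.  */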
30285 /* If INSN is a load or store of address in the form of [base+offset],
30286 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
30287 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30288 otherwise return FALSE. */
30290 static bool
30291 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30293 rtx x, dest, src;
30295 gcc_assert (INSN_P (insn));
30296 x = PATTERN (insn);
30297 if (GET_CODE (x) != SET)
30298 return false;
30300 src = SET_SRC (x);
30301 dest = SET_DEST (x);
30302 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30304 *is_load = false;
30305 extract_base_offset_in_addr (dest, base, offset);
30307 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30309 *is_load = true;
30310 extract_base_offset_in_addr (src, base, offset);
30312 else
30313 return false;
30315 return (*base != NULL_RTX && *offset != NULL_RTX);
30318 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30320 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30321 and PRI are only calculated for those instructions. For other instructions,
30322 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
30323 instruction fusion can be supported by returning different priorities.
30325 It's important that irrelevant instructions get the largest FUSION_PRI. */
30327 static void
30328 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30329 int *fusion_pri, int *pri)
30331 int tmp, off_val;
30332 bool is_load;
30333 rtx base, offset;
30335 gcc_assert (INSN_P (insn));
30337 tmp = max_pri - 1;
30338 if (!fusion_load_store (insn, &base, &offset, &is_load))
30340 *pri = tmp;
30341 *fusion_pri = tmp;
30342 return;
30345 /* Load goes first. */
30346 if (is_load)
30347 *fusion_pri = tmp - 1;
30348 else
30349 *fusion_pri = tmp - 2;
30351 tmp /= 2;
30353 /* INSN with smaller base register goes first. */
30354 tmp -= ((REGNO (base) & 0xff) << 20);
30356 /* INSN with smaller offset goes first. */
30357 off_val = (int)(INTVAL (offset));
30358 if (off_val >= 0)
30359 tmp -= (off_val & 0xfffff);
30360 else
30361 tmp += ((- off_val) & 0xfffff);
30363 *pri = tmp;
30364 return;
30368 /* Construct and return a PARALLEL RTX vector with elements numbering the
30369 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30370 the vector - from the perspective of the architecture. This does not
30371 line up with GCC's perspective on lane numbers, so we end up with
30372 different masks depending on our target endian-ness. The diagram
30373 below may help. We must draw the distinction when building masks
30374 which select one half of the vector. An instruction selecting
30375 architectural low-lanes for a big-endian target, must be described using
30376 a mask selecting GCC high-lanes.
30378 Big-Endian Little-Endian
30380 GCC 0 1 2 3 3 2 1 0
30381 | x | x | x | x | | x | x | x | x |
30382 Architecture 3 2 1 0 3 2 1 0
30384 Low Mask: { 2, 3 } { 0, 1 }
29385 High Mask: { 0, 1 } { 2, 3 }
*/
rtx
30389 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30391 int nunits = GET_MODE_NUNITS (mode);
30392 rtvec v = rtvec_alloc (nunits / 2);
30393 int high_base = nunits / 2;
30394 int low_base = 0;
30395 int base;
30396 rtx t1;
30397 int i;
30399 if (BYTES_BIG_ENDIAN)
30400 base = high ? low_base : high_base;
30401 else
30402 base = high ? high_base : low_base;
30404 for (i = 0; i < nunits / 2; i++)
30405 RTVEC_ELT (v, i) = GEN_INT (base + i);
30407 t1 = gen_rtx_PARALLEL (mode, v);
30408 return t1;
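/* Example (illustrative): for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian and
   (parallel [(const_int 0) (const_int 1)]) on big-endian.  */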
30411 /* Check OP for validity as a PARALLEL RTX vector with elements
30412 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30413 from the perspective of the architecture. See the diagram above
30414 arm_simd_vect_par_cnst_half_p for more details. */
30416 bool
30417 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30418 bool high)
30420 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30421 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30422 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30423 int i = 0;
30425 if (!VECTOR_MODE_P (mode))
30426 return false;
30428 if (count_op != count_ideal)
30429 return false;
30431 for (i = 0; i < count_ideal; i++)
30433 rtx elt_op = XVECEXP (op, 0, i);
30434 rtx elt_ideal = XVECEXP (ideal, 0, i);
30436 if (!CONST_INT_P (elt_op)
30437 || INTVAL (elt_ideal) != INTVAL (elt_op))
30438 return false;
30440 return true;
30443 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30444 in Thumb1. */
30445 static bool
30446 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30447 const_tree)
30449 /* For now, we punt and not handle this for TARGET_THUMB1. */
30450 if (vcall_offset && TARGET_THUMB1)
30451 return false;
30453 /* Otherwise ok. */
30454 return true;
30457 #include "gt-arm.h"