[ARM] PR target/67929 Tighten vfp3_const_double_for_bits checks
[official-gcc.git] / gcc / config / arm / arm.c
bloba598c84392d6626bd5d0e85210be7036082e9a71
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "cfghooks.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "emit-rtl.h"
37 #include "recog.h"
38 #include "cgraph.h"
39 #include "diagnostic-core.h"
40 #include "alias.h"
41 #include "fold-const.h"
42 #include "stor-layout.h"
43 #include "calls.h"
44 #include "varasm.h"
45 #include "output.h"
46 #include "insn-attr.h"
47 #include "flags.h"
48 #include "reload.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "cfgrtl.h"
52 #include "sched-int.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
55 #include "intl.h"
56 #include "libfuncs.h"
57 #include "params.h"
58 #include "opts.h"
59 #include "dumpfile.h"
60 #include "target-globals.h"
61 #include "builtins.h"
62 #include "tm-constrs.h"
63 #include "rtl-iter.h"
65 /* This file should be included last. */
66 #include "target-def.h"
68 /* Forward definitions of types. */
69 typedef struct minipool_node Mnode;
70 typedef struct minipool_fixup Mfix;
72 void (*arm_lang_output_object_attributes_hook)(void);
74 struct four_ints
76 int i[4];
79 /* Forward function declarations. */
80 static bool arm_const_not_ok_for_debug_p (rtx);
81 static bool arm_needs_doubleword_align (machine_mode, const_tree);
82 static int arm_compute_static_chain_stack_bytes (void);
83 static arm_stack_offsets *arm_get_frame_offsets (void);
84 static void arm_add_gc_roots (void);
85 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
86 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
87 static unsigned bit_count (unsigned long);
88 static unsigned feature_count (const arm_feature_set*);
89 static int arm_address_register_rtx_p (rtx, int);
90 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
91 static bool is_called_in_ARM_mode (tree);
92 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
93 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
94 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
95 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
96 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
97 inline static int thumb1_index_register_rtx_p (rtx, int);
98 static int thumb_far_jump_used_p (void);
99 static bool thumb_force_lr_save (void);
100 static unsigned arm_size_return_regs (void);
101 static bool arm_assemble_integer (rtx, unsigned int, int);
102 static void arm_print_operand (FILE *, rtx, int);
103 static void arm_print_operand_address (FILE *, rtx);
104 static bool arm_print_operand_punct_valid_p (unsigned char code);
105 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
106 static arm_cc get_arm_condition_code (rtx);
107 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
108 static const char *output_multi_immediate (rtx *, const char *, const char *,
109 int, HOST_WIDE_INT);
110 static const char *shift_op (rtx, HOST_WIDE_INT *);
111 static struct machine_function *arm_init_machine_status (void);
112 static void thumb_exit (FILE *, int);
113 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
114 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
115 static Mnode *add_minipool_forward_ref (Mfix *);
116 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
117 static Mnode *add_minipool_backward_ref (Mfix *);
118 static void assign_minipool_offsets (Mfix *);
119 static void arm_print_value (FILE *, rtx);
120 static void dump_minipool (rtx_insn *);
121 static int arm_barrier_cost (rtx_insn *);
122 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
123 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
124 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
125 machine_mode, rtx);
126 static void arm_reorg (void);
127 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
128 static unsigned long arm_compute_save_reg0_reg12_mask (void);
129 static unsigned long arm_compute_save_reg_mask (void);
130 static unsigned long arm_isr_value (tree);
131 static unsigned long arm_compute_func_type (void);
132 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
133 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
134 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
135 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
136 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
137 #endif
138 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
139 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
140 static int arm_comp_type_attributes (const_tree, const_tree);
141 static void arm_set_default_type_attributes (tree);
142 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
143 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
144 static int optimal_immediate_sequence (enum rtx_code code,
145 unsigned HOST_WIDE_INT val,
146 struct four_ints *return_sequence);
147 static int optimal_immediate_sequence_1 (enum rtx_code code,
148 unsigned HOST_WIDE_INT val,
149 struct four_ints *return_sequence,
150 int i);
151 static int arm_get_strip_length (int);
152 static bool arm_function_ok_for_sibcall (tree, tree);
153 static machine_mode arm_promote_function_mode (const_tree,
154 machine_mode, int *,
155 const_tree, int);
156 static bool arm_return_in_memory (const_tree, const_tree);
157 static rtx arm_function_value (const_tree, const_tree, bool);
158 static rtx arm_libcall_value_1 (machine_mode);
159 static rtx arm_libcall_value (machine_mode, const_rtx);
160 static bool arm_function_value_regno_p (const unsigned int);
161 static void arm_internal_label (FILE *, const char *, unsigned long);
162 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
163 tree);
164 static bool arm_have_conditional_execution (void);
165 static bool arm_cannot_force_const_mem (machine_mode, rtx);
166 static bool arm_legitimate_constant_p (machine_mode, rtx);
167 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
168 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
169 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
170 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
171 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
172 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
173 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
174 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
175 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
176 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
177 static void emit_constant_insn (rtx cond, rtx pattern);
178 static rtx_insn *emit_set_insn (rtx, rtx);
179 static rtx emit_multi_reg_push (unsigned long, unsigned long);
180 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
181 tree, bool);
182 static rtx arm_function_arg (cumulative_args_t, machine_mode,
183 const_tree, bool);
184 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
185 const_tree, bool);
186 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
187 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
188 const_tree);
189 static rtx aapcs_libcall_value (machine_mode);
190 static int aapcs_select_return_coproc (const_tree, const_tree);
192 #ifdef OBJECT_FORMAT_ELF
193 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
194 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
195 #endif
196 #ifndef ARM_PE
197 static void arm_encode_section_info (tree, rtx, int);
198 #endif
200 static void arm_file_end (void);
201 static void arm_file_start (void);
202 static void arm_insert_attributes (tree, tree *);
204 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
205 tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
214 #if ARM_UNWIND_INFO
215 static void arm_unwind_emit (FILE *, rtx_insn *);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 static void arm_asm_init_sections (void);
219 #endif
220 static rtx arm_dwarf_register_span (rtx);
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static void arm_override_options_after_change (void);
237 static void arm_option_print (FILE *, int, struct cl_target_option *);
238 static void arm_set_current_function (tree);
239 static bool arm_can_inline_p (tree, tree);
240 static void arm_relayout_function (tree);
241 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
242 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
243 static bool arm_macro_fusion_p (void);
244 static bool arm_cannot_copy_insn_p (rtx_insn *);
245 static int arm_issue_rate (void);
246 static int arm_first_cycle_multipass_dfa_lookahead (void);
247 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
248 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
249 static bool arm_output_addr_const_extra (FILE *, rtx);
250 static bool arm_allocate_stack_slots_for_args (void);
251 static bool arm_warn_func_return (tree);
252 static const char *arm_invalid_parameter_type (const_tree t);
253 static const char *arm_invalid_return_type (const_tree t);
254 static tree arm_promoted_type (const_tree t);
255 static tree arm_convert_to_type (tree type, tree expr);
256 static bool arm_scalar_mode_supported_p (machine_mode);
257 static bool arm_frame_pointer_required (void);
258 static bool arm_can_eliminate (const int, const int);
259 static void arm_asm_trampoline_template (FILE *);
260 static void arm_trampoline_init (rtx, tree, rtx);
261 static rtx arm_trampoline_adjust_address (rtx);
262 static rtx arm_pic_static_addr (rtx orig, rtx reg);
263 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
264 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
265 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
266 static bool arm_array_mode_supported_p (machine_mode,
267 unsigned HOST_WIDE_INT);
268 static machine_mode arm_preferred_simd_mode (machine_mode);
269 static bool arm_class_likely_spilled_p (reg_class_t);
270 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
271 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
272 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
273 const_tree type,
274 int misalignment,
275 bool is_packed);
276 static void arm_conditional_register_usage (void);
277 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
278 static unsigned int arm_autovectorize_vector_sizes (void);
279 static int arm_default_branch_cost (bool, bool);
280 static int arm_cortex_a5_branch_cost (bool, bool);
281 static int arm_cortex_m_branch_cost (bool, bool);
282 static int arm_cortex_m7_branch_cost (bool, bool);
284 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
285 const unsigned char *sel);
287 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
289 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
290 tree vectype,
291 int misalign ATTRIBUTE_UNUSED);
292 static unsigned arm_add_stmt_cost (void *data, int count,
293 enum vect_cost_for_stmt kind,
294 struct _stmt_vec_info *stmt_info,
295 int misalign,
296 enum vect_cost_model_location where);
298 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
299 bool op0_preserve_value);
300 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
302 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
304 /* Table of machine attributes. */
305 static const struct attribute_spec arm_attribute_table[] =
307 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
308 affects_type_identity } */
309 /* Function calls made to this symbol must be done indirectly, because
310 it may lie outside of the 26 bit addressing range of a normal function
311 call. */
312 { "long_call", 0, 0, false, true, true, NULL, false },
313 /* Whereas these functions are always known to reside within the 26 bit
314 addressing range. */
315 { "short_call", 0, 0, false, true, true, NULL, false },
316 /* Specify the procedure call conventions for a function. */
317 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
318 false },
319 /* Interrupt Service Routines have special prologue and epilogue requirements. */
320 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
321 false },
322 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
323 false },
324 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
325 false },
326 #ifdef ARM_PE
327 /* ARM/PE has three new attributes:
328 interfacearm - ?
329 dllexport - for exporting a function/variable that will live in a dll
330 dllimport - for importing a function/variable from a dll
332 Microsoft allows multiple declspecs in one __declspec, separating
333 them with spaces. We do NOT support this. Instead, use __declspec
334 multiple times.
336 { "dllimport", 0, 0, true, false, false, NULL, false },
337 { "dllexport", 0, 0, true, false, false, NULL, false },
338 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
339 false },
340 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
342 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
343 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
344 false },
345 #endif
346 { NULL, 0, 0, false, false, false, NULL, false }
349 /* Initialize the GCC target structure. */
350 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
351 #undef TARGET_MERGE_DECL_ATTRIBUTES
352 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
353 #endif
355 #undef TARGET_LEGITIMIZE_ADDRESS
356 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
358 #undef TARGET_LRA_P
359 #define TARGET_LRA_P hook_bool_void_true
361 #undef TARGET_ATTRIBUTE_TABLE
362 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
364 #undef TARGET_INSERT_ATTRIBUTES
365 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
367 #undef TARGET_ASM_FILE_START
368 #define TARGET_ASM_FILE_START arm_file_start
369 #undef TARGET_ASM_FILE_END
370 #define TARGET_ASM_FILE_END arm_file_end
372 #undef TARGET_ASM_ALIGNED_SI_OP
373 #define TARGET_ASM_ALIGNED_SI_OP NULL
374 #undef TARGET_ASM_INTEGER
375 #define TARGET_ASM_INTEGER arm_assemble_integer
377 #undef TARGET_PRINT_OPERAND
378 #define TARGET_PRINT_OPERAND arm_print_operand
379 #undef TARGET_PRINT_OPERAND_ADDRESS
380 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
381 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
382 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
384 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
385 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
387 #undef TARGET_ASM_FUNCTION_PROLOGUE
388 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
390 #undef TARGET_ASM_FUNCTION_EPILOGUE
391 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
393 #undef TARGET_CAN_INLINE_P
394 #define TARGET_CAN_INLINE_P arm_can_inline_p
396 #undef TARGET_RELAYOUT_FUNCTION
397 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
399 #undef TARGET_OPTION_OVERRIDE
400 #define TARGET_OPTION_OVERRIDE arm_option_override
402 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
403 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
405 #undef TARGET_OPTION_PRINT
406 #define TARGET_OPTION_PRINT arm_option_print
408 #undef TARGET_COMP_TYPE_ATTRIBUTES
409 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
411 #undef TARGET_SCHED_MACRO_FUSION_P
412 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
414 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
415 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
417 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
418 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
420 #undef TARGET_SCHED_ADJUST_COST
421 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
423 #undef TARGET_SET_CURRENT_FUNCTION
424 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
426 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
427 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
429 #undef TARGET_SCHED_REORDER
430 #define TARGET_SCHED_REORDER arm_sched_reorder
432 #undef TARGET_REGISTER_MOVE_COST
433 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
435 #undef TARGET_MEMORY_MOVE_COST
436 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
438 #undef TARGET_ENCODE_SECTION_INFO
439 #ifdef ARM_PE
440 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
441 #else
442 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
443 #endif
445 #undef TARGET_STRIP_NAME_ENCODING
446 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
448 #undef TARGET_ASM_INTERNAL_LABEL
449 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
451 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
452 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
454 #undef TARGET_FUNCTION_VALUE
455 #define TARGET_FUNCTION_VALUE arm_function_value
457 #undef TARGET_LIBCALL_VALUE
458 #define TARGET_LIBCALL_VALUE arm_libcall_value
460 #undef TARGET_FUNCTION_VALUE_REGNO_P
461 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
463 #undef TARGET_ASM_OUTPUT_MI_THUNK
464 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
465 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
466 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
468 #undef TARGET_RTX_COSTS
469 #define TARGET_RTX_COSTS arm_rtx_costs
470 #undef TARGET_ADDRESS_COST
471 #define TARGET_ADDRESS_COST arm_address_cost
473 #undef TARGET_SHIFT_TRUNCATION_MASK
474 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
475 #undef TARGET_VECTOR_MODE_SUPPORTED_P
476 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
477 #undef TARGET_ARRAY_MODE_SUPPORTED_P
478 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
479 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
480 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
481 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
482 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
483 arm_autovectorize_vector_sizes
485 #undef TARGET_MACHINE_DEPENDENT_REORG
486 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
488 #undef TARGET_INIT_BUILTINS
489 #define TARGET_INIT_BUILTINS arm_init_builtins
490 #undef TARGET_EXPAND_BUILTIN
491 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
492 #undef TARGET_BUILTIN_DECL
493 #define TARGET_BUILTIN_DECL arm_builtin_decl
495 #undef TARGET_INIT_LIBFUNCS
496 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
498 #undef TARGET_PROMOTE_FUNCTION_MODE
499 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
500 #undef TARGET_PROMOTE_PROTOTYPES
501 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
502 #undef TARGET_PASS_BY_REFERENCE
503 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
504 #undef TARGET_ARG_PARTIAL_BYTES
505 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
506 #undef TARGET_FUNCTION_ARG
507 #define TARGET_FUNCTION_ARG arm_function_arg
508 #undef TARGET_FUNCTION_ARG_ADVANCE
509 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
510 #undef TARGET_FUNCTION_ARG_BOUNDARY
511 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
513 #undef TARGET_SETUP_INCOMING_VARARGS
514 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
516 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
517 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
519 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
520 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
521 #undef TARGET_TRAMPOLINE_INIT
522 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
523 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
524 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
526 #undef TARGET_WARN_FUNC_RETURN
527 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
529 #undef TARGET_DEFAULT_SHORT_ENUMS
530 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
532 #undef TARGET_ALIGN_ANON_BITFIELD
533 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
535 #undef TARGET_NARROW_VOLATILE_BITFIELD
536 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
538 #undef TARGET_CXX_GUARD_TYPE
539 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
541 #undef TARGET_CXX_GUARD_MASK_BIT
542 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
544 #undef TARGET_CXX_GET_COOKIE_SIZE
545 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
547 #undef TARGET_CXX_COOKIE_HAS_SIZE
548 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
550 #undef TARGET_CXX_CDTOR_RETURNS_THIS
551 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
553 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
554 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
556 #undef TARGET_CXX_USE_AEABI_ATEXIT
557 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
559 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
560 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
561 arm_cxx_determine_class_data_visibility
563 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
564 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
566 #undef TARGET_RETURN_IN_MSB
567 #define TARGET_RETURN_IN_MSB arm_return_in_msb
569 #undef TARGET_RETURN_IN_MEMORY
570 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
572 #undef TARGET_MUST_PASS_IN_STACK
573 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
575 #if ARM_UNWIND_INFO
576 #undef TARGET_ASM_UNWIND_EMIT
577 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
579 /* EABI unwinding tables use a different format for the typeinfo tables. */
580 #undef TARGET_ASM_TTYPE
581 #define TARGET_ASM_TTYPE arm_output_ttype
583 #undef TARGET_ARM_EABI_UNWINDER
584 #define TARGET_ARM_EABI_UNWINDER true
586 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
587 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
589 #undef TARGET_ASM_INIT_SECTIONS
590 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
591 #endif /* ARM_UNWIND_INFO */
593 #undef TARGET_DWARF_REGISTER_SPAN
594 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
596 #undef TARGET_CANNOT_COPY_INSN_P
597 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
599 #ifdef HAVE_AS_TLS
600 #undef TARGET_HAVE_TLS
601 #define TARGET_HAVE_TLS true
602 #endif
604 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
605 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
607 #undef TARGET_LEGITIMATE_CONSTANT_P
608 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
610 #undef TARGET_CANNOT_FORCE_CONST_MEM
611 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
613 #undef TARGET_MAX_ANCHOR_OFFSET
614 #define TARGET_MAX_ANCHOR_OFFSET 4095
616 /* The minimum is set such that the total size of the block
617 for a particular anchor is -4088 + 1 + 4095 bytes, which is
618 divisible by eight, ensuring natural spacing of anchors. */
619 #undef TARGET_MIN_ANCHOR_OFFSET
620 #define TARGET_MIN_ANCHOR_OFFSET -4088
622 #undef TARGET_SCHED_ISSUE_RATE
623 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
625 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
626 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
627 arm_first_cycle_multipass_dfa_lookahead
629 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
630 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
631 arm_first_cycle_multipass_dfa_lookahead_guard
633 #undef TARGET_MANGLE_TYPE
634 #define TARGET_MANGLE_TYPE arm_mangle_type
636 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
637 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
639 #undef TARGET_BUILD_BUILTIN_VA_LIST
640 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
641 #undef TARGET_EXPAND_BUILTIN_VA_START
642 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
643 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
644 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
646 #ifdef HAVE_AS_TLS
647 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
648 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
649 #endif
651 #undef TARGET_LEGITIMATE_ADDRESS_P
652 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
654 #undef TARGET_PREFERRED_RELOAD_CLASS
655 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
657 #undef TARGET_INVALID_PARAMETER_TYPE
658 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
660 #undef TARGET_INVALID_RETURN_TYPE
661 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
663 #undef TARGET_PROMOTED_TYPE
664 #define TARGET_PROMOTED_TYPE arm_promoted_type
666 #undef TARGET_CONVERT_TO_TYPE
667 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
669 #undef TARGET_SCALAR_MODE_SUPPORTED_P
670 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
672 #undef TARGET_FRAME_POINTER_REQUIRED
673 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
675 #undef TARGET_CAN_ELIMINATE
676 #define TARGET_CAN_ELIMINATE arm_can_eliminate
678 #undef TARGET_CONDITIONAL_REGISTER_USAGE
679 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
681 #undef TARGET_CLASS_LIKELY_SPILLED_P
682 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
684 #undef TARGET_VECTORIZE_BUILTINS
685 #define TARGET_VECTORIZE_BUILTINS
687 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
688 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
689 arm_builtin_vectorized_function
691 #undef TARGET_VECTOR_ALIGNMENT
692 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
694 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
695 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
696 arm_vector_alignment_reachable
698 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
699 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
700 arm_builtin_support_vector_misalignment
702 #undef TARGET_PREFERRED_RENAME_CLASS
703 #define TARGET_PREFERRED_RENAME_CLASS \
704 arm_preferred_rename_class
706 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
707 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
708 arm_vectorize_vec_perm_const_ok
710 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
711 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
712 arm_builtin_vectorization_cost
713 #undef TARGET_VECTORIZE_ADD_STMT_COST
714 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
716 #undef TARGET_CANONICALIZE_COMPARISON
717 #define TARGET_CANONICALIZE_COMPARISON \
718 arm_canonicalize_comparison
720 #undef TARGET_ASAN_SHADOW_OFFSET
721 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
723 #undef MAX_INSN_PER_IT_BLOCK
724 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
726 #undef TARGET_CAN_USE_DOLOOP_P
727 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
729 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
730 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
732 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
733 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
735 #undef TARGET_SCHED_FUSION_PRIORITY
736 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
738 struct gcc_target targetm = TARGET_INITIALIZER;
740 /* Obstack for minipool constant handling. */
741 static struct obstack minipool_obstack;
742 static char * minipool_startobj;
744 /* The maximum number of insns skipped which
745 will be conditionalised if possible. */
746 static int max_insns_skipped = 5;
748 extern FILE * asm_out_file;
750 /* True if we are currently building a constant table. */
751 int making_const_table;
753 /* The processor for which instructions should be scheduled. */
754 enum processor_type arm_tune = arm_none;
756 /* The current tuning set. */
757 const struct tune_params *current_tune;
759 /* Which floating point hardware to schedule for. */
760 int arm_fpu_attr;
762 /* Which floating popint hardware to use. */
763 const struct arm_fpu_desc *arm_fpu_desc;
765 /* Used for Thumb call_via trampolines. */
766 rtx thumb_call_via_label[14];
767 static int thumb_call_reg_needed;
769 /* The bits in this mask specify which
770 instructions we are allowed to generate. */
771 arm_feature_set insn_flags = ARM_FSET_EMPTY;
773 /* The bits in this mask specify which instruction scheduling options should
774 be used. */
775 arm_feature_set tune_flags = ARM_FSET_EMPTY;
777 /* The highest ARM architecture version supported by the
778 target. */
779 enum base_architecture arm_base_arch = BASE_ARCH_0;
781 /* The following are used in the arm.md file as equivalents to bits
782 in the above two flag variables. */
784 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
785 int arm_arch3m = 0;
787 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
788 int arm_arch4 = 0;
790 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
791 int arm_arch4t = 0;
793 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
794 int arm_arch5 = 0;
796 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
797 int arm_arch5e = 0;
799 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
800 int arm_arch6 = 0;
802 /* Nonzero if this chip supports the ARM 6K extensions. */
803 int arm_arch6k = 0;
805 /* Nonzero if this chip supports the ARM 6KZ extensions. */
806 int arm_arch6kz = 0;
808 /* Nonzero if instructions present in ARMv6-M can be used. */
809 int arm_arch6m = 0;
811 /* Nonzero if this chip supports the ARM 7 extensions. */
812 int arm_arch7 = 0;
814 /* Nonzero if instructions not present in the 'M' profile can be used. */
815 int arm_arch_notm = 0;
817 /* Nonzero if instructions present in ARMv7E-M can be used. */
818 int arm_arch7em = 0;
820 /* Nonzero if instructions present in ARMv8 can be used. */
821 int arm_arch8 = 0;
823 /* Nonzero if this chip can benefit from load scheduling. */
824 int arm_ld_sched = 0;
826 /* Nonzero if this chip is a StrongARM. */
827 int arm_tune_strongarm = 0;
829 /* Nonzero if this chip supports Intel Wireless MMX technology. */
830 int arm_arch_iwmmxt = 0;
832 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
833 int arm_arch_iwmmxt2 = 0;
835 /* Nonzero if this chip is an XScale. */
836 int arm_arch_xscale = 0;
838 /* Nonzero if tuning for XScale */
839 int arm_tune_xscale = 0;
841 /* Nonzero if we want to tune for stores that access the write-buffer.
842 This typically means an ARM6 or ARM7 with MMU or MPU. */
843 int arm_tune_wbuf = 0;
845 /* Nonzero if tuning for Cortex-A9. */
846 int arm_tune_cortex_a9 = 0;
848 /* Nonzero if we should define __THUMB_INTERWORK__ in the
849 preprocessor.
850 XXX This is a bit of a hack, it's intended to help work around
851 problems in GLD which doesn't understand that armv5t code is
852 interworking clean. */
853 int arm_cpp_interwork = 0;
855 /* Nonzero if chip supports Thumb 2. */
856 int arm_arch_thumb2;
858 /* Nonzero if chip supports integer division instruction. */
859 int arm_arch_arm_hwdiv;
860 int arm_arch_thumb_hwdiv;
862 /* Nonzero if chip disallows volatile memory access in IT block. */
863 int arm_arch_no_volatile_ce;
865 /* Nonzero if we should use Neon to handle 64-bits operations rather
866 than core registers. */
867 int prefer_neon_for_64bits = 0;
869 /* Nonzero if we shouldn't use literal pools. */
870 bool arm_disable_literal_pool = false;
872 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
873 we must report the mode of the memory reference from
874 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
875 machine_mode output_memory_reference_mode;
877 /* The register number to be used for the PIC offset register. */
878 unsigned arm_pic_register = INVALID_REGNUM;
880 enum arm_pcs arm_pcs_default;
882 /* For an explanation of these variables, see final_prescan_insn below. */
883 int arm_ccfsm_state;
884 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
885 enum arm_cond_code arm_current_cc;
887 rtx arm_target_insn;
888 int arm_target_label;
889 /* The number of conditionally executed insns, including the current insn. */
890 int arm_condexec_count = 0;
891 /* A bitmask specifying the patterns for the IT block.
892 Zero means do not output an IT block before this insn. */
893 int arm_condexec_mask = 0;
894 /* The number of bits used in arm_condexec_mask. */
895 int arm_condexec_masklen = 0;
897 /* Nonzero if chip supports the ARMv8 CRC instructions. */
898 int arm_arch_crc = 0;
900 /* Nonzero if the core has a very small, high-latency, multiply unit. */
901 int arm_m_profile_small_mul = 0;
903 /* The condition codes of the ARM, and the inverse function. */
904 static const char * const arm_condition_codes[] =
906 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
907 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
910 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
911 int arm_regs_in_sequence[] =
913 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
916 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
917 #define streq(string1, string2) (strcmp (string1, string2) == 0)
919 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
920 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
921 | (1 << PIC_OFFSET_TABLE_REGNUM)))
923 /* Initialization code. */
925 struct processors
927 const char *const name;
928 enum processor_type core;
929 const char *arch;
930 enum base_architecture base_arch;
931 const arm_feature_set flags;
932 const struct tune_params *const tune;
936 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
937 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
939 num_slots, \
940 l1_size, \
941 l1_line_size \
944 /* arm generic vectorizer costs. */
945 static const
946 struct cpu_vec_costs arm_default_vec_cost = {
947 1, /* scalar_stmt_cost. */
948 1, /* scalar load_cost. */
949 1, /* scalar_store_cost. */
950 1, /* vec_stmt_cost. */
951 1, /* vec_to_scalar_cost. */
952 1, /* scalar_to_vec_cost. */
953 1, /* vec_align_load_cost. */
954 1, /* vec_unalign_load_cost. */
955 1, /* vec_unalign_store_cost. */
956 1, /* vec_store_cost. */
957 3, /* cond_taken_branch_cost. */
958 1, /* cond_not_taken_branch_cost. */
961 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
962 #include "aarch-cost-tables.h"
966 const struct cpu_cost_table cortexa9_extra_costs =
968 /* ALU */
970 0, /* arith. */
971 0, /* logical. */
972 0, /* shift. */
973 COSTS_N_INSNS (1), /* shift_reg. */
974 COSTS_N_INSNS (1), /* arith_shift. */
975 COSTS_N_INSNS (2), /* arith_shift_reg. */
976 0, /* log_shift. */
977 COSTS_N_INSNS (1), /* log_shift_reg. */
978 COSTS_N_INSNS (1), /* extend. */
979 COSTS_N_INSNS (2), /* extend_arith. */
980 COSTS_N_INSNS (1), /* bfi. */
981 COSTS_N_INSNS (1), /* bfx. */
982 0, /* clz. */
983 0, /* rev. */
984 0, /* non_exec. */
985 true /* non_exec_costs_exec. */
988 /* MULT SImode */
990 COSTS_N_INSNS (3), /* simple. */
991 COSTS_N_INSNS (3), /* flag_setting. */
992 COSTS_N_INSNS (2), /* extend. */
993 COSTS_N_INSNS (3), /* add. */
994 COSTS_N_INSNS (2), /* extend_add. */
995 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
997 /* MULT DImode */
999 0, /* simple (N/A). */
1000 0, /* flag_setting (N/A). */
1001 COSTS_N_INSNS (4), /* extend. */
1002 0, /* add (N/A). */
1003 COSTS_N_INSNS (4), /* extend_add. */
1004 0 /* idiv (N/A). */
1007 /* LD/ST */
1009 COSTS_N_INSNS (2), /* load. */
1010 COSTS_N_INSNS (2), /* load_sign_extend. */
1011 COSTS_N_INSNS (2), /* ldrd. */
1012 COSTS_N_INSNS (2), /* ldm_1st. */
1013 1, /* ldm_regs_per_insn_1st. */
1014 2, /* ldm_regs_per_insn_subsequent. */
1015 COSTS_N_INSNS (5), /* loadf. */
1016 COSTS_N_INSNS (5), /* loadd. */
1017 COSTS_N_INSNS (1), /* load_unaligned. */
1018 COSTS_N_INSNS (2), /* store. */
1019 COSTS_N_INSNS (2), /* strd. */
1020 COSTS_N_INSNS (2), /* stm_1st. */
1021 1, /* stm_regs_per_insn_1st. */
1022 2, /* stm_regs_per_insn_subsequent. */
1023 COSTS_N_INSNS (1), /* storef. */
1024 COSTS_N_INSNS (1), /* stored. */
1025 COSTS_N_INSNS (1), /* store_unaligned. */
1026 COSTS_N_INSNS (1), /* loadv. */
1027 COSTS_N_INSNS (1) /* storev. */
1030 /* FP SFmode */
1032 COSTS_N_INSNS (14), /* div. */
1033 COSTS_N_INSNS (4), /* mult. */
1034 COSTS_N_INSNS (7), /* mult_addsub. */
1035 COSTS_N_INSNS (30), /* fma. */
1036 COSTS_N_INSNS (3), /* addsub. */
1037 COSTS_N_INSNS (1), /* fpconst. */
1038 COSTS_N_INSNS (1), /* neg. */
1039 COSTS_N_INSNS (3), /* compare. */
1040 COSTS_N_INSNS (3), /* widen. */
1041 COSTS_N_INSNS (3), /* narrow. */
1042 COSTS_N_INSNS (3), /* toint. */
1043 COSTS_N_INSNS (3), /* fromint. */
1044 COSTS_N_INSNS (3) /* roundint. */
1046 /* FP DFmode */
1048 COSTS_N_INSNS (24), /* div. */
1049 COSTS_N_INSNS (5), /* mult. */
1050 COSTS_N_INSNS (8), /* mult_addsub. */
1051 COSTS_N_INSNS (30), /* fma. */
1052 COSTS_N_INSNS (3), /* addsub. */
1053 COSTS_N_INSNS (1), /* fpconst. */
1054 COSTS_N_INSNS (1), /* neg. */
1055 COSTS_N_INSNS (3), /* compare. */
1056 COSTS_N_INSNS (3), /* widen. */
1057 COSTS_N_INSNS (3), /* narrow. */
1058 COSTS_N_INSNS (3), /* toint. */
1059 COSTS_N_INSNS (3), /* fromint. */
1060 COSTS_N_INSNS (3) /* roundint. */
1063 /* Vector */
1065 COSTS_N_INSNS (1) /* alu. */
1069 const struct cpu_cost_table cortexa8_extra_costs =
1071 /* ALU */
1073 0, /* arith. */
1074 0, /* logical. */
1075 COSTS_N_INSNS (1), /* shift. */
1076 0, /* shift_reg. */
1077 COSTS_N_INSNS (1), /* arith_shift. */
1078 0, /* arith_shift_reg. */
1079 COSTS_N_INSNS (1), /* log_shift. */
1080 0, /* log_shift_reg. */
1081 0, /* extend. */
1082 0, /* extend_arith. */
1083 0, /* bfi. */
1084 0, /* bfx. */
1085 0, /* clz. */
1086 0, /* rev. */
1087 0, /* non_exec. */
1088 true /* non_exec_costs_exec. */
1091 /* MULT SImode */
1093 COSTS_N_INSNS (1), /* simple. */
1094 COSTS_N_INSNS (1), /* flag_setting. */
1095 COSTS_N_INSNS (1), /* extend. */
1096 COSTS_N_INSNS (1), /* add. */
1097 COSTS_N_INSNS (1), /* extend_add. */
1098 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1100 /* MULT DImode */
1102 0, /* simple (N/A). */
1103 0, /* flag_setting (N/A). */
1104 COSTS_N_INSNS (2), /* extend. */
1105 0, /* add (N/A). */
1106 COSTS_N_INSNS (2), /* extend_add. */
1107 0 /* idiv (N/A). */
1110 /* LD/ST */
1112 COSTS_N_INSNS (1), /* load. */
1113 COSTS_N_INSNS (1), /* load_sign_extend. */
1114 COSTS_N_INSNS (1), /* ldrd. */
1115 COSTS_N_INSNS (1), /* ldm_1st. */
1116 1, /* ldm_regs_per_insn_1st. */
1117 2, /* ldm_regs_per_insn_subsequent. */
1118 COSTS_N_INSNS (1), /* loadf. */
1119 COSTS_N_INSNS (1), /* loadd. */
1120 COSTS_N_INSNS (1), /* load_unaligned. */
1121 COSTS_N_INSNS (1), /* store. */
1122 COSTS_N_INSNS (1), /* strd. */
1123 COSTS_N_INSNS (1), /* stm_1st. */
1124 1, /* stm_regs_per_insn_1st. */
1125 2, /* stm_regs_per_insn_subsequent. */
1126 COSTS_N_INSNS (1), /* storef. */
1127 COSTS_N_INSNS (1), /* stored. */
1128 COSTS_N_INSNS (1), /* store_unaligned. */
1129 COSTS_N_INSNS (1), /* loadv. */
1130 COSTS_N_INSNS (1) /* storev. */
1133 /* FP SFmode */
1135 COSTS_N_INSNS (36), /* div. */
1136 COSTS_N_INSNS (11), /* mult. */
1137 COSTS_N_INSNS (20), /* mult_addsub. */
1138 COSTS_N_INSNS (30), /* fma. */
1139 COSTS_N_INSNS (9), /* addsub. */
1140 COSTS_N_INSNS (3), /* fpconst. */
1141 COSTS_N_INSNS (3), /* neg. */
1142 COSTS_N_INSNS (6), /* compare. */
1143 COSTS_N_INSNS (4), /* widen. */
1144 COSTS_N_INSNS (4), /* narrow. */
1145 COSTS_N_INSNS (8), /* toint. */
1146 COSTS_N_INSNS (8), /* fromint. */
1147 COSTS_N_INSNS (8) /* roundint. */
1149 /* FP DFmode */
1151 COSTS_N_INSNS (64), /* div. */
1152 COSTS_N_INSNS (16), /* mult. */
1153 COSTS_N_INSNS (25), /* mult_addsub. */
1154 COSTS_N_INSNS (30), /* fma. */
1155 COSTS_N_INSNS (9), /* addsub. */
1156 COSTS_N_INSNS (3), /* fpconst. */
1157 COSTS_N_INSNS (3), /* neg. */
1158 COSTS_N_INSNS (6), /* compare. */
1159 COSTS_N_INSNS (6), /* widen. */
1160 COSTS_N_INSNS (6), /* narrow. */
1161 COSTS_N_INSNS (8), /* toint. */
1162 COSTS_N_INSNS (8), /* fromint. */
1163 COSTS_N_INSNS (8) /* roundint. */
1166 /* Vector */
1168 COSTS_N_INSNS (1) /* alu. */
1172 const struct cpu_cost_table cortexa5_extra_costs =
1174 /* ALU */
1176 0, /* arith. */
1177 0, /* logical. */
1178 COSTS_N_INSNS (1), /* shift. */
1179 COSTS_N_INSNS (1), /* shift_reg. */
1180 COSTS_N_INSNS (1), /* arith_shift. */
1181 COSTS_N_INSNS (1), /* arith_shift_reg. */
1182 COSTS_N_INSNS (1), /* log_shift. */
1183 COSTS_N_INSNS (1), /* log_shift_reg. */
1184 COSTS_N_INSNS (1), /* extend. */
1185 COSTS_N_INSNS (1), /* extend_arith. */
1186 COSTS_N_INSNS (1), /* bfi. */
1187 COSTS_N_INSNS (1), /* bfx. */
1188 COSTS_N_INSNS (1), /* clz. */
1189 COSTS_N_INSNS (1), /* rev. */
1190 0, /* non_exec. */
1191 true /* non_exec_costs_exec. */
1195 /* MULT SImode */
1197 0, /* simple. */
1198 COSTS_N_INSNS (1), /* flag_setting. */
1199 COSTS_N_INSNS (1), /* extend. */
1200 COSTS_N_INSNS (1), /* add. */
1201 COSTS_N_INSNS (1), /* extend_add. */
1202 COSTS_N_INSNS (7) /* idiv. */
1204 /* MULT DImode */
1206 0, /* simple (N/A). */
1207 0, /* flag_setting (N/A). */
1208 COSTS_N_INSNS (1), /* extend. */
1209 0, /* add. */
1210 COSTS_N_INSNS (2), /* extend_add. */
1211 0 /* idiv (N/A). */
1214 /* LD/ST */
1216 COSTS_N_INSNS (1), /* load. */
1217 COSTS_N_INSNS (1), /* load_sign_extend. */
1218 COSTS_N_INSNS (6), /* ldrd. */
1219 COSTS_N_INSNS (1), /* ldm_1st. */
1220 1, /* ldm_regs_per_insn_1st. */
1221 2, /* ldm_regs_per_insn_subsequent. */
1222 COSTS_N_INSNS (2), /* loadf. */
1223 COSTS_N_INSNS (4), /* loadd. */
1224 COSTS_N_INSNS (1), /* load_unaligned. */
1225 COSTS_N_INSNS (1), /* store. */
1226 COSTS_N_INSNS (3), /* strd. */
1227 COSTS_N_INSNS (1), /* stm_1st. */
1228 1, /* stm_regs_per_insn_1st. */
1229 2, /* stm_regs_per_insn_subsequent. */
1230 COSTS_N_INSNS (2), /* storef. */
1231 COSTS_N_INSNS (2), /* stored. */
1232 COSTS_N_INSNS (1), /* store_unaligned. */
1233 COSTS_N_INSNS (1), /* loadv. */
1234 COSTS_N_INSNS (1) /* storev. */
1237 /* FP SFmode */
1239 COSTS_N_INSNS (15), /* div. */
1240 COSTS_N_INSNS (3), /* mult. */
1241 COSTS_N_INSNS (7), /* mult_addsub. */
1242 COSTS_N_INSNS (7), /* fma. */
1243 COSTS_N_INSNS (3), /* addsub. */
1244 COSTS_N_INSNS (3), /* fpconst. */
1245 COSTS_N_INSNS (3), /* neg. */
1246 COSTS_N_INSNS (3), /* compare. */
1247 COSTS_N_INSNS (3), /* widen. */
1248 COSTS_N_INSNS (3), /* narrow. */
1249 COSTS_N_INSNS (3), /* toint. */
1250 COSTS_N_INSNS (3), /* fromint. */
1251 COSTS_N_INSNS (3) /* roundint. */
1253 /* FP DFmode */
1255 COSTS_N_INSNS (30), /* div. */
1256 COSTS_N_INSNS (6), /* mult. */
1257 COSTS_N_INSNS (10), /* mult_addsub. */
1258 COSTS_N_INSNS (7), /* fma. */
1259 COSTS_N_INSNS (3), /* addsub. */
1260 COSTS_N_INSNS (3), /* fpconst. */
1261 COSTS_N_INSNS (3), /* neg. */
1262 COSTS_N_INSNS (3), /* compare. */
1263 COSTS_N_INSNS (3), /* widen. */
1264 COSTS_N_INSNS (3), /* narrow. */
1265 COSTS_N_INSNS (3), /* toint. */
1266 COSTS_N_INSNS (3), /* fromint. */
1267 COSTS_N_INSNS (3) /* roundint. */
1270 /* Vector */
1272 COSTS_N_INSNS (1) /* alu. */
1277 const struct cpu_cost_table cortexa7_extra_costs =
1279 /* ALU */
1281 0, /* arith. */
1282 0, /* logical. */
1283 COSTS_N_INSNS (1), /* shift. */
1284 COSTS_N_INSNS (1), /* shift_reg. */
1285 COSTS_N_INSNS (1), /* arith_shift. */
1286 COSTS_N_INSNS (1), /* arith_shift_reg. */
1287 COSTS_N_INSNS (1), /* log_shift. */
1288 COSTS_N_INSNS (1), /* log_shift_reg. */
1289 COSTS_N_INSNS (1), /* extend. */
1290 COSTS_N_INSNS (1), /* extend_arith. */
1291 COSTS_N_INSNS (1), /* bfi. */
1292 COSTS_N_INSNS (1), /* bfx. */
1293 COSTS_N_INSNS (1), /* clz. */
1294 COSTS_N_INSNS (1), /* rev. */
1295 0, /* non_exec. */
1296 true /* non_exec_costs_exec. */
1300 /* MULT SImode */
1302 0, /* simple. */
1303 COSTS_N_INSNS (1), /* flag_setting. */
1304 COSTS_N_INSNS (1), /* extend. */
1305 COSTS_N_INSNS (1), /* add. */
1306 COSTS_N_INSNS (1), /* extend_add. */
1307 COSTS_N_INSNS (7) /* idiv. */
1309 /* MULT DImode */
1311 0, /* simple (N/A). */
1312 0, /* flag_setting (N/A). */
1313 COSTS_N_INSNS (1), /* extend. */
1314 0, /* add. */
1315 COSTS_N_INSNS (2), /* extend_add. */
1316 0 /* idiv (N/A). */
1319 /* LD/ST */
1321 COSTS_N_INSNS (1), /* load. */
1322 COSTS_N_INSNS (1), /* load_sign_extend. */
1323 COSTS_N_INSNS (3), /* ldrd. */
1324 COSTS_N_INSNS (1), /* ldm_1st. */
1325 1, /* ldm_regs_per_insn_1st. */
1326 2, /* ldm_regs_per_insn_subsequent. */
1327 COSTS_N_INSNS (2), /* loadf. */
1328 COSTS_N_INSNS (2), /* loadd. */
1329 COSTS_N_INSNS (1), /* load_unaligned. */
1330 COSTS_N_INSNS (1), /* store. */
1331 COSTS_N_INSNS (3), /* strd. */
1332 COSTS_N_INSNS (1), /* stm_1st. */
1333 1, /* stm_regs_per_insn_1st. */
1334 2, /* stm_regs_per_insn_subsequent. */
1335 COSTS_N_INSNS (2), /* storef. */
1336 COSTS_N_INSNS (2), /* stored. */
1337 COSTS_N_INSNS (1), /* store_unaligned. */
1338 COSTS_N_INSNS (1), /* loadv. */
1339 COSTS_N_INSNS (1) /* storev. */
1342 /* FP SFmode */
1344 COSTS_N_INSNS (15), /* div. */
1345 COSTS_N_INSNS (3), /* mult. */
1346 COSTS_N_INSNS (7), /* mult_addsub. */
1347 COSTS_N_INSNS (7), /* fma. */
1348 COSTS_N_INSNS (3), /* addsub. */
1349 COSTS_N_INSNS (3), /* fpconst. */
1350 COSTS_N_INSNS (3), /* neg. */
1351 COSTS_N_INSNS (3), /* compare. */
1352 COSTS_N_INSNS (3), /* widen. */
1353 COSTS_N_INSNS (3), /* narrow. */
1354 COSTS_N_INSNS (3), /* toint. */
1355 COSTS_N_INSNS (3), /* fromint. */
1356 COSTS_N_INSNS (3) /* roundint. */
1358 /* FP DFmode */
1360 COSTS_N_INSNS (30), /* div. */
1361 COSTS_N_INSNS (6), /* mult. */
1362 COSTS_N_INSNS (10), /* mult_addsub. */
1363 COSTS_N_INSNS (7), /* fma. */
1364 COSTS_N_INSNS (3), /* addsub. */
1365 COSTS_N_INSNS (3), /* fpconst. */
1366 COSTS_N_INSNS (3), /* neg. */
1367 COSTS_N_INSNS (3), /* compare. */
1368 COSTS_N_INSNS (3), /* widen. */
1369 COSTS_N_INSNS (3), /* narrow. */
1370 COSTS_N_INSNS (3), /* toint. */
1371 COSTS_N_INSNS (3), /* fromint. */
1372 COSTS_N_INSNS (3) /* roundint. */
1375 /* Vector */
1377 COSTS_N_INSNS (1) /* alu. */
1381 const struct cpu_cost_table cortexa12_extra_costs =
1383 /* ALU */
1385 0, /* arith. */
1386 0, /* logical. */
1387 0, /* shift. */
1388 COSTS_N_INSNS (1), /* shift_reg. */
1389 COSTS_N_INSNS (1), /* arith_shift. */
1390 COSTS_N_INSNS (1), /* arith_shift_reg. */
1391 COSTS_N_INSNS (1), /* log_shift. */
1392 COSTS_N_INSNS (1), /* log_shift_reg. */
1393 0, /* extend. */
1394 COSTS_N_INSNS (1), /* extend_arith. */
1395 0, /* bfi. */
1396 COSTS_N_INSNS (1), /* bfx. */
1397 COSTS_N_INSNS (1), /* clz. */
1398 COSTS_N_INSNS (1), /* rev. */
1399 0, /* non_exec. */
1400 true /* non_exec_costs_exec. */
1402 /* MULT SImode */
1405 COSTS_N_INSNS (2), /* simple. */
1406 COSTS_N_INSNS (3), /* flag_setting. */
1407 COSTS_N_INSNS (2), /* extend. */
1408 COSTS_N_INSNS (3), /* add. */
1409 COSTS_N_INSNS (2), /* extend_add. */
1410 COSTS_N_INSNS (18) /* idiv. */
1412 /* MULT DImode */
1414 0, /* simple (N/A). */
1415 0, /* flag_setting (N/A). */
1416 COSTS_N_INSNS (3), /* extend. */
1417 0, /* add (N/A). */
1418 COSTS_N_INSNS (3), /* extend_add. */
1419 0 /* idiv (N/A). */
1422 /* LD/ST */
1424 COSTS_N_INSNS (3), /* load. */
1425 COSTS_N_INSNS (3), /* load_sign_extend. */
1426 COSTS_N_INSNS (3), /* ldrd. */
1427 COSTS_N_INSNS (3), /* ldm_1st. */
1428 1, /* ldm_regs_per_insn_1st. */
1429 2, /* ldm_regs_per_insn_subsequent. */
1430 COSTS_N_INSNS (3), /* loadf. */
1431 COSTS_N_INSNS (3), /* loadd. */
1432 0, /* load_unaligned. */
1433 0, /* store. */
1434 0, /* strd. */
1435 0, /* stm_1st. */
1436 1, /* stm_regs_per_insn_1st. */
1437 2, /* stm_regs_per_insn_subsequent. */
1438 COSTS_N_INSNS (2), /* storef. */
1439 COSTS_N_INSNS (2), /* stored. */
1440 0, /* store_unaligned. */
1441 COSTS_N_INSNS (1), /* loadv. */
1442 COSTS_N_INSNS (1) /* storev. */
1445 /* FP SFmode */
1447 COSTS_N_INSNS (17), /* div. */
1448 COSTS_N_INSNS (4), /* mult. */
1449 COSTS_N_INSNS (8), /* mult_addsub. */
1450 COSTS_N_INSNS (8), /* fma. */
1451 COSTS_N_INSNS (4), /* addsub. */
1452 COSTS_N_INSNS (2), /* fpconst. */
1453 COSTS_N_INSNS (2), /* neg. */
1454 COSTS_N_INSNS (2), /* compare. */
1455 COSTS_N_INSNS (4), /* widen. */
1456 COSTS_N_INSNS (4), /* narrow. */
1457 COSTS_N_INSNS (4), /* toint. */
1458 COSTS_N_INSNS (4), /* fromint. */
1459 COSTS_N_INSNS (4) /* roundint. */
1461 /* FP DFmode */
1463 COSTS_N_INSNS (31), /* div. */
1464 COSTS_N_INSNS (4), /* mult. */
1465 COSTS_N_INSNS (8), /* mult_addsub. */
1466 COSTS_N_INSNS (8), /* fma. */
1467 COSTS_N_INSNS (4), /* addsub. */
1468 COSTS_N_INSNS (2), /* fpconst. */
1469 COSTS_N_INSNS (2), /* neg. */
1470 COSTS_N_INSNS (2), /* compare. */
1471 COSTS_N_INSNS (4), /* widen. */
1472 COSTS_N_INSNS (4), /* narrow. */
1473 COSTS_N_INSNS (4), /* toint. */
1474 COSTS_N_INSNS (4), /* fromint. */
1475 COSTS_N_INSNS (4) /* roundint. */
1478 /* Vector */
1480 COSTS_N_INSNS (1) /* alu. */
1484 const struct cpu_cost_table cortexa15_extra_costs =
1486 /* ALU */
1488 0, /* arith. */
1489 0, /* logical. */
1490 0, /* shift. */
1491 0, /* shift_reg. */
1492 COSTS_N_INSNS (1), /* arith_shift. */
1493 COSTS_N_INSNS (1), /* arith_shift_reg. */
1494 COSTS_N_INSNS (1), /* log_shift. */
1495 COSTS_N_INSNS (1), /* log_shift_reg. */
1496 0, /* extend. */
1497 COSTS_N_INSNS (1), /* extend_arith. */
1498 COSTS_N_INSNS (1), /* bfi. */
1499 0, /* bfx. */
1500 0, /* clz. */
1501 0, /* rev. */
1502 0, /* non_exec. */
1503 true /* non_exec_costs_exec. */
1505 /* MULT SImode */
1508 COSTS_N_INSNS (2), /* simple. */
1509 COSTS_N_INSNS (3), /* flag_setting. */
1510 COSTS_N_INSNS (2), /* extend. */
1511 COSTS_N_INSNS (2), /* add. */
1512 COSTS_N_INSNS (2), /* extend_add. */
1513 COSTS_N_INSNS (18) /* idiv. */
1515 /* MULT DImode */
1517 0, /* simple (N/A). */
1518 0, /* flag_setting (N/A). */
1519 COSTS_N_INSNS (3), /* extend. */
1520 0, /* add (N/A). */
1521 COSTS_N_INSNS (3), /* extend_add. */
1522 0 /* idiv (N/A). */
1525 /* LD/ST */
1527 COSTS_N_INSNS (3), /* load. */
1528 COSTS_N_INSNS (3), /* load_sign_extend. */
1529 COSTS_N_INSNS (3), /* ldrd. */
1530 COSTS_N_INSNS (4), /* ldm_1st. */
1531 1, /* ldm_regs_per_insn_1st. */
1532 2, /* ldm_regs_per_insn_subsequent. */
1533 COSTS_N_INSNS (4), /* loadf. */
1534 COSTS_N_INSNS (4), /* loadd. */
1535 0, /* load_unaligned. */
1536 0, /* store. */
1537 0, /* strd. */
1538 COSTS_N_INSNS (1), /* stm_1st. */
1539 1, /* stm_regs_per_insn_1st. */
1540 2, /* stm_regs_per_insn_subsequent. */
1541 0, /* storef. */
1542 0, /* stored. */
1543 0, /* store_unaligned. */
1544 COSTS_N_INSNS (1), /* loadv. */
1545 COSTS_N_INSNS (1) /* storev. */
1548 /* FP SFmode */
1550 COSTS_N_INSNS (17), /* div. */
1551 COSTS_N_INSNS (4), /* mult. */
1552 COSTS_N_INSNS (8), /* mult_addsub. */
1553 COSTS_N_INSNS (8), /* fma. */
1554 COSTS_N_INSNS (4), /* addsub. */
1555 COSTS_N_INSNS (2), /* fpconst. */
1556 COSTS_N_INSNS (2), /* neg. */
1557 COSTS_N_INSNS (5), /* compare. */
1558 COSTS_N_INSNS (4), /* widen. */
1559 COSTS_N_INSNS (4), /* narrow. */
1560 COSTS_N_INSNS (4), /* toint. */
1561 COSTS_N_INSNS (4), /* fromint. */
1562 COSTS_N_INSNS (4) /* roundint. */
1564 /* FP DFmode */
1566 COSTS_N_INSNS (31), /* div. */
1567 COSTS_N_INSNS (4), /* mult. */
1568 COSTS_N_INSNS (8), /* mult_addsub. */
1569 COSTS_N_INSNS (8), /* fma. */
1570 COSTS_N_INSNS (4), /* addsub. */
1571 COSTS_N_INSNS (2), /* fpconst. */
1572 COSTS_N_INSNS (2), /* neg. */
1573 COSTS_N_INSNS (2), /* compare. */
1574 COSTS_N_INSNS (4), /* widen. */
1575 COSTS_N_INSNS (4), /* narrow. */
1576 COSTS_N_INSNS (4), /* toint. */
1577 COSTS_N_INSNS (4), /* fromint. */
1578 COSTS_N_INSNS (4) /* roundint. */
1581 /* Vector */
1583 COSTS_N_INSNS (1) /* alu. */
1587 const struct cpu_cost_table v7m_extra_costs =
1589 /* ALU */
1591 0, /* arith. */
1592 0, /* logical. */
1593 0, /* shift. */
1594 0, /* shift_reg. */
1595 0, /* arith_shift. */
1596 COSTS_N_INSNS (1), /* arith_shift_reg. */
1597 0, /* log_shift. */
1598 COSTS_N_INSNS (1), /* log_shift_reg. */
1599 0, /* extend. */
1600 COSTS_N_INSNS (1), /* extend_arith. */
1601 0, /* bfi. */
1602 0, /* bfx. */
1603 0, /* clz. */
1604 0, /* rev. */
1605 COSTS_N_INSNS (1), /* non_exec. */
1606 false /* non_exec_costs_exec. */
1609 /* MULT SImode */
1611 COSTS_N_INSNS (1), /* simple. */
1612 COSTS_N_INSNS (1), /* flag_setting. */
1613 COSTS_N_INSNS (2), /* extend. */
1614 COSTS_N_INSNS (1), /* add. */
1615 COSTS_N_INSNS (3), /* extend_add. */
1616 COSTS_N_INSNS (8) /* idiv. */
1618 /* MULT DImode */
1620 0, /* simple (N/A). */
1621 0, /* flag_setting (N/A). */
1622 COSTS_N_INSNS (2), /* extend. */
1623 0, /* add (N/A). */
1624 COSTS_N_INSNS (3), /* extend_add. */
1625 0 /* idiv (N/A). */
1628 /* LD/ST */
1630 COSTS_N_INSNS (2), /* load. */
1631 0, /* load_sign_extend. */
1632 COSTS_N_INSNS (3), /* ldrd. */
1633 COSTS_N_INSNS (2), /* ldm_1st. */
1634 1, /* ldm_regs_per_insn_1st. */
1635 1, /* ldm_regs_per_insn_subsequent. */
1636 COSTS_N_INSNS (2), /* loadf. */
1637 COSTS_N_INSNS (3), /* loadd. */
1638 COSTS_N_INSNS (1), /* load_unaligned. */
1639 COSTS_N_INSNS (2), /* store. */
1640 COSTS_N_INSNS (3), /* strd. */
1641 COSTS_N_INSNS (2), /* stm_1st. */
1642 1, /* stm_regs_per_insn_1st. */
1643 1, /* stm_regs_per_insn_subsequent. */
1644 COSTS_N_INSNS (2), /* storef. */
1645 COSTS_N_INSNS (3), /* stored. */
1646 COSTS_N_INSNS (1), /* store_unaligned. */
1647 COSTS_N_INSNS (1), /* loadv. */
1648 COSTS_N_INSNS (1) /* storev. */
1651 /* FP SFmode */
1653 COSTS_N_INSNS (7), /* div. */
1654 COSTS_N_INSNS (2), /* mult. */
1655 COSTS_N_INSNS (5), /* mult_addsub. */
1656 COSTS_N_INSNS (3), /* fma. */
1657 COSTS_N_INSNS (1), /* addsub. */
1658 0, /* fpconst. */
1659 0, /* neg. */
1660 0, /* compare. */
1661 0, /* widen. */
1662 0, /* narrow. */
1663 0, /* toint. */
1664 0, /* fromint. */
1665 0 /* roundint. */
1667 /* FP DFmode */
1669 COSTS_N_INSNS (15), /* div. */
1670 COSTS_N_INSNS (5), /* mult. */
1671 COSTS_N_INSNS (7), /* mult_addsub. */
1672 COSTS_N_INSNS (7), /* fma. */
1673 COSTS_N_INSNS (3), /* addsub. */
1674 0, /* fpconst. */
1675 0, /* neg. */
1676 0, /* compare. */
1677 0, /* widen. */
1678 0, /* narrow. */
1679 0, /* toint. */
1680 0, /* fromint. */
1681 0 /* roundint. */
1684 /* Vector */
1686 COSTS_N_INSNS (1) /* alu. */
1690 const struct tune_params arm_slowmul_tune =
1692 arm_slowmul_rtx_costs,
1693 NULL, /* Insn extra costs. */
1694 NULL, /* Sched adj cost. */
1695 arm_default_branch_cost,
1696 &arm_default_vec_cost,
1697 3, /* Constant limit. */
1698 5, /* Max cond insns. */
1699 8, /* Memset max inline. */
1700 1, /* Issue rate. */
1701 ARM_PREFETCH_NOT_BENEFICIAL,
1702 tune_params::PREF_CONST_POOL_TRUE,
1703 tune_params::PREF_LDRD_FALSE,
1704 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1705 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1706 tune_params::DISPARAGE_FLAGS_NEITHER,
1707 tune_params::PREF_NEON_64_FALSE,
1708 tune_params::PREF_NEON_STRINGOPS_FALSE,
1709 tune_params::FUSE_NOTHING,
1710 tune_params::SCHED_AUTOPREF_OFF
1713 const struct tune_params arm_fastmul_tune =
1715 arm_fastmul_rtx_costs,
1716 NULL, /* Insn extra costs. */
1717 NULL, /* Sched adj cost. */
1718 arm_default_branch_cost,
1719 &arm_default_vec_cost,
1720 1, /* Constant limit. */
1721 5, /* Max cond insns. */
1722 8, /* Memset max inline. */
1723 1, /* Issue rate. */
1724 ARM_PREFETCH_NOT_BENEFICIAL,
1725 tune_params::PREF_CONST_POOL_TRUE,
1726 tune_params::PREF_LDRD_FALSE,
1727 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1728 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1729 tune_params::DISPARAGE_FLAGS_NEITHER,
1730 tune_params::PREF_NEON_64_FALSE,
1731 tune_params::PREF_NEON_STRINGOPS_FALSE,
1732 tune_params::FUSE_NOTHING,
1733 tune_params::SCHED_AUTOPREF_OFF
1736 /* StrongARM has early execution of branches, so a sequence that is worth
1737 skipping is shorter. Set max_insns_skipped to a lower value. */
1739 const struct tune_params arm_strongarm_tune =
1741 arm_fastmul_rtx_costs,
1742 NULL, /* Insn extra costs. */
1743 NULL, /* Sched adj cost. */
1744 arm_default_branch_cost,
1745 &arm_default_vec_cost,
1746 1, /* Constant limit. */
1747 3, /* Max cond insns. */
1748 8, /* Memset max inline. */
1749 1, /* Issue rate. */
1750 ARM_PREFETCH_NOT_BENEFICIAL,
1751 tune_params::PREF_CONST_POOL_TRUE,
1752 tune_params::PREF_LDRD_FALSE,
1753 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1754 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1755 tune_params::DISPARAGE_FLAGS_NEITHER,
1756 tune_params::PREF_NEON_64_FALSE,
1757 tune_params::PREF_NEON_STRINGOPS_FALSE,
1758 tune_params::FUSE_NOTHING,
1759 tune_params::SCHED_AUTOPREF_OFF
1762 const struct tune_params arm_xscale_tune =
1764 arm_xscale_rtx_costs,
1765 NULL, /* Insn extra costs. */
1766 xscale_sched_adjust_cost,
1767 arm_default_branch_cost,
1768 &arm_default_vec_cost,
1769 2, /* Constant limit. */
1770 3, /* Max cond insns. */
1771 8, /* Memset max inline. */
1772 1, /* Issue rate. */
1773 ARM_PREFETCH_NOT_BENEFICIAL,
1774 tune_params::PREF_CONST_POOL_TRUE,
1775 tune_params::PREF_LDRD_FALSE,
1776 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1777 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1778 tune_params::DISPARAGE_FLAGS_NEITHER,
1779 tune_params::PREF_NEON_64_FALSE,
1780 tune_params::PREF_NEON_STRINGOPS_FALSE,
1781 tune_params::FUSE_NOTHING,
1782 tune_params::SCHED_AUTOPREF_OFF
1785 const struct tune_params arm_9e_tune =
1787 arm_9e_rtx_costs,
1788 NULL, /* Insn extra costs. */
1789 NULL, /* Sched adj cost. */
1790 arm_default_branch_cost,
1791 &arm_default_vec_cost,
1792 1, /* Constant limit. */
1793 5, /* Max cond insns. */
1794 8, /* Memset max inline. */
1795 1, /* Issue rate. */
1796 ARM_PREFETCH_NOT_BENEFICIAL,
1797 tune_params::PREF_CONST_POOL_TRUE,
1798 tune_params::PREF_LDRD_FALSE,
1799 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1800 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1801 tune_params::DISPARAGE_FLAGS_NEITHER,
1802 tune_params::PREF_NEON_64_FALSE,
1803 tune_params::PREF_NEON_STRINGOPS_FALSE,
1804 tune_params::FUSE_NOTHING,
1805 tune_params::SCHED_AUTOPREF_OFF
1808 const struct tune_params arm_marvell_pj4_tune =
1810 arm_9e_rtx_costs,
1811 NULL, /* Insn extra costs. */
1812 NULL, /* Sched adj cost. */
1813 arm_default_branch_cost,
1814 &arm_default_vec_cost,
1815 1, /* Constant limit. */
1816 5, /* Max cond insns. */
1817 8, /* Memset max inline. */
1818 2, /* Issue rate. */
1819 ARM_PREFETCH_NOT_BENEFICIAL,
1820 tune_params::PREF_CONST_POOL_TRUE,
1821 tune_params::PREF_LDRD_FALSE,
1822 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1823 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1824 tune_params::DISPARAGE_FLAGS_NEITHER,
1825 tune_params::PREF_NEON_64_FALSE,
1826 tune_params::PREF_NEON_STRINGOPS_FALSE,
1827 tune_params::FUSE_NOTHING,
1828 tune_params::SCHED_AUTOPREF_OFF
1831 const struct tune_params arm_v6t2_tune =
1833 arm_9e_rtx_costs,
1834 NULL, /* Insn extra costs. */
1835 NULL, /* Sched adj cost. */
1836 arm_default_branch_cost,
1837 &arm_default_vec_cost,
1838 1, /* Constant limit. */
1839 5, /* Max cond insns. */
1840 8, /* Memset max inline. */
1841 1, /* Issue rate. */
1842 ARM_PREFETCH_NOT_BENEFICIAL,
1843 tune_params::PREF_CONST_POOL_FALSE,
1844 tune_params::PREF_LDRD_FALSE,
1845 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1846 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1847 tune_params::DISPARAGE_FLAGS_NEITHER,
1848 tune_params::PREF_NEON_64_FALSE,
1849 tune_params::PREF_NEON_STRINGOPS_FALSE,
1850 tune_params::FUSE_NOTHING,
1851 tune_params::SCHED_AUTOPREF_OFF
1855 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1856 const struct tune_params arm_cortex_tune =
1858 arm_9e_rtx_costs,
1859 &generic_extra_costs,
1860 NULL, /* Sched adj cost. */
1861 arm_default_branch_cost,
1862 &arm_default_vec_cost,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 2, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL,
1868 tune_params::PREF_CONST_POOL_FALSE,
1869 tune_params::PREF_LDRD_FALSE,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER,
1873 tune_params::PREF_NEON_64_FALSE,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE,
1875 tune_params::FUSE_NOTHING,
1876 tune_params::SCHED_AUTOPREF_OFF
1879 const struct tune_params arm_cortex_a8_tune =
1881 arm_9e_rtx_costs,
1882 &cortexa8_extra_costs,
1883 NULL, /* Sched adj cost. */
1884 arm_default_branch_cost,
1885 &arm_default_vec_cost,
1886 1, /* Constant limit. */
1887 5, /* Max cond insns. */
1888 8, /* Memset max inline. */
1889 2, /* Issue rate. */
1890 ARM_PREFETCH_NOT_BENEFICIAL,
1891 tune_params::PREF_CONST_POOL_FALSE,
1892 tune_params::PREF_LDRD_FALSE,
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1894 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1895 tune_params::DISPARAGE_FLAGS_NEITHER,
1896 tune_params::PREF_NEON_64_FALSE,
1897 tune_params::PREF_NEON_STRINGOPS_TRUE,
1898 tune_params::FUSE_NOTHING,
1899 tune_params::SCHED_AUTOPREF_OFF
1902 const struct tune_params arm_cortex_a7_tune =
1904 arm_9e_rtx_costs,
1905 &cortexa7_extra_costs,
1906 NULL, /* Sched adj cost. */
1907 arm_default_branch_cost,
1908 &arm_default_vec_cost,
1909 1, /* Constant limit. */
1910 5, /* Max cond insns. */
1911 8, /* Memset max inline. */
1912 2, /* Issue rate. */
1913 ARM_PREFETCH_NOT_BENEFICIAL,
1914 tune_params::PREF_CONST_POOL_FALSE,
1915 tune_params::PREF_LDRD_FALSE,
1916 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1917 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1918 tune_params::DISPARAGE_FLAGS_NEITHER,
1919 tune_params::PREF_NEON_64_FALSE,
1920 tune_params::PREF_NEON_STRINGOPS_TRUE,
1921 tune_params::FUSE_NOTHING,
1922 tune_params::SCHED_AUTOPREF_OFF
1925 const struct tune_params arm_cortex_a15_tune =
1927 arm_9e_rtx_costs,
1928 &cortexa15_extra_costs,
1929 NULL, /* Sched adj cost. */
1930 arm_default_branch_cost,
1931 &arm_default_vec_cost,
1932 1, /* Constant limit. */
1933 2, /* Max cond insns. */
1934 8, /* Memset max inline. */
1935 3, /* Issue rate. */
1936 ARM_PREFETCH_NOT_BENEFICIAL,
1937 tune_params::PREF_CONST_POOL_FALSE,
1938 tune_params::PREF_LDRD_TRUE,
1939 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1940 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1941 tune_params::DISPARAGE_FLAGS_ALL,
1942 tune_params::PREF_NEON_64_FALSE,
1943 tune_params::PREF_NEON_STRINGOPS_TRUE,
1944 tune_params::FUSE_NOTHING,
1945 tune_params::SCHED_AUTOPREF_FULL
1948 const struct tune_params arm_cortex_a53_tune =
1950 arm_9e_rtx_costs,
1951 &cortexa53_extra_costs,
1952 NULL, /* Sched adj cost. */
1953 arm_default_branch_cost,
1954 &arm_default_vec_cost,
1955 1, /* Constant limit. */
1956 5, /* Max cond insns. */
1957 8, /* Memset max inline. */
1958 2, /* Issue rate. */
1959 ARM_PREFETCH_NOT_BENEFICIAL,
1960 tune_params::PREF_CONST_POOL_FALSE,
1961 tune_params::PREF_LDRD_FALSE,
1962 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1963 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1964 tune_params::DISPARAGE_FLAGS_NEITHER,
1965 tune_params::PREF_NEON_64_FALSE,
1966 tune_params::PREF_NEON_STRINGOPS_TRUE,
1967 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1968 tune_params::SCHED_AUTOPREF_OFF
1971 const struct tune_params arm_cortex_a57_tune =
1973 arm_9e_rtx_costs,
1974 &cortexa57_extra_costs,
1975 NULL, /* Sched adj cost. */
1976 arm_default_branch_cost,
1977 &arm_default_vec_cost,
1978 1, /* Constant limit. */
1979 2, /* Max cond insns. */
1980 8, /* Memset max inline. */
1981 3, /* Issue rate. */
1982 ARM_PREFETCH_NOT_BENEFICIAL,
1983 tune_params::PREF_CONST_POOL_FALSE,
1984 tune_params::PREF_LDRD_TRUE,
1985 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1986 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1987 tune_params::DISPARAGE_FLAGS_ALL,
1988 tune_params::PREF_NEON_64_FALSE,
1989 tune_params::PREF_NEON_STRINGOPS_TRUE,
1990 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1991 tune_params::SCHED_AUTOPREF_FULL
1994 const struct tune_params arm_xgene1_tune =
1996 arm_9e_rtx_costs,
1997 &xgene1_extra_costs,
1998 NULL, /* Sched adj cost. */
1999 arm_default_branch_cost,
2000 &arm_default_vec_cost,
2001 1, /* Constant limit. */
2002 2, /* Max cond insns. */
2003 32, /* Memset max inline. */
2004 4, /* Issue rate. */
2005 ARM_PREFETCH_NOT_BENEFICIAL,
2006 tune_params::PREF_CONST_POOL_FALSE,
2007 tune_params::PREF_LDRD_TRUE,
2008 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2009 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2010 tune_params::DISPARAGE_FLAGS_ALL,
2011 tune_params::PREF_NEON_64_FALSE,
2012 tune_params::PREF_NEON_STRINGOPS_FALSE,
2013 tune_params::FUSE_NOTHING,
2014 tune_params::SCHED_AUTOPREF_OFF
2017 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2018 less appealing. Set max_insns_skipped to a low value. */
2020 const struct tune_params arm_cortex_a5_tune =
2022 arm_9e_rtx_costs,
2023 &cortexa5_extra_costs,
2024 NULL, /* Sched adj cost. */
2025 arm_cortex_a5_branch_cost,
2026 &arm_default_vec_cost,
2027 1, /* Constant limit. */
2028 1, /* Max cond insns. */
2029 8, /* Memset max inline. */
2030 2, /* Issue rate. */
2031 ARM_PREFETCH_NOT_BENEFICIAL,
2032 tune_params::PREF_CONST_POOL_FALSE,
2033 tune_params::PREF_LDRD_FALSE,
2034 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2035 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2036 tune_params::DISPARAGE_FLAGS_NEITHER,
2037 tune_params::PREF_NEON_64_FALSE,
2038 tune_params::PREF_NEON_STRINGOPS_TRUE,
2039 tune_params::FUSE_NOTHING,
2040 tune_params::SCHED_AUTOPREF_OFF
2043 const struct tune_params arm_cortex_a9_tune =
2045 arm_9e_rtx_costs,
2046 &cortexa9_extra_costs,
2047 cortex_a9_sched_adjust_cost,
2048 arm_default_branch_cost,
2049 &arm_default_vec_cost,
2050 1, /* Constant limit. */
2051 5, /* Max cond insns. */
2052 8, /* Memset max inline. */
2053 2, /* Issue rate. */
2054 ARM_PREFETCH_BENEFICIAL(4,32,32),
2055 tune_params::PREF_CONST_POOL_FALSE,
2056 tune_params::PREF_LDRD_FALSE,
2057 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2058 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2059 tune_params::DISPARAGE_FLAGS_NEITHER,
2060 tune_params::PREF_NEON_64_FALSE,
2061 tune_params::PREF_NEON_STRINGOPS_FALSE,
2062 tune_params::FUSE_NOTHING,
2063 tune_params::SCHED_AUTOPREF_OFF
2066 const struct tune_params arm_cortex_a12_tune =
2068 arm_9e_rtx_costs,
2069 &cortexa12_extra_costs,
2070 NULL, /* Sched adj cost. */
2071 arm_default_branch_cost,
2072 &arm_default_vec_cost, /* Vectorizer costs. */
2073 1, /* Constant limit. */
2074 2, /* Max cond insns. */
2075 8, /* Memset max inline. */
2076 2, /* Issue rate. */
2077 ARM_PREFETCH_NOT_BENEFICIAL,
2078 tune_params::PREF_CONST_POOL_FALSE,
2079 tune_params::PREF_LDRD_TRUE,
2080 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2081 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2082 tune_params::DISPARAGE_FLAGS_ALL,
2083 tune_params::PREF_NEON_64_FALSE,
2084 tune_params::PREF_NEON_STRINGOPS_TRUE,
2085 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2086 tune_params::SCHED_AUTOPREF_OFF
2089 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2090 cycle to execute each. An LDR from the constant pool also takes two cycles
2091 to execute, but mildly increases pipelining opportunity (consecutive
2092 loads/stores can be pipelined together, saving one cycle), and may also
2093 improve icache utilisation. Hence we prefer the constant pool for such
2094 processors. */
2096 const struct tune_params arm_v7m_tune =
2098 arm_9e_rtx_costs,
2099 &v7m_extra_costs,
2100 NULL, /* Sched adj cost. */
2101 arm_cortex_m_branch_cost,
2102 &arm_default_vec_cost,
2103 1, /* Constant limit. */
2104 2, /* Max cond insns. */
2105 8, /* Memset max inline. */
2106 1, /* Issue rate. */
2107 ARM_PREFETCH_NOT_BENEFICIAL,
2108 tune_params::PREF_CONST_POOL_TRUE,
2109 tune_params::PREF_LDRD_FALSE,
2110 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2111 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2112 tune_params::DISPARAGE_FLAGS_NEITHER,
2113 tune_params::PREF_NEON_64_FALSE,
2114 tune_params::PREF_NEON_STRINGOPS_FALSE,
2115 tune_params::FUSE_NOTHING,
2116 tune_params::SCHED_AUTOPREF_OFF
2119 /* Cortex-M7 tuning. */
2121 const struct tune_params arm_cortex_m7_tune =
2123 arm_9e_rtx_costs,
2124 &v7m_extra_costs,
2125 NULL, /* Sched adj cost. */
2126 arm_cortex_m7_branch_cost,
2127 &arm_default_vec_cost,
2128 0, /* Constant limit. */
2129 1, /* Max cond insns. */
2130 8, /* Memset max inline. */
2131 2, /* Issue rate. */
2132 ARM_PREFETCH_NOT_BENEFICIAL,
2133 tune_params::PREF_CONST_POOL_TRUE,
2134 tune_params::PREF_LDRD_FALSE,
2135 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2137 tune_params::DISPARAGE_FLAGS_NEITHER,
2138 tune_params::PREF_NEON_64_FALSE,
2139 tune_params::PREF_NEON_STRINGOPS_FALSE,
2140 tune_params::FUSE_NOTHING,
2141 tune_params::SCHED_AUTOPREF_OFF
2144 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2145 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2146 const struct tune_params arm_v6m_tune =
2148 arm_9e_rtx_costs,
2149 NULL, /* Insn extra costs. */
2150 NULL, /* Sched adj cost. */
2151 arm_default_branch_cost,
2152 &arm_default_vec_cost, /* Vectorizer costs. */
2153 1, /* Constant limit. */
2154 5, /* Max cond insns. */
2155 8, /* Memset max inline. */
2156 1, /* Issue rate. */
2157 ARM_PREFETCH_NOT_BENEFICIAL,
2158 tune_params::PREF_CONST_POOL_FALSE,
2159 tune_params::PREF_LDRD_FALSE,
2160 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2162 tune_params::DISPARAGE_FLAGS_NEITHER,
2163 tune_params::PREF_NEON_64_FALSE,
2164 tune_params::PREF_NEON_STRINGOPS_FALSE,
2165 tune_params::FUSE_NOTHING,
2166 tune_params::SCHED_AUTOPREF_OFF
2169 const struct tune_params arm_fa726te_tune =
2171 arm_9e_rtx_costs,
2172 NULL, /* Insn extra costs. */
2173 fa726te_sched_adjust_cost,
2174 arm_default_branch_cost,
2175 &arm_default_vec_cost,
2176 1, /* Constant limit. */
2177 5, /* Max cond insns. */
2178 8, /* Memset max inline. */
2179 2, /* Issue rate. */
2180 ARM_PREFETCH_NOT_BENEFICIAL,
2181 tune_params::PREF_CONST_POOL_TRUE,
2182 tune_params::PREF_LDRD_FALSE,
2183 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2185 tune_params::DISPARAGE_FLAGS_NEITHER,
2186 tune_params::PREF_NEON_64_FALSE,
2187 tune_params::PREF_NEON_STRINGOPS_FALSE,
2188 tune_params::FUSE_NOTHING,
2189 tune_params::SCHED_AUTOPREF_OFF
2193 /* Not all of these give usefully different compilation alternatives,
2194 but there is no simple way of generalizing them. */
2195 static const struct processors all_cores[] =
2197 /* ARM Cores */
2198 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2199 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2200 FLAGS, &arm_##COSTS##_tune},
2201 #include "arm-cores.def"
2202 #undef ARM_CORE
2203 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
2206 static const struct processors all_architectures[] =
2208 /* ARM Architectures */
2209 /* We don't specify tuning costs here as it will be figured out
2210 from the core. */
2212 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2213 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2214 #include "arm-arches.def"
2215 #undef ARM_ARCH
2216 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
2220 /* These are populated as commandline arguments are processed, or NULL
2221 if not specified. */
2222 static const struct processors *arm_selected_arch;
2223 static const struct processors *arm_selected_cpu;
2224 static const struct processors *arm_selected_tune;
2226 /* The name of the preprocessor macro to define for this architecture. */
2228 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2230 /* Available values for -mfpu=. */
2232 static const struct arm_fpu_desc all_fpus[] =
2234 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, FEATURES) \
2235 { NAME, MODEL, REV, VFP_REGS, FEATURES },
2236 #include "arm-fpus.def"
2237 #undef ARM_FPU
2241 /* Supported TLS relocations. */
2243 enum tls_reloc {
2244 TLS_GD32,
2245 TLS_LDM32,
2246 TLS_LDO32,
2247 TLS_IE32,
2248 TLS_LE32,
2249 TLS_DESCSEQ /* GNU scheme */
2252 /* The maximum number of insns to be used when loading a constant. */
2253 inline static int
2254 arm_constant_limit (bool size_p)
2256 return size_p ? 1 : current_tune->constant_limit;
2259 /* Emit an insn that's a simple single-set. Both the operands must be known
2260 to be valid. */
2261 inline static rtx_insn *
2262 emit_set_insn (rtx x, rtx y)
2264 return emit_insn (gen_rtx_SET (x, y));
2267 /* Return the number of bits set in VALUE. */
2268 static unsigned
2269 bit_count (unsigned long value)
2271 unsigned long count = 0;
2273 while (value)
2275 count++;
2276 value &= value - 1; /* Clear the least-significant set bit. */
2279 return count;
2282 /* Return the number of features in feature-set SET. */
2283 static unsigned
2284 feature_count (const arm_feature_set * set)
2286 return (bit_count (ARM_FSET_CPU1 (*set))
2287 + bit_count (ARM_FSET_CPU2 (*set)));
2290 typedef struct
2292 machine_mode mode;
2293 const char *name;
2294 } arm_fixed_mode_set;
2296 /* A small helper for setting fixed-point library libfuncs. */
2298 static void
2299 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2300 const char *funcname, const char *modename,
2301 int num_suffix)
2303 char buffer[50];
2305 if (num_suffix == 0)
2306 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2307 else
2308 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2310 set_optab_libfunc (optable, mode, buffer);
2313 static void
2314 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2315 machine_mode from, const char *funcname,
2316 const char *toname, const char *fromname)
2318 char buffer[50];
2319 const char *maybe_suffix_2 = "";
2321 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2322 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2323 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2324 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2325 maybe_suffix_2 = "2";
2327 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2328 maybe_suffix_2);
2330 set_conv_libfunc (optable, to, from, buffer);
2333 /* Set up library functions unique to ARM. */
2335 static void
2336 arm_init_libfuncs (void)
2338 /* For Linux, we have access to kernel support for atomic operations. */
2339 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2340 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2342 /* There are no special library functions unless we are using the
2343 ARM BPABI. */
2344 if (!TARGET_BPABI)
2345 return;
2347 /* The functions below are described in Section 4 of the "Run-Time
2348 ABI for the ARM architecture", Version 1.0. */
2350 /* Double-precision floating-point arithmetic. Table 2. */
2351 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2352 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2353 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2354 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2355 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2357 /* Double-precision comparisons. Table 3. */
2358 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2359 set_optab_libfunc (ne_optab, DFmode, NULL);
2360 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2361 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2362 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2363 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2364 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2366 /* Single-precision floating-point arithmetic. Table 4. */
2367 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2368 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2369 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2370 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2371 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2373 /* Single-precision comparisons. Table 5. */
2374 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2375 set_optab_libfunc (ne_optab, SFmode, NULL);
2376 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2377 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2378 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2379 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2380 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2382 /* Floating-point to integer conversions. Table 6. */
2383 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2384 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2385 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2386 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2387 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2388 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2389 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2390 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2392 /* Conversions between floating types. Table 7. */
2393 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2394 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2396 /* Integer to floating-point conversions. Table 8. */
2397 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2398 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2399 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2400 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2401 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2402 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2403 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2404 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2406 /* Long long. Table 9. */
2407 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2408 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2409 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2410 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2411 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2412 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2413 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2414 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2416 /* Integer (32/32->32) division. \S 4.3.1. */
2417 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2418 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2420 /* The divmod functions are designed so that they can be used for
2421 plain division, even though they return both the quotient and the
2422 remainder. The quotient is returned in the usual location (i.e.,
2423 r0 for SImode, {r0, r1} for DImode), just as would be expected
2424 for an ordinary division routine. Because the AAPCS calling
2425 conventions specify that all of { r0, r1, r2, r3 } are
2426 callee-saved registers, there is no need to tell the compiler
2427 explicitly that those registers are clobbered by these
2428 routines. */
2429 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2430 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2432 /* For SImode division the ABI provides div-without-mod routines,
2433 which are faster. */
2434 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2435 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2437 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2438 divmod libcalls instead. */
2439 set_optab_libfunc (smod_optab, DImode, NULL);
2440 set_optab_libfunc (umod_optab, DImode, NULL);
2441 set_optab_libfunc (smod_optab, SImode, NULL);
2442 set_optab_libfunc (umod_optab, SImode, NULL);
2444 /* Half-precision float operations. The compiler handles all operations
2445 with NULL libfuncs by converting the SFmode. */
2446 switch (arm_fp16_format)
2448 case ARM_FP16_FORMAT_IEEE:
2449 case ARM_FP16_FORMAT_ALTERNATIVE:
2451 /* Conversions. */
2452 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2453 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2454 ? "__gnu_f2h_ieee"
2455 : "__gnu_f2h_alternative"));
2456 set_conv_libfunc (sext_optab, SFmode, HFmode,
2457 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2458 ? "__gnu_h2f_ieee"
2459 : "__gnu_h2f_alternative"));
2461 /* Arithmetic. */
2462 set_optab_libfunc (add_optab, HFmode, NULL);
2463 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2464 set_optab_libfunc (smul_optab, HFmode, NULL);
2465 set_optab_libfunc (neg_optab, HFmode, NULL);
2466 set_optab_libfunc (sub_optab, HFmode, NULL);
2468 /* Comparisons. */
2469 set_optab_libfunc (eq_optab, HFmode, NULL);
2470 set_optab_libfunc (ne_optab, HFmode, NULL);
2471 set_optab_libfunc (lt_optab, HFmode, NULL);
2472 set_optab_libfunc (le_optab, HFmode, NULL);
2473 set_optab_libfunc (ge_optab, HFmode, NULL);
2474 set_optab_libfunc (gt_optab, HFmode, NULL);
2475 set_optab_libfunc (unord_optab, HFmode, NULL);
2476 break;
2478 default:
2479 break;
2482 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2484 const arm_fixed_mode_set fixed_arith_modes[] =
2486 { QQmode, "qq" },
2487 { UQQmode, "uqq" },
2488 { HQmode, "hq" },
2489 { UHQmode, "uhq" },
2490 { SQmode, "sq" },
2491 { USQmode, "usq" },
2492 { DQmode, "dq" },
2493 { UDQmode, "udq" },
2494 { TQmode, "tq" },
2495 { UTQmode, "utq" },
2496 { HAmode, "ha" },
2497 { UHAmode, "uha" },
2498 { SAmode, "sa" },
2499 { USAmode, "usa" },
2500 { DAmode, "da" },
2501 { UDAmode, "uda" },
2502 { TAmode, "ta" },
2503 { UTAmode, "uta" }
2505 const arm_fixed_mode_set fixed_conv_modes[] =
2507 { QQmode, "qq" },
2508 { UQQmode, "uqq" },
2509 { HQmode, "hq" },
2510 { UHQmode, "uhq" },
2511 { SQmode, "sq" },
2512 { USQmode, "usq" },
2513 { DQmode, "dq" },
2514 { UDQmode, "udq" },
2515 { TQmode, "tq" },
2516 { UTQmode, "utq" },
2517 { HAmode, "ha" },
2518 { UHAmode, "uha" },
2519 { SAmode, "sa" },
2520 { USAmode, "usa" },
2521 { DAmode, "da" },
2522 { UDAmode, "uda" },
2523 { TAmode, "ta" },
2524 { UTAmode, "uta" },
2525 { QImode, "qi" },
2526 { HImode, "hi" },
2527 { SImode, "si" },
2528 { DImode, "di" },
2529 { TImode, "ti" },
2530 { SFmode, "sf" },
2531 { DFmode, "df" }
2533 unsigned int i, j;
2535 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2537 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2538 "add", fixed_arith_modes[i].name, 3);
2539 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2540 "ssadd", fixed_arith_modes[i].name, 3);
2541 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2542 "usadd", fixed_arith_modes[i].name, 3);
2543 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2544 "sub", fixed_arith_modes[i].name, 3);
2545 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2546 "sssub", fixed_arith_modes[i].name, 3);
2547 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2548 "ussub", fixed_arith_modes[i].name, 3);
2549 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2550 "mul", fixed_arith_modes[i].name, 3);
2551 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2552 "ssmul", fixed_arith_modes[i].name, 3);
2553 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2554 "usmul", fixed_arith_modes[i].name, 3);
2555 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2556 "div", fixed_arith_modes[i].name, 3);
2557 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2558 "udiv", fixed_arith_modes[i].name, 3);
2559 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2560 "ssdiv", fixed_arith_modes[i].name, 3);
2561 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2562 "usdiv", fixed_arith_modes[i].name, 3);
2563 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2564 "neg", fixed_arith_modes[i].name, 2);
2565 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2566 "ssneg", fixed_arith_modes[i].name, 2);
2567 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2568 "usneg", fixed_arith_modes[i].name, 2);
2569 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2570 "ashl", fixed_arith_modes[i].name, 3);
2571 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2572 "ashr", fixed_arith_modes[i].name, 3);
2573 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2574 "lshr", fixed_arith_modes[i].name, 3);
2575 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2576 "ssashl", fixed_arith_modes[i].name, 3);
2577 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2578 "usashl", fixed_arith_modes[i].name, 3);
2579 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2580 "cmp", fixed_arith_modes[i].name, 2);
2583 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2584 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2586 if (i == j
2587 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2588 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2589 continue;
2591 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2592 fixed_conv_modes[j].mode, "fract",
2593 fixed_conv_modes[i].name,
2594 fixed_conv_modes[j].name);
2595 arm_set_fixed_conv_libfunc (satfract_optab,
2596 fixed_conv_modes[i].mode,
2597 fixed_conv_modes[j].mode, "satfract",
2598 fixed_conv_modes[i].name,
2599 fixed_conv_modes[j].name);
2600 arm_set_fixed_conv_libfunc (fractuns_optab,
2601 fixed_conv_modes[i].mode,
2602 fixed_conv_modes[j].mode, "fractuns",
2603 fixed_conv_modes[i].name,
2604 fixed_conv_modes[j].name);
2605 arm_set_fixed_conv_libfunc (satfractuns_optab,
2606 fixed_conv_modes[i].mode,
2607 fixed_conv_modes[j].mode, "satfractuns",
2608 fixed_conv_modes[i].name,
2609 fixed_conv_modes[j].name);
2613 if (TARGET_AAPCS_BASED)
2614 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2617 /* On AAPCS systems, this is the "struct __va_list". */
2618 static GTY(()) tree va_list_type;
2620 /* Return the type to use as __builtin_va_list. */
2621 static tree
2622 arm_build_builtin_va_list (void)
2624 tree va_list_name;
2625 tree ap_field;
2627 if (!TARGET_AAPCS_BASED)
2628 return std_build_builtin_va_list ();
2630 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2631 defined as:
2633 struct __va_list
2635 void *__ap;
2638 The C Library ABI further reinforces this definition in \S
2639 4.1.
2641 We must follow this definition exactly. The structure tag
2642 name is visible in C++ mangled names, and thus forms a part
2643 of the ABI. The field name may be used by people who
2644 #include <stdarg.h>. */
2645 /* Create the type. */
2646 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2647 /* Give it the required name. */
2648 va_list_name = build_decl (BUILTINS_LOCATION,
2649 TYPE_DECL,
2650 get_identifier ("__va_list"),
2651 va_list_type);
2652 DECL_ARTIFICIAL (va_list_name) = 1;
2653 TYPE_NAME (va_list_type) = va_list_name;
2654 TYPE_STUB_DECL (va_list_type) = va_list_name;
2655 /* Create the __ap field. */
2656 ap_field = build_decl (BUILTINS_LOCATION,
2657 FIELD_DECL,
2658 get_identifier ("__ap"),
2659 ptr_type_node);
2660 DECL_ARTIFICIAL (ap_field) = 1;
2661 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2662 TYPE_FIELDS (va_list_type) = ap_field;
2663 /* Compute its layout. */
2664 layout_type (va_list_type);
2666 return va_list_type;
2669 /* Return an expression of type "void *" pointing to the next
2670 available argument in a variable-argument list. VALIST is the
2671 user-level va_list object, of type __builtin_va_list. */
2672 static tree
2673 arm_extract_valist_ptr (tree valist)
2675 if (TREE_TYPE (valist) == error_mark_node)
2676 return error_mark_node;
2678 /* On an AAPCS target, the pointer is stored within "struct
2679 va_list". */
2680 if (TARGET_AAPCS_BASED)
2682 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2683 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2684 valist, ap_field, NULL_TREE);
2687 return valist;
2690 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2691 static void
2692 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2694 valist = arm_extract_valist_ptr (valist);
2695 std_expand_builtin_va_start (valist, nextarg);
2698 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2699 static tree
2700 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2701 gimple_seq *post_p)
2703 valist = arm_extract_valist_ptr (valist);
2704 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2707 /* Check any incompatible options that the user has specified. */
2708 static void
2709 arm_option_check_internal (struct gcc_options *opts)
2711 int flags = opts->x_target_flags;
2713 /* Make sure that the processor choice does not conflict with any of the
2714 other command line choices. */
2715 if (TARGET_ARM_P (flags) && !ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM))
2716 error ("target CPU does not support ARM mode");
2718 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2719 from here where no function is being compiled currently. */
2720 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2721 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2723 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2724 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2726 /* If this target is normally configured to use APCS frames, warn if they
2727 are turned off and debugging is turned on. */
2728 if (TARGET_ARM_P (flags)
2729 && write_symbols != NO_DEBUG
2730 && !TARGET_APCS_FRAME
2731 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2732 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2734 /* iWMMXt unsupported under Thumb mode. */
2735 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2736 error ("iWMMXt unsupported under Thumb mode");
2738 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2739 error ("can not use -mtp=cp15 with 16-bit Thumb");
2741 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2743 error ("RTP PIC is incompatible with Thumb");
2744 flag_pic = 0;
2747 /* We only support -mslow-flash-data on armv7-m targets. */
2748 if (target_slow_flash_data
2749 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2750 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2751 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2754 /* Recompute the global settings depending on target attribute options. */
2756 static void
2757 arm_option_params_internal (void)
2759 /* If we are not using the default (ARM mode) section anchor offset
2760 ranges, then set the correct ranges now. */
2761 if (TARGET_THUMB1)
2763 /* Thumb-1 LDR instructions cannot have negative offsets.
2764 Permissible positive offset ranges are 5-bit (for byte loads),
2765 6-bit (for halfword loads), or 7-bit (for word loads).
2766 Empirical results suggest a 7-bit anchor range gives the best
2767 overall code size. */
2768 targetm.min_anchor_offset = 0;
2769 targetm.max_anchor_offset = 127;
2771 else if (TARGET_THUMB2)
2773 /* The minimum is set such that the total size of the block
2774 for a particular anchor is 248 + 1 + 4095 bytes, which is
2775 divisible by eight, ensuring natural spacing of anchors. */
2776 targetm.min_anchor_offset = -248;
2777 targetm.max_anchor_offset = 4095;
2779 else
2781 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2782 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2785 if (optimize_size)
2787 /* If optimizing for size, bump the number of instructions that we
2788 are prepared to conditionally execute (even on a StrongARM). */
2789 max_insns_skipped = 6;
2791 /* For THUMB2, we limit the conditional sequence to one IT block. */
2792 if (TARGET_THUMB2)
2793 max_insns_skipped = arm_restrict_it ? 1 : 4;
2795 else
2796 /* When -mrestrict-it is in use tone down the if-conversion. */
2797 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2798 ? 1 : current_tune->max_insns_skipped;
2801 /* True if -mflip-thumb should next add an attribute for the default
2802 mode, false if it should next add an attribute for the opposite mode. */
2803 static GTY(()) bool thumb_flipper;
2805 /* Options after initial target override. */
2806 static GTY(()) tree init_optimize;
2808 static void
2809 arm_override_options_after_change_1 (struct gcc_options *opts)
2811 if (opts->x_align_functions <= 0)
2812 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2813 && opts->x_optimize_size ? 2 : 4;
2816 /* Implement targetm.override_options_after_change. */
2818 static void
2819 arm_override_options_after_change (void)
2821 arm_override_options_after_change_1 (&global_options);
2824 /* Reset options between modes that the user has specified. */
2825 static void
2826 arm_option_override_internal (struct gcc_options *opts,
2827 struct gcc_options *opts_set)
2829 arm_override_options_after_change_1 (opts);
2831 if (TARGET_THUMB_P (opts->x_target_flags)
2832 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB)))
2834 warning (0, "target CPU does not support THUMB instructions");
2835 opts->x_target_flags &= ~MASK_THUMB;
2838 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2840 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2841 opts->x_target_flags &= ~MASK_APCS_FRAME;
2844 /* Callee super interworking implies thumb interworking. Adding
2845 this to the flags here simplifies the logic elsewhere. */
2846 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2847 opts->x_target_flags |= MASK_INTERWORK;
2849 /* need to remember initial values so combinaisons of options like
2850 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2851 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2853 if (! opts_set->x_arm_restrict_it)
2854 opts->x_arm_restrict_it = arm_arch8;
2856 if (!TARGET_THUMB2_P (opts->x_target_flags))
2857 opts->x_arm_restrict_it = 0;
2859 /* Don't warn since it's on by default in -O2. */
2860 if (TARGET_THUMB1_P (opts->x_target_flags))
2861 opts->x_flag_schedule_insns = 0;
2862 else
2863 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
2865 /* Disable shrink-wrap when optimizing function for size, since it tends to
2866 generate additional returns. */
2867 if (optimize_function_for_size_p (cfun)
2868 && TARGET_THUMB2_P (opts->x_target_flags))
2869 opts->x_flag_shrink_wrap = false;
2870 else
2871 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
2873 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2874 - epilogue_insns - does not accurately model the corresponding insns
2875 emitted in the asm file. In particular, see the comment in thumb_exit
2876 'Find out how many of the (return) argument registers we can corrupt'.
2877 As a consequence, the epilogue may clobber registers without fipa-ra
2878 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
2879 TODO: Accurately model clobbers for epilogue_insns and reenable
2880 fipa-ra. */
2881 if (TARGET_THUMB1_P (opts->x_target_flags))
2882 opts->x_flag_ipa_ra = 0;
2883 else
2884 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
2886 /* Thumb2 inline assembly code should always use unified syntax.
2887 This will apply to ARM and Thumb1 eventually. */
2888 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
2891 /* Fix up any incompatible options that the user has specified. */
2892 static void
2893 arm_option_override (void)
2895 arm_selected_arch = NULL;
2896 arm_selected_cpu = NULL;
2897 arm_selected_tune = NULL;
2899 if (global_options_set.x_arm_arch_option)
2900 arm_selected_arch = &all_architectures[arm_arch_option];
2902 if (global_options_set.x_arm_cpu_option)
2904 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2905 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2908 if (global_options_set.x_arm_tune_option)
2909 arm_selected_tune = &all_cores[(int) arm_tune_option];
2911 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2912 SUBTARGET_OVERRIDE_OPTIONS;
2913 #endif
2915 if (arm_selected_arch)
2917 if (arm_selected_cpu)
2919 const arm_feature_set tuning_flags = ARM_FSET_MAKE_CPU1 (FL_TUNE);
2920 arm_feature_set selected_flags;
2921 ARM_FSET_XOR (selected_flags, arm_selected_cpu->flags,
2922 arm_selected_arch->flags);
2923 ARM_FSET_EXCLUDE (selected_flags, selected_flags, tuning_flags);
2924 /* Check for conflict between mcpu and march. */
2925 if (!ARM_FSET_IS_EMPTY (selected_flags))
2927 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2928 arm_selected_cpu->name, arm_selected_arch->name);
2929 /* -march wins for code generation.
2930 -mcpu wins for default tuning. */
2931 if (!arm_selected_tune)
2932 arm_selected_tune = arm_selected_cpu;
2934 arm_selected_cpu = arm_selected_arch;
2936 else
2937 /* -mcpu wins. */
2938 arm_selected_arch = NULL;
2940 else
2941 /* Pick a CPU based on the architecture. */
2942 arm_selected_cpu = arm_selected_arch;
2945 /* If the user did not specify a processor, choose one for them. */
2946 if (!arm_selected_cpu)
2948 const struct processors * sel;
2949 arm_feature_set sought = ARM_FSET_EMPTY;;
2951 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2952 if (!arm_selected_cpu->name)
2954 #ifdef SUBTARGET_CPU_DEFAULT
2955 /* Use the subtarget default CPU if none was specified by
2956 configure. */
2957 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2958 #endif
2959 /* Default to ARM6. */
2960 if (!arm_selected_cpu->name)
2961 arm_selected_cpu = &all_cores[arm6];
2964 sel = arm_selected_cpu;
2965 insn_flags = sel->flags;
2967 /* Now check to see if the user has specified some command line
2968 switch that require certain abilities from the cpu. */
2970 if (TARGET_INTERWORK || TARGET_THUMB)
2972 ARM_FSET_ADD_CPU1 (sought, FL_THUMB);
2973 ARM_FSET_ADD_CPU1 (sought, FL_MODE32);
2975 /* There are no ARM processors that support both APCS-26 and
2976 interworking. Therefore we force FL_MODE26 to be removed
2977 from insn_flags here (if it was set), so that the search
2978 below will always be able to find a compatible processor. */
2979 ARM_FSET_DEL_CPU1 (insn_flags, FL_MODE26);
2982 if (!ARM_FSET_IS_EMPTY (sought)
2983 && !(ARM_FSET_CPU_SUBSET (sought, insn_flags)))
2985 /* Try to locate a CPU type that supports all of the abilities
2986 of the default CPU, plus the extra abilities requested by
2987 the user. */
2988 for (sel = all_cores; sel->name != NULL; sel++)
2989 if (ARM_FSET_CPU_SUBSET (sought, sel->flags))
2990 break;
2992 if (sel->name == NULL)
2994 unsigned current_bit_count = 0;
2995 const struct processors * best_fit = NULL;
2997 /* Ideally we would like to issue an error message here
2998 saying that it was not possible to find a CPU compatible
2999 with the default CPU, but which also supports the command
3000 line options specified by the programmer, and so they
3001 ought to use the -mcpu=<name> command line option to
3002 override the default CPU type.
3004 If we cannot find a cpu that has both the
3005 characteristics of the default cpu and the given
3006 command line options we scan the array again looking
3007 for a best match. */
3008 for (sel = all_cores; sel->name != NULL; sel++)
3010 arm_feature_set required = ARM_FSET_EMPTY;
3011 ARM_FSET_UNION (required, sought, insn_flags);
3012 if (ARM_FSET_CPU_SUBSET (required, sel->flags))
3014 unsigned count;
3015 arm_feature_set flags;
3016 ARM_FSET_INTER (flags, sel->flags, insn_flags);
3017 count = feature_count (&flags);
3019 if (count >= current_bit_count)
3021 best_fit = sel;
3022 current_bit_count = count;
3026 gcc_assert (best_fit);
3027 sel = best_fit;
3030 arm_selected_cpu = sel;
3034 gcc_assert (arm_selected_cpu);
3035 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3036 if (!arm_selected_tune)
3037 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3039 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
3040 insn_flags = arm_selected_cpu->flags;
3041 arm_base_arch = arm_selected_cpu->base_arch;
3043 arm_tune = arm_selected_tune->core;
3044 tune_flags = arm_selected_tune->flags;
3045 current_tune = arm_selected_tune->tune;
3047 /* TBD: Dwarf info for apcs frame is not handled yet. */
3048 if (TARGET_APCS_FRAME)
3049 flag_shrink_wrap = false;
3051 /* BPABI targets use linker tricks to allow interworking on cores
3052 without thumb support. */
3053 if (TARGET_INTERWORK
3054 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB) || TARGET_BPABI))
3056 warning (0, "target CPU does not support interworking" );
3057 target_flags &= ~MASK_INTERWORK;
3060 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3062 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3063 target_flags |= MASK_APCS_FRAME;
3066 if (TARGET_POKE_FUNCTION_NAME)
3067 target_flags |= MASK_APCS_FRAME;
3069 if (TARGET_APCS_REENT && flag_pic)
3070 error ("-fpic and -mapcs-reent are incompatible");
3072 if (TARGET_APCS_REENT)
3073 warning (0, "APCS reentrant code not supported. Ignored");
3075 if (TARGET_APCS_FLOAT)
3076 warning (0, "passing floating point arguments in fp regs not yet supported");
3078 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3079 arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M);
3080 arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4);
3081 arm_arch4t = arm_arch4 && (ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB));
3082 arm_arch5 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5);
3083 arm_arch5e = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5E);
3084 arm_arch6 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6);
3085 arm_arch6k = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6K);
3086 arm_arch6kz = arm_arch6k && ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6KZ);
3087 arm_arch_notm = ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM);
3088 arm_arch6m = arm_arch6 && !arm_arch_notm;
3089 arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7);
3090 arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM);
3091 arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8);
3092 arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2);
3093 arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE);
3095 arm_ld_sched = ARM_FSET_HAS_CPU1 (tune_flags, FL_LDSCHED);
3096 arm_tune_strongarm = ARM_FSET_HAS_CPU1 (tune_flags, FL_STRONG);
3097 arm_tune_wbuf = ARM_FSET_HAS_CPU1 (tune_flags, FL_WBUF);
3098 arm_tune_xscale = ARM_FSET_HAS_CPU1 (tune_flags, FL_XSCALE);
3099 arm_arch_iwmmxt = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT);
3100 arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2);
3101 arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV);
3102 arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV);
3103 arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
3104 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
3105 arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
3106 arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL);
3108 /* V5 code we generate is completely interworking capable, so we turn off
3109 TARGET_INTERWORK here to avoid many tests later on. */
3111 /* XXX However, we must pass the right pre-processor defines to CPP
3112 or GLD can get confused. This is a hack. */
3113 if (TARGET_INTERWORK)
3114 arm_cpp_interwork = 1;
3116 if (arm_arch5)
3117 target_flags &= ~MASK_INTERWORK;
3119 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3120 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3122 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3123 error ("iwmmxt abi requires an iwmmxt capable cpu");
3125 if (!global_options_set.x_arm_fpu_index)
3127 const char *target_fpu_name;
3128 bool ok;
3130 #ifdef FPUTYPE_DEFAULT
3131 target_fpu_name = FPUTYPE_DEFAULT;
3132 #else
3133 target_fpu_name = "vfp";
3134 #endif
3136 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
3137 CL_TARGET);
3138 gcc_assert (ok);
3141 arm_fpu_desc = &all_fpus[arm_fpu_index];
3143 switch (arm_fpu_desc->model)
3145 case ARM_FP_MODEL_VFP:
3146 arm_fpu_attr = FPU_VFP;
3147 break;
3149 default:
3150 gcc_unreachable();
3153 if (TARGET_AAPCS_BASED)
3155 if (TARGET_CALLER_INTERWORKING)
3156 error ("AAPCS does not support -mcaller-super-interworking");
3157 else
3158 if (TARGET_CALLEE_INTERWORKING)
3159 error ("AAPCS does not support -mcallee-super-interworking");
3162 /* iWMMXt and NEON are incompatible. */
3163 if (TARGET_IWMMXT && TARGET_NEON)
3164 error ("iWMMXt and NEON are incompatible");
3166 /* __fp16 support currently assumes the core has ldrh. */
3167 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3168 sorry ("__fp16 and no ldrh");
3170 /* If soft-float is specified then don't use FPU. */
3171 if (TARGET_SOFT_FLOAT)
3172 arm_fpu_attr = FPU_NONE;
3174 if (TARGET_AAPCS_BASED)
3176 if (arm_abi == ARM_ABI_IWMMXT)
3177 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3178 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3179 && TARGET_HARD_FLOAT
3180 && TARGET_VFP)
3181 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3182 else
3183 arm_pcs_default = ARM_PCS_AAPCS;
3185 else
3187 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
3188 sorry ("-mfloat-abi=hard and VFP");
3190 if (arm_abi == ARM_ABI_APCS)
3191 arm_pcs_default = ARM_PCS_APCS;
3192 else
3193 arm_pcs_default = ARM_PCS_ATPCS;
3196 /* For arm2/3 there is no need to do any scheduling if we are doing
3197 software floating-point. */
3198 if (TARGET_SOFT_FLOAT && !ARM_FSET_HAS_CPU1 (tune_flags, FL_MODE32))
3199 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3201 /* Use the cp15 method if it is available. */
3202 if (target_thread_pointer == TP_AUTO)
3204 if (arm_arch6k && !TARGET_THUMB1)
3205 target_thread_pointer = TP_CP15;
3206 else
3207 target_thread_pointer = TP_SOFT;
3210 /* Override the default structure alignment for AAPCS ABI. */
3211 if (!global_options_set.x_arm_structure_size_boundary)
3213 if (TARGET_AAPCS_BASED)
3214 arm_structure_size_boundary = 8;
3216 else
3218 if (arm_structure_size_boundary != 8
3219 && arm_structure_size_boundary != 32
3220 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3222 if (ARM_DOUBLEWORD_ALIGN)
3223 warning (0,
3224 "structure size boundary can only be set to 8, 32 or 64");
3225 else
3226 warning (0, "structure size boundary can only be set to 8 or 32");
3227 arm_structure_size_boundary
3228 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3232 /* If stack checking is disabled, we can use r10 as the PIC register,
3233 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3234 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3236 if (TARGET_VXWORKS_RTP)
3237 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3238 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3241 if (flag_pic && TARGET_VXWORKS_RTP)
3242 arm_pic_register = 9;
3244 if (arm_pic_register_string != NULL)
3246 int pic_register = decode_reg_name (arm_pic_register_string);
3248 if (!flag_pic)
3249 warning (0, "-mpic-register= is useless without -fpic");
3251 /* Prevent the user from choosing an obviously stupid PIC register. */
3252 else if (pic_register < 0 || call_used_regs[pic_register]
3253 || pic_register == HARD_FRAME_POINTER_REGNUM
3254 || pic_register == STACK_POINTER_REGNUM
3255 || pic_register >= PC_REGNUM
3256 || (TARGET_VXWORKS_RTP
3257 && (unsigned int) pic_register != arm_pic_register))
3258 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3259 else
3260 arm_pic_register = pic_register;
3263 if (TARGET_VXWORKS_RTP
3264 && !global_options_set.x_arm_pic_data_is_text_relative)
3265 arm_pic_data_is_text_relative = 0;
3267 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3268 if (fix_cm3_ldrd == 2)
3270 if (arm_selected_cpu->core == cortexm3)
3271 fix_cm3_ldrd = 1;
3272 else
3273 fix_cm3_ldrd = 0;
3276 /* Enable -munaligned-access by default for
3277 - all ARMv6 architecture-based processors
3278 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3279 - ARMv8 architecture-base processors.
3281 Disable -munaligned-access by default for
3282 - all pre-ARMv6 architecture-based processors
3283 - ARMv6-M architecture-based processors. */
3285 if (unaligned_access == 2)
3287 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3288 unaligned_access = 1;
3289 else
3290 unaligned_access = 0;
3292 else if (unaligned_access == 1
3293 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3295 warning (0, "target CPU does not support unaligned accesses");
3296 unaligned_access = 0;
3299 /* Hot/Cold partitioning is not currently supported, since we can't
3300 handle literal pool placement in that case. */
3301 if (flag_reorder_blocks_and_partition)
3303 inform (input_location,
3304 "-freorder-blocks-and-partition not supported on this architecture");
3305 flag_reorder_blocks_and_partition = 0;
3306 flag_reorder_blocks = 1;
3309 if (flag_pic)
3310 /* Hoisting PIC address calculations more aggressively provides a small,
3311 but measurable, size reduction for PIC code. Therefore, we decrease
3312 the bar for unrestricted expression hoisting to the cost of PIC address
3313 calculation, which is 2 instructions. */
3314 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3315 global_options.x_param_values,
3316 global_options_set.x_param_values);
3318 /* ARM EABI defaults to strict volatile bitfields. */
3319 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3320 && abi_version_at_least(2))
3321 flag_strict_volatile_bitfields = 1;
3323 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3324 have deemed it beneficial (signified by setting
3325 prefetch.num_slots to 1 or more). */
3326 if (flag_prefetch_loop_arrays < 0
3327 && HAVE_prefetch
3328 && optimize >= 3
3329 && current_tune->prefetch.num_slots > 0)
3330 flag_prefetch_loop_arrays = 1;
3332 /* Set up parameters to be used in prefetching algorithm. Do not
3333 override the defaults unless we are tuning for a core we have
3334 researched values for. */
3335 if (current_tune->prefetch.num_slots > 0)
3336 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3337 current_tune->prefetch.num_slots,
3338 global_options.x_param_values,
3339 global_options_set.x_param_values);
3340 if (current_tune->prefetch.l1_cache_line_size >= 0)
3341 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3342 current_tune->prefetch.l1_cache_line_size,
3343 global_options.x_param_values,
3344 global_options_set.x_param_values);
3345 if (current_tune->prefetch.l1_cache_size >= 0)
3346 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3347 current_tune->prefetch.l1_cache_size,
3348 global_options.x_param_values,
3349 global_options_set.x_param_values);
3351 /* Use Neon to perform 64-bits operations rather than core
3352 registers. */
3353 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3354 if (use_neon_for_64bits == 1)
3355 prefer_neon_for_64bits = true;
3357 /* Use the alternative scheduling-pressure algorithm by default. */
3358 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3359 global_options.x_param_values,
3360 global_options_set.x_param_values);
3362 /* Look through ready list and all of queue for instructions
3363 relevant for L2 auto-prefetcher. */
3364 int param_sched_autopref_queue_depth;
3366 switch (current_tune->sched_autopref)
3368 case tune_params::SCHED_AUTOPREF_OFF:
3369 param_sched_autopref_queue_depth = -1;
3370 break;
3372 case tune_params::SCHED_AUTOPREF_RANK:
3373 param_sched_autopref_queue_depth = 0;
3374 break;
3376 case tune_params::SCHED_AUTOPREF_FULL:
3377 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3378 break;
3380 default:
3381 gcc_unreachable ();
3384 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3385 param_sched_autopref_queue_depth,
3386 global_options.x_param_values,
3387 global_options_set.x_param_values);
3389 /* Currently, for slow flash data, we just disable literal pools. */
3390 if (target_slow_flash_data)
3391 arm_disable_literal_pool = true;
3393 /* Disable scheduling fusion by default if it's not armv7 processor
3394 or doesn't prefer ldrd/strd. */
3395 if (flag_schedule_fusion == 2
3396 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3397 flag_schedule_fusion = 0;
3399 /* Need to remember initial options before they are overriden. */
3400 init_optimize = build_optimization_node (&global_options);
3402 arm_option_override_internal (&global_options, &global_options_set);
3403 arm_option_check_internal (&global_options);
3404 arm_option_params_internal ();
3406 /* Register global variables with the garbage collector. */
3407 arm_add_gc_roots ();
3409 /* Save the initial options in case the user does function specific
3410 options. */
3411 target_option_default_node = target_option_current_node
3412 = build_target_option_node (&global_options);
3414 /* Init initial mode for testing. */
3415 thumb_flipper = TARGET_THUMB;
3418 static void
3419 arm_add_gc_roots (void)
3421 gcc_obstack_init(&minipool_obstack);
3422 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3425 /* A table of known ARM exception types.
3426 For use with the interrupt function attribute. */
3428 typedef struct
3430 const char *const arg;
3431 const unsigned long return_value;
3433 isr_attribute_arg;
3435 static const isr_attribute_arg isr_attribute_args [] =
3437 { "IRQ", ARM_FT_ISR },
3438 { "irq", ARM_FT_ISR },
3439 { "FIQ", ARM_FT_FIQ },
3440 { "fiq", ARM_FT_FIQ },
3441 { "ABORT", ARM_FT_ISR },
3442 { "abort", ARM_FT_ISR },
3443 { "ABORT", ARM_FT_ISR },
3444 { "abort", ARM_FT_ISR },
3445 { "UNDEF", ARM_FT_EXCEPTION },
3446 { "undef", ARM_FT_EXCEPTION },
3447 { "SWI", ARM_FT_EXCEPTION },
3448 { "swi", ARM_FT_EXCEPTION },
3449 { NULL, ARM_FT_NORMAL }
3452 /* Returns the (interrupt) function type of the current
3453 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3455 static unsigned long
3456 arm_isr_value (tree argument)
3458 const isr_attribute_arg * ptr;
3459 const char * arg;
3461 if (!arm_arch_notm)
3462 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3464 /* No argument - default to IRQ. */
3465 if (argument == NULL_TREE)
3466 return ARM_FT_ISR;
3468 /* Get the value of the argument. */
3469 if (TREE_VALUE (argument) == NULL_TREE
3470 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3471 return ARM_FT_UNKNOWN;
3473 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3475 /* Check it against the list of known arguments. */
3476 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3477 if (streq (arg, ptr->arg))
3478 return ptr->return_value;
3480 /* An unrecognized interrupt type. */
3481 return ARM_FT_UNKNOWN;
3484 /* Computes the type of the current function. */
3486 static unsigned long
3487 arm_compute_func_type (void)
3489 unsigned long type = ARM_FT_UNKNOWN;
3490 tree a;
3491 tree attr;
3493 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3495 /* Decide if the current function is volatile. Such functions
3496 never return, and many memory cycles can be saved by not storing
3497 register values that will never be needed again. This optimization
3498 was added to speed up context switching in a kernel application. */
3499 if (optimize > 0
3500 && (TREE_NOTHROW (current_function_decl)
3501 || !(flag_unwind_tables
3502 || (flag_exceptions
3503 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3504 && TREE_THIS_VOLATILE (current_function_decl))
3505 type |= ARM_FT_VOLATILE;
3507 if (cfun->static_chain_decl != NULL)
3508 type |= ARM_FT_NESTED;
3510 attr = DECL_ATTRIBUTES (current_function_decl);
3512 a = lookup_attribute ("naked", attr);
3513 if (a != NULL_TREE)
3514 type |= ARM_FT_NAKED;
3516 a = lookup_attribute ("isr", attr);
3517 if (a == NULL_TREE)
3518 a = lookup_attribute ("interrupt", attr);
3520 if (a == NULL_TREE)
3521 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3522 else
3523 type |= arm_isr_value (TREE_VALUE (a));
3525 return type;
3528 /* Returns the type of the current function. */
3530 unsigned long
3531 arm_current_func_type (void)
3533 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3534 cfun->machine->func_type = arm_compute_func_type ();
3536 return cfun->machine->func_type;
3539 bool
3540 arm_allocate_stack_slots_for_args (void)
3542 /* Naked functions should not allocate stack slots for arguments. */
3543 return !IS_NAKED (arm_current_func_type ());
3546 static bool
3547 arm_warn_func_return (tree decl)
3549 /* Naked functions are implemented entirely in assembly, including the
3550 return sequence, so suppress warnings about this. */
3551 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3555 /* Output assembler code for a block containing the constant parts
3556 of a trampoline, leaving space for the variable parts.
3558 On the ARM, (if r8 is the static chain regnum, and remembering that
3559 referencing pc adds an offset of 8) the trampoline looks like:
3560 ldr r8, [pc, #0]
3561 ldr pc, [pc]
3562 .word static chain value
3563 .word function's address
3564 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3566 static void
3567 arm_asm_trampoline_template (FILE *f)
3569 if (TARGET_UNIFIED_ASM)
3570 fprintf (f, "\t.syntax unified\n");
3571 else
3572 fprintf (f, "\t.syntax divided\n");
3574 if (TARGET_ARM)
3576 fprintf (f, "\t.arm\n");
3577 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3578 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3580 else if (TARGET_THUMB2)
3582 fprintf (f, "\t.thumb\n");
3583 /* The Thumb-2 trampoline is similar to the arm implementation.
3584 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3585 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3586 STATIC_CHAIN_REGNUM, PC_REGNUM);
3587 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3589 else
3591 ASM_OUTPUT_ALIGN (f, 2);
3592 fprintf (f, "\t.code\t16\n");
3593 fprintf (f, ".Ltrampoline_start:\n");
3594 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3595 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3596 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3597 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3598 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3599 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3601 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3602 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3605 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3607 static void
3608 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3610 rtx fnaddr, mem, a_tramp;
3612 emit_block_move (m_tramp, assemble_trampoline_template (),
3613 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3615 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3616 emit_move_insn (mem, chain_value);
3618 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3619 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3620 emit_move_insn (mem, fnaddr);
3622 a_tramp = XEXP (m_tramp, 0);
3623 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3624 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3625 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3628 /* Thumb trampolines should be entered in thumb mode, so set
3629 the bottom bit of the address. */
3631 static rtx
3632 arm_trampoline_adjust_address (rtx addr)
3634 if (TARGET_THUMB)
3635 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3636 NULL, 0, OPTAB_LIB_WIDEN);
3637 return addr;
3640 /* Return 1 if it is possible to return using a single instruction.
3641 If SIBLING is non-null, this is a test for a return before a sibling
3642 call. SIBLING is the call insn, so we can examine its register usage. */
3645 use_return_insn (int iscond, rtx sibling)
3647 int regno;
3648 unsigned int func_type;
3649 unsigned long saved_int_regs;
3650 unsigned HOST_WIDE_INT stack_adjust;
3651 arm_stack_offsets *offsets;
3653 /* Never use a return instruction before reload has run. */
3654 if (!reload_completed)
3655 return 0;
3657 func_type = arm_current_func_type ();
3659 /* Naked, volatile and stack alignment functions need special
3660 consideration. */
3661 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3662 return 0;
3664 /* So do interrupt functions that use the frame pointer and Thumb
3665 interrupt functions. */
3666 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3667 return 0;
3669 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3670 && !optimize_function_for_size_p (cfun))
3671 return 0;
3673 offsets = arm_get_frame_offsets ();
3674 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3676 /* As do variadic functions. */
3677 if (crtl->args.pretend_args_size
3678 || cfun->machine->uses_anonymous_args
3679 /* Or if the function calls __builtin_eh_return () */
3680 || crtl->calls_eh_return
3681 /* Or if the function calls alloca */
3682 || cfun->calls_alloca
3683 /* Or if there is a stack adjustment. However, if the stack pointer
3684 is saved on the stack, we can use a pre-incrementing stack load. */
3685 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3686 && stack_adjust == 4))
3687 /* Or if the static chain register was saved above the frame, under the
3688 assumption that the stack pointer isn't saved on the stack. */
3689 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3690 && arm_compute_static_chain_stack_bytes() != 0))
3691 return 0;
3693 saved_int_regs = offsets->saved_regs_mask;
3695 /* Unfortunately, the insn
3697 ldmib sp, {..., sp, ...}
3699 triggers a bug on most SA-110 based devices, such that the stack
3700 pointer won't be correctly restored if the instruction takes a
3701 page fault. We work around this problem by popping r3 along with
3702 the other registers, since that is never slower than executing
3703 another instruction.
3705 We test for !arm_arch5 here, because code for any architecture
3706 less than this could potentially be run on one of the buggy
3707 chips. */
3708 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3710 /* Validate that r3 is a call-clobbered register (always true in
3711 the default abi) ... */
3712 if (!call_used_regs[3])
3713 return 0;
3715 /* ... that it isn't being used for a return value ... */
3716 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3717 return 0;
3719 /* ... or for a tail-call argument ... */
3720 if (sibling)
3722 gcc_assert (CALL_P (sibling));
3724 if (find_regno_fusage (sibling, USE, 3))
3725 return 0;
3728 /* ... and that there are no call-saved registers in r0-r2
3729 (always true in the default ABI). */
3730 if (saved_int_regs & 0x7)
3731 return 0;
3734 /* Can't be done if interworking with Thumb, and any registers have been
3735 stacked. */
3736 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3737 return 0;
3739 /* On StrongARM, conditional returns are expensive if they aren't
3740 taken and multiple registers have been stacked. */
3741 if (iscond && arm_tune_strongarm)
3743 /* Conditional return when just the LR is stored is a simple
3744 conditional-load instruction, that's not expensive. */
3745 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3746 return 0;
3748 if (flag_pic
3749 && arm_pic_register != INVALID_REGNUM
3750 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3751 return 0;
3754 /* If there are saved registers but the LR isn't saved, then we need
3755 two instructions for the return. */
3756 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3757 return 0;
3759 /* Can't be done if any of the VFP regs are pushed,
3760 since this also requires an insn. */
3761 if (TARGET_HARD_FLOAT && TARGET_VFP)
3762 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3763 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3764 return 0;
3766 if (TARGET_REALLY_IWMMXT)
3767 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3768 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3769 return 0;
3771 return 1;
3774 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3775 shrink-wrapping if possible. This is the case if we need to emit a
3776 prologue, which we can test by looking at the offsets. */
3777 bool
3778 use_simple_return_p (void)
3780 arm_stack_offsets *offsets;
3782 offsets = arm_get_frame_offsets ();
3783 return offsets->outgoing_args != 0;
3786 /* Return TRUE if int I is a valid immediate ARM constant. */
3789 const_ok_for_arm (HOST_WIDE_INT i)
3791 int lowbit;
3793 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3794 be all zero, or all one. */
3795 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3796 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3797 != ((~(unsigned HOST_WIDE_INT) 0)
3798 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3799 return FALSE;
3801 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3803 /* Fast return for 0 and small values. We must do this for zero, since
3804 the code below can't handle that one case. */
3805 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3806 return TRUE;
3808 /* Get the number of trailing zeros. */
3809 lowbit = ffs((int) i) - 1;
3811 /* Only even shifts are allowed in ARM mode so round down to the
3812 nearest even number. */
3813 if (TARGET_ARM)
3814 lowbit &= ~1;
3816 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3817 return TRUE;
3819 if (TARGET_ARM)
3821 /* Allow rotated constants in ARM mode. */
3822 if (lowbit <= 4
3823 && ((i & ~0xc000003f) == 0
3824 || (i & ~0xf000000f) == 0
3825 || (i & ~0xfc000003) == 0))
3826 return TRUE;
3828 else
3830 HOST_WIDE_INT v;
3832 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3833 v = i & 0xff;
3834 v |= v << 16;
3835 if (i == v || i == (v | (v << 8)))
3836 return TRUE;
3838 /* Allow repeated pattern 0xXY00XY00. */
3839 v = i & 0xff00;
3840 v |= v << 16;
3841 if (i == v)
3842 return TRUE;
3845 return FALSE;
3848 /* Return true if I is a valid constant for the operation CODE. */
3850 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3852 if (const_ok_for_arm (i))
3853 return 1;
3855 switch (code)
3857 case SET:
3858 /* See if we can use movw. */
3859 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3860 return 1;
3861 else
3862 /* Otherwise, try mvn. */
3863 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3865 case PLUS:
3866 /* See if we can use addw or subw. */
3867 if (TARGET_THUMB2
3868 && ((i & 0xfffff000) == 0
3869 || ((-i) & 0xfffff000) == 0))
3870 return 1;
3871 /* else fall through. */
3873 case COMPARE:
3874 case EQ:
3875 case NE:
3876 case GT:
3877 case LE:
3878 case LT:
3879 case GE:
3880 case GEU:
3881 case LTU:
3882 case GTU:
3883 case LEU:
3884 case UNORDERED:
3885 case ORDERED:
3886 case UNEQ:
3887 case UNGE:
3888 case UNLT:
3889 case UNGT:
3890 case UNLE:
3891 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3893 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3894 case XOR:
3895 return 0;
3897 case IOR:
3898 if (TARGET_THUMB2)
3899 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3900 return 0;
3902 case AND:
3903 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3905 default:
3906 gcc_unreachable ();
3910 /* Return true if I is a valid di mode constant for the operation CODE. */
3912 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3914 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3915 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3916 rtx hi = GEN_INT (hi_val);
3917 rtx lo = GEN_INT (lo_val);
3919 if (TARGET_THUMB1)
3920 return 0;
3922 switch (code)
3924 case AND:
3925 case IOR:
3926 case XOR:
3927 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3928 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3929 case PLUS:
3930 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3932 default:
3933 return 0;
3937 /* Emit a sequence of insns to handle a large constant.
3938 CODE is the code of the operation required, it can be any of SET, PLUS,
3939 IOR, AND, XOR, MINUS;
3940 MODE is the mode in which the operation is being performed;
3941 VAL is the integer to operate on;
3942 SOURCE is the other operand (a register, or a null-pointer for SET);
3943 SUBTARGETS means it is safe to create scratch registers if that will
3944 either produce a simpler sequence, or we will want to cse the values.
3945 Return value is the number of insns emitted. */
3947 /* ??? Tweak this for thumb2. */
3949 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3950 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3952 rtx cond;
3954 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3955 cond = COND_EXEC_TEST (PATTERN (insn));
3956 else
3957 cond = NULL_RTX;
3959 if (subtargets || code == SET
3960 || (REG_P (target) && REG_P (source)
3961 && REGNO (target) != REGNO (source)))
3963 /* After arm_reorg has been called, we can't fix up expensive
3964 constants by pushing them into memory so we must synthesize
3965 them in-line, regardless of the cost. This is only likely to
3966 be more costly on chips that have load delay slots and we are
3967 compiling without running the scheduler (so no splitting
3968 occurred before the final instruction emission).
3970 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3972 if (!cfun->machine->after_arm_reorg
3973 && !cond
3974 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3975 1, 0)
3976 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3977 + (code != SET))))
3979 if (code == SET)
3981 /* Currently SET is the only monadic value for CODE, all
3982 the rest are diadic. */
3983 if (TARGET_USE_MOVT)
3984 arm_emit_movpair (target, GEN_INT (val));
3985 else
3986 emit_set_insn (target, GEN_INT (val));
3988 return 1;
3990 else
3992 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3994 if (TARGET_USE_MOVT)
3995 arm_emit_movpair (temp, GEN_INT (val));
3996 else
3997 emit_set_insn (temp, GEN_INT (val));
3999 /* For MINUS, the value is subtracted from, since we never
4000 have subtraction of a constant. */
4001 if (code == MINUS)
4002 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4003 else
4004 emit_set_insn (target,
4005 gen_rtx_fmt_ee (code, mode, source, temp));
4006 return 2;
4011 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4015 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4016 ARM/THUMB2 immediates, and add up to VAL.
4017 Thr function return value gives the number of insns required. */
4018 static int
4019 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4020 struct four_ints *return_sequence)
4022 int best_consecutive_zeros = 0;
4023 int i;
4024 int best_start = 0;
4025 int insns1, insns2;
4026 struct four_ints tmp_sequence;
4028 /* If we aren't targeting ARM, the best place to start is always at
4029 the bottom, otherwise look more closely. */
4030 if (TARGET_ARM)
4032 for (i = 0; i < 32; i += 2)
4034 int consecutive_zeros = 0;
4036 if (!(val & (3 << i)))
4038 while ((i < 32) && !(val & (3 << i)))
4040 consecutive_zeros += 2;
4041 i += 2;
4043 if (consecutive_zeros > best_consecutive_zeros)
4045 best_consecutive_zeros = consecutive_zeros;
4046 best_start = i - consecutive_zeros;
4048 i -= 2;
4053 /* So long as it won't require any more insns to do so, it's
4054 desirable to emit a small constant (in bits 0...9) in the last
4055 insn. This way there is more chance that it can be combined with
4056 a later addressing insn to form a pre-indexed load or store
4057 operation. Consider:
4059 *((volatile int *)0xe0000100) = 1;
4060 *((volatile int *)0xe0000110) = 2;
4062 We want this to wind up as:
4064 mov rA, #0xe0000000
4065 mov rB, #1
4066 str rB, [rA, #0x100]
4067 mov rB, #2
4068 str rB, [rA, #0x110]
4070 rather than having to synthesize both large constants from scratch.
4072 Therefore, we calculate how many insns would be required to emit
4073 the constant starting from `best_start', and also starting from
4074 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4075 yield a shorter sequence, we may as well use zero. */
4076 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4077 if (best_start != 0
4078 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
4080 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4081 if (insns2 <= insns1)
4083 *return_sequence = tmp_sequence;
4084 insns1 = insns2;
4088 return insns1;
4091 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4092 static int
4093 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4094 struct four_ints *return_sequence, int i)
4096 int remainder = val & 0xffffffff;
4097 int insns = 0;
4099 /* Try and find a way of doing the job in either two or three
4100 instructions.
4102 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4103 location. We start at position I. This may be the MSB, or
4104 optimial_immediate_sequence may have positioned it at the largest block
4105 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4106 wrapping around to the top of the word when we drop off the bottom.
4107 In the worst case this code should produce no more than four insns.
4109 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4110 constants, shifted to any arbitrary location. We should always start
4111 at the MSB. */
4114 int end;
4115 unsigned int b1, b2, b3, b4;
4116 unsigned HOST_WIDE_INT result;
4117 int loc;
4119 gcc_assert (insns < 4);
4121 if (i <= 0)
4122 i += 32;
4124 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4125 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4127 loc = i;
4128 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4129 /* We can use addw/subw for the last 12 bits. */
4130 result = remainder;
4131 else
4133 /* Use an 8-bit shifted/rotated immediate. */
4134 end = i - 8;
4135 if (end < 0)
4136 end += 32;
4137 result = remainder & ((0x0ff << end)
4138 | ((i < end) ? (0xff >> (32 - end))
4139 : 0));
4140 i -= 8;
4143 else
4145 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4146 arbitrary shifts. */
4147 i -= TARGET_ARM ? 2 : 1;
4148 continue;
4151 /* Next, see if we can do a better job with a thumb2 replicated
4152 constant.
4154 We do it this way around to catch the cases like 0x01F001E0 where
4155 two 8-bit immediates would work, but a replicated constant would
4156 make it worse.
4158 TODO: 16-bit constants that don't clear all the bits, but still win.
4159 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4160 if (TARGET_THUMB2)
4162 b1 = (remainder & 0xff000000) >> 24;
4163 b2 = (remainder & 0x00ff0000) >> 16;
4164 b3 = (remainder & 0x0000ff00) >> 8;
4165 b4 = remainder & 0xff;
4167 if (loc > 24)
4169 /* The 8-bit immediate already found clears b1 (and maybe b2),
4170 but must leave b3 and b4 alone. */
4172 /* First try to find a 32-bit replicated constant that clears
4173 almost everything. We can assume that we can't do it in one,
4174 or else we wouldn't be here. */
4175 unsigned int tmp = b1 & b2 & b3 & b4;
4176 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4177 + (tmp << 24);
4178 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4179 + (tmp == b3) + (tmp == b4);
4180 if (tmp
4181 && (matching_bytes >= 3
4182 || (matching_bytes == 2
4183 && const_ok_for_op (remainder & ~tmp2, code))))
4185 /* At least 3 of the bytes match, and the fourth has at
4186 least as many bits set, or two of the bytes match
4187 and it will only require one more insn to finish. */
4188 result = tmp2;
4189 i = tmp != b1 ? 32
4190 : tmp != b2 ? 24
4191 : tmp != b3 ? 16
4192 : 8;
4195 /* Second, try to find a 16-bit replicated constant that can
4196 leave three of the bytes clear. If b2 or b4 is already
4197 zero, then we can. If the 8-bit from above would not
4198 clear b2 anyway, then we still win. */
4199 else if (b1 == b3 && (!b2 || !b4
4200 || (remainder & 0x00ff0000 & ~result)))
4202 result = remainder & 0xff00ff00;
4203 i = 24;
4206 else if (loc > 16)
4208 /* The 8-bit immediate already found clears b2 (and maybe b3)
4209 and we don't get here unless b1 is alredy clear, but it will
4210 leave b4 unchanged. */
4212 /* If we can clear b2 and b4 at once, then we win, since the
4213 8-bits couldn't possibly reach that far. */
4214 if (b2 == b4)
4216 result = remainder & 0x00ff00ff;
4217 i = 16;
4222 return_sequence->i[insns++] = result;
4223 remainder &= ~result;
4225 if (code == SET || code == MINUS)
4226 code = PLUS;
4228 while (remainder);
4230 return insns;
4233 /* Emit an instruction with the indicated PATTERN. If COND is
4234 non-NULL, conditionalize the execution of the instruction on COND
4235 being true. */
4237 static void
4238 emit_constant_insn (rtx cond, rtx pattern)
4240 if (cond)
4241 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4242 emit_insn (pattern);
4245 /* As above, but extra parameter GENERATE which, if clear, suppresses
4246 RTL generation. */
4248 static int
4249 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4250 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4251 int subtargets, int generate)
4253 int can_invert = 0;
4254 int can_negate = 0;
4255 int final_invert = 0;
4256 int i;
4257 int set_sign_bit_copies = 0;
4258 int clear_sign_bit_copies = 0;
4259 int clear_zero_bit_copies = 0;
4260 int set_zero_bit_copies = 0;
4261 int insns = 0, neg_insns, inv_insns;
4262 unsigned HOST_WIDE_INT temp1, temp2;
4263 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4264 struct four_ints *immediates;
4265 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4267 /* Find out which operations are safe for a given CODE. Also do a quick
4268 check for degenerate cases; these can occur when DImode operations
4269 are split. */
4270 switch (code)
4272 case SET:
4273 can_invert = 1;
4274 break;
4276 case PLUS:
4277 can_negate = 1;
4278 break;
4280 case IOR:
4281 if (remainder == 0xffffffff)
4283 if (generate)
4284 emit_constant_insn (cond,
4285 gen_rtx_SET (target,
4286 GEN_INT (ARM_SIGN_EXTEND (val))));
4287 return 1;
4290 if (remainder == 0)
4292 if (reload_completed && rtx_equal_p (target, source))
4293 return 0;
4295 if (generate)
4296 emit_constant_insn (cond, gen_rtx_SET (target, source));
4297 return 1;
4299 break;
4301 case AND:
4302 if (remainder == 0)
4304 if (generate)
4305 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4306 return 1;
4308 if (remainder == 0xffffffff)
4310 if (reload_completed && rtx_equal_p (target, source))
4311 return 0;
4312 if (generate)
4313 emit_constant_insn (cond, gen_rtx_SET (target, source));
4314 return 1;
4316 can_invert = 1;
4317 break;
4319 case XOR:
4320 if (remainder == 0)
4322 if (reload_completed && rtx_equal_p (target, source))
4323 return 0;
4324 if (generate)
4325 emit_constant_insn (cond, gen_rtx_SET (target, source));
4326 return 1;
4329 if (remainder == 0xffffffff)
4331 if (generate)
4332 emit_constant_insn (cond,
4333 gen_rtx_SET (target,
4334 gen_rtx_NOT (mode, source)));
4335 return 1;
4337 final_invert = 1;
4338 break;
4340 case MINUS:
4341 /* We treat MINUS as (val - source), since (source - val) is always
4342 passed as (source + (-val)). */
4343 if (remainder == 0)
4345 if (generate)
4346 emit_constant_insn (cond,
4347 gen_rtx_SET (target,
4348 gen_rtx_NEG (mode, source)));
4349 return 1;
4351 if (const_ok_for_arm (val))
4353 if (generate)
4354 emit_constant_insn (cond,
4355 gen_rtx_SET (target,
4356 gen_rtx_MINUS (mode, GEN_INT (val),
4357 source)));
4358 return 1;
4361 break;
4363 default:
4364 gcc_unreachable ();
4367 /* If we can do it in one insn get out quickly. */
4368 if (const_ok_for_op (val, code))
4370 if (generate)
4371 emit_constant_insn (cond,
4372 gen_rtx_SET (target,
4373 (source
4374 ? gen_rtx_fmt_ee (code, mode, source,
4375 GEN_INT (val))
4376 : GEN_INT (val))));
4377 return 1;
4380 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4381 insn. */
4382 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4383 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4385 if (generate)
4387 if (mode == SImode && i == 16)
4388 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4389 smaller insn. */
4390 emit_constant_insn (cond,
4391 gen_zero_extendhisi2
4392 (target, gen_lowpart (HImode, source)));
4393 else
4394 /* Extz only supports SImode, but we can coerce the operands
4395 into that mode. */
4396 emit_constant_insn (cond,
4397 gen_extzv_t2 (gen_lowpart (SImode, target),
4398 gen_lowpart (SImode, source),
4399 GEN_INT (i), const0_rtx));
4402 return 1;
4405 /* Calculate a few attributes that may be useful for specific
4406 optimizations. */
4407 /* Count number of leading zeros. */
4408 for (i = 31; i >= 0; i--)
4410 if ((remainder & (1 << i)) == 0)
4411 clear_sign_bit_copies++;
4412 else
4413 break;
4416 /* Count number of leading 1's. */
4417 for (i = 31; i >= 0; i--)
4419 if ((remainder & (1 << i)) != 0)
4420 set_sign_bit_copies++;
4421 else
4422 break;
4425 /* Count number of trailing zero's. */
4426 for (i = 0; i <= 31; i++)
4428 if ((remainder & (1 << i)) == 0)
4429 clear_zero_bit_copies++;
4430 else
4431 break;
4434 /* Count number of trailing 1's. */
4435 for (i = 0; i <= 31; i++)
4437 if ((remainder & (1 << i)) != 0)
4438 set_zero_bit_copies++;
4439 else
4440 break;
4443 switch (code)
4445 case SET:
4446 /* See if we can do this by sign_extending a constant that is known
4447 to be negative. This is a good, way of doing it, since the shift
4448 may well merge into a subsequent insn. */
4449 if (set_sign_bit_copies > 1)
4451 if (const_ok_for_arm
4452 (temp1 = ARM_SIGN_EXTEND (remainder
4453 << (set_sign_bit_copies - 1))))
4455 if (generate)
4457 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4458 emit_constant_insn (cond,
4459 gen_rtx_SET (new_src, GEN_INT (temp1)));
4460 emit_constant_insn (cond,
4461 gen_ashrsi3 (target, new_src,
4462 GEN_INT (set_sign_bit_copies - 1)));
4464 return 2;
4466 /* For an inverted constant, we will need to set the low bits,
4467 these will be shifted out of harm's way. */
4468 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4469 if (const_ok_for_arm (~temp1))
4471 if (generate)
4473 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4474 emit_constant_insn (cond,
4475 gen_rtx_SET (new_src, GEN_INT (temp1)));
4476 emit_constant_insn (cond,
4477 gen_ashrsi3 (target, new_src,
4478 GEN_INT (set_sign_bit_copies - 1)));
4480 return 2;
4484 /* See if we can calculate the value as the difference between two
4485 valid immediates. */
4486 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4488 int topshift = clear_sign_bit_copies & ~1;
4490 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4491 & (0xff000000 >> topshift));
4493 /* If temp1 is zero, then that means the 9 most significant
4494 bits of remainder were 1 and we've caused it to overflow.
4495 When topshift is 0 we don't need to do anything since we
4496 can borrow from 'bit 32'. */
4497 if (temp1 == 0 && topshift != 0)
4498 temp1 = 0x80000000 >> (topshift - 1);
4500 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4502 if (const_ok_for_arm (temp2))
4504 if (generate)
4506 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4507 emit_constant_insn (cond,
4508 gen_rtx_SET (new_src, GEN_INT (temp1)));
4509 emit_constant_insn (cond,
4510 gen_addsi3 (target, new_src,
4511 GEN_INT (-temp2)));
4514 return 2;
4518 /* See if we can generate this by setting the bottom (or the top)
4519 16 bits, and then shifting these into the other half of the
4520 word. We only look for the simplest cases, to do more would cost
4521 too much. Be careful, however, not to generate this when the
4522 alternative would take fewer insns. */
4523 if (val & 0xffff0000)
4525 temp1 = remainder & 0xffff0000;
4526 temp2 = remainder & 0x0000ffff;
4528 /* Overlaps outside this range are best done using other methods. */
4529 for (i = 9; i < 24; i++)
4531 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4532 && !const_ok_for_arm (temp2))
4534 rtx new_src = (subtargets
4535 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4536 : target);
4537 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4538 source, subtargets, generate);
4539 source = new_src;
4540 if (generate)
4541 emit_constant_insn
4542 (cond,
4543 gen_rtx_SET
4544 (target,
4545 gen_rtx_IOR (mode,
4546 gen_rtx_ASHIFT (mode, source,
4547 GEN_INT (i)),
4548 source)));
4549 return insns + 1;
4553 /* Don't duplicate cases already considered. */
4554 for (i = 17; i < 24; i++)
4556 if (((temp1 | (temp1 >> i)) == remainder)
4557 && !const_ok_for_arm (temp1))
4559 rtx new_src = (subtargets
4560 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4561 : target);
4562 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4563 source, subtargets, generate);
4564 source = new_src;
4565 if (generate)
4566 emit_constant_insn
4567 (cond,
4568 gen_rtx_SET (target,
4569 gen_rtx_IOR
4570 (mode,
4571 gen_rtx_LSHIFTRT (mode, source,
4572 GEN_INT (i)),
4573 source)));
4574 return insns + 1;
4578 break;
4580 case IOR:
4581 case XOR:
4582 /* If we have IOR or XOR, and the constant can be loaded in a
4583 single instruction, and we can find a temporary to put it in,
4584 then this can be done in two instructions instead of 3-4. */
4585 if (subtargets
4586 /* TARGET can't be NULL if SUBTARGETS is 0 */
4587 || (reload_completed && !reg_mentioned_p (target, source)))
4589 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4591 if (generate)
4593 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4595 emit_constant_insn (cond,
4596 gen_rtx_SET (sub, GEN_INT (val)));
4597 emit_constant_insn (cond,
4598 gen_rtx_SET (target,
4599 gen_rtx_fmt_ee (code, mode,
4600 source, sub)));
4602 return 2;
4606 if (code == XOR)
4607 break;
4609 /* Convert.
4610 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4611 and the remainder 0s for e.g. 0xfff00000)
4612 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4614 This can be done in 2 instructions by using shifts with mov or mvn.
4615 e.g. for
4616 x = x | 0xfff00000;
4617 we generate.
4618 mvn r0, r0, asl #12
4619 mvn r0, r0, lsr #12 */
4620 if (set_sign_bit_copies > 8
4621 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4623 if (generate)
4625 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4626 rtx shift = GEN_INT (set_sign_bit_copies);
4628 emit_constant_insn
4629 (cond,
4630 gen_rtx_SET (sub,
4631 gen_rtx_NOT (mode,
4632 gen_rtx_ASHIFT (mode,
4633 source,
4634 shift))));
4635 emit_constant_insn
4636 (cond,
4637 gen_rtx_SET (target,
4638 gen_rtx_NOT (mode,
4639 gen_rtx_LSHIFTRT (mode, sub,
4640 shift))));
4642 return 2;
4645 /* Convert
4646 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4648 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4650 For eg. r0 = r0 | 0xfff
4651 mvn r0, r0, lsr #12
4652 mvn r0, r0, asl #12
4655 if (set_zero_bit_copies > 8
4656 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4658 if (generate)
4660 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4661 rtx shift = GEN_INT (set_zero_bit_copies);
4663 emit_constant_insn
4664 (cond,
4665 gen_rtx_SET (sub,
4666 gen_rtx_NOT (mode,
4667 gen_rtx_LSHIFTRT (mode,
4668 source,
4669 shift))));
4670 emit_constant_insn
4671 (cond,
4672 gen_rtx_SET (target,
4673 gen_rtx_NOT (mode,
4674 gen_rtx_ASHIFT (mode, sub,
4675 shift))));
4677 return 2;
4680 /* This will never be reached for Thumb2 because orn is a valid
4681 instruction. This is for Thumb1 and the ARM 32 bit cases.
4683 x = y | constant (such that ~constant is a valid constant)
4684 Transform this to
4685 x = ~(~y & ~constant).
4687 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4689 if (generate)
4691 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4692 emit_constant_insn (cond,
4693 gen_rtx_SET (sub,
4694 gen_rtx_NOT (mode, source)));
4695 source = sub;
4696 if (subtargets)
4697 sub = gen_reg_rtx (mode);
4698 emit_constant_insn (cond,
4699 gen_rtx_SET (sub,
4700 gen_rtx_AND (mode, source,
4701 GEN_INT (temp1))));
4702 emit_constant_insn (cond,
4703 gen_rtx_SET (target,
4704 gen_rtx_NOT (mode, sub)));
4706 return 3;
4708 break;
4710 case AND:
4711 /* See if two shifts will do 2 or more insn's worth of work. */
4712 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4714 HOST_WIDE_INT shift_mask = ((0xffffffff
4715 << (32 - clear_sign_bit_copies))
4716 & 0xffffffff);
4718 if ((remainder | shift_mask) != 0xffffffff)
4720 HOST_WIDE_INT new_val
4721 = ARM_SIGN_EXTEND (remainder | shift_mask);
4723 if (generate)
4725 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4726 insns = arm_gen_constant (AND, SImode, cond, new_val,
4727 new_src, source, subtargets, 1);
4728 source = new_src;
4730 else
4732 rtx targ = subtargets ? NULL_RTX : target;
4733 insns = arm_gen_constant (AND, mode, cond, new_val,
4734 targ, source, subtargets, 0);
4738 if (generate)
4740 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4741 rtx shift = GEN_INT (clear_sign_bit_copies);
4743 emit_insn (gen_ashlsi3 (new_src, source, shift));
4744 emit_insn (gen_lshrsi3 (target, new_src, shift));
4747 return insns + 2;
4750 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4752 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4754 if ((remainder | shift_mask) != 0xffffffff)
4756 HOST_WIDE_INT new_val
4757 = ARM_SIGN_EXTEND (remainder | shift_mask);
4758 if (generate)
4760 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4762 insns = arm_gen_constant (AND, mode, cond, new_val,
4763 new_src, source, subtargets, 1);
4764 source = new_src;
4766 else
4768 rtx targ = subtargets ? NULL_RTX : target;
4770 insns = arm_gen_constant (AND, mode, cond, new_val,
4771 targ, source, subtargets, 0);
4775 if (generate)
4777 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4778 rtx shift = GEN_INT (clear_zero_bit_copies);
4780 emit_insn (gen_lshrsi3 (new_src, source, shift));
4781 emit_insn (gen_ashlsi3 (target, new_src, shift));
4784 return insns + 2;
4787 break;
4789 default:
4790 break;
4793 /* Calculate what the instruction sequences would be if we generated it
4794 normally, negated, or inverted. */
4795 if (code == AND)
4796 /* AND cannot be split into multiple insns, so invert and use BIC. */
4797 insns = 99;
4798 else
4799 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4801 if (can_negate)
4802 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4803 &neg_immediates);
4804 else
4805 neg_insns = 99;
4807 if (can_invert || final_invert)
4808 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4809 &inv_immediates);
4810 else
4811 inv_insns = 99;
4813 immediates = &pos_immediates;
4815 /* Is the negated immediate sequence more efficient? */
4816 if (neg_insns < insns && neg_insns <= inv_insns)
4818 insns = neg_insns;
4819 immediates = &neg_immediates;
4821 else
4822 can_negate = 0;
4824 /* Is the inverted immediate sequence more efficient?
4825 We must allow for an extra NOT instruction for XOR operations, although
4826 there is some chance that the final 'mvn' will get optimized later. */
4827 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4829 insns = inv_insns;
4830 immediates = &inv_immediates;
4832 else
4834 can_invert = 0;
4835 final_invert = 0;
4838 /* Now output the chosen sequence as instructions. */
4839 if (generate)
4841 for (i = 0; i < insns; i++)
4843 rtx new_src, temp1_rtx;
4845 temp1 = immediates->i[i];
4847 if (code == SET || code == MINUS)
4848 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4849 else if ((final_invert || i < (insns - 1)) && subtargets)
4850 new_src = gen_reg_rtx (mode);
4851 else
4852 new_src = target;
4854 if (can_invert)
4855 temp1 = ~temp1;
4856 else if (can_negate)
4857 temp1 = -temp1;
4859 temp1 = trunc_int_for_mode (temp1, mode);
4860 temp1_rtx = GEN_INT (temp1);
4862 if (code == SET)
4864 else if (code == MINUS)
4865 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4866 else
4867 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4869 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4870 source = new_src;
4872 if (code == SET)
4874 can_negate = can_invert;
4875 can_invert = 0;
4876 code = PLUS;
4878 else if (code == MINUS)
4879 code = PLUS;
4883 if (final_invert)
4885 if (generate)
4886 emit_constant_insn (cond, gen_rtx_SET (target,
4887 gen_rtx_NOT (mode, source)));
4888 insns++;
4891 return insns;
4894 /* Canonicalize a comparison so that we are more likely to recognize it.
4895 This can be done for a few constant compares, where we can make the
4896 immediate value easier to load. */
4898 static void
4899 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4900 bool op0_preserve_value)
4902 machine_mode mode;
4903 unsigned HOST_WIDE_INT i, maxval;
4905 mode = GET_MODE (*op0);
4906 if (mode == VOIDmode)
4907 mode = GET_MODE (*op1);
4909 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4911 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4912 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4913 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4914 for GTU/LEU in Thumb mode. */
4915 if (mode == DImode)
4918 if (*code == GT || *code == LE
4919 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4921 /* Missing comparison. First try to use an available
4922 comparison. */
4923 if (CONST_INT_P (*op1))
4925 i = INTVAL (*op1);
4926 switch (*code)
4928 case GT:
4929 case LE:
4930 if (i != maxval
4931 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4933 *op1 = GEN_INT (i + 1);
4934 *code = *code == GT ? GE : LT;
4935 return;
4937 break;
4938 case GTU:
4939 case LEU:
4940 if (i != ~((unsigned HOST_WIDE_INT) 0)
4941 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4943 *op1 = GEN_INT (i + 1);
4944 *code = *code == GTU ? GEU : LTU;
4945 return;
4947 break;
4948 default:
4949 gcc_unreachable ();
4953 /* If that did not work, reverse the condition. */
4954 if (!op0_preserve_value)
4956 std::swap (*op0, *op1);
4957 *code = (int)swap_condition ((enum rtx_code)*code);
4960 return;
4963 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4964 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4965 to facilitate possible combining with a cmp into 'ands'. */
4966 if (mode == SImode
4967 && GET_CODE (*op0) == ZERO_EXTEND
4968 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4969 && GET_MODE (XEXP (*op0, 0)) == QImode
4970 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4971 && subreg_lowpart_p (XEXP (*op0, 0))
4972 && *op1 == const0_rtx)
4973 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4974 GEN_INT (255));
4976 /* Comparisons smaller than DImode. Only adjust comparisons against
4977 an out-of-range constant. */
4978 if (!CONST_INT_P (*op1)
4979 || const_ok_for_arm (INTVAL (*op1))
4980 || const_ok_for_arm (- INTVAL (*op1)))
4981 return;
4983 i = INTVAL (*op1);
4985 switch (*code)
4987 case EQ:
4988 case NE:
4989 return;
4991 case GT:
4992 case LE:
4993 if (i != maxval
4994 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4996 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4997 *code = *code == GT ? GE : LT;
4998 return;
5000 break;
5002 case GE:
5003 case LT:
5004 if (i != ~maxval
5005 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5007 *op1 = GEN_INT (i - 1);
5008 *code = *code == GE ? GT : LE;
5009 return;
5011 break;
5013 case GTU:
5014 case LEU:
5015 if (i != ~((unsigned HOST_WIDE_INT) 0)
5016 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5018 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5019 *code = *code == GTU ? GEU : LTU;
5020 return;
5022 break;
5024 case GEU:
5025 case LTU:
5026 if (i != 0
5027 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5029 *op1 = GEN_INT (i - 1);
5030 *code = *code == GEU ? GTU : LEU;
5031 return;
5033 break;
5035 default:
5036 gcc_unreachable ();
5041 /* Define how to find the value returned by a function. */
5043 static rtx
5044 arm_function_value(const_tree type, const_tree func,
5045 bool outgoing ATTRIBUTE_UNUSED)
5047 machine_mode mode;
5048 int unsignedp ATTRIBUTE_UNUSED;
5049 rtx r ATTRIBUTE_UNUSED;
5051 mode = TYPE_MODE (type);
5053 if (TARGET_AAPCS_BASED)
5054 return aapcs_allocate_return_reg (mode, type, func);
5056 /* Promote integer types. */
5057 if (INTEGRAL_TYPE_P (type))
5058 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5060 /* Promotes small structs returned in a register to full-word size
5061 for big-endian AAPCS. */
5062 if (arm_return_in_msb (type))
5064 HOST_WIDE_INT size = int_size_in_bytes (type);
5065 if (size % UNITS_PER_WORD != 0)
5067 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5068 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5072 return arm_libcall_value_1 (mode);
5075 /* libcall hashtable helpers. */
5077 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5079 static inline hashval_t hash (const rtx_def *);
5080 static inline bool equal (const rtx_def *, const rtx_def *);
5081 static inline void remove (rtx_def *);
5084 inline bool
5085 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5087 return rtx_equal_p (p1, p2);
5090 inline hashval_t
5091 libcall_hasher::hash (const rtx_def *p1)
5093 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5096 typedef hash_table<libcall_hasher> libcall_table_type;
5098 static void
5099 add_libcall (libcall_table_type *htab, rtx libcall)
5101 *htab->find_slot (libcall, INSERT) = libcall;
5104 static bool
5105 arm_libcall_uses_aapcs_base (const_rtx libcall)
5107 static bool init_done = false;
5108 static libcall_table_type *libcall_htab = NULL;
5110 if (!init_done)
5112 init_done = true;
5114 libcall_htab = new libcall_table_type (31);
5115 add_libcall (libcall_htab,
5116 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5117 add_libcall (libcall_htab,
5118 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5119 add_libcall (libcall_htab,
5120 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5121 add_libcall (libcall_htab,
5122 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5124 add_libcall (libcall_htab,
5125 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5126 add_libcall (libcall_htab,
5127 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5128 add_libcall (libcall_htab,
5129 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5130 add_libcall (libcall_htab,
5131 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5133 add_libcall (libcall_htab,
5134 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5135 add_libcall (libcall_htab,
5136 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5137 add_libcall (libcall_htab,
5138 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5139 add_libcall (libcall_htab,
5140 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5141 add_libcall (libcall_htab,
5142 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5143 add_libcall (libcall_htab,
5144 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5145 add_libcall (libcall_htab,
5146 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5147 add_libcall (libcall_htab,
5148 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5150 /* Values from double-precision helper functions are returned in core
5151 registers if the selected core only supports single-precision
5152 arithmetic, even if we are using the hard-float ABI. The same is
5153 true for single-precision helpers, but we will never be using the
5154 hard-float ABI on a CPU which doesn't support single-precision
5155 operations in hardware. */
5156 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5157 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5158 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5159 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5160 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5161 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5162 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5163 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5164 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5165 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5166 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5167 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5168 SFmode));
5169 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5170 DFmode));
5173 return libcall && libcall_htab->find (libcall) != NULL;
5176 static rtx
5177 arm_libcall_value_1 (machine_mode mode)
5179 if (TARGET_AAPCS_BASED)
5180 return aapcs_libcall_value (mode);
5181 else if (TARGET_IWMMXT_ABI
5182 && arm_vector_mode_supported_p (mode))
5183 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5184 else
5185 return gen_rtx_REG (mode, ARG_REGISTER (1));
5188 /* Define how to find the value returned by a library function
5189 assuming the value has mode MODE. */
5191 static rtx
5192 arm_libcall_value (machine_mode mode, const_rtx libcall)
5194 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5195 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5197 /* The following libcalls return their result in integer registers,
5198 even though they return a floating point value. */
5199 if (arm_libcall_uses_aapcs_base (libcall))
5200 return gen_rtx_REG (mode, ARG_REGISTER(1));
5204 return arm_libcall_value_1 (mode);
5207 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5209 static bool
5210 arm_function_value_regno_p (const unsigned int regno)
5212 if (regno == ARG_REGISTER (1)
5213 || (TARGET_32BIT
5214 && TARGET_AAPCS_BASED
5215 && TARGET_VFP
5216 && TARGET_HARD_FLOAT
5217 && regno == FIRST_VFP_REGNUM)
5218 || (TARGET_IWMMXT_ABI
5219 && regno == FIRST_IWMMXT_REGNUM))
5220 return true;
5222 return false;
5225 /* Determine the amount of memory needed to store the possible return
5226 registers of an untyped call. */
5228 arm_apply_result_size (void)
5230 int size = 16;
5232 if (TARGET_32BIT)
5234 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5235 size += 32;
5236 if (TARGET_IWMMXT_ABI)
5237 size += 8;
5240 return size;
5243 /* Decide whether TYPE should be returned in memory (true)
5244 or in a register (false). FNTYPE is the type of the function making
5245 the call. */
5246 static bool
5247 arm_return_in_memory (const_tree type, const_tree fntype)
5249 HOST_WIDE_INT size;
5251 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5253 if (TARGET_AAPCS_BASED)
5255 /* Simple, non-aggregate types (ie not including vectors and
5256 complex) are always returned in a register (or registers).
5257 We don't care about which register here, so we can short-cut
5258 some of the detail. */
5259 if (!AGGREGATE_TYPE_P (type)
5260 && TREE_CODE (type) != VECTOR_TYPE
5261 && TREE_CODE (type) != COMPLEX_TYPE)
5262 return false;
5264 /* Any return value that is no larger than one word can be
5265 returned in r0. */
5266 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5267 return false;
5269 /* Check any available co-processors to see if they accept the
5270 type as a register candidate (VFP, for example, can return
5271 some aggregates in consecutive registers). These aren't
5272 available if the call is variadic. */
5273 if (aapcs_select_return_coproc (type, fntype) >= 0)
5274 return false;
5276 /* Vector values should be returned using ARM registers, not
5277 memory (unless they're over 16 bytes, which will break since
5278 we only have four call-clobbered registers to play with). */
5279 if (TREE_CODE (type) == VECTOR_TYPE)
5280 return (size < 0 || size > (4 * UNITS_PER_WORD));
5282 /* The rest go in memory. */
5283 return true;
5286 if (TREE_CODE (type) == VECTOR_TYPE)
5287 return (size < 0 || size > (4 * UNITS_PER_WORD));
5289 if (!AGGREGATE_TYPE_P (type) &&
5290 (TREE_CODE (type) != VECTOR_TYPE))
5291 /* All simple types are returned in registers. */
5292 return false;
5294 if (arm_abi != ARM_ABI_APCS)
5296 /* ATPCS and later return aggregate types in memory only if they are
5297 larger than a word (or are variable size). */
5298 return (size < 0 || size > UNITS_PER_WORD);
5301 /* For the arm-wince targets we choose to be compatible with Microsoft's
5302 ARM and Thumb compilers, which always return aggregates in memory. */
5303 #ifndef ARM_WINCE
5304 /* All structures/unions bigger than one word are returned in memory.
5305 Also catch the case where int_size_in_bytes returns -1. In this case
5306 the aggregate is either huge or of variable size, and in either case
5307 we will want to return it via memory and not in a register. */
5308 if (size < 0 || size > UNITS_PER_WORD)
5309 return true;
5311 if (TREE_CODE (type) == RECORD_TYPE)
5313 tree field;
5315 /* For a struct the APCS says that we only return in a register
5316 if the type is 'integer like' and every addressable element
5317 has an offset of zero. For practical purposes this means
5318 that the structure can have at most one non bit-field element
5319 and that this element must be the first one in the structure. */
5321 /* Find the first field, ignoring non FIELD_DECL things which will
5322 have been created by C++. */
5323 for (field = TYPE_FIELDS (type);
5324 field && TREE_CODE (field) != FIELD_DECL;
5325 field = DECL_CHAIN (field))
5326 continue;
5328 if (field == NULL)
5329 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5331 /* Check that the first field is valid for returning in a register. */
5333 /* ... Floats are not allowed */
5334 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5335 return true;
5337 /* ... Aggregates that are not themselves valid for returning in
5338 a register are not allowed. */
5339 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5340 return true;
5342 /* Now check the remaining fields, if any. Only bitfields are allowed,
5343 since they are not addressable. */
5344 for (field = DECL_CHAIN (field);
5345 field;
5346 field = DECL_CHAIN (field))
5348 if (TREE_CODE (field) != FIELD_DECL)
5349 continue;
5351 if (!DECL_BIT_FIELD_TYPE (field))
5352 return true;
5355 return false;
5358 if (TREE_CODE (type) == UNION_TYPE)
5360 tree field;
5362 /* Unions can be returned in registers if every element is
5363 integral, or can be returned in an integer register. */
5364 for (field = TYPE_FIELDS (type);
5365 field;
5366 field = DECL_CHAIN (field))
5368 if (TREE_CODE (field) != FIELD_DECL)
5369 continue;
5371 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5372 return true;
5374 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5375 return true;
5378 return false;
5380 #endif /* not ARM_WINCE */
5382 /* Return all other types in memory. */
5383 return true;
5386 const struct pcs_attribute_arg
5388 const char *arg;
5389 enum arm_pcs value;
5390 } pcs_attribute_args[] =
5392 {"aapcs", ARM_PCS_AAPCS},
5393 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5394 #if 0
5395 /* We could recognize these, but changes would be needed elsewhere
5396 * to implement them. */
5397 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5398 {"atpcs", ARM_PCS_ATPCS},
5399 {"apcs", ARM_PCS_APCS},
5400 #endif
5401 {NULL, ARM_PCS_UNKNOWN}
5404 static enum arm_pcs
5405 arm_pcs_from_attribute (tree attr)
5407 const struct pcs_attribute_arg *ptr;
5408 const char *arg;
5410 /* Get the value of the argument. */
5411 if (TREE_VALUE (attr) == NULL_TREE
5412 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5413 return ARM_PCS_UNKNOWN;
5415 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5417 /* Check it against the list of known arguments. */
5418 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5419 if (streq (arg, ptr->arg))
5420 return ptr->value;
5422 /* An unrecognized interrupt type. */
5423 return ARM_PCS_UNKNOWN;
5426 /* Get the PCS variant to use for this call. TYPE is the function's type
5427 specification, DECL is the specific declartion. DECL may be null if
5428 the call could be indirect or if this is a library call. */
5429 static enum arm_pcs
5430 arm_get_pcs_model (const_tree type, const_tree decl)
5432 bool user_convention = false;
5433 enum arm_pcs user_pcs = arm_pcs_default;
5434 tree attr;
5436 gcc_assert (type);
5438 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5439 if (attr)
5441 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5442 user_convention = true;
5445 if (TARGET_AAPCS_BASED)
5447 /* Detect varargs functions. These always use the base rules
5448 (no argument is ever a candidate for a co-processor
5449 register). */
5450 bool base_rules = stdarg_p (type);
5452 if (user_convention)
5454 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5455 sorry ("non-AAPCS derived PCS variant");
5456 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5457 error ("variadic functions must use the base AAPCS variant");
5460 if (base_rules)
5461 return ARM_PCS_AAPCS;
5462 else if (user_convention)
5463 return user_pcs;
5464 else if (decl && flag_unit_at_a_time)
5466 /* Local functions never leak outside this compilation unit,
5467 so we are free to use whatever conventions are
5468 appropriate. */
5469 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5470 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5471 if (i && i->local)
5472 return ARM_PCS_AAPCS_LOCAL;
5475 else if (user_convention && user_pcs != arm_pcs_default)
5476 sorry ("PCS variant");
5478 /* For everything else we use the target's default. */
5479 return arm_pcs_default;
5483 static void
5484 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5485 const_tree fntype ATTRIBUTE_UNUSED,
5486 rtx libcall ATTRIBUTE_UNUSED,
5487 const_tree fndecl ATTRIBUTE_UNUSED)
5489 /* Record the unallocated VFP registers. */
5490 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5491 pcum->aapcs_vfp_reg_alloc = 0;
5494 /* Walk down the type tree of TYPE counting consecutive base elements.
5495 If *MODEP is VOIDmode, then set it to the first valid floating point
5496 type. If a non-floating point type is found, or if a floating point
5497 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5498 otherwise return the count in the sub-tree. */
5499 static int
5500 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5502 machine_mode mode;
5503 HOST_WIDE_INT size;
5505 switch (TREE_CODE (type))
5507 case REAL_TYPE:
5508 mode = TYPE_MODE (type);
5509 if (mode != DFmode && mode != SFmode)
5510 return -1;
5512 if (*modep == VOIDmode)
5513 *modep = mode;
5515 if (*modep == mode)
5516 return 1;
5518 break;
5520 case COMPLEX_TYPE:
5521 mode = TYPE_MODE (TREE_TYPE (type));
5522 if (mode != DFmode && mode != SFmode)
5523 return -1;
5525 if (*modep == VOIDmode)
5526 *modep = mode;
5528 if (*modep == mode)
5529 return 2;
5531 break;
5533 case VECTOR_TYPE:
5534 /* Use V2SImode and V4SImode as representatives of all 64-bit
5535 and 128-bit vector types, whether or not those modes are
5536 supported with the present options. */
5537 size = int_size_in_bytes (type);
5538 switch (size)
5540 case 8:
5541 mode = V2SImode;
5542 break;
5543 case 16:
5544 mode = V4SImode;
5545 break;
5546 default:
5547 return -1;
5550 if (*modep == VOIDmode)
5551 *modep = mode;
5553 /* Vector modes are considered to be opaque: two vectors are
5554 equivalent for the purposes of being homogeneous aggregates
5555 if they are the same size. */
5556 if (*modep == mode)
5557 return 1;
5559 break;
5561 case ARRAY_TYPE:
5563 int count;
5564 tree index = TYPE_DOMAIN (type);
5566 /* Can't handle incomplete types nor sizes that are not
5567 fixed. */
5568 if (!COMPLETE_TYPE_P (type)
5569 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5570 return -1;
5572 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5573 if (count == -1
5574 || !index
5575 || !TYPE_MAX_VALUE (index)
5576 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5577 || !TYPE_MIN_VALUE (index)
5578 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5579 || count < 0)
5580 return -1;
5582 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5583 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5585 /* There must be no padding. */
5586 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5587 return -1;
5589 return count;
5592 case RECORD_TYPE:
5594 int count = 0;
5595 int sub_count;
5596 tree field;
5598 /* Can't handle incomplete types nor sizes that are not
5599 fixed. */
5600 if (!COMPLETE_TYPE_P (type)
5601 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5602 return -1;
5604 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5606 if (TREE_CODE (field) != FIELD_DECL)
5607 continue;
5609 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5610 if (sub_count < 0)
5611 return -1;
5612 count += sub_count;
5615 /* There must be no padding. */
5616 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5617 return -1;
5619 return count;
5622 case UNION_TYPE:
5623 case QUAL_UNION_TYPE:
5625 /* These aren't very interesting except in a degenerate case. */
5626 int count = 0;
5627 int sub_count;
5628 tree field;
5630 /* Can't handle incomplete types nor sizes that are not
5631 fixed. */
5632 if (!COMPLETE_TYPE_P (type)
5633 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5634 return -1;
5636 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5638 if (TREE_CODE (field) != FIELD_DECL)
5639 continue;
5641 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5642 if (sub_count < 0)
5643 return -1;
5644 count = count > sub_count ? count : sub_count;
5647 /* There must be no padding. */
5648 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5649 return -1;
5651 return count;
5654 default:
5655 break;
5658 return -1;
5661 /* Return true if PCS_VARIANT should use VFP registers. */
5662 static bool
5663 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5665 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5667 static bool seen_thumb1_vfp = false;
5669 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5671 sorry ("Thumb-1 hard-float VFP ABI");
5672 /* sorry() is not immediately fatal, so only display this once. */
5673 seen_thumb1_vfp = true;
5676 return true;
5679 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5680 return false;
5682 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5683 (TARGET_VFP_DOUBLE || !is_double));
5686 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5687 suitable for passing or returning in VFP registers for the PCS
5688 variant selected. If it is, then *BASE_MODE is updated to contain
5689 a machine mode describing each element of the argument's type and
5690 *COUNT to hold the number of such elements. */
5691 static bool
5692 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5693 machine_mode mode, const_tree type,
5694 machine_mode *base_mode, int *count)
5696 machine_mode new_mode = VOIDmode;
5698 /* If we have the type information, prefer that to working things
5699 out from the mode. */
5700 if (type)
5702 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5704 if (ag_count > 0 && ag_count <= 4)
5705 *count = ag_count;
5706 else
5707 return false;
5709 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5710 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5711 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5713 *count = 1;
5714 new_mode = mode;
5716 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5718 *count = 2;
5719 new_mode = (mode == DCmode ? DFmode : SFmode);
5721 else
5722 return false;
5725 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5726 return false;
5728 *base_mode = new_mode;
5729 return true;
5732 static bool
5733 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5734 machine_mode mode, const_tree type)
5736 int count ATTRIBUTE_UNUSED;
5737 machine_mode ag_mode ATTRIBUTE_UNUSED;
5739 if (!use_vfp_abi (pcs_variant, false))
5740 return false;
5741 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5742 &ag_mode, &count);
5745 static bool
5746 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5747 const_tree type)
5749 if (!use_vfp_abi (pcum->pcs_variant, false))
5750 return false;
5752 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5753 &pcum->aapcs_vfp_rmode,
5754 &pcum->aapcs_vfp_rcount);
5757 static bool
5758 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5759 const_tree type ATTRIBUTE_UNUSED)
5761 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5762 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5763 int regno;
5765 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5766 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5768 pcum->aapcs_vfp_reg_alloc = mask << regno;
5769 if (mode == BLKmode
5770 || (mode == TImode && ! TARGET_NEON)
5771 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5773 int i;
5774 int rcount = pcum->aapcs_vfp_rcount;
5775 int rshift = shift;
5776 machine_mode rmode = pcum->aapcs_vfp_rmode;
5777 rtx par;
5778 if (!TARGET_NEON)
5780 /* Avoid using unsupported vector modes. */
5781 if (rmode == V2SImode)
5782 rmode = DImode;
5783 else if (rmode == V4SImode)
5785 rmode = DImode;
5786 rcount *= 2;
5787 rshift /= 2;
5790 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5791 for (i = 0; i < rcount; i++)
5793 rtx tmp = gen_rtx_REG (rmode,
5794 FIRST_VFP_REGNUM + regno + i * rshift);
5795 tmp = gen_rtx_EXPR_LIST
5796 (VOIDmode, tmp,
5797 GEN_INT (i * GET_MODE_SIZE (rmode)));
5798 XVECEXP (par, 0, i) = tmp;
5801 pcum->aapcs_reg = par;
5803 else
5804 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5805 return true;
5807 return false;
5810 static rtx
5811 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5812 machine_mode mode,
5813 const_tree type ATTRIBUTE_UNUSED)
5815 if (!use_vfp_abi (pcs_variant, false))
5816 return NULL;
5818 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5820 int count;
5821 machine_mode ag_mode;
5822 int i;
5823 rtx par;
5824 int shift;
5826 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5827 &ag_mode, &count);
5829 if (!TARGET_NEON)
5831 if (ag_mode == V2SImode)
5832 ag_mode = DImode;
5833 else if (ag_mode == V4SImode)
5835 ag_mode = DImode;
5836 count *= 2;
5839 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5840 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5841 for (i = 0; i < count; i++)
5843 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5844 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5845 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5846 XVECEXP (par, 0, i) = tmp;
5849 return par;
5852 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5855 static void
5856 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5857 machine_mode mode ATTRIBUTE_UNUSED,
5858 const_tree type ATTRIBUTE_UNUSED)
5860 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5861 pcum->aapcs_vfp_reg_alloc = 0;
5862 return;
5865 #define AAPCS_CP(X) \
5867 aapcs_ ## X ## _cum_init, \
5868 aapcs_ ## X ## _is_call_candidate, \
5869 aapcs_ ## X ## _allocate, \
5870 aapcs_ ## X ## _is_return_candidate, \
5871 aapcs_ ## X ## _allocate_return_reg, \
5872 aapcs_ ## X ## _advance \
5875 /* Table of co-processors that can be used to pass arguments in
5876 registers. Idealy no arugment should be a candidate for more than
5877 one co-processor table entry, but the table is processed in order
5878 and stops after the first match. If that entry then fails to put
5879 the argument into a co-processor register, the argument will go on
5880 the stack. */
5881 static struct
5883 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5884 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5886 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5887 BLKmode) is a candidate for this co-processor's registers; this
5888 function should ignore any position-dependent state in
5889 CUMULATIVE_ARGS and only use call-type dependent information. */
5890 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5892 /* Return true if the argument does get a co-processor register; it
5893 should set aapcs_reg to an RTX of the register allocated as is
5894 required for a return from FUNCTION_ARG. */
5895 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5897 /* Return true if a result of mode MODE (or type TYPE if MODE is
5898 BLKmode) is can be returned in this co-processor's registers. */
5899 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5901 /* Allocate and return an RTX element to hold the return type of a
5902 call, this routine must not fail and will only be called if
5903 is_return_candidate returned true with the same parameters. */
5904 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5906 /* Finish processing this argument and prepare to start processing
5907 the next one. */
5908 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5909 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5911 AAPCS_CP(vfp)
5914 #undef AAPCS_CP
5916 static int
5917 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5918 const_tree type)
5920 int i;
5922 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5923 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5924 return i;
5926 return -1;
5929 static int
5930 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5932 /* We aren't passed a decl, so we can't check that a call is local.
5933 However, it isn't clear that that would be a win anyway, since it
5934 might limit some tail-calling opportunities. */
5935 enum arm_pcs pcs_variant;
5937 if (fntype)
5939 const_tree fndecl = NULL_TREE;
5941 if (TREE_CODE (fntype) == FUNCTION_DECL)
5943 fndecl = fntype;
5944 fntype = TREE_TYPE (fntype);
5947 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5949 else
5950 pcs_variant = arm_pcs_default;
5952 if (pcs_variant != ARM_PCS_AAPCS)
5954 int i;
5956 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5957 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5958 TYPE_MODE (type),
5959 type))
5960 return i;
5962 return -1;
5965 static rtx
5966 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5967 const_tree fntype)
5969 /* We aren't passed a decl, so we can't check that a call is local.
5970 However, it isn't clear that that would be a win anyway, since it
5971 might limit some tail-calling opportunities. */
5972 enum arm_pcs pcs_variant;
5973 int unsignedp ATTRIBUTE_UNUSED;
5975 if (fntype)
5977 const_tree fndecl = NULL_TREE;
5979 if (TREE_CODE (fntype) == FUNCTION_DECL)
5981 fndecl = fntype;
5982 fntype = TREE_TYPE (fntype);
5985 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5987 else
5988 pcs_variant = arm_pcs_default;
5990 /* Promote integer types. */
5991 if (type && INTEGRAL_TYPE_P (type))
5992 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5994 if (pcs_variant != ARM_PCS_AAPCS)
5996 int i;
5998 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5999 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6000 type))
6001 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6002 mode, type);
6005 /* Promotes small structs returned in a register to full-word size
6006 for big-endian AAPCS. */
6007 if (type && arm_return_in_msb (type))
6009 HOST_WIDE_INT size = int_size_in_bytes (type);
6010 if (size % UNITS_PER_WORD != 0)
6012 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6013 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6017 return gen_rtx_REG (mode, R0_REGNUM);
6020 static rtx
6021 aapcs_libcall_value (machine_mode mode)
6023 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6024 && GET_MODE_SIZE (mode) <= 4)
6025 mode = SImode;
6027 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6030 /* Lay out a function argument using the AAPCS rules. The rule
6031 numbers referred to here are those in the AAPCS. */
6032 static void
6033 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6034 const_tree type, bool named)
6036 int nregs, nregs2;
6037 int ncrn;
6039 /* We only need to do this once per argument. */
6040 if (pcum->aapcs_arg_processed)
6041 return;
6043 pcum->aapcs_arg_processed = true;
6045 /* Special case: if named is false then we are handling an incoming
6046 anonymous argument which is on the stack. */
6047 if (!named)
6048 return;
6050 /* Is this a potential co-processor register candidate? */
6051 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6053 int slot = aapcs_select_call_coproc (pcum, mode, type);
6054 pcum->aapcs_cprc_slot = slot;
6056 /* We don't have to apply any of the rules from part B of the
6057 preparation phase, these are handled elsewhere in the
6058 compiler. */
6060 if (slot >= 0)
6062 /* A Co-processor register candidate goes either in its own
6063 class of registers or on the stack. */
6064 if (!pcum->aapcs_cprc_failed[slot])
6066 /* C1.cp - Try to allocate the argument to co-processor
6067 registers. */
6068 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6069 return;
6071 /* C2.cp - Put the argument on the stack and note that we
6072 can't assign any more candidates in this slot. We also
6073 need to note that we have allocated stack space, so that
6074 we won't later try to split a non-cprc candidate between
6075 core registers and the stack. */
6076 pcum->aapcs_cprc_failed[slot] = true;
6077 pcum->can_split = false;
6080 /* We didn't get a register, so this argument goes on the
6081 stack. */
6082 gcc_assert (pcum->can_split == false);
6083 return;
6087 /* C3 - For double-word aligned arguments, round the NCRN up to the
6088 next even number. */
6089 ncrn = pcum->aapcs_ncrn;
6090 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6091 ncrn++;
6093 nregs = ARM_NUM_REGS2(mode, type);
6095 /* Sigh, this test should really assert that nregs > 0, but a GCC
6096 extension allows empty structs and then gives them empty size; it
6097 then allows such a structure to be passed by value. For some of
6098 the code below we have to pretend that such an argument has
6099 non-zero size so that we 'locate' it correctly either in
6100 registers or on the stack. */
6101 gcc_assert (nregs >= 0);
6103 nregs2 = nregs ? nregs : 1;
6105 /* C4 - Argument fits entirely in core registers. */
6106 if (ncrn + nregs2 <= NUM_ARG_REGS)
6108 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6109 pcum->aapcs_next_ncrn = ncrn + nregs;
6110 return;
6113 /* C5 - Some core registers left and there are no arguments already
6114 on the stack: split this argument between the remaining core
6115 registers and the stack. */
6116 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6118 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6119 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6120 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6121 return;
6124 /* C6 - NCRN is set to 4. */
6125 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6127 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6128 return;
6131 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6132 for a call to a function whose data type is FNTYPE.
6133 For a library call, FNTYPE is NULL. */
6134 void
6135 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6136 rtx libname,
6137 tree fndecl ATTRIBUTE_UNUSED)
6139 /* Long call handling. */
6140 if (fntype)
6141 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6142 else
6143 pcum->pcs_variant = arm_pcs_default;
6145 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6147 if (arm_libcall_uses_aapcs_base (libname))
6148 pcum->pcs_variant = ARM_PCS_AAPCS;
6150 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6151 pcum->aapcs_reg = NULL_RTX;
6152 pcum->aapcs_partial = 0;
6153 pcum->aapcs_arg_processed = false;
6154 pcum->aapcs_cprc_slot = -1;
6155 pcum->can_split = true;
6157 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6159 int i;
6161 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6163 pcum->aapcs_cprc_failed[i] = false;
6164 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6167 return;
6170 /* Legacy ABIs */
6172 /* On the ARM, the offset starts at 0. */
6173 pcum->nregs = 0;
6174 pcum->iwmmxt_nregs = 0;
6175 pcum->can_split = true;
6177 /* Varargs vectors are treated the same as long long.
6178 named_count avoids having to change the way arm handles 'named' */
6179 pcum->named_count = 0;
6180 pcum->nargs = 0;
6182 if (TARGET_REALLY_IWMMXT && fntype)
6184 tree fn_arg;
6186 for (fn_arg = TYPE_ARG_TYPES (fntype);
6187 fn_arg;
6188 fn_arg = TREE_CHAIN (fn_arg))
6189 pcum->named_count += 1;
6191 if (! pcum->named_count)
6192 pcum->named_count = INT_MAX;
6196 /* Return true if mode/type need doubleword alignment. */
6197 static bool
6198 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6200 if (!type)
6201 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6203 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6204 if (!AGGREGATE_TYPE_P (type))
6205 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6207 /* Array types: Use member alignment of element type. */
6208 if (TREE_CODE (type) == ARRAY_TYPE)
6209 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6211 /* Record/aggregate types: Use greatest member alignment of any member. */
6212 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6213 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6214 return true;
6216 return false;
6220 /* Determine where to put an argument to a function.
6221 Value is zero to push the argument on the stack,
6222 or a hard register in which to store the argument.
6224 MODE is the argument's machine mode.
6225 TYPE is the data type of the argument (as a tree).
6226 This is null for libcalls where that information may
6227 not be available.
6228 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6229 the preceding args and about the function being called.
6230 NAMED is nonzero if this argument is a named parameter
6231 (otherwise it is an extra parameter matching an ellipsis).
6233 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6234 other arguments are passed on the stack. If (NAMED == 0) (which happens
6235 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6236 defined), say it is passed in the stack (function_prologue will
6237 indeed make it pass in the stack if necessary). */
6239 static rtx
6240 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6241 const_tree type, bool named)
6243 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6244 int nregs;
6246 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6247 a call insn (op3 of a call_value insn). */
6248 if (mode == VOIDmode)
6249 return const0_rtx;
6251 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6253 aapcs_layout_arg (pcum, mode, type, named);
6254 return pcum->aapcs_reg;
6257 /* Varargs vectors are treated the same as long long.
6258 named_count avoids having to change the way arm handles 'named' */
6259 if (TARGET_IWMMXT_ABI
6260 && arm_vector_mode_supported_p (mode)
6261 && pcum->named_count > pcum->nargs + 1)
6263 if (pcum->iwmmxt_nregs <= 9)
6264 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6265 else
6267 pcum->can_split = false;
6268 return NULL_RTX;
6272 /* Put doubleword aligned quantities in even register pairs. */
6273 if (pcum->nregs & 1
6274 && ARM_DOUBLEWORD_ALIGN
6275 && arm_needs_doubleword_align (mode, type))
6276 pcum->nregs++;
6278 /* Only allow splitting an arg between regs and memory if all preceding
6279 args were allocated to regs. For args passed by reference we only count
6280 the reference pointer. */
6281 if (pcum->can_split)
6282 nregs = 1;
6283 else
6284 nregs = ARM_NUM_REGS2 (mode, type);
6286 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6287 return NULL_RTX;
6289 return gen_rtx_REG (mode, pcum->nregs);
6292 static unsigned int
6293 arm_function_arg_boundary (machine_mode mode, const_tree type)
6295 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6296 ? DOUBLEWORD_ALIGNMENT
6297 : PARM_BOUNDARY);
6300 static int
6301 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6302 tree type, bool named)
6304 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6305 int nregs = pcum->nregs;
6307 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6309 aapcs_layout_arg (pcum, mode, type, named);
6310 return pcum->aapcs_partial;
6313 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6314 return 0;
6316 if (NUM_ARG_REGS > nregs
6317 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6318 && pcum->can_split)
6319 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6321 return 0;
6324 /* Update the data in PCUM to advance over an argument
6325 of mode MODE and data type TYPE.
6326 (TYPE is null for libcalls where that information may not be available.) */
6328 static void
6329 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6330 const_tree type, bool named)
6332 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6334 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6336 aapcs_layout_arg (pcum, mode, type, named);
6338 if (pcum->aapcs_cprc_slot >= 0)
6340 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6341 type);
6342 pcum->aapcs_cprc_slot = -1;
6345 /* Generic stuff. */
6346 pcum->aapcs_arg_processed = false;
6347 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6348 pcum->aapcs_reg = NULL_RTX;
6349 pcum->aapcs_partial = 0;
6351 else
6353 pcum->nargs += 1;
6354 if (arm_vector_mode_supported_p (mode)
6355 && pcum->named_count > pcum->nargs
6356 && TARGET_IWMMXT_ABI)
6357 pcum->iwmmxt_nregs += 1;
6358 else
6359 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6363 /* Variable sized types are passed by reference. This is a GCC
6364 extension to the ARM ABI. */
6366 static bool
6367 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6368 machine_mode mode ATTRIBUTE_UNUSED,
6369 const_tree type, bool named ATTRIBUTE_UNUSED)
6371 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6374 /* Encode the current state of the #pragma [no_]long_calls. */
6375 typedef enum
6377 OFF, /* No #pragma [no_]long_calls is in effect. */
6378 LONG, /* #pragma long_calls is in effect. */
6379 SHORT /* #pragma no_long_calls is in effect. */
6380 } arm_pragma_enum;
6382 static arm_pragma_enum arm_pragma_long_calls = OFF;
6384 void
6385 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6387 arm_pragma_long_calls = LONG;
6390 void
6391 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6393 arm_pragma_long_calls = SHORT;
6396 void
6397 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6399 arm_pragma_long_calls = OFF;
6402 /* Handle an attribute requiring a FUNCTION_DECL;
6403 arguments as in struct attribute_spec.handler. */
6404 static tree
6405 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6406 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6408 if (TREE_CODE (*node) != FUNCTION_DECL)
6410 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6411 name);
6412 *no_add_attrs = true;
6415 return NULL_TREE;
6418 /* Handle an "interrupt" or "isr" attribute;
6419 arguments as in struct attribute_spec.handler. */
6420 static tree
6421 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6422 bool *no_add_attrs)
6424 if (DECL_P (*node))
6426 if (TREE_CODE (*node) != FUNCTION_DECL)
6428 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6429 name);
6430 *no_add_attrs = true;
6432 /* FIXME: the argument if any is checked for type attributes;
6433 should it be checked for decl ones? */
6435 else
6437 if (TREE_CODE (*node) == FUNCTION_TYPE
6438 || TREE_CODE (*node) == METHOD_TYPE)
6440 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6442 warning (OPT_Wattributes, "%qE attribute ignored",
6443 name);
6444 *no_add_attrs = true;
6447 else if (TREE_CODE (*node) == POINTER_TYPE
6448 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6449 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6450 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6452 *node = build_variant_type_copy (*node);
6453 TREE_TYPE (*node) = build_type_attribute_variant
6454 (TREE_TYPE (*node),
6455 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6456 *no_add_attrs = true;
6458 else
6460 /* Possibly pass this attribute on from the type to a decl. */
6461 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6462 | (int) ATTR_FLAG_FUNCTION_NEXT
6463 | (int) ATTR_FLAG_ARRAY_NEXT))
6465 *no_add_attrs = true;
6466 return tree_cons (name, args, NULL_TREE);
6468 else
6470 warning (OPT_Wattributes, "%qE attribute ignored",
6471 name);
6476 return NULL_TREE;
6479 /* Handle a "pcs" attribute; arguments as in struct
6480 attribute_spec.handler. */
6481 static tree
6482 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6483 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6485 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6487 warning (OPT_Wattributes, "%qE attribute ignored", name);
6488 *no_add_attrs = true;
6490 return NULL_TREE;
6493 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6494 /* Handle the "notshared" attribute. This attribute is another way of
6495 requesting hidden visibility. ARM's compiler supports
6496 "__declspec(notshared)"; we support the same thing via an
6497 attribute. */
6499 static tree
6500 arm_handle_notshared_attribute (tree *node,
6501 tree name ATTRIBUTE_UNUSED,
6502 tree args ATTRIBUTE_UNUSED,
6503 int flags ATTRIBUTE_UNUSED,
6504 bool *no_add_attrs)
6506 tree decl = TYPE_NAME (*node);
6508 if (decl)
6510 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6511 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6512 *no_add_attrs = false;
6514 return NULL_TREE;
6516 #endif
6518 /* Return 0 if the attributes for two types are incompatible, 1 if they
6519 are compatible, and 2 if they are nearly compatible (which causes a
6520 warning to be generated). */
6521 static int
6522 arm_comp_type_attributes (const_tree type1, const_tree type2)
6524 int l1, l2, s1, s2;
6526 /* Check for mismatch of non-default calling convention. */
6527 if (TREE_CODE (type1) != FUNCTION_TYPE)
6528 return 1;
6530 /* Check for mismatched call attributes. */
6531 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6532 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6533 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6534 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6536 /* Only bother to check if an attribute is defined. */
6537 if (l1 | l2 | s1 | s2)
6539 /* If one type has an attribute, the other must have the same attribute. */
6540 if ((l1 != l2) || (s1 != s2))
6541 return 0;
6543 /* Disallow mixed attributes. */
6544 if ((l1 & s2) || (l2 & s1))
6545 return 0;
6548 /* Check for mismatched ISR attribute. */
6549 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6550 if (! l1)
6551 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6552 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6553 if (! l2)
6554 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6555 if (l1 != l2)
6556 return 0;
6558 return 1;
6561 /* Assigns default attributes to newly defined type. This is used to
6562 set short_call/long_call attributes for function types of
6563 functions defined inside corresponding #pragma scopes. */
6564 static void
6565 arm_set_default_type_attributes (tree type)
6567 /* Add __attribute__ ((long_call)) to all functions, when
6568 inside #pragma long_calls or __attribute__ ((short_call)),
6569 when inside #pragma no_long_calls. */
6570 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6572 tree type_attr_list, attr_name;
6573 type_attr_list = TYPE_ATTRIBUTES (type);
6575 if (arm_pragma_long_calls == LONG)
6576 attr_name = get_identifier ("long_call");
6577 else if (arm_pragma_long_calls == SHORT)
6578 attr_name = get_identifier ("short_call");
6579 else
6580 return;
6582 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6583 TYPE_ATTRIBUTES (type) = type_attr_list;
6587 /* Return true if DECL is known to be linked into section SECTION. */
6589 static bool
6590 arm_function_in_section_p (tree decl, section *section)
6592 /* We can only be certain about the prevailing symbol definition. */
6593 if (!decl_binds_to_current_def_p (decl))
6594 return false;
6596 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6597 if (!DECL_SECTION_NAME (decl))
6599 /* Make sure that we will not create a unique section for DECL. */
6600 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6601 return false;
6604 return function_section (decl) == section;
6607 /* Return nonzero if a 32-bit "long_call" should be generated for
6608 a call from the current function to DECL. We generate a long_call
6609 if the function:
6611 a. has an __attribute__((long call))
6612 or b. is within the scope of a #pragma long_calls
6613 or c. the -mlong-calls command line switch has been specified
6615 However we do not generate a long call if the function:
6617 d. has an __attribute__ ((short_call))
6618 or e. is inside the scope of a #pragma no_long_calls
6619 or f. is defined in the same section as the current function. */
6621 bool
6622 arm_is_long_call_p (tree decl)
6624 tree attrs;
6626 if (!decl)
6627 return TARGET_LONG_CALLS;
6629 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6630 if (lookup_attribute ("short_call", attrs))
6631 return false;
6633 /* For "f", be conservative, and only cater for cases in which the
6634 whole of the current function is placed in the same section. */
6635 if (!flag_reorder_blocks_and_partition
6636 && TREE_CODE (decl) == FUNCTION_DECL
6637 && arm_function_in_section_p (decl, current_function_section ()))
6638 return false;
6640 if (lookup_attribute ("long_call", attrs))
6641 return true;
6643 return TARGET_LONG_CALLS;
6646 /* Return nonzero if it is ok to make a tail-call to DECL. */
6647 static bool
6648 arm_function_ok_for_sibcall (tree decl, tree exp)
6650 unsigned long func_type;
6652 if (cfun->machine->sibcall_blocked)
6653 return false;
6655 /* Never tailcall something if we are generating code for Thumb-1. */
6656 if (TARGET_THUMB1)
6657 return false;
6659 /* The PIC register is live on entry to VxWorks PLT entries, so we
6660 must make the call before restoring the PIC register. */
6661 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6662 return false;
6664 /* If we are interworking and the function is not declared static
6665 then we can't tail-call it unless we know that it exists in this
6666 compilation unit (since it might be a Thumb routine). */
6667 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6668 && !TREE_ASM_WRITTEN (decl))
6669 return false;
6671 func_type = arm_current_func_type ();
6672 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6673 if (IS_INTERRUPT (func_type))
6674 return false;
6676 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6678 /* Check that the return value locations are the same. For
6679 example that we aren't returning a value from the sibling in
6680 a VFP register but then need to transfer it to a core
6681 register. */
6682 rtx a, b;
6684 a = arm_function_value (TREE_TYPE (exp), decl, false);
6685 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6686 cfun->decl, false);
6687 if (!rtx_equal_p (a, b))
6688 return false;
6691 /* Never tailcall if function may be called with a misaligned SP. */
6692 if (IS_STACKALIGN (func_type))
6693 return false;
6695 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6696 references should become a NOP. Don't convert such calls into
6697 sibling calls. */
6698 if (TARGET_AAPCS_BASED
6699 && arm_abi == ARM_ABI_AAPCS
6700 && decl
6701 && DECL_WEAK (decl))
6702 return false;
6704 /* Everything else is ok. */
6705 return true;
6709 /* Addressing mode support functions. */
6711 /* Return nonzero if X is a legitimate immediate operand when compiling
6712 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6714 legitimate_pic_operand_p (rtx x)
6716 if (GET_CODE (x) == SYMBOL_REF
6717 || (GET_CODE (x) == CONST
6718 && GET_CODE (XEXP (x, 0)) == PLUS
6719 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6720 return 0;
6722 return 1;
6725 /* Record that the current function needs a PIC register. Initialize
6726 cfun->machine->pic_reg if we have not already done so. */
6728 static void
6729 require_pic_register (void)
6731 /* A lot of the logic here is made obscure by the fact that this
6732 routine gets called as part of the rtx cost estimation process.
6733 We don't want those calls to affect any assumptions about the real
6734 function; and further, we can't call entry_of_function() until we
6735 start the real expansion process. */
6736 if (!crtl->uses_pic_offset_table)
6738 gcc_assert (can_create_pseudo_p ());
6739 if (arm_pic_register != INVALID_REGNUM
6740 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6742 if (!cfun->machine->pic_reg)
6743 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6745 /* Play games to avoid marking the function as needing pic
6746 if we are being called as part of the cost-estimation
6747 process. */
6748 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6749 crtl->uses_pic_offset_table = 1;
6751 else
6753 rtx_insn *seq, *insn;
6755 if (!cfun->machine->pic_reg)
6756 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6758 /* Play games to avoid marking the function as needing pic
6759 if we are being called as part of the cost-estimation
6760 process. */
6761 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6763 crtl->uses_pic_offset_table = 1;
6764 start_sequence ();
6766 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6767 && arm_pic_register > LAST_LO_REGNUM)
6768 emit_move_insn (cfun->machine->pic_reg,
6769 gen_rtx_REG (Pmode, arm_pic_register));
6770 else
6771 arm_load_pic_register (0UL);
6773 seq = get_insns ();
6774 end_sequence ();
6776 for (insn = seq; insn; insn = NEXT_INSN (insn))
6777 if (INSN_P (insn))
6778 INSN_LOCATION (insn) = prologue_location;
6780 /* We can be called during expansion of PHI nodes, where
6781 we can't yet emit instructions directly in the final
6782 insn stream. Queue the insns on the entry edge, they will
6783 be committed after everything else is expanded. */
6784 insert_insn_on_edge (seq,
6785 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6792 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6794 if (GET_CODE (orig) == SYMBOL_REF
6795 || GET_CODE (orig) == LABEL_REF)
6797 rtx insn;
6799 if (reg == 0)
6801 gcc_assert (can_create_pseudo_p ());
6802 reg = gen_reg_rtx (Pmode);
6805 /* VxWorks does not impose a fixed gap between segments; the run-time
6806 gap can be different from the object-file gap. We therefore can't
6807 use GOTOFF unless we are absolutely sure that the symbol is in the
6808 same segment as the GOT. Unfortunately, the flexibility of linker
6809 scripts means that we can't be sure of that in general, so assume
6810 that GOTOFF is never valid on VxWorks. */
6811 if ((GET_CODE (orig) == LABEL_REF
6812 || (GET_CODE (orig) == SYMBOL_REF &&
6813 SYMBOL_REF_LOCAL_P (orig)))
6814 && NEED_GOT_RELOC
6815 && arm_pic_data_is_text_relative)
6816 insn = arm_pic_static_addr (orig, reg);
6817 else
6819 rtx pat;
6820 rtx mem;
6822 /* If this function doesn't have a pic register, create one now. */
6823 require_pic_register ();
6825 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6827 /* Make the MEM as close to a constant as possible. */
6828 mem = SET_SRC (pat);
6829 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6830 MEM_READONLY_P (mem) = 1;
6831 MEM_NOTRAP_P (mem) = 1;
6833 insn = emit_insn (pat);
6836 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6837 by loop. */
6838 set_unique_reg_note (insn, REG_EQUAL, orig);
6840 return reg;
6842 else if (GET_CODE (orig) == CONST)
6844 rtx base, offset;
6846 if (GET_CODE (XEXP (orig, 0)) == PLUS
6847 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6848 return orig;
6850 /* Handle the case where we have: const (UNSPEC_TLS). */
6851 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6852 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6853 return orig;
6855 /* Handle the case where we have:
6856 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6857 CONST_INT. */
6858 if (GET_CODE (XEXP (orig, 0)) == PLUS
6859 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6860 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6862 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6863 return orig;
6866 if (reg == 0)
6868 gcc_assert (can_create_pseudo_p ());
6869 reg = gen_reg_rtx (Pmode);
6872 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6874 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6875 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6876 base == reg ? 0 : reg);
6878 if (CONST_INT_P (offset))
6880 /* The base register doesn't really matter, we only want to
6881 test the index for the appropriate mode. */
6882 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6884 gcc_assert (can_create_pseudo_p ());
6885 offset = force_reg (Pmode, offset);
6888 if (CONST_INT_P (offset))
6889 return plus_constant (Pmode, base, INTVAL (offset));
6892 if (GET_MODE_SIZE (mode) > 4
6893 && (GET_MODE_CLASS (mode) == MODE_INT
6894 || TARGET_SOFT_FLOAT))
6896 emit_insn (gen_addsi3 (reg, base, offset));
6897 return reg;
6900 return gen_rtx_PLUS (Pmode, base, offset);
6903 return orig;
6907 /* Find a spare register to use during the prolog of a function. */
6909 static int
6910 thumb_find_work_register (unsigned long pushed_regs_mask)
6912 int reg;
6914 /* Check the argument registers first as these are call-used. The
6915 register allocation order means that sometimes r3 might be used
6916 but earlier argument registers might not, so check them all. */
6917 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6918 if (!df_regs_ever_live_p (reg))
6919 return reg;
6921 /* Before going on to check the call-saved registers we can try a couple
6922 more ways of deducing that r3 is available. The first is when we are
6923 pushing anonymous arguments onto the stack and we have less than 4
6924 registers worth of fixed arguments(*). In this case r3 will be part of
6925 the variable argument list and so we can be sure that it will be
6926 pushed right at the start of the function. Hence it will be available
6927 for the rest of the prologue.
6928 (*): ie crtl->args.pretend_args_size is greater than 0. */
6929 if (cfun->machine->uses_anonymous_args
6930 && crtl->args.pretend_args_size > 0)
6931 return LAST_ARG_REGNUM;
6933 /* The other case is when we have fixed arguments but less than 4 registers
6934 worth. In this case r3 might be used in the body of the function, but
6935 it is not being used to convey an argument into the function. In theory
6936 we could just check crtl->args.size to see how many bytes are
6937 being passed in argument registers, but it seems that it is unreliable.
6938 Sometimes it will have the value 0 when in fact arguments are being
6939 passed. (See testcase execute/20021111-1.c for an example). So we also
6940 check the args_info.nregs field as well. The problem with this field is
6941 that it makes no allowances for arguments that are passed to the
6942 function but which are not used. Hence we could miss an opportunity
6943 when a function has an unused argument in r3. But it is better to be
6944 safe than to be sorry. */
6945 if (! cfun->machine->uses_anonymous_args
6946 && crtl->args.size >= 0
6947 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6948 && (TARGET_AAPCS_BASED
6949 ? crtl->args.info.aapcs_ncrn < 4
6950 : crtl->args.info.nregs < 4))
6951 return LAST_ARG_REGNUM;
6953 /* Otherwise look for a call-saved register that is going to be pushed. */
6954 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6955 if (pushed_regs_mask & (1 << reg))
6956 return reg;
6958 if (TARGET_THUMB2)
6960 /* Thumb-2 can use high regs. */
6961 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6962 if (pushed_regs_mask & (1 << reg))
6963 return reg;
6965 /* Something went wrong - thumb_compute_save_reg_mask()
6966 should have arranged for a suitable register to be pushed. */
6967 gcc_unreachable ();
6970 static GTY(()) int pic_labelno;
6972 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6973 low register. */
6975 void
6976 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6978 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6980 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6981 return;
6983 gcc_assert (flag_pic);
6985 pic_reg = cfun->machine->pic_reg;
6986 if (TARGET_VXWORKS_RTP)
6988 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6989 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6990 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6992 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6994 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6995 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6997 else
6999 /* We use an UNSPEC rather than a LABEL_REF because this label
7000 never appears in the code stream. */
7002 labelno = GEN_INT (pic_labelno++);
7003 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7004 l1 = gen_rtx_CONST (VOIDmode, l1);
7006 /* On the ARM the PC register contains 'dot + 8' at the time of the
7007 addition, on the Thumb it is 'dot + 4'. */
7008 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7009 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7010 UNSPEC_GOTSYM_OFF);
7011 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7013 if (TARGET_32BIT)
7015 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7017 else /* TARGET_THUMB1 */
7019 if (arm_pic_register != INVALID_REGNUM
7020 && REGNO (pic_reg) > LAST_LO_REGNUM)
7022 /* We will have pushed the pic register, so we should always be
7023 able to find a work register. */
7024 pic_tmp = gen_rtx_REG (SImode,
7025 thumb_find_work_register (saved_regs));
7026 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7027 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7028 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7030 else if (arm_pic_register != INVALID_REGNUM
7031 && arm_pic_register > LAST_LO_REGNUM
7032 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7034 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7035 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7036 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7038 else
7039 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7043 /* Need to emit this whether or not we obey regdecls,
7044 since setjmp/longjmp can cause life info to screw up. */
7045 emit_use (pic_reg);
7048 /* Generate code to load the address of a static var when flag_pic is set. */
7049 static rtx
7050 arm_pic_static_addr (rtx orig, rtx reg)
7052 rtx l1, labelno, offset_rtx, insn;
7054 gcc_assert (flag_pic);
7056 /* We use an UNSPEC rather than a LABEL_REF because this label
7057 never appears in the code stream. */
7058 labelno = GEN_INT (pic_labelno++);
7059 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7060 l1 = gen_rtx_CONST (VOIDmode, l1);
7062 /* On the ARM the PC register contains 'dot + 8' at the time of the
7063 addition, on the Thumb it is 'dot + 4'. */
7064 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7065 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7066 UNSPEC_SYMBOL_OFFSET);
7067 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7069 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7070 return insn;
7073 /* Return nonzero if X is valid as an ARM state addressing register. */
7074 static int
7075 arm_address_register_rtx_p (rtx x, int strict_p)
7077 int regno;
7079 if (!REG_P (x))
7080 return 0;
7082 regno = REGNO (x);
7084 if (strict_p)
7085 return ARM_REGNO_OK_FOR_BASE_P (regno);
7087 return (regno <= LAST_ARM_REGNUM
7088 || regno >= FIRST_PSEUDO_REGISTER
7089 || regno == FRAME_POINTER_REGNUM
7090 || regno == ARG_POINTER_REGNUM);
7093 /* Return TRUE if this rtx is the difference of a symbol and a label,
7094 and will reduce to a PC-relative relocation in the object file.
7095 Expressions like this can be left alone when generating PIC, rather
7096 than forced through the GOT. */
7097 static int
7098 pcrel_constant_p (rtx x)
7100 if (GET_CODE (x) == MINUS)
7101 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7103 return FALSE;
7106 /* Return true if X will surely end up in an index register after next
7107 splitting pass. */
7108 static bool
7109 will_be_in_index_register (const_rtx x)
7111 /* arm.md: calculate_pic_address will split this into a register. */
7112 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7115 /* Return nonzero if X is a valid ARM state address operand. */
7117 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7118 int strict_p)
7120 bool use_ldrd;
7121 enum rtx_code code = GET_CODE (x);
7123 if (arm_address_register_rtx_p (x, strict_p))
7124 return 1;
7126 use_ldrd = (TARGET_LDRD
7127 && (mode == DImode
7128 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7130 if (code == POST_INC || code == PRE_DEC
7131 || ((code == PRE_INC || code == POST_DEC)
7132 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7133 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7135 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7136 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7137 && GET_CODE (XEXP (x, 1)) == PLUS
7138 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7140 rtx addend = XEXP (XEXP (x, 1), 1);
7142 /* Don't allow ldrd post increment by register because it's hard
7143 to fixup invalid register choices. */
7144 if (use_ldrd
7145 && GET_CODE (x) == POST_MODIFY
7146 && REG_P (addend))
7147 return 0;
7149 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7150 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7153 /* After reload constants split into minipools will have addresses
7154 from a LABEL_REF. */
7155 else if (reload_completed
7156 && (code == LABEL_REF
7157 || (code == CONST
7158 && GET_CODE (XEXP (x, 0)) == PLUS
7159 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7160 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7161 return 1;
7163 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7164 return 0;
7166 else if (code == PLUS)
7168 rtx xop0 = XEXP (x, 0);
7169 rtx xop1 = XEXP (x, 1);
7171 return ((arm_address_register_rtx_p (xop0, strict_p)
7172 && ((CONST_INT_P (xop1)
7173 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7174 || (!strict_p && will_be_in_index_register (xop1))))
7175 || (arm_address_register_rtx_p (xop1, strict_p)
7176 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7179 #if 0
7180 /* Reload currently can't handle MINUS, so disable this for now */
7181 else if (GET_CODE (x) == MINUS)
7183 rtx xop0 = XEXP (x, 0);
7184 rtx xop1 = XEXP (x, 1);
7186 return (arm_address_register_rtx_p (xop0, strict_p)
7187 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7189 #endif
7191 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7192 && code == SYMBOL_REF
7193 && CONSTANT_POOL_ADDRESS_P (x)
7194 && ! (flag_pic
7195 && symbol_mentioned_p (get_pool_constant (x))
7196 && ! pcrel_constant_p (get_pool_constant (x))))
7197 return 1;
7199 return 0;
7202 /* Return nonzero if X is a valid Thumb-2 address operand. */
7203 static int
7204 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7206 bool use_ldrd;
7207 enum rtx_code code = GET_CODE (x);
7209 if (arm_address_register_rtx_p (x, strict_p))
7210 return 1;
7212 use_ldrd = (TARGET_LDRD
7213 && (mode == DImode
7214 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7216 if (code == POST_INC || code == PRE_DEC
7217 || ((code == PRE_INC || code == POST_DEC)
7218 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7219 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7221 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7222 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7223 && GET_CODE (XEXP (x, 1)) == PLUS
7224 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7226 /* Thumb-2 only has autoincrement by constant. */
7227 rtx addend = XEXP (XEXP (x, 1), 1);
7228 HOST_WIDE_INT offset;
7230 if (!CONST_INT_P (addend))
7231 return 0;
7233 offset = INTVAL(addend);
7234 if (GET_MODE_SIZE (mode) <= 4)
7235 return (offset > -256 && offset < 256);
7237 return (use_ldrd && offset > -1024 && offset < 1024
7238 && (offset & 3) == 0);
7241 /* After reload constants split into minipools will have addresses
7242 from a LABEL_REF. */
7243 else if (reload_completed
7244 && (code == LABEL_REF
7245 || (code == CONST
7246 && GET_CODE (XEXP (x, 0)) == PLUS
7247 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7248 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7249 return 1;
7251 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7252 return 0;
7254 else if (code == PLUS)
7256 rtx xop0 = XEXP (x, 0);
7257 rtx xop1 = XEXP (x, 1);
7259 return ((arm_address_register_rtx_p (xop0, strict_p)
7260 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7261 || (!strict_p && will_be_in_index_register (xop1))))
7262 || (arm_address_register_rtx_p (xop1, strict_p)
7263 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7266 /* Normally we can assign constant values to target registers without
7267 the help of constant pool. But there are cases we have to use constant
7268 pool like:
7269 1) assign a label to register.
7270 2) sign-extend a 8bit value to 32bit and then assign to register.
7272 Constant pool access in format:
7273 (set (reg r0) (mem (symbol_ref (".LC0"))))
7274 will cause the use of literal pool (later in function arm_reorg).
7275 So here we mark such format as an invalid format, then the compiler
7276 will adjust it into:
7277 (set (reg r0) (symbol_ref (".LC0")))
7278 (set (reg r0) (mem (reg r0))).
7279 No extra register is required, and (mem (reg r0)) won't cause the use
7280 of literal pools. */
7281 else if (arm_disable_literal_pool && code == SYMBOL_REF
7282 && CONSTANT_POOL_ADDRESS_P (x))
7283 return 0;
7285 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7286 && code == SYMBOL_REF
7287 && CONSTANT_POOL_ADDRESS_P (x)
7288 && ! (flag_pic
7289 && symbol_mentioned_p (get_pool_constant (x))
7290 && ! pcrel_constant_p (get_pool_constant (x))))
7291 return 1;
7293 return 0;
7296 /* Return nonzero if INDEX is valid for an address index operand in
7297 ARM state. */
7298 static int
7299 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7300 int strict_p)
7302 HOST_WIDE_INT range;
7303 enum rtx_code code = GET_CODE (index);
7305 /* Standard coprocessor addressing modes. */
7306 if (TARGET_HARD_FLOAT
7307 && TARGET_VFP
7308 && (mode == SFmode || mode == DFmode))
7309 return (code == CONST_INT && INTVAL (index) < 1024
7310 && INTVAL (index) > -1024
7311 && (INTVAL (index) & 3) == 0);
7313 /* For quad modes, we restrict the constant offset to be slightly less
7314 than what the instruction format permits. We do this because for
7315 quad mode moves, we will actually decompose them into two separate
7316 double-mode reads or writes. INDEX must therefore be a valid
7317 (double-mode) offset and so should INDEX+8. */
7318 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7319 return (code == CONST_INT
7320 && INTVAL (index) < 1016
7321 && INTVAL (index) > -1024
7322 && (INTVAL (index) & 3) == 0);
7324 /* We have no such constraint on double mode offsets, so we permit the
7325 full range of the instruction format. */
7326 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7327 return (code == CONST_INT
7328 && INTVAL (index) < 1024
7329 && INTVAL (index) > -1024
7330 && (INTVAL (index) & 3) == 0);
7332 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7333 return (code == CONST_INT
7334 && INTVAL (index) < 1024
7335 && INTVAL (index) > -1024
7336 && (INTVAL (index) & 3) == 0);
7338 if (arm_address_register_rtx_p (index, strict_p)
7339 && (GET_MODE_SIZE (mode) <= 4))
7340 return 1;
7342 if (mode == DImode || mode == DFmode)
7344 if (code == CONST_INT)
7346 HOST_WIDE_INT val = INTVAL (index);
7348 if (TARGET_LDRD)
7349 return val > -256 && val < 256;
7350 else
7351 return val > -4096 && val < 4092;
7354 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7357 if (GET_MODE_SIZE (mode) <= 4
7358 && ! (arm_arch4
7359 && (mode == HImode
7360 || mode == HFmode
7361 || (mode == QImode && outer == SIGN_EXTEND))))
7363 if (code == MULT)
7365 rtx xiop0 = XEXP (index, 0);
7366 rtx xiop1 = XEXP (index, 1);
7368 return ((arm_address_register_rtx_p (xiop0, strict_p)
7369 && power_of_two_operand (xiop1, SImode))
7370 || (arm_address_register_rtx_p (xiop1, strict_p)
7371 && power_of_two_operand (xiop0, SImode)));
7373 else if (code == LSHIFTRT || code == ASHIFTRT
7374 || code == ASHIFT || code == ROTATERT)
7376 rtx op = XEXP (index, 1);
7378 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7379 && CONST_INT_P (op)
7380 && INTVAL (op) > 0
7381 && INTVAL (op) <= 31);
7385 /* For ARM v4 we may be doing a sign-extend operation during the
7386 load. */
7387 if (arm_arch4)
7389 if (mode == HImode
7390 || mode == HFmode
7391 || (outer == SIGN_EXTEND && mode == QImode))
7392 range = 256;
7393 else
7394 range = 4096;
7396 else
7397 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7399 return (code == CONST_INT
7400 && INTVAL (index) < range
7401 && INTVAL (index) > -range);
7404 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7405 index operand. i.e. 1, 2, 4 or 8. */
7406 static bool
7407 thumb2_index_mul_operand (rtx op)
7409 HOST_WIDE_INT val;
7411 if (!CONST_INT_P (op))
7412 return false;
7414 val = INTVAL(op);
7415 return (val == 1 || val == 2 || val == 4 || val == 8);
7418 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7419 static int
7420 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7422 enum rtx_code code = GET_CODE (index);
7424 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7425 /* Standard coprocessor addressing modes. */
7426 if (TARGET_HARD_FLOAT
7427 && TARGET_VFP
7428 && (mode == SFmode || mode == DFmode))
7429 return (code == CONST_INT && INTVAL (index) < 1024
7430 /* Thumb-2 allows only > -256 index range for it's core register
7431 load/stores. Since we allow SF/DF in core registers, we have
7432 to use the intersection between -256~4096 (core) and -1024~1024
7433 (coprocessor). */
7434 && INTVAL (index) > -256
7435 && (INTVAL (index) & 3) == 0);
7437 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7439 /* For DImode assume values will usually live in core regs
7440 and only allow LDRD addressing modes. */
7441 if (!TARGET_LDRD || mode != DImode)
7442 return (code == CONST_INT
7443 && INTVAL (index) < 1024
7444 && INTVAL (index) > -1024
7445 && (INTVAL (index) & 3) == 0);
7448 /* For quad modes, we restrict the constant offset to be slightly less
7449 than what the instruction format permits. We do this because for
7450 quad mode moves, we will actually decompose them into two separate
7451 double-mode reads or writes. INDEX must therefore be a valid
7452 (double-mode) offset and so should INDEX+8. */
7453 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7454 return (code == CONST_INT
7455 && INTVAL (index) < 1016
7456 && INTVAL (index) > -1024
7457 && (INTVAL (index) & 3) == 0);
7459 /* We have no such constraint on double mode offsets, so we permit the
7460 full range of the instruction format. */
7461 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7462 return (code == CONST_INT
7463 && INTVAL (index) < 1024
7464 && INTVAL (index) > -1024
7465 && (INTVAL (index) & 3) == 0);
7467 if (arm_address_register_rtx_p (index, strict_p)
7468 && (GET_MODE_SIZE (mode) <= 4))
7469 return 1;
7471 if (mode == DImode || mode == DFmode)
7473 if (code == CONST_INT)
7475 HOST_WIDE_INT val = INTVAL (index);
7476 /* ??? Can we assume ldrd for thumb2? */
7477 /* Thumb-2 ldrd only has reg+const addressing modes. */
7478 /* ldrd supports offsets of +-1020.
7479 However the ldr fallback does not. */
7480 return val > -256 && val < 256 && (val & 3) == 0;
7482 else
7483 return 0;
7486 if (code == MULT)
7488 rtx xiop0 = XEXP (index, 0);
7489 rtx xiop1 = XEXP (index, 1);
7491 return ((arm_address_register_rtx_p (xiop0, strict_p)
7492 && thumb2_index_mul_operand (xiop1))
7493 || (arm_address_register_rtx_p (xiop1, strict_p)
7494 && thumb2_index_mul_operand (xiop0)));
7496 else if (code == ASHIFT)
7498 rtx op = XEXP (index, 1);
7500 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7501 && CONST_INT_P (op)
7502 && INTVAL (op) > 0
7503 && INTVAL (op) <= 3);
7506 return (code == CONST_INT
7507 && INTVAL (index) < 4096
7508 && INTVAL (index) > -256);
7511 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7512 static int
7513 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7515 int regno;
7517 if (!REG_P (x))
7518 return 0;
7520 regno = REGNO (x);
7522 if (strict_p)
7523 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7525 return (regno <= LAST_LO_REGNUM
7526 || regno > LAST_VIRTUAL_REGISTER
7527 || regno == FRAME_POINTER_REGNUM
7528 || (GET_MODE_SIZE (mode) >= 4
7529 && (regno == STACK_POINTER_REGNUM
7530 || regno >= FIRST_PSEUDO_REGISTER
7531 || x == hard_frame_pointer_rtx
7532 || x == arg_pointer_rtx)));
7535 /* Return nonzero if x is a legitimate index register. This is the case
7536 for any base register that can access a QImode object. */
7537 inline static int
7538 thumb1_index_register_rtx_p (rtx x, int strict_p)
7540 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7543 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7545 The AP may be eliminated to either the SP or the FP, so we use the
7546 least common denominator, e.g. SImode, and offsets from 0 to 64.
7548 ??? Verify whether the above is the right approach.
7550 ??? Also, the FP may be eliminated to the SP, so perhaps that
7551 needs special handling also.
7553 ??? Look at how the mips16 port solves this problem. It probably uses
7554 better ways to solve some of these problems.
7556 Although it is not incorrect, we don't accept QImode and HImode
7557 addresses based on the frame pointer or arg pointer until the
7558 reload pass starts. This is so that eliminating such addresses
7559 into stack based ones won't produce impossible code. */
7561 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7563 /* ??? Not clear if this is right. Experiment. */
7564 if (GET_MODE_SIZE (mode) < 4
7565 && !(reload_in_progress || reload_completed)
7566 && (reg_mentioned_p (frame_pointer_rtx, x)
7567 || reg_mentioned_p (arg_pointer_rtx, x)
7568 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7569 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7570 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7571 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7572 return 0;
7574 /* Accept any base register. SP only in SImode or larger. */
7575 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7576 return 1;
7578 /* This is PC relative data before arm_reorg runs. */
7579 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7580 && GET_CODE (x) == SYMBOL_REF
7581 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7582 return 1;
7584 /* This is PC relative data after arm_reorg runs. */
7585 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7586 && reload_completed
7587 && (GET_CODE (x) == LABEL_REF
7588 || (GET_CODE (x) == CONST
7589 && GET_CODE (XEXP (x, 0)) == PLUS
7590 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7591 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7592 return 1;
7594 /* Post-inc indexing only supported for SImode and larger. */
7595 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7596 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7597 return 1;
7599 else if (GET_CODE (x) == PLUS)
7601 /* REG+REG address can be any two index registers. */
7602 /* We disallow FRAME+REG addressing since we know that FRAME
7603 will be replaced with STACK, and SP relative addressing only
7604 permits SP+OFFSET. */
7605 if (GET_MODE_SIZE (mode) <= 4
7606 && XEXP (x, 0) != frame_pointer_rtx
7607 && XEXP (x, 1) != frame_pointer_rtx
7608 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7609 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7610 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7611 return 1;
7613 /* REG+const has 5-7 bit offset for non-SP registers. */
7614 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7615 || XEXP (x, 0) == arg_pointer_rtx)
7616 && CONST_INT_P (XEXP (x, 1))
7617 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7618 return 1;
7620 /* REG+const has 10-bit offset for SP, but only SImode and
7621 larger is supported. */
7622 /* ??? Should probably check for DI/DFmode overflow here
7623 just like GO_IF_LEGITIMATE_OFFSET does. */
7624 else if (REG_P (XEXP (x, 0))
7625 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7626 && GET_MODE_SIZE (mode) >= 4
7627 && CONST_INT_P (XEXP (x, 1))
7628 && INTVAL (XEXP (x, 1)) >= 0
7629 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7630 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7631 return 1;
7633 else if (REG_P (XEXP (x, 0))
7634 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7635 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7636 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7637 && REGNO (XEXP (x, 0))
7638 <= LAST_VIRTUAL_POINTER_REGISTER))
7639 && GET_MODE_SIZE (mode) >= 4
7640 && CONST_INT_P (XEXP (x, 1))
7641 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7642 return 1;
7645 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7646 && GET_MODE_SIZE (mode) == 4
7647 && GET_CODE (x) == SYMBOL_REF
7648 && CONSTANT_POOL_ADDRESS_P (x)
7649 && ! (flag_pic
7650 && symbol_mentioned_p (get_pool_constant (x))
7651 && ! pcrel_constant_p (get_pool_constant (x))))
7652 return 1;
7654 return 0;
7657 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7658 instruction of mode MODE. */
7660 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7662 switch (GET_MODE_SIZE (mode))
7664 case 1:
7665 return val >= 0 && val < 32;
7667 case 2:
7668 return val >= 0 && val < 64 && (val & 1) == 0;
7670 default:
7671 return (val >= 0
7672 && (val + GET_MODE_SIZE (mode)) <= 128
7673 && (val & 3) == 0);
7677 bool
7678 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7680 if (TARGET_ARM)
7681 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7682 else if (TARGET_THUMB2)
7683 return thumb2_legitimate_address_p (mode, x, strict_p);
7684 else /* if (TARGET_THUMB1) */
7685 return thumb1_legitimate_address_p (mode, x, strict_p);
7688 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7690 Given an rtx X being reloaded into a reg required to be
7691 in class CLASS, return the class of reg to actually use.
7692 In general this is just CLASS, but for the Thumb core registers and
7693 immediate constants we prefer a LO_REGS class or a subset. */
7695 static reg_class_t
7696 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7698 if (TARGET_32BIT)
7699 return rclass;
7700 else
7702 if (rclass == GENERAL_REGS)
7703 return LO_REGS;
7704 else
7705 return rclass;
7709 /* Build the SYMBOL_REF for __tls_get_addr. */
7711 static GTY(()) rtx tls_get_addr_libfunc;
7713 static rtx
7714 get_tls_get_addr (void)
7716 if (!tls_get_addr_libfunc)
7717 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7718 return tls_get_addr_libfunc;
7722 arm_load_tp (rtx target)
7724 if (!target)
7725 target = gen_reg_rtx (SImode);
7727 if (TARGET_HARD_TP)
7729 /* Can return in any reg. */
7730 emit_insn (gen_load_tp_hard (target));
7732 else
7734 /* Always returned in r0. Immediately copy the result into a pseudo,
7735 otherwise other uses of r0 (e.g. setting up function arguments) may
7736 clobber the value. */
7738 rtx tmp;
7740 emit_insn (gen_load_tp_soft ());
7742 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7743 emit_move_insn (target, tmp);
7745 return target;
7748 static rtx
7749 load_tls_operand (rtx x, rtx reg)
7751 rtx tmp;
7753 if (reg == NULL_RTX)
7754 reg = gen_reg_rtx (SImode);
7756 tmp = gen_rtx_CONST (SImode, x);
7758 emit_move_insn (reg, tmp);
7760 return reg;
7763 static rtx
7764 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7766 rtx insns, label, labelno, sum;
7768 gcc_assert (reloc != TLS_DESCSEQ);
7769 start_sequence ();
7771 labelno = GEN_INT (pic_labelno++);
7772 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7773 label = gen_rtx_CONST (VOIDmode, label);
7775 sum = gen_rtx_UNSPEC (Pmode,
7776 gen_rtvec (4, x, GEN_INT (reloc), label,
7777 GEN_INT (TARGET_ARM ? 8 : 4)),
7778 UNSPEC_TLS);
7779 reg = load_tls_operand (sum, reg);
7781 if (TARGET_ARM)
7782 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7783 else
7784 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7786 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7787 LCT_PURE, /* LCT_CONST? */
7788 Pmode, 1, reg, Pmode);
7790 insns = get_insns ();
7791 end_sequence ();
7793 return insns;
7796 static rtx
7797 arm_tls_descseq_addr (rtx x, rtx reg)
7799 rtx labelno = GEN_INT (pic_labelno++);
7800 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7801 rtx sum = gen_rtx_UNSPEC (Pmode,
7802 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7803 gen_rtx_CONST (VOIDmode, label),
7804 GEN_INT (!TARGET_ARM)),
7805 UNSPEC_TLS);
7806 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7808 emit_insn (gen_tlscall (x, labelno));
7809 if (!reg)
7810 reg = gen_reg_rtx (SImode);
7811 else
7812 gcc_assert (REGNO (reg) != R0_REGNUM);
7814 emit_move_insn (reg, reg0);
7816 return reg;
7820 legitimize_tls_address (rtx x, rtx reg)
7822 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7823 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7825 switch (model)
7827 case TLS_MODEL_GLOBAL_DYNAMIC:
7828 if (TARGET_GNU2_TLS)
7830 reg = arm_tls_descseq_addr (x, reg);
7832 tp = arm_load_tp (NULL_RTX);
7834 dest = gen_rtx_PLUS (Pmode, tp, reg);
7836 else
7838 /* Original scheme */
7839 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7840 dest = gen_reg_rtx (Pmode);
7841 emit_libcall_block (insns, dest, ret, x);
7843 return dest;
7845 case TLS_MODEL_LOCAL_DYNAMIC:
7846 if (TARGET_GNU2_TLS)
7848 reg = arm_tls_descseq_addr (x, reg);
7850 tp = arm_load_tp (NULL_RTX);
7852 dest = gen_rtx_PLUS (Pmode, tp, reg);
7854 else
7856 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7858 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7859 share the LDM result with other LD model accesses. */
7860 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7861 UNSPEC_TLS);
7862 dest = gen_reg_rtx (Pmode);
7863 emit_libcall_block (insns, dest, ret, eqv);
7865 /* Load the addend. */
7866 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7867 GEN_INT (TLS_LDO32)),
7868 UNSPEC_TLS);
7869 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7870 dest = gen_rtx_PLUS (Pmode, dest, addend);
7872 return dest;
7874 case TLS_MODEL_INITIAL_EXEC:
7875 labelno = GEN_INT (pic_labelno++);
7876 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7877 label = gen_rtx_CONST (VOIDmode, label);
7878 sum = gen_rtx_UNSPEC (Pmode,
7879 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7880 GEN_INT (TARGET_ARM ? 8 : 4)),
7881 UNSPEC_TLS);
7882 reg = load_tls_operand (sum, reg);
7884 if (TARGET_ARM)
7885 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7886 else if (TARGET_THUMB2)
7887 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7888 else
7890 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7891 emit_move_insn (reg, gen_const_mem (SImode, reg));
7894 tp = arm_load_tp (NULL_RTX);
7896 return gen_rtx_PLUS (Pmode, tp, reg);
7898 case TLS_MODEL_LOCAL_EXEC:
7899 tp = arm_load_tp (NULL_RTX);
7901 reg = gen_rtx_UNSPEC (Pmode,
7902 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7903 UNSPEC_TLS);
7904 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7906 return gen_rtx_PLUS (Pmode, tp, reg);
7908 default:
7909 abort ();
7913 /* Try machine-dependent ways of modifying an illegitimate address
7914 to be legitimate. If we find one, return the new, valid address. */
7916 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7918 if (arm_tls_referenced_p (x))
7920 rtx addend = NULL;
7922 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7924 addend = XEXP (XEXP (x, 0), 1);
7925 x = XEXP (XEXP (x, 0), 0);
7928 if (GET_CODE (x) != SYMBOL_REF)
7929 return x;
7931 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7933 x = legitimize_tls_address (x, NULL_RTX);
7935 if (addend)
7937 x = gen_rtx_PLUS (SImode, x, addend);
7938 orig_x = x;
7940 else
7941 return x;
7944 if (!TARGET_ARM)
7946 /* TODO: legitimize_address for Thumb2. */
7947 if (TARGET_THUMB2)
7948 return x;
7949 return thumb_legitimize_address (x, orig_x, mode);
7952 if (GET_CODE (x) == PLUS)
7954 rtx xop0 = XEXP (x, 0);
7955 rtx xop1 = XEXP (x, 1);
7957 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7958 xop0 = force_reg (SImode, xop0);
7960 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7961 && !symbol_mentioned_p (xop1))
7962 xop1 = force_reg (SImode, xop1);
7964 if (ARM_BASE_REGISTER_RTX_P (xop0)
7965 && CONST_INT_P (xop1))
7967 HOST_WIDE_INT n, low_n;
7968 rtx base_reg, val;
7969 n = INTVAL (xop1);
7971 /* VFP addressing modes actually allow greater offsets, but for
7972 now we just stick with the lowest common denominator. */
7973 if (mode == DImode
7974 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7976 low_n = n & 0x0f;
7977 n &= ~0x0f;
7978 if (low_n > 4)
7980 n += 16;
7981 low_n -= 16;
7984 else
7986 low_n = ((mode) == TImode ? 0
7987 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7988 n -= low_n;
7991 base_reg = gen_reg_rtx (SImode);
7992 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7993 emit_move_insn (base_reg, val);
7994 x = plus_constant (Pmode, base_reg, low_n);
7996 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7997 x = gen_rtx_PLUS (SImode, xop0, xop1);
8000 /* XXX We don't allow MINUS any more -- see comment in
8001 arm_legitimate_address_outer_p (). */
8002 else if (GET_CODE (x) == MINUS)
8004 rtx xop0 = XEXP (x, 0);
8005 rtx xop1 = XEXP (x, 1);
8007 if (CONSTANT_P (xop0))
8008 xop0 = force_reg (SImode, xop0);
8010 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8011 xop1 = force_reg (SImode, xop1);
8013 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8014 x = gen_rtx_MINUS (SImode, xop0, xop1);
8017 /* Make sure to take full advantage of the pre-indexed addressing mode
8018 with absolute addresses which often allows for the base register to
8019 be factorized for multiple adjacent memory references, and it might
8020 even allows for the mini pool to be avoided entirely. */
8021 else if (CONST_INT_P (x) && optimize > 0)
8023 unsigned int bits;
8024 HOST_WIDE_INT mask, base, index;
8025 rtx base_reg;
8027 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8028 use a 8-bit index. So let's use a 12-bit index for SImode only and
8029 hope that arm_gen_constant will enable ldrb to use more bits. */
8030 bits = (mode == SImode) ? 12 : 8;
8031 mask = (1 << bits) - 1;
8032 base = INTVAL (x) & ~mask;
8033 index = INTVAL (x) & mask;
8034 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8036 /* It'll most probably be more efficient to generate the base
8037 with more bits set and use a negative index instead. */
8038 base |= mask;
8039 index -= mask;
8041 base_reg = force_reg (SImode, GEN_INT (base));
8042 x = plus_constant (Pmode, base_reg, index);
8045 if (flag_pic)
8047 /* We need to find and carefully transform any SYMBOL and LABEL
8048 references; so go back to the original address expression. */
8049 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8051 if (new_x != orig_x)
8052 x = new_x;
8055 return x;
8059 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8060 to be legitimate. If we find one, return the new, valid address. */
8062 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8064 if (GET_CODE (x) == PLUS
8065 && CONST_INT_P (XEXP (x, 1))
8066 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8067 || INTVAL (XEXP (x, 1)) < 0))
8069 rtx xop0 = XEXP (x, 0);
8070 rtx xop1 = XEXP (x, 1);
8071 HOST_WIDE_INT offset = INTVAL (xop1);
8073 /* Try and fold the offset into a biasing of the base register and
8074 then offsetting that. Don't do this when optimizing for space
8075 since it can cause too many CSEs. */
8076 if (optimize_size && offset >= 0
8077 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8079 HOST_WIDE_INT delta;
8081 if (offset >= 256)
8082 delta = offset - (256 - GET_MODE_SIZE (mode));
8083 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8084 delta = 31 * GET_MODE_SIZE (mode);
8085 else
8086 delta = offset & (~31 * GET_MODE_SIZE (mode));
8088 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8089 NULL_RTX);
8090 x = plus_constant (Pmode, xop0, delta);
8092 else if (offset < 0 && offset > -256)
8093 /* Small negative offsets are best done with a subtract before the
8094 dereference, forcing these into a register normally takes two
8095 instructions. */
8096 x = force_operand (x, NULL_RTX);
8097 else
8099 /* For the remaining cases, force the constant into a register. */
8100 xop1 = force_reg (SImode, xop1);
8101 x = gen_rtx_PLUS (SImode, xop0, xop1);
8104 else if (GET_CODE (x) == PLUS
8105 && s_register_operand (XEXP (x, 1), SImode)
8106 && !s_register_operand (XEXP (x, 0), SImode))
8108 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8110 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8113 if (flag_pic)
8115 /* We need to find and carefully transform any SYMBOL and LABEL
8116 references; so go back to the original address expression. */
8117 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8119 if (new_x != orig_x)
8120 x = new_x;
8123 return x;
8126 /* Return TRUE if X contains any TLS symbol references. */
8128 bool
8129 arm_tls_referenced_p (rtx x)
8131 if (! TARGET_HAVE_TLS)
8132 return false;
8134 subrtx_iterator::array_type array;
8135 FOR_EACH_SUBRTX (iter, array, x, ALL)
8137 const_rtx x = *iter;
8138 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8139 return true;
8141 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8142 TLS offsets, not real symbol references. */
8143 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8144 iter.skip_subrtxes ();
8146 return false;
8149 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8151 On the ARM, allow any integer (invalid ones are removed later by insn
8152 patterns), nice doubles and symbol_refs which refer to the function's
8153 constant pool XXX.
8155 When generating pic allow anything. */
8157 static bool
8158 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8160 return flag_pic || !label_mentioned_p (x);
8163 static bool
8164 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8166 return (CONST_INT_P (x)
8167 || CONST_DOUBLE_P (x)
8168 || CONSTANT_ADDRESS_P (x)
8169 || flag_pic);
8172 static bool
8173 arm_legitimate_constant_p (machine_mode mode, rtx x)
8175 return (!arm_cannot_force_const_mem (mode, x)
8176 && (TARGET_32BIT
8177 ? arm_legitimate_constant_p_1 (mode, x)
8178 : thumb_legitimate_constant_p (mode, x)));
8181 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8183 static bool
8184 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8186 rtx base, offset;
8188 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8190 split_const (x, &base, &offset);
8191 if (GET_CODE (base) == SYMBOL_REF
8192 && !offset_within_block_p (base, INTVAL (offset)))
8193 return true;
8195 return arm_tls_referenced_p (x);
8198 #define REG_OR_SUBREG_REG(X) \
8199 (REG_P (X) \
8200 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8202 #define REG_OR_SUBREG_RTX(X) \
8203 (REG_P (X) ? (X) : SUBREG_REG (X))
8205 static inline int
8206 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8208 machine_mode mode = GET_MODE (x);
8209 int total, words;
8211 switch (code)
8213 case ASHIFT:
8214 case ASHIFTRT:
8215 case LSHIFTRT:
8216 case ROTATERT:
8217 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8219 case PLUS:
8220 case MINUS:
8221 case COMPARE:
8222 case NEG:
8223 case NOT:
8224 return COSTS_N_INSNS (1);
8226 case MULT:
8227 if (CONST_INT_P (XEXP (x, 1)))
8229 int cycles = 0;
8230 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8232 while (i)
8234 i >>= 2;
8235 cycles++;
8237 return COSTS_N_INSNS (2) + cycles;
8239 return COSTS_N_INSNS (1) + 16;
8241 case SET:
8242 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8243 the mode. */
8244 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8245 return (COSTS_N_INSNS (words)
8246 + 4 * ((MEM_P (SET_SRC (x)))
8247 + MEM_P (SET_DEST (x))));
8249 case CONST_INT:
8250 if (outer == SET)
8252 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8253 return 0;
8254 if (thumb_shiftable_const (INTVAL (x)))
8255 return COSTS_N_INSNS (2);
8256 return COSTS_N_INSNS (3);
8258 else if ((outer == PLUS || outer == COMPARE)
8259 && INTVAL (x) < 256 && INTVAL (x) > -256)
8260 return 0;
8261 else if ((outer == IOR || outer == XOR || outer == AND)
8262 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8263 return COSTS_N_INSNS (1);
8264 else if (outer == AND)
8266 int i;
8267 /* This duplicates the tests in the andsi3 expander. */
8268 for (i = 9; i <= 31; i++)
8269 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8270 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8271 return COSTS_N_INSNS (2);
8273 else if (outer == ASHIFT || outer == ASHIFTRT
8274 || outer == LSHIFTRT)
8275 return 0;
8276 return COSTS_N_INSNS (2);
8278 case CONST:
8279 case CONST_DOUBLE:
8280 case LABEL_REF:
8281 case SYMBOL_REF:
8282 return COSTS_N_INSNS (3);
8284 case UDIV:
8285 case UMOD:
8286 case DIV:
8287 case MOD:
8288 return 100;
8290 case TRUNCATE:
8291 return 99;
8293 case AND:
8294 case XOR:
8295 case IOR:
8296 /* XXX guess. */
8297 return 8;
8299 case MEM:
8300 /* XXX another guess. */
8301 /* Memory costs quite a lot for the first word, but subsequent words
8302 load at the equivalent of a single insn each. */
8303 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8304 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8305 ? 4 : 0));
8307 case IF_THEN_ELSE:
8308 /* XXX a guess. */
8309 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8310 return 14;
8311 return 2;
8313 case SIGN_EXTEND:
8314 case ZERO_EXTEND:
8315 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8316 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8318 if (mode == SImode)
8319 return total;
8321 if (arm_arch6)
8322 return total + COSTS_N_INSNS (1);
8324 /* Assume a two-shift sequence. Increase the cost slightly so
8325 we prefer actual shifts over an extend operation. */
8326 return total + 1 + COSTS_N_INSNS (2);
8328 default:
8329 return 99;
8333 static inline bool
8334 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8336 machine_mode mode = GET_MODE (x);
8337 enum rtx_code subcode;
8338 rtx operand;
8339 enum rtx_code code = GET_CODE (x);
8340 *total = 0;
8342 switch (code)
8344 case MEM:
8345 /* Memory costs quite a lot for the first word, but subsequent words
8346 load at the equivalent of a single insn each. */
8347 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8348 return true;
8350 case DIV:
8351 case MOD:
8352 case UDIV:
8353 case UMOD:
8354 if (TARGET_HARD_FLOAT && mode == SFmode)
8355 *total = COSTS_N_INSNS (2);
8356 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8357 *total = COSTS_N_INSNS (4);
8358 else
8359 *total = COSTS_N_INSNS (20);
8360 return false;
8362 case ROTATE:
8363 if (REG_P (XEXP (x, 1)))
8364 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8365 else if (!CONST_INT_P (XEXP (x, 1)))
8366 *total = rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8368 /* Fall through */
8369 case ROTATERT:
8370 if (mode != SImode)
8372 *total += COSTS_N_INSNS (4);
8373 return true;
8376 /* Fall through */
8377 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8378 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8379 if (mode == DImode)
8381 *total += COSTS_N_INSNS (3);
8382 return true;
8385 *total += COSTS_N_INSNS (1);
8386 /* Increase the cost of complex shifts because they aren't any faster,
8387 and reduce dual issue opportunities. */
8388 if (arm_tune_cortex_a9
8389 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8390 ++*total;
8392 return true;
8394 case MINUS:
8395 if (mode == DImode)
8397 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8398 if (CONST_INT_P (XEXP (x, 0))
8399 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8401 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8402 return true;
8405 if (CONST_INT_P (XEXP (x, 1))
8406 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8408 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8409 return true;
8412 return false;
8415 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8417 if (TARGET_HARD_FLOAT
8418 && (mode == SFmode
8419 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8421 *total = COSTS_N_INSNS (1);
8422 if (CONST_DOUBLE_P (XEXP (x, 0))
8423 && arm_const_double_rtx (XEXP (x, 0)))
8425 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8426 return true;
8429 if (CONST_DOUBLE_P (XEXP (x, 1))
8430 && arm_const_double_rtx (XEXP (x, 1)))
8432 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8433 return true;
8436 return false;
8438 *total = COSTS_N_INSNS (20);
8439 return false;
8442 *total = COSTS_N_INSNS (1);
8443 if (CONST_INT_P (XEXP (x, 0))
8444 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8446 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8447 return true;
8450 subcode = GET_CODE (XEXP (x, 1));
8451 if (subcode == ASHIFT || subcode == ASHIFTRT
8452 || subcode == LSHIFTRT
8453 || subcode == ROTATE || subcode == ROTATERT)
8455 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8456 *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
8457 return true;
8460 /* A shift as a part of RSB costs no more than RSB itself. */
8461 if (GET_CODE (XEXP (x, 0)) == MULT
8462 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8464 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, speed);
8465 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8466 return true;
8469 if (subcode == MULT
8470 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8472 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8473 *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
8474 return true;
8477 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8478 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8480 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
8481 0, speed);
8482 if (REG_P (XEXP (XEXP (x, 1), 0))
8483 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8484 *total += COSTS_N_INSNS (1);
8486 return true;
8489 /* Fall through */
8491 case PLUS:
8492 if (code == PLUS && arm_arch6 && mode == SImode
8493 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8494 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8496 *total = COSTS_N_INSNS (1);
8497 *total += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
8498 GET_CODE (XEXP (x, 0)), 0, speed);
8499 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8500 return true;
8503 /* MLA: All arguments must be registers. We filter out
8504 multiplication by a power of two, so that we fall down into
8505 the code below. */
8506 if (GET_CODE (XEXP (x, 0)) == MULT
8507 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8509 /* The cost comes from the cost of the multiply. */
8510 return false;
8513 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8515 if (TARGET_HARD_FLOAT
8516 && (mode == SFmode
8517 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8519 *total = COSTS_N_INSNS (1);
8520 if (CONST_DOUBLE_P (XEXP (x, 1))
8521 && arm_const_double_rtx (XEXP (x, 1)))
8523 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8524 return true;
8527 return false;
8530 *total = COSTS_N_INSNS (20);
8531 return false;
8534 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8535 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8537 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), mode, code,
8538 1, speed);
8539 if (REG_P (XEXP (XEXP (x, 0), 0))
8540 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8541 *total += COSTS_N_INSNS (1);
8542 return true;
8545 /* Fall through */
8547 case AND: case XOR: case IOR:
8549 /* Normally the frame registers will be spilt into reg+const during
8550 reload, so it is a bad idea to combine them with other instructions,
8551 since then they might not be moved outside of loops. As a compromise
8552 we allow integration with ops that have a constant as their second
8553 operand. */
8554 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8555 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8556 && !CONST_INT_P (XEXP (x, 1)))
8557 *total = COSTS_N_INSNS (1);
8559 if (mode == DImode)
8561 *total += COSTS_N_INSNS (2);
8562 if (CONST_INT_P (XEXP (x, 1))
8563 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8565 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8566 return true;
8569 return false;
8572 *total += COSTS_N_INSNS (1);
8573 if (CONST_INT_P (XEXP (x, 1))
8574 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8576 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8577 return true;
8579 subcode = GET_CODE (XEXP (x, 0));
8580 if (subcode == ASHIFT || subcode == ASHIFTRT
8581 || subcode == LSHIFTRT
8582 || subcode == ROTATE || subcode == ROTATERT)
8584 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8585 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8586 return true;
8589 if (subcode == MULT
8590 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8592 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8593 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8594 return true;
8597 if (subcode == UMIN || subcode == UMAX
8598 || subcode == SMIN || subcode == SMAX)
8600 *total = COSTS_N_INSNS (3);
8601 return true;
8604 return false;
8606 case MULT:
8607 /* This should have been handled by the CPU specific routines. */
8608 gcc_unreachable ();
8610 case TRUNCATE:
8611 if (arm_arch3m && mode == SImode
8612 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8613 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8614 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8615 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8616 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8617 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8619 *total = rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, LSHIFTRT,
8620 0, speed);
8621 return true;
8623 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8624 return false;
8626 case NEG:
8627 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8629 if (TARGET_HARD_FLOAT
8630 && (mode == SFmode
8631 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8633 *total = COSTS_N_INSNS (1);
8634 return false;
8636 *total = COSTS_N_INSNS (2);
8637 return false;
8640 /* Fall through */
8641 case NOT:
8642 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8643 if (mode == SImode && code == NOT)
8645 subcode = GET_CODE (XEXP (x, 0));
8646 if (subcode == ASHIFT || subcode == ASHIFTRT
8647 || subcode == LSHIFTRT
8648 || subcode == ROTATE || subcode == ROTATERT
8649 || (subcode == MULT
8650 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8652 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode,
8653 0, speed);
8654 /* Register shifts cost an extra cycle. */
8655 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8656 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8657 mode, subcode,
8658 1, speed);
8659 return true;
8663 return false;
8665 case IF_THEN_ELSE:
8666 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8668 *total = COSTS_N_INSNS (4);
8669 return true;
8672 operand = XEXP (x, 0);
8674 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8675 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8676 && REG_P (XEXP (operand, 0))
8677 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8678 *total += COSTS_N_INSNS (1);
8679 *total += rtx_cost (XEXP (x, 1), VOIDmode, code, 1, speed);
8680 *total += rtx_cost (XEXP (x, 2), VOIDmode, code, 2, speed);
8681 return true;
8683 case NE:
8684 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8686 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
8687 0, speed);
8688 return true;
8690 goto scc_insn;
8692 case GE:
8693 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8694 && mode == SImode && XEXP (x, 1) == const0_rtx)
8696 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
8697 0, speed);
8698 return true;
8700 goto scc_insn;
8702 case LT:
8703 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8704 && mode == SImode && XEXP (x, 1) == const0_rtx)
8706 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
8707 0, speed);
8708 return true;
8710 goto scc_insn;
8712 case EQ:
8713 case GT:
8714 case LE:
8715 case GEU:
8716 case LTU:
8717 case GTU:
8718 case LEU:
8719 case UNORDERED:
8720 case ORDERED:
8721 case UNEQ:
8722 case UNGE:
8723 case UNLT:
8724 case UNGT:
8725 case UNLE:
8726 scc_insn:
8727 /* SCC insns. In the case where the comparison has already been
8728 performed, then they cost 2 instructions. Otherwise they need
8729 an additional comparison before them. */
8730 *total = COSTS_N_INSNS (2);
8731 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8733 return true;
8736 /* Fall through */
8737 case COMPARE:
8738 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8740 *total = 0;
8741 return true;
8744 *total += COSTS_N_INSNS (1);
8745 if (CONST_INT_P (XEXP (x, 1))
8746 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8748 *total += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed);
8749 return true;
8752 subcode = GET_CODE (XEXP (x, 0));
8753 if (subcode == ASHIFT || subcode == ASHIFTRT
8754 || subcode == LSHIFTRT
8755 || subcode == ROTATE || subcode == ROTATERT)
8757 mode = GET_MODE (XEXP (x, 0));
8758 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8759 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8760 return true;
8763 if (subcode == MULT
8764 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8766 mode = GET_MODE (XEXP (x, 0));
8767 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8768 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8769 return true;
8772 return false;
8774 case UMIN:
8775 case UMAX:
8776 case SMIN:
8777 case SMAX:
8778 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8779 if (!CONST_INT_P (XEXP (x, 1))
8780 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8781 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8782 return true;
8784 case ABS:
8785 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8787 if (TARGET_HARD_FLOAT
8788 && (mode == SFmode
8789 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8791 *total = COSTS_N_INSNS (1);
8792 return false;
8794 *total = COSTS_N_INSNS (20);
8795 return false;
8797 *total = COSTS_N_INSNS (1);
8798 if (mode == DImode)
8799 *total += COSTS_N_INSNS (3);
8800 return false;
8802 case SIGN_EXTEND:
8803 case ZERO_EXTEND:
8804 *total = 0;
8805 if (GET_MODE_CLASS (mode) == MODE_INT)
8807 rtx op = XEXP (x, 0);
8808 machine_mode opmode = GET_MODE (op);
8810 if (mode == DImode)
8811 *total += COSTS_N_INSNS (1);
8813 if (opmode != SImode)
8815 if (MEM_P (op))
8817 /* If !arm_arch4, we use one of the extendhisi2_mem
8818 or movhi_bytes patterns for HImode. For a QImode
8819 sign extension, we first zero-extend from memory
8820 and then perform a shift sequence. */
8821 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8822 *total += COSTS_N_INSNS (2);
8824 else if (arm_arch6)
8825 *total += COSTS_N_INSNS (1);
8827 /* We don't have the necessary insn, so we need to perform some
8828 other operation. */
8829 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8830 /* An and with constant 255. */
8831 *total += COSTS_N_INSNS (1);
8832 else
8833 /* A shift sequence. Increase costs slightly to avoid
8834 combining two shifts into an extend operation. */
8835 *total += COSTS_N_INSNS (2) + 1;
8838 return false;
8841 switch (GET_MODE (XEXP (x, 0)))
8843 case V8QImode:
8844 case V4HImode:
8845 case V2SImode:
8846 case V4QImode:
8847 case V2HImode:
8848 *total = COSTS_N_INSNS (1);
8849 return false;
8851 default:
8852 gcc_unreachable ();
8854 gcc_unreachable ();
8856 case ZERO_EXTRACT:
8857 case SIGN_EXTRACT:
8858 mode = GET_MODE (XEXP (x, 0));
8859 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8860 return true;
8862 case CONST_INT:
8863 if (const_ok_for_arm (INTVAL (x))
8864 || const_ok_for_arm (~INTVAL (x)))
8865 *total = COSTS_N_INSNS (1);
8866 else
8867 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8868 INTVAL (x), NULL_RTX,
8869 NULL_RTX, 0, 0));
8870 return true;
8872 case CONST:
8873 case LABEL_REF:
8874 case SYMBOL_REF:
8875 *total = COSTS_N_INSNS (3);
8876 return true;
8878 case HIGH:
8879 *total = COSTS_N_INSNS (1);
8880 return true;
8882 case LO_SUM:
8883 *total = COSTS_N_INSNS (1);
8884 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8885 return true;
8887 case CONST_DOUBLE:
8888 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8889 && (mode == SFmode || !TARGET_VFP_SINGLE))
8890 *total = COSTS_N_INSNS (1);
8891 else
8892 *total = COSTS_N_INSNS (4);
8893 return true;
8895 case SET:
8896 /* The vec_extract patterns accept memory operands that require an
8897 address reload. Account for the cost of that reload to give the
8898 auto-inc-dec pass an incentive to try to replace them. */
8899 if (TARGET_NEON && MEM_P (SET_DEST (x))
8900 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8902 mode = GET_MODE (SET_DEST (x));
8903 *total = rtx_cost (SET_DEST (x), mode, code, 0, speed);
8904 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8905 *total += COSTS_N_INSNS (1);
8906 return true;
8908 /* Likewise for the vec_set patterns. */
8909 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8910 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8911 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8913 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8914 mode = GET_MODE (SET_DEST (x));
8915 *total = rtx_cost (mem, mode, code, 0, speed);
8916 if (!neon_vector_mem_operand (mem, 2, true))
8917 *total += COSTS_N_INSNS (1);
8918 return true;
8920 return false;
8922 case UNSPEC:
8923 /* We cost this as high as our memory costs to allow this to
8924 be hoisted from loops. */
8925 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8927 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8929 return true;
8931 case CONST_VECTOR:
8932 if (TARGET_NEON
8933 && TARGET_HARD_FLOAT
8934 && outer == SET
8935 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8936 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8937 *total = COSTS_N_INSNS (1);
8938 else
8939 *total = COSTS_N_INSNS (4);
8940 return true;
8942 default:
8943 *total = COSTS_N_INSNS (4);
8944 return false;
8948 /* Estimates the size cost of thumb1 instructions.
8949 For now most of the code is copied from thumb1_rtx_costs. We need more
8950 fine grain tuning when we have more related test cases. */
8951 static inline int
8952 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8954 machine_mode mode = GET_MODE (x);
8955 int words;
8957 switch (code)
8959 case ASHIFT:
8960 case ASHIFTRT:
8961 case LSHIFTRT:
8962 case ROTATERT:
8963 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8965 case PLUS:
8966 case MINUS:
8967 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8968 defined by RTL expansion, especially for the expansion of
8969 multiplication. */
8970 if ((GET_CODE (XEXP (x, 0)) == MULT
8971 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8972 || (GET_CODE (XEXP (x, 1)) == MULT
8973 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8974 return COSTS_N_INSNS (2);
8975 /* On purpose fall through for normal RTX. */
8976 case COMPARE:
8977 case NEG:
8978 case NOT:
8979 return COSTS_N_INSNS (1);
8981 case MULT:
8982 if (CONST_INT_P (XEXP (x, 1)))
8984 /* Thumb1 mul instruction can't operate on const. We must Load it
8985 into a register first. */
8986 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8987 /* For the targets which have a very small and high-latency multiply
8988 unit, we prefer to synthesize the mult with up to 5 instructions,
8989 giving a good balance between size and performance. */
8990 if (arm_arch6m && arm_m_profile_small_mul)
8991 return COSTS_N_INSNS (5);
8992 else
8993 return COSTS_N_INSNS (1) + const_size;
8995 return COSTS_N_INSNS (1);
8997 case SET:
8998 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8999 the mode. */
9000 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9001 return COSTS_N_INSNS (words)
9002 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
9003 || satisfies_constraint_K (SET_SRC (x))
9004 /* thumb1_movdi_insn. */
9005 || ((words > 1) && MEM_P (SET_SRC (x))));
9007 case CONST_INT:
9008 if (outer == SET)
9010 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
9011 return COSTS_N_INSNS (1);
9012 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9013 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9014 return COSTS_N_INSNS (2);
9015 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9016 if (thumb_shiftable_const (INTVAL (x)))
9017 return COSTS_N_INSNS (2);
9018 return COSTS_N_INSNS (3);
9020 else if ((outer == PLUS || outer == COMPARE)
9021 && INTVAL (x) < 256 && INTVAL (x) > -256)
9022 return 0;
9023 else if ((outer == IOR || outer == XOR || outer == AND)
9024 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9025 return COSTS_N_INSNS (1);
9026 else if (outer == AND)
9028 int i;
9029 /* This duplicates the tests in the andsi3 expander. */
9030 for (i = 9; i <= 31; i++)
9031 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
9032 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
9033 return COSTS_N_INSNS (2);
9035 else if (outer == ASHIFT || outer == ASHIFTRT
9036 || outer == LSHIFTRT)
9037 return 0;
9038 return COSTS_N_INSNS (2);
9040 case CONST:
9041 case CONST_DOUBLE:
9042 case LABEL_REF:
9043 case SYMBOL_REF:
9044 return COSTS_N_INSNS (3);
9046 case UDIV:
9047 case UMOD:
9048 case DIV:
9049 case MOD:
9050 return 100;
9052 case TRUNCATE:
9053 return 99;
9055 case AND:
9056 case XOR:
9057 case IOR:
9058 return COSTS_N_INSNS (1);
9060 case MEM:
9061 return (COSTS_N_INSNS (1)
9062 + COSTS_N_INSNS (1)
9063 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9064 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9065 ? COSTS_N_INSNS (1) : 0));
9067 case IF_THEN_ELSE:
9068 /* XXX a guess. */
9069 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9070 return 14;
9071 return 2;
9073 case ZERO_EXTEND:
9074 /* XXX still guessing. */
9075 switch (GET_MODE (XEXP (x, 0)))
9077 case QImode:
9078 return (1 + (mode == DImode ? 4 : 0)
9079 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9081 case HImode:
9082 return (4 + (mode == DImode ? 4 : 0)
9083 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9085 case SImode:
9086 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9088 default:
9089 return 99;
9092 default:
9093 return 99;
9097 /* RTX costs when optimizing for size. */
9098 static bool
9099 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9100 int *total)
9102 machine_mode mode = GET_MODE (x);
9103 if (TARGET_THUMB1)
9105 *total = thumb1_size_rtx_costs (x, code, outer_code);
9106 return true;
9109 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9110 switch (code)
9112 case MEM:
9113 /* A memory access costs 1 insn if the mode is small, or the address is
9114 a single register, otherwise it costs one insn per word. */
9115 if (REG_P (XEXP (x, 0)))
9116 *total = COSTS_N_INSNS (1);
9117 else if (flag_pic
9118 && GET_CODE (XEXP (x, 0)) == PLUS
9119 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9120 /* This will be split into two instructions.
9121 See arm.md:calculate_pic_address. */
9122 *total = COSTS_N_INSNS (2);
9123 else
9124 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9125 return true;
9127 case DIV:
9128 case MOD:
9129 case UDIV:
9130 case UMOD:
9131 /* Needs a libcall, so it costs about this. */
9132 *total = COSTS_N_INSNS (2);
9133 return false;
9135 case ROTATE:
9136 if (mode == SImode && REG_P (XEXP (x, 1)))
9138 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
9139 0, false);
9140 return true;
9142 /* Fall through */
9143 case ROTATERT:
9144 case ASHIFT:
9145 case LSHIFTRT:
9146 case ASHIFTRT:
9147 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9149 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), mode, code,
9150 0, false);
9151 return true;
9153 else if (mode == SImode)
9155 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
9156 0, false);
9157 /* Slightly disparage register shifts, but not by much. */
9158 if (!CONST_INT_P (XEXP (x, 1)))
9159 *total += 1 + rtx_cost (XEXP (x, 1), mode, code, 1, false);
9160 return true;
9163 /* Needs a libcall. */
9164 *total = COSTS_N_INSNS (2);
9165 return false;
9167 case MINUS:
9168 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9169 && (mode == SFmode || !TARGET_VFP_SINGLE))
9171 *total = COSTS_N_INSNS (1);
9172 return false;
9175 if (mode == SImode)
9177 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9178 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9180 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9181 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9182 || subcode1 == ROTATE || subcode1 == ROTATERT
9183 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9184 || subcode1 == ASHIFTRT)
9186 /* It's just the cost of the two operands. */
9187 *total = 0;
9188 return false;
9191 *total = COSTS_N_INSNS (1);
9192 return false;
9195 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9196 return false;
9198 case PLUS:
9199 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9200 && (mode == SFmode || !TARGET_VFP_SINGLE))
9202 *total = COSTS_N_INSNS (1);
9203 return false;
9206 /* A shift as a part of ADD costs nothing. */
9207 if (GET_CODE (XEXP (x, 0)) == MULT
9208 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9210 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9211 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, false);
9212 *total += rtx_cost (XEXP (x, 1), mode, code, 1, false);
9213 return true;
9216 /* Fall through */
9217 case AND: case XOR: case IOR:
9218 if (mode == SImode)
9220 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9222 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9223 || subcode == LSHIFTRT || subcode == ASHIFTRT
9224 || (code == AND && subcode == NOT))
9226 /* It's just the cost of the two operands. */
9227 *total = 0;
9228 return false;
9232 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9233 return false;
9235 case MULT:
9236 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9237 return false;
9239 case NEG:
9240 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9241 && (mode == SFmode || !TARGET_VFP_SINGLE))
9243 *total = COSTS_N_INSNS (1);
9244 return false;
9247 /* Fall through */
9248 case NOT:
9249 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9251 return false;
9253 case IF_THEN_ELSE:
9254 *total = 0;
9255 return false;
9257 case COMPARE:
9258 if (cc_register (XEXP (x, 0), VOIDmode))
9259 * total = 0;
9260 else
9261 *total = COSTS_N_INSNS (1);
9262 return false;
9264 case ABS:
9265 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9266 && (mode == SFmode || !TARGET_VFP_SINGLE))
9267 *total = COSTS_N_INSNS (1);
9268 else
9269 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9270 return false;
9272 case SIGN_EXTEND:
9273 case ZERO_EXTEND:
9274 return arm_rtx_costs_1 (x, outer_code, total, 0);
9276 case CONST_INT:
9277 if (const_ok_for_arm (INTVAL (x)))
9278 /* A multiplication by a constant requires another instruction
9279 to load the constant to a register. */
9280 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9281 ? 1 : 0);
9282 else if (const_ok_for_arm (~INTVAL (x)))
9283 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9284 else if (const_ok_for_arm (-INTVAL (x)))
9286 if (outer_code == COMPARE || outer_code == PLUS
9287 || outer_code == MINUS)
9288 *total = 0;
9289 else
9290 *total = COSTS_N_INSNS (1);
9292 else
9293 *total = COSTS_N_INSNS (2);
9294 return true;
9296 case CONST:
9297 case LABEL_REF:
9298 case SYMBOL_REF:
9299 *total = COSTS_N_INSNS (2);
9300 return true;
9302 case CONST_DOUBLE:
9303 *total = COSTS_N_INSNS (4);
9304 return true;
9306 case CONST_VECTOR:
9307 if (TARGET_NEON
9308 && TARGET_HARD_FLOAT
9309 && outer_code == SET
9310 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9311 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9312 *total = COSTS_N_INSNS (1);
9313 else
9314 *total = COSTS_N_INSNS (4);
9315 return true;
9317 case HIGH:
9318 case LO_SUM:
9319 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9320 cost of these slightly. */
9321 *total = COSTS_N_INSNS (1) + 1;
9322 return true;
9324 case SET:
9325 return false;
9327 default:
9328 if (mode != VOIDmode)
9329 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9330 else
9331 *total = COSTS_N_INSNS (4); /* How knows? */
9332 return false;
9336 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9337 operand, then return the operand that is being shifted. If the shift
9338 is not by a constant, then set SHIFT_REG to point to the operand.
9339 Return NULL if OP is not a shifter operand. */
9340 static rtx
9341 shifter_op_p (rtx op, rtx *shift_reg)
9343 enum rtx_code code = GET_CODE (op);
9345 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9346 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9347 return XEXP (op, 0);
9348 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9349 return XEXP (op, 0);
9350 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9351 || code == ASHIFTRT)
9353 if (!CONST_INT_P (XEXP (op, 1)))
9354 *shift_reg = XEXP (op, 1);
9355 return XEXP (op, 0);
9358 return NULL;
9361 static bool
9362 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9364 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9365 rtx_code code = GET_CODE (x);
9366 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9368 switch (XINT (x, 1))
9370 case UNSPEC_UNALIGNED_LOAD:
9371 /* We can only do unaligned loads into the integer unit, and we can't
9372 use LDM or LDRD. */
9373 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9374 if (speed_p)
9375 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9376 + extra_cost->ldst.load_unaligned);
9378 #ifdef NOT_YET
9379 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9380 ADDR_SPACE_GENERIC, speed_p);
9381 #endif
9382 return true;
9384 case UNSPEC_UNALIGNED_STORE:
9385 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9386 if (speed_p)
9387 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9388 + extra_cost->ldst.store_unaligned);
9390 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9391 #ifdef NOT_YET
9392 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9393 ADDR_SPACE_GENERIC, speed_p);
9394 #endif
9395 return true;
9397 case UNSPEC_VRINTZ:
9398 case UNSPEC_VRINTP:
9399 case UNSPEC_VRINTM:
9400 case UNSPEC_VRINTR:
9401 case UNSPEC_VRINTX:
9402 case UNSPEC_VRINTA:
9403 if (speed_p)
9404 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9406 return true;
9407 default:
9408 *cost = COSTS_N_INSNS (2);
9409 break;
9411 return true;
9414 /* Cost of a libcall. We assume one insn per argument, an amount for the
9415 call (one insn for -Os) and then one for processing the result. */
9416 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9418 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9419 do \
9421 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9422 if (shift_op != NULL \
9423 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9425 if (shift_reg) \
9427 if (speed_p) \
9428 *cost += extra_cost->alu.arith_shift_reg; \
9429 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9430 ASHIFT, 1, speed_p); \
9432 else if (speed_p) \
9433 *cost += extra_cost->alu.arith_shift; \
9435 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9436 ASHIFT, 0, speed_p) \
9437 + rtx_cost (XEXP (x, 1 - IDX), \
9438 GET_MODE (shift_op), \
9439 OP, 1, speed_p)); \
9440 return true; \
9443 while (0);
9445 /* RTX costs. Make an estimate of the cost of executing the operation
9446 X, which is contained with an operation with code OUTER_CODE.
9447 SPEED_P indicates whether the cost desired is the performance cost,
9448 or the size cost. The estimate is stored in COST and the return
9449 value is TRUE if the cost calculation is final, or FALSE if the
9450 caller should recurse through the operands of X to add additional
9451 costs.
9453 We currently make no attempt to model the size savings of Thumb-2
9454 16-bit instructions. At the normal points in compilation where
9455 this code is called we have no measure of whether the condition
9456 flags are live or not, and thus no realistic way to determine what
9457 the size will eventually be. */
9458 static bool
9459 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9460 const struct cpu_cost_table *extra_cost,
9461 int *cost, bool speed_p)
9463 machine_mode mode = GET_MODE (x);
9465 *cost = COSTS_N_INSNS (1);
9467 if (TARGET_THUMB1)
9469 if (speed_p)
9470 *cost = thumb1_rtx_costs (x, code, outer_code);
9471 else
9472 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9473 return true;
9476 switch (code)
9478 case SET:
9479 *cost = 0;
9480 /* SET RTXs don't have a mode so we get it from the destination. */
9481 mode = GET_MODE (SET_DEST (x));
9483 if (REG_P (SET_SRC (x))
9484 && REG_P (SET_DEST (x)))
9486 /* Assume that most copies can be done with a single insn,
9487 unless we don't have HW FP, in which case everything
9488 larger than word mode will require two insns. */
9489 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9490 && GET_MODE_SIZE (mode) > 4)
9491 || mode == DImode)
9492 ? 2 : 1);
9493 /* Conditional register moves can be encoded
9494 in 16 bits in Thumb mode. */
9495 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9496 *cost >>= 1;
9498 return true;
9501 if (CONST_INT_P (SET_SRC (x)))
9503 /* Handle CONST_INT here, since the value doesn't have a mode
9504 and we would otherwise be unable to work out the true cost. */
9505 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9506 0, speed_p);
9507 outer_code = SET;
9508 /* Slightly lower the cost of setting a core reg to a constant.
9509 This helps break up chains and allows for better scheduling. */
9510 if (REG_P (SET_DEST (x))
9511 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9512 *cost -= 1;
9513 x = SET_SRC (x);
9514 /* Immediate moves with an immediate in the range [0, 255] can be
9515 encoded in 16 bits in Thumb mode. */
9516 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9517 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9518 *cost >>= 1;
9519 goto const_int_cost;
9522 return false;
9524 case MEM:
9525 /* A memory access costs 1 insn if the mode is small, or the address is
9526 a single register, otherwise it costs one insn per word. */
9527 if (REG_P (XEXP (x, 0)))
9528 *cost = COSTS_N_INSNS (1);
9529 else if (flag_pic
9530 && GET_CODE (XEXP (x, 0)) == PLUS
9531 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9532 /* This will be split into two instructions.
9533 See arm.md:calculate_pic_address. */
9534 *cost = COSTS_N_INSNS (2);
9535 else
9536 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9538 /* For speed optimizations, add the costs of the address and
9539 accessing memory. */
9540 if (speed_p)
9541 #ifdef NOT_YET
9542 *cost += (extra_cost->ldst.load
9543 + arm_address_cost (XEXP (x, 0), mode,
9544 ADDR_SPACE_GENERIC, speed_p));
9545 #else
9546 *cost += extra_cost->ldst.load;
9547 #endif
9548 return true;
9550 case PARALLEL:
9552 /* Calculations of LDM costs are complex. We assume an initial cost
9553 (ldm_1st) which will load the number of registers mentioned in
9554 ldm_regs_per_insn_1st registers; then each additional
9555 ldm_regs_per_insn_subsequent registers cost one more insn. The
9556 formula for N regs is thus:
9558 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9559 + ldm_regs_per_insn_subsequent - 1)
9560 / ldm_regs_per_insn_subsequent).
9562 Additional costs may also be added for addressing. A similar
9563 formula is used for STM. */
9565 bool is_ldm = load_multiple_operation (x, SImode);
9566 bool is_stm = store_multiple_operation (x, SImode);
9568 if (is_ldm || is_stm)
9570 if (speed_p)
9572 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9573 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9574 ? extra_cost->ldst.ldm_regs_per_insn_1st
9575 : extra_cost->ldst.stm_regs_per_insn_1st;
9576 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9577 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9578 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9580 *cost += regs_per_insn_1st
9581 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9582 + regs_per_insn_sub - 1)
9583 / regs_per_insn_sub);
9584 return true;
9588 return false;
9590 case DIV:
9591 case UDIV:
9592 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9593 && (mode == SFmode || !TARGET_VFP_SINGLE))
9594 *cost += COSTS_N_INSNS (speed_p
9595 ? extra_cost->fp[mode != SFmode].div : 0);
9596 else if (mode == SImode && TARGET_IDIV)
9597 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9598 else
9599 *cost = LIBCALL_COST (2);
9600 return false; /* All arguments must be in registers. */
9602 case MOD:
9603 /* MOD by a power of 2 can be expanded as:
9604 rsbs r1, r0, #0
9605 and r0, r0, #(n - 1)
9606 and r1, r1, #(n - 1)
9607 rsbpl r0, r1, #0. */
9608 if (CONST_INT_P (XEXP (x, 1))
9609 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9610 && mode == SImode)
9612 *cost += COSTS_N_INSNS (3);
9614 if (speed_p)
9615 *cost += 2 * extra_cost->alu.logical
9616 + extra_cost->alu.arith;
9617 return true;
9620 /* Fall-through. */
9621 case UMOD:
9622 *cost = LIBCALL_COST (2);
9623 return false; /* All arguments must be in registers. */
9625 case ROTATE:
9626 if (mode == SImode && REG_P (XEXP (x, 1)))
9628 *cost += (COSTS_N_INSNS (1)
9629 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9630 if (speed_p)
9631 *cost += extra_cost->alu.shift_reg;
9632 return true;
9634 /* Fall through */
9635 case ROTATERT:
9636 case ASHIFT:
9637 case LSHIFTRT:
9638 case ASHIFTRT:
9639 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9641 *cost += (COSTS_N_INSNS (2)
9642 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9643 if (speed_p)
9644 *cost += 2 * extra_cost->alu.shift;
9645 return true;
9647 else if (mode == SImode)
9649 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9650 /* Slightly disparage register shifts at -Os, but not by much. */
9651 if (!CONST_INT_P (XEXP (x, 1)))
9652 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9653 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9654 return true;
9656 else if (GET_MODE_CLASS (mode) == MODE_INT
9657 && GET_MODE_SIZE (mode) < 4)
9659 if (code == ASHIFT)
9661 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9662 /* Slightly disparage register shifts at -Os, but not by
9663 much. */
9664 if (!CONST_INT_P (XEXP (x, 1)))
9665 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9666 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9668 else if (code == LSHIFTRT || code == ASHIFTRT)
9670 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9672 /* Can use SBFX/UBFX. */
9673 if (speed_p)
9674 *cost += extra_cost->alu.bfx;
9675 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9677 else
9679 *cost += COSTS_N_INSNS (1);
9680 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9681 if (speed_p)
9683 if (CONST_INT_P (XEXP (x, 1)))
9684 *cost += 2 * extra_cost->alu.shift;
9685 else
9686 *cost += (extra_cost->alu.shift
9687 + extra_cost->alu.shift_reg);
9689 else
9690 /* Slightly disparage register shifts. */
9691 *cost += !CONST_INT_P (XEXP (x, 1));
9694 else /* Rotates. */
9696 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9697 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9698 if (speed_p)
9700 if (CONST_INT_P (XEXP (x, 1)))
9701 *cost += (2 * extra_cost->alu.shift
9702 + extra_cost->alu.log_shift);
9703 else
9704 *cost += (extra_cost->alu.shift
9705 + extra_cost->alu.shift_reg
9706 + extra_cost->alu.log_shift_reg);
9709 return true;
9712 *cost = LIBCALL_COST (2);
9713 return false;
9715 case BSWAP:
9716 if (arm_arch6)
9718 if (mode == SImode)
9720 if (speed_p)
9721 *cost += extra_cost->alu.rev;
9723 return false;
9726 else
9728 /* No rev instruction available. Look at arm_legacy_rev
9729 and thumb_legacy_rev for the form of RTL used then. */
9730 if (TARGET_THUMB)
9732 *cost += COSTS_N_INSNS (9);
9734 if (speed_p)
9736 *cost += 6 * extra_cost->alu.shift;
9737 *cost += 3 * extra_cost->alu.logical;
9740 else
9742 *cost += COSTS_N_INSNS (4);
9744 if (speed_p)
9746 *cost += 2 * extra_cost->alu.shift;
9747 *cost += extra_cost->alu.arith_shift;
9748 *cost += 2 * extra_cost->alu.logical;
9751 return true;
9753 return false;
9755 case MINUS:
9756 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9757 && (mode == SFmode || !TARGET_VFP_SINGLE))
9759 if (GET_CODE (XEXP (x, 0)) == MULT
9760 || GET_CODE (XEXP (x, 1)) == MULT)
9762 rtx mul_op0, mul_op1, sub_op;
9764 if (speed_p)
9765 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9767 if (GET_CODE (XEXP (x, 0)) == MULT)
9769 mul_op0 = XEXP (XEXP (x, 0), 0);
9770 mul_op1 = XEXP (XEXP (x, 0), 1);
9771 sub_op = XEXP (x, 1);
9773 else
9775 mul_op0 = XEXP (XEXP (x, 1), 0);
9776 mul_op1 = XEXP (XEXP (x, 1), 1);
9777 sub_op = XEXP (x, 0);
9780 /* The first operand of the multiply may be optionally
9781 negated. */
9782 if (GET_CODE (mul_op0) == NEG)
9783 mul_op0 = XEXP (mul_op0, 0);
9785 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9786 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9787 + rtx_cost (sub_op, mode, code, 0, speed_p));
9789 return true;
9792 if (speed_p)
9793 *cost += extra_cost->fp[mode != SFmode].addsub;
9794 return false;
9797 if (mode == SImode)
9799 rtx shift_by_reg = NULL;
9800 rtx shift_op;
9801 rtx non_shift_op;
9803 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9804 if (shift_op == NULL)
9806 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9807 non_shift_op = XEXP (x, 0);
9809 else
9810 non_shift_op = XEXP (x, 1);
9812 if (shift_op != NULL)
9814 if (shift_by_reg != NULL)
9816 if (speed_p)
9817 *cost += extra_cost->alu.arith_shift_reg;
9818 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9820 else if (speed_p)
9821 *cost += extra_cost->alu.arith_shift;
9823 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9824 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9825 return true;
9828 if (arm_arch_thumb2
9829 && GET_CODE (XEXP (x, 1)) == MULT)
9831 /* MLS. */
9832 if (speed_p)
9833 *cost += extra_cost->mult[0].add;
9834 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9835 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9836 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9837 return true;
9840 if (CONST_INT_P (XEXP (x, 0)))
9842 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9843 INTVAL (XEXP (x, 0)), NULL_RTX,
9844 NULL_RTX, 1, 0);
9845 *cost = COSTS_N_INSNS (insns);
9846 if (speed_p)
9847 *cost += insns * extra_cost->alu.arith;
9848 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9849 return true;
9851 else if (speed_p)
9852 *cost += extra_cost->alu.arith;
9854 return false;
9857 if (GET_MODE_CLASS (mode) == MODE_INT
9858 && GET_MODE_SIZE (mode) < 4)
9860 rtx shift_op, shift_reg;
9861 shift_reg = NULL;
9863 /* We check both sides of the MINUS for shifter operands since,
9864 unlike PLUS, it's not commutative. */
9866 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9867 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9869 /* Slightly disparage, as we might need to widen the result. */
9870 *cost += 1;
9871 if (speed_p)
9872 *cost += extra_cost->alu.arith;
9874 if (CONST_INT_P (XEXP (x, 0)))
9876 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9877 return true;
9880 return false;
9883 if (mode == DImode)
9885 *cost += COSTS_N_INSNS (1);
9887 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9889 rtx op1 = XEXP (x, 1);
9891 if (speed_p)
9892 *cost += 2 * extra_cost->alu.arith;
9894 if (GET_CODE (op1) == ZERO_EXTEND)
9895 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9896 0, speed_p);
9897 else
9898 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9899 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9900 0, speed_p);
9901 return true;
9903 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9905 if (speed_p)
9906 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9907 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9908 0, speed_p)
9909 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9910 return true;
9912 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9913 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9915 if (speed_p)
9916 *cost += (extra_cost->alu.arith
9917 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9918 ? extra_cost->alu.arith
9919 : extra_cost->alu.arith_shift));
9920 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9921 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9922 GET_CODE (XEXP (x, 1)), 0, speed_p));
9923 return true;
9926 if (speed_p)
9927 *cost += 2 * extra_cost->alu.arith;
9928 return false;
9931 /* Vector mode? */
9933 *cost = LIBCALL_COST (2);
9934 return false;
9936 case PLUS:
9937 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9938 && (mode == SFmode || !TARGET_VFP_SINGLE))
9940 if (GET_CODE (XEXP (x, 0)) == MULT)
9942 rtx mul_op0, mul_op1, add_op;
9944 if (speed_p)
9945 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9947 mul_op0 = XEXP (XEXP (x, 0), 0);
9948 mul_op1 = XEXP (XEXP (x, 0), 1);
9949 add_op = XEXP (x, 1);
9951 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9952 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9953 + rtx_cost (add_op, mode, code, 0, speed_p));
9955 return true;
9958 if (speed_p)
9959 *cost += extra_cost->fp[mode != SFmode].addsub;
9960 return false;
9962 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9964 *cost = LIBCALL_COST (2);
9965 return false;
9968 /* Narrow modes can be synthesized in SImode, but the range
9969 of useful sub-operations is limited. Check for shift operations
9970 on one of the operands. Only left shifts can be used in the
9971 narrow modes. */
9972 if (GET_MODE_CLASS (mode) == MODE_INT
9973 && GET_MODE_SIZE (mode) < 4)
9975 rtx shift_op, shift_reg;
9976 shift_reg = NULL;
9978 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9980 if (CONST_INT_P (XEXP (x, 1)))
9982 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9983 INTVAL (XEXP (x, 1)), NULL_RTX,
9984 NULL_RTX, 1, 0);
9985 *cost = COSTS_N_INSNS (insns);
9986 if (speed_p)
9987 *cost += insns * extra_cost->alu.arith;
9988 /* Slightly penalize a narrow operation as the result may
9989 need widening. */
9990 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9991 return true;
9994 /* Slightly penalize a narrow operation as the result may
9995 need widening. */
9996 *cost += 1;
9997 if (speed_p)
9998 *cost += extra_cost->alu.arith;
10000 return false;
10003 if (mode == SImode)
10005 rtx shift_op, shift_reg;
10007 if (TARGET_INT_SIMD
10008 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10009 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10011 /* UXTA[BH] or SXTA[BH]. */
10012 if (speed_p)
10013 *cost += extra_cost->alu.extend_arith;
10014 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10015 0, speed_p)
10016 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10017 return true;
10020 shift_reg = NULL;
10021 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10022 if (shift_op != NULL)
10024 if (shift_reg)
10026 if (speed_p)
10027 *cost += extra_cost->alu.arith_shift_reg;
10028 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10030 else if (speed_p)
10031 *cost += extra_cost->alu.arith_shift;
10033 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10034 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10035 return true;
10037 if (GET_CODE (XEXP (x, 0)) == MULT)
10039 rtx mul_op = XEXP (x, 0);
10041 if (TARGET_DSP_MULTIPLY
10042 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10043 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10044 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10045 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10046 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10047 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10048 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10049 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10050 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10051 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10052 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10053 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10054 == 16))))))
10056 /* SMLA[BT][BT]. */
10057 if (speed_p)
10058 *cost += extra_cost->mult[0].extend_add;
10059 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10060 SIGN_EXTEND, 0, speed_p)
10061 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10062 SIGN_EXTEND, 0, speed_p)
10063 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10064 return true;
10067 if (speed_p)
10068 *cost += extra_cost->mult[0].add;
10069 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10070 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10071 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10072 return true;
10074 if (CONST_INT_P (XEXP (x, 1)))
10076 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10077 INTVAL (XEXP (x, 1)), NULL_RTX,
10078 NULL_RTX, 1, 0);
10079 *cost = COSTS_N_INSNS (insns);
10080 if (speed_p)
10081 *cost += insns * extra_cost->alu.arith;
10082 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10083 return true;
10085 else if (speed_p)
10086 *cost += extra_cost->alu.arith;
10088 return false;
10091 if (mode == DImode)
10093 if (arm_arch3m
10094 && GET_CODE (XEXP (x, 0)) == MULT
10095 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10096 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10097 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10098 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10100 if (speed_p)
10101 *cost += extra_cost->mult[1].extend_add;
10102 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10103 ZERO_EXTEND, 0, speed_p)
10104 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10105 ZERO_EXTEND, 0, speed_p)
10106 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10107 return true;
10110 *cost += COSTS_N_INSNS (1);
10112 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10113 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10115 if (speed_p)
10116 *cost += (extra_cost->alu.arith
10117 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10118 ? extra_cost->alu.arith
10119 : extra_cost->alu.arith_shift));
10121 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10122 0, speed_p)
10123 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10124 return true;
10127 if (speed_p)
10128 *cost += 2 * extra_cost->alu.arith;
10129 return false;
10132 /* Vector mode? */
10133 *cost = LIBCALL_COST (2);
10134 return false;
10135 case IOR:
10136 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10138 if (speed_p)
10139 *cost += extra_cost->alu.rev;
10141 return true;
10143 /* Fall through. */
10144 case AND: case XOR:
10145 if (mode == SImode)
10147 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10148 rtx op0 = XEXP (x, 0);
10149 rtx shift_op, shift_reg;
10151 if (subcode == NOT
10152 && (code == AND
10153 || (code == IOR && TARGET_THUMB2)))
10154 op0 = XEXP (op0, 0);
10156 shift_reg = NULL;
10157 shift_op = shifter_op_p (op0, &shift_reg);
10158 if (shift_op != NULL)
10160 if (shift_reg)
10162 if (speed_p)
10163 *cost += extra_cost->alu.log_shift_reg;
10164 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10166 else if (speed_p)
10167 *cost += extra_cost->alu.log_shift;
10169 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10170 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10171 return true;
10174 if (CONST_INT_P (XEXP (x, 1)))
10176 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10177 INTVAL (XEXP (x, 1)), NULL_RTX,
10178 NULL_RTX, 1, 0);
10180 *cost = COSTS_N_INSNS (insns);
10181 if (speed_p)
10182 *cost += insns * extra_cost->alu.logical;
10183 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10184 return true;
10187 if (speed_p)
10188 *cost += extra_cost->alu.logical;
10189 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10190 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10191 return true;
10194 if (mode == DImode)
10196 rtx op0 = XEXP (x, 0);
10197 enum rtx_code subcode = GET_CODE (op0);
10199 *cost += COSTS_N_INSNS (1);
10201 if (subcode == NOT
10202 && (code == AND
10203 || (code == IOR && TARGET_THUMB2)))
10204 op0 = XEXP (op0, 0);
10206 if (GET_CODE (op0) == ZERO_EXTEND)
10208 if (speed_p)
10209 *cost += 2 * extra_cost->alu.logical;
10211 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10212 0, speed_p)
10213 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10214 return true;
10216 else if (GET_CODE (op0) == SIGN_EXTEND)
10218 if (speed_p)
10219 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10221 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10222 0, speed_p)
10223 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10224 return true;
10227 if (speed_p)
10228 *cost += 2 * extra_cost->alu.logical;
10230 return true;
10232 /* Vector mode? */
10234 *cost = LIBCALL_COST (2);
10235 return false;
10237 case MULT:
10238 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10239 && (mode == SFmode || !TARGET_VFP_SINGLE))
10241 rtx op0 = XEXP (x, 0);
10243 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10244 op0 = XEXP (op0, 0);
10246 if (speed_p)
10247 *cost += extra_cost->fp[mode != SFmode].mult;
10249 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10250 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10251 return true;
10253 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10255 *cost = LIBCALL_COST (2);
10256 return false;
10259 if (mode == SImode)
10261 if (TARGET_DSP_MULTIPLY
10262 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10263 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10264 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10265 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10266 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10267 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10268 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10269 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10270 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10271 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10272 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10273 && (INTVAL (XEXP (XEXP (x, 1), 1))
10274 == 16))))))
10276 /* SMUL[TB][TB]. */
10277 if (speed_p)
10278 *cost += extra_cost->mult[0].extend;
10279 *cost += rtx_cost (XEXP (x, 0), mode, SIGN_EXTEND, 0, speed_p);
10280 *cost += rtx_cost (XEXP (x, 1), mode, SIGN_EXTEND, 1, speed_p);
10281 return true;
10283 if (speed_p)
10284 *cost += extra_cost->mult[0].simple;
10285 return false;
10288 if (mode == DImode)
10290 if (arm_arch3m
10291 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10292 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10293 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10294 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10296 if (speed_p)
10297 *cost += extra_cost->mult[1].extend;
10298 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10299 ZERO_EXTEND, 0, speed_p)
10300 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10301 ZERO_EXTEND, 0, speed_p));
10302 return true;
10305 *cost = LIBCALL_COST (2);
10306 return false;
10309 /* Vector mode? */
10310 *cost = LIBCALL_COST (2);
10311 return false;
10313 case NEG:
10314 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10315 && (mode == SFmode || !TARGET_VFP_SINGLE))
10317 if (GET_CODE (XEXP (x, 0)) == MULT)
10319 /* VNMUL. */
10320 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10321 return true;
10324 if (speed_p)
10325 *cost += extra_cost->fp[mode != SFmode].neg;
10327 return false;
10329 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10331 *cost = LIBCALL_COST (1);
10332 return false;
10335 if (mode == SImode)
10337 if (GET_CODE (XEXP (x, 0)) == ABS)
10339 *cost += COSTS_N_INSNS (1);
10340 /* Assume the non-flag-changing variant. */
10341 if (speed_p)
10342 *cost += (extra_cost->alu.log_shift
10343 + extra_cost->alu.arith_shift);
10344 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10345 return true;
10348 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10349 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10351 *cost += COSTS_N_INSNS (1);
10352 /* No extra cost for MOV imm and MVN imm. */
10353 /* If the comparison op is using the flags, there's no further
10354 cost, otherwise we need to add the cost of the comparison. */
10355 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10356 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10357 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10359 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10360 *cost += (COSTS_N_INSNS (1)
10361 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10362 0, speed_p)
10363 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10364 1, speed_p));
10365 if (speed_p)
10366 *cost += extra_cost->alu.arith;
10368 return true;
10371 if (speed_p)
10372 *cost += extra_cost->alu.arith;
10373 return false;
10376 if (GET_MODE_CLASS (mode) == MODE_INT
10377 && GET_MODE_SIZE (mode) < 4)
10379 /* Slightly disparage, as we might need an extend operation. */
10380 *cost += 1;
10381 if (speed_p)
10382 *cost += extra_cost->alu.arith;
10383 return false;
10386 if (mode == DImode)
10388 *cost += COSTS_N_INSNS (1);
10389 if (speed_p)
10390 *cost += 2 * extra_cost->alu.arith;
10391 return false;
10394 /* Vector mode? */
10395 *cost = LIBCALL_COST (1);
10396 return false;
10398 case NOT:
10399 if (mode == SImode)
10401 rtx shift_op;
10402 rtx shift_reg = NULL;
10404 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10406 if (shift_op)
10408 if (shift_reg != NULL)
10410 if (speed_p)
10411 *cost += extra_cost->alu.log_shift_reg;
10412 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10414 else if (speed_p)
10415 *cost += extra_cost->alu.log_shift;
10416 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10417 return true;
10420 if (speed_p)
10421 *cost += extra_cost->alu.logical;
10422 return false;
10424 if (mode == DImode)
10426 *cost += COSTS_N_INSNS (1);
10427 return false;
10430 /* Vector mode? */
10432 *cost += LIBCALL_COST (1);
10433 return false;
10435 case IF_THEN_ELSE:
10437 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10439 *cost += COSTS_N_INSNS (3);
10440 return true;
10442 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10443 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10445 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10446 /* Assume that if one arm of the if_then_else is a register,
10447 that it will be tied with the result and eliminate the
10448 conditional insn. */
10449 if (REG_P (XEXP (x, 1)))
10450 *cost += op2cost;
10451 else if (REG_P (XEXP (x, 2)))
10452 *cost += op1cost;
10453 else
10455 if (speed_p)
10457 if (extra_cost->alu.non_exec_costs_exec)
10458 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10459 else
10460 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10462 else
10463 *cost += op1cost + op2cost;
10466 return true;
10468 case COMPARE:
10469 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10470 *cost = 0;
10471 else
10473 machine_mode op0mode;
10474 /* We'll mostly assume that the cost of a compare is the cost of the
10475 LHS. However, there are some notable exceptions. */
10477 /* Floating point compares are never done as side-effects. */
10478 op0mode = GET_MODE (XEXP (x, 0));
10479 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10480 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10482 if (speed_p)
10483 *cost += extra_cost->fp[op0mode != SFmode].compare;
10485 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10487 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10488 return true;
10491 return false;
10493 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10495 *cost = LIBCALL_COST (2);
10496 return false;
10499 /* DImode compares normally take two insns. */
10500 if (op0mode == DImode)
10502 *cost += COSTS_N_INSNS (1);
10503 if (speed_p)
10504 *cost += 2 * extra_cost->alu.arith;
10505 return false;
10508 if (op0mode == SImode)
10510 rtx shift_op;
10511 rtx shift_reg;
10513 if (XEXP (x, 1) == const0_rtx
10514 && !(REG_P (XEXP (x, 0))
10515 || (GET_CODE (XEXP (x, 0)) == SUBREG
10516 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10518 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10520 /* Multiply operations that set the flags are often
10521 significantly more expensive. */
10522 if (speed_p
10523 && GET_CODE (XEXP (x, 0)) == MULT
10524 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10525 *cost += extra_cost->mult[0].flag_setting;
10527 if (speed_p
10528 && GET_CODE (XEXP (x, 0)) == PLUS
10529 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10530 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10531 0), 1), mode))
10532 *cost += extra_cost->mult[0].flag_setting;
10533 return true;
10536 shift_reg = NULL;
10537 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10538 if (shift_op != NULL)
10540 if (shift_reg != NULL)
10542 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10543 1, speed_p);
10544 if (speed_p)
10545 *cost += extra_cost->alu.arith_shift_reg;
10547 else if (speed_p)
10548 *cost += extra_cost->alu.arith_shift;
10549 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10550 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10551 return true;
10554 if (speed_p)
10555 *cost += extra_cost->alu.arith;
10556 if (CONST_INT_P (XEXP (x, 1))
10557 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10559 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10560 return true;
10562 return false;
10565 /* Vector mode? */
10567 *cost = LIBCALL_COST (2);
10568 return false;
10570 return true;
10572 case EQ:
10573 case NE:
10574 case LT:
10575 case LE:
10576 case GT:
10577 case GE:
10578 case LTU:
10579 case LEU:
10580 case GEU:
10581 case GTU:
10582 case ORDERED:
10583 case UNORDERED:
10584 case UNEQ:
10585 case UNLE:
10586 case UNLT:
10587 case UNGE:
10588 case UNGT:
10589 case LTGT:
10590 if (outer_code == SET)
10592 /* Is it a store-flag operation? */
10593 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10594 && XEXP (x, 1) == const0_rtx)
10596 /* Thumb also needs an IT insn. */
10597 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10598 return true;
10600 if (XEXP (x, 1) == const0_rtx)
10602 switch (code)
10604 case LT:
10605 /* LSR Rd, Rn, #31. */
10606 if (speed_p)
10607 *cost += extra_cost->alu.shift;
10608 break;
10610 case EQ:
10611 /* RSBS T1, Rn, #0
10612 ADC Rd, Rn, T1. */
10614 case NE:
10615 /* SUBS T1, Rn, #1
10616 SBC Rd, Rn, T1. */
10617 *cost += COSTS_N_INSNS (1);
10618 break;
10620 case LE:
10621 /* RSBS T1, Rn, Rn, LSR #31
10622 ADC Rd, Rn, T1. */
10623 *cost += COSTS_N_INSNS (1);
10624 if (speed_p)
10625 *cost += extra_cost->alu.arith_shift;
10626 break;
10628 case GT:
10629 /* RSB Rd, Rn, Rn, ASR #1
10630 LSR Rd, Rd, #31. */
10631 *cost += COSTS_N_INSNS (1);
10632 if (speed_p)
10633 *cost += (extra_cost->alu.arith_shift
10634 + extra_cost->alu.shift);
10635 break;
10637 case GE:
10638 /* ASR Rd, Rn, #31
10639 ADD Rd, Rn, #1. */
10640 *cost += COSTS_N_INSNS (1);
10641 if (speed_p)
10642 *cost += extra_cost->alu.shift;
10643 break;
10645 default:
10646 /* Remaining cases are either meaningless or would take
10647 three insns anyway. */
10648 *cost = COSTS_N_INSNS (3);
10649 break;
10651 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10652 return true;
10654 else
10656 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10657 if (CONST_INT_P (XEXP (x, 1))
10658 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10660 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10661 return true;
10664 return false;
10667 /* Not directly inside a set. If it involves the condition code
10668 register it must be the condition for a branch, cond_exec or
10669 I_T_E operation. Since the comparison is performed elsewhere
10670 this is just the control part which has no additional
10671 cost. */
10672 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10673 && XEXP (x, 1) == const0_rtx)
10675 *cost = 0;
10676 return true;
10678 return false;
10680 case ABS:
10681 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10682 && (mode == SFmode || !TARGET_VFP_SINGLE))
10684 if (speed_p)
10685 *cost += extra_cost->fp[mode != SFmode].neg;
10687 return false;
10689 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10691 *cost = LIBCALL_COST (1);
10692 return false;
10695 if (mode == SImode)
10697 if (speed_p)
10698 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10699 return false;
10701 /* Vector mode? */
10702 *cost = LIBCALL_COST (1);
10703 return false;
10705 case SIGN_EXTEND:
10706 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10707 && MEM_P (XEXP (x, 0)))
10709 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10711 if (mode == DImode)
10712 *cost += COSTS_N_INSNS (1);
10714 if (!speed_p)
10715 return true;
10717 if (GET_MODE (XEXP (x, 0)) == SImode)
10718 *cost += extra_cost->ldst.load;
10719 else
10720 *cost += extra_cost->ldst.load_sign_extend;
10722 if (mode == DImode)
10723 *cost += extra_cost->alu.shift;
10725 return true;
10728 /* Widening from less than 32-bits requires an extend operation. */
10729 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10731 /* We have SXTB/SXTH. */
10732 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10733 if (speed_p)
10734 *cost += extra_cost->alu.extend;
10736 else if (GET_MODE (XEXP (x, 0)) != SImode)
10738 /* Needs two shifts. */
10739 *cost += COSTS_N_INSNS (1);
10740 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10741 if (speed_p)
10742 *cost += 2 * extra_cost->alu.shift;
10745 /* Widening beyond 32-bits requires one more insn. */
10746 if (mode == DImode)
10748 *cost += COSTS_N_INSNS (1);
10749 if (speed_p)
10750 *cost += extra_cost->alu.shift;
10753 return true;
10755 case ZERO_EXTEND:
10756 if ((arm_arch4
10757 || GET_MODE (XEXP (x, 0)) == SImode
10758 || GET_MODE (XEXP (x, 0)) == QImode)
10759 && MEM_P (XEXP (x, 0)))
10761 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10763 if (mode == DImode)
10764 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10766 return true;
10769 /* Widening from less than 32-bits requires an extend operation. */
10770 if (GET_MODE (XEXP (x, 0)) == QImode)
10772 /* UXTB can be a shorter instruction in Thumb2, but it might
10773 be slower than the AND Rd, Rn, #255 alternative. When
10774 optimizing for speed it should never be slower to use
10775 AND, and we don't really model 16-bit vs 32-bit insns
10776 here. */
10777 if (speed_p)
10778 *cost += extra_cost->alu.logical;
10780 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10782 /* We have UXTB/UXTH. */
10783 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10784 if (speed_p)
10785 *cost += extra_cost->alu.extend;
10787 else if (GET_MODE (XEXP (x, 0)) != SImode)
10789 /* Needs two shifts. It's marginally preferable to use
10790 shifts rather than two BIC instructions as the second
10791 shift may merge with a subsequent insn as a shifter
10792 op. */
10793 *cost = COSTS_N_INSNS (2);
10794 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10795 if (speed_p)
10796 *cost += 2 * extra_cost->alu.shift;
10799 /* Widening beyond 32-bits requires one more insn. */
10800 if (mode == DImode)
10802 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10805 return true;
10807 case CONST_INT:
10808 *cost = 0;
10809 /* CONST_INT has no mode, so we cannot tell for sure how many
10810 insns are really going to be needed. The best we can do is
10811 look at the value passed. If it fits in SImode, then assume
10812 that's the mode it will be used for. Otherwise assume it
10813 will be used in DImode. */
10814 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10815 mode = SImode;
10816 else
10817 mode = DImode;
10819 /* Avoid blowing up in arm_gen_constant (). */
10820 if (!(outer_code == PLUS
10821 || outer_code == AND
10822 || outer_code == IOR
10823 || outer_code == XOR
10824 || outer_code == MINUS))
10825 outer_code = SET;
10827 const_int_cost:
10828 if (mode == SImode)
10830 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10831 INTVAL (x), NULL, NULL,
10832 0, 0));
10833 /* Extra costs? */
10835 else
10837 *cost += COSTS_N_INSNS (arm_gen_constant
10838 (outer_code, SImode, NULL,
10839 trunc_int_for_mode (INTVAL (x), SImode),
10840 NULL, NULL, 0, 0)
10841 + arm_gen_constant (outer_code, SImode, NULL,
10842 INTVAL (x) >> 32, NULL,
10843 NULL, 0, 0));
10844 /* Extra costs? */
10847 return true;
10849 case CONST:
10850 case LABEL_REF:
10851 case SYMBOL_REF:
10852 if (speed_p)
10854 if (arm_arch_thumb2 && !flag_pic)
10855 *cost += COSTS_N_INSNS (1);
10856 else
10857 *cost += extra_cost->ldst.load;
10859 else
10860 *cost += COSTS_N_INSNS (1);
10862 if (flag_pic)
10864 *cost += COSTS_N_INSNS (1);
10865 if (speed_p)
10866 *cost += extra_cost->alu.arith;
10869 return true;
10871 case CONST_FIXED:
10872 *cost = COSTS_N_INSNS (4);
10873 /* Fixme. */
10874 return true;
10876 case CONST_DOUBLE:
10877 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10878 && (mode == SFmode || !TARGET_VFP_SINGLE))
10880 if (vfp3_const_double_rtx (x))
10882 if (speed_p)
10883 *cost += extra_cost->fp[mode == DFmode].fpconst;
10884 return true;
10887 if (speed_p)
10889 if (mode == DFmode)
10890 *cost += extra_cost->ldst.loadd;
10891 else
10892 *cost += extra_cost->ldst.loadf;
10894 else
10895 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10897 return true;
10899 *cost = COSTS_N_INSNS (4);
10900 return true;
10902 case CONST_VECTOR:
10903 /* Fixme. */
10904 if (TARGET_NEON
10905 && TARGET_HARD_FLOAT
10906 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10907 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10908 *cost = COSTS_N_INSNS (1);
10909 else
10910 *cost = COSTS_N_INSNS (4);
10911 return true;
10913 case HIGH:
10914 case LO_SUM:
10915 /* When optimizing for size, we prefer constant pool entries to
10916 MOVW/MOVT pairs, so bump the cost of these slightly. */
10917 if (!speed_p)
10918 *cost += 1;
10919 return true;
10921 case CLZ:
10922 if (speed_p)
10923 *cost += extra_cost->alu.clz;
10924 return false;
10926 case SMIN:
10927 if (XEXP (x, 1) == const0_rtx)
10929 if (speed_p)
10930 *cost += extra_cost->alu.log_shift;
10931 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10932 return true;
10934 /* Fall through. */
10935 case SMAX:
10936 case UMIN:
10937 case UMAX:
10938 *cost += COSTS_N_INSNS (1);
10939 return false;
10941 case TRUNCATE:
10942 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10943 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10944 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10945 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10946 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10947 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10948 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10949 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10950 == ZERO_EXTEND))))
10952 if (speed_p)
10953 *cost += extra_cost->mult[1].extend;
10954 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10955 ZERO_EXTEND, 0, speed_p)
10956 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10957 ZERO_EXTEND, 0, speed_p));
10958 return true;
10960 *cost = LIBCALL_COST (1);
10961 return false;
10963 case UNSPEC_VOLATILE:
10964 case UNSPEC:
10965 return arm_unspec_cost (x, outer_code, speed_p, cost);
10967 case PC:
10968 /* Reading the PC is like reading any other register. Writing it
10969 is more expensive, but we take that into account elsewhere. */
10970 *cost = 0;
10971 return true;
10973 case ZERO_EXTRACT:
10974 /* TODO: Simple zero_extract of bottom bits using AND. */
10975 /* Fall through. */
10976 case SIGN_EXTRACT:
10977 if (arm_arch6
10978 && mode == SImode
10979 && CONST_INT_P (XEXP (x, 1))
10980 && CONST_INT_P (XEXP (x, 2)))
10982 if (speed_p)
10983 *cost += extra_cost->alu.bfx;
10984 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10985 return true;
10987 /* Without UBFX/SBFX, need to resort to shift operations. */
10988 *cost += COSTS_N_INSNS (1);
10989 if (speed_p)
10990 *cost += 2 * extra_cost->alu.shift;
10991 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10992 return true;
10994 case FLOAT_EXTEND:
10995 if (TARGET_HARD_FLOAT)
10997 if (speed_p)
10998 *cost += extra_cost->fp[mode == DFmode].widen;
10999 if (!TARGET_FPU_ARMV8
11000 && GET_MODE (XEXP (x, 0)) == HFmode)
11002 /* Pre v8, widening HF->DF is a two-step process, first
11003 widening to SFmode. */
11004 *cost += COSTS_N_INSNS (1);
11005 if (speed_p)
11006 *cost += extra_cost->fp[0].widen;
11008 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11009 return true;
11012 *cost = LIBCALL_COST (1);
11013 return false;
11015 case FLOAT_TRUNCATE:
11016 if (TARGET_HARD_FLOAT)
11018 if (speed_p)
11019 *cost += extra_cost->fp[mode == DFmode].narrow;
11020 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11021 return true;
11022 /* Vector modes? */
11024 *cost = LIBCALL_COST (1);
11025 return false;
11027 case FMA:
11028 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11030 rtx op0 = XEXP (x, 0);
11031 rtx op1 = XEXP (x, 1);
11032 rtx op2 = XEXP (x, 2);
11035 /* vfms or vfnma. */
11036 if (GET_CODE (op0) == NEG)
11037 op0 = XEXP (op0, 0);
11039 /* vfnms or vfnma. */
11040 if (GET_CODE (op2) == NEG)
11041 op2 = XEXP (op2, 0);
11043 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11044 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11045 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11047 if (speed_p)
11048 *cost += extra_cost->fp[mode ==DFmode].fma;
11050 return true;
11053 *cost = LIBCALL_COST (3);
11054 return false;
11056 case FIX:
11057 case UNSIGNED_FIX:
11058 if (TARGET_HARD_FLOAT)
11060 if (GET_MODE_CLASS (mode) == MODE_INT)
11062 mode = GET_MODE (XEXP (x, 0));
11063 if (speed_p)
11064 *cost += extra_cost->fp[mode == DFmode].toint;
11065 /* Strip of the 'cost' of rounding towards zero. */
11066 if (GET_CODE (XEXP (x, 0)) == FIX)
11067 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11068 0, speed_p);
11069 else
11070 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11071 /* ??? Increase the cost to deal with transferring from
11072 FP -> CORE registers? */
11073 return true;
11075 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11076 && TARGET_FPU_ARMV8)
11078 if (speed_p)
11079 *cost += extra_cost->fp[mode == DFmode].roundint;
11080 return false;
11082 /* Vector costs? */
11084 *cost = LIBCALL_COST (1);
11085 return false;
11087 case FLOAT:
11088 case UNSIGNED_FLOAT:
11089 if (TARGET_HARD_FLOAT)
11091 /* ??? Increase the cost to deal with transferring from CORE
11092 -> FP registers? */
11093 if (speed_p)
11094 *cost += extra_cost->fp[mode == DFmode].fromint;
11095 return false;
11097 *cost = LIBCALL_COST (1);
11098 return false;
11100 case CALL:
11101 return true;
11103 case ASM_OPERANDS:
11105 /* Just a guess. Guess number of instructions in the asm
11106 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11107 though (see PR60663). */
11108 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11109 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11111 *cost = COSTS_N_INSNS (asm_length + num_operands);
11112 return true;
11114 default:
11115 if (mode != VOIDmode)
11116 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11117 else
11118 *cost = COSTS_N_INSNS (4); /* Who knows? */
11119 return false;
11123 #undef HANDLE_NARROW_SHIFT_ARITH
11125 /* RTX costs when optimizing for size. */
11126 static bool
11127 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11128 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11130 bool result;
11131 int code = GET_CODE (x);
11133 if (TARGET_OLD_RTX_COSTS
11134 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11136 /* Old way. (Deprecated.) */
11137 if (!speed)
11138 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11139 (enum rtx_code) outer_code, total);
11140 else
11141 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11142 (enum rtx_code) outer_code, total,
11143 speed);
11145 else
11147 /* New way. */
11148 if (current_tune->insn_extra_cost)
11149 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11150 (enum rtx_code) outer_code,
11151 current_tune->insn_extra_cost,
11152 total, speed);
11153 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11154 && current_tune->insn_extra_cost != NULL */
11155 else
11156 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11157 (enum rtx_code) outer_code,
11158 &generic_extra_costs, total, speed);
11161 if (dump_file && (dump_flags & TDF_DETAILS))
11163 print_rtl_single (dump_file, x);
11164 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11165 *total, result ? "final" : "partial");
11167 return result;
11170 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11171 supported on any "slowmul" cores, so it can be ignored. */
11173 static bool
11174 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11175 int *total, bool speed)
11177 machine_mode mode = GET_MODE (x);
11179 if (TARGET_THUMB)
11181 *total = thumb1_rtx_costs (x, code, outer_code);
11182 return true;
11185 switch (code)
11187 case MULT:
11188 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11189 || mode == DImode)
11191 *total = COSTS_N_INSNS (20);
11192 return false;
11195 if (CONST_INT_P (XEXP (x, 1)))
11197 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11198 & (unsigned HOST_WIDE_INT) 0xffffffff);
11199 int cost, const_ok = const_ok_for_arm (i);
11200 int j, booth_unit_size;
11202 /* Tune as appropriate. */
11203 cost = const_ok ? 4 : 8;
11204 booth_unit_size = 2;
11205 for (j = 0; i && j < 32; j += booth_unit_size)
11207 i >>= booth_unit_size;
11208 cost++;
11211 *total = COSTS_N_INSNS (cost);
11212 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
11213 return true;
11216 *total = COSTS_N_INSNS (20);
11217 return false;
11219 default:
11220 return arm_rtx_costs_1 (x, outer_code, total, speed);;
11225 /* RTX cost for cores with a fast multiply unit (M variants). */
11227 static bool
11228 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11229 int *total, bool speed)
11231 machine_mode mode = GET_MODE (x);
11233 if (TARGET_THUMB1)
11235 *total = thumb1_rtx_costs (x, code, outer_code);
11236 return true;
11239 /* ??? should thumb2 use different costs? */
11240 switch (code)
11242 case MULT:
11243 /* There is no point basing this on the tuning, since it is always the
11244 fast variant if it exists at all. */
11245 if (mode == DImode
11246 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11247 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11248 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11250 *total = COSTS_N_INSNS(2);
11251 return false;
11255 if (mode == DImode)
11257 *total = COSTS_N_INSNS (5);
11258 return false;
11261 if (CONST_INT_P (XEXP (x, 1)))
11263 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11264 & (unsigned HOST_WIDE_INT) 0xffffffff);
11265 int cost, const_ok = const_ok_for_arm (i);
11266 int j, booth_unit_size;
11268 /* Tune as appropriate. */
11269 cost = const_ok ? 4 : 8;
11270 booth_unit_size = 8;
11271 for (j = 0; i && j < 32; j += booth_unit_size)
11273 i >>= booth_unit_size;
11274 cost++;
11277 *total = COSTS_N_INSNS(cost);
11278 return false;
11281 if (mode == SImode)
11283 *total = COSTS_N_INSNS (4);
11284 return false;
11287 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11289 if (TARGET_HARD_FLOAT
11290 && (mode == SFmode
11291 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11293 *total = COSTS_N_INSNS (1);
11294 return false;
11298 /* Requires a lib call */
11299 *total = COSTS_N_INSNS (20);
11300 return false;
11302 default:
11303 return arm_rtx_costs_1 (x, outer_code, total, speed);
11308 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11309 so it can be ignored. */
11311 static bool
11312 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11313 int *total, bool speed)
11315 machine_mode mode = GET_MODE (x);
11317 if (TARGET_THUMB)
11319 *total = thumb1_rtx_costs (x, code, outer_code);
11320 return true;
11323 switch (code)
11325 case COMPARE:
11326 if (GET_CODE (XEXP (x, 0)) != MULT)
11327 return arm_rtx_costs_1 (x, outer_code, total, speed);
11329 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11330 will stall until the multiplication is complete. */
11331 *total = COSTS_N_INSNS (3);
11332 return false;
11334 case MULT:
11335 /* There is no point basing this on the tuning, since it is always the
11336 fast variant if it exists at all. */
11337 if (mode == DImode
11338 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11339 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11340 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11342 *total = COSTS_N_INSNS (2);
11343 return false;
11347 if (mode == DImode)
11349 *total = COSTS_N_INSNS (5);
11350 return false;
11353 if (CONST_INT_P (XEXP (x, 1)))
11355 /* If operand 1 is a constant we can more accurately
11356 calculate the cost of the multiply. The multiplier can
11357 retire 15 bits on the first cycle and a further 12 on the
11358 second. We do, of course, have to load the constant into
11359 a register first. */
11360 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11361 /* There's a general overhead of one cycle. */
11362 int cost = 1;
11363 unsigned HOST_WIDE_INT masked_const;
11365 if (i & 0x80000000)
11366 i = ~i;
11368 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11370 masked_const = i & 0xffff8000;
11371 if (masked_const != 0)
11373 cost++;
11374 masked_const = i & 0xf8000000;
11375 if (masked_const != 0)
11376 cost++;
11378 *total = COSTS_N_INSNS (cost);
11379 return false;
11382 if (mode == SImode)
11384 *total = COSTS_N_INSNS (3);
11385 return false;
11388 /* Requires a lib call */
11389 *total = COSTS_N_INSNS (20);
11390 return false;
11392 default:
11393 return arm_rtx_costs_1 (x, outer_code, total, speed);
11398 /* RTX costs for 9e (and later) cores. */
11400 static bool
11401 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11402 int *total, bool speed)
11404 machine_mode mode = GET_MODE (x);
11406 if (TARGET_THUMB1)
11408 switch (code)
11410 case MULT:
11411 /* Small multiply: 32 cycles for an integer multiply inst. */
11412 if (arm_arch6m && arm_m_profile_small_mul)
11413 *total = COSTS_N_INSNS (32);
11414 else
11415 *total = COSTS_N_INSNS (3);
11416 return true;
11418 default:
11419 *total = thumb1_rtx_costs (x, code, outer_code);
11420 return true;
11424 switch (code)
11426 case MULT:
11427 /* There is no point basing this on the tuning, since it is always the
11428 fast variant if it exists at all. */
11429 if (mode == DImode
11430 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11431 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11432 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11434 *total = COSTS_N_INSNS (2);
11435 return false;
11439 if (mode == DImode)
11441 *total = COSTS_N_INSNS (5);
11442 return false;
11445 if (mode == SImode)
11447 *total = COSTS_N_INSNS (2);
11448 return false;
11451 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11453 if (TARGET_HARD_FLOAT
11454 && (mode == SFmode
11455 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11457 *total = COSTS_N_INSNS (1);
11458 return false;
11462 *total = COSTS_N_INSNS (20);
11463 return false;
11465 default:
11466 return arm_rtx_costs_1 (x, outer_code, total, speed);
11469 /* All address computations that can be done are free, but rtx cost returns
11470 the same for practically all of them. So we weight the different types
11471 of address here in the order (most pref first):
11472 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11473 static inline int
11474 arm_arm_address_cost (rtx x)
11476 enum rtx_code c = GET_CODE (x);
11478 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11479 return 0;
11480 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11481 return 10;
11483 if (c == PLUS)
11485 if (CONST_INT_P (XEXP (x, 1)))
11486 return 2;
11488 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11489 return 3;
11491 return 4;
11494 return 6;
11497 static inline int
11498 arm_thumb_address_cost (rtx x)
11500 enum rtx_code c = GET_CODE (x);
11502 if (c == REG)
11503 return 1;
11504 if (c == PLUS
11505 && REG_P (XEXP (x, 0))
11506 && CONST_INT_P (XEXP (x, 1)))
11507 return 1;
11509 return 2;
11512 static int
11513 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11514 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11516 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11519 /* Adjust cost hook for XScale. */
11520 static bool
11521 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11523 /* Some true dependencies can have a higher cost depending
11524 on precisely how certain input operands are used. */
11525 if (REG_NOTE_KIND(link) == 0
11526 && recog_memoized (insn) >= 0
11527 && recog_memoized (dep) >= 0)
11529 int shift_opnum = get_attr_shift (insn);
11530 enum attr_type attr_type = get_attr_type (dep);
11532 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11533 operand for INSN. If we have a shifted input operand and the
11534 instruction we depend on is another ALU instruction, then we may
11535 have to account for an additional stall. */
11536 if (shift_opnum != 0
11537 && (attr_type == TYPE_ALU_SHIFT_IMM
11538 || attr_type == TYPE_ALUS_SHIFT_IMM
11539 || attr_type == TYPE_LOGIC_SHIFT_IMM
11540 || attr_type == TYPE_LOGICS_SHIFT_IMM
11541 || attr_type == TYPE_ALU_SHIFT_REG
11542 || attr_type == TYPE_ALUS_SHIFT_REG
11543 || attr_type == TYPE_LOGIC_SHIFT_REG
11544 || attr_type == TYPE_LOGICS_SHIFT_REG
11545 || attr_type == TYPE_MOV_SHIFT
11546 || attr_type == TYPE_MVN_SHIFT
11547 || attr_type == TYPE_MOV_SHIFT_REG
11548 || attr_type == TYPE_MVN_SHIFT_REG))
11550 rtx shifted_operand;
11551 int opno;
11553 /* Get the shifted operand. */
11554 extract_insn (insn);
11555 shifted_operand = recog_data.operand[shift_opnum];
11557 /* Iterate over all the operands in DEP. If we write an operand
11558 that overlaps with SHIFTED_OPERAND, then we have increase the
11559 cost of this dependency. */
11560 extract_insn (dep);
11561 preprocess_constraints (dep);
11562 for (opno = 0; opno < recog_data.n_operands; opno++)
11564 /* We can ignore strict inputs. */
11565 if (recog_data.operand_type[opno] == OP_IN)
11566 continue;
11568 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11569 shifted_operand))
11571 *cost = 2;
11572 return false;
11577 return true;
11580 /* Adjust cost hook for Cortex A9. */
11581 static bool
11582 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11584 switch (REG_NOTE_KIND (link))
11586 case REG_DEP_ANTI:
11587 *cost = 0;
11588 return false;
11590 case REG_DEP_TRUE:
11591 case REG_DEP_OUTPUT:
11592 if (recog_memoized (insn) >= 0
11593 && recog_memoized (dep) >= 0)
11595 if (GET_CODE (PATTERN (insn)) == SET)
11597 if (GET_MODE_CLASS
11598 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11599 || GET_MODE_CLASS
11600 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11602 enum attr_type attr_type_insn = get_attr_type (insn);
11603 enum attr_type attr_type_dep = get_attr_type (dep);
11605 /* By default all dependencies of the form
11606 s0 = s0 <op> s1
11607 s0 = s0 <op> s2
11608 have an extra latency of 1 cycle because
11609 of the input and output dependency in this
11610 case. However this gets modeled as an true
11611 dependency and hence all these checks. */
11612 if (REG_P (SET_DEST (PATTERN (insn)))
11613 && REG_P (SET_DEST (PATTERN (dep)))
11614 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11615 SET_DEST (PATTERN (dep))))
11617 /* FMACS is a special case where the dependent
11618 instruction can be issued 3 cycles before
11619 the normal latency in case of an output
11620 dependency. */
11621 if ((attr_type_insn == TYPE_FMACS
11622 || attr_type_insn == TYPE_FMACD)
11623 && (attr_type_dep == TYPE_FMACS
11624 || attr_type_dep == TYPE_FMACD))
11626 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11627 *cost = insn_default_latency (dep) - 3;
11628 else
11629 *cost = insn_default_latency (dep);
11630 return false;
11632 else
11634 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11635 *cost = insn_default_latency (dep) + 1;
11636 else
11637 *cost = insn_default_latency (dep);
11639 return false;
11644 break;
11646 default:
11647 gcc_unreachable ();
11650 return true;
11653 /* Adjust cost hook for FA726TE. */
11654 static bool
11655 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11657 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11658 have penalty of 3. */
11659 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11660 && recog_memoized (insn) >= 0
11661 && recog_memoized (dep) >= 0
11662 && get_attr_conds (dep) == CONDS_SET)
11664 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11665 if (get_attr_conds (insn) == CONDS_USE
11666 && get_attr_type (insn) != TYPE_BRANCH)
11668 *cost = 3;
11669 return false;
11672 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11673 || get_attr_conds (insn) == CONDS_USE)
11675 *cost = 0;
11676 return false;
11680 return true;
11683 /* Implement TARGET_REGISTER_MOVE_COST.
11685 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11686 it is typically more expensive than a single memory access. We set
11687 the cost to less than two memory accesses so that floating
11688 point to integer conversion does not go through memory. */
11691 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11692 reg_class_t from, reg_class_t to)
11694 if (TARGET_32BIT)
11696 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11697 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11698 return 15;
11699 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11700 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11701 return 4;
11702 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11703 return 20;
11704 else
11705 return 2;
11707 else
11709 if (from == HI_REGS || to == HI_REGS)
11710 return 4;
11711 else
11712 return 2;
11716 /* Implement TARGET_MEMORY_MOVE_COST. */
11719 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11720 bool in ATTRIBUTE_UNUSED)
11722 if (TARGET_32BIT)
11723 return 10;
11724 else
11726 if (GET_MODE_SIZE (mode) < 4)
11727 return 8;
11728 else
11729 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11733 /* Vectorizer cost model implementation. */
11735 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11736 static int
11737 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11738 tree vectype,
11739 int misalign ATTRIBUTE_UNUSED)
11741 unsigned elements;
11743 switch (type_of_cost)
11745 case scalar_stmt:
11746 return current_tune->vec_costs->scalar_stmt_cost;
11748 case scalar_load:
11749 return current_tune->vec_costs->scalar_load_cost;
11751 case scalar_store:
11752 return current_tune->vec_costs->scalar_store_cost;
11754 case vector_stmt:
11755 return current_tune->vec_costs->vec_stmt_cost;
11757 case vector_load:
11758 return current_tune->vec_costs->vec_align_load_cost;
11760 case vector_store:
11761 return current_tune->vec_costs->vec_store_cost;
11763 case vec_to_scalar:
11764 return current_tune->vec_costs->vec_to_scalar_cost;
11766 case scalar_to_vec:
11767 return current_tune->vec_costs->scalar_to_vec_cost;
11769 case unaligned_load:
11770 return current_tune->vec_costs->vec_unalign_load_cost;
11772 case unaligned_store:
11773 return current_tune->vec_costs->vec_unalign_store_cost;
11775 case cond_branch_taken:
11776 return current_tune->vec_costs->cond_taken_branch_cost;
11778 case cond_branch_not_taken:
11779 return current_tune->vec_costs->cond_not_taken_branch_cost;
11781 case vec_perm:
11782 case vec_promote_demote:
11783 return current_tune->vec_costs->vec_stmt_cost;
11785 case vec_construct:
11786 elements = TYPE_VECTOR_SUBPARTS (vectype);
11787 return elements / 2 + 1;
11789 default:
11790 gcc_unreachable ();
11794 /* Implement targetm.vectorize.add_stmt_cost. */
11796 static unsigned
11797 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11798 struct _stmt_vec_info *stmt_info, int misalign,
11799 enum vect_cost_model_location where)
11801 unsigned *cost = (unsigned *) data;
11802 unsigned retval = 0;
11804 if (flag_vect_cost_model)
11806 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11807 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11809 /* Statements in an inner loop relative to the loop being
11810 vectorized are weighted more heavily. The value here is
11811 arbitrary and could potentially be improved with analysis. */
11812 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11813 count *= 50; /* FIXME. */
11815 retval = (unsigned) (count * stmt_cost);
11816 cost[where] += retval;
11819 return retval;
11822 /* Return true if and only if this insn can dual-issue only as older. */
11823 static bool
11824 cortexa7_older_only (rtx_insn *insn)
11826 if (recog_memoized (insn) < 0)
11827 return false;
11829 switch (get_attr_type (insn))
11831 case TYPE_ALU_DSP_REG:
11832 case TYPE_ALU_SREG:
11833 case TYPE_ALUS_SREG:
11834 case TYPE_LOGIC_REG:
11835 case TYPE_LOGICS_REG:
11836 case TYPE_ADC_REG:
11837 case TYPE_ADCS_REG:
11838 case TYPE_ADR:
11839 case TYPE_BFM:
11840 case TYPE_REV:
11841 case TYPE_MVN_REG:
11842 case TYPE_SHIFT_IMM:
11843 case TYPE_SHIFT_REG:
11844 case TYPE_LOAD_BYTE:
11845 case TYPE_LOAD1:
11846 case TYPE_STORE1:
11847 case TYPE_FFARITHS:
11848 case TYPE_FADDS:
11849 case TYPE_FFARITHD:
11850 case TYPE_FADDD:
11851 case TYPE_FMOV:
11852 case TYPE_F_CVT:
11853 case TYPE_FCMPS:
11854 case TYPE_FCMPD:
11855 case TYPE_FCONSTS:
11856 case TYPE_FCONSTD:
11857 case TYPE_FMULS:
11858 case TYPE_FMACS:
11859 case TYPE_FMULD:
11860 case TYPE_FMACD:
11861 case TYPE_FDIVS:
11862 case TYPE_FDIVD:
11863 case TYPE_F_MRC:
11864 case TYPE_F_MRRC:
11865 case TYPE_F_FLAG:
11866 case TYPE_F_LOADS:
11867 case TYPE_F_STORES:
11868 return true;
11869 default:
11870 return false;
11874 /* Return true if and only if this insn can dual-issue as younger. */
11875 static bool
11876 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11878 if (recog_memoized (insn) < 0)
11880 if (verbose > 5)
11881 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11882 return false;
11885 switch (get_attr_type (insn))
11887 case TYPE_ALU_IMM:
11888 case TYPE_ALUS_IMM:
11889 case TYPE_LOGIC_IMM:
11890 case TYPE_LOGICS_IMM:
11891 case TYPE_EXTEND:
11892 case TYPE_MVN_IMM:
11893 case TYPE_MOV_IMM:
11894 case TYPE_MOV_REG:
11895 case TYPE_MOV_SHIFT:
11896 case TYPE_MOV_SHIFT_REG:
11897 case TYPE_BRANCH:
11898 case TYPE_CALL:
11899 return true;
11900 default:
11901 return false;
11906 /* Look for an instruction that can dual issue only as an older
11907 instruction, and move it in front of any instructions that can
11908 dual-issue as younger, while preserving the relative order of all
11909 other instructions in the ready list. This is a hueuristic to help
11910 dual-issue in later cycles, by postponing issue of more flexible
11911 instructions. This heuristic may affect dual issue opportunities
11912 in the current cycle. */
11913 static void
11914 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11915 int *n_readyp, int clock)
11917 int i;
11918 int first_older_only = -1, first_younger = -1;
11920 if (verbose > 5)
11921 fprintf (file,
11922 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11923 clock,
11924 *n_readyp);
11926 /* Traverse the ready list from the head (the instruction to issue
11927 first), and looking for the first instruction that can issue as
11928 younger and the first instruction that can dual-issue only as
11929 older. */
11930 for (i = *n_readyp - 1; i >= 0; i--)
11932 rtx_insn *insn = ready[i];
11933 if (cortexa7_older_only (insn))
11935 first_older_only = i;
11936 if (verbose > 5)
11937 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11938 break;
11940 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11941 first_younger = i;
11944 /* Nothing to reorder because either no younger insn found or insn
11945 that can dual-issue only as older appears before any insn that
11946 can dual-issue as younger. */
11947 if (first_younger == -1)
11949 if (verbose > 5)
11950 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11951 return;
11954 /* Nothing to reorder because no older-only insn in the ready list. */
11955 if (first_older_only == -1)
11957 if (verbose > 5)
11958 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11959 return;
11962 /* Move first_older_only insn before first_younger. */
11963 if (verbose > 5)
11964 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11965 INSN_UID(ready [first_older_only]),
11966 INSN_UID(ready [first_younger]));
11967 rtx_insn *first_older_only_insn = ready [first_older_only];
11968 for (i = first_older_only; i < first_younger; i++)
11970 ready[i] = ready[i+1];
11973 ready[i] = first_older_only_insn;
11974 return;
11977 /* Implement TARGET_SCHED_REORDER. */
11978 static int
11979 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11980 int clock)
11982 switch (arm_tune)
11984 case cortexa7:
11985 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11986 break;
11987 default:
11988 /* Do nothing for other cores. */
11989 break;
11992 return arm_issue_rate ();
11995 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11996 It corrects the value of COST based on the relationship between
11997 INSN and DEP through the dependence LINK. It returns the new
11998 value. There is a per-core adjust_cost hook to adjust scheduler costs
11999 and the per-core hook can choose to completely override the generic
12000 adjust_cost function. Only put bits of code into arm_adjust_cost that
12001 are common across all cores. */
12002 static int
12003 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
12005 rtx i_pat, d_pat;
12007 /* When generating Thumb-1 code, we want to place flag-setting operations
12008 close to a conditional branch which depends on them, so that we can
12009 omit the comparison. */
12010 if (TARGET_THUMB1
12011 && REG_NOTE_KIND (link) == 0
12012 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12013 && recog_memoized (dep) >= 0
12014 && get_attr_conds (dep) == CONDS_SET)
12015 return 0;
12017 if (current_tune->sched_adjust_cost != NULL)
12019 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
12020 return cost;
12023 /* XXX Is this strictly true? */
12024 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
12025 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
12026 return 0;
12028 /* Call insns don't incur a stall, even if they follow a load. */
12029 if (REG_NOTE_KIND (link) == 0
12030 && CALL_P (insn))
12031 return 1;
12033 if ((i_pat = single_set (insn)) != NULL
12034 && MEM_P (SET_SRC (i_pat))
12035 && (d_pat = single_set (dep)) != NULL
12036 && MEM_P (SET_DEST (d_pat)))
12038 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12039 /* This is a load after a store, there is no conflict if the load reads
12040 from a cached area. Assume that loads from the stack, and from the
12041 constant pool are cached, and that others will miss. This is a
12042 hack. */
12044 if ((GET_CODE (src_mem) == SYMBOL_REF
12045 && CONSTANT_POOL_ADDRESS_P (src_mem))
12046 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12047 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12048 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12049 return 1;
12052 return cost;
12056 arm_max_conditional_execute (void)
12058 return max_insns_skipped;
12061 static int
12062 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12064 if (TARGET_32BIT)
12065 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12066 else
12067 return (optimize > 0) ? 2 : 0;
12070 static int
12071 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12073 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12076 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12077 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12078 sequences of non-executed instructions in IT blocks probably take the same
12079 amount of time as executed instructions (and the IT instruction itself takes
12080 space in icache). This function was experimentally determined to give good
12081 results on a popular embedded benchmark. */
12083 static int
12084 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12086 return (TARGET_32BIT && speed_p) ? 1
12087 : arm_default_branch_cost (speed_p, predictable_p);
12090 static int
12091 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12093 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12096 static bool fp_consts_inited = false;
12098 static REAL_VALUE_TYPE value_fp0;
12100 static void
12101 init_fp_table (void)
12103 REAL_VALUE_TYPE r;
12105 r = REAL_VALUE_ATOF ("0", DFmode);
12106 value_fp0 = r;
12107 fp_consts_inited = true;
12110 /* Return TRUE if rtx X is a valid immediate FP constant. */
12112 arm_const_double_rtx (rtx x)
12114 const REAL_VALUE_TYPE *r;
12116 if (!fp_consts_inited)
12117 init_fp_table ();
12119 r = CONST_DOUBLE_REAL_VALUE (x);
12120 if (REAL_VALUE_MINUS_ZERO (*r))
12121 return 0;
12123 if (real_equal (r, &value_fp0))
12124 return 1;
12126 return 0;
12129 /* VFPv3 has a fairly wide range of representable immediates, formed from
12130 "quarter-precision" floating-point values. These can be evaluated using this
12131 formula (with ^ for exponentiation):
12133 -1^s * n * 2^-r
12135 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12136 16 <= n <= 31 and 0 <= r <= 7.
12138 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12140 - A (most-significant) is the sign bit.
12141 - BCD are the exponent (encoded as r XOR 3).
12142 - EFGH are the mantissa (encoded as n - 16).
12145 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12146 fconst[sd] instruction, or -1 if X isn't suitable. */
12147 static int
12148 vfp3_const_double_index (rtx x)
12150 REAL_VALUE_TYPE r, m;
12151 int sign, exponent;
12152 unsigned HOST_WIDE_INT mantissa, mant_hi;
12153 unsigned HOST_WIDE_INT mask;
12154 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12155 bool fail;
12157 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12158 return -1;
12160 r = *CONST_DOUBLE_REAL_VALUE (x);
12162 /* We can't represent these things, so detect them first. */
12163 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12164 return -1;
12166 /* Extract sign, exponent and mantissa. */
12167 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12168 r = real_value_abs (&r);
12169 exponent = REAL_EXP (&r);
12170 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12171 highest (sign) bit, with a fixed binary point at bit point_pos.
12172 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12173 bits for the mantissa, this may fail (low bits would be lost). */
12174 real_ldexp (&m, &r, point_pos - exponent);
12175 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12176 mantissa = w.elt (0);
12177 mant_hi = w.elt (1);
12179 /* If there are bits set in the low part of the mantissa, we can't
12180 represent this value. */
12181 if (mantissa != 0)
12182 return -1;
12184 /* Now make it so that mantissa contains the most-significant bits, and move
12185 the point_pos to indicate that the least-significant bits have been
12186 discarded. */
12187 point_pos -= HOST_BITS_PER_WIDE_INT;
12188 mantissa = mant_hi;
12190 /* We can permit four significant bits of mantissa only, plus a high bit
12191 which is always 1. */
12192 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12193 if ((mantissa & mask) != 0)
12194 return -1;
12196 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12197 mantissa >>= point_pos - 5;
12199 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12200 floating-point immediate zero with Neon using an integer-zero load, but
12201 that case is handled elsewhere.) */
12202 if (mantissa == 0)
12203 return -1;
12205 gcc_assert (mantissa >= 16 && mantissa <= 31);
12207 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12208 normalized significands are in the range [1, 2). (Our mantissa is shifted
12209 left 4 places at this point relative to normalized IEEE754 values). GCC
12210 internally uses [0.5, 1) (see real.c), so the exponent returned from
12211 REAL_EXP must be altered. */
12212 exponent = 5 - exponent;
12214 if (exponent < 0 || exponent > 7)
12215 return -1;
12217 /* Sign, mantissa and exponent are now in the correct form to plug into the
12218 formula described in the comment above. */
12219 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12222 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12224 vfp3_const_double_rtx (rtx x)
12226 if (!TARGET_VFP3)
12227 return 0;
12229 return vfp3_const_double_index (x) != -1;
12232 /* Recognize immediates which can be used in various Neon instructions. Legal
12233 immediates are described by the following table (for VMVN variants, the
12234 bitwise inverse of the constant shown is recognized. In either case, VMOV
12235 is output and the correct instruction to use for a given constant is chosen
12236 by the assembler). The constant shown is replicated across all elements of
12237 the destination vector.
12239 insn elems variant constant (binary)
12240 ---- ----- ------- -----------------
12241 vmov i32 0 00000000 00000000 00000000 abcdefgh
12242 vmov i32 1 00000000 00000000 abcdefgh 00000000
12243 vmov i32 2 00000000 abcdefgh 00000000 00000000
12244 vmov i32 3 abcdefgh 00000000 00000000 00000000
12245 vmov i16 4 00000000 abcdefgh
12246 vmov i16 5 abcdefgh 00000000
12247 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12248 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12249 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12250 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12251 vmvn i16 10 00000000 abcdefgh
12252 vmvn i16 11 abcdefgh 00000000
12253 vmov i32 12 00000000 00000000 abcdefgh 11111111
12254 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12255 vmov i32 14 00000000 abcdefgh 11111111 11111111
12256 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12257 vmov i8 16 abcdefgh
12258 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12259 eeeeeeee ffffffff gggggggg hhhhhhhh
12260 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12261 vmov f32 19 00000000 00000000 00000000 00000000
12263 For case 18, B = !b. Representable values are exactly those accepted by
12264 vfp3_const_double_index, but are output as floating-point numbers rather
12265 than indices.
12267 For case 19, we will change it to vmov.i32 when assembling.
12269 Variants 0-5 (inclusive) may also be used as immediates for the second
12270 operand of VORR/VBIC instructions.
12272 The INVERSE argument causes the bitwise inverse of the given operand to be
12273 recognized instead (used for recognizing legal immediates for the VAND/VORN
12274 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12275 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12276 output, rather than the real insns vbic/vorr).
12278 INVERSE makes no difference to the recognition of float vectors.
12280 The return value is the variant of immediate as shown in the above table, or
12281 -1 if the given value doesn't match any of the listed patterns.
12283 static int
12284 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12285 rtx *modconst, int *elementwidth)
12287 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12288 matches = 1; \
12289 for (i = 0; i < idx; i += (STRIDE)) \
12290 if (!(TEST)) \
12291 matches = 0; \
12292 if (matches) \
12294 immtype = (CLASS); \
12295 elsize = (ELSIZE); \
12296 break; \
12299 unsigned int i, elsize = 0, idx = 0, n_elts;
12300 unsigned int innersize;
12301 unsigned char bytes[16];
12302 int immtype = -1, matches;
12303 unsigned int invmask = inverse ? 0xff : 0;
12304 bool vector = GET_CODE (op) == CONST_VECTOR;
12306 if (vector)
12307 n_elts = CONST_VECTOR_NUNITS (op);
12308 else
12310 n_elts = 1;
12311 if (mode == VOIDmode)
12312 mode = DImode;
12315 innersize = GET_MODE_UNIT_SIZE (mode);
12317 /* Vectors of float constants. */
12318 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12320 rtx el0 = CONST_VECTOR_ELT (op, 0);
12321 const REAL_VALUE_TYPE *r0;
12323 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12324 return -1;
12326 r0 = CONST_DOUBLE_REAL_VALUE (el0);
12328 for (i = 1; i < n_elts; i++)
12330 rtx elt = CONST_VECTOR_ELT (op, i);
12331 if (!real_equal (r0, CONST_DOUBLE_REAL_VALUE (elt)))
12332 return -1;
12335 if (modconst)
12336 *modconst = CONST_VECTOR_ELT (op, 0);
12338 if (elementwidth)
12339 *elementwidth = 0;
12341 if (el0 == CONST0_RTX (GET_MODE (el0)))
12342 return 19;
12343 else
12344 return 18;
12347 /* Splat vector constant out into a byte vector. */
12348 for (i = 0; i < n_elts; i++)
12350 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12351 unsigned HOST_WIDE_INT elpart;
12352 unsigned int part, parts;
12354 if (CONST_INT_P (el))
12356 elpart = INTVAL (el);
12357 parts = 1;
12359 else if (CONST_DOUBLE_P (el))
12361 elpart = CONST_DOUBLE_LOW (el);
12362 parts = 2;
12364 else
12365 gcc_unreachable ();
12367 for (part = 0; part < parts; part++)
12369 unsigned int byte;
12370 for (byte = 0; byte < innersize; byte++)
12372 bytes[idx++] = (elpart & 0xff) ^ invmask;
12373 elpart >>= BITS_PER_UNIT;
12375 if (CONST_DOUBLE_P (el))
12376 elpart = CONST_DOUBLE_HIGH (el);
12380 /* Sanity check. */
12381 gcc_assert (idx == GET_MODE_SIZE (mode));
12385 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12386 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12388 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12389 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12391 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12392 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12394 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12395 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12397 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12399 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12401 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12402 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12404 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12405 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12407 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12408 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12410 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12411 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12413 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12415 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12417 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12418 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12420 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12421 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12423 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12424 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12426 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12427 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12429 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12431 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12432 && bytes[i] == bytes[(i + 8) % idx]);
12434 while (0);
12436 if (immtype == -1)
12437 return -1;
12439 if (elementwidth)
12440 *elementwidth = elsize;
12442 if (modconst)
12444 unsigned HOST_WIDE_INT imm = 0;
12446 /* Un-invert bytes of recognized vector, if necessary. */
12447 if (invmask != 0)
12448 for (i = 0; i < idx; i++)
12449 bytes[i] ^= invmask;
12451 if (immtype == 17)
12453 /* FIXME: Broken on 32-bit H_W_I hosts. */
12454 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12456 for (i = 0; i < 8; i++)
12457 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12458 << (i * BITS_PER_UNIT);
12460 *modconst = GEN_INT (imm);
12462 else
12464 unsigned HOST_WIDE_INT imm = 0;
12466 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12467 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12469 *modconst = GEN_INT (imm);
12473 return immtype;
12474 #undef CHECK
12477 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12478 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12479 float elements), and a modified constant (whatever should be output for a
12480 VMOV) in *MODCONST. */
12483 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12484 rtx *modconst, int *elementwidth)
12486 rtx tmpconst;
12487 int tmpwidth;
12488 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12490 if (retval == -1)
12491 return 0;
12493 if (modconst)
12494 *modconst = tmpconst;
12496 if (elementwidth)
12497 *elementwidth = tmpwidth;
12499 return 1;
12502 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12503 the immediate is valid, write a constant suitable for using as an operand
12504 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12505 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12508 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12509 rtx *modconst, int *elementwidth)
12511 rtx tmpconst;
12512 int tmpwidth;
12513 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12515 if (retval < 0 || retval > 5)
12516 return 0;
12518 if (modconst)
12519 *modconst = tmpconst;
12521 if (elementwidth)
12522 *elementwidth = tmpwidth;
12524 return 1;
12527 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12528 the immediate is valid, write a constant suitable for using as an operand
12529 to VSHR/VSHL to *MODCONST and the corresponding element width to
12530 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12531 because they have different limitations. */
12534 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12535 rtx *modconst, int *elementwidth,
12536 bool isleftshift)
12538 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12539 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12540 unsigned HOST_WIDE_INT last_elt = 0;
12541 unsigned HOST_WIDE_INT maxshift;
12543 /* Split vector constant out into a byte vector. */
12544 for (i = 0; i < n_elts; i++)
12546 rtx el = CONST_VECTOR_ELT (op, i);
12547 unsigned HOST_WIDE_INT elpart;
12549 if (CONST_INT_P (el))
12550 elpart = INTVAL (el);
12551 else if (CONST_DOUBLE_P (el))
12552 return 0;
12553 else
12554 gcc_unreachable ();
12556 if (i != 0 && elpart != last_elt)
12557 return 0;
12559 last_elt = elpart;
12562 /* Shift less than element size. */
12563 maxshift = innersize * 8;
12565 if (isleftshift)
12567 /* Left shift immediate value can be from 0 to <size>-1. */
12568 if (last_elt >= maxshift)
12569 return 0;
12571 else
12573 /* Right shift immediate value can be from 1 to <size>. */
12574 if (last_elt == 0 || last_elt > maxshift)
12575 return 0;
12578 if (elementwidth)
12579 *elementwidth = innersize * 8;
12581 if (modconst)
12582 *modconst = CONST_VECTOR_ELT (op, 0);
12584 return 1;
12587 /* Return a string suitable for output of Neon immediate logic operation
12588 MNEM. */
12590 char *
12591 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12592 int inverse, int quad)
12594 int width, is_valid;
12595 static char templ[40];
12597 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12599 gcc_assert (is_valid != 0);
12601 if (quad)
12602 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12603 else
12604 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12606 return templ;
12609 /* Return a string suitable for output of Neon immediate shift operation
12610 (VSHR or VSHL) MNEM. */
12612 char *
12613 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12614 machine_mode mode, int quad,
12615 bool isleftshift)
12617 int width, is_valid;
12618 static char templ[40];
12620 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12621 gcc_assert (is_valid != 0);
12623 if (quad)
12624 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12625 else
12626 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12628 return templ;
12631 /* Output a sequence of pairwise operations to implement a reduction.
12632 NOTE: We do "too much work" here, because pairwise operations work on two
12633 registers-worth of operands in one go. Unfortunately we can't exploit those
12634 extra calculations to do the full operation in fewer steps, I don't think.
12635 Although all vector elements of the result but the first are ignored, we
12636 actually calculate the same result in each of the elements. An alternative
12637 such as initially loading a vector with zero to use as each of the second
12638 operands would use up an additional register and take an extra instruction,
12639 for no particular gain. */
12641 void
12642 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12643 rtx (*reduc) (rtx, rtx, rtx))
12645 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12646 rtx tmpsum = op1;
12648 for (i = parts / 2; i >= 1; i /= 2)
12650 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12651 emit_insn (reduc (dest, tmpsum, tmpsum));
12652 tmpsum = dest;
12656 /* If VALS is a vector constant that can be loaded into a register
12657 using VDUP, generate instructions to do so and return an RTX to
12658 assign to the register. Otherwise return NULL_RTX. */
12660 static rtx
12661 neon_vdup_constant (rtx vals)
12663 machine_mode mode = GET_MODE (vals);
12664 machine_mode inner_mode = GET_MODE_INNER (mode);
12665 rtx x;
12667 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12668 return NULL_RTX;
12670 if (!const_vec_duplicate_p (vals, &x))
12671 /* The elements are not all the same. We could handle repeating
12672 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12673 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12674 vdup.i16). */
12675 return NULL_RTX;
12677 /* We can load this constant by using VDUP and a constant in a
12678 single ARM register. This will be cheaper than a vector
12679 load. */
12681 x = copy_to_mode_reg (inner_mode, x);
12682 return gen_rtx_VEC_DUPLICATE (mode, x);
12685 /* Generate code to load VALS, which is a PARALLEL containing only
12686 constants (for vec_init) or CONST_VECTOR, efficiently into a
12687 register. Returns an RTX to copy into the register, or NULL_RTX
12688 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12691 neon_make_constant (rtx vals)
12693 machine_mode mode = GET_MODE (vals);
12694 rtx target;
12695 rtx const_vec = NULL_RTX;
12696 int n_elts = GET_MODE_NUNITS (mode);
12697 int n_const = 0;
12698 int i;
12700 if (GET_CODE (vals) == CONST_VECTOR)
12701 const_vec = vals;
12702 else if (GET_CODE (vals) == PARALLEL)
12704 /* A CONST_VECTOR must contain only CONST_INTs and
12705 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12706 Only store valid constants in a CONST_VECTOR. */
12707 for (i = 0; i < n_elts; ++i)
12709 rtx x = XVECEXP (vals, 0, i);
12710 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12711 n_const++;
12713 if (n_const == n_elts)
12714 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12716 else
12717 gcc_unreachable ();
12719 if (const_vec != NULL
12720 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12721 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12722 return const_vec;
12723 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12724 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12725 pipeline cycle; creating the constant takes one or two ARM
12726 pipeline cycles. */
12727 return target;
12728 else if (const_vec != NULL_RTX)
12729 /* Load from constant pool. On Cortex-A8 this takes two cycles
12730 (for either double or quad vectors). We can not take advantage
12731 of single-cycle VLD1 because we need a PC-relative addressing
12732 mode. */
12733 return const_vec;
12734 else
12735 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12736 We can not construct an initializer. */
12737 return NULL_RTX;
12740 /* Initialize vector TARGET to VALS. */
12742 void
12743 neon_expand_vector_init (rtx target, rtx vals)
12745 machine_mode mode = GET_MODE (target);
12746 machine_mode inner_mode = GET_MODE_INNER (mode);
12747 int n_elts = GET_MODE_NUNITS (mode);
12748 int n_var = 0, one_var = -1;
12749 bool all_same = true;
12750 rtx x, mem;
12751 int i;
12753 for (i = 0; i < n_elts; ++i)
12755 x = XVECEXP (vals, 0, i);
12756 if (!CONSTANT_P (x))
12757 ++n_var, one_var = i;
12759 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12760 all_same = false;
12763 if (n_var == 0)
12765 rtx constant = neon_make_constant (vals);
12766 if (constant != NULL_RTX)
12768 emit_move_insn (target, constant);
12769 return;
12773 /* Splat a single non-constant element if we can. */
12774 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12776 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12777 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12778 return;
12781 /* One field is non-constant. Load constant then overwrite varying
12782 field. This is more efficient than using the stack. */
12783 if (n_var == 1)
12785 rtx copy = copy_rtx (vals);
12786 rtx index = GEN_INT (one_var);
12788 /* Load constant part of vector, substitute neighboring value for
12789 varying element. */
12790 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12791 neon_expand_vector_init (target, copy);
12793 /* Insert variable. */
12794 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12795 switch (mode)
12797 case V8QImode:
12798 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12799 break;
12800 case V16QImode:
12801 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12802 break;
12803 case V4HImode:
12804 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12805 break;
12806 case V8HImode:
12807 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12808 break;
12809 case V2SImode:
12810 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12811 break;
12812 case V4SImode:
12813 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12814 break;
12815 case V2SFmode:
12816 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12817 break;
12818 case V4SFmode:
12819 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12820 break;
12821 case V2DImode:
12822 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12823 break;
12824 default:
12825 gcc_unreachable ();
12827 return;
12830 /* Construct the vector in memory one field at a time
12831 and load the whole vector. */
12832 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12833 for (i = 0; i < n_elts; i++)
12834 emit_move_insn (adjust_address_nv (mem, inner_mode,
12835 i * GET_MODE_SIZE (inner_mode)),
12836 XVECEXP (vals, 0, i));
12837 emit_move_insn (target, mem);
12840 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12841 ERR if it doesn't. EXP indicates the source location, which includes the
12842 inlining history for intrinsics. */
12844 static void
12845 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12846 const_tree exp, const char *desc)
12848 HOST_WIDE_INT lane;
12850 gcc_assert (CONST_INT_P (operand));
12852 lane = INTVAL (operand);
12854 if (lane < low || lane >= high)
12856 if (exp)
12857 error ("%K%s %wd out of range %wd - %wd",
12858 exp, desc, lane, low, high - 1);
12859 else
12860 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12864 /* Bounds-check lanes. */
12866 void
12867 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12868 const_tree exp)
12870 bounds_check (operand, low, high, exp, "lane");
12873 /* Bounds-check constants. */
12875 void
12876 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12878 bounds_check (operand, low, high, NULL_TREE, "constant");
12881 HOST_WIDE_INT
12882 neon_element_bits (machine_mode mode)
12884 return GET_MODE_UNIT_BITSIZE (mode);
12888 /* Predicates for `match_operand' and `match_operator'. */
12890 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12891 WB is true if full writeback address modes are allowed and is false
12892 if limited writeback address modes (POST_INC and PRE_DEC) are
12893 allowed. */
12896 arm_coproc_mem_operand (rtx op, bool wb)
12898 rtx ind;
12900 /* Reject eliminable registers. */
12901 if (! (reload_in_progress || reload_completed || lra_in_progress)
12902 && ( reg_mentioned_p (frame_pointer_rtx, op)
12903 || reg_mentioned_p (arg_pointer_rtx, op)
12904 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12905 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12906 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12907 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12908 return FALSE;
12910 /* Constants are converted into offsets from labels. */
12911 if (!MEM_P (op))
12912 return FALSE;
12914 ind = XEXP (op, 0);
12916 if (reload_completed
12917 && (GET_CODE (ind) == LABEL_REF
12918 || (GET_CODE (ind) == CONST
12919 && GET_CODE (XEXP (ind, 0)) == PLUS
12920 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12921 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12922 return TRUE;
12924 /* Match: (mem (reg)). */
12925 if (REG_P (ind))
12926 return arm_address_register_rtx_p (ind, 0);
12928 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12929 acceptable in any case (subject to verification by
12930 arm_address_register_rtx_p). We need WB to be true to accept
12931 PRE_INC and POST_DEC. */
12932 if (GET_CODE (ind) == POST_INC
12933 || GET_CODE (ind) == PRE_DEC
12934 || (wb
12935 && (GET_CODE (ind) == PRE_INC
12936 || GET_CODE (ind) == POST_DEC)))
12937 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12939 if (wb
12940 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12941 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12942 && GET_CODE (XEXP (ind, 1)) == PLUS
12943 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12944 ind = XEXP (ind, 1);
12946 /* Match:
12947 (plus (reg)
12948 (const)). */
12949 if (GET_CODE (ind) == PLUS
12950 && REG_P (XEXP (ind, 0))
12951 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12952 && CONST_INT_P (XEXP (ind, 1))
12953 && INTVAL (XEXP (ind, 1)) > -1024
12954 && INTVAL (XEXP (ind, 1)) < 1024
12955 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12956 return TRUE;
12958 return FALSE;
12961 /* Return TRUE if OP is a memory operand which we can load or store a vector
12962 to/from. TYPE is one of the following values:
12963 0 - Vector load/stor (vldr)
12964 1 - Core registers (ldm)
12965 2 - Element/structure loads (vld1)
12968 neon_vector_mem_operand (rtx op, int type, bool strict)
12970 rtx ind;
12972 /* Reject eliminable registers. */
12973 if (! (reload_in_progress || reload_completed)
12974 && ( reg_mentioned_p (frame_pointer_rtx, op)
12975 || reg_mentioned_p (arg_pointer_rtx, op)
12976 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12977 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12978 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12979 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12980 return !strict;
12982 /* Constants are converted into offsets from labels. */
12983 if (!MEM_P (op))
12984 return FALSE;
12986 ind = XEXP (op, 0);
12988 if (reload_completed
12989 && (GET_CODE (ind) == LABEL_REF
12990 || (GET_CODE (ind) == CONST
12991 && GET_CODE (XEXP (ind, 0)) == PLUS
12992 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12993 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12994 return TRUE;
12996 /* Match: (mem (reg)). */
12997 if (REG_P (ind))
12998 return arm_address_register_rtx_p (ind, 0);
13000 /* Allow post-increment with Neon registers. */
13001 if ((type != 1 && GET_CODE (ind) == POST_INC)
13002 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13003 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13005 /* Allow post-increment by register for VLDn */
13006 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13007 && GET_CODE (XEXP (ind, 1)) == PLUS
13008 && REG_P (XEXP (XEXP (ind, 1), 1)))
13009 return true;
13011 /* Match:
13012 (plus (reg)
13013 (const)). */
13014 if (type == 0
13015 && GET_CODE (ind) == PLUS
13016 && REG_P (XEXP (ind, 0))
13017 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13018 && CONST_INT_P (XEXP (ind, 1))
13019 && INTVAL (XEXP (ind, 1)) > -1024
13020 /* For quad modes, we restrict the constant offset to be slightly less
13021 than what the instruction format permits. We have no such constraint
13022 on double mode offsets. (This must match arm_legitimate_index_p.) */
13023 && (INTVAL (XEXP (ind, 1))
13024 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13025 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13026 return TRUE;
13028 return FALSE;
13031 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13032 type. */
13034 neon_struct_mem_operand (rtx op)
13036 rtx ind;
13038 /* Reject eliminable registers. */
13039 if (! (reload_in_progress || reload_completed)
13040 && ( reg_mentioned_p (frame_pointer_rtx, op)
13041 || reg_mentioned_p (arg_pointer_rtx, op)
13042 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13043 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13044 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13045 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13046 return FALSE;
13048 /* Constants are converted into offsets from labels. */
13049 if (!MEM_P (op))
13050 return FALSE;
13052 ind = XEXP (op, 0);
13054 if (reload_completed
13055 && (GET_CODE (ind) == LABEL_REF
13056 || (GET_CODE (ind) == CONST
13057 && GET_CODE (XEXP (ind, 0)) == PLUS
13058 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13059 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13060 return TRUE;
13062 /* Match: (mem (reg)). */
13063 if (REG_P (ind))
13064 return arm_address_register_rtx_p (ind, 0);
13066 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13067 if (GET_CODE (ind) == POST_INC
13068 || GET_CODE (ind) == PRE_DEC)
13069 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13071 return FALSE;
13074 /* Return true if X is a register that will be eliminated later on. */
13076 arm_eliminable_register (rtx x)
13078 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13079 || REGNO (x) == ARG_POINTER_REGNUM
13080 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13081 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13084 /* Return GENERAL_REGS if a scratch register required to reload x to/from
13085 coprocessor registers. Otherwise return NO_REGS. */
13087 enum reg_class
13088 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13090 if (mode == HFmode)
13092 if (!TARGET_NEON_FP16)
13093 return GENERAL_REGS;
13094 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13095 return NO_REGS;
13096 return GENERAL_REGS;
13099 /* The neon move patterns handle all legitimate vector and struct
13100 addresses. */
13101 if (TARGET_NEON
13102 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13103 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13104 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13105 || VALID_NEON_STRUCT_MODE (mode)))
13106 return NO_REGS;
13108 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13109 return NO_REGS;
13111 return GENERAL_REGS;
13114 /* Values which must be returned in the most-significant end of the return
13115 register. */
13117 static bool
13118 arm_return_in_msb (const_tree valtype)
13120 return (TARGET_AAPCS_BASED
13121 && BYTES_BIG_ENDIAN
13122 && (AGGREGATE_TYPE_P (valtype)
13123 || TREE_CODE (valtype) == COMPLEX_TYPE
13124 || FIXED_POINT_TYPE_P (valtype)));
13127 /* Return TRUE if X references a SYMBOL_REF. */
13129 symbol_mentioned_p (rtx x)
13131 const char * fmt;
13132 int i;
13134 if (GET_CODE (x) == SYMBOL_REF)
13135 return 1;
13137 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13138 are constant offsets, not symbols. */
13139 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13140 return 0;
13142 fmt = GET_RTX_FORMAT (GET_CODE (x));
13144 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13146 if (fmt[i] == 'E')
13148 int j;
13150 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13151 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13152 return 1;
13154 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13155 return 1;
13158 return 0;
13161 /* Return TRUE if X references a LABEL_REF. */
13163 label_mentioned_p (rtx x)
13165 const char * fmt;
13166 int i;
13168 if (GET_CODE (x) == LABEL_REF)
13169 return 1;
13171 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13172 instruction, but they are constant offsets, not symbols. */
13173 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13174 return 0;
13176 fmt = GET_RTX_FORMAT (GET_CODE (x));
13177 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13179 if (fmt[i] == 'E')
13181 int j;
13183 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13184 if (label_mentioned_p (XVECEXP (x, i, j)))
13185 return 1;
13187 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13188 return 1;
13191 return 0;
13195 tls_mentioned_p (rtx x)
13197 switch (GET_CODE (x))
13199 case CONST:
13200 return tls_mentioned_p (XEXP (x, 0));
13202 case UNSPEC:
13203 if (XINT (x, 1) == UNSPEC_TLS)
13204 return 1;
13206 default:
13207 return 0;
13211 /* Must not copy any rtx that uses a pc-relative address. */
13213 static bool
13214 arm_cannot_copy_insn_p (rtx_insn *insn)
13216 /* The tls call insn cannot be copied, as it is paired with a data
13217 word. */
13218 if (recog_memoized (insn) == CODE_FOR_tlscall)
13219 return true;
13221 subrtx_iterator::array_type array;
13222 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13224 const_rtx x = *iter;
13225 if (GET_CODE (x) == UNSPEC
13226 && (XINT (x, 1) == UNSPEC_PIC_BASE
13227 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13228 return true;
13230 return false;
13233 enum rtx_code
13234 minmax_code (rtx x)
13236 enum rtx_code code = GET_CODE (x);
13238 switch (code)
13240 case SMAX:
13241 return GE;
13242 case SMIN:
13243 return LE;
13244 case UMIN:
13245 return LEU;
13246 case UMAX:
13247 return GEU;
13248 default:
13249 gcc_unreachable ();
13253 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13255 bool
13256 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13257 int *mask, bool *signed_sat)
13259 /* The high bound must be a power of two minus one. */
13260 int log = exact_log2 (INTVAL (hi_bound) + 1);
13261 if (log == -1)
13262 return false;
13264 /* The low bound is either zero (for usat) or one less than the
13265 negation of the high bound (for ssat). */
13266 if (INTVAL (lo_bound) == 0)
13268 if (mask)
13269 *mask = log;
13270 if (signed_sat)
13271 *signed_sat = false;
13273 return true;
13276 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13278 if (mask)
13279 *mask = log + 1;
13280 if (signed_sat)
13281 *signed_sat = true;
13283 return true;
13286 return false;
13289 /* Return 1 if memory locations are adjacent. */
13291 adjacent_mem_locations (rtx a, rtx b)
13293 /* We don't guarantee to preserve the order of these memory refs. */
13294 if (volatile_refs_p (a) || volatile_refs_p (b))
13295 return 0;
13297 if ((REG_P (XEXP (a, 0))
13298 || (GET_CODE (XEXP (a, 0)) == PLUS
13299 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13300 && (REG_P (XEXP (b, 0))
13301 || (GET_CODE (XEXP (b, 0)) == PLUS
13302 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13304 HOST_WIDE_INT val0 = 0, val1 = 0;
13305 rtx reg0, reg1;
13306 int val_diff;
13308 if (GET_CODE (XEXP (a, 0)) == PLUS)
13310 reg0 = XEXP (XEXP (a, 0), 0);
13311 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13313 else
13314 reg0 = XEXP (a, 0);
13316 if (GET_CODE (XEXP (b, 0)) == PLUS)
13318 reg1 = XEXP (XEXP (b, 0), 0);
13319 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13321 else
13322 reg1 = XEXP (b, 0);
13324 /* Don't accept any offset that will require multiple
13325 instructions to handle, since this would cause the
13326 arith_adjacentmem pattern to output an overlong sequence. */
13327 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13328 return 0;
13330 /* Don't allow an eliminable register: register elimination can make
13331 the offset too large. */
13332 if (arm_eliminable_register (reg0))
13333 return 0;
13335 val_diff = val1 - val0;
13337 if (arm_ld_sched)
13339 /* If the target has load delay slots, then there's no benefit
13340 to using an ldm instruction unless the offset is zero and
13341 we are optimizing for size. */
13342 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13343 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13344 && (val_diff == 4 || val_diff == -4));
13347 return ((REGNO (reg0) == REGNO (reg1))
13348 && (val_diff == 4 || val_diff == -4));
13351 return 0;
13354 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13355 for load operations, false for store operations. CONSECUTIVE is true
13356 if the register numbers in the operation must be consecutive in the register
13357 bank. RETURN_PC is true if value is to be loaded in PC.
13358 The pattern we are trying to match for load is:
13359 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13360 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13363 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13365 where
13366 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13367 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13368 3. If consecutive is TRUE, then for kth register being loaded,
13369 REGNO (R_dk) = REGNO (R_d0) + k.
13370 The pattern for store is similar. */
13371 bool
13372 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13373 bool consecutive, bool return_pc)
13375 HOST_WIDE_INT count = XVECLEN (op, 0);
13376 rtx reg, mem, addr;
13377 unsigned regno;
13378 unsigned first_regno;
13379 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13380 rtx elt;
13381 bool addr_reg_in_reglist = false;
13382 bool update = false;
13383 int reg_increment;
13384 int offset_adj;
13385 int regs_per_val;
13387 /* If not in SImode, then registers must be consecutive
13388 (e.g., VLDM instructions for DFmode). */
13389 gcc_assert ((mode == SImode) || consecutive);
13390 /* Setting return_pc for stores is illegal. */
13391 gcc_assert (!return_pc || load);
13393 /* Set up the increments and the regs per val based on the mode. */
13394 reg_increment = GET_MODE_SIZE (mode);
13395 regs_per_val = reg_increment / 4;
13396 offset_adj = return_pc ? 1 : 0;
13398 if (count <= 1
13399 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13400 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13401 return false;
13403 /* Check if this is a write-back. */
13404 elt = XVECEXP (op, 0, offset_adj);
13405 if (GET_CODE (SET_SRC (elt)) == PLUS)
13407 i++;
13408 base = 1;
13409 update = true;
13411 /* The offset adjustment must be the number of registers being
13412 popped times the size of a single register. */
13413 if (!REG_P (SET_DEST (elt))
13414 || !REG_P (XEXP (SET_SRC (elt), 0))
13415 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13416 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13417 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13418 ((count - 1 - offset_adj) * reg_increment))
13419 return false;
13422 i = i + offset_adj;
13423 base = base + offset_adj;
13424 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13425 success depends on the type: VLDM can do just one reg,
13426 LDM must do at least two. */
13427 if ((count <= i) && (mode == SImode))
13428 return false;
13430 elt = XVECEXP (op, 0, i - 1);
13431 if (GET_CODE (elt) != SET)
13432 return false;
13434 if (load)
13436 reg = SET_DEST (elt);
13437 mem = SET_SRC (elt);
13439 else
13441 reg = SET_SRC (elt);
13442 mem = SET_DEST (elt);
13445 if (!REG_P (reg) || !MEM_P (mem))
13446 return false;
13448 regno = REGNO (reg);
13449 first_regno = regno;
13450 addr = XEXP (mem, 0);
13451 if (GET_CODE (addr) == PLUS)
13453 if (!CONST_INT_P (XEXP (addr, 1)))
13454 return false;
13456 offset = INTVAL (XEXP (addr, 1));
13457 addr = XEXP (addr, 0);
13460 if (!REG_P (addr))
13461 return false;
13463 /* Don't allow SP to be loaded unless it is also the base register. It
13464 guarantees that SP is reset correctly when an LDM instruction
13465 is interrupted. Otherwise, we might end up with a corrupt stack. */
13466 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13467 return false;
13469 for (; i < count; i++)
13471 elt = XVECEXP (op, 0, i);
13472 if (GET_CODE (elt) != SET)
13473 return false;
13475 if (load)
13477 reg = SET_DEST (elt);
13478 mem = SET_SRC (elt);
13480 else
13482 reg = SET_SRC (elt);
13483 mem = SET_DEST (elt);
13486 if (!REG_P (reg)
13487 || GET_MODE (reg) != mode
13488 || REGNO (reg) <= regno
13489 || (consecutive
13490 && (REGNO (reg) !=
13491 (unsigned int) (first_regno + regs_per_val * (i - base))))
13492 /* Don't allow SP to be loaded unless it is also the base register. It
13493 guarantees that SP is reset correctly when an LDM instruction
13494 is interrupted. Otherwise, we might end up with a corrupt stack. */
13495 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13496 || !MEM_P (mem)
13497 || GET_MODE (mem) != mode
13498 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13499 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13500 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13501 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13502 offset + (i - base) * reg_increment))
13503 && (!REG_P (XEXP (mem, 0))
13504 || offset + (i - base) * reg_increment != 0)))
13505 return false;
13507 regno = REGNO (reg);
13508 if (regno == REGNO (addr))
13509 addr_reg_in_reglist = true;
13512 if (load)
13514 if (update && addr_reg_in_reglist)
13515 return false;
13517 /* For Thumb-1, address register is always modified - either by write-back
13518 or by explicit load. If the pattern does not describe an update,
13519 then the address register must be in the list of loaded registers. */
13520 if (TARGET_THUMB1)
13521 return update || addr_reg_in_reglist;
13524 return true;
13527 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13528 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13529 instruction. ADD_OFFSET is nonzero if the base address register needs
13530 to be modified with an add instruction before we can use it. */
13532 static bool
13533 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13534 int nops, HOST_WIDE_INT add_offset)
13536 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13537 if the offset isn't small enough. The reason 2 ldrs are faster
13538 is because these ARMs are able to do more than one cache access
13539 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13540 whilst the ARM8 has a double bandwidth cache. This means that
13541 these cores can do both an instruction fetch and a data fetch in
13542 a single cycle, so the trick of calculating the address into a
13543 scratch register (one of the result regs) and then doing a load
13544 multiple actually becomes slower (and no smaller in code size).
13545 That is the transformation
13547 ldr rd1, [rbase + offset]
13548 ldr rd2, [rbase + offset + 4]
13552 add rd1, rbase, offset
13553 ldmia rd1, {rd1, rd2}
13555 produces worse code -- '3 cycles + any stalls on rd2' instead of
13556 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13557 access per cycle, the first sequence could never complete in less
13558 than 6 cycles, whereas the ldm sequence would only take 5 and
13559 would make better use of sequential accesses if not hitting the
13560 cache.
13562 We cheat here and test 'arm_ld_sched' which we currently know to
13563 only be true for the ARM8, ARM9 and StrongARM. If this ever
13564 changes, then the test below needs to be reworked. */
13565 if (nops == 2 && arm_ld_sched && add_offset != 0)
13566 return false;
13568 /* XScale has load-store double instructions, but they have stricter
13569 alignment requirements than load-store multiple, so we cannot
13570 use them.
13572 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13573 the pipeline until completion.
13575 NREGS CYCLES
13581 An ldr instruction takes 1-3 cycles, but does not block the
13582 pipeline.
13584 NREGS CYCLES
13585 1 1-3
13586 2 2-6
13587 3 3-9
13588 4 4-12
13590 Best case ldr will always win. However, the more ldr instructions
13591 we issue, the less likely we are to be able to schedule them well.
13592 Using ldr instructions also increases code size.
13594 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13595 for counts of 3 or 4 regs. */
13596 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13597 return false;
13598 return true;
13601 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13602 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13603 an array ORDER which describes the sequence to use when accessing the
13604 offsets that produces an ascending order. In this sequence, each
13605 offset must be larger by exactly 4 than the previous one. ORDER[0]
13606 must have been filled in with the lowest offset by the caller.
13607 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13608 we use to verify that ORDER produces an ascending order of registers.
13609 Return true if it was possible to construct such an order, false if
13610 not. */
13612 static bool
13613 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13614 int *unsorted_regs)
13616 int i;
13617 for (i = 1; i < nops; i++)
13619 int j;
13621 order[i] = order[i - 1];
13622 for (j = 0; j < nops; j++)
13623 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13625 /* We must find exactly one offset that is higher than the
13626 previous one by 4. */
13627 if (order[i] != order[i - 1])
13628 return false;
13629 order[i] = j;
13631 if (order[i] == order[i - 1])
13632 return false;
13633 /* The register numbers must be ascending. */
13634 if (unsorted_regs != NULL
13635 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13636 return false;
13638 return true;
13641 /* Used to determine in a peephole whether a sequence of load
13642 instructions can be changed into a load-multiple instruction.
13643 NOPS is the number of separate load instructions we are examining. The
13644 first NOPS entries in OPERANDS are the destination registers, the
13645 next NOPS entries are memory operands. If this function is
13646 successful, *BASE is set to the common base register of the memory
13647 accesses; *LOAD_OFFSET is set to the first memory location's offset
13648 from that base register.
13649 REGS is an array filled in with the destination register numbers.
13650 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13651 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13652 the sequence of registers in REGS matches the loads from ascending memory
13653 locations, and the function verifies that the register numbers are
13654 themselves ascending. If CHECK_REGS is false, the register numbers
13655 are stored in the order they are found in the operands. */
13656 static int
13657 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13658 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13660 int unsorted_regs[MAX_LDM_STM_OPS];
13661 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13662 int order[MAX_LDM_STM_OPS];
13663 rtx base_reg_rtx = NULL;
13664 int base_reg = -1;
13665 int i, ldm_case;
13667 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13668 easily extended if required. */
13669 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13671 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13673 /* Loop over the operands and check that the memory references are
13674 suitable (i.e. immediate offsets from the same base register). At
13675 the same time, extract the target register, and the memory
13676 offsets. */
13677 for (i = 0; i < nops; i++)
13679 rtx reg;
13680 rtx offset;
13682 /* Convert a subreg of a mem into the mem itself. */
13683 if (GET_CODE (operands[nops + i]) == SUBREG)
13684 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13686 gcc_assert (MEM_P (operands[nops + i]));
13688 /* Don't reorder volatile memory references; it doesn't seem worth
13689 looking for the case where the order is ok anyway. */
13690 if (MEM_VOLATILE_P (operands[nops + i]))
13691 return 0;
13693 offset = const0_rtx;
13695 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13696 || (GET_CODE (reg) == SUBREG
13697 && REG_P (reg = SUBREG_REG (reg))))
13698 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13699 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13700 || (GET_CODE (reg) == SUBREG
13701 && REG_P (reg = SUBREG_REG (reg))))
13702 && (CONST_INT_P (offset
13703 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13705 if (i == 0)
13707 base_reg = REGNO (reg);
13708 base_reg_rtx = reg;
13709 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13710 return 0;
13712 else if (base_reg != (int) REGNO (reg))
13713 /* Not addressed from the same base register. */
13714 return 0;
13716 unsorted_regs[i] = (REG_P (operands[i])
13717 ? REGNO (operands[i])
13718 : REGNO (SUBREG_REG (operands[i])));
13720 /* If it isn't an integer register, or if it overwrites the
13721 base register but isn't the last insn in the list, then
13722 we can't do this. */
13723 if (unsorted_regs[i] < 0
13724 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13725 || unsorted_regs[i] > 14
13726 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13727 return 0;
13729 /* Don't allow SP to be loaded unless it is also the base
13730 register. It guarantees that SP is reset correctly when
13731 an LDM instruction is interrupted. Otherwise, we might
13732 end up with a corrupt stack. */
13733 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13734 return 0;
13736 unsorted_offsets[i] = INTVAL (offset);
13737 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13738 order[0] = i;
13740 else
13741 /* Not a suitable memory address. */
13742 return 0;
13745 /* All the useful information has now been extracted from the
13746 operands into unsorted_regs and unsorted_offsets; additionally,
13747 order[0] has been set to the lowest offset in the list. Sort
13748 the offsets into order, verifying that they are adjacent, and
13749 check that the register numbers are ascending. */
13750 if (!compute_offset_order (nops, unsorted_offsets, order,
13751 check_regs ? unsorted_regs : NULL))
13752 return 0;
13754 if (saved_order)
13755 memcpy (saved_order, order, sizeof order);
13757 if (base)
13759 *base = base_reg;
13761 for (i = 0; i < nops; i++)
13762 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13764 *load_offset = unsorted_offsets[order[0]];
13767 if (TARGET_THUMB1
13768 && !peep2_reg_dead_p (nops, base_reg_rtx))
13769 return 0;
13771 if (unsorted_offsets[order[0]] == 0)
13772 ldm_case = 1; /* ldmia */
13773 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13774 ldm_case = 2; /* ldmib */
13775 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13776 ldm_case = 3; /* ldmda */
13777 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13778 ldm_case = 4; /* ldmdb */
13779 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13780 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13781 ldm_case = 5;
13782 else
13783 return 0;
13785 if (!multiple_operation_profitable_p (false, nops,
13786 ldm_case == 5
13787 ? unsorted_offsets[order[0]] : 0))
13788 return 0;
13790 return ldm_case;
13793 /* Used to determine in a peephole whether a sequence of store instructions can
13794 be changed into a store-multiple instruction.
13795 NOPS is the number of separate store instructions we are examining.
13796 NOPS_TOTAL is the total number of instructions recognized by the peephole
13797 pattern.
13798 The first NOPS entries in OPERANDS are the source registers, the next
13799 NOPS entries are memory operands. If this function is successful, *BASE is
13800 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13801 to the first memory location's offset from that base register. REGS is an
13802 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13803 likewise filled with the corresponding rtx's.
13804 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13805 numbers to an ascending order of stores.
13806 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13807 from ascending memory locations, and the function verifies that the register
13808 numbers are themselves ascending. If CHECK_REGS is false, the register
13809 numbers are stored in the order they are found in the operands. */
13810 static int
13811 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13812 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13813 HOST_WIDE_INT *load_offset, bool check_regs)
13815 int unsorted_regs[MAX_LDM_STM_OPS];
13816 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13817 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13818 int order[MAX_LDM_STM_OPS];
13819 int base_reg = -1;
13820 rtx base_reg_rtx = NULL;
13821 int i, stm_case;
13823 /* Write back of base register is currently only supported for Thumb 1. */
13824 int base_writeback = TARGET_THUMB1;
13826 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13827 easily extended if required. */
13828 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13830 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13832 /* Loop over the operands and check that the memory references are
13833 suitable (i.e. immediate offsets from the same base register). At
13834 the same time, extract the target register, and the memory
13835 offsets. */
13836 for (i = 0; i < nops; i++)
13838 rtx reg;
13839 rtx offset;
13841 /* Convert a subreg of a mem into the mem itself. */
13842 if (GET_CODE (operands[nops + i]) == SUBREG)
13843 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13845 gcc_assert (MEM_P (operands[nops + i]));
13847 /* Don't reorder volatile memory references; it doesn't seem worth
13848 looking for the case where the order is ok anyway. */
13849 if (MEM_VOLATILE_P (operands[nops + i]))
13850 return 0;
13852 offset = const0_rtx;
13854 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13855 || (GET_CODE (reg) == SUBREG
13856 && REG_P (reg = SUBREG_REG (reg))))
13857 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13858 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13859 || (GET_CODE (reg) == SUBREG
13860 && REG_P (reg = SUBREG_REG (reg))))
13861 && (CONST_INT_P (offset
13862 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13864 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13865 ? operands[i] : SUBREG_REG (operands[i]));
13866 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13868 if (i == 0)
13870 base_reg = REGNO (reg);
13871 base_reg_rtx = reg;
13872 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13873 return 0;
13875 else if (base_reg != (int) REGNO (reg))
13876 /* Not addressed from the same base register. */
13877 return 0;
13879 /* If it isn't an integer register, then we can't do this. */
13880 if (unsorted_regs[i] < 0
13881 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13882 /* The effects are unpredictable if the base register is
13883 both updated and stored. */
13884 || (base_writeback && unsorted_regs[i] == base_reg)
13885 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13886 || unsorted_regs[i] > 14)
13887 return 0;
13889 unsorted_offsets[i] = INTVAL (offset);
13890 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13891 order[0] = i;
13893 else
13894 /* Not a suitable memory address. */
13895 return 0;
13898 /* All the useful information has now been extracted from the
13899 operands into unsorted_regs and unsorted_offsets; additionally,
13900 order[0] has been set to the lowest offset in the list. Sort
13901 the offsets into order, verifying that they are adjacent, and
13902 check that the register numbers are ascending. */
13903 if (!compute_offset_order (nops, unsorted_offsets, order,
13904 check_regs ? unsorted_regs : NULL))
13905 return 0;
13907 if (saved_order)
13908 memcpy (saved_order, order, sizeof order);
13910 if (base)
13912 *base = base_reg;
13914 for (i = 0; i < nops; i++)
13916 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13917 if (reg_rtxs)
13918 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13921 *load_offset = unsorted_offsets[order[0]];
13924 if (TARGET_THUMB1
13925 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13926 return 0;
13928 if (unsorted_offsets[order[0]] == 0)
13929 stm_case = 1; /* stmia */
13930 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13931 stm_case = 2; /* stmib */
13932 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13933 stm_case = 3; /* stmda */
13934 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13935 stm_case = 4; /* stmdb */
13936 else
13937 return 0;
13939 if (!multiple_operation_profitable_p (false, nops, 0))
13940 return 0;
13942 return stm_case;
13945 /* Routines for use in generating RTL. */
13947 /* Generate a load-multiple instruction. COUNT is the number of loads in
13948 the instruction; REGS and MEMS are arrays containing the operands.
13949 BASEREG is the base register to be used in addressing the memory operands.
13950 WBACK_OFFSET is nonzero if the instruction should update the base
13951 register. */
13953 static rtx
13954 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13955 HOST_WIDE_INT wback_offset)
13957 int i = 0, j;
13958 rtx result;
13960 if (!multiple_operation_profitable_p (false, count, 0))
13962 rtx seq;
13964 start_sequence ();
13966 for (i = 0; i < count; i++)
13967 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13969 if (wback_offset != 0)
13970 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13972 seq = get_insns ();
13973 end_sequence ();
13975 return seq;
13978 result = gen_rtx_PARALLEL (VOIDmode,
13979 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13980 if (wback_offset != 0)
13982 XVECEXP (result, 0, 0)
13983 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13984 i = 1;
13985 count++;
13988 for (j = 0; i < count; i++, j++)
13989 XVECEXP (result, 0, i)
13990 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13992 return result;
13995 /* Generate a store-multiple instruction. COUNT is the number of stores in
13996 the instruction; REGS and MEMS are arrays containing the operands.
13997 BASEREG is the base register to be used in addressing the memory operands.
13998 WBACK_OFFSET is nonzero if the instruction should update the base
13999 register. */
14001 static rtx
14002 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14003 HOST_WIDE_INT wback_offset)
14005 int i = 0, j;
14006 rtx result;
14008 if (GET_CODE (basereg) == PLUS)
14009 basereg = XEXP (basereg, 0);
14011 if (!multiple_operation_profitable_p (false, count, 0))
14013 rtx seq;
14015 start_sequence ();
14017 for (i = 0; i < count; i++)
14018 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14020 if (wback_offset != 0)
14021 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14023 seq = get_insns ();
14024 end_sequence ();
14026 return seq;
14029 result = gen_rtx_PARALLEL (VOIDmode,
14030 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14031 if (wback_offset != 0)
14033 XVECEXP (result, 0, 0)
14034 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14035 i = 1;
14036 count++;
14039 for (j = 0; i < count; i++, j++)
14040 XVECEXP (result, 0, i)
14041 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14043 return result;
14046 /* Generate either a load-multiple or a store-multiple instruction. This
14047 function can be used in situations where we can start with a single MEM
14048 rtx and adjust its address upwards.
14049 COUNT is the number of operations in the instruction, not counting a
14050 possible update of the base register. REGS is an array containing the
14051 register operands.
14052 BASEREG is the base register to be used in addressing the memory operands,
14053 which are constructed from BASEMEM.
14054 WRITE_BACK specifies whether the generated instruction should include an
14055 update of the base register.
14056 OFFSETP is used to pass an offset to and from this function; this offset
14057 is not used when constructing the address (instead BASEMEM should have an
14058 appropriate offset in its address), it is used only for setting
14059 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
14061 static rtx
14062 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14063 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14065 rtx mems[MAX_LDM_STM_OPS];
14066 HOST_WIDE_INT offset = *offsetp;
14067 int i;
14069 gcc_assert (count <= MAX_LDM_STM_OPS);
14071 if (GET_CODE (basereg) == PLUS)
14072 basereg = XEXP (basereg, 0);
14074 for (i = 0; i < count; i++)
14076 rtx addr = plus_constant (Pmode, basereg, i * 4);
14077 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14078 offset += 4;
14081 if (write_back)
14082 *offsetp = offset;
14084 if (is_load)
14085 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14086 write_back ? 4 * count : 0);
14087 else
14088 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14089 write_back ? 4 * count : 0);
14093 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14094 rtx basemem, HOST_WIDE_INT *offsetp)
14096 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14097 offsetp);
14101 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14102 rtx basemem, HOST_WIDE_INT *offsetp)
14104 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14105 offsetp);
14108 /* Called from a peephole2 expander to turn a sequence of loads into an
14109 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14110 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14111 is true if we can reorder the registers because they are used commutatively
14112 subsequently.
14113 Returns true iff we could generate a new instruction. */
14115 bool
14116 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14118 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14119 rtx mems[MAX_LDM_STM_OPS];
14120 int i, j, base_reg;
14121 rtx base_reg_rtx;
14122 HOST_WIDE_INT offset;
14123 int write_back = FALSE;
14124 int ldm_case;
14125 rtx addr;
14127 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14128 &base_reg, &offset, !sort_regs);
14130 if (ldm_case == 0)
14131 return false;
14133 if (sort_regs)
14134 for (i = 0; i < nops - 1; i++)
14135 for (j = i + 1; j < nops; j++)
14136 if (regs[i] > regs[j])
14138 int t = regs[i];
14139 regs[i] = regs[j];
14140 regs[j] = t;
14142 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14144 if (TARGET_THUMB1)
14146 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14147 gcc_assert (ldm_case == 1 || ldm_case == 5);
14148 write_back = TRUE;
14151 if (ldm_case == 5)
14153 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14154 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14155 offset = 0;
14156 if (!TARGET_THUMB1)
14158 base_reg = regs[0];
14159 base_reg_rtx = newbase;
14163 for (i = 0; i < nops; i++)
14165 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14166 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14167 SImode, addr, 0);
14169 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14170 write_back ? offset + i * 4 : 0));
14171 return true;
14174 /* Called from a peephole2 expander to turn a sequence of stores into an
14175 STM instruction. OPERANDS are the operands found by the peephole matcher;
14176 NOPS indicates how many separate stores we are trying to combine.
14177 Returns true iff we could generate a new instruction. */
14179 bool
14180 gen_stm_seq (rtx *operands, int nops)
14182 int i;
14183 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14184 rtx mems[MAX_LDM_STM_OPS];
14185 int base_reg;
14186 rtx base_reg_rtx;
14187 HOST_WIDE_INT offset;
14188 int write_back = FALSE;
14189 int stm_case;
14190 rtx addr;
14191 bool base_reg_dies;
14193 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14194 mem_order, &base_reg, &offset, true);
14196 if (stm_case == 0)
14197 return false;
14199 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14201 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14202 if (TARGET_THUMB1)
14204 gcc_assert (base_reg_dies);
14205 write_back = TRUE;
14208 if (stm_case == 5)
14210 gcc_assert (base_reg_dies);
14211 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14212 offset = 0;
14215 addr = plus_constant (Pmode, base_reg_rtx, offset);
14217 for (i = 0; i < nops; i++)
14219 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14220 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14221 SImode, addr, 0);
14223 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14224 write_back ? offset + i * 4 : 0));
14225 return true;
14228 /* Called from a peephole2 expander to turn a sequence of stores that are
14229 preceded by constant loads into an STM instruction. OPERANDS are the
14230 operands found by the peephole matcher; NOPS indicates how many
14231 separate stores we are trying to combine; there are 2 * NOPS
14232 instructions in the peephole.
14233 Returns true iff we could generate a new instruction. */
14235 bool
14236 gen_const_stm_seq (rtx *operands, int nops)
14238 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14239 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14240 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14241 rtx mems[MAX_LDM_STM_OPS];
14242 int base_reg;
14243 rtx base_reg_rtx;
14244 HOST_WIDE_INT offset;
14245 int write_back = FALSE;
14246 int stm_case;
14247 rtx addr;
14248 bool base_reg_dies;
14249 int i, j;
14250 HARD_REG_SET allocated;
14252 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14253 mem_order, &base_reg, &offset, false);
14255 if (stm_case == 0)
14256 return false;
14258 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14260 /* If the same register is used more than once, try to find a free
14261 register. */
14262 CLEAR_HARD_REG_SET (allocated);
14263 for (i = 0; i < nops; i++)
14265 for (j = i + 1; j < nops; j++)
14266 if (regs[i] == regs[j])
14268 rtx t = peep2_find_free_register (0, nops * 2,
14269 TARGET_THUMB1 ? "l" : "r",
14270 SImode, &allocated);
14271 if (t == NULL_RTX)
14272 return false;
14273 reg_rtxs[i] = t;
14274 regs[i] = REGNO (t);
14278 /* Compute an ordering that maps the register numbers to an ascending
14279 sequence. */
14280 reg_order[0] = 0;
14281 for (i = 0; i < nops; i++)
14282 if (regs[i] < regs[reg_order[0]])
14283 reg_order[0] = i;
14285 for (i = 1; i < nops; i++)
14287 int this_order = reg_order[i - 1];
14288 for (j = 0; j < nops; j++)
14289 if (regs[j] > regs[reg_order[i - 1]]
14290 && (this_order == reg_order[i - 1]
14291 || regs[j] < regs[this_order]))
14292 this_order = j;
14293 reg_order[i] = this_order;
14296 /* Ensure that registers that must be live after the instruction end
14297 up with the correct value. */
14298 for (i = 0; i < nops; i++)
14300 int this_order = reg_order[i];
14301 if ((this_order != mem_order[i]
14302 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14303 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14304 return false;
14307 /* Load the constants. */
14308 for (i = 0; i < nops; i++)
14310 rtx op = operands[2 * nops + mem_order[i]];
14311 sorted_regs[i] = regs[reg_order[i]];
14312 emit_move_insn (reg_rtxs[reg_order[i]], op);
14315 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14317 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14318 if (TARGET_THUMB1)
14320 gcc_assert (base_reg_dies);
14321 write_back = TRUE;
14324 if (stm_case == 5)
14326 gcc_assert (base_reg_dies);
14327 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14328 offset = 0;
14331 addr = plus_constant (Pmode, base_reg_rtx, offset);
14333 for (i = 0; i < nops; i++)
14335 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14336 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14337 SImode, addr, 0);
14339 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14340 write_back ? offset + i * 4 : 0));
14341 return true;
14344 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14345 unaligned copies on processors which support unaligned semantics for those
14346 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14347 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14348 An interleave factor of 1 (the minimum) will perform no interleaving.
14349 Load/store multiple are used for aligned addresses where possible. */
14351 static void
14352 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14353 HOST_WIDE_INT length,
14354 unsigned int interleave_factor)
14356 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14357 int *regnos = XALLOCAVEC (int, interleave_factor);
14358 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14359 HOST_WIDE_INT i, j;
14360 HOST_WIDE_INT remaining = length, words;
14361 rtx halfword_tmp = NULL, byte_tmp = NULL;
14362 rtx dst, src;
14363 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14364 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14365 HOST_WIDE_INT srcoffset, dstoffset;
14366 HOST_WIDE_INT src_autoinc, dst_autoinc;
14367 rtx mem, addr;
14369 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14371 /* Use hard registers if we have aligned source or destination so we can use
14372 load/store multiple with contiguous registers. */
14373 if (dst_aligned || src_aligned)
14374 for (i = 0; i < interleave_factor; i++)
14375 regs[i] = gen_rtx_REG (SImode, i);
14376 else
14377 for (i = 0; i < interleave_factor; i++)
14378 regs[i] = gen_reg_rtx (SImode);
14380 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14381 src = copy_addr_to_reg (XEXP (srcbase, 0));
14383 srcoffset = dstoffset = 0;
14385 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14386 For copying the last bytes we want to subtract this offset again. */
14387 src_autoinc = dst_autoinc = 0;
14389 for (i = 0; i < interleave_factor; i++)
14390 regnos[i] = i;
14392 /* Copy BLOCK_SIZE_BYTES chunks. */
14394 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14396 /* Load words. */
14397 if (src_aligned && interleave_factor > 1)
14399 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14400 TRUE, srcbase, &srcoffset));
14401 src_autoinc += UNITS_PER_WORD * interleave_factor;
14403 else
14405 for (j = 0; j < interleave_factor; j++)
14407 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14408 - src_autoinc));
14409 mem = adjust_automodify_address (srcbase, SImode, addr,
14410 srcoffset + j * UNITS_PER_WORD);
14411 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14413 srcoffset += block_size_bytes;
14416 /* Store words. */
14417 if (dst_aligned && interleave_factor > 1)
14419 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14420 TRUE, dstbase, &dstoffset));
14421 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14423 else
14425 for (j = 0; j < interleave_factor; j++)
14427 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14428 - dst_autoinc));
14429 mem = adjust_automodify_address (dstbase, SImode, addr,
14430 dstoffset + j * UNITS_PER_WORD);
14431 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14433 dstoffset += block_size_bytes;
14436 remaining -= block_size_bytes;
14439 /* Copy any whole words left (note these aren't interleaved with any
14440 subsequent halfword/byte load/stores in the interests of simplicity). */
14442 words = remaining / UNITS_PER_WORD;
14444 gcc_assert (words < interleave_factor);
14446 if (src_aligned && words > 1)
14448 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14449 &srcoffset));
14450 src_autoinc += UNITS_PER_WORD * words;
14452 else
14454 for (j = 0; j < words; j++)
14456 addr = plus_constant (Pmode, src,
14457 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14458 mem = adjust_automodify_address (srcbase, SImode, addr,
14459 srcoffset + j * UNITS_PER_WORD);
14460 if (src_aligned)
14461 emit_move_insn (regs[j], mem);
14462 else
14463 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14465 srcoffset += words * UNITS_PER_WORD;
14468 if (dst_aligned && words > 1)
14470 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14471 &dstoffset));
14472 dst_autoinc += words * UNITS_PER_WORD;
14474 else
14476 for (j = 0; j < words; j++)
14478 addr = plus_constant (Pmode, dst,
14479 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14480 mem = adjust_automodify_address (dstbase, SImode, addr,
14481 dstoffset + j * UNITS_PER_WORD);
14482 if (dst_aligned)
14483 emit_move_insn (mem, regs[j]);
14484 else
14485 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14487 dstoffset += words * UNITS_PER_WORD;
14490 remaining -= words * UNITS_PER_WORD;
14492 gcc_assert (remaining < 4);
14494 /* Copy a halfword if necessary. */
14496 if (remaining >= 2)
14498 halfword_tmp = gen_reg_rtx (SImode);
14500 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14501 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14502 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14504 /* Either write out immediately, or delay until we've loaded the last
14505 byte, depending on interleave factor. */
14506 if (interleave_factor == 1)
14508 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14509 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14510 emit_insn (gen_unaligned_storehi (mem,
14511 gen_lowpart (HImode, halfword_tmp)));
14512 halfword_tmp = NULL;
14513 dstoffset += 2;
14516 remaining -= 2;
14517 srcoffset += 2;
14520 gcc_assert (remaining < 2);
14522 /* Copy last byte. */
14524 if ((remaining & 1) != 0)
14526 byte_tmp = gen_reg_rtx (SImode);
14528 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14529 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14530 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14532 if (interleave_factor == 1)
14534 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14535 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14536 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14537 byte_tmp = NULL;
14538 dstoffset++;
14541 remaining--;
14542 srcoffset++;
14545 /* Store last halfword if we haven't done so already. */
14547 if (halfword_tmp)
14549 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14550 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14551 emit_insn (gen_unaligned_storehi (mem,
14552 gen_lowpart (HImode, halfword_tmp)));
14553 dstoffset += 2;
14556 /* Likewise for last byte. */
14558 if (byte_tmp)
14560 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14561 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14562 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14563 dstoffset++;
14566 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14569 /* From mips_adjust_block_mem:
14571 Helper function for doing a loop-based block operation on memory
14572 reference MEM. Each iteration of the loop will operate on LENGTH
14573 bytes of MEM.
14575 Create a new base register for use within the loop and point it to
14576 the start of MEM. Create a new memory reference that uses this
14577 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14579 static void
14580 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14581 rtx *loop_mem)
14583 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14585 /* Although the new mem does not refer to a known location,
14586 it does keep up to LENGTH bytes of alignment. */
14587 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14588 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14591 /* From mips_block_move_loop:
14593 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14594 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14595 the memory regions do not overlap. */
14597 static void
14598 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14599 unsigned int interleave_factor,
14600 HOST_WIDE_INT bytes_per_iter)
14602 rtx src_reg, dest_reg, final_src, test;
14603 HOST_WIDE_INT leftover;
14605 leftover = length % bytes_per_iter;
14606 length -= leftover;
14608 /* Create registers and memory references for use within the loop. */
14609 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14610 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14612 /* Calculate the value that SRC_REG should have after the last iteration of
14613 the loop. */
14614 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14615 0, 0, OPTAB_WIDEN);
14617 /* Emit the start of the loop. */
14618 rtx_code_label *label = gen_label_rtx ();
14619 emit_label (label);
14621 /* Emit the loop body. */
14622 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14623 interleave_factor);
14625 /* Move on to the next block. */
14626 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14627 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14629 /* Emit the loop condition. */
14630 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14631 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14633 /* Mop up any left-over bytes. */
14634 if (leftover)
14635 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14638 /* Emit a block move when either the source or destination is unaligned (not
14639 aligned to a four-byte boundary). This may need further tuning depending on
14640 core type, optimize_size setting, etc. */
14642 static int
14643 arm_movmemqi_unaligned (rtx *operands)
14645 HOST_WIDE_INT length = INTVAL (operands[2]);
14647 if (optimize_size)
14649 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14650 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14651 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14652 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14653 or dst_aligned though: allow more interleaving in those cases since the
14654 resulting code can be smaller. */
14655 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14656 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14658 if (length > 12)
14659 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14660 interleave_factor, bytes_per_iter);
14661 else
14662 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14663 interleave_factor);
14665 else
14667 /* Note that the loop created by arm_block_move_unaligned_loop may be
14668 subject to loop unrolling, which makes tuning this condition a little
14669 redundant. */
14670 if (length > 32)
14671 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14672 else
14673 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14676 return 1;
14680 arm_gen_movmemqi (rtx *operands)
14682 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14683 HOST_WIDE_INT srcoffset, dstoffset;
14684 int i;
14685 rtx src, dst, srcbase, dstbase;
14686 rtx part_bytes_reg = NULL;
14687 rtx mem;
14689 if (!CONST_INT_P (operands[2])
14690 || !CONST_INT_P (operands[3])
14691 || INTVAL (operands[2]) > 64)
14692 return 0;
14694 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14695 return arm_movmemqi_unaligned (operands);
14697 if (INTVAL (operands[3]) & 3)
14698 return 0;
14700 dstbase = operands[0];
14701 srcbase = operands[1];
14703 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14704 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14706 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14707 out_words_to_go = INTVAL (operands[2]) / 4;
14708 last_bytes = INTVAL (operands[2]) & 3;
14709 dstoffset = srcoffset = 0;
14711 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14712 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14714 for (i = 0; in_words_to_go >= 2; i+=4)
14716 if (in_words_to_go > 4)
14717 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14718 TRUE, srcbase, &srcoffset));
14719 else
14720 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14721 src, FALSE, srcbase,
14722 &srcoffset));
14724 if (out_words_to_go)
14726 if (out_words_to_go > 4)
14727 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14728 TRUE, dstbase, &dstoffset));
14729 else if (out_words_to_go != 1)
14730 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14731 out_words_to_go, dst,
14732 (last_bytes == 0
14733 ? FALSE : TRUE),
14734 dstbase, &dstoffset));
14735 else
14737 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14738 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14739 if (last_bytes != 0)
14741 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14742 dstoffset += 4;
14747 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14748 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14751 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14752 if (out_words_to_go)
14754 rtx sreg;
14756 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14757 sreg = copy_to_reg (mem);
14759 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14760 emit_move_insn (mem, sreg);
14761 in_words_to_go--;
14763 gcc_assert (!in_words_to_go); /* Sanity check */
14766 if (in_words_to_go)
14768 gcc_assert (in_words_to_go > 0);
14770 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14771 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14774 gcc_assert (!last_bytes || part_bytes_reg);
14776 if (BYTES_BIG_ENDIAN && last_bytes)
14778 rtx tmp = gen_reg_rtx (SImode);
14780 /* The bytes we want are in the top end of the word. */
14781 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14782 GEN_INT (8 * (4 - last_bytes))));
14783 part_bytes_reg = tmp;
14785 while (last_bytes)
14787 mem = adjust_automodify_address (dstbase, QImode,
14788 plus_constant (Pmode, dst,
14789 last_bytes - 1),
14790 dstoffset + last_bytes - 1);
14791 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14793 if (--last_bytes)
14795 tmp = gen_reg_rtx (SImode);
14796 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14797 part_bytes_reg = tmp;
14802 else
14804 if (last_bytes > 1)
14806 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14807 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14808 last_bytes -= 2;
14809 if (last_bytes)
14811 rtx tmp = gen_reg_rtx (SImode);
14812 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14813 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14814 part_bytes_reg = tmp;
14815 dstoffset += 2;
14819 if (last_bytes)
14821 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14822 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14826 return 1;
14829 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14830 by mode size. */
14831 inline static rtx
14832 next_consecutive_mem (rtx mem)
14834 machine_mode mode = GET_MODE (mem);
14835 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14836 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14838 return adjust_automodify_address (mem, mode, addr, offset);
14841 /* Copy using LDRD/STRD instructions whenever possible.
14842 Returns true upon success. */
14843 bool
14844 gen_movmem_ldrd_strd (rtx *operands)
14846 unsigned HOST_WIDE_INT len;
14847 HOST_WIDE_INT align;
14848 rtx src, dst, base;
14849 rtx reg0;
14850 bool src_aligned, dst_aligned;
14851 bool src_volatile, dst_volatile;
14853 gcc_assert (CONST_INT_P (operands[2]));
14854 gcc_assert (CONST_INT_P (operands[3]));
14856 len = UINTVAL (operands[2]);
14857 if (len > 64)
14858 return false;
14860 /* Maximum alignment we can assume for both src and dst buffers. */
14861 align = INTVAL (operands[3]);
14863 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14864 return false;
14866 /* Place src and dst addresses in registers
14867 and update the corresponding mem rtx. */
14868 dst = operands[0];
14869 dst_volatile = MEM_VOLATILE_P (dst);
14870 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14871 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14872 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14874 src = operands[1];
14875 src_volatile = MEM_VOLATILE_P (src);
14876 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14877 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14878 src = adjust_automodify_address (src, VOIDmode, base, 0);
14880 if (!unaligned_access && !(src_aligned && dst_aligned))
14881 return false;
14883 if (src_volatile || dst_volatile)
14884 return false;
14886 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14887 if (!(dst_aligned || src_aligned))
14888 return arm_gen_movmemqi (operands);
14890 src = adjust_address (src, DImode, 0);
14891 dst = adjust_address (dst, DImode, 0);
14892 while (len >= 8)
14894 len -= 8;
14895 reg0 = gen_reg_rtx (DImode);
14896 if (src_aligned)
14897 emit_move_insn (reg0, src);
14898 else
14899 emit_insn (gen_unaligned_loaddi (reg0, src));
14901 if (dst_aligned)
14902 emit_move_insn (dst, reg0);
14903 else
14904 emit_insn (gen_unaligned_storedi (dst, reg0));
14906 src = next_consecutive_mem (src);
14907 dst = next_consecutive_mem (dst);
14910 gcc_assert (len < 8);
14911 if (len >= 4)
14913 /* More than a word but less than a double-word to copy. Copy a word. */
14914 reg0 = gen_reg_rtx (SImode);
14915 src = adjust_address (src, SImode, 0);
14916 dst = adjust_address (dst, SImode, 0);
14917 if (src_aligned)
14918 emit_move_insn (reg0, src);
14919 else
14920 emit_insn (gen_unaligned_loadsi (reg0, src));
14922 if (dst_aligned)
14923 emit_move_insn (dst, reg0);
14924 else
14925 emit_insn (gen_unaligned_storesi (dst, reg0));
14927 src = next_consecutive_mem (src);
14928 dst = next_consecutive_mem (dst);
14929 len -= 4;
14932 if (len == 0)
14933 return true;
14935 /* Copy the remaining bytes. */
14936 if (len >= 2)
14938 dst = adjust_address (dst, HImode, 0);
14939 src = adjust_address (src, HImode, 0);
14940 reg0 = gen_reg_rtx (SImode);
14941 if (src_aligned)
14942 emit_insn (gen_zero_extendhisi2 (reg0, src));
14943 else
14944 emit_insn (gen_unaligned_loadhiu (reg0, src));
14946 if (dst_aligned)
14947 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14948 else
14949 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14951 src = next_consecutive_mem (src);
14952 dst = next_consecutive_mem (dst);
14953 if (len == 2)
14954 return true;
14957 dst = adjust_address (dst, QImode, 0);
14958 src = adjust_address (src, QImode, 0);
14959 reg0 = gen_reg_rtx (QImode);
14960 emit_move_insn (reg0, src);
14961 emit_move_insn (dst, reg0);
14962 return true;
14965 /* Select a dominance comparison mode if possible for a test of the general
14966 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14967 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14968 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14969 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14970 In all cases OP will be either EQ or NE, but we don't need to know which
14971 here. If we are unable to support a dominance comparison we return
14972 CC mode. This will then fail to match for the RTL expressions that
14973 generate this call. */
14974 machine_mode
14975 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14977 enum rtx_code cond1, cond2;
14978 int swapped = 0;
14980 /* Currently we will probably get the wrong result if the individual
14981 comparisons are not simple. This also ensures that it is safe to
14982 reverse a comparison if necessary. */
14983 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14984 != CCmode)
14985 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14986 != CCmode))
14987 return CCmode;
14989 /* The if_then_else variant of this tests the second condition if the
14990 first passes, but is true if the first fails. Reverse the first
14991 condition to get a true "inclusive-or" expression. */
14992 if (cond_or == DOM_CC_NX_OR_Y)
14993 cond1 = reverse_condition (cond1);
14995 /* If the comparisons are not equal, and one doesn't dominate the other,
14996 then we can't do this. */
14997 if (cond1 != cond2
14998 && !comparison_dominates_p (cond1, cond2)
14999 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15000 return CCmode;
15002 if (swapped)
15003 std::swap (cond1, cond2);
15005 switch (cond1)
15007 case EQ:
15008 if (cond_or == DOM_CC_X_AND_Y)
15009 return CC_DEQmode;
15011 switch (cond2)
15013 case EQ: return CC_DEQmode;
15014 case LE: return CC_DLEmode;
15015 case LEU: return CC_DLEUmode;
15016 case GE: return CC_DGEmode;
15017 case GEU: return CC_DGEUmode;
15018 default: gcc_unreachable ();
15021 case LT:
15022 if (cond_or == DOM_CC_X_AND_Y)
15023 return CC_DLTmode;
15025 switch (cond2)
15027 case LT:
15028 return CC_DLTmode;
15029 case LE:
15030 return CC_DLEmode;
15031 case NE:
15032 return CC_DNEmode;
15033 default:
15034 gcc_unreachable ();
15037 case GT:
15038 if (cond_or == DOM_CC_X_AND_Y)
15039 return CC_DGTmode;
15041 switch (cond2)
15043 case GT:
15044 return CC_DGTmode;
15045 case GE:
15046 return CC_DGEmode;
15047 case NE:
15048 return CC_DNEmode;
15049 default:
15050 gcc_unreachable ();
15053 case LTU:
15054 if (cond_or == DOM_CC_X_AND_Y)
15055 return CC_DLTUmode;
15057 switch (cond2)
15059 case LTU:
15060 return CC_DLTUmode;
15061 case LEU:
15062 return CC_DLEUmode;
15063 case NE:
15064 return CC_DNEmode;
15065 default:
15066 gcc_unreachable ();
15069 case GTU:
15070 if (cond_or == DOM_CC_X_AND_Y)
15071 return CC_DGTUmode;
15073 switch (cond2)
15075 case GTU:
15076 return CC_DGTUmode;
15077 case GEU:
15078 return CC_DGEUmode;
15079 case NE:
15080 return CC_DNEmode;
15081 default:
15082 gcc_unreachable ();
15085 /* The remaining cases only occur when both comparisons are the
15086 same. */
15087 case NE:
15088 gcc_assert (cond1 == cond2);
15089 return CC_DNEmode;
15091 case LE:
15092 gcc_assert (cond1 == cond2);
15093 return CC_DLEmode;
15095 case GE:
15096 gcc_assert (cond1 == cond2);
15097 return CC_DGEmode;
15099 case LEU:
15100 gcc_assert (cond1 == cond2);
15101 return CC_DLEUmode;
15103 case GEU:
15104 gcc_assert (cond1 == cond2);
15105 return CC_DGEUmode;
15107 default:
15108 gcc_unreachable ();
15112 machine_mode
15113 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15115 /* All floating point compares return CCFP if it is an equality
15116 comparison, and CCFPE otherwise. */
15117 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15119 switch (op)
15121 case EQ:
15122 case NE:
15123 case UNORDERED:
15124 case ORDERED:
15125 case UNLT:
15126 case UNLE:
15127 case UNGT:
15128 case UNGE:
15129 case UNEQ:
15130 case LTGT:
15131 return CCFPmode;
15133 case LT:
15134 case LE:
15135 case GT:
15136 case GE:
15137 return CCFPEmode;
15139 default:
15140 gcc_unreachable ();
15144 /* A compare with a shifted operand. Because of canonicalization, the
15145 comparison will have to be swapped when we emit the assembler. */
15146 if (GET_MODE (y) == SImode
15147 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15148 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15149 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15150 || GET_CODE (x) == ROTATERT))
15151 return CC_SWPmode;
15153 /* This operation is performed swapped, but since we only rely on the Z
15154 flag we don't need an additional mode. */
15155 if (GET_MODE (y) == SImode
15156 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15157 && GET_CODE (x) == NEG
15158 && (op == EQ || op == NE))
15159 return CC_Zmode;
15161 /* This is a special case that is used by combine to allow a
15162 comparison of a shifted byte load to be split into a zero-extend
15163 followed by a comparison of the shifted integer (only valid for
15164 equalities and unsigned inequalities). */
15165 if (GET_MODE (x) == SImode
15166 && GET_CODE (x) == ASHIFT
15167 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15168 && GET_CODE (XEXP (x, 0)) == SUBREG
15169 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15170 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15171 && (op == EQ || op == NE
15172 || op == GEU || op == GTU || op == LTU || op == LEU)
15173 && CONST_INT_P (y))
15174 return CC_Zmode;
15176 /* A construct for a conditional compare, if the false arm contains
15177 0, then both conditions must be true, otherwise either condition
15178 must be true. Not all conditions are possible, so CCmode is
15179 returned if it can't be done. */
15180 if (GET_CODE (x) == IF_THEN_ELSE
15181 && (XEXP (x, 2) == const0_rtx
15182 || XEXP (x, 2) == const1_rtx)
15183 && COMPARISON_P (XEXP (x, 0))
15184 && COMPARISON_P (XEXP (x, 1)))
15185 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15186 INTVAL (XEXP (x, 2)));
15188 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15189 if (GET_CODE (x) == AND
15190 && (op == EQ || op == NE)
15191 && COMPARISON_P (XEXP (x, 0))
15192 && COMPARISON_P (XEXP (x, 1)))
15193 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15194 DOM_CC_X_AND_Y);
15196 if (GET_CODE (x) == IOR
15197 && (op == EQ || op == NE)
15198 && COMPARISON_P (XEXP (x, 0))
15199 && COMPARISON_P (XEXP (x, 1)))
15200 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15201 DOM_CC_X_OR_Y);
15203 /* An operation (on Thumb) where we want to test for a single bit.
15204 This is done by shifting that bit up into the top bit of a
15205 scratch register; we can then branch on the sign bit. */
15206 if (TARGET_THUMB1
15207 && GET_MODE (x) == SImode
15208 && (op == EQ || op == NE)
15209 && GET_CODE (x) == ZERO_EXTRACT
15210 && XEXP (x, 1) == const1_rtx)
15211 return CC_Nmode;
15213 /* An operation that sets the condition codes as a side-effect, the
15214 V flag is not set correctly, so we can only use comparisons where
15215 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15216 instead.) */
15217 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15218 if (GET_MODE (x) == SImode
15219 && y == const0_rtx
15220 && (op == EQ || op == NE || op == LT || op == GE)
15221 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15222 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15223 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15224 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15225 || GET_CODE (x) == LSHIFTRT
15226 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15227 || GET_CODE (x) == ROTATERT
15228 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15229 return CC_NOOVmode;
15231 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15232 return CC_Zmode;
15234 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15235 && GET_CODE (x) == PLUS
15236 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15237 return CC_Cmode;
15239 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15241 switch (op)
15243 case EQ:
15244 case NE:
15245 /* A DImode comparison against zero can be implemented by
15246 or'ing the two halves together. */
15247 if (y == const0_rtx)
15248 return CC_Zmode;
15250 /* We can do an equality test in three Thumb instructions. */
15251 if (!TARGET_32BIT)
15252 return CC_Zmode;
15254 /* FALLTHROUGH */
15256 case LTU:
15257 case LEU:
15258 case GTU:
15259 case GEU:
15260 /* DImode unsigned comparisons can be implemented by cmp +
15261 cmpeq without a scratch register. Not worth doing in
15262 Thumb-2. */
15263 if (TARGET_32BIT)
15264 return CC_CZmode;
15266 /* FALLTHROUGH */
15268 case LT:
15269 case LE:
15270 case GT:
15271 case GE:
15272 /* DImode signed and unsigned comparisons can be implemented
15273 by cmp + sbcs with a scratch register, but that does not
15274 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15275 gcc_assert (op != EQ && op != NE);
15276 return CC_NCVmode;
15278 default:
15279 gcc_unreachable ();
15283 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15284 return GET_MODE (x);
15286 return CCmode;
15289 /* X and Y are two things to compare using CODE. Emit the compare insn and
15290 return the rtx for register 0 in the proper mode. FP means this is a
15291 floating point compare: I don't think that it is needed on the arm. */
15293 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15295 machine_mode mode;
15296 rtx cc_reg;
15297 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15299 /* We might have X as a constant, Y as a register because of the predicates
15300 used for cmpdi. If so, force X to a register here. */
15301 if (dimode_comparison && !REG_P (x))
15302 x = force_reg (DImode, x);
15304 mode = SELECT_CC_MODE (code, x, y);
15305 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15307 if (dimode_comparison
15308 && mode != CC_CZmode)
15310 rtx clobber, set;
15312 /* To compare two non-zero values for equality, XOR them and
15313 then compare against zero. Not used for ARM mode; there
15314 CC_CZmode is cheaper. */
15315 if (mode == CC_Zmode && y != const0_rtx)
15317 gcc_assert (!reload_completed);
15318 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15319 y = const0_rtx;
15322 /* A scratch register is required. */
15323 if (reload_completed)
15324 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15325 else
15326 scratch = gen_rtx_SCRATCH (SImode);
15328 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15329 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15330 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15332 else
15333 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15335 return cc_reg;
15338 /* Generate a sequence of insns that will generate the correct return
15339 address mask depending on the physical architecture that the program
15340 is running on. */
15342 arm_gen_return_addr_mask (void)
15344 rtx reg = gen_reg_rtx (Pmode);
15346 emit_insn (gen_return_addr_mask (reg));
15347 return reg;
15350 void
15351 arm_reload_in_hi (rtx *operands)
15353 rtx ref = operands[1];
15354 rtx base, scratch;
15355 HOST_WIDE_INT offset = 0;
15357 if (GET_CODE (ref) == SUBREG)
15359 offset = SUBREG_BYTE (ref);
15360 ref = SUBREG_REG (ref);
15363 if (REG_P (ref))
15365 /* We have a pseudo which has been spilt onto the stack; there
15366 are two cases here: the first where there is a simple
15367 stack-slot replacement and a second where the stack-slot is
15368 out of range, or is used as a subreg. */
15369 if (reg_equiv_mem (REGNO (ref)))
15371 ref = reg_equiv_mem (REGNO (ref));
15372 base = find_replacement (&XEXP (ref, 0));
15374 else
15375 /* The slot is out of range, or was dressed up in a SUBREG. */
15376 base = reg_equiv_address (REGNO (ref));
15378 else
15379 base = find_replacement (&XEXP (ref, 0));
15381 /* Handle the case where the address is too complex to be offset by 1. */
15382 if (GET_CODE (base) == MINUS
15383 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15385 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15387 emit_set_insn (base_plus, base);
15388 base = base_plus;
15390 else if (GET_CODE (base) == PLUS)
15392 /* The addend must be CONST_INT, or we would have dealt with it above. */
15393 HOST_WIDE_INT hi, lo;
15395 offset += INTVAL (XEXP (base, 1));
15396 base = XEXP (base, 0);
15398 /* Rework the address into a legal sequence of insns. */
15399 /* Valid range for lo is -4095 -> 4095 */
15400 lo = (offset >= 0
15401 ? (offset & 0xfff)
15402 : -((-offset) & 0xfff));
15404 /* Corner case, if lo is the max offset then we would be out of range
15405 once we have added the additional 1 below, so bump the msb into the
15406 pre-loading insn(s). */
15407 if (lo == 4095)
15408 lo &= 0x7ff;
15410 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15411 ^ (HOST_WIDE_INT) 0x80000000)
15412 - (HOST_WIDE_INT) 0x80000000);
15414 gcc_assert (hi + lo == offset);
15416 if (hi != 0)
15418 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15420 /* Get the base address; addsi3 knows how to handle constants
15421 that require more than one insn. */
15422 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15423 base = base_plus;
15424 offset = lo;
15428 /* Operands[2] may overlap operands[0] (though it won't overlap
15429 operands[1]), that's why we asked for a DImode reg -- so we can
15430 use the bit that does not overlap. */
15431 if (REGNO (operands[2]) == REGNO (operands[0]))
15432 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15433 else
15434 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15436 emit_insn (gen_zero_extendqisi2 (scratch,
15437 gen_rtx_MEM (QImode,
15438 plus_constant (Pmode, base,
15439 offset))));
15440 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15441 gen_rtx_MEM (QImode,
15442 plus_constant (Pmode, base,
15443 offset + 1))));
15444 if (!BYTES_BIG_ENDIAN)
15445 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15446 gen_rtx_IOR (SImode,
15447 gen_rtx_ASHIFT
15448 (SImode,
15449 gen_rtx_SUBREG (SImode, operands[0], 0),
15450 GEN_INT (8)),
15451 scratch));
15452 else
15453 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15454 gen_rtx_IOR (SImode,
15455 gen_rtx_ASHIFT (SImode, scratch,
15456 GEN_INT (8)),
15457 gen_rtx_SUBREG (SImode, operands[0], 0)));
15460 /* Handle storing a half-word to memory during reload by synthesizing as two
15461 byte stores. Take care not to clobber the input values until after we
15462 have moved them somewhere safe. This code assumes that if the DImode
15463 scratch in operands[2] overlaps either the input value or output address
15464 in some way, then that value must die in this insn (we absolutely need
15465 two scratch registers for some corner cases). */
15466 void
15467 arm_reload_out_hi (rtx *operands)
15469 rtx ref = operands[0];
15470 rtx outval = operands[1];
15471 rtx base, scratch;
15472 HOST_WIDE_INT offset = 0;
15474 if (GET_CODE (ref) == SUBREG)
15476 offset = SUBREG_BYTE (ref);
15477 ref = SUBREG_REG (ref);
15480 if (REG_P (ref))
15482 /* We have a pseudo which has been spilt onto the stack; there
15483 are two cases here: the first where there is a simple
15484 stack-slot replacement and a second where the stack-slot is
15485 out of range, or is used as a subreg. */
15486 if (reg_equiv_mem (REGNO (ref)))
15488 ref = reg_equiv_mem (REGNO (ref));
15489 base = find_replacement (&XEXP (ref, 0));
15491 else
15492 /* The slot is out of range, or was dressed up in a SUBREG. */
15493 base = reg_equiv_address (REGNO (ref));
15495 else
15496 base = find_replacement (&XEXP (ref, 0));
15498 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15500 /* Handle the case where the address is too complex to be offset by 1. */
15501 if (GET_CODE (base) == MINUS
15502 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15504 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15506 /* Be careful not to destroy OUTVAL. */
15507 if (reg_overlap_mentioned_p (base_plus, outval))
15509 /* Updating base_plus might destroy outval, see if we can
15510 swap the scratch and base_plus. */
15511 if (!reg_overlap_mentioned_p (scratch, outval))
15512 std::swap (scratch, base_plus);
15513 else
15515 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15517 /* Be conservative and copy OUTVAL into the scratch now,
15518 this should only be necessary if outval is a subreg
15519 of something larger than a word. */
15520 /* XXX Might this clobber base? I can't see how it can,
15521 since scratch is known to overlap with OUTVAL, and
15522 must be wider than a word. */
15523 emit_insn (gen_movhi (scratch_hi, outval));
15524 outval = scratch_hi;
15528 emit_set_insn (base_plus, base);
15529 base = base_plus;
15531 else if (GET_CODE (base) == PLUS)
15533 /* The addend must be CONST_INT, or we would have dealt with it above. */
15534 HOST_WIDE_INT hi, lo;
15536 offset += INTVAL (XEXP (base, 1));
15537 base = XEXP (base, 0);
15539 /* Rework the address into a legal sequence of insns. */
15540 /* Valid range for lo is -4095 -> 4095 */
15541 lo = (offset >= 0
15542 ? (offset & 0xfff)
15543 : -((-offset) & 0xfff));
15545 /* Corner case, if lo is the max offset then we would be out of range
15546 once we have added the additional 1 below, so bump the msb into the
15547 pre-loading insn(s). */
15548 if (lo == 4095)
15549 lo &= 0x7ff;
15551 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15552 ^ (HOST_WIDE_INT) 0x80000000)
15553 - (HOST_WIDE_INT) 0x80000000);
15555 gcc_assert (hi + lo == offset);
15557 if (hi != 0)
15559 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15561 /* Be careful not to destroy OUTVAL. */
15562 if (reg_overlap_mentioned_p (base_plus, outval))
15564 /* Updating base_plus might destroy outval, see if we
15565 can swap the scratch and base_plus. */
15566 if (!reg_overlap_mentioned_p (scratch, outval))
15567 std::swap (scratch, base_plus);
15568 else
15570 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15572 /* Be conservative and copy outval into scratch now,
15573 this should only be necessary if outval is a
15574 subreg of something larger than a word. */
15575 /* XXX Might this clobber base? I can't see how it
15576 can, since scratch is known to overlap with
15577 outval. */
15578 emit_insn (gen_movhi (scratch_hi, outval));
15579 outval = scratch_hi;
15583 /* Get the base address; addsi3 knows how to handle constants
15584 that require more than one insn. */
15585 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15586 base = base_plus;
15587 offset = lo;
15591 if (BYTES_BIG_ENDIAN)
15593 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15594 plus_constant (Pmode, base,
15595 offset + 1)),
15596 gen_lowpart (QImode, outval)));
15597 emit_insn (gen_lshrsi3 (scratch,
15598 gen_rtx_SUBREG (SImode, outval, 0),
15599 GEN_INT (8)));
15600 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15601 offset)),
15602 gen_lowpart (QImode, scratch)));
15604 else
15606 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15607 offset)),
15608 gen_lowpart (QImode, outval)));
15609 emit_insn (gen_lshrsi3 (scratch,
15610 gen_rtx_SUBREG (SImode, outval, 0),
15611 GEN_INT (8)));
15612 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15613 plus_constant (Pmode, base,
15614 offset + 1)),
15615 gen_lowpart (QImode, scratch)));
15619 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15620 (padded to the size of a word) should be passed in a register. */
15622 static bool
15623 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15625 if (TARGET_AAPCS_BASED)
15626 return must_pass_in_stack_var_size (mode, type);
15627 else
15628 return must_pass_in_stack_var_size_or_pad (mode, type);
15632 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15633 Return true if an argument passed on the stack should be padded upwards,
15634 i.e. if the least-significant byte has useful data.
15635 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15636 aggregate types are placed in the lowest memory address. */
15638 bool
15639 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15641 if (!TARGET_AAPCS_BASED)
15642 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15644 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15645 return false;
15647 return true;
15651 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15652 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15653 register has useful data, and return the opposite if the most
15654 significant byte does. */
15656 bool
15657 arm_pad_reg_upward (machine_mode mode,
15658 tree type, int first ATTRIBUTE_UNUSED)
15660 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15662 /* For AAPCS, small aggregates, small fixed-point types,
15663 and small complex types are always padded upwards. */
15664 if (type)
15666 if ((AGGREGATE_TYPE_P (type)
15667 || TREE_CODE (type) == COMPLEX_TYPE
15668 || FIXED_POINT_TYPE_P (type))
15669 && int_size_in_bytes (type) <= 4)
15670 return true;
15672 else
15674 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15675 && GET_MODE_SIZE (mode) <= 4)
15676 return true;
15680 /* Otherwise, use default padding. */
15681 return !BYTES_BIG_ENDIAN;
15684 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15685 assuming that the address in the base register is word aligned. */
15686 bool
15687 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15689 HOST_WIDE_INT max_offset;
15691 /* Offset must be a multiple of 4 in Thumb mode. */
15692 if (TARGET_THUMB2 && ((offset & 3) != 0))
15693 return false;
15695 if (TARGET_THUMB2)
15696 max_offset = 1020;
15697 else if (TARGET_ARM)
15698 max_offset = 255;
15699 else
15700 return false;
15702 return ((offset <= max_offset) && (offset >= -max_offset));
15705 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15706 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15707 Assumes that the address in the base register RN is word aligned. Pattern
15708 guarantees that both memory accesses use the same base register,
15709 the offsets are constants within the range, and the gap between the offsets is 4.
15710 If preload complete then check that registers are legal. WBACK indicates whether
15711 address is updated. LOAD indicates whether memory access is load or store. */
15712 bool
15713 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15714 bool wback, bool load)
15716 unsigned int t, t2, n;
15718 if (!reload_completed)
15719 return true;
15721 if (!offset_ok_for_ldrd_strd (offset))
15722 return false;
15724 t = REGNO (rt);
15725 t2 = REGNO (rt2);
15726 n = REGNO (rn);
15728 if ((TARGET_THUMB2)
15729 && ((wback && (n == t || n == t2))
15730 || (t == SP_REGNUM)
15731 || (t == PC_REGNUM)
15732 || (t2 == SP_REGNUM)
15733 || (t2 == PC_REGNUM)
15734 || (!load && (n == PC_REGNUM))
15735 || (load && (t == t2))
15736 /* Triggers Cortex-M3 LDRD errata. */
15737 || (!wback && load && fix_cm3_ldrd && (n == t))))
15738 return false;
15740 if ((TARGET_ARM)
15741 && ((wback && (n == t || n == t2))
15742 || (t2 == PC_REGNUM)
15743 || (t % 2 != 0) /* First destination register is not even. */
15744 || (t2 != t + 1)
15745 /* PC can be used as base register (for offset addressing only),
15746 but it is depricated. */
15747 || (n == PC_REGNUM)))
15748 return false;
15750 return true;
15753 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15754 operand MEM's address contains an immediate offset from the base
15755 register and has no side effects, in which case it sets BASE and
15756 OFFSET accordingly. */
15757 static bool
15758 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15760 rtx addr;
15762 gcc_assert (base != NULL && offset != NULL);
15764 /* TODO: Handle more general memory operand patterns, such as
15765 PRE_DEC and PRE_INC. */
15767 if (side_effects_p (mem))
15768 return false;
15770 /* Can't deal with subregs. */
15771 if (GET_CODE (mem) == SUBREG)
15772 return false;
15774 gcc_assert (MEM_P (mem));
15776 *offset = const0_rtx;
15778 addr = XEXP (mem, 0);
15780 /* If addr isn't valid for DImode, then we can't handle it. */
15781 if (!arm_legitimate_address_p (DImode, addr,
15782 reload_in_progress || reload_completed))
15783 return false;
15785 if (REG_P (addr))
15787 *base = addr;
15788 return true;
15790 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15792 *base = XEXP (addr, 0);
15793 *offset = XEXP (addr, 1);
15794 return (REG_P (*base) && CONST_INT_P (*offset));
15797 return false;
15800 /* Called from a peephole2 to replace two word-size accesses with a
15801 single LDRD/STRD instruction. Returns true iff we can generate a
15802 new instruction sequence. That is, both accesses use the same base
15803 register and the gap between constant offsets is 4. This function
15804 may reorder its operands to match ldrd/strd RTL templates.
15805 OPERANDS are the operands found by the peephole matcher;
15806 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15807 corresponding memory operands. LOAD indicaates whether the access
15808 is load or store. CONST_STORE indicates a store of constant
15809 integer values held in OPERANDS[4,5] and assumes that the pattern
15810 is of length 4 insn, for the purpose of checking dead registers.
15811 COMMUTE indicates that register operands may be reordered. */
15812 bool
15813 gen_operands_ldrd_strd (rtx *operands, bool load,
15814 bool const_store, bool commute)
15816 int nops = 2;
15817 HOST_WIDE_INT offsets[2], offset;
15818 rtx base = NULL_RTX;
15819 rtx cur_base, cur_offset, tmp;
15820 int i, gap;
15821 HARD_REG_SET regset;
15823 gcc_assert (!const_store || !load);
15824 /* Check that the memory references are immediate offsets from the
15825 same base register. Extract the base register, the destination
15826 registers, and the corresponding memory offsets. */
15827 for (i = 0; i < nops; i++)
15829 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15830 return false;
15832 if (i == 0)
15833 base = cur_base;
15834 else if (REGNO (base) != REGNO (cur_base))
15835 return false;
15837 offsets[i] = INTVAL (cur_offset);
15838 if (GET_CODE (operands[i]) == SUBREG)
15840 tmp = SUBREG_REG (operands[i]);
15841 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15842 operands[i] = tmp;
15846 /* Make sure there is no dependency between the individual loads. */
15847 if (load && REGNO (operands[0]) == REGNO (base))
15848 return false; /* RAW */
15850 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15851 return false; /* WAW */
15853 /* If the same input register is used in both stores
15854 when storing different constants, try to find a free register.
15855 For example, the code
15856 mov r0, 0
15857 str r0, [r2]
15858 mov r0, 1
15859 str r0, [r2, #4]
15860 can be transformed into
15861 mov r1, 0
15862 strd r1, r0, [r2]
15863 in Thumb mode assuming that r1 is free. */
15864 if (const_store
15865 && REGNO (operands[0]) == REGNO (operands[1])
15866 && INTVAL (operands[4]) != INTVAL (operands[5]))
15868 if (TARGET_THUMB2)
15870 CLEAR_HARD_REG_SET (regset);
15871 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15872 if (tmp == NULL_RTX)
15873 return false;
15875 /* Use the new register in the first load to ensure that
15876 if the original input register is not dead after peephole,
15877 then it will have the correct constant value. */
15878 operands[0] = tmp;
15880 else if (TARGET_ARM)
15882 return false;
15883 int regno = REGNO (operands[0]);
15884 if (!peep2_reg_dead_p (4, operands[0]))
15886 /* When the input register is even and is not dead after the
15887 pattern, it has to hold the second constant but we cannot
15888 form a legal STRD in ARM mode with this register as the second
15889 register. */
15890 if (regno % 2 == 0)
15891 return false;
15893 /* Is regno-1 free? */
15894 SET_HARD_REG_SET (regset);
15895 CLEAR_HARD_REG_BIT(regset, regno - 1);
15896 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15897 if (tmp == NULL_RTX)
15898 return false;
15900 operands[0] = tmp;
15902 else
15904 /* Find a DImode register. */
15905 CLEAR_HARD_REG_SET (regset);
15906 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15907 if (tmp != NULL_RTX)
15909 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15910 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15912 else
15914 /* Can we use the input register to form a DI register? */
15915 SET_HARD_REG_SET (regset);
15916 CLEAR_HARD_REG_BIT(regset,
15917 regno % 2 == 0 ? regno + 1 : regno - 1);
15918 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15919 if (tmp == NULL_RTX)
15920 return false;
15921 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15925 gcc_assert (operands[0] != NULL_RTX);
15926 gcc_assert (operands[1] != NULL_RTX);
15927 gcc_assert (REGNO (operands[0]) % 2 == 0);
15928 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15932 /* Make sure the instructions are ordered with lower memory access first. */
15933 if (offsets[0] > offsets[1])
15935 gap = offsets[0] - offsets[1];
15936 offset = offsets[1];
15938 /* Swap the instructions such that lower memory is accessed first. */
15939 std::swap (operands[0], operands[1]);
15940 std::swap (operands[2], operands[3]);
15941 if (const_store)
15942 std::swap (operands[4], operands[5]);
15944 else
15946 gap = offsets[1] - offsets[0];
15947 offset = offsets[0];
15950 /* Make sure accesses are to consecutive memory locations. */
15951 if (gap != 4)
15952 return false;
15954 /* Make sure we generate legal instructions. */
15955 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15956 false, load))
15957 return true;
15959 /* In Thumb state, where registers are almost unconstrained, there
15960 is little hope to fix it. */
15961 if (TARGET_THUMB2)
15962 return false;
15964 if (load && commute)
15966 /* Try reordering registers. */
15967 std::swap (operands[0], operands[1]);
15968 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15969 false, load))
15970 return true;
15973 if (const_store)
15975 /* If input registers are dead after this pattern, they can be
15976 reordered or replaced by other registers that are free in the
15977 current pattern. */
15978 if (!peep2_reg_dead_p (4, operands[0])
15979 || !peep2_reg_dead_p (4, operands[1]))
15980 return false;
15982 /* Try to reorder the input registers. */
15983 /* For example, the code
15984 mov r0, 0
15985 mov r1, 1
15986 str r1, [r2]
15987 str r0, [r2, #4]
15988 can be transformed into
15989 mov r1, 0
15990 mov r0, 1
15991 strd r0, [r2]
15993 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15994 false, false))
15996 std::swap (operands[0], operands[1]);
15997 return true;
16000 /* Try to find a free DI register. */
16001 CLEAR_HARD_REG_SET (regset);
16002 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16003 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16004 while (true)
16006 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16007 if (tmp == NULL_RTX)
16008 return false;
16010 /* DREG must be an even-numbered register in DImode.
16011 Split it into SI registers. */
16012 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16013 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16014 gcc_assert (operands[0] != NULL_RTX);
16015 gcc_assert (operands[1] != NULL_RTX);
16016 gcc_assert (REGNO (operands[0]) % 2 == 0);
16017 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16019 return (operands_ok_ldrd_strd (operands[0], operands[1],
16020 base, offset,
16021 false, load));
16025 return false;
16031 /* Print a symbolic form of X to the debug file, F. */
16032 static void
16033 arm_print_value (FILE *f, rtx x)
16035 switch (GET_CODE (x))
16037 case CONST_INT:
16038 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16039 return;
16041 case CONST_DOUBLE:
16042 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16043 return;
16045 case CONST_VECTOR:
16047 int i;
16049 fprintf (f, "<");
16050 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16052 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16053 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16054 fputc (',', f);
16056 fprintf (f, ">");
16058 return;
16060 case CONST_STRING:
16061 fprintf (f, "\"%s\"", XSTR (x, 0));
16062 return;
16064 case SYMBOL_REF:
16065 fprintf (f, "`%s'", XSTR (x, 0));
16066 return;
16068 case LABEL_REF:
16069 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16070 return;
16072 case CONST:
16073 arm_print_value (f, XEXP (x, 0));
16074 return;
16076 case PLUS:
16077 arm_print_value (f, XEXP (x, 0));
16078 fprintf (f, "+");
16079 arm_print_value (f, XEXP (x, 1));
16080 return;
16082 case PC:
16083 fprintf (f, "pc");
16084 return;
16086 default:
16087 fprintf (f, "????");
16088 return;
16092 /* Routines for manipulation of the constant pool. */
16094 /* Arm instructions cannot load a large constant directly into a
16095 register; they have to come from a pc relative load. The constant
16096 must therefore be placed in the addressable range of the pc
16097 relative load. Depending on the precise pc relative load
16098 instruction the range is somewhere between 256 bytes and 4k. This
16099 means that we often have to dump a constant inside a function, and
16100 generate code to branch around it.
16102 It is important to minimize this, since the branches will slow
16103 things down and make the code larger.
16105 Normally we can hide the table after an existing unconditional
16106 branch so that there is no interruption of the flow, but in the
16107 worst case the code looks like this:
16109 ldr rn, L1
16111 b L2
16112 align
16113 L1: .long value
16117 ldr rn, L3
16119 b L4
16120 align
16121 L3: .long value
16125 We fix this by performing a scan after scheduling, which notices
16126 which instructions need to have their operands fetched from the
16127 constant table and builds the table.
16129 The algorithm starts by building a table of all the constants that
16130 need fixing up and all the natural barriers in the function (places
16131 where a constant table can be dropped without breaking the flow).
16132 For each fixup we note how far the pc-relative replacement will be
16133 able to reach and the offset of the instruction into the function.
16135 Having built the table we then group the fixes together to form
16136 tables that are as large as possible (subject to addressing
16137 constraints) and emit each table of constants after the last
16138 barrier that is within range of all the instructions in the group.
16139 If a group does not contain a barrier, then we forcibly create one
16140 by inserting a jump instruction into the flow. Once the table has
16141 been inserted, the insns are then modified to reference the
16142 relevant entry in the pool.
16144 Possible enhancements to the algorithm (not implemented) are:
16146 1) For some processors and object formats, there may be benefit in
16147 aligning the pools to the start of cache lines; this alignment
16148 would need to be taken into account when calculating addressability
16149 of a pool. */
16151 /* These typedefs are located at the start of this file, so that
16152 they can be used in the prototypes there. This comment is to
16153 remind readers of that fact so that the following structures
16154 can be understood more easily.
16156 typedef struct minipool_node Mnode;
16157 typedef struct minipool_fixup Mfix; */
16159 struct minipool_node
16161 /* Doubly linked chain of entries. */
16162 Mnode * next;
16163 Mnode * prev;
16164 /* The maximum offset into the code that this entry can be placed. While
16165 pushing fixes for forward references, all entries are sorted in order
16166 of increasing max_address. */
16167 HOST_WIDE_INT max_address;
16168 /* Similarly for an entry inserted for a backwards ref. */
16169 HOST_WIDE_INT min_address;
16170 /* The number of fixes referencing this entry. This can become zero
16171 if we "unpush" an entry. In this case we ignore the entry when we
16172 come to emit the code. */
16173 int refcount;
16174 /* The offset from the start of the minipool. */
16175 HOST_WIDE_INT offset;
16176 /* The value in table. */
16177 rtx value;
16178 /* The mode of value. */
16179 machine_mode mode;
16180 /* The size of the value. With iWMMXt enabled
16181 sizes > 4 also imply an alignment of 8-bytes. */
16182 int fix_size;
16185 struct minipool_fixup
16187 Mfix * next;
16188 rtx_insn * insn;
16189 HOST_WIDE_INT address;
16190 rtx * loc;
16191 machine_mode mode;
16192 int fix_size;
16193 rtx value;
16194 Mnode * minipool;
16195 HOST_WIDE_INT forwards;
16196 HOST_WIDE_INT backwards;
16199 /* Fixes less than a word need padding out to a word boundary. */
16200 #define MINIPOOL_FIX_SIZE(mode) \
16201 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16203 static Mnode * minipool_vector_head;
16204 static Mnode * minipool_vector_tail;
16205 static rtx_code_label *minipool_vector_label;
16206 static int minipool_pad;
16208 /* The linked list of all minipool fixes required for this function. */
16209 Mfix * minipool_fix_head;
16210 Mfix * minipool_fix_tail;
16211 /* The fix entry for the current minipool, once it has been placed. */
16212 Mfix * minipool_barrier;
16214 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16215 #define JUMP_TABLES_IN_TEXT_SECTION 0
16216 #endif
16218 static HOST_WIDE_INT
16219 get_jump_table_size (rtx_jump_table_data *insn)
16221 /* ADDR_VECs only take room if read-only data does into the text
16222 section. */
16223 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16225 rtx body = PATTERN (insn);
16226 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16227 HOST_WIDE_INT size;
16228 HOST_WIDE_INT modesize;
16230 modesize = GET_MODE_SIZE (GET_MODE (body));
16231 size = modesize * XVECLEN (body, elt);
16232 switch (modesize)
16234 case 1:
16235 /* Round up size of TBB table to a halfword boundary. */
16236 size = (size + 1) & ~(HOST_WIDE_INT)1;
16237 break;
16238 case 2:
16239 /* No padding necessary for TBH. */
16240 break;
16241 case 4:
16242 /* Add two bytes for alignment on Thumb. */
16243 if (TARGET_THUMB)
16244 size += 2;
16245 break;
16246 default:
16247 gcc_unreachable ();
16249 return size;
16252 return 0;
16255 /* Return the maximum amount of padding that will be inserted before
16256 label LABEL. */
16258 static HOST_WIDE_INT
16259 get_label_padding (rtx label)
16261 HOST_WIDE_INT align, min_insn_size;
16263 align = 1 << label_to_alignment (label);
16264 min_insn_size = TARGET_THUMB ? 2 : 4;
16265 return align > min_insn_size ? align - min_insn_size : 0;
16268 /* Move a minipool fix MP from its current location to before MAX_MP.
16269 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16270 constraints may need updating. */
16271 static Mnode *
16272 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16273 HOST_WIDE_INT max_address)
16275 /* The code below assumes these are different. */
16276 gcc_assert (mp != max_mp);
16278 if (max_mp == NULL)
16280 if (max_address < mp->max_address)
16281 mp->max_address = max_address;
16283 else
16285 if (max_address > max_mp->max_address - mp->fix_size)
16286 mp->max_address = max_mp->max_address - mp->fix_size;
16287 else
16288 mp->max_address = max_address;
16290 /* Unlink MP from its current position. Since max_mp is non-null,
16291 mp->prev must be non-null. */
16292 mp->prev->next = mp->next;
16293 if (mp->next != NULL)
16294 mp->next->prev = mp->prev;
16295 else
16296 minipool_vector_tail = mp->prev;
16298 /* Re-insert it before MAX_MP. */
16299 mp->next = max_mp;
16300 mp->prev = max_mp->prev;
16301 max_mp->prev = mp;
16303 if (mp->prev != NULL)
16304 mp->prev->next = mp;
16305 else
16306 minipool_vector_head = mp;
16309 /* Save the new entry. */
16310 max_mp = mp;
16312 /* Scan over the preceding entries and adjust their addresses as
16313 required. */
16314 while (mp->prev != NULL
16315 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16317 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16318 mp = mp->prev;
16321 return max_mp;
16324 /* Add a constant to the minipool for a forward reference. Returns the
16325 node added or NULL if the constant will not fit in this pool. */
16326 static Mnode *
16327 add_minipool_forward_ref (Mfix *fix)
16329 /* If set, max_mp is the first pool_entry that has a lower
16330 constraint than the one we are trying to add. */
16331 Mnode * max_mp = NULL;
16332 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16333 Mnode * mp;
16335 /* If the minipool starts before the end of FIX->INSN then this FIX
16336 can not be placed into the current pool. Furthermore, adding the
16337 new constant pool entry may cause the pool to start FIX_SIZE bytes
16338 earlier. */
16339 if (minipool_vector_head &&
16340 (fix->address + get_attr_length (fix->insn)
16341 >= minipool_vector_head->max_address - fix->fix_size))
16342 return NULL;
16344 /* Scan the pool to see if a constant with the same value has
16345 already been added. While we are doing this, also note the
16346 location where we must insert the constant if it doesn't already
16347 exist. */
16348 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16350 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16351 && fix->mode == mp->mode
16352 && (!LABEL_P (fix->value)
16353 || (CODE_LABEL_NUMBER (fix->value)
16354 == CODE_LABEL_NUMBER (mp->value)))
16355 && rtx_equal_p (fix->value, mp->value))
16357 /* More than one fix references this entry. */
16358 mp->refcount++;
16359 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16362 /* Note the insertion point if necessary. */
16363 if (max_mp == NULL
16364 && mp->max_address > max_address)
16365 max_mp = mp;
16367 /* If we are inserting an 8-bytes aligned quantity and
16368 we have not already found an insertion point, then
16369 make sure that all such 8-byte aligned quantities are
16370 placed at the start of the pool. */
16371 if (ARM_DOUBLEWORD_ALIGN
16372 && max_mp == NULL
16373 && fix->fix_size >= 8
16374 && mp->fix_size < 8)
16376 max_mp = mp;
16377 max_address = mp->max_address;
16381 /* The value is not currently in the minipool, so we need to create
16382 a new entry for it. If MAX_MP is NULL, the entry will be put on
16383 the end of the list since the placement is less constrained than
16384 any existing entry. Otherwise, we insert the new fix before
16385 MAX_MP and, if necessary, adjust the constraints on the other
16386 entries. */
16387 mp = XNEW (Mnode);
16388 mp->fix_size = fix->fix_size;
16389 mp->mode = fix->mode;
16390 mp->value = fix->value;
16391 mp->refcount = 1;
16392 /* Not yet required for a backwards ref. */
16393 mp->min_address = -65536;
16395 if (max_mp == NULL)
16397 mp->max_address = max_address;
16398 mp->next = NULL;
16399 mp->prev = minipool_vector_tail;
16401 if (mp->prev == NULL)
16403 minipool_vector_head = mp;
16404 minipool_vector_label = gen_label_rtx ();
16406 else
16407 mp->prev->next = mp;
16409 minipool_vector_tail = mp;
16411 else
16413 if (max_address > max_mp->max_address - mp->fix_size)
16414 mp->max_address = max_mp->max_address - mp->fix_size;
16415 else
16416 mp->max_address = max_address;
16418 mp->next = max_mp;
16419 mp->prev = max_mp->prev;
16420 max_mp->prev = mp;
16421 if (mp->prev != NULL)
16422 mp->prev->next = mp;
16423 else
16424 minipool_vector_head = mp;
16427 /* Save the new entry. */
16428 max_mp = mp;
16430 /* Scan over the preceding entries and adjust their addresses as
16431 required. */
16432 while (mp->prev != NULL
16433 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16435 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16436 mp = mp->prev;
16439 return max_mp;
16442 static Mnode *
16443 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16444 HOST_WIDE_INT min_address)
16446 HOST_WIDE_INT offset;
16448 /* The code below assumes these are different. */
16449 gcc_assert (mp != min_mp);
16451 if (min_mp == NULL)
16453 if (min_address > mp->min_address)
16454 mp->min_address = min_address;
16456 else
16458 /* We will adjust this below if it is too loose. */
16459 mp->min_address = min_address;
16461 /* Unlink MP from its current position. Since min_mp is non-null,
16462 mp->next must be non-null. */
16463 mp->next->prev = mp->prev;
16464 if (mp->prev != NULL)
16465 mp->prev->next = mp->next;
16466 else
16467 minipool_vector_head = mp->next;
16469 /* Reinsert it after MIN_MP. */
16470 mp->prev = min_mp;
16471 mp->next = min_mp->next;
16472 min_mp->next = mp;
16473 if (mp->next != NULL)
16474 mp->next->prev = mp;
16475 else
16476 minipool_vector_tail = mp;
16479 min_mp = mp;
16481 offset = 0;
16482 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16484 mp->offset = offset;
16485 if (mp->refcount > 0)
16486 offset += mp->fix_size;
16488 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16489 mp->next->min_address = mp->min_address + mp->fix_size;
16492 return min_mp;
16495 /* Add a constant to the minipool for a backward reference. Returns the
16496 node added or NULL if the constant will not fit in this pool.
16498 Note that the code for insertion for a backwards reference can be
16499 somewhat confusing because the calculated offsets for each fix do
16500 not take into account the size of the pool (which is still under
16501 construction. */
16502 static Mnode *
16503 add_minipool_backward_ref (Mfix *fix)
16505 /* If set, min_mp is the last pool_entry that has a lower constraint
16506 than the one we are trying to add. */
16507 Mnode *min_mp = NULL;
16508 /* This can be negative, since it is only a constraint. */
16509 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16510 Mnode *mp;
16512 /* If we can't reach the current pool from this insn, or if we can't
16513 insert this entry at the end of the pool without pushing other
16514 fixes out of range, then we don't try. This ensures that we
16515 can't fail later on. */
16516 if (min_address >= minipool_barrier->address
16517 || (minipool_vector_tail->min_address + fix->fix_size
16518 >= minipool_barrier->address))
16519 return NULL;
16521 /* Scan the pool to see if a constant with the same value has
16522 already been added. While we are doing this, also note the
16523 location where we must insert the constant if it doesn't already
16524 exist. */
16525 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16527 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16528 && fix->mode == mp->mode
16529 && (!LABEL_P (fix->value)
16530 || (CODE_LABEL_NUMBER (fix->value)
16531 == CODE_LABEL_NUMBER (mp->value)))
16532 && rtx_equal_p (fix->value, mp->value)
16533 /* Check that there is enough slack to move this entry to the
16534 end of the table (this is conservative). */
16535 && (mp->max_address
16536 > (minipool_barrier->address
16537 + minipool_vector_tail->offset
16538 + minipool_vector_tail->fix_size)))
16540 mp->refcount++;
16541 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16544 if (min_mp != NULL)
16545 mp->min_address += fix->fix_size;
16546 else
16548 /* Note the insertion point if necessary. */
16549 if (mp->min_address < min_address)
16551 /* For now, we do not allow the insertion of 8-byte alignment
16552 requiring nodes anywhere but at the start of the pool. */
16553 if (ARM_DOUBLEWORD_ALIGN
16554 && fix->fix_size >= 8 && mp->fix_size < 8)
16555 return NULL;
16556 else
16557 min_mp = mp;
16559 else if (mp->max_address
16560 < minipool_barrier->address + mp->offset + fix->fix_size)
16562 /* Inserting before this entry would push the fix beyond
16563 its maximum address (which can happen if we have
16564 re-located a forwards fix); force the new fix to come
16565 after it. */
16566 if (ARM_DOUBLEWORD_ALIGN
16567 && fix->fix_size >= 8 && mp->fix_size < 8)
16568 return NULL;
16569 else
16571 min_mp = mp;
16572 min_address = mp->min_address + fix->fix_size;
16575 /* Do not insert a non-8-byte aligned quantity before 8-byte
16576 aligned quantities. */
16577 else if (ARM_DOUBLEWORD_ALIGN
16578 && fix->fix_size < 8
16579 && mp->fix_size >= 8)
16581 min_mp = mp;
16582 min_address = mp->min_address + fix->fix_size;
16587 /* We need to create a new entry. */
16588 mp = XNEW (Mnode);
16589 mp->fix_size = fix->fix_size;
16590 mp->mode = fix->mode;
16591 mp->value = fix->value;
16592 mp->refcount = 1;
16593 mp->max_address = minipool_barrier->address + 65536;
16595 mp->min_address = min_address;
16597 if (min_mp == NULL)
16599 mp->prev = NULL;
16600 mp->next = minipool_vector_head;
16602 if (mp->next == NULL)
16604 minipool_vector_tail = mp;
16605 minipool_vector_label = gen_label_rtx ();
16607 else
16608 mp->next->prev = mp;
16610 minipool_vector_head = mp;
16612 else
16614 mp->next = min_mp->next;
16615 mp->prev = min_mp;
16616 min_mp->next = mp;
16618 if (mp->next != NULL)
16619 mp->next->prev = mp;
16620 else
16621 minipool_vector_tail = mp;
16624 /* Save the new entry. */
16625 min_mp = mp;
16627 if (mp->prev)
16628 mp = mp->prev;
16629 else
16630 mp->offset = 0;
16632 /* Scan over the following entries and adjust their offsets. */
16633 while (mp->next != NULL)
16635 if (mp->next->min_address < mp->min_address + mp->fix_size)
16636 mp->next->min_address = mp->min_address + mp->fix_size;
16638 if (mp->refcount)
16639 mp->next->offset = mp->offset + mp->fix_size;
16640 else
16641 mp->next->offset = mp->offset;
16643 mp = mp->next;
16646 return min_mp;
16649 static void
16650 assign_minipool_offsets (Mfix *barrier)
16652 HOST_WIDE_INT offset = 0;
16653 Mnode *mp;
16655 minipool_barrier = barrier;
16657 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16659 mp->offset = offset;
16661 if (mp->refcount > 0)
16662 offset += mp->fix_size;
16666 /* Output the literal table */
16667 static void
16668 dump_minipool (rtx_insn *scan)
16670 Mnode * mp;
16671 Mnode * nmp;
16672 int align64 = 0;
16674 if (ARM_DOUBLEWORD_ALIGN)
16675 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16676 if (mp->refcount > 0 && mp->fix_size >= 8)
16678 align64 = 1;
16679 break;
16682 if (dump_file)
16683 fprintf (dump_file,
16684 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16685 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16687 scan = emit_label_after (gen_label_rtx (), scan);
16688 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16689 scan = emit_label_after (minipool_vector_label, scan);
16691 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16693 if (mp->refcount > 0)
16695 if (dump_file)
16697 fprintf (dump_file,
16698 ";; Offset %u, min %ld, max %ld ",
16699 (unsigned) mp->offset, (unsigned long) mp->min_address,
16700 (unsigned long) mp->max_address);
16701 arm_print_value (dump_file, mp->value);
16702 fputc ('\n', dump_file);
16705 switch (GET_MODE_SIZE (mp->mode))
16707 #ifdef HAVE_consttable_1
16708 case 1:
16709 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16710 break;
16712 #endif
16713 #ifdef HAVE_consttable_2
16714 case 2:
16715 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16716 break;
16718 #endif
16719 #ifdef HAVE_consttable_4
16720 case 4:
16721 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16722 break;
16724 #endif
16725 #ifdef HAVE_consttable_8
16726 case 8:
16727 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16728 break;
16730 #endif
16731 #ifdef HAVE_consttable_16
16732 case 16:
16733 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16734 break;
16736 #endif
16737 default:
16738 gcc_unreachable ();
16742 nmp = mp->next;
16743 free (mp);
16746 minipool_vector_head = minipool_vector_tail = NULL;
16747 scan = emit_insn_after (gen_consttable_end (), scan);
16748 scan = emit_barrier_after (scan);
16751 /* Return the cost of forcibly inserting a barrier after INSN. */
16752 static int
16753 arm_barrier_cost (rtx_insn *insn)
16755 /* Basing the location of the pool on the loop depth is preferable,
16756 but at the moment, the basic block information seems to be
16757 corrupt by this stage of the compilation. */
16758 int base_cost = 50;
16759 rtx_insn *next = next_nonnote_insn (insn);
16761 if (next != NULL && LABEL_P (next))
16762 base_cost -= 20;
16764 switch (GET_CODE (insn))
16766 case CODE_LABEL:
16767 /* It will always be better to place the table before the label, rather
16768 than after it. */
16769 return 50;
16771 case INSN:
16772 case CALL_INSN:
16773 return base_cost;
16775 case JUMP_INSN:
16776 return base_cost - 10;
16778 default:
16779 return base_cost + 10;
16783 /* Find the best place in the insn stream in the range
16784 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16785 Create the barrier by inserting a jump and add a new fix entry for
16786 it. */
16787 static Mfix *
16788 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16790 HOST_WIDE_INT count = 0;
16791 rtx_barrier *barrier;
16792 rtx_insn *from = fix->insn;
16793 /* The instruction after which we will insert the jump. */
16794 rtx_insn *selected = NULL;
16795 int selected_cost;
16796 /* The address at which the jump instruction will be placed. */
16797 HOST_WIDE_INT selected_address;
16798 Mfix * new_fix;
16799 HOST_WIDE_INT max_count = max_address - fix->address;
16800 rtx_code_label *label = gen_label_rtx ();
16802 selected_cost = arm_barrier_cost (from);
16803 selected_address = fix->address;
16805 while (from && count < max_count)
16807 rtx_jump_table_data *tmp;
16808 int new_cost;
16810 /* This code shouldn't have been called if there was a natural barrier
16811 within range. */
16812 gcc_assert (!BARRIER_P (from));
16814 /* Count the length of this insn. This must stay in sync with the
16815 code that pushes minipool fixes. */
16816 if (LABEL_P (from))
16817 count += get_label_padding (from);
16818 else
16819 count += get_attr_length (from);
16821 /* If there is a jump table, add its length. */
16822 if (tablejump_p (from, NULL, &tmp))
16824 count += get_jump_table_size (tmp);
16826 /* Jump tables aren't in a basic block, so base the cost on
16827 the dispatch insn. If we select this location, we will
16828 still put the pool after the table. */
16829 new_cost = arm_barrier_cost (from);
16831 if (count < max_count
16832 && (!selected || new_cost <= selected_cost))
16834 selected = tmp;
16835 selected_cost = new_cost;
16836 selected_address = fix->address + count;
16839 /* Continue after the dispatch table. */
16840 from = NEXT_INSN (tmp);
16841 continue;
16844 new_cost = arm_barrier_cost (from);
16846 if (count < max_count
16847 && (!selected || new_cost <= selected_cost))
16849 selected = from;
16850 selected_cost = new_cost;
16851 selected_address = fix->address + count;
16854 from = NEXT_INSN (from);
16857 /* Make sure that we found a place to insert the jump. */
16858 gcc_assert (selected);
16860 /* Make sure we do not split a call and its corresponding
16861 CALL_ARG_LOCATION note. */
16862 if (CALL_P (selected))
16864 rtx_insn *next = NEXT_INSN (selected);
16865 if (next && NOTE_P (next)
16866 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16867 selected = next;
16870 /* Create a new JUMP_INSN that branches around a barrier. */
16871 from = emit_jump_insn_after (gen_jump (label), selected);
16872 JUMP_LABEL (from) = label;
16873 barrier = emit_barrier_after (from);
16874 emit_label_after (label, barrier);
16876 /* Create a minipool barrier entry for the new barrier. */
16877 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16878 new_fix->insn = barrier;
16879 new_fix->address = selected_address;
16880 new_fix->next = fix->next;
16881 fix->next = new_fix;
16883 return new_fix;
16886 /* Record that there is a natural barrier in the insn stream at
16887 ADDRESS. */
16888 static void
16889 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16891 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16893 fix->insn = insn;
16894 fix->address = address;
16896 fix->next = NULL;
16897 if (minipool_fix_head != NULL)
16898 minipool_fix_tail->next = fix;
16899 else
16900 minipool_fix_head = fix;
16902 minipool_fix_tail = fix;
16905 /* Record INSN, which will need fixing up to load a value from the
16906 minipool. ADDRESS is the offset of the insn since the start of the
16907 function; LOC is a pointer to the part of the insn which requires
16908 fixing; VALUE is the constant that must be loaded, which is of type
16909 MODE. */
16910 static void
16911 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16912 machine_mode mode, rtx value)
16914 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16916 fix->insn = insn;
16917 fix->address = address;
16918 fix->loc = loc;
16919 fix->mode = mode;
16920 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16921 fix->value = value;
16922 fix->forwards = get_attr_pool_range (insn);
16923 fix->backwards = get_attr_neg_pool_range (insn);
16924 fix->minipool = NULL;
16926 /* If an insn doesn't have a range defined for it, then it isn't
16927 expecting to be reworked by this code. Better to stop now than
16928 to generate duff assembly code. */
16929 gcc_assert (fix->forwards || fix->backwards);
16931 /* If an entry requires 8-byte alignment then assume all constant pools
16932 require 4 bytes of padding. Trying to do this later on a per-pool
16933 basis is awkward because existing pool entries have to be modified. */
16934 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16935 minipool_pad = 4;
16937 if (dump_file)
16939 fprintf (dump_file,
16940 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16941 GET_MODE_NAME (mode),
16942 INSN_UID (insn), (unsigned long) address,
16943 -1 * (long)fix->backwards, (long)fix->forwards);
16944 arm_print_value (dump_file, fix->value);
16945 fprintf (dump_file, "\n");
16948 /* Add it to the chain of fixes. */
16949 fix->next = NULL;
16951 if (minipool_fix_head != NULL)
16952 minipool_fix_tail->next = fix;
16953 else
16954 minipool_fix_head = fix;
16956 minipool_fix_tail = fix;
16959 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16960 Returns the number of insns needed, or 99 if we always want to synthesize
16961 the value. */
16963 arm_max_const_double_inline_cost ()
16965 /* Let the value get synthesized to avoid the use of literal pools. */
16966 if (arm_disable_literal_pool)
16967 return 99;
16969 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16972 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16973 Returns the number of insns needed, or 99 if we don't know how to
16974 do it. */
16976 arm_const_double_inline_cost (rtx val)
16978 rtx lowpart, highpart;
16979 machine_mode mode;
16981 mode = GET_MODE (val);
16983 if (mode == VOIDmode)
16984 mode = DImode;
16986 gcc_assert (GET_MODE_SIZE (mode) == 8);
16988 lowpart = gen_lowpart (SImode, val);
16989 highpart = gen_highpart_mode (SImode, mode, val);
16991 gcc_assert (CONST_INT_P (lowpart));
16992 gcc_assert (CONST_INT_P (highpart));
16994 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16995 NULL_RTX, NULL_RTX, 0, 0)
16996 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16997 NULL_RTX, NULL_RTX, 0, 0));
17000 /* Cost of loading a SImode constant. */
17001 static inline int
17002 arm_const_inline_cost (enum rtx_code code, rtx val)
17004 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17005 NULL_RTX, NULL_RTX, 1, 0);
17008 /* Return true if it is worthwhile to split a 64-bit constant into two
17009 32-bit operations. This is the case if optimizing for size, or
17010 if we have load delay slots, or if one 32-bit part can be done with
17011 a single data operation. */
17012 bool
17013 arm_const_double_by_parts (rtx val)
17015 machine_mode mode = GET_MODE (val);
17016 rtx part;
17018 if (optimize_size || arm_ld_sched)
17019 return true;
17021 if (mode == VOIDmode)
17022 mode = DImode;
17024 part = gen_highpart_mode (SImode, mode, val);
17026 gcc_assert (CONST_INT_P (part));
17028 if (const_ok_for_arm (INTVAL (part))
17029 || const_ok_for_arm (~INTVAL (part)))
17030 return true;
17032 part = gen_lowpart (SImode, val);
17034 gcc_assert (CONST_INT_P (part));
17036 if (const_ok_for_arm (INTVAL (part))
17037 || const_ok_for_arm (~INTVAL (part)))
17038 return true;
17040 return false;
17043 /* Return true if it is possible to inline both the high and low parts
17044 of a 64-bit constant into 32-bit data processing instructions. */
17045 bool
17046 arm_const_double_by_immediates (rtx val)
17048 machine_mode mode = GET_MODE (val);
17049 rtx part;
17051 if (mode == VOIDmode)
17052 mode = DImode;
17054 part = gen_highpart_mode (SImode, mode, val);
17056 gcc_assert (CONST_INT_P (part));
17058 if (!const_ok_for_arm (INTVAL (part)))
17059 return false;
17061 part = gen_lowpart (SImode, val);
17063 gcc_assert (CONST_INT_P (part));
17065 if (!const_ok_for_arm (INTVAL (part)))
17066 return false;
17068 return true;
17071 /* Scan INSN and note any of its operands that need fixing.
17072 If DO_PUSHES is false we do not actually push any of the fixups
17073 needed. */
17074 static void
17075 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17077 int opno;
17079 extract_constrain_insn (insn);
17081 if (recog_data.n_alternatives == 0)
17082 return;
17084 /* Fill in recog_op_alt with information about the constraints of
17085 this insn. */
17086 preprocess_constraints (insn);
17088 const operand_alternative *op_alt = which_op_alt ();
17089 for (opno = 0; opno < recog_data.n_operands; opno++)
17091 /* Things we need to fix can only occur in inputs. */
17092 if (recog_data.operand_type[opno] != OP_IN)
17093 continue;
17095 /* If this alternative is a memory reference, then any mention
17096 of constants in this alternative is really to fool reload
17097 into allowing us to accept one there. We need to fix them up
17098 now so that we output the right code. */
17099 if (op_alt[opno].memory_ok)
17101 rtx op = recog_data.operand[opno];
17103 if (CONSTANT_P (op))
17105 if (do_pushes)
17106 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17107 recog_data.operand_mode[opno], op);
17109 else if (MEM_P (op)
17110 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17111 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17113 if (do_pushes)
17115 rtx cop = avoid_constant_pool_reference (op);
17117 /* Casting the address of something to a mode narrower
17118 than a word can cause avoid_constant_pool_reference()
17119 to return the pool reference itself. That's no good to
17120 us here. Lets just hope that we can use the
17121 constant pool value directly. */
17122 if (op == cop)
17123 cop = get_pool_constant (XEXP (op, 0));
17125 push_minipool_fix (insn, address,
17126 recog_data.operand_loc[opno],
17127 recog_data.operand_mode[opno], cop);
17134 return;
17137 /* Rewrite move insn into subtract of 0 if the condition codes will
17138 be useful in next conditional jump insn. */
17140 static void
17141 thumb1_reorg (void)
17143 basic_block bb;
17145 FOR_EACH_BB_FN (bb, cfun)
17147 rtx dest, src;
17148 rtx pat, op0, set = NULL;
17149 rtx_insn *prev, *insn = BB_END (bb);
17150 bool insn_clobbered = false;
17152 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17153 insn = PREV_INSN (insn);
17155 /* Find the last cbranchsi4_insn in basic block BB. */
17156 if (insn == BB_HEAD (bb)
17157 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17158 continue;
17160 /* Get the register with which we are comparing. */
17161 pat = PATTERN (insn);
17162 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17164 /* Find the first flag setting insn before INSN in basic block BB. */
17165 gcc_assert (insn != BB_HEAD (bb));
17166 for (prev = PREV_INSN (insn);
17167 (!insn_clobbered
17168 && prev != BB_HEAD (bb)
17169 && (NOTE_P (prev)
17170 || DEBUG_INSN_P (prev)
17171 || ((set = single_set (prev)) != NULL
17172 && get_attr_conds (prev) == CONDS_NOCOND)));
17173 prev = PREV_INSN (prev))
17175 if (reg_set_p (op0, prev))
17176 insn_clobbered = true;
17179 /* Skip if op0 is clobbered by insn other than prev. */
17180 if (insn_clobbered)
17181 continue;
17183 if (!set)
17184 continue;
17186 dest = SET_DEST (set);
17187 src = SET_SRC (set);
17188 if (!low_register_operand (dest, SImode)
17189 || !low_register_operand (src, SImode))
17190 continue;
17192 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17193 in INSN. Both src and dest of the move insn are checked. */
17194 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17196 dest = copy_rtx (dest);
17197 src = copy_rtx (src);
17198 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17199 PATTERN (prev) = gen_rtx_SET (dest, src);
17200 INSN_CODE (prev) = -1;
17201 /* Set test register in INSN to dest. */
17202 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17203 INSN_CODE (insn) = -1;
17208 /* Convert instructions to their cc-clobbering variant if possible, since
17209 that allows us to use smaller encodings. */
17211 static void
17212 thumb2_reorg (void)
17214 basic_block bb;
17215 regset_head live;
17217 INIT_REG_SET (&live);
17219 /* We are freeing block_for_insn in the toplev to keep compatibility
17220 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17221 compute_bb_for_insn ();
17222 df_analyze ();
17224 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17226 FOR_EACH_BB_FN (bb, cfun)
17228 if ((current_tune->disparage_flag_setting_t16_encodings
17229 == tune_params::DISPARAGE_FLAGS_ALL)
17230 && optimize_bb_for_speed_p (bb))
17231 continue;
17233 rtx_insn *insn;
17234 Convert_Action action = SKIP;
17235 Convert_Action action_for_partial_flag_setting
17236 = ((current_tune->disparage_flag_setting_t16_encodings
17237 != tune_params::DISPARAGE_FLAGS_NEITHER)
17238 && optimize_bb_for_speed_p (bb))
17239 ? SKIP : CONV;
17241 COPY_REG_SET (&live, DF_LR_OUT (bb));
17242 df_simulate_initialize_backwards (bb, &live);
17243 FOR_BB_INSNS_REVERSE (bb, insn)
17245 if (NONJUMP_INSN_P (insn)
17246 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17247 && GET_CODE (PATTERN (insn)) == SET)
17249 action = SKIP;
17250 rtx pat = PATTERN (insn);
17251 rtx dst = XEXP (pat, 0);
17252 rtx src = XEXP (pat, 1);
17253 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17255 if (UNARY_P (src) || BINARY_P (src))
17256 op0 = XEXP (src, 0);
17258 if (BINARY_P (src))
17259 op1 = XEXP (src, 1);
17261 if (low_register_operand (dst, SImode))
17263 switch (GET_CODE (src))
17265 case PLUS:
17266 /* Adding two registers and storing the result
17267 in the first source is already a 16-bit
17268 operation. */
17269 if (rtx_equal_p (dst, op0)
17270 && register_operand (op1, SImode))
17271 break;
17273 if (low_register_operand (op0, SImode))
17275 /* ADDS <Rd>,<Rn>,<Rm> */
17276 if (low_register_operand (op1, SImode))
17277 action = CONV;
17278 /* ADDS <Rdn>,#<imm8> */
17279 /* SUBS <Rdn>,#<imm8> */
17280 else if (rtx_equal_p (dst, op0)
17281 && CONST_INT_P (op1)
17282 && IN_RANGE (INTVAL (op1), -255, 255))
17283 action = CONV;
17284 /* ADDS <Rd>,<Rn>,#<imm3> */
17285 /* SUBS <Rd>,<Rn>,#<imm3> */
17286 else if (CONST_INT_P (op1)
17287 && IN_RANGE (INTVAL (op1), -7, 7))
17288 action = CONV;
17290 /* ADCS <Rd>, <Rn> */
17291 else if (GET_CODE (XEXP (src, 0)) == PLUS
17292 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17293 && low_register_operand (XEXP (XEXP (src, 0), 1),
17294 SImode)
17295 && COMPARISON_P (op1)
17296 && cc_register (XEXP (op1, 0), VOIDmode)
17297 && maybe_get_arm_condition_code (op1) == ARM_CS
17298 && XEXP (op1, 1) == const0_rtx)
17299 action = CONV;
17300 break;
17302 case MINUS:
17303 /* RSBS <Rd>,<Rn>,#0
17304 Not handled here: see NEG below. */
17305 /* SUBS <Rd>,<Rn>,#<imm3>
17306 SUBS <Rdn>,#<imm8>
17307 Not handled here: see PLUS above. */
17308 /* SUBS <Rd>,<Rn>,<Rm> */
17309 if (low_register_operand (op0, SImode)
17310 && low_register_operand (op1, SImode))
17311 action = CONV;
17312 break;
17314 case MULT:
17315 /* MULS <Rdm>,<Rn>,<Rdm>
17316 As an exception to the rule, this is only used
17317 when optimizing for size since MULS is slow on all
17318 known implementations. We do not even want to use
17319 MULS in cold code, if optimizing for speed, so we
17320 test the global flag here. */
17321 if (!optimize_size)
17322 break;
17323 /* else fall through. */
17324 case AND:
17325 case IOR:
17326 case XOR:
17327 /* ANDS <Rdn>,<Rm> */
17328 if (rtx_equal_p (dst, op0)
17329 && low_register_operand (op1, SImode))
17330 action = action_for_partial_flag_setting;
17331 else if (rtx_equal_p (dst, op1)
17332 && low_register_operand (op0, SImode))
17333 action = action_for_partial_flag_setting == SKIP
17334 ? SKIP : SWAP_CONV;
17335 break;
17337 case ASHIFTRT:
17338 case ASHIFT:
17339 case LSHIFTRT:
17340 /* ASRS <Rdn>,<Rm> */
17341 /* LSRS <Rdn>,<Rm> */
17342 /* LSLS <Rdn>,<Rm> */
17343 if (rtx_equal_p (dst, op0)
17344 && low_register_operand (op1, SImode))
17345 action = action_for_partial_flag_setting;
17346 /* ASRS <Rd>,<Rm>,#<imm5> */
17347 /* LSRS <Rd>,<Rm>,#<imm5> */
17348 /* LSLS <Rd>,<Rm>,#<imm5> */
17349 else if (low_register_operand (op0, SImode)
17350 && CONST_INT_P (op1)
17351 && IN_RANGE (INTVAL (op1), 0, 31))
17352 action = action_for_partial_flag_setting;
17353 break;
17355 case ROTATERT:
17356 /* RORS <Rdn>,<Rm> */
17357 if (rtx_equal_p (dst, op0)
17358 && low_register_operand (op1, SImode))
17359 action = action_for_partial_flag_setting;
17360 break;
17362 case NOT:
17363 /* MVNS <Rd>,<Rm> */
17364 if (low_register_operand (op0, SImode))
17365 action = action_for_partial_flag_setting;
17366 break;
17368 case NEG:
17369 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17370 if (low_register_operand (op0, SImode))
17371 action = CONV;
17372 break;
17374 case CONST_INT:
17375 /* MOVS <Rd>,#<imm8> */
17376 if (CONST_INT_P (src)
17377 && IN_RANGE (INTVAL (src), 0, 255))
17378 action = action_for_partial_flag_setting;
17379 break;
17381 case REG:
17382 /* MOVS and MOV<c> with registers have different
17383 encodings, so are not relevant here. */
17384 break;
17386 default:
17387 break;
17391 if (action != SKIP)
17393 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17394 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17395 rtvec vec;
17397 if (action == SWAP_CONV)
17399 src = copy_rtx (src);
17400 XEXP (src, 0) = op1;
17401 XEXP (src, 1) = op0;
17402 pat = gen_rtx_SET (dst, src);
17403 vec = gen_rtvec (2, pat, clobber);
17405 else /* action == CONV */
17406 vec = gen_rtvec (2, pat, clobber);
17408 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17409 INSN_CODE (insn) = -1;
17413 if (NONDEBUG_INSN_P (insn))
17414 df_simulate_one_insn_backwards (bb, insn, &live);
17418 CLEAR_REG_SET (&live);
17421 /* Gcc puts the pool in the wrong place for ARM, since we can only
17422 load addresses a limited distance around the pc. We do some
17423 special munging to move the constant pool values to the correct
17424 point in the code. */
17425 static void
17426 arm_reorg (void)
17428 rtx_insn *insn;
17429 HOST_WIDE_INT address = 0;
17430 Mfix * fix;
17432 if (TARGET_THUMB1)
17433 thumb1_reorg ();
17434 else if (TARGET_THUMB2)
17435 thumb2_reorg ();
17437 /* Ensure all insns that must be split have been split at this point.
17438 Otherwise, the pool placement code below may compute incorrect
17439 insn lengths. Note that when optimizing, all insns have already
17440 been split at this point. */
17441 if (!optimize)
17442 split_all_insns_noflow ();
17444 minipool_fix_head = minipool_fix_tail = NULL;
17446 /* The first insn must always be a note, or the code below won't
17447 scan it properly. */
17448 insn = get_insns ();
17449 gcc_assert (NOTE_P (insn));
17450 minipool_pad = 0;
17452 /* Scan all the insns and record the operands that will need fixing. */
17453 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17455 if (BARRIER_P (insn))
17456 push_minipool_barrier (insn, address);
17457 else if (INSN_P (insn))
17459 rtx_jump_table_data *table;
17461 note_invalid_constants (insn, address, true);
17462 address += get_attr_length (insn);
17464 /* If the insn is a vector jump, add the size of the table
17465 and skip the table. */
17466 if (tablejump_p (insn, NULL, &table))
17468 address += get_jump_table_size (table);
17469 insn = table;
17472 else if (LABEL_P (insn))
17473 /* Add the worst-case padding due to alignment. We don't add
17474 the _current_ padding because the minipool insertions
17475 themselves might change it. */
17476 address += get_label_padding (insn);
17479 fix = minipool_fix_head;
17481 /* Now scan the fixups and perform the required changes. */
17482 while (fix)
17484 Mfix * ftmp;
17485 Mfix * fdel;
17486 Mfix * last_added_fix;
17487 Mfix * last_barrier = NULL;
17488 Mfix * this_fix;
17490 /* Skip any further barriers before the next fix. */
17491 while (fix && BARRIER_P (fix->insn))
17492 fix = fix->next;
17494 /* No more fixes. */
17495 if (fix == NULL)
17496 break;
17498 last_added_fix = NULL;
17500 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17502 if (BARRIER_P (ftmp->insn))
17504 if (ftmp->address >= minipool_vector_head->max_address)
17505 break;
17507 last_barrier = ftmp;
17509 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17510 break;
17512 last_added_fix = ftmp; /* Keep track of the last fix added. */
17515 /* If we found a barrier, drop back to that; any fixes that we
17516 could have reached but come after the barrier will now go in
17517 the next mini-pool. */
17518 if (last_barrier != NULL)
17520 /* Reduce the refcount for those fixes that won't go into this
17521 pool after all. */
17522 for (fdel = last_barrier->next;
17523 fdel && fdel != ftmp;
17524 fdel = fdel->next)
17526 fdel->minipool->refcount--;
17527 fdel->minipool = NULL;
17530 ftmp = last_barrier;
17532 else
17534 /* ftmp is first fix that we can't fit into this pool and
17535 there no natural barriers that we could use. Insert a
17536 new barrier in the code somewhere between the previous
17537 fix and this one, and arrange to jump around it. */
17538 HOST_WIDE_INT max_address;
17540 /* The last item on the list of fixes must be a barrier, so
17541 we can never run off the end of the list of fixes without
17542 last_barrier being set. */
17543 gcc_assert (ftmp);
17545 max_address = minipool_vector_head->max_address;
17546 /* Check that there isn't another fix that is in range that
17547 we couldn't fit into this pool because the pool was
17548 already too large: we need to put the pool before such an
17549 instruction. The pool itself may come just after the
17550 fix because create_fix_barrier also allows space for a
17551 jump instruction. */
17552 if (ftmp->address < max_address)
17553 max_address = ftmp->address + 1;
17555 last_barrier = create_fix_barrier (last_added_fix, max_address);
17558 assign_minipool_offsets (last_barrier);
17560 while (ftmp)
17562 if (!BARRIER_P (ftmp->insn)
17563 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17564 == NULL))
17565 break;
17567 ftmp = ftmp->next;
17570 /* Scan over the fixes we have identified for this pool, fixing them
17571 up and adding the constants to the pool itself. */
17572 for (this_fix = fix; this_fix && ftmp != this_fix;
17573 this_fix = this_fix->next)
17574 if (!BARRIER_P (this_fix->insn))
17576 rtx addr
17577 = plus_constant (Pmode,
17578 gen_rtx_LABEL_REF (VOIDmode,
17579 minipool_vector_label),
17580 this_fix->minipool->offset);
17581 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17584 dump_minipool (last_barrier->insn);
17585 fix = ftmp;
17588 /* From now on we must synthesize any constants that we can't handle
17589 directly. This can happen if the RTL gets split during final
17590 instruction generation. */
17591 cfun->machine->after_arm_reorg = 1;
17593 /* Free the minipool memory. */
17594 obstack_free (&minipool_obstack, minipool_startobj);
17597 /* Routines to output assembly language. */
17599 /* Return string representation of passed in real value. */
17600 static const char *
17601 fp_const_from_val (REAL_VALUE_TYPE *r)
17603 if (!fp_consts_inited)
17604 init_fp_table ();
17606 gcc_assert (real_equal (r, &value_fp0));
17607 return "0";
17610 /* OPERANDS[0] is the entire list of insns that constitute pop,
17611 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17612 is in the list, UPDATE is true iff the list contains explicit
17613 update of base register. */
17614 void
17615 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17616 bool update)
17618 int i;
17619 char pattern[100];
17620 int offset;
17621 const char *conditional;
17622 int num_saves = XVECLEN (operands[0], 0);
17623 unsigned int regno;
17624 unsigned int regno_base = REGNO (operands[1]);
17626 offset = 0;
17627 offset += update ? 1 : 0;
17628 offset += return_pc ? 1 : 0;
17630 /* Is the base register in the list? */
17631 for (i = offset; i < num_saves; i++)
17633 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17634 /* If SP is in the list, then the base register must be SP. */
17635 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17636 /* If base register is in the list, there must be no explicit update. */
17637 if (regno == regno_base)
17638 gcc_assert (!update);
17641 conditional = reverse ? "%?%D0" : "%?%d0";
17642 if ((regno_base == SP_REGNUM) && TARGET_THUMB)
17644 /* Output pop (not stmfd) because it has a shorter encoding. */
17645 gcc_assert (update);
17646 sprintf (pattern, "pop%s\t{", conditional);
17648 else
17650 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17651 It's just a convention, their semantics are identical. */
17652 if (regno_base == SP_REGNUM)
17653 sprintf (pattern, "ldm%sfd\t", conditional);
17654 else if (TARGET_UNIFIED_ASM)
17655 sprintf (pattern, "ldmia%s\t", conditional);
17656 else
17657 sprintf (pattern, "ldm%sia\t", conditional);
17659 strcat (pattern, reg_names[regno_base]);
17660 if (update)
17661 strcat (pattern, "!, {");
17662 else
17663 strcat (pattern, ", {");
17666 /* Output the first destination register. */
17667 strcat (pattern,
17668 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17670 /* Output the rest of the destination registers. */
17671 for (i = offset + 1; i < num_saves; i++)
17673 strcat (pattern, ", ");
17674 strcat (pattern,
17675 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17678 strcat (pattern, "}");
17680 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17681 strcat (pattern, "^");
17683 output_asm_insn (pattern, &cond);
17687 /* Output the assembly for a store multiple. */
17689 const char *
17690 vfp_output_vstmd (rtx * operands)
17692 char pattern[100];
17693 int p;
17694 int base;
17695 int i;
17696 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17697 ? XEXP (operands[0], 0)
17698 : XEXP (XEXP (operands[0], 0), 0);
17699 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17701 if (push_p)
17702 strcpy (pattern, "vpush%?.64\t{%P1");
17703 else
17704 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17706 p = strlen (pattern);
17708 gcc_assert (REG_P (operands[1]));
17710 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17711 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17713 p += sprintf (&pattern[p], ", d%d", base + i);
17715 strcpy (&pattern[p], "}");
17717 output_asm_insn (pattern, operands);
17718 return "";
17722 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17723 number of bytes pushed. */
17725 static int
17726 vfp_emit_fstmd (int base_reg, int count)
17728 rtx par;
17729 rtx dwarf;
17730 rtx tmp, reg;
17731 int i;
17733 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17734 register pairs are stored by a store multiple insn. We avoid this
17735 by pushing an extra pair. */
17736 if (count == 2 && !arm_arch6)
17738 if (base_reg == LAST_VFP_REGNUM - 3)
17739 base_reg -= 2;
17740 count++;
17743 /* FSTMD may not store more than 16 doubleword registers at once. Split
17744 larger stores into multiple parts (up to a maximum of two, in
17745 practice). */
17746 if (count > 16)
17748 int saved;
17749 /* NOTE: base_reg is an internal register number, so each D register
17750 counts as 2. */
17751 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17752 saved += vfp_emit_fstmd (base_reg, 16);
17753 return saved;
17756 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17757 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17759 reg = gen_rtx_REG (DFmode, base_reg);
17760 base_reg += 2;
17762 XVECEXP (par, 0, 0)
17763 = gen_rtx_SET (gen_frame_mem
17764 (BLKmode,
17765 gen_rtx_PRE_MODIFY (Pmode,
17766 stack_pointer_rtx,
17767 plus_constant
17768 (Pmode, stack_pointer_rtx,
17769 - (count * 8)))
17771 gen_rtx_UNSPEC (BLKmode,
17772 gen_rtvec (1, reg),
17773 UNSPEC_PUSH_MULT));
17775 tmp = gen_rtx_SET (stack_pointer_rtx,
17776 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17777 RTX_FRAME_RELATED_P (tmp) = 1;
17778 XVECEXP (dwarf, 0, 0) = tmp;
17780 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17781 RTX_FRAME_RELATED_P (tmp) = 1;
17782 XVECEXP (dwarf, 0, 1) = tmp;
17784 for (i = 1; i < count; i++)
17786 reg = gen_rtx_REG (DFmode, base_reg);
17787 base_reg += 2;
17788 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17790 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17791 plus_constant (Pmode,
17792 stack_pointer_rtx,
17793 i * 8)),
17794 reg);
17795 RTX_FRAME_RELATED_P (tmp) = 1;
17796 XVECEXP (dwarf, 0, i + 1) = tmp;
17799 par = emit_insn (par);
17800 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17801 RTX_FRAME_RELATED_P (par) = 1;
17803 return count * 8;
17806 /* Emit a call instruction with pattern PAT. ADDR is the address of
17807 the call target. */
17809 void
17810 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17812 rtx insn;
17814 insn = emit_call_insn (pat);
17816 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17817 If the call might use such an entry, add a use of the PIC register
17818 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17819 if (TARGET_VXWORKS_RTP
17820 && flag_pic
17821 && !sibcall
17822 && GET_CODE (addr) == SYMBOL_REF
17823 && (SYMBOL_REF_DECL (addr)
17824 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17825 : !SYMBOL_REF_LOCAL_P (addr)))
17827 require_pic_register ();
17828 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17831 if (TARGET_AAPCS_BASED)
17833 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17834 linker. We need to add an IP clobber to allow setting
17835 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17836 is not needed since it's a fixed register. */
17837 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17838 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17842 /* Output a 'call' insn. */
17843 const char *
17844 output_call (rtx *operands)
17846 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17848 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17849 if (REGNO (operands[0]) == LR_REGNUM)
17851 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17852 output_asm_insn ("mov%?\t%0, %|lr", operands);
17855 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17857 if (TARGET_INTERWORK || arm_arch4t)
17858 output_asm_insn ("bx%?\t%0", operands);
17859 else
17860 output_asm_insn ("mov%?\t%|pc, %0", operands);
17862 return "";
17865 /* Output a 'call' insn that is a reference in memory. This is
17866 disabled for ARMv5 and we prefer a blx instead because otherwise
17867 there's a significant performance overhead. */
17868 const char *
17869 output_call_mem (rtx *operands)
17871 gcc_assert (!arm_arch5);
17872 if (TARGET_INTERWORK)
17874 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17875 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17876 output_asm_insn ("bx%?\t%|ip", operands);
17878 else if (regno_use_in (LR_REGNUM, operands[0]))
17880 /* LR is used in the memory address. We load the address in the
17881 first instruction. It's safe to use IP as the target of the
17882 load since the call will kill it anyway. */
17883 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17884 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17885 if (arm_arch4t)
17886 output_asm_insn ("bx%?\t%|ip", operands);
17887 else
17888 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17890 else
17892 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17893 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17896 return "";
17900 /* Output a move from arm registers to arm registers of a long double
17901 OPERANDS[0] is the destination.
17902 OPERANDS[1] is the source. */
17903 const char *
17904 output_mov_long_double_arm_from_arm (rtx *operands)
17906 /* We have to be careful here because the two might overlap. */
17907 int dest_start = REGNO (operands[0]);
17908 int src_start = REGNO (operands[1]);
17909 rtx ops[2];
17910 int i;
17912 if (dest_start < src_start)
17914 for (i = 0; i < 3; i++)
17916 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17917 ops[1] = gen_rtx_REG (SImode, src_start + i);
17918 output_asm_insn ("mov%?\t%0, %1", ops);
17921 else
17923 for (i = 2; i >= 0; i--)
17925 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17926 ops[1] = gen_rtx_REG (SImode, src_start + i);
17927 output_asm_insn ("mov%?\t%0, %1", ops);
17931 return "";
17934 void
17935 arm_emit_movpair (rtx dest, rtx src)
17937 rtx insn;
17939 /* If the src is an immediate, simplify it. */
17940 if (CONST_INT_P (src))
17942 HOST_WIDE_INT val = INTVAL (src);
17943 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17944 if ((val >> 16) & 0x0000ffff)
17946 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17947 GEN_INT (16)),
17948 GEN_INT ((val >> 16) & 0x0000ffff));
17949 insn = get_last_insn ();
17950 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17952 return;
17954 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17955 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17956 insn = get_last_insn ();
17957 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17960 /* Output a move between double words. It must be REG<-MEM
17961 or MEM<-REG. */
17962 const char *
17963 output_move_double (rtx *operands, bool emit, int *count)
17965 enum rtx_code code0 = GET_CODE (operands[0]);
17966 enum rtx_code code1 = GET_CODE (operands[1]);
17967 rtx otherops[3];
17968 if (count)
17969 *count = 1;
17971 /* The only case when this might happen is when
17972 you are looking at the length of a DImode instruction
17973 that has an invalid constant in it. */
17974 if (code0 == REG && code1 != MEM)
17976 gcc_assert (!emit);
17977 *count = 2;
17978 return "";
17981 if (code0 == REG)
17983 unsigned int reg0 = REGNO (operands[0]);
17985 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17987 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17989 switch (GET_CODE (XEXP (operands[1], 0)))
17991 case REG:
17993 if (emit)
17995 if (TARGET_LDRD
17996 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17997 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17998 else
17999 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18001 break;
18003 case PRE_INC:
18004 gcc_assert (TARGET_LDRD);
18005 if (emit)
18006 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
18007 break;
18009 case PRE_DEC:
18010 if (emit)
18012 if (TARGET_LDRD)
18013 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
18014 else
18015 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
18017 break;
18019 case POST_INC:
18020 if (emit)
18022 if (TARGET_LDRD)
18023 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
18024 else
18025 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
18027 break;
18029 case POST_DEC:
18030 gcc_assert (TARGET_LDRD);
18031 if (emit)
18032 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
18033 break;
18035 case PRE_MODIFY:
18036 case POST_MODIFY:
18037 /* Autoicrement addressing modes should never have overlapping
18038 base and destination registers, and overlapping index registers
18039 are already prohibited, so this doesn't need to worry about
18040 fix_cm3_ldrd. */
18041 otherops[0] = operands[0];
18042 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18043 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18045 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18047 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18049 /* Registers overlap so split out the increment. */
18050 if (emit)
18052 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18053 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
18055 if (count)
18056 *count = 2;
18058 else
18060 /* Use a single insn if we can.
18061 FIXME: IWMMXT allows offsets larger than ldrd can
18062 handle, fix these up with a pair of ldr. */
18063 if (TARGET_THUMB2
18064 || !CONST_INT_P (otherops[2])
18065 || (INTVAL (otherops[2]) > -256
18066 && INTVAL (otherops[2]) < 256))
18068 if (emit)
18069 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18071 else
18073 if (emit)
18075 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18076 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18078 if (count)
18079 *count = 2;
18084 else
18086 /* Use a single insn if we can.
18087 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18088 fix these up with a pair of ldr. */
18089 if (TARGET_THUMB2
18090 || !CONST_INT_P (otherops[2])
18091 || (INTVAL (otherops[2]) > -256
18092 && INTVAL (otherops[2]) < 256))
18094 if (emit)
18095 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18097 else
18099 if (emit)
18101 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18102 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18104 if (count)
18105 *count = 2;
18108 break;
18110 case LABEL_REF:
18111 case CONST:
18112 /* We might be able to use ldrd %0, %1 here. However the range is
18113 different to ldr/adr, and it is broken on some ARMv7-M
18114 implementations. */
18115 /* Use the second register of the pair to avoid problematic
18116 overlap. */
18117 otherops[1] = operands[1];
18118 if (emit)
18119 output_asm_insn ("adr%?\t%0, %1", otherops);
18120 operands[1] = otherops[0];
18121 if (emit)
18123 if (TARGET_LDRD)
18124 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18125 else
18126 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18129 if (count)
18130 *count = 2;
18131 break;
18133 /* ??? This needs checking for thumb2. */
18134 default:
18135 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18136 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18138 otherops[0] = operands[0];
18139 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18140 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18142 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18144 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18146 switch ((int) INTVAL (otherops[2]))
18148 case -8:
18149 if (emit)
18150 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18151 return "";
18152 case -4:
18153 if (TARGET_THUMB2)
18154 break;
18155 if (emit)
18156 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18157 return "";
18158 case 4:
18159 if (TARGET_THUMB2)
18160 break;
18161 if (emit)
18162 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18163 return "";
18166 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18167 operands[1] = otherops[0];
18168 if (TARGET_LDRD
18169 && (REG_P (otherops[2])
18170 || TARGET_THUMB2
18171 || (CONST_INT_P (otherops[2])
18172 && INTVAL (otherops[2]) > -256
18173 && INTVAL (otherops[2]) < 256)))
18175 if (reg_overlap_mentioned_p (operands[0],
18176 otherops[2]))
18178 /* Swap base and index registers over to
18179 avoid a conflict. */
18180 std::swap (otherops[1], otherops[2]);
18182 /* If both registers conflict, it will usually
18183 have been fixed by a splitter. */
18184 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18185 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18187 if (emit)
18189 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18190 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18192 if (count)
18193 *count = 2;
18195 else
18197 otherops[0] = operands[0];
18198 if (emit)
18199 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18201 return "";
18204 if (CONST_INT_P (otherops[2]))
18206 if (emit)
18208 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18209 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18210 else
18211 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18214 else
18216 if (emit)
18217 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18220 else
18222 if (emit)
18223 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18226 if (count)
18227 *count = 2;
18229 if (TARGET_LDRD)
18230 return "ldr%(d%)\t%0, [%1]";
18232 return "ldm%(ia%)\t%1, %M0";
18234 else
18236 otherops[1] = adjust_address (operands[1], SImode, 4);
18237 /* Take care of overlapping base/data reg. */
18238 if (reg_mentioned_p (operands[0], operands[1]))
18240 if (emit)
18242 output_asm_insn ("ldr%?\t%0, %1", otherops);
18243 output_asm_insn ("ldr%?\t%0, %1", operands);
18245 if (count)
18246 *count = 2;
18249 else
18251 if (emit)
18253 output_asm_insn ("ldr%?\t%0, %1", operands);
18254 output_asm_insn ("ldr%?\t%0, %1", otherops);
18256 if (count)
18257 *count = 2;
18262 else
18264 /* Constraints should ensure this. */
18265 gcc_assert (code0 == MEM && code1 == REG);
18266 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18267 || (TARGET_ARM && TARGET_LDRD));
18269 switch (GET_CODE (XEXP (operands[0], 0)))
18271 case REG:
18272 if (emit)
18274 if (TARGET_LDRD)
18275 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18276 else
18277 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18279 break;
18281 case PRE_INC:
18282 gcc_assert (TARGET_LDRD);
18283 if (emit)
18284 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18285 break;
18287 case PRE_DEC:
18288 if (emit)
18290 if (TARGET_LDRD)
18291 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18292 else
18293 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18295 break;
18297 case POST_INC:
18298 if (emit)
18300 if (TARGET_LDRD)
18301 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18302 else
18303 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18305 break;
18307 case POST_DEC:
18308 gcc_assert (TARGET_LDRD);
18309 if (emit)
18310 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18311 break;
18313 case PRE_MODIFY:
18314 case POST_MODIFY:
18315 otherops[0] = operands[1];
18316 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18317 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18319 /* IWMMXT allows offsets larger than ldrd can handle,
18320 fix these up with a pair of ldr. */
18321 if (!TARGET_THUMB2
18322 && CONST_INT_P (otherops[2])
18323 && (INTVAL(otherops[2]) <= -256
18324 || INTVAL(otherops[2]) >= 256))
18326 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18328 if (emit)
18330 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18331 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18333 if (count)
18334 *count = 2;
18336 else
18338 if (emit)
18340 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18341 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18343 if (count)
18344 *count = 2;
18347 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18349 if (emit)
18350 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18352 else
18354 if (emit)
18355 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18357 break;
18359 case PLUS:
18360 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18361 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18363 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18365 case -8:
18366 if (emit)
18367 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18368 return "";
18370 case -4:
18371 if (TARGET_THUMB2)
18372 break;
18373 if (emit)
18374 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18375 return "";
18377 case 4:
18378 if (TARGET_THUMB2)
18379 break;
18380 if (emit)
18381 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18382 return "";
18385 if (TARGET_LDRD
18386 && (REG_P (otherops[2])
18387 || TARGET_THUMB2
18388 || (CONST_INT_P (otherops[2])
18389 && INTVAL (otherops[2]) > -256
18390 && INTVAL (otherops[2]) < 256)))
18392 otherops[0] = operands[1];
18393 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18394 if (emit)
18395 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18396 return "";
18398 /* Fall through */
18400 default:
18401 otherops[0] = adjust_address (operands[0], SImode, 4);
18402 otherops[1] = operands[1];
18403 if (emit)
18405 output_asm_insn ("str%?\t%1, %0", operands);
18406 output_asm_insn ("str%?\t%H1, %0", otherops);
18408 if (count)
18409 *count = 2;
18413 return "";
18416 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18417 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18419 const char *
18420 output_move_quad (rtx *operands)
18422 if (REG_P (operands[0]))
18424 /* Load, or reg->reg move. */
18426 if (MEM_P (operands[1]))
18428 switch (GET_CODE (XEXP (operands[1], 0)))
18430 case REG:
18431 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18432 break;
18434 case LABEL_REF:
18435 case CONST:
18436 output_asm_insn ("adr%?\t%0, %1", operands);
18437 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18438 break;
18440 default:
18441 gcc_unreachable ();
18444 else
18446 rtx ops[2];
18447 int dest, src, i;
18449 gcc_assert (REG_P (operands[1]));
18451 dest = REGNO (operands[0]);
18452 src = REGNO (operands[1]);
18454 /* This seems pretty dumb, but hopefully GCC won't try to do it
18455 very often. */
18456 if (dest < src)
18457 for (i = 0; i < 4; i++)
18459 ops[0] = gen_rtx_REG (SImode, dest + i);
18460 ops[1] = gen_rtx_REG (SImode, src + i);
18461 output_asm_insn ("mov%?\t%0, %1", ops);
18463 else
18464 for (i = 3; i >= 0; i--)
18466 ops[0] = gen_rtx_REG (SImode, dest + i);
18467 ops[1] = gen_rtx_REG (SImode, src + i);
18468 output_asm_insn ("mov%?\t%0, %1", ops);
18472 else
18474 gcc_assert (MEM_P (operands[0]));
18475 gcc_assert (REG_P (operands[1]));
18476 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18478 switch (GET_CODE (XEXP (operands[0], 0)))
18480 case REG:
18481 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18482 break;
18484 default:
18485 gcc_unreachable ();
18489 return "";
18492 /* Output a VFP load or store instruction. */
18494 const char *
18495 output_move_vfp (rtx *operands)
18497 rtx reg, mem, addr, ops[2];
18498 int load = REG_P (operands[0]);
18499 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18500 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18501 const char *templ;
18502 char buff[50];
18503 machine_mode mode;
18505 reg = operands[!load];
18506 mem = operands[load];
18508 mode = GET_MODE (reg);
18510 gcc_assert (REG_P (reg));
18511 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18512 gcc_assert (mode == SFmode
18513 || mode == DFmode
18514 || mode == SImode
18515 || mode == DImode
18516 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18517 gcc_assert (MEM_P (mem));
18519 addr = XEXP (mem, 0);
18521 switch (GET_CODE (addr))
18523 case PRE_DEC:
18524 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18525 ops[0] = XEXP (addr, 0);
18526 ops[1] = reg;
18527 break;
18529 case POST_INC:
18530 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18531 ops[0] = XEXP (addr, 0);
18532 ops[1] = reg;
18533 break;
18535 default:
18536 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18537 ops[0] = reg;
18538 ops[1] = mem;
18539 break;
18542 sprintf (buff, templ,
18543 load ? "ld" : "st",
18544 dp ? "64" : "32",
18545 dp ? "P" : "",
18546 integer_p ? "\t%@ int" : "");
18547 output_asm_insn (buff, ops);
18549 return "";
18552 /* Output a Neon double-word or quad-word load or store, or a load
18553 or store for larger structure modes.
18555 WARNING: The ordering of elements is weird in big-endian mode,
18556 because the EABI requires that vectors stored in memory appear
18557 as though they were stored by a VSTM, as required by the EABI.
18558 GCC RTL defines element ordering based on in-memory order.
18559 This can be different from the architectural ordering of elements
18560 within a NEON register. The intrinsics defined in arm_neon.h use the
18561 NEON register element ordering, not the GCC RTL element ordering.
18563 For example, the in-memory ordering of a big-endian a quadword
18564 vector with 16-bit elements when stored from register pair {d0,d1}
18565 will be (lowest address first, d0[N] is NEON register element N):
18567 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18569 When necessary, quadword registers (dN, dN+1) are moved to ARM
18570 registers from rN in the order:
18572 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18574 So that STM/LDM can be used on vectors in ARM registers, and the
18575 same memory layout will result as if VSTM/VLDM were used.
18577 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18578 possible, which allows use of appropriate alignment tags.
18579 Note that the choice of "64" is independent of the actual vector
18580 element size; this size simply ensures that the behavior is
18581 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18583 Due to limitations of those instructions, use of VST1.64/VLD1.64
18584 is not possible if:
18585 - the address contains PRE_DEC, or
18586 - the mode refers to more than 4 double-word registers
18588 In those cases, it would be possible to replace VSTM/VLDM by a
18589 sequence of instructions; this is not currently implemented since
18590 this is not certain to actually improve performance. */
18592 const char *
18593 output_move_neon (rtx *operands)
18595 rtx reg, mem, addr, ops[2];
18596 int regno, nregs, load = REG_P (operands[0]);
18597 const char *templ;
18598 char buff[50];
18599 machine_mode mode;
18601 reg = operands[!load];
18602 mem = operands[load];
18604 mode = GET_MODE (reg);
18606 gcc_assert (REG_P (reg));
18607 regno = REGNO (reg);
18608 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18609 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18610 || NEON_REGNO_OK_FOR_QUAD (regno));
18611 gcc_assert (VALID_NEON_DREG_MODE (mode)
18612 || VALID_NEON_QREG_MODE (mode)
18613 || VALID_NEON_STRUCT_MODE (mode));
18614 gcc_assert (MEM_P (mem));
18616 addr = XEXP (mem, 0);
18618 /* Strip off const from addresses like (const (plus (...))). */
18619 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18620 addr = XEXP (addr, 0);
18622 switch (GET_CODE (addr))
18624 case POST_INC:
18625 /* We have to use vldm / vstm for too-large modes. */
18626 if (nregs > 4)
18628 templ = "v%smia%%?\t%%0!, %%h1";
18629 ops[0] = XEXP (addr, 0);
18631 else
18633 templ = "v%s1.64\t%%h1, %%A0";
18634 ops[0] = mem;
18636 ops[1] = reg;
18637 break;
18639 case PRE_DEC:
18640 /* We have to use vldm / vstm in this case, since there is no
18641 pre-decrement form of the vld1 / vst1 instructions. */
18642 templ = "v%smdb%%?\t%%0!, %%h1";
18643 ops[0] = XEXP (addr, 0);
18644 ops[1] = reg;
18645 break;
18647 case POST_MODIFY:
18648 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18649 gcc_unreachable ();
18651 case REG:
18652 /* We have to use vldm / vstm for too-large modes. */
18653 if (nregs > 1)
18655 if (nregs > 4)
18656 templ = "v%smia%%?\t%%m0, %%h1";
18657 else
18658 templ = "v%s1.64\t%%h1, %%A0";
18660 ops[0] = mem;
18661 ops[1] = reg;
18662 break;
18664 /* Fall through. */
18665 case LABEL_REF:
18666 case PLUS:
18668 int i;
18669 int overlap = -1;
18670 for (i = 0; i < nregs; i++)
18672 /* We're only using DImode here because it's a convenient size. */
18673 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18674 ops[1] = adjust_address (mem, DImode, 8 * i);
18675 if (reg_overlap_mentioned_p (ops[0], mem))
18677 gcc_assert (overlap == -1);
18678 overlap = i;
18680 else
18682 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18683 output_asm_insn (buff, ops);
18686 if (overlap != -1)
18688 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18689 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18690 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18691 output_asm_insn (buff, ops);
18694 return "";
18697 default:
18698 gcc_unreachable ();
18701 sprintf (buff, templ, load ? "ld" : "st");
18702 output_asm_insn (buff, ops);
18704 return "";
18707 /* Compute and return the length of neon_mov<mode>, where <mode> is
18708 one of VSTRUCT modes: EI, OI, CI or XI. */
18710 arm_attr_length_move_neon (rtx_insn *insn)
18712 rtx reg, mem, addr;
18713 int load;
18714 machine_mode mode;
18716 extract_insn_cached (insn);
18718 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18720 mode = GET_MODE (recog_data.operand[0]);
18721 switch (mode)
18723 case EImode:
18724 case OImode:
18725 return 8;
18726 case CImode:
18727 return 12;
18728 case XImode:
18729 return 16;
18730 default:
18731 gcc_unreachable ();
18735 load = REG_P (recog_data.operand[0]);
18736 reg = recog_data.operand[!load];
18737 mem = recog_data.operand[load];
18739 gcc_assert (MEM_P (mem));
18741 mode = GET_MODE (reg);
18742 addr = XEXP (mem, 0);
18744 /* Strip off const from addresses like (const (plus (...))). */
18745 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18746 addr = XEXP (addr, 0);
18748 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18750 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18751 return insns * 4;
18753 else
18754 return 4;
18757 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18758 return zero. */
18761 arm_address_offset_is_imm (rtx_insn *insn)
18763 rtx mem, addr;
18765 extract_insn_cached (insn);
18767 if (REG_P (recog_data.operand[0]))
18768 return 0;
18770 mem = recog_data.operand[0];
18772 gcc_assert (MEM_P (mem));
18774 addr = XEXP (mem, 0);
18776 if (REG_P (addr)
18777 || (GET_CODE (addr) == PLUS
18778 && REG_P (XEXP (addr, 0))
18779 && CONST_INT_P (XEXP (addr, 1))))
18780 return 1;
18781 else
18782 return 0;
18785 /* Output an ADD r, s, #n where n may be too big for one instruction.
18786 If adding zero to one register, output nothing. */
18787 const char *
18788 output_add_immediate (rtx *operands)
18790 HOST_WIDE_INT n = INTVAL (operands[2]);
18792 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18794 if (n < 0)
18795 output_multi_immediate (operands,
18796 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18797 -n);
18798 else
18799 output_multi_immediate (operands,
18800 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18804 return "";
18807 /* Output a multiple immediate operation.
18808 OPERANDS is the vector of operands referred to in the output patterns.
18809 INSTR1 is the output pattern to use for the first constant.
18810 INSTR2 is the output pattern to use for subsequent constants.
18811 IMMED_OP is the index of the constant slot in OPERANDS.
18812 N is the constant value. */
18813 static const char *
18814 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18815 int immed_op, HOST_WIDE_INT n)
18817 #if HOST_BITS_PER_WIDE_INT > 32
18818 n &= 0xffffffff;
18819 #endif
18821 if (n == 0)
18823 /* Quick and easy output. */
18824 operands[immed_op] = const0_rtx;
18825 output_asm_insn (instr1, operands);
18827 else
18829 int i;
18830 const char * instr = instr1;
18832 /* Note that n is never zero here (which would give no output). */
18833 for (i = 0; i < 32; i += 2)
18835 if (n & (3 << i))
18837 operands[immed_op] = GEN_INT (n & (255 << i));
18838 output_asm_insn (instr, operands);
18839 instr = instr2;
18840 i += 6;
18845 return "";
18848 /* Return the name of a shifter operation. */
18849 static const char *
18850 arm_shift_nmem(enum rtx_code code)
18852 switch (code)
18854 case ASHIFT:
18855 return ARM_LSL_NAME;
18857 case ASHIFTRT:
18858 return "asr";
18860 case LSHIFTRT:
18861 return "lsr";
18863 case ROTATERT:
18864 return "ror";
18866 default:
18867 abort();
18871 /* Return the appropriate ARM instruction for the operation code.
18872 The returned result should not be overwritten. OP is the rtx of the
18873 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18874 was shifted. */
18875 const char *
18876 arithmetic_instr (rtx op, int shift_first_arg)
18878 switch (GET_CODE (op))
18880 case PLUS:
18881 return "add";
18883 case MINUS:
18884 return shift_first_arg ? "rsb" : "sub";
18886 case IOR:
18887 return "orr";
18889 case XOR:
18890 return "eor";
18892 case AND:
18893 return "and";
18895 case ASHIFT:
18896 case ASHIFTRT:
18897 case LSHIFTRT:
18898 case ROTATERT:
18899 return arm_shift_nmem(GET_CODE(op));
18901 default:
18902 gcc_unreachable ();
18906 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18907 for the operation code. The returned result should not be overwritten.
18908 OP is the rtx code of the shift.
18909 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18910 shift. */
18911 static const char *
18912 shift_op (rtx op, HOST_WIDE_INT *amountp)
18914 const char * mnem;
18915 enum rtx_code code = GET_CODE (op);
18917 switch (code)
18919 case ROTATE:
18920 if (!CONST_INT_P (XEXP (op, 1)))
18922 output_operand_lossage ("invalid shift operand");
18923 return NULL;
18926 code = ROTATERT;
18927 *amountp = 32 - INTVAL (XEXP (op, 1));
18928 mnem = "ror";
18929 break;
18931 case ASHIFT:
18932 case ASHIFTRT:
18933 case LSHIFTRT:
18934 case ROTATERT:
18935 mnem = arm_shift_nmem(code);
18936 if (CONST_INT_P (XEXP (op, 1)))
18938 *amountp = INTVAL (XEXP (op, 1));
18940 else if (REG_P (XEXP (op, 1)))
18942 *amountp = -1;
18943 return mnem;
18945 else
18947 output_operand_lossage ("invalid shift operand");
18948 return NULL;
18950 break;
18952 case MULT:
18953 /* We never have to worry about the amount being other than a
18954 power of 2, since this case can never be reloaded from a reg. */
18955 if (!CONST_INT_P (XEXP (op, 1)))
18957 output_operand_lossage ("invalid shift operand");
18958 return NULL;
18961 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18963 /* Amount must be a power of two. */
18964 if (*amountp & (*amountp - 1))
18966 output_operand_lossage ("invalid shift operand");
18967 return NULL;
18970 *amountp = int_log2 (*amountp);
18971 return ARM_LSL_NAME;
18973 default:
18974 output_operand_lossage ("invalid shift operand");
18975 return NULL;
18978 /* This is not 100% correct, but follows from the desire to merge
18979 multiplication by a power of 2 with the recognizer for a
18980 shift. >=32 is not a valid shift for "lsl", so we must try and
18981 output a shift that produces the correct arithmetical result.
18982 Using lsr #32 is identical except for the fact that the carry bit
18983 is not set correctly if we set the flags; but we never use the
18984 carry bit from such an operation, so we can ignore that. */
18985 if (code == ROTATERT)
18986 /* Rotate is just modulo 32. */
18987 *amountp &= 31;
18988 else if (*amountp != (*amountp & 31))
18990 if (code == ASHIFT)
18991 mnem = "lsr";
18992 *amountp = 32;
18995 /* Shifts of 0 are no-ops. */
18996 if (*amountp == 0)
18997 return NULL;
18999 return mnem;
19002 /* Obtain the shift from the POWER of two. */
19004 static HOST_WIDE_INT
19005 int_log2 (HOST_WIDE_INT power)
19007 HOST_WIDE_INT shift = 0;
19009 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
19011 gcc_assert (shift <= 31);
19012 shift++;
19015 return shift;
19018 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19019 because /bin/as is horribly restrictive. The judgement about
19020 whether or not each character is 'printable' (and can be output as
19021 is) or not (and must be printed with an octal escape) must be made
19022 with reference to the *host* character set -- the situation is
19023 similar to that discussed in the comments above pp_c_char in
19024 c-pretty-print.c. */
19026 #define MAX_ASCII_LEN 51
19028 void
19029 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19031 int i;
19032 int len_so_far = 0;
19034 fputs ("\t.ascii\t\"", stream);
19036 for (i = 0; i < len; i++)
19038 int c = p[i];
19040 if (len_so_far >= MAX_ASCII_LEN)
19042 fputs ("\"\n\t.ascii\t\"", stream);
19043 len_so_far = 0;
19046 if (ISPRINT (c))
19048 if (c == '\\' || c == '\"')
19050 putc ('\\', stream);
19051 len_so_far++;
19053 putc (c, stream);
19054 len_so_far++;
19056 else
19058 fprintf (stream, "\\%03o", c);
19059 len_so_far += 4;
19063 fputs ("\"\n", stream);
19066 /* Whether a register is callee saved or not. This is necessary because high
19067 registers are marked as caller saved when optimizing for size on Thumb-1
19068 targets despite being callee saved in order to avoid using them. */
19069 #define callee_saved_reg_p(reg) \
19070 (!call_used_regs[reg] \
19071 || (TARGET_THUMB1 && optimize_size \
19072 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19074 /* Compute the register save mask for registers 0 through 12
19075 inclusive. This code is used by arm_compute_save_reg_mask. */
19077 static unsigned long
19078 arm_compute_save_reg0_reg12_mask (void)
19080 unsigned long func_type = arm_current_func_type ();
19081 unsigned long save_reg_mask = 0;
19082 unsigned int reg;
19084 if (IS_INTERRUPT (func_type))
19086 unsigned int max_reg;
19087 /* Interrupt functions must not corrupt any registers,
19088 even call clobbered ones. If this is a leaf function
19089 we can just examine the registers used by the RTL, but
19090 otherwise we have to assume that whatever function is
19091 called might clobber anything, and so we have to save
19092 all the call-clobbered registers as well. */
19093 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19094 /* FIQ handlers have registers r8 - r12 banked, so
19095 we only need to check r0 - r7, Normal ISRs only
19096 bank r14 and r15, so we must check up to r12.
19097 r13 is the stack pointer which is always preserved,
19098 so we do not need to consider it here. */
19099 max_reg = 7;
19100 else
19101 max_reg = 12;
19103 for (reg = 0; reg <= max_reg; reg++)
19104 if (df_regs_ever_live_p (reg)
19105 || (! crtl->is_leaf && call_used_regs[reg]))
19106 save_reg_mask |= (1 << reg);
19108 /* Also save the pic base register if necessary. */
19109 if (flag_pic
19110 && !TARGET_SINGLE_PIC_BASE
19111 && arm_pic_register != INVALID_REGNUM
19112 && crtl->uses_pic_offset_table)
19113 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19115 else if (IS_VOLATILE(func_type))
19117 /* For noreturn functions we historically omitted register saves
19118 altogether. However this really messes up debugging. As a
19119 compromise save just the frame pointers. Combined with the link
19120 register saved elsewhere this should be sufficient to get
19121 a backtrace. */
19122 if (frame_pointer_needed)
19123 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19124 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19125 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19126 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19127 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19129 else
19131 /* In the normal case we only need to save those registers
19132 which are call saved and which are used by this function. */
19133 for (reg = 0; reg <= 11; reg++)
19134 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19135 save_reg_mask |= (1 << reg);
19137 /* Handle the frame pointer as a special case. */
19138 if (frame_pointer_needed)
19139 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19141 /* If we aren't loading the PIC register,
19142 don't stack it even though it may be live. */
19143 if (flag_pic
19144 && !TARGET_SINGLE_PIC_BASE
19145 && arm_pic_register != INVALID_REGNUM
19146 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19147 || crtl->uses_pic_offset_table))
19148 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19150 /* The prologue will copy SP into R0, so save it. */
19151 if (IS_STACKALIGN (func_type))
19152 save_reg_mask |= 1;
19155 /* Save registers so the exception handler can modify them. */
19156 if (crtl->calls_eh_return)
19158 unsigned int i;
19160 for (i = 0; ; i++)
19162 reg = EH_RETURN_DATA_REGNO (i);
19163 if (reg == INVALID_REGNUM)
19164 break;
19165 save_reg_mask |= 1 << reg;
19169 return save_reg_mask;
19172 /* Return true if r3 is live at the start of the function. */
19174 static bool
19175 arm_r3_live_at_start_p (void)
19177 /* Just look at cfg info, which is still close enough to correct at this
19178 point. This gives false positives for broken functions that might use
19179 uninitialized data that happens to be allocated in r3, but who cares? */
19180 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19183 /* Compute the number of bytes used to store the static chain register on the
19184 stack, above the stack frame. We need to know this accurately to get the
19185 alignment of the rest of the stack frame correct. */
19187 static int
19188 arm_compute_static_chain_stack_bytes (void)
19190 /* See the defining assertion in arm_expand_prologue. */
19191 if (IS_NESTED (arm_current_func_type ())
19192 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19193 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19194 && !df_regs_ever_live_p (LR_REGNUM)))
19195 && arm_r3_live_at_start_p ()
19196 && crtl->args.pretend_args_size == 0)
19197 return 4;
19199 return 0;
19202 /* Compute a bit mask of which registers need to be
19203 saved on the stack for the current function.
19204 This is used by arm_get_frame_offsets, which may add extra registers. */
19206 static unsigned long
19207 arm_compute_save_reg_mask (void)
19209 unsigned int save_reg_mask = 0;
19210 unsigned long func_type = arm_current_func_type ();
19211 unsigned int reg;
19213 if (IS_NAKED (func_type))
19214 /* This should never really happen. */
19215 return 0;
19217 /* If we are creating a stack frame, then we must save the frame pointer,
19218 IP (which will hold the old stack pointer), LR and the PC. */
19219 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19220 save_reg_mask |=
19221 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19222 | (1 << IP_REGNUM)
19223 | (1 << LR_REGNUM)
19224 | (1 << PC_REGNUM);
19226 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19228 /* Decide if we need to save the link register.
19229 Interrupt routines have their own banked link register,
19230 so they never need to save it.
19231 Otherwise if we do not use the link register we do not need to save
19232 it. If we are pushing other registers onto the stack however, we
19233 can save an instruction in the epilogue by pushing the link register
19234 now and then popping it back into the PC. This incurs extra memory
19235 accesses though, so we only do it when optimizing for size, and only
19236 if we know that we will not need a fancy return sequence. */
19237 if (df_regs_ever_live_p (LR_REGNUM)
19238 || (save_reg_mask
19239 && optimize_size
19240 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19241 && !crtl->tail_call_emit
19242 && !crtl->calls_eh_return))
19243 save_reg_mask |= 1 << LR_REGNUM;
19245 if (cfun->machine->lr_save_eliminated)
19246 save_reg_mask &= ~ (1 << LR_REGNUM);
19248 if (TARGET_REALLY_IWMMXT
19249 && ((bit_count (save_reg_mask)
19250 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19251 arm_compute_static_chain_stack_bytes())
19252 ) % 2) != 0)
19254 /* The total number of registers that are going to be pushed
19255 onto the stack is odd. We need to ensure that the stack
19256 is 64-bit aligned before we start to save iWMMXt registers,
19257 and also before we start to create locals. (A local variable
19258 might be a double or long long which we will load/store using
19259 an iWMMXt instruction). Therefore we need to push another
19260 ARM register, so that the stack will be 64-bit aligned. We
19261 try to avoid using the arg registers (r0 -r3) as they might be
19262 used to pass values in a tail call. */
19263 for (reg = 4; reg <= 12; reg++)
19264 if ((save_reg_mask & (1 << reg)) == 0)
19265 break;
19267 if (reg <= 12)
19268 save_reg_mask |= (1 << reg);
19269 else
19271 cfun->machine->sibcall_blocked = 1;
19272 save_reg_mask |= (1 << 3);
19276 /* We may need to push an additional register for use initializing the
19277 PIC base register. */
19278 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19279 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19281 reg = thumb_find_work_register (1 << 4);
19282 if (!call_used_regs[reg])
19283 save_reg_mask |= (1 << reg);
19286 return save_reg_mask;
19289 /* Compute a bit mask of which registers need to be
19290 saved on the stack for the current function. */
19291 static unsigned long
19292 thumb1_compute_save_reg_mask (void)
19294 unsigned long mask;
19295 unsigned reg;
19297 mask = 0;
19298 for (reg = 0; reg < 12; reg ++)
19299 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19300 mask |= 1 << reg;
19302 if (flag_pic
19303 && !TARGET_SINGLE_PIC_BASE
19304 && arm_pic_register != INVALID_REGNUM
19305 && crtl->uses_pic_offset_table)
19306 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19308 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19309 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19310 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19312 /* LR will also be pushed if any lo regs are pushed. */
19313 if (mask & 0xff || thumb_force_lr_save ())
19314 mask |= (1 << LR_REGNUM);
19316 /* Make sure we have a low work register if we need one.
19317 We will need one if we are going to push a high register,
19318 but we are not currently intending to push a low register. */
19319 if ((mask & 0xff) == 0
19320 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19322 /* Use thumb_find_work_register to choose which register
19323 we will use. If the register is live then we will
19324 have to push it. Use LAST_LO_REGNUM as our fallback
19325 choice for the register to select. */
19326 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19327 /* Make sure the register returned by thumb_find_work_register is
19328 not part of the return value. */
19329 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19330 reg = LAST_LO_REGNUM;
19332 if (callee_saved_reg_p (reg))
19333 mask |= 1 << reg;
19336 /* The 504 below is 8 bytes less than 512 because there are two possible
19337 alignment words. We can't tell here if they will be present or not so we
19338 have to play it safe and assume that they are. */
19339 if ((CALLER_INTERWORKING_SLOT_SIZE +
19340 ROUND_UP_WORD (get_frame_size ()) +
19341 crtl->outgoing_args_size) >= 504)
19343 /* This is the same as the code in thumb1_expand_prologue() which
19344 determines which register to use for stack decrement. */
19345 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19346 if (mask & (1 << reg))
19347 break;
19349 if (reg > LAST_LO_REGNUM)
19351 /* Make sure we have a register available for stack decrement. */
19352 mask |= 1 << LAST_LO_REGNUM;
19356 return mask;
19360 /* Return the number of bytes required to save VFP registers. */
19361 static int
19362 arm_get_vfp_saved_size (void)
19364 unsigned int regno;
19365 int count;
19366 int saved;
19368 saved = 0;
19369 /* Space for saved VFP registers. */
19370 if (TARGET_HARD_FLOAT && TARGET_VFP)
19372 count = 0;
19373 for (regno = FIRST_VFP_REGNUM;
19374 regno < LAST_VFP_REGNUM;
19375 regno += 2)
19377 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19378 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19380 if (count > 0)
19382 /* Workaround ARM10 VFPr1 bug. */
19383 if (count == 2 && !arm_arch6)
19384 count++;
19385 saved += count * 8;
19387 count = 0;
19389 else
19390 count++;
19392 if (count > 0)
19394 if (count == 2 && !arm_arch6)
19395 count++;
19396 saved += count * 8;
19399 return saved;
19403 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19404 everything bar the final return instruction. If simple_return is true,
19405 then do not output epilogue, because it has already been emitted in RTL. */
19406 const char *
19407 output_return_instruction (rtx operand, bool really_return, bool reverse,
19408 bool simple_return)
19410 char conditional[10];
19411 char instr[100];
19412 unsigned reg;
19413 unsigned long live_regs_mask;
19414 unsigned long func_type;
19415 arm_stack_offsets *offsets;
19417 func_type = arm_current_func_type ();
19419 if (IS_NAKED (func_type))
19420 return "";
19422 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19424 /* If this function was declared non-returning, and we have
19425 found a tail call, then we have to trust that the called
19426 function won't return. */
19427 if (really_return)
19429 rtx ops[2];
19431 /* Otherwise, trap an attempted return by aborting. */
19432 ops[0] = operand;
19433 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19434 : "abort");
19435 assemble_external_libcall (ops[1]);
19436 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19439 return "";
19442 gcc_assert (!cfun->calls_alloca || really_return);
19444 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19446 cfun->machine->return_used_this_function = 1;
19448 offsets = arm_get_frame_offsets ();
19449 live_regs_mask = offsets->saved_regs_mask;
19451 if (!simple_return && live_regs_mask)
19453 const char * return_reg;
19455 /* If we do not have any special requirements for function exit
19456 (e.g. interworking) then we can load the return address
19457 directly into the PC. Otherwise we must load it into LR. */
19458 if (really_return
19459 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19460 return_reg = reg_names[PC_REGNUM];
19461 else
19462 return_reg = reg_names[LR_REGNUM];
19464 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19466 /* There are three possible reasons for the IP register
19467 being saved. 1) a stack frame was created, in which case
19468 IP contains the old stack pointer, or 2) an ISR routine
19469 corrupted it, or 3) it was saved to align the stack on
19470 iWMMXt. In case 1, restore IP into SP, otherwise just
19471 restore IP. */
19472 if (frame_pointer_needed)
19474 live_regs_mask &= ~ (1 << IP_REGNUM);
19475 live_regs_mask |= (1 << SP_REGNUM);
19477 else
19478 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19481 /* On some ARM architectures it is faster to use LDR rather than
19482 LDM to load a single register. On other architectures, the
19483 cost is the same. In 26 bit mode, or for exception handlers,
19484 we have to use LDM to load the PC so that the CPSR is also
19485 restored. */
19486 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19487 if (live_regs_mask == (1U << reg))
19488 break;
19490 if (reg <= LAST_ARM_REGNUM
19491 && (reg != LR_REGNUM
19492 || ! really_return
19493 || ! IS_INTERRUPT (func_type)))
19495 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19496 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19498 else
19500 char *p;
19501 int first = 1;
19503 /* Generate the load multiple instruction to restore the
19504 registers. Note we can get here, even if
19505 frame_pointer_needed is true, but only if sp already
19506 points to the base of the saved core registers. */
19507 if (live_regs_mask & (1 << SP_REGNUM))
19509 unsigned HOST_WIDE_INT stack_adjust;
19511 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19512 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19514 if (stack_adjust && arm_arch5 && TARGET_ARM)
19515 if (TARGET_UNIFIED_ASM)
19516 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19517 else
19518 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19519 else
19521 /* If we can't use ldmib (SA110 bug),
19522 then try to pop r3 instead. */
19523 if (stack_adjust)
19524 live_regs_mask |= 1 << 3;
19526 if (TARGET_UNIFIED_ASM)
19527 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19528 else
19529 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19532 else
19533 if (TARGET_UNIFIED_ASM)
19534 sprintf (instr, "pop%s\t{", conditional);
19535 else
19536 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19538 p = instr + strlen (instr);
19540 for (reg = 0; reg <= SP_REGNUM; reg++)
19541 if (live_regs_mask & (1 << reg))
19543 int l = strlen (reg_names[reg]);
19545 if (first)
19546 first = 0;
19547 else
19549 memcpy (p, ", ", 2);
19550 p += 2;
19553 memcpy (p, "%|", 2);
19554 memcpy (p + 2, reg_names[reg], l);
19555 p += l + 2;
19558 if (live_regs_mask & (1 << LR_REGNUM))
19560 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19561 /* If returning from an interrupt, restore the CPSR. */
19562 if (IS_INTERRUPT (func_type))
19563 strcat (p, "^");
19565 else
19566 strcpy (p, "}");
19569 output_asm_insn (instr, & operand);
19571 /* See if we need to generate an extra instruction to
19572 perform the actual function return. */
19573 if (really_return
19574 && func_type != ARM_FT_INTERWORKED
19575 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19577 /* The return has already been handled
19578 by loading the LR into the PC. */
19579 return "";
19583 if (really_return)
19585 switch ((int) ARM_FUNC_TYPE (func_type))
19587 case ARM_FT_ISR:
19588 case ARM_FT_FIQ:
19589 /* ??? This is wrong for unified assembly syntax. */
19590 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19591 break;
19593 case ARM_FT_INTERWORKED:
19594 sprintf (instr, "bx%s\t%%|lr", conditional);
19595 break;
19597 case ARM_FT_EXCEPTION:
19598 /* ??? This is wrong for unified assembly syntax. */
19599 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19600 break;
19602 default:
19603 /* Use bx if it's available. */
19604 if (arm_arch5 || arm_arch4t)
19605 sprintf (instr, "bx%s\t%%|lr", conditional);
19606 else
19607 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19608 break;
19611 output_asm_insn (instr, & operand);
19614 return "";
19617 /* Write the function name into the code section, directly preceding
19618 the function prologue.
19620 Code will be output similar to this:
19622 .ascii "arm_poke_function_name", 0
19623 .align
19625 .word 0xff000000 + (t1 - t0)
19626 arm_poke_function_name
19627 mov ip, sp
19628 stmfd sp!, {fp, ip, lr, pc}
19629 sub fp, ip, #4
19631 When performing a stack backtrace, code can inspect the value
19632 of 'pc' stored at 'fp' + 0. If the trace function then looks
19633 at location pc - 12 and the top 8 bits are set, then we know
19634 that there is a function name embedded immediately preceding this
19635 location and has length ((pc[-3]) & 0xff000000).
19637 We assume that pc is declared as a pointer to an unsigned long.
19639 It is of no benefit to output the function name if we are assembling
19640 a leaf function. These function types will not contain a stack
19641 backtrace structure, therefore it is not possible to determine the
19642 function name. */
19643 void
19644 arm_poke_function_name (FILE *stream, const char *name)
19646 unsigned long alignlength;
19647 unsigned long length;
19648 rtx x;
19650 length = strlen (name) + 1;
19651 alignlength = ROUND_UP_WORD (length);
19653 ASM_OUTPUT_ASCII (stream, name, length);
19654 ASM_OUTPUT_ALIGN (stream, 2);
19655 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19656 assemble_aligned_integer (UNITS_PER_WORD, x);
19659 /* Place some comments into the assembler stream
19660 describing the current function. */
19661 static void
19662 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19664 unsigned long func_type;
19666 /* ??? Do we want to print some of the below anyway? */
19667 if (TARGET_THUMB1)
19668 return;
19670 /* Sanity check. */
19671 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19673 func_type = arm_current_func_type ();
19675 switch ((int) ARM_FUNC_TYPE (func_type))
19677 default:
19678 case ARM_FT_NORMAL:
19679 break;
19680 case ARM_FT_INTERWORKED:
19681 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19682 break;
19683 case ARM_FT_ISR:
19684 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19685 break;
19686 case ARM_FT_FIQ:
19687 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19688 break;
19689 case ARM_FT_EXCEPTION:
19690 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19691 break;
19694 if (IS_NAKED (func_type))
19695 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19697 if (IS_VOLATILE (func_type))
19698 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19700 if (IS_NESTED (func_type))
19701 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19702 if (IS_STACKALIGN (func_type))
19703 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19705 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19706 crtl->args.size,
19707 crtl->args.pretend_args_size, frame_size);
19709 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19710 frame_pointer_needed,
19711 cfun->machine->uses_anonymous_args);
19713 if (cfun->machine->lr_save_eliminated)
19714 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19716 if (crtl->calls_eh_return)
19717 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19721 static void
19722 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19723 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19725 arm_stack_offsets *offsets;
19727 if (TARGET_THUMB1)
19729 int regno;
19731 /* Emit any call-via-reg trampolines that are needed for v4t support
19732 of call_reg and call_value_reg type insns. */
19733 for (regno = 0; regno < LR_REGNUM; regno++)
19735 rtx label = cfun->machine->call_via[regno];
19737 if (label != NULL)
19739 switch_to_section (function_section (current_function_decl));
19740 targetm.asm_out.internal_label (asm_out_file, "L",
19741 CODE_LABEL_NUMBER (label));
19742 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19746 /* ??? Probably not safe to set this here, since it assumes that a
19747 function will be emitted as assembly immediately after we generate
19748 RTL for it. This does not happen for inline functions. */
19749 cfun->machine->return_used_this_function = 0;
19751 else /* TARGET_32BIT */
19753 /* We need to take into account any stack-frame rounding. */
19754 offsets = arm_get_frame_offsets ();
19756 gcc_assert (!use_return_insn (FALSE, NULL)
19757 || (cfun->machine->return_used_this_function != 0)
19758 || offsets->saved_regs == offsets->outgoing_args
19759 || frame_pointer_needed);
19763 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19764 STR and STRD. If an even number of registers are being pushed, one
19765 or more STRD patterns are created for each register pair. If an
19766 odd number of registers are pushed, emit an initial STR followed by
19767 as many STRD instructions as are needed. This works best when the
19768 stack is initially 64-bit aligned (the normal case), since it
19769 ensures that each STRD is also 64-bit aligned. */
19770 static void
19771 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19773 int num_regs = 0;
19774 int i;
19775 int regno;
19776 rtx par = NULL_RTX;
19777 rtx dwarf = NULL_RTX;
19778 rtx tmp;
19779 bool first = true;
19781 num_regs = bit_count (saved_regs_mask);
19783 /* Must be at least one register to save, and can't save SP or PC. */
19784 gcc_assert (num_regs > 0 && num_regs <= 14);
19785 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19786 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19788 /* Create sequence for DWARF info. All the frame-related data for
19789 debugging is held in this wrapper. */
19790 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19792 /* Describe the stack adjustment. */
19793 tmp = gen_rtx_SET (stack_pointer_rtx,
19794 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19795 RTX_FRAME_RELATED_P (tmp) = 1;
19796 XVECEXP (dwarf, 0, 0) = tmp;
19798 /* Find the first register. */
19799 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19802 i = 0;
19804 /* If there's an odd number of registers to push. Start off by
19805 pushing a single register. This ensures that subsequent strd
19806 operations are dword aligned (assuming that SP was originally
19807 64-bit aligned). */
19808 if ((num_regs & 1) != 0)
19810 rtx reg, mem, insn;
19812 reg = gen_rtx_REG (SImode, regno);
19813 if (num_regs == 1)
19814 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19815 stack_pointer_rtx));
19816 else
19817 mem = gen_frame_mem (Pmode,
19818 gen_rtx_PRE_MODIFY
19819 (Pmode, stack_pointer_rtx,
19820 plus_constant (Pmode, stack_pointer_rtx,
19821 -4 * num_regs)));
19823 tmp = gen_rtx_SET (mem, reg);
19824 RTX_FRAME_RELATED_P (tmp) = 1;
19825 insn = emit_insn (tmp);
19826 RTX_FRAME_RELATED_P (insn) = 1;
19827 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19828 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19829 RTX_FRAME_RELATED_P (tmp) = 1;
19830 i++;
19831 regno++;
19832 XVECEXP (dwarf, 0, i) = tmp;
19833 first = false;
19836 while (i < num_regs)
19837 if (saved_regs_mask & (1 << regno))
19839 rtx reg1, reg2, mem1, mem2;
19840 rtx tmp0, tmp1, tmp2;
19841 int regno2;
19843 /* Find the register to pair with this one. */
19844 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19845 regno2++)
19848 reg1 = gen_rtx_REG (SImode, regno);
19849 reg2 = gen_rtx_REG (SImode, regno2);
19851 if (first)
19853 rtx insn;
19855 first = false;
19856 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19857 stack_pointer_rtx,
19858 -4 * num_regs));
19859 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19860 stack_pointer_rtx,
19861 -4 * (num_regs - 1)));
19862 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19863 plus_constant (Pmode, stack_pointer_rtx,
19864 -4 * (num_regs)));
19865 tmp1 = gen_rtx_SET (mem1, reg1);
19866 tmp2 = gen_rtx_SET (mem2, reg2);
19867 RTX_FRAME_RELATED_P (tmp0) = 1;
19868 RTX_FRAME_RELATED_P (tmp1) = 1;
19869 RTX_FRAME_RELATED_P (tmp2) = 1;
19870 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19871 XVECEXP (par, 0, 0) = tmp0;
19872 XVECEXP (par, 0, 1) = tmp1;
19873 XVECEXP (par, 0, 2) = tmp2;
19874 insn = emit_insn (par);
19875 RTX_FRAME_RELATED_P (insn) = 1;
19876 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19878 else
19880 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19881 stack_pointer_rtx,
19882 4 * i));
19883 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19884 stack_pointer_rtx,
19885 4 * (i + 1)));
19886 tmp1 = gen_rtx_SET (mem1, reg1);
19887 tmp2 = gen_rtx_SET (mem2, reg2);
19888 RTX_FRAME_RELATED_P (tmp1) = 1;
19889 RTX_FRAME_RELATED_P (tmp2) = 1;
19890 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19891 XVECEXP (par, 0, 0) = tmp1;
19892 XVECEXP (par, 0, 1) = tmp2;
19893 emit_insn (par);
19896 /* Create unwind information. This is an approximation. */
19897 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19898 plus_constant (Pmode,
19899 stack_pointer_rtx,
19900 4 * i)),
19901 reg1);
19902 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19903 plus_constant (Pmode,
19904 stack_pointer_rtx,
19905 4 * (i + 1))),
19906 reg2);
19908 RTX_FRAME_RELATED_P (tmp1) = 1;
19909 RTX_FRAME_RELATED_P (tmp2) = 1;
19910 XVECEXP (dwarf, 0, i + 1) = tmp1;
19911 XVECEXP (dwarf, 0, i + 2) = tmp2;
19912 i += 2;
19913 regno = regno2 + 1;
19915 else
19916 regno++;
19918 return;
19921 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19922 whenever possible, otherwise it emits single-word stores. The first store
19923 also allocates stack space for all saved registers, using writeback with
19924 post-addressing mode. All other stores use offset addressing. If no STRD
19925 can be emitted, this function emits a sequence of single-word stores,
19926 and not an STM as before, because single-word stores provide more freedom
19927 scheduling and can be turned into an STM by peephole optimizations. */
19928 static void
19929 arm_emit_strd_push (unsigned long saved_regs_mask)
19931 int num_regs = 0;
19932 int i, j, dwarf_index = 0;
19933 int offset = 0;
19934 rtx dwarf = NULL_RTX;
19935 rtx insn = NULL_RTX;
19936 rtx tmp, mem;
19938 /* TODO: A more efficient code can be emitted by changing the
19939 layout, e.g., first push all pairs that can use STRD to keep the
19940 stack aligned, and then push all other registers. */
19941 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19942 if (saved_regs_mask & (1 << i))
19943 num_regs++;
19945 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19946 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19947 gcc_assert (num_regs > 0);
19949 /* Create sequence for DWARF info. */
19950 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19952 /* For dwarf info, we generate explicit stack update. */
19953 tmp = gen_rtx_SET (stack_pointer_rtx,
19954 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19955 RTX_FRAME_RELATED_P (tmp) = 1;
19956 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19958 /* Save registers. */
19959 offset = - 4 * num_regs;
19960 j = 0;
19961 while (j <= LAST_ARM_REGNUM)
19962 if (saved_regs_mask & (1 << j))
19964 if ((j % 2 == 0)
19965 && (saved_regs_mask & (1 << (j + 1))))
19967 /* Current register and previous register form register pair for
19968 which STRD can be generated. */
19969 if (offset < 0)
19971 /* Allocate stack space for all saved registers. */
19972 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19973 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19974 mem = gen_frame_mem (DImode, tmp);
19975 offset = 0;
19977 else if (offset > 0)
19978 mem = gen_frame_mem (DImode,
19979 plus_constant (Pmode,
19980 stack_pointer_rtx,
19981 offset));
19982 else
19983 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19985 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19986 RTX_FRAME_RELATED_P (tmp) = 1;
19987 tmp = emit_insn (tmp);
19989 /* Record the first store insn. */
19990 if (dwarf_index == 1)
19991 insn = tmp;
19993 /* Generate dwarf info. */
19994 mem = gen_frame_mem (SImode,
19995 plus_constant (Pmode,
19996 stack_pointer_rtx,
19997 offset));
19998 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19999 RTX_FRAME_RELATED_P (tmp) = 1;
20000 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20002 mem = gen_frame_mem (SImode,
20003 plus_constant (Pmode,
20004 stack_pointer_rtx,
20005 offset + 4));
20006 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20007 RTX_FRAME_RELATED_P (tmp) = 1;
20008 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20010 offset += 8;
20011 j += 2;
20013 else
20015 /* Emit a single word store. */
20016 if (offset < 0)
20018 /* Allocate stack space for all saved registers. */
20019 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20020 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20021 mem = gen_frame_mem (SImode, tmp);
20022 offset = 0;
20024 else if (offset > 0)
20025 mem = gen_frame_mem (SImode,
20026 plus_constant (Pmode,
20027 stack_pointer_rtx,
20028 offset));
20029 else
20030 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20032 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20033 RTX_FRAME_RELATED_P (tmp) = 1;
20034 tmp = emit_insn (tmp);
20036 /* Record the first store insn. */
20037 if (dwarf_index == 1)
20038 insn = tmp;
20040 /* Generate dwarf info. */
20041 mem = gen_frame_mem (SImode,
20042 plus_constant(Pmode,
20043 stack_pointer_rtx,
20044 offset));
20045 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20046 RTX_FRAME_RELATED_P (tmp) = 1;
20047 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20049 offset += 4;
20050 j += 1;
20053 else
20054 j++;
20056 /* Attach dwarf info to the first insn we generate. */
20057 gcc_assert (insn != NULL_RTX);
20058 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20059 RTX_FRAME_RELATED_P (insn) = 1;
20062 /* Generate and emit an insn that we will recognize as a push_multi.
20063 Unfortunately, since this insn does not reflect very well the actual
20064 semantics of the operation, we need to annotate the insn for the benefit
20065 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20066 MASK for registers that should be annotated for DWARF2 frame unwind
20067 information. */
20068 static rtx
20069 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20071 int num_regs = 0;
20072 int num_dwarf_regs = 0;
20073 int i, j;
20074 rtx par;
20075 rtx dwarf;
20076 int dwarf_par_index;
20077 rtx tmp, reg;
20079 /* We don't record the PC in the dwarf frame information. */
20080 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20082 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20084 if (mask & (1 << i))
20085 num_regs++;
20086 if (dwarf_regs_mask & (1 << i))
20087 num_dwarf_regs++;
20090 gcc_assert (num_regs && num_regs <= 16);
20091 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20093 /* For the body of the insn we are going to generate an UNSPEC in
20094 parallel with several USEs. This allows the insn to be recognized
20095 by the push_multi pattern in the arm.md file.
20097 The body of the insn looks something like this:
20099 (parallel [
20100 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20101 (const_int:SI <num>)))
20102 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20103 (use (reg:SI XX))
20104 (use (reg:SI YY))
20108 For the frame note however, we try to be more explicit and actually
20109 show each register being stored into the stack frame, plus a (single)
20110 decrement of the stack pointer. We do it this way in order to be
20111 friendly to the stack unwinding code, which only wants to see a single
20112 stack decrement per instruction. The RTL we generate for the note looks
20113 something like this:
20115 (sequence [
20116 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20117 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20118 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20119 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20123 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20124 instead we'd have a parallel expression detailing all
20125 the stores to the various memory addresses so that debug
20126 information is more up-to-date. Remember however while writing
20127 this to take care of the constraints with the push instruction.
20129 Note also that this has to be taken care of for the VFP registers.
20131 For more see PR43399. */
20133 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20134 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20135 dwarf_par_index = 1;
20137 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20139 if (mask & (1 << i))
20141 reg = gen_rtx_REG (SImode, i);
20143 XVECEXP (par, 0, 0)
20144 = gen_rtx_SET (gen_frame_mem
20145 (BLKmode,
20146 gen_rtx_PRE_MODIFY (Pmode,
20147 stack_pointer_rtx,
20148 plus_constant
20149 (Pmode, stack_pointer_rtx,
20150 -4 * num_regs))
20152 gen_rtx_UNSPEC (BLKmode,
20153 gen_rtvec (1, reg),
20154 UNSPEC_PUSH_MULT));
20156 if (dwarf_regs_mask & (1 << i))
20158 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20159 reg);
20160 RTX_FRAME_RELATED_P (tmp) = 1;
20161 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20164 break;
20168 for (j = 1, i++; j < num_regs; i++)
20170 if (mask & (1 << i))
20172 reg = gen_rtx_REG (SImode, i);
20174 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20176 if (dwarf_regs_mask & (1 << i))
20179 = gen_rtx_SET (gen_frame_mem
20180 (SImode,
20181 plus_constant (Pmode, stack_pointer_rtx,
20182 4 * j)),
20183 reg);
20184 RTX_FRAME_RELATED_P (tmp) = 1;
20185 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20188 j++;
20192 par = emit_insn (par);
20194 tmp = gen_rtx_SET (stack_pointer_rtx,
20195 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20196 RTX_FRAME_RELATED_P (tmp) = 1;
20197 XVECEXP (dwarf, 0, 0) = tmp;
20199 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20201 return par;
20204 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20205 SIZE is the offset to be adjusted.
20206 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20207 static void
20208 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20210 rtx dwarf;
20212 RTX_FRAME_RELATED_P (insn) = 1;
20213 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20214 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20217 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20218 SAVED_REGS_MASK shows which registers need to be restored.
20220 Unfortunately, since this insn does not reflect very well the actual
20221 semantics of the operation, we need to annotate the insn for the benefit
20222 of DWARF2 frame unwind information. */
20223 static void
20224 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20226 int num_regs = 0;
20227 int i, j;
20228 rtx par;
20229 rtx dwarf = NULL_RTX;
20230 rtx tmp, reg;
20231 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20232 int offset_adj;
20233 int emit_update;
20235 offset_adj = return_in_pc ? 1 : 0;
20236 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20237 if (saved_regs_mask & (1 << i))
20238 num_regs++;
20240 gcc_assert (num_regs && num_regs <= 16);
20242 /* If SP is in reglist, then we don't emit SP update insn. */
20243 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20245 /* The parallel needs to hold num_regs SETs
20246 and one SET for the stack update. */
20247 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20249 if (return_in_pc)
20250 XVECEXP (par, 0, 0) = ret_rtx;
20252 if (emit_update)
20254 /* Increment the stack pointer, based on there being
20255 num_regs 4-byte registers to restore. */
20256 tmp = gen_rtx_SET (stack_pointer_rtx,
20257 plus_constant (Pmode,
20258 stack_pointer_rtx,
20259 4 * num_regs));
20260 RTX_FRAME_RELATED_P (tmp) = 1;
20261 XVECEXP (par, 0, offset_adj) = tmp;
20264 /* Now restore every reg, which may include PC. */
20265 for (j = 0, i = 0; j < num_regs; i++)
20266 if (saved_regs_mask & (1 << i))
20268 reg = gen_rtx_REG (SImode, i);
20269 if ((num_regs == 1) && emit_update && !return_in_pc)
20271 /* Emit single load with writeback. */
20272 tmp = gen_frame_mem (SImode,
20273 gen_rtx_POST_INC (Pmode,
20274 stack_pointer_rtx));
20275 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20276 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20277 return;
20280 tmp = gen_rtx_SET (reg,
20281 gen_frame_mem
20282 (SImode,
20283 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20284 RTX_FRAME_RELATED_P (tmp) = 1;
20285 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20287 /* We need to maintain a sequence for DWARF info too. As dwarf info
20288 should not have PC, skip PC. */
20289 if (i != PC_REGNUM)
20290 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20292 j++;
20295 if (return_in_pc)
20296 par = emit_jump_insn (par);
20297 else
20298 par = emit_insn (par);
20300 REG_NOTES (par) = dwarf;
20301 if (!return_in_pc)
20302 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20303 stack_pointer_rtx, stack_pointer_rtx);
20306 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20307 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20309 Unfortunately, since this insn does not reflect very well the actual
20310 semantics of the operation, we need to annotate the insn for the benefit
20311 of DWARF2 frame unwind information. */
20312 static void
20313 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20315 int i, j;
20316 rtx par;
20317 rtx dwarf = NULL_RTX;
20318 rtx tmp, reg;
20320 gcc_assert (num_regs && num_regs <= 32);
20322 /* Workaround ARM10 VFPr1 bug. */
20323 if (num_regs == 2 && !arm_arch6)
20325 if (first_reg == 15)
20326 first_reg--;
20328 num_regs++;
20331 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20332 there could be up to 32 D-registers to restore.
20333 If there are more than 16 D-registers, make two recursive calls,
20334 each of which emits one pop_multi instruction. */
20335 if (num_regs > 16)
20337 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20338 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20339 return;
20342 /* The parallel needs to hold num_regs SETs
20343 and one SET for the stack update. */
20344 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20346 /* Increment the stack pointer, based on there being
20347 num_regs 8-byte registers to restore. */
20348 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20349 RTX_FRAME_RELATED_P (tmp) = 1;
20350 XVECEXP (par, 0, 0) = tmp;
20352 /* Now show every reg that will be restored, using a SET for each. */
20353 for (j = 0, i=first_reg; j < num_regs; i += 2)
20355 reg = gen_rtx_REG (DFmode, i);
20357 tmp = gen_rtx_SET (reg,
20358 gen_frame_mem
20359 (DFmode,
20360 plus_constant (Pmode, base_reg, 8 * j)));
20361 RTX_FRAME_RELATED_P (tmp) = 1;
20362 XVECEXP (par, 0, j + 1) = tmp;
20364 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20366 j++;
20369 par = emit_insn (par);
20370 REG_NOTES (par) = dwarf;
20372 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20373 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20375 RTX_FRAME_RELATED_P (par) = 1;
20376 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20378 else
20379 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20380 base_reg, base_reg);
20383 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20384 number of registers are being popped, multiple LDRD patterns are created for
20385 all register pairs. If odd number of registers are popped, last register is
20386 loaded by using LDR pattern. */
20387 static void
20388 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20390 int num_regs = 0;
20391 int i, j;
20392 rtx par = NULL_RTX;
20393 rtx dwarf = NULL_RTX;
20394 rtx tmp, reg, tmp1;
20395 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20397 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20398 if (saved_regs_mask & (1 << i))
20399 num_regs++;
20401 gcc_assert (num_regs && num_regs <= 16);
20403 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20404 to be popped. So, if num_regs is even, now it will become odd,
20405 and we can generate pop with PC. If num_regs is odd, it will be
20406 even now, and ldr with return can be generated for PC. */
20407 if (return_in_pc)
20408 num_regs--;
20410 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20412 /* Var j iterates over all the registers to gather all the registers in
20413 saved_regs_mask. Var i gives index of saved registers in stack frame.
20414 A PARALLEL RTX of register-pair is created here, so that pattern for
20415 LDRD can be matched. As PC is always last register to be popped, and
20416 we have already decremented num_regs if PC, we don't have to worry
20417 about PC in this loop. */
20418 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20419 if (saved_regs_mask & (1 << j))
20421 /* Create RTX for memory load. */
20422 reg = gen_rtx_REG (SImode, j);
20423 tmp = gen_rtx_SET (reg,
20424 gen_frame_mem (SImode,
20425 plus_constant (Pmode,
20426 stack_pointer_rtx, 4 * i)));
20427 RTX_FRAME_RELATED_P (tmp) = 1;
20429 if (i % 2 == 0)
20431 /* When saved-register index (i) is even, the RTX to be emitted is
20432 yet to be created. Hence create it first. The LDRD pattern we
20433 are generating is :
20434 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20435 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20436 where target registers need not be consecutive. */
20437 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20438 dwarf = NULL_RTX;
20441 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20442 added as 0th element and if i is odd, reg_i is added as 1st element
20443 of LDRD pattern shown above. */
20444 XVECEXP (par, 0, (i % 2)) = tmp;
20445 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20447 if ((i % 2) == 1)
20449 /* When saved-register index (i) is odd, RTXs for both the registers
20450 to be loaded are generated in above given LDRD pattern, and the
20451 pattern can be emitted now. */
20452 par = emit_insn (par);
20453 REG_NOTES (par) = dwarf;
20454 RTX_FRAME_RELATED_P (par) = 1;
20457 i++;
20460 /* If the number of registers pushed is odd AND return_in_pc is false OR
20461 number of registers are even AND return_in_pc is true, last register is
20462 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20463 then LDR with post increment. */
20465 /* Increment the stack pointer, based on there being
20466 num_regs 4-byte registers to restore. */
20467 tmp = gen_rtx_SET (stack_pointer_rtx,
20468 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20469 RTX_FRAME_RELATED_P (tmp) = 1;
20470 tmp = emit_insn (tmp);
20471 if (!return_in_pc)
20473 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20474 stack_pointer_rtx, stack_pointer_rtx);
20477 dwarf = NULL_RTX;
20479 if (((num_regs % 2) == 1 && !return_in_pc)
20480 || ((num_regs % 2) == 0 && return_in_pc))
20482 /* Scan for the single register to be popped. Skip until the saved
20483 register is found. */
20484 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20486 /* Gen LDR with post increment here. */
20487 tmp1 = gen_rtx_MEM (SImode,
20488 gen_rtx_POST_INC (SImode,
20489 stack_pointer_rtx));
20490 set_mem_alias_set (tmp1, get_frame_alias_set ());
20492 reg = gen_rtx_REG (SImode, j);
20493 tmp = gen_rtx_SET (reg, tmp1);
20494 RTX_FRAME_RELATED_P (tmp) = 1;
20495 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20497 if (return_in_pc)
20499 /* If return_in_pc, j must be PC_REGNUM. */
20500 gcc_assert (j == PC_REGNUM);
20501 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20502 XVECEXP (par, 0, 0) = ret_rtx;
20503 XVECEXP (par, 0, 1) = tmp;
20504 par = emit_jump_insn (par);
20506 else
20508 par = emit_insn (tmp);
20509 REG_NOTES (par) = dwarf;
20510 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20511 stack_pointer_rtx, stack_pointer_rtx);
20515 else if ((num_regs % 2) == 1 && return_in_pc)
20517 /* There are 2 registers to be popped. So, generate the pattern
20518 pop_multiple_with_stack_update_and_return to pop in PC. */
20519 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20522 return;
20525 /* LDRD in ARM mode needs consecutive registers as operands. This function
20526 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20527 offset addressing and then generates one separate stack udpate. This provides
20528 more scheduling freedom, compared to writeback on every load. However,
20529 if the function returns using load into PC directly
20530 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20531 before the last load. TODO: Add a peephole optimization to recognize
20532 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20533 peephole optimization to merge the load at stack-offset zero
20534 with the stack update instruction using load with writeback
20535 in post-index addressing mode. */
20536 static void
20537 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20539 int j = 0;
20540 int offset = 0;
20541 rtx par = NULL_RTX;
20542 rtx dwarf = NULL_RTX;
20543 rtx tmp, mem;
20545 /* Restore saved registers. */
20546 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20547 j = 0;
20548 while (j <= LAST_ARM_REGNUM)
20549 if (saved_regs_mask & (1 << j))
20551 if ((j % 2) == 0
20552 && (saved_regs_mask & (1 << (j + 1)))
20553 && (j + 1) != PC_REGNUM)
20555 /* Current register and next register form register pair for which
20556 LDRD can be generated. PC is always the last register popped, and
20557 we handle it separately. */
20558 if (offset > 0)
20559 mem = gen_frame_mem (DImode,
20560 plus_constant (Pmode,
20561 stack_pointer_rtx,
20562 offset));
20563 else
20564 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20566 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20567 tmp = emit_insn (tmp);
20568 RTX_FRAME_RELATED_P (tmp) = 1;
20570 /* Generate dwarf info. */
20572 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20573 gen_rtx_REG (SImode, j),
20574 NULL_RTX);
20575 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20576 gen_rtx_REG (SImode, j + 1),
20577 dwarf);
20579 REG_NOTES (tmp) = dwarf;
20581 offset += 8;
20582 j += 2;
20584 else if (j != PC_REGNUM)
20586 /* Emit a single word load. */
20587 if (offset > 0)
20588 mem = gen_frame_mem (SImode,
20589 plus_constant (Pmode,
20590 stack_pointer_rtx,
20591 offset));
20592 else
20593 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20595 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20596 tmp = emit_insn (tmp);
20597 RTX_FRAME_RELATED_P (tmp) = 1;
20599 /* Generate dwarf info. */
20600 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20601 gen_rtx_REG (SImode, j),
20602 NULL_RTX);
20604 offset += 4;
20605 j += 1;
20607 else /* j == PC_REGNUM */
20608 j++;
20610 else
20611 j++;
20613 /* Update the stack. */
20614 if (offset > 0)
20616 tmp = gen_rtx_SET (stack_pointer_rtx,
20617 plus_constant (Pmode,
20618 stack_pointer_rtx,
20619 offset));
20620 tmp = emit_insn (tmp);
20621 arm_add_cfa_adjust_cfa_note (tmp, offset,
20622 stack_pointer_rtx, stack_pointer_rtx);
20623 offset = 0;
20626 if (saved_regs_mask & (1 << PC_REGNUM))
20628 /* Only PC is to be popped. */
20629 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20630 XVECEXP (par, 0, 0) = ret_rtx;
20631 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20632 gen_frame_mem (SImode,
20633 gen_rtx_POST_INC (SImode,
20634 stack_pointer_rtx)));
20635 RTX_FRAME_RELATED_P (tmp) = 1;
20636 XVECEXP (par, 0, 1) = tmp;
20637 par = emit_jump_insn (par);
20639 /* Generate dwarf info. */
20640 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20641 gen_rtx_REG (SImode, PC_REGNUM),
20642 NULL_RTX);
20643 REG_NOTES (par) = dwarf;
20644 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20645 stack_pointer_rtx, stack_pointer_rtx);
20649 /* Calculate the size of the return value that is passed in registers. */
20650 static unsigned
20651 arm_size_return_regs (void)
20653 machine_mode mode;
20655 if (crtl->return_rtx != 0)
20656 mode = GET_MODE (crtl->return_rtx);
20657 else
20658 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20660 return GET_MODE_SIZE (mode);
20663 /* Return true if the current function needs to save/restore LR. */
20664 static bool
20665 thumb_force_lr_save (void)
20667 return !cfun->machine->lr_save_eliminated
20668 && (!leaf_function_p ()
20669 || thumb_far_jump_used_p ()
20670 || df_regs_ever_live_p (LR_REGNUM));
20673 /* We do not know if r3 will be available because
20674 we do have an indirect tailcall happening in this
20675 particular case. */
20676 static bool
20677 is_indirect_tailcall_p (rtx call)
20679 rtx pat = PATTERN (call);
20681 /* Indirect tail call. */
20682 pat = XVECEXP (pat, 0, 0);
20683 if (GET_CODE (pat) == SET)
20684 pat = SET_SRC (pat);
20686 pat = XEXP (XEXP (pat, 0), 0);
20687 return REG_P (pat);
20690 /* Return true if r3 is used by any of the tail call insns in the
20691 current function. */
20692 static bool
20693 any_sibcall_could_use_r3 (void)
20695 edge_iterator ei;
20696 edge e;
20698 if (!crtl->tail_call_emit)
20699 return false;
20700 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20701 if (e->flags & EDGE_SIBCALL)
20703 rtx call = BB_END (e->src);
20704 if (!CALL_P (call))
20705 call = prev_nonnote_nondebug_insn (call);
20706 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20707 if (find_regno_fusage (call, USE, 3)
20708 || is_indirect_tailcall_p (call))
20709 return true;
20711 return false;
20715 /* Compute the distance from register FROM to register TO.
20716 These can be the arg pointer (26), the soft frame pointer (25),
20717 the stack pointer (13) or the hard frame pointer (11).
20718 In thumb mode r7 is used as the soft frame pointer, if needed.
20719 Typical stack layout looks like this:
20721 old stack pointer -> | |
20722 ----
20723 | | \
20724 | | saved arguments for
20725 | | vararg functions
20726 | | /
20728 hard FP & arg pointer -> | | \
20729 | | stack
20730 | | frame
20731 | | /
20733 | | \
20734 | | call saved
20735 | | registers
20736 soft frame pointer -> | | /
20738 | | \
20739 | | local
20740 | | variables
20741 locals base pointer -> | | /
20743 | | \
20744 | | outgoing
20745 | | arguments
20746 current stack pointer -> | | /
20749 For a given function some or all of these stack components
20750 may not be needed, giving rise to the possibility of
20751 eliminating some of the registers.
20753 The values returned by this function must reflect the behavior
20754 of arm_expand_prologue() and arm_compute_save_reg_mask().
20756 The sign of the number returned reflects the direction of stack
20757 growth, so the values are positive for all eliminations except
20758 from the soft frame pointer to the hard frame pointer.
20760 SFP may point just inside the local variables block to ensure correct
20761 alignment. */
20764 /* Calculate stack offsets. These are used to calculate register elimination
20765 offsets and in prologue/epilogue code. Also calculates which registers
20766 should be saved. */
20768 static arm_stack_offsets *
20769 arm_get_frame_offsets (void)
20771 struct arm_stack_offsets *offsets;
20772 unsigned long func_type;
20773 int leaf;
20774 int saved;
20775 int core_saved;
20776 HOST_WIDE_INT frame_size;
20777 int i;
20779 offsets = &cfun->machine->stack_offsets;
20781 /* We need to know if we are a leaf function. Unfortunately, it
20782 is possible to be called after start_sequence has been called,
20783 which causes get_insns to return the insns for the sequence,
20784 not the function, which will cause leaf_function_p to return
20785 the incorrect result.
20787 to know about leaf functions once reload has completed, and the
20788 frame size cannot be changed after that time, so we can safely
20789 use the cached value. */
20791 if (reload_completed)
20792 return offsets;
20794 /* Initially this is the size of the local variables. It will translated
20795 into an offset once we have determined the size of preceding data. */
20796 frame_size = ROUND_UP_WORD (get_frame_size ());
20798 leaf = leaf_function_p ();
20800 /* Space for variadic functions. */
20801 offsets->saved_args = crtl->args.pretend_args_size;
20803 /* In Thumb mode this is incorrect, but never used. */
20804 offsets->frame
20805 = (offsets->saved_args
20806 + arm_compute_static_chain_stack_bytes ()
20807 + (frame_pointer_needed ? 4 : 0));
20809 if (TARGET_32BIT)
20811 unsigned int regno;
20813 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20814 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20815 saved = core_saved;
20817 /* We know that SP will be doubleword aligned on entry, and we must
20818 preserve that condition at any subroutine call. We also require the
20819 soft frame pointer to be doubleword aligned. */
20821 if (TARGET_REALLY_IWMMXT)
20823 /* Check for the call-saved iWMMXt registers. */
20824 for (regno = FIRST_IWMMXT_REGNUM;
20825 regno <= LAST_IWMMXT_REGNUM;
20826 regno++)
20827 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20828 saved += 8;
20831 func_type = arm_current_func_type ();
20832 /* Space for saved VFP registers. */
20833 if (! IS_VOLATILE (func_type)
20834 && TARGET_HARD_FLOAT && TARGET_VFP)
20835 saved += arm_get_vfp_saved_size ();
20837 else /* TARGET_THUMB1 */
20839 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20840 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20841 saved = core_saved;
20842 if (TARGET_BACKTRACE)
20843 saved += 16;
20846 /* Saved registers include the stack frame. */
20847 offsets->saved_regs
20848 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20849 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20851 /* A leaf function does not need any stack alignment if it has nothing
20852 on the stack. */
20853 if (leaf && frame_size == 0
20854 /* However if it calls alloca(), we have a dynamically allocated
20855 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20856 && ! cfun->calls_alloca)
20858 offsets->outgoing_args = offsets->soft_frame;
20859 offsets->locals_base = offsets->soft_frame;
20860 return offsets;
20863 /* Ensure SFP has the correct alignment. */
20864 if (ARM_DOUBLEWORD_ALIGN
20865 && (offsets->soft_frame & 7))
20867 offsets->soft_frame += 4;
20868 /* Try to align stack by pushing an extra reg. Don't bother doing this
20869 when there is a stack frame as the alignment will be rolled into
20870 the normal stack adjustment. */
20871 if (frame_size + crtl->outgoing_args_size == 0)
20873 int reg = -1;
20875 /* Register r3 is caller-saved. Normally it does not need to be
20876 saved on entry by the prologue. However if we choose to save
20877 it for padding then we may confuse the compiler into thinking
20878 a prologue sequence is required when in fact it is not. This
20879 will occur when shrink-wrapping if r3 is used as a scratch
20880 register and there are no other callee-saved writes.
20882 This situation can be avoided when other callee-saved registers
20883 are available and r3 is not mandatory if we choose a callee-saved
20884 register for padding. */
20885 bool prefer_callee_reg_p = false;
20887 /* If it is safe to use r3, then do so. This sometimes
20888 generates better code on Thumb-2 by avoiding the need to
20889 use 32-bit push/pop instructions. */
20890 if (! any_sibcall_could_use_r3 ()
20891 && arm_size_return_regs () <= 12
20892 && (offsets->saved_regs_mask & (1 << 3)) == 0
20893 && (TARGET_THUMB2
20894 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20896 reg = 3;
20897 if (!TARGET_THUMB2)
20898 prefer_callee_reg_p = true;
20900 if (reg == -1
20901 || prefer_callee_reg_p)
20903 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20905 /* Avoid fixed registers; they may be changed at
20906 arbitrary times so it's unsafe to restore them
20907 during the epilogue. */
20908 if (!fixed_regs[i]
20909 && (offsets->saved_regs_mask & (1 << i)) == 0)
20911 reg = i;
20912 break;
20917 if (reg != -1)
20919 offsets->saved_regs += 4;
20920 offsets->saved_regs_mask |= (1 << reg);
20925 offsets->locals_base = offsets->soft_frame + frame_size;
20926 offsets->outgoing_args = (offsets->locals_base
20927 + crtl->outgoing_args_size);
20929 if (ARM_DOUBLEWORD_ALIGN)
20931 /* Ensure SP remains doubleword aligned. */
20932 if (offsets->outgoing_args & 7)
20933 offsets->outgoing_args += 4;
20934 gcc_assert (!(offsets->outgoing_args & 7));
20937 return offsets;
20941 /* Calculate the relative offsets for the different stack pointers. Positive
20942 offsets are in the direction of stack growth. */
20944 HOST_WIDE_INT
20945 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20947 arm_stack_offsets *offsets;
20949 offsets = arm_get_frame_offsets ();
20951 /* OK, now we have enough information to compute the distances.
20952 There must be an entry in these switch tables for each pair
20953 of registers in ELIMINABLE_REGS, even if some of the entries
20954 seem to be redundant or useless. */
20955 switch (from)
20957 case ARG_POINTER_REGNUM:
20958 switch (to)
20960 case THUMB_HARD_FRAME_POINTER_REGNUM:
20961 return 0;
20963 case FRAME_POINTER_REGNUM:
20964 /* This is the reverse of the soft frame pointer
20965 to hard frame pointer elimination below. */
20966 return offsets->soft_frame - offsets->saved_args;
20968 case ARM_HARD_FRAME_POINTER_REGNUM:
20969 /* This is only non-zero in the case where the static chain register
20970 is stored above the frame. */
20971 return offsets->frame - offsets->saved_args - 4;
20973 case STACK_POINTER_REGNUM:
20974 /* If nothing has been pushed on the stack at all
20975 then this will return -4. This *is* correct! */
20976 return offsets->outgoing_args - (offsets->saved_args + 4);
20978 default:
20979 gcc_unreachable ();
20981 gcc_unreachable ();
20983 case FRAME_POINTER_REGNUM:
20984 switch (to)
20986 case THUMB_HARD_FRAME_POINTER_REGNUM:
20987 return 0;
20989 case ARM_HARD_FRAME_POINTER_REGNUM:
20990 /* The hard frame pointer points to the top entry in the
20991 stack frame. The soft frame pointer to the bottom entry
20992 in the stack frame. If there is no stack frame at all,
20993 then they are identical. */
20995 return offsets->frame - offsets->soft_frame;
20997 case STACK_POINTER_REGNUM:
20998 return offsets->outgoing_args - offsets->soft_frame;
21000 default:
21001 gcc_unreachable ();
21003 gcc_unreachable ();
21005 default:
21006 /* You cannot eliminate from the stack pointer.
21007 In theory you could eliminate from the hard frame
21008 pointer to the stack pointer, but this will never
21009 happen, since if a stack frame is not needed the
21010 hard frame pointer will never be used. */
21011 gcc_unreachable ();
21015 /* Given FROM and TO register numbers, say whether this elimination is
21016 allowed. Frame pointer elimination is automatically handled.
21018 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21019 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21020 pointer, we must eliminate FRAME_POINTER_REGNUM into
21021 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21022 ARG_POINTER_REGNUM. */
21024 bool
21025 arm_can_eliminate (const int from, const int to)
21027 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21028 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21029 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21030 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21031 true);
21034 /* Emit RTL to save coprocessor registers on function entry. Returns the
21035 number of bytes pushed. */
21037 static int
21038 arm_save_coproc_regs(void)
21040 int saved_size = 0;
21041 unsigned reg;
21042 unsigned start_reg;
21043 rtx insn;
21045 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21046 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21048 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21049 insn = gen_rtx_MEM (V2SImode, insn);
21050 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21051 RTX_FRAME_RELATED_P (insn) = 1;
21052 saved_size += 8;
21055 if (TARGET_HARD_FLOAT && TARGET_VFP)
21057 start_reg = FIRST_VFP_REGNUM;
21059 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21061 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21062 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21064 if (start_reg != reg)
21065 saved_size += vfp_emit_fstmd (start_reg,
21066 (reg - start_reg) / 2);
21067 start_reg = reg + 2;
21070 if (start_reg != reg)
21071 saved_size += vfp_emit_fstmd (start_reg,
21072 (reg - start_reg) / 2);
21074 return saved_size;
21078 /* Set the Thumb frame pointer from the stack pointer. */
21080 static void
21081 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21083 HOST_WIDE_INT amount;
21084 rtx insn, dwarf;
21086 amount = offsets->outgoing_args - offsets->locals_base;
21087 if (amount < 1024)
21088 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21089 stack_pointer_rtx, GEN_INT (amount)));
21090 else
21092 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21093 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21094 expects the first two operands to be the same. */
21095 if (TARGET_THUMB2)
21097 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21098 stack_pointer_rtx,
21099 hard_frame_pointer_rtx));
21101 else
21103 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21104 hard_frame_pointer_rtx,
21105 stack_pointer_rtx));
21107 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21108 plus_constant (Pmode, stack_pointer_rtx, amount));
21109 RTX_FRAME_RELATED_P (dwarf) = 1;
21110 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21113 RTX_FRAME_RELATED_P (insn) = 1;
21116 struct scratch_reg {
21117 rtx reg;
21118 bool saved;
21121 /* Return a short-lived scratch register for use as a 2nd scratch register on
21122 function entry after the registers are saved in the prologue. This register
21123 must be released by means of release_scratch_register_on_entry. IP is not
21124 considered since it is always used as the 1st scratch register if available.
21126 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21127 mask of live registers. */
21129 static void
21130 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21131 unsigned long live_regs)
21133 int regno = -1;
21135 sr->saved = false;
21137 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21138 regno = LR_REGNUM;
21139 else
21141 unsigned int i;
21143 for (i = 4; i < 11; i++)
21144 if (regno1 != i && (live_regs & (1 << i)) != 0)
21146 regno = i;
21147 break;
21150 if (regno < 0)
21152 /* If IP is used as the 1st scratch register for a nested function,
21153 then either r3 wasn't available or is used to preserve IP. */
21154 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21155 regno1 = 3;
21156 regno = (regno1 == 3 ? 2 : 3);
21157 sr->saved
21158 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21159 regno);
21163 sr->reg = gen_rtx_REG (SImode, regno);
21164 if (sr->saved)
21166 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21167 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21168 rtx x = gen_rtx_SET (stack_pointer_rtx,
21169 plus_constant (Pmode, stack_pointer_rtx, -4));
21170 RTX_FRAME_RELATED_P (insn) = 1;
21171 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21175 /* Release a scratch register obtained from the preceding function. */
21177 static void
21178 release_scratch_register_on_entry (struct scratch_reg *sr)
21180 if (sr->saved)
21182 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21183 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21184 rtx x = gen_rtx_SET (stack_pointer_rtx,
21185 plus_constant (Pmode, stack_pointer_rtx, 4));
21186 RTX_FRAME_RELATED_P (insn) = 1;
21187 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21191 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21193 #if PROBE_INTERVAL > 4096
21194 #error Cannot use indexed addressing mode for stack probing
21195 #endif
21197 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21198 inclusive. These are offsets from the current stack pointer. REGNO1
21199 is the index number of the 1st scratch register and LIVE_REGS is the
21200 mask of live registers. */
21202 static void
21203 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21204 unsigned int regno1, unsigned long live_regs)
21206 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21208 /* See if we have a constant small number of probes to generate. If so,
21209 that's the easy case. */
21210 if (size <= PROBE_INTERVAL)
21212 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21213 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21214 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21217 /* The run-time loop is made up of 10 insns in the generic case while the
21218 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21219 else if (size <= 5 * PROBE_INTERVAL)
21221 HOST_WIDE_INT i, rem;
21223 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21224 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21225 emit_stack_probe (reg1);
21227 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21228 it exceeds SIZE. If only two probes are needed, this will not
21229 generate any code. Then probe at FIRST + SIZE. */
21230 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21232 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21233 emit_stack_probe (reg1);
21236 rem = size - (i - PROBE_INTERVAL);
21237 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21239 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21240 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21242 else
21243 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21246 /* Otherwise, do the same as above, but in a loop. Note that we must be
21247 extra careful with variables wrapping around because we might be at
21248 the very top (or the very bottom) of the address space and we have
21249 to be able to handle this case properly; in particular, we use an
21250 equality test for the loop condition. */
21251 else
21253 HOST_WIDE_INT rounded_size;
21254 struct scratch_reg sr;
21256 get_scratch_register_on_entry (&sr, regno1, live_regs);
21258 emit_move_insn (reg1, GEN_INT (first));
21261 /* Step 1: round SIZE to the previous multiple of the interval. */
21263 rounded_size = size & -PROBE_INTERVAL;
21264 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21267 /* Step 2: compute initial and final value of the loop counter. */
21269 /* TEST_ADDR = SP + FIRST. */
21270 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21272 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21273 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21276 /* Step 3: the loop
21280 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21281 probe at TEST_ADDR
21283 while (TEST_ADDR != LAST_ADDR)
21285 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21286 until it is equal to ROUNDED_SIZE. */
21288 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21291 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21292 that SIZE is equal to ROUNDED_SIZE. */
21294 if (size != rounded_size)
21296 HOST_WIDE_INT rem = size - rounded_size;
21298 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21300 emit_set_insn (sr.reg,
21301 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21302 emit_stack_probe (plus_constant (Pmode, sr.reg,
21303 PROBE_INTERVAL - rem));
21305 else
21306 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21309 release_scratch_register_on_entry (&sr);
21312 /* Make sure nothing is scheduled before we are done. */
21313 emit_insn (gen_blockage ());
21316 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21317 absolute addresses. */
21319 const char *
21320 output_probe_stack_range (rtx reg1, rtx reg2)
21322 static int labelno = 0;
21323 char loop_lab[32];
21324 rtx xops[2];
21326 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21328 /* Loop. */
21329 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21331 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21332 xops[0] = reg1;
21333 xops[1] = GEN_INT (PROBE_INTERVAL);
21334 output_asm_insn ("sub\t%0, %0, %1", xops);
21336 /* Probe at TEST_ADDR. */
21337 output_asm_insn ("str\tr0, [%0, #0]", xops);
21339 /* Test if TEST_ADDR == LAST_ADDR. */
21340 xops[1] = reg2;
21341 output_asm_insn ("cmp\t%0, %1", xops);
21343 /* Branch. */
21344 fputs ("\tbne\t", asm_out_file);
21345 assemble_name_raw (asm_out_file, loop_lab);
21346 fputc ('\n', asm_out_file);
21348 return "";
21351 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21352 function. */
21353 void
21354 arm_expand_prologue (void)
21356 rtx amount;
21357 rtx insn;
21358 rtx ip_rtx;
21359 unsigned long live_regs_mask;
21360 unsigned long func_type;
21361 int fp_offset = 0;
21362 int saved_pretend_args = 0;
21363 int saved_regs = 0;
21364 unsigned HOST_WIDE_INT args_to_push;
21365 HOST_WIDE_INT size;
21366 arm_stack_offsets *offsets;
21367 bool clobber_ip;
21369 func_type = arm_current_func_type ();
21371 /* Naked functions don't have prologues. */
21372 if (IS_NAKED (func_type))
21373 return;
21375 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21376 args_to_push = crtl->args.pretend_args_size;
21378 /* Compute which register we will have to save onto the stack. */
21379 offsets = arm_get_frame_offsets ();
21380 live_regs_mask = offsets->saved_regs_mask;
21382 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21384 if (IS_STACKALIGN (func_type))
21386 rtx r0, r1;
21388 /* Handle a word-aligned stack pointer. We generate the following:
21390 mov r0, sp
21391 bic r1, r0, #7
21392 mov sp, r1
21393 <save and restore r0 in normal prologue/epilogue>
21394 mov sp, r0
21395 bx lr
21397 The unwinder doesn't need to know about the stack realignment.
21398 Just tell it we saved SP in r0. */
21399 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21401 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21402 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21404 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21405 RTX_FRAME_RELATED_P (insn) = 1;
21406 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21408 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21410 /* ??? The CFA changes here, which may cause GDB to conclude that it
21411 has entered a different function. That said, the unwind info is
21412 correct, individually, before and after this instruction because
21413 we've described the save of SP, which will override the default
21414 handling of SP as restoring from the CFA. */
21415 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21418 /* The static chain register is the same as the IP register. If it is
21419 clobbered when creating the frame, we need to save and restore it. */
21420 clobber_ip = IS_NESTED (func_type)
21421 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21422 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21423 && !df_regs_ever_live_p (LR_REGNUM)
21424 && arm_r3_live_at_start_p ()));
21426 /* Find somewhere to store IP whilst the frame is being created.
21427 We try the following places in order:
21429 1. The last argument register r3 if it is available.
21430 2. A slot on the stack above the frame if there are no
21431 arguments to push onto the stack.
21432 3. Register r3 again, after pushing the argument registers
21433 onto the stack, if this is a varargs function.
21434 4. The last slot on the stack created for the arguments to
21435 push, if this isn't a varargs function.
21437 Note - we only need to tell the dwarf2 backend about the SP
21438 adjustment in the second variant; the static chain register
21439 doesn't need to be unwound, as it doesn't contain a value
21440 inherited from the caller. */
21441 if (clobber_ip)
21443 if (!arm_r3_live_at_start_p ())
21444 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21445 else if (args_to_push == 0)
21447 rtx addr, dwarf;
21449 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21450 saved_regs += 4;
21452 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21453 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21454 fp_offset = 4;
21456 /* Just tell the dwarf backend that we adjusted SP. */
21457 dwarf = gen_rtx_SET (stack_pointer_rtx,
21458 plus_constant (Pmode, stack_pointer_rtx,
21459 -fp_offset));
21460 RTX_FRAME_RELATED_P (insn) = 1;
21461 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21463 else
21465 /* Store the args on the stack. */
21466 if (cfun->machine->uses_anonymous_args)
21468 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21469 (0xf0 >> (args_to_push / 4)) & 0xf);
21470 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21471 saved_pretend_args = 1;
21473 else
21475 rtx addr, dwarf;
21477 if (args_to_push == 4)
21478 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21479 else
21480 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21481 plus_constant (Pmode,
21482 stack_pointer_rtx,
21483 -args_to_push));
21485 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21487 /* Just tell the dwarf backend that we adjusted SP. */
21488 dwarf = gen_rtx_SET (stack_pointer_rtx,
21489 plus_constant (Pmode, stack_pointer_rtx,
21490 -args_to_push));
21491 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21494 RTX_FRAME_RELATED_P (insn) = 1;
21495 fp_offset = args_to_push;
21496 args_to_push = 0;
21500 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21502 if (IS_INTERRUPT (func_type))
21504 /* Interrupt functions must not corrupt any registers.
21505 Creating a frame pointer however, corrupts the IP
21506 register, so we must push it first. */
21507 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21509 /* Do not set RTX_FRAME_RELATED_P on this insn.
21510 The dwarf stack unwinding code only wants to see one
21511 stack decrement per function, and this is not it. If
21512 this instruction is labeled as being part of the frame
21513 creation sequence then dwarf2out_frame_debug_expr will
21514 die when it encounters the assignment of IP to FP
21515 later on, since the use of SP here establishes SP as
21516 the CFA register and not IP.
21518 Anyway this instruction is not really part of the stack
21519 frame creation although it is part of the prologue. */
21522 insn = emit_set_insn (ip_rtx,
21523 plus_constant (Pmode, stack_pointer_rtx,
21524 fp_offset));
21525 RTX_FRAME_RELATED_P (insn) = 1;
21528 if (args_to_push)
21530 /* Push the argument registers, or reserve space for them. */
21531 if (cfun->machine->uses_anonymous_args)
21532 insn = emit_multi_reg_push
21533 ((0xf0 >> (args_to_push / 4)) & 0xf,
21534 (0xf0 >> (args_to_push / 4)) & 0xf);
21535 else
21536 insn = emit_insn
21537 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21538 GEN_INT (- args_to_push)));
21539 RTX_FRAME_RELATED_P (insn) = 1;
21542 /* If this is an interrupt service routine, and the link register
21543 is going to be pushed, and we're not generating extra
21544 push of IP (needed when frame is needed and frame layout if apcs),
21545 subtracting four from LR now will mean that the function return
21546 can be done with a single instruction. */
21547 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21548 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21549 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21550 && TARGET_ARM)
21552 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21554 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21557 if (live_regs_mask)
21559 unsigned long dwarf_regs_mask = live_regs_mask;
21561 saved_regs += bit_count (live_regs_mask) * 4;
21562 if (optimize_size && !frame_pointer_needed
21563 && saved_regs == offsets->saved_regs - offsets->saved_args)
21565 /* If no coprocessor registers are being pushed and we don't have
21566 to worry about a frame pointer then push extra registers to
21567 create the stack frame. This is done is a way that does not
21568 alter the frame layout, so is independent of the epilogue. */
21569 int n;
21570 int frame;
21571 n = 0;
21572 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21573 n++;
21574 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21575 if (frame && n * 4 >= frame)
21577 n = frame / 4;
21578 live_regs_mask |= (1 << n) - 1;
21579 saved_regs += frame;
21583 if (TARGET_LDRD
21584 && current_tune->prefer_ldrd_strd
21585 && !optimize_function_for_size_p (cfun))
21587 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21588 if (TARGET_THUMB2)
21589 thumb2_emit_strd_push (live_regs_mask);
21590 else if (TARGET_ARM
21591 && !TARGET_APCS_FRAME
21592 && !IS_INTERRUPT (func_type))
21593 arm_emit_strd_push (live_regs_mask);
21594 else
21596 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21597 RTX_FRAME_RELATED_P (insn) = 1;
21600 else
21602 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21603 RTX_FRAME_RELATED_P (insn) = 1;
21607 if (! IS_VOLATILE (func_type))
21608 saved_regs += arm_save_coproc_regs ();
21610 if (frame_pointer_needed && TARGET_ARM)
21612 /* Create the new frame pointer. */
21613 if (TARGET_APCS_FRAME)
21615 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21616 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21617 RTX_FRAME_RELATED_P (insn) = 1;
21619 else
21621 insn = GEN_INT (saved_regs - (4 + fp_offset));
21622 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21623 stack_pointer_rtx, insn));
21624 RTX_FRAME_RELATED_P (insn) = 1;
21628 size = offsets->outgoing_args - offsets->saved_args;
21629 if (flag_stack_usage_info)
21630 current_function_static_stack_size = size;
21632 /* If this isn't an interrupt service routine and we have a frame, then do
21633 stack checking. We use IP as the first scratch register, except for the
21634 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21635 if (!IS_INTERRUPT (func_type)
21636 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21638 unsigned int regno;
21640 if (!IS_NESTED (func_type) || clobber_ip)
21641 regno = IP_REGNUM;
21642 else if (df_regs_ever_live_p (LR_REGNUM))
21643 regno = LR_REGNUM;
21644 else
21645 regno = 3;
21647 if (crtl->is_leaf && !cfun->calls_alloca)
21649 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21650 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21651 size - STACK_CHECK_PROTECT,
21652 regno, live_regs_mask);
21654 else if (size > 0)
21655 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21656 regno, live_regs_mask);
21659 /* Recover the static chain register. */
21660 if (clobber_ip)
21662 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21663 insn = gen_rtx_REG (SImode, 3);
21664 else
21666 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21667 insn = gen_frame_mem (SImode, insn);
21669 emit_set_insn (ip_rtx, insn);
21670 emit_insn (gen_force_register_use (ip_rtx));
21673 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21675 /* This add can produce multiple insns for a large constant, so we
21676 need to get tricky. */
21677 rtx_insn *last = get_last_insn ();
21679 amount = GEN_INT (offsets->saved_args + saved_regs
21680 - offsets->outgoing_args);
21682 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21683 amount));
21686 last = last ? NEXT_INSN (last) : get_insns ();
21687 RTX_FRAME_RELATED_P (last) = 1;
21689 while (last != insn);
21691 /* If the frame pointer is needed, emit a special barrier that
21692 will prevent the scheduler from moving stores to the frame
21693 before the stack adjustment. */
21694 if (frame_pointer_needed)
21695 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21696 hard_frame_pointer_rtx));
21700 if (frame_pointer_needed && TARGET_THUMB2)
21701 thumb_set_frame_pointer (offsets);
21703 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21705 unsigned long mask;
21707 mask = live_regs_mask;
21708 mask &= THUMB2_WORK_REGS;
21709 if (!IS_NESTED (func_type))
21710 mask |= (1 << IP_REGNUM);
21711 arm_load_pic_register (mask);
21714 /* If we are profiling, make sure no instructions are scheduled before
21715 the call to mcount. Similarly if the user has requested no
21716 scheduling in the prolog. Similarly if we want non-call exceptions
21717 using the EABI unwinder, to prevent faulting instructions from being
21718 swapped with a stack adjustment. */
21719 if (crtl->profile || !TARGET_SCHED_PROLOG
21720 || (arm_except_unwind_info (&global_options) == UI_TARGET
21721 && cfun->can_throw_non_call_exceptions))
21722 emit_insn (gen_blockage ());
21724 /* If the link register is being kept alive, with the return address in it,
21725 then make sure that it does not get reused by the ce2 pass. */
21726 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21727 cfun->machine->lr_save_eliminated = 1;
21730 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21731 static void
21732 arm_print_condition (FILE *stream)
21734 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21736 /* Branch conversion is not implemented for Thumb-2. */
21737 if (TARGET_THUMB)
21739 output_operand_lossage ("predicated Thumb instruction");
21740 return;
21742 if (current_insn_predicate != NULL)
21744 output_operand_lossage
21745 ("predicated instruction in conditional sequence");
21746 return;
21749 fputs (arm_condition_codes[arm_current_cc], stream);
21751 else if (current_insn_predicate)
21753 enum arm_cond_code code;
21755 if (TARGET_THUMB1)
21757 output_operand_lossage ("predicated Thumb instruction");
21758 return;
21761 code = get_arm_condition_code (current_insn_predicate);
21762 fputs (arm_condition_codes[code], stream);
21767 /* Globally reserved letters: acln
21768 Puncutation letters currently used: @_|?().!#
21769 Lower case letters currently used: bcdefhimpqtvwxyz
21770 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21771 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21773 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21775 If CODE is 'd', then the X is a condition operand and the instruction
21776 should only be executed if the condition is true.
21777 if CODE is 'D', then the X is a condition operand and the instruction
21778 should only be executed if the condition is false: however, if the mode
21779 of the comparison is CCFPEmode, then always execute the instruction -- we
21780 do this because in these circumstances !GE does not necessarily imply LT;
21781 in these cases the instruction pattern will take care to make sure that
21782 an instruction containing %d will follow, thereby undoing the effects of
21783 doing this instruction unconditionally.
21784 If CODE is 'N' then X is a floating point operand that must be negated
21785 before output.
21786 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21787 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21788 static void
21789 arm_print_operand (FILE *stream, rtx x, int code)
21791 switch (code)
21793 case '@':
21794 fputs (ASM_COMMENT_START, stream);
21795 return;
21797 case '_':
21798 fputs (user_label_prefix, stream);
21799 return;
21801 case '|':
21802 fputs (REGISTER_PREFIX, stream);
21803 return;
21805 case '?':
21806 arm_print_condition (stream);
21807 return;
21809 case '(':
21810 /* Nothing in unified syntax, otherwise the current condition code. */
21811 if (!TARGET_UNIFIED_ASM)
21812 arm_print_condition (stream);
21813 break;
21815 case ')':
21816 /* The current condition code in unified syntax, otherwise nothing. */
21817 if (TARGET_UNIFIED_ASM)
21818 arm_print_condition (stream);
21819 break;
21821 case '.':
21822 /* The current condition code for a condition code setting instruction.
21823 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21824 if (TARGET_UNIFIED_ASM)
21826 fputc('s', stream);
21827 arm_print_condition (stream);
21829 else
21831 arm_print_condition (stream);
21832 fputc('s', stream);
21834 return;
21836 case '!':
21837 /* If the instruction is conditionally executed then print
21838 the current condition code, otherwise print 's'. */
21839 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21840 if (current_insn_predicate)
21841 arm_print_condition (stream);
21842 else
21843 fputc('s', stream);
21844 break;
21846 /* %# is a "break" sequence. It doesn't output anything, but is used to
21847 separate e.g. operand numbers from following text, if that text consists
21848 of further digits which we don't want to be part of the operand
21849 number. */
21850 case '#':
21851 return;
21853 case 'N':
21855 REAL_VALUE_TYPE r;
21856 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21857 fprintf (stream, "%s", fp_const_from_val (&r));
21859 return;
21861 /* An integer or symbol address without a preceding # sign. */
21862 case 'c':
21863 switch (GET_CODE (x))
21865 case CONST_INT:
21866 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21867 break;
21869 case SYMBOL_REF:
21870 output_addr_const (stream, x);
21871 break;
21873 case CONST:
21874 if (GET_CODE (XEXP (x, 0)) == PLUS
21875 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21877 output_addr_const (stream, x);
21878 break;
21880 /* Fall through. */
21882 default:
21883 output_operand_lossage ("Unsupported operand for code '%c'", code);
21885 return;
21887 /* An integer that we want to print in HEX. */
21888 case 'x':
21889 switch (GET_CODE (x))
21891 case CONST_INT:
21892 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21893 break;
21895 default:
21896 output_operand_lossage ("Unsupported operand for code '%c'", code);
21898 return;
21900 case 'B':
21901 if (CONST_INT_P (x))
21903 HOST_WIDE_INT val;
21904 val = ARM_SIGN_EXTEND (~INTVAL (x));
21905 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21907 else
21909 putc ('~', stream);
21910 output_addr_const (stream, x);
21912 return;
21914 case 'b':
21915 /* Print the log2 of a CONST_INT. */
21917 HOST_WIDE_INT val;
21919 if (!CONST_INT_P (x)
21920 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21921 output_operand_lossage ("Unsupported operand for code '%c'", code);
21922 else
21923 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21925 return;
21927 case 'L':
21928 /* The low 16 bits of an immediate constant. */
21929 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21930 return;
21932 case 'i':
21933 fprintf (stream, "%s", arithmetic_instr (x, 1));
21934 return;
21936 case 'I':
21937 fprintf (stream, "%s", arithmetic_instr (x, 0));
21938 return;
21940 case 'S':
21942 HOST_WIDE_INT val;
21943 const char *shift;
21945 shift = shift_op (x, &val);
21947 if (shift)
21949 fprintf (stream, ", %s ", shift);
21950 if (val == -1)
21951 arm_print_operand (stream, XEXP (x, 1), 0);
21952 else
21953 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21956 return;
21958 /* An explanation of the 'Q', 'R' and 'H' register operands:
21960 In a pair of registers containing a DI or DF value the 'Q'
21961 operand returns the register number of the register containing
21962 the least significant part of the value. The 'R' operand returns
21963 the register number of the register containing the most
21964 significant part of the value.
21966 The 'H' operand returns the higher of the two register numbers.
21967 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21968 same as the 'Q' operand, since the most significant part of the
21969 value is held in the lower number register. The reverse is true
21970 on systems where WORDS_BIG_ENDIAN is false.
21972 The purpose of these operands is to distinguish between cases
21973 where the endian-ness of the values is important (for example
21974 when they are added together), and cases where the endian-ness
21975 is irrelevant, but the order of register operations is important.
21976 For example when loading a value from memory into a register
21977 pair, the endian-ness does not matter. Provided that the value
21978 from the lower memory address is put into the lower numbered
21979 register, and the value from the higher address is put into the
21980 higher numbered register, the load will work regardless of whether
21981 the value being loaded is big-wordian or little-wordian. The
21982 order of the two register loads can matter however, if the address
21983 of the memory location is actually held in one of the registers
21984 being overwritten by the load.
21986 The 'Q' and 'R' constraints are also available for 64-bit
21987 constants. */
21988 case 'Q':
21989 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21991 rtx part = gen_lowpart (SImode, x);
21992 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21993 return;
21996 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21998 output_operand_lossage ("invalid operand for code '%c'", code);
21999 return;
22002 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22003 return;
22005 case 'R':
22006 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22008 machine_mode mode = GET_MODE (x);
22009 rtx part;
22011 if (mode == VOIDmode)
22012 mode = DImode;
22013 part = gen_highpart_mode (SImode, mode, x);
22014 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22015 return;
22018 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22020 output_operand_lossage ("invalid operand for code '%c'", code);
22021 return;
22024 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22025 return;
22027 case 'H':
22028 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22030 output_operand_lossage ("invalid operand for code '%c'", code);
22031 return;
22034 asm_fprintf (stream, "%r", REGNO (x) + 1);
22035 return;
22037 case 'J':
22038 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22040 output_operand_lossage ("invalid operand for code '%c'", code);
22041 return;
22044 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22045 return;
22047 case 'K':
22048 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22050 output_operand_lossage ("invalid operand for code '%c'", code);
22051 return;
22054 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22055 return;
22057 case 'm':
22058 asm_fprintf (stream, "%r",
22059 REG_P (XEXP (x, 0))
22060 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22061 return;
22063 case 'M':
22064 asm_fprintf (stream, "{%r-%r}",
22065 REGNO (x),
22066 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22067 return;
22069 /* Like 'M', but writing doubleword vector registers, for use by Neon
22070 insns. */
22071 case 'h':
22073 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22074 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22075 if (numregs == 1)
22076 asm_fprintf (stream, "{d%d}", regno);
22077 else
22078 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22080 return;
22082 case 'd':
22083 /* CONST_TRUE_RTX means always -- that's the default. */
22084 if (x == const_true_rtx)
22085 return;
22087 if (!COMPARISON_P (x))
22089 output_operand_lossage ("invalid operand for code '%c'", code);
22090 return;
22093 fputs (arm_condition_codes[get_arm_condition_code (x)],
22094 stream);
22095 return;
22097 case 'D':
22098 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22099 want to do that. */
22100 if (x == const_true_rtx)
22102 output_operand_lossage ("instruction never executed");
22103 return;
22105 if (!COMPARISON_P (x))
22107 output_operand_lossage ("invalid operand for code '%c'", code);
22108 return;
22111 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22112 (get_arm_condition_code (x))],
22113 stream);
22114 return;
22116 case 's':
22117 case 'V':
22118 case 'W':
22119 case 'X':
22120 case 'Y':
22121 case 'Z':
22122 /* Former Maverick support, removed after GCC-4.7. */
22123 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22124 return;
22126 case 'U':
22127 if (!REG_P (x)
22128 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22129 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22130 /* Bad value for wCG register number. */
22132 output_operand_lossage ("invalid operand for code '%c'", code);
22133 return;
22136 else
22137 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22138 return;
22140 /* Print an iWMMXt control register name. */
22141 case 'w':
22142 if (!CONST_INT_P (x)
22143 || INTVAL (x) < 0
22144 || INTVAL (x) >= 16)
22145 /* Bad value for wC register number. */
22147 output_operand_lossage ("invalid operand for code '%c'", code);
22148 return;
22151 else
22153 static const char * wc_reg_names [16] =
22155 "wCID", "wCon", "wCSSF", "wCASF",
22156 "wC4", "wC5", "wC6", "wC7",
22157 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22158 "wC12", "wC13", "wC14", "wC15"
22161 fputs (wc_reg_names [INTVAL (x)], stream);
22163 return;
22165 /* Print the high single-precision register of a VFP double-precision
22166 register. */
22167 case 'p':
22169 machine_mode mode = GET_MODE (x);
22170 int regno;
22172 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22174 output_operand_lossage ("invalid operand for code '%c'", code);
22175 return;
22178 regno = REGNO (x);
22179 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22181 output_operand_lossage ("invalid operand for code '%c'", code);
22182 return;
22185 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22187 return;
22189 /* Print a VFP/Neon double precision or quad precision register name. */
22190 case 'P':
22191 case 'q':
22193 machine_mode mode = GET_MODE (x);
22194 int is_quad = (code == 'q');
22195 int regno;
22197 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22199 output_operand_lossage ("invalid operand for code '%c'", code);
22200 return;
22203 if (!REG_P (x)
22204 || !IS_VFP_REGNUM (REGNO (x)))
22206 output_operand_lossage ("invalid operand for code '%c'", code);
22207 return;
22210 regno = REGNO (x);
22211 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22212 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22214 output_operand_lossage ("invalid operand for code '%c'", code);
22215 return;
22218 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22219 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22221 return;
22223 /* These two codes print the low/high doubleword register of a Neon quad
22224 register, respectively. For pair-structure types, can also print
22225 low/high quadword registers. */
22226 case 'e':
22227 case 'f':
22229 machine_mode mode = GET_MODE (x);
22230 int regno;
22232 if ((GET_MODE_SIZE (mode) != 16
22233 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22235 output_operand_lossage ("invalid operand for code '%c'", code);
22236 return;
22239 regno = REGNO (x);
22240 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22242 output_operand_lossage ("invalid operand for code '%c'", code);
22243 return;
22246 if (GET_MODE_SIZE (mode) == 16)
22247 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22248 + (code == 'f' ? 1 : 0));
22249 else
22250 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22251 + (code == 'f' ? 1 : 0));
22253 return;
22255 /* Print a VFPv3 floating-point constant, represented as an integer
22256 index. */
22257 case 'G':
22259 int index = vfp3_const_double_index (x);
22260 gcc_assert (index != -1);
22261 fprintf (stream, "%d", index);
22263 return;
22265 /* Print bits representing opcode features for Neon.
22267 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22268 and polynomials as unsigned.
22270 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22272 Bit 2 is 1 for rounding functions, 0 otherwise. */
22274 /* Identify the type as 's', 'u', 'p' or 'f'. */
22275 case 'T':
22277 HOST_WIDE_INT bits = INTVAL (x);
22278 fputc ("uspf"[bits & 3], stream);
22280 return;
22282 /* Likewise, but signed and unsigned integers are both 'i'. */
22283 case 'F':
22285 HOST_WIDE_INT bits = INTVAL (x);
22286 fputc ("iipf"[bits & 3], stream);
22288 return;
22290 /* As for 'T', but emit 'u' instead of 'p'. */
22291 case 't':
22293 HOST_WIDE_INT bits = INTVAL (x);
22294 fputc ("usuf"[bits & 3], stream);
22296 return;
22298 /* Bit 2: rounding (vs none). */
22299 case 'O':
22301 HOST_WIDE_INT bits = INTVAL (x);
22302 fputs ((bits & 4) != 0 ? "r" : "", stream);
22304 return;
22306 /* Memory operand for vld1/vst1 instruction. */
22307 case 'A':
22309 rtx addr;
22310 bool postinc = FALSE;
22311 rtx postinc_reg = NULL;
22312 unsigned align, memsize, align_bits;
22314 gcc_assert (MEM_P (x));
22315 addr = XEXP (x, 0);
22316 if (GET_CODE (addr) == POST_INC)
22318 postinc = 1;
22319 addr = XEXP (addr, 0);
22321 if (GET_CODE (addr) == POST_MODIFY)
22323 postinc_reg = XEXP( XEXP (addr, 1), 1);
22324 addr = XEXP (addr, 0);
22326 asm_fprintf (stream, "[%r", REGNO (addr));
22328 /* We know the alignment of this access, so we can emit a hint in the
22329 instruction (for some alignments) as an aid to the memory subsystem
22330 of the target. */
22331 align = MEM_ALIGN (x) >> 3;
22332 memsize = MEM_SIZE (x);
22334 /* Only certain alignment specifiers are supported by the hardware. */
22335 if (memsize == 32 && (align % 32) == 0)
22336 align_bits = 256;
22337 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22338 align_bits = 128;
22339 else if (memsize >= 8 && (align % 8) == 0)
22340 align_bits = 64;
22341 else
22342 align_bits = 0;
22344 if (align_bits != 0)
22345 asm_fprintf (stream, ":%d", align_bits);
22347 asm_fprintf (stream, "]");
22349 if (postinc)
22350 fputs("!", stream);
22351 if (postinc_reg)
22352 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22354 return;
22356 case 'C':
22358 rtx addr;
22360 gcc_assert (MEM_P (x));
22361 addr = XEXP (x, 0);
22362 gcc_assert (REG_P (addr));
22363 asm_fprintf (stream, "[%r]", REGNO (addr));
22365 return;
22367 /* Translate an S register number into a D register number and element index. */
22368 case 'y':
22370 machine_mode mode = GET_MODE (x);
22371 int regno;
22373 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22375 output_operand_lossage ("invalid operand for code '%c'", code);
22376 return;
22379 regno = REGNO (x);
22380 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22382 output_operand_lossage ("invalid operand for code '%c'", code);
22383 return;
22386 regno = regno - FIRST_VFP_REGNUM;
22387 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22389 return;
22391 case 'v':
22392 gcc_assert (CONST_DOUBLE_P (x));
22393 int result;
22394 result = vfp3_const_double_for_fract_bits (x);
22395 if (result == 0)
22396 result = vfp3_const_double_for_bits (x);
22397 fprintf (stream, "#%d", result);
22398 return;
22400 /* Register specifier for vld1.16/vst1.16. Translate the S register
22401 number into a D register number and element index. */
22402 case 'z':
22404 machine_mode mode = GET_MODE (x);
22405 int regno;
22407 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22409 output_operand_lossage ("invalid operand for code '%c'", code);
22410 return;
22413 regno = REGNO (x);
22414 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22416 output_operand_lossage ("invalid operand for code '%c'", code);
22417 return;
22420 regno = regno - FIRST_VFP_REGNUM;
22421 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22423 return;
22425 default:
22426 if (x == 0)
22428 output_operand_lossage ("missing operand");
22429 return;
22432 switch (GET_CODE (x))
22434 case REG:
22435 asm_fprintf (stream, "%r", REGNO (x));
22436 break;
22438 case MEM:
22439 output_memory_reference_mode = GET_MODE (x);
22440 output_address (XEXP (x, 0));
22441 break;
22443 case CONST_DOUBLE:
22445 char fpstr[20];
22446 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22447 sizeof (fpstr), 0, 1);
22448 fprintf (stream, "#%s", fpstr);
22450 break;
22452 default:
22453 gcc_assert (GET_CODE (x) != NEG);
22454 fputc ('#', stream);
22455 if (GET_CODE (x) == HIGH)
22457 fputs (":lower16:", stream);
22458 x = XEXP (x, 0);
22461 output_addr_const (stream, x);
22462 break;
22467 /* Target hook for printing a memory address. */
22468 static void
22469 arm_print_operand_address (FILE *stream, rtx x)
22471 if (TARGET_32BIT)
22473 int is_minus = GET_CODE (x) == MINUS;
22475 if (REG_P (x))
22476 asm_fprintf (stream, "[%r]", REGNO (x));
22477 else if (GET_CODE (x) == PLUS || is_minus)
22479 rtx base = XEXP (x, 0);
22480 rtx index = XEXP (x, 1);
22481 HOST_WIDE_INT offset = 0;
22482 if (!REG_P (base)
22483 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22485 /* Ensure that BASE is a register. */
22486 /* (one of them must be). */
22487 /* Also ensure the SP is not used as in index register. */
22488 std::swap (base, index);
22490 switch (GET_CODE (index))
22492 case CONST_INT:
22493 offset = INTVAL (index);
22494 if (is_minus)
22495 offset = -offset;
22496 asm_fprintf (stream, "[%r, #%wd]",
22497 REGNO (base), offset);
22498 break;
22500 case REG:
22501 asm_fprintf (stream, "[%r, %s%r]",
22502 REGNO (base), is_minus ? "-" : "",
22503 REGNO (index));
22504 break;
22506 case MULT:
22507 case ASHIFTRT:
22508 case LSHIFTRT:
22509 case ASHIFT:
22510 case ROTATERT:
22512 asm_fprintf (stream, "[%r, %s%r",
22513 REGNO (base), is_minus ? "-" : "",
22514 REGNO (XEXP (index, 0)));
22515 arm_print_operand (stream, index, 'S');
22516 fputs ("]", stream);
22517 break;
22520 default:
22521 gcc_unreachable ();
22524 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22525 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22527 extern machine_mode output_memory_reference_mode;
22529 gcc_assert (REG_P (XEXP (x, 0)));
22531 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22532 asm_fprintf (stream, "[%r, #%s%d]!",
22533 REGNO (XEXP (x, 0)),
22534 GET_CODE (x) == PRE_DEC ? "-" : "",
22535 GET_MODE_SIZE (output_memory_reference_mode));
22536 else
22537 asm_fprintf (stream, "[%r], #%s%d",
22538 REGNO (XEXP (x, 0)),
22539 GET_CODE (x) == POST_DEC ? "-" : "",
22540 GET_MODE_SIZE (output_memory_reference_mode));
22542 else if (GET_CODE (x) == PRE_MODIFY)
22544 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22545 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22546 asm_fprintf (stream, "#%wd]!",
22547 INTVAL (XEXP (XEXP (x, 1), 1)));
22548 else
22549 asm_fprintf (stream, "%r]!",
22550 REGNO (XEXP (XEXP (x, 1), 1)));
22552 else if (GET_CODE (x) == POST_MODIFY)
22554 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22555 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22556 asm_fprintf (stream, "#%wd",
22557 INTVAL (XEXP (XEXP (x, 1), 1)));
22558 else
22559 asm_fprintf (stream, "%r",
22560 REGNO (XEXP (XEXP (x, 1), 1)));
22562 else output_addr_const (stream, x);
22564 else
22566 if (REG_P (x))
22567 asm_fprintf (stream, "[%r]", REGNO (x));
22568 else if (GET_CODE (x) == POST_INC)
22569 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22570 else if (GET_CODE (x) == PLUS)
22572 gcc_assert (REG_P (XEXP (x, 0)));
22573 if (CONST_INT_P (XEXP (x, 1)))
22574 asm_fprintf (stream, "[%r, #%wd]",
22575 REGNO (XEXP (x, 0)),
22576 INTVAL (XEXP (x, 1)));
22577 else
22578 asm_fprintf (stream, "[%r, %r]",
22579 REGNO (XEXP (x, 0)),
22580 REGNO (XEXP (x, 1)));
22582 else
22583 output_addr_const (stream, x);
22587 /* Target hook for indicating whether a punctuation character for
22588 TARGET_PRINT_OPERAND is valid. */
22589 static bool
22590 arm_print_operand_punct_valid_p (unsigned char code)
22592 return (code == '@' || code == '|' || code == '.'
22593 || code == '(' || code == ')' || code == '#'
22594 || (TARGET_32BIT && (code == '?'))
22595 || (TARGET_THUMB2 && (code == '!'))
22596 || (TARGET_THUMB && (code == '_')));
22599 /* Target hook for assembling integer objects. The ARM version needs to
22600 handle word-sized values specially. */
22601 static bool
22602 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22604 machine_mode mode;
22606 if (size == UNITS_PER_WORD && aligned_p)
22608 fputs ("\t.word\t", asm_out_file);
22609 output_addr_const (asm_out_file, x);
22611 /* Mark symbols as position independent. We only do this in the
22612 .text segment, not in the .data segment. */
22613 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22614 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22616 /* See legitimize_pic_address for an explanation of the
22617 TARGET_VXWORKS_RTP check. */
22618 if (!arm_pic_data_is_text_relative
22619 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22620 fputs ("(GOT)", asm_out_file);
22621 else
22622 fputs ("(GOTOFF)", asm_out_file);
22624 fputc ('\n', asm_out_file);
22625 return true;
22628 mode = GET_MODE (x);
22630 if (arm_vector_mode_supported_p (mode))
22632 int i, units;
22634 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22636 units = CONST_VECTOR_NUNITS (x);
22637 size = GET_MODE_UNIT_SIZE (mode);
22639 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22640 for (i = 0; i < units; i++)
22642 rtx elt = CONST_VECTOR_ELT (x, i);
22643 assemble_integer
22644 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22646 else
22647 for (i = 0; i < units; i++)
22649 rtx elt = CONST_VECTOR_ELT (x, i);
22650 assemble_real
22651 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22652 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22655 return true;
22658 return default_assemble_integer (x, size, aligned_p);
22661 static void
22662 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22664 section *s;
22666 if (!TARGET_AAPCS_BASED)
22668 (is_ctor ?
22669 default_named_section_asm_out_constructor
22670 : default_named_section_asm_out_destructor) (symbol, priority);
22671 return;
22674 /* Put these in the .init_array section, using a special relocation. */
22675 if (priority != DEFAULT_INIT_PRIORITY)
22677 char buf[18];
22678 sprintf (buf, "%s.%.5u",
22679 is_ctor ? ".init_array" : ".fini_array",
22680 priority);
22681 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22683 else if (is_ctor)
22684 s = ctors_section;
22685 else
22686 s = dtors_section;
22688 switch_to_section (s);
22689 assemble_align (POINTER_SIZE);
22690 fputs ("\t.word\t", asm_out_file);
22691 output_addr_const (asm_out_file, symbol);
22692 fputs ("(target1)\n", asm_out_file);
22695 /* Add a function to the list of static constructors. */
22697 static void
22698 arm_elf_asm_constructor (rtx symbol, int priority)
22700 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22703 /* Add a function to the list of static destructors. */
22705 static void
22706 arm_elf_asm_destructor (rtx symbol, int priority)
22708 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22711 /* A finite state machine takes care of noticing whether or not instructions
22712 can be conditionally executed, and thus decrease execution time and code
22713 size by deleting branch instructions. The fsm is controlled by
22714 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22716 /* The state of the fsm controlling condition codes are:
22717 0: normal, do nothing special
22718 1: make ASM_OUTPUT_OPCODE not output this instruction
22719 2: make ASM_OUTPUT_OPCODE not output this instruction
22720 3: make instructions conditional
22721 4: make instructions conditional
22723 State transitions (state->state by whom under condition):
22724 0 -> 1 final_prescan_insn if the `target' is a label
22725 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22726 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22727 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22728 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22729 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22730 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22731 (the target insn is arm_target_insn).
22733 If the jump clobbers the conditions then we use states 2 and 4.
22735 A similar thing can be done with conditional return insns.
22737 XXX In case the `target' is an unconditional branch, this conditionalising
22738 of the instructions always reduces code size, but not always execution
22739 time. But then, I want to reduce the code size to somewhere near what
22740 /bin/cc produces. */
22742 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22743 instructions. When a COND_EXEC instruction is seen the subsequent
22744 instructions are scanned so that multiple conditional instructions can be
22745 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22746 specify the length and true/false mask for the IT block. These will be
22747 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22749 /* Returns the index of the ARM condition code string in
22750 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22751 COMPARISON should be an rtx like `(eq (...) (...))'. */
22753 enum arm_cond_code
22754 maybe_get_arm_condition_code (rtx comparison)
22756 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22757 enum arm_cond_code code;
22758 enum rtx_code comp_code = GET_CODE (comparison);
22760 if (GET_MODE_CLASS (mode) != MODE_CC)
22761 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22762 XEXP (comparison, 1));
22764 switch (mode)
22766 case CC_DNEmode: code = ARM_NE; goto dominance;
22767 case CC_DEQmode: code = ARM_EQ; goto dominance;
22768 case CC_DGEmode: code = ARM_GE; goto dominance;
22769 case CC_DGTmode: code = ARM_GT; goto dominance;
22770 case CC_DLEmode: code = ARM_LE; goto dominance;
22771 case CC_DLTmode: code = ARM_LT; goto dominance;
22772 case CC_DGEUmode: code = ARM_CS; goto dominance;
22773 case CC_DGTUmode: code = ARM_HI; goto dominance;
22774 case CC_DLEUmode: code = ARM_LS; goto dominance;
22775 case CC_DLTUmode: code = ARM_CC;
22777 dominance:
22778 if (comp_code == EQ)
22779 return ARM_INVERSE_CONDITION_CODE (code);
22780 if (comp_code == NE)
22781 return code;
22782 return ARM_NV;
22784 case CC_NOOVmode:
22785 switch (comp_code)
22787 case NE: return ARM_NE;
22788 case EQ: return ARM_EQ;
22789 case GE: return ARM_PL;
22790 case LT: return ARM_MI;
22791 default: return ARM_NV;
22794 case CC_Zmode:
22795 switch (comp_code)
22797 case NE: return ARM_NE;
22798 case EQ: return ARM_EQ;
22799 default: return ARM_NV;
22802 case CC_Nmode:
22803 switch (comp_code)
22805 case NE: return ARM_MI;
22806 case EQ: return ARM_PL;
22807 default: return ARM_NV;
22810 case CCFPEmode:
22811 case CCFPmode:
22812 /* We can handle all cases except UNEQ and LTGT. */
22813 switch (comp_code)
22815 case GE: return ARM_GE;
22816 case GT: return ARM_GT;
22817 case LE: return ARM_LS;
22818 case LT: return ARM_MI;
22819 case NE: return ARM_NE;
22820 case EQ: return ARM_EQ;
22821 case ORDERED: return ARM_VC;
22822 case UNORDERED: return ARM_VS;
22823 case UNLT: return ARM_LT;
22824 case UNLE: return ARM_LE;
22825 case UNGT: return ARM_HI;
22826 case UNGE: return ARM_PL;
22827 /* UNEQ and LTGT do not have a representation. */
22828 case UNEQ: /* Fall through. */
22829 case LTGT: /* Fall through. */
22830 default: return ARM_NV;
22833 case CC_SWPmode:
22834 switch (comp_code)
22836 case NE: return ARM_NE;
22837 case EQ: return ARM_EQ;
22838 case GE: return ARM_LE;
22839 case GT: return ARM_LT;
22840 case LE: return ARM_GE;
22841 case LT: return ARM_GT;
22842 case GEU: return ARM_LS;
22843 case GTU: return ARM_CC;
22844 case LEU: return ARM_CS;
22845 case LTU: return ARM_HI;
22846 default: return ARM_NV;
22849 case CC_Cmode:
22850 switch (comp_code)
22852 case LTU: return ARM_CS;
22853 case GEU: return ARM_CC;
22854 default: return ARM_NV;
22857 case CC_CZmode:
22858 switch (comp_code)
22860 case NE: return ARM_NE;
22861 case EQ: return ARM_EQ;
22862 case GEU: return ARM_CS;
22863 case GTU: return ARM_HI;
22864 case LEU: return ARM_LS;
22865 case LTU: return ARM_CC;
22866 default: return ARM_NV;
22869 case CC_NCVmode:
22870 switch (comp_code)
22872 case GE: return ARM_GE;
22873 case LT: return ARM_LT;
22874 case GEU: return ARM_CS;
22875 case LTU: return ARM_CC;
22876 default: return ARM_NV;
22879 case CCmode:
22880 switch (comp_code)
22882 case NE: return ARM_NE;
22883 case EQ: return ARM_EQ;
22884 case GE: return ARM_GE;
22885 case GT: return ARM_GT;
22886 case LE: return ARM_LE;
22887 case LT: return ARM_LT;
22888 case GEU: return ARM_CS;
22889 case GTU: return ARM_HI;
22890 case LEU: return ARM_LS;
22891 case LTU: return ARM_CC;
22892 default: return ARM_NV;
22895 default: gcc_unreachable ();
22899 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22900 static enum arm_cond_code
22901 get_arm_condition_code (rtx comparison)
22903 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22904 gcc_assert (code != ARM_NV);
22905 return code;
22908 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22909 instructions. */
22910 void
22911 thumb2_final_prescan_insn (rtx_insn *insn)
22913 rtx_insn *first_insn = insn;
22914 rtx body = PATTERN (insn);
22915 rtx predicate;
22916 enum arm_cond_code code;
22917 int n;
22918 int mask;
22919 int max;
22921 /* max_insns_skipped in the tune was already taken into account in the
22922 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22923 just emit the IT blocks as we can. It does not make sense to split
22924 the IT blocks. */
22925 max = MAX_INSN_PER_IT_BLOCK;
22927 /* Remove the previous insn from the count of insns to be output. */
22928 if (arm_condexec_count)
22929 arm_condexec_count--;
22931 /* Nothing to do if we are already inside a conditional block. */
22932 if (arm_condexec_count)
22933 return;
22935 if (GET_CODE (body) != COND_EXEC)
22936 return;
22938 /* Conditional jumps are implemented directly. */
22939 if (JUMP_P (insn))
22940 return;
22942 predicate = COND_EXEC_TEST (body);
22943 arm_current_cc = get_arm_condition_code (predicate);
22945 n = get_attr_ce_count (insn);
22946 arm_condexec_count = 1;
22947 arm_condexec_mask = (1 << n) - 1;
22948 arm_condexec_masklen = n;
22949 /* See if subsequent instructions can be combined into the same block. */
22950 for (;;)
22952 insn = next_nonnote_insn (insn);
22954 /* Jumping into the middle of an IT block is illegal, so a label or
22955 barrier terminates the block. */
22956 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22957 break;
22959 body = PATTERN (insn);
22960 /* USE and CLOBBER aren't really insns, so just skip them. */
22961 if (GET_CODE (body) == USE
22962 || GET_CODE (body) == CLOBBER)
22963 continue;
22965 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22966 if (GET_CODE (body) != COND_EXEC)
22967 break;
22968 /* Maximum number of conditionally executed instructions in a block. */
22969 n = get_attr_ce_count (insn);
22970 if (arm_condexec_masklen + n > max)
22971 break;
22973 predicate = COND_EXEC_TEST (body);
22974 code = get_arm_condition_code (predicate);
22975 mask = (1 << n) - 1;
22976 if (arm_current_cc == code)
22977 arm_condexec_mask |= (mask << arm_condexec_masklen);
22978 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22979 break;
22981 arm_condexec_count++;
22982 arm_condexec_masklen += n;
22984 /* A jump must be the last instruction in a conditional block. */
22985 if (JUMP_P (insn))
22986 break;
22988 /* Restore recog_data (getting the attributes of other insns can
22989 destroy this array, but final.c assumes that it remains intact
22990 across this call). */
22991 extract_constrain_insn_cached (first_insn);
22994 void
22995 arm_final_prescan_insn (rtx_insn *insn)
22997 /* BODY will hold the body of INSN. */
22998 rtx body = PATTERN (insn);
23000 /* This will be 1 if trying to repeat the trick, and things need to be
23001 reversed if it appears to fail. */
23002 int reverse = 0;
23004 /* If we start with a return insn, we only succeed if we find another one. */
23005 int seeking_return = 0;
23006 enum rtx_code return_code = UNKNOWN;
23008 /* START_INSN will hold the insn from where we start looking. This is the
23009 first insn after the following code_label if REVERSE is true. */
23010 rtx_insn *start_insn = insn;
23012 /* If in state 4, check if the target branch is reached, in order to
23013 change back to state 0. */
23014 if (arm_ccfsm_state == 4)
23016 if (insn == arm_target_insn)
23018 arm_target_insn = NULL;
23019 arm_ccfsm_state = 0;
23021 return;
23024 /* If in state 3, it is possible to repeat the trick, if this insn is an
23025 unconditional branch to a label, and immediately following this branch
23026 is the previous target label which is only used once, and the label this
23027 branch jumps to is not too far off. */
23028 if (arm_ccfsm_state == 3)
23030 if (simplejump_p (insn))
23032 start_insn = next_nonnote_insn (start_insn);
23033 if (BARRIER_P (start_insn))
23035 /* XXX Isn't this always a barrier? */
23036 start_insn = next_nonnote_insn (start_insn);
23038 if (LABEL_P (start_insn)
23039 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23040 && LABEL_NUSES (start_insn) == 1)
23041 reverse = TRUE;
23042 else
23043 return;
23045 else if (ANY_RETURN_P (body))
23047 start_insn = next_nonnote_insn (start_insn);
23048 if (BARRIER_P (start_insn))
23049 start_insn = next_nonnote_insn (start_insn);
23050 if (LABEL_P (start_insn)
23051 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23052 && LABEL_NUSES (start_insn) == 1)
23054 reverse = TRUE;
23055 seeking_return = 1;
23056 return_code = GET_CODE (body);
23058 else
23059 return;
23061 else
23062 return;
23065 gcc_assert (!arm_ccfsm_state || reverse);
23066 if (!JUMP_P (insn))
23067 return;
23069 /* This jump might be paralleled with a clobber of the condition codes
23070 the jump should always come first */
23071 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23072 body = XVECEXP (body, 0, 0);
23074 if (reverse
23075 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23076 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23078 int insns_skipped;
23079 int fail = FALSE, succeed = FALSE;
23080 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23081 int then_not_else = TRUE;
23082 rtx_insn *this_insn = start_insn;
23083 rtx label = 0;
23085 /* Register the insn jumped to. */
23086 if (reverse)
23088 if (!seeking_return)
23089 label = XEXP (SET_SRC (body), 0);
23091 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23092 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23093 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23095 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23096 then_not_else = FALSE;
23098 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23100 seeking_return = 1;
23101 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23103 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23105 seeking_return = 1;
23106 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23107 then_not_else = FALSE;
23109 else
23110 gcc_unreachable ();
23112 /* See how many insns this branch skips, and what kind of insns. If all
23113 insns are okay, and the label or unconditional branch to the same
23114 label is not too far away, succeed. */
23115 for (insns_skipped = 0;
23116 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23118 rtx scanbody;
23120 this_insn = next_nonnote_insn (this_insn);
23121 if (!this_insn)
23122 break;
23124 switch (GET_CODE (this_insn))
23126 case CODE_LABEL:
23127 /* Succeed if it is the target label, otherwise fail since
23128 control falls in from somewhere else. */
23129 if (this_insn == label)
23131 arm_ccfsm_state = 1;
23132 succeed = TRUE;
23134 else
23135 fail = TRUE;
23136 break;
23138 case BARRIER:
23139 /* Succeed if the following insn is the target label.
23140 Otherwise fail.
23141 If return insns are used then the last insn in a function
23142 will be a barrier. */
23143 this_insn = next_nonnote_insn (this_insn);
23144 if (this_insn && this_insn == label)
23146 arm_ccfsm_state = 1;
23147 succeed = TRUE;
23149 else
23150 fail = TRUE;
23151 break;
23153 case CALL_INSN:
23154 /* The AAPCS says that conditional calls should not be
23155 used since they make interworking inefficient (the
23156 linker can't transform BL<cond> into BLX). That's
23157 only a problem if the machine has BLX. */
23158 if (arm_arch5)
23160 fail = TRUE;
23161 break;
23164 /* Succeed if the following insn is the target label, or
23165 if the following two insns are a barrier and the
23166 target label. */
23167 this_insn = next_nonnote_insn (this_insn);
23168 if (this_insn && BARRIER_P (this_insn))
23169 this_insn = next_nonnote_insn (this_insn);
23171 if (this_insn && this_insn == label
23172 && insns_skipped < max_insns_skipped)
23174 arm_ccfsm_state = 1;
23175 succeed = TRUE;
23177 else
23178 fail = TRUE;
23179 break;
23181 case JUMP_INSN:
23182 /* If this is an unconditional branch to the same label, succeed.
23183 If it is to another label, do nothing. If it is conditional,
23184 fail. */
23185 /* XXX Probably, the tests for SET and the PC are
23186 unnecessary. */
23188 scanbody = PATTERN (this_insn);
23189 if (GET_CODE (scanbody) == SET
23190 && GET_CODE (SET_DEST (scanbody)) == PC)
23192 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23193 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23195 arm_ccfsm_state = 2;
23196 succeed = TRUE;
23198 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23199 fail = TRUE;
23201 /* Fail if a conditional return is undesirable (e.g. on a
23202 StrongARM), but still allow this if optimizing for size. */
23203 else if (GET_CODE (scanbody) == return_code
23204 && !use_return_insn (TRUE, NULL)
23205 && !optimize_size)
23206 fail = TRUE;
23207 else if (GET_CODE (scanbody) == return_code)
23209 arm_ccfsm_state = 2;
23210 succeed = TRUE;
23212 else if (GET_CODE (scanbody) == PARALLEL)
23214 switch (get_attr_conds (this_insn))
23216 case CONDS_NOCOND:
23217 break;
23218 default:
23219 fail = TRUE;
23220 break;
23223 else
23224 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23226 break;
23228 case INSN:
23229 /* Instructions using or affecting the condition codes make it
23230 fail. */
23231 scanbody = PATTERN (this_insn);
23232 if (!(GET_CODE (scanbody) == SET
23233 || GET_CODE (scanbody) == PARALLEL)
23234 || get_attr_conds (this_insn) != CONDS_NOCOND)
23235 fail = TRUE;
23236 break;
23238 default:
23239 break;
23242 if (succeed)
23244 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23245 arm_target_label = CODE_LABEL_NUMBER (label);
23246 else
23248 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23250 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23252 this_insn = next_nonnote_insn (this_insn);
23253 gcc_assert (!this_insn
23254 || (!BARRIER_P (this_insn)
23255 && !LABEL_P (this_insn)));
23257 if (!this_insn)
23259 /* Oh, dear! we ran off the end.. give up. */
23260 extract_constrain_insn_cached (insn);
23261 arm_ccfsm_state = 0;
23262 arm_target_insn = NULL;
23263 return;
23265 arm_target_insn = this_insn;
23268 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23269 what it was. */
23270 if (!reverse)
23271 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23273 if (reverse || then_not_else)
23274 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23277 /* Restore recog_data (getting the attributes of other insns can
23278 destroy this array, but final.c assumes that it remains intact
23279 across this call. */
23280 extract_constrain_insn_cached (insn);
23284 /* Output IT instructions. */
23285 void
23286 thumb2_asm_output_opcode (FILE * stream)
23288 char buff[5];
23289 int n;
23291 if (arm_condexec_mask)
23293 for (n = 0; n < arm_condexec_masklen; n++)
23294 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23295 buff[n] = 0;
23296 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23297 arm_condition_codes[arm_current_cc]);
23298 arm_condexec_mask = 0;
23302 /* Returns true if REGNO is a valid register
23303 for holding a quantity of type MODE. */
23305 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23307 if (GET_MODE_CLASS (mode) == MODE_CC)
23308 return (regno == CC_REGNUM
23309 || (TARGET_HARD_FLOAT && TARGET_VFP
23310 && regno == VFPCC_REGNUM));
23312 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23313 return false;
23315 if (TARGET_THUMB1)
23316 /* For the Thumb we only allow values bigger than SImode in
23317 registers 0 - 6, so that there is always a second low
23318 register available to hold the upper part of the value.
23319 We probably we ought to ensure that the register is the
23320 start of an even numbered register pair. */
23321 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23323 if (TARGET_HARD_FLOAT && TARGET_VFP
23324 && IS_VFP_REGNUM (regno))
23326 if (mode == SFmode || mode == SImode)
23327 return VFP_REGNO_OK_FOR_SINGLE (regno);
23329 if (mode == DFmode)
23330 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23332 /* VFP registers can hold HFmode values, but there is no point in
23333 putting them there unless we have hardware conversion insns. */
23334 if (mode == HFmode)
23335 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23337 if (TARGET_NEON)
23338 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23339 || (VALID_NEON_QREG_MODE (mode)
23340 && NEON_REGNO_OK_FOR_QUAD (regno))
23341 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23342 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23343 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23344 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23345 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23347 return FALSE;
23350 if (TARGET_REALLY_IWMMXT)
23352 if (IS_IWMMXT_GR_REGNUM (regno))
23353 return mode == SImode;
23355 if (IS_IWMMXT_REGNUM (regno))
23356 return VALID_IWMMXT_REG_MODE (mode);
23359 /* We allow almost any value to be stored in the general registers.
23360 Restrict doubleword quantities to even register pairs in ARM state
23361 so that we can use ldrd. Do not allow very large Neon structure
23362 opaque modes in general registers; they would use too many. */
23363 if (regno <= LAST_ARM_REGNUM)
23365 if (ARM_NUM_REGS (mode) > 4)
23366 return FALSE;
23368 if (TARGET_THUMB2)
23369 return TRUE;
23371 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23374 if (regno == FRAME_POINTER_REGNUM
23375 || regno == ARG_POINTER_REGNUM)
23376 /* We only allow integers in the fake hard registers. */
23377 return GET_MODE_CLASS (mode) == MODE_INT;
23379 return FALSE;
23382 /* Implement MODES_TIEABLE_P. */
23384 bool
23385 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23387 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23388 return true;
23390 /* We specifically want to allow elements of "structure" modes to
23391 be tieable to the structure. This more general condition allows
23392 other rarer situations too. */
23393 if (TARGET_NEON
23394 && (VALID_NEON_DREG_MODE (mode1)
23395 || VALID_NEON_QREG_MODE (mode1)
23396 || VALID_NEON_STRUCT_MODE (mode1))
23397 && (VALID_NEON_DREG_MODE (mode2)
23398 || VALID_NEON_QREG_MODE (mode2)
23399 || VALID_NEON_STRUCT_MODE (mode2)))
23400 return true;
23402 return false;
23405 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23406 not used in arm mode. */
23408 enum reg_class
23409 arm_regno_class (int regno)
23411 if (regno == PC_REGNUM)
23412 return NO_REGS;
23414 if (TARGET_THUMB1)
23416 if (regno == STACK_POINTER_REGNUM)
23417 return STACK_REG;
23418 if (regno == CC_REGNUM)
23419 return CC_REG;
23420 if (regno < 8)
23421 return LO_REGS;
23422 return HI_REGS;
23425 if (TARGET_THUMB2 && regno < 8)
23426 return LO_REGS;
23428 if ( regno <= LAST_ARM_REGNUM
23429 || regno == FRAME_POINTER_REGNUM
23430 || regno == ARG_POINTER_REGNUM)
23431 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23433 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23434 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23436 if (IS_VFP_REGNUM (regno))
23438 if (regno <= D7_VFP_REGNUM)
23439 return VFP_D0_D7_REGS;
23440 else if (regno <= LAST_LO_VFP_REGNUM)
23441 return VFP_LO_REGS;
23442 else
23443 return VFP_HI_REGS;
23446 if (IS_IWMMXT_REGNUM (regno))
23447 return IWMMXT_REGS;
23449 if (IS_IWMMXT_GR_REGNUM (regno))
23450 return IWMMXT_GR_REGS;
23452 return NO_REGS;
23455 /* Handle a special case when computing the offset
23456 of an argument from the frame pointer. */
23458 arm_debugger_arg_offset (int value, rtx addr)
23460 rtx_insn *insn;
23462 /* We are only interested if dbxout_parms() failed to compute the offset. */
23463 if (value != 0)
23464 return 0;
23466 /* We can only cope with the case where the address is held in a register. */
23467 if (!REG_P (addr))
23468 return 0;
23470 /* If we are using the frame pointer to point at the argument, then
23471 an offset of 0 is correct. */
23472 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23473 return 0;
23475 /* If we are using the stack pointer to point at the
23476 argument, then an offset of 0 is correct. */
23477 /* ??? Check this is consistent with thumb2 frame layout. */
23478 if ((TARGET_THUMB || !frame_pointer_needed)
23479 && REGNO (addr) == SP_REGNUM)
23480 return 0;
23482 /* Oh dear. The argument is pointed to by a register rather
23483 than being held in a register, or being stored at a known
23484 offset from the frame pointer. Since GDB only understands
23485 those two kinds of argument we must translate the address
23486 held in the register into an offset from the frame pointer.
23487 We do this by searching through the insns for the function
23488 looking to see where this register gets its value. If the
23489 register is initialized from the frame pointer plus an offset
23490 then we are in luck and we can continue, otherwise we give up.
23492 This code is exercised by producing debugging information
23493 for a function with arguments like this:
23495 double func (double a, double b, int c, double d) {return d;}
23497 Without this code the stab for parameter 'd' will be set to
23498 an offset of 0 from the frame pointer, rather than 8. */
23500 /* The if() statement says:
23502 If the insn is a normal instruction
23503 and if the insn is setting the value in a register
23504 and if the register being set is the register holding the address of the argument
23505 and if the address is computing by an addition
23506 that involves adding to a register
23507 which is the frame pointer
23508 a constant integer
23510 then... */
23512 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23514 if ( NONJUMP_INSN_P (insn)
23515 && GET_CODE (PATTERN (insn)) == SET
23516 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23517 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23518 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23519 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23520 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23523 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23525 break;
23529 if (value == 0)
23531 debug_rtx (addr);
23532 warning (0, "unable to compute real location of stacked parameter");
23533 value = 8; /* XXX magic hack */
23536 return value;
23539 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23541 static const char *
23542 arm_invalid_parameter_type (const_tree t)
23544 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23545 return N_("function parameters cannot have __fp16 type");
23546 return NULL;
23549 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23551 static const char *
23552 arm_invalid_return_type (const_tree t)
23554 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23555 return N_("functions cannot return __fp16 type");
23556 return NULL;
23559 /* Implement TARGET_PROMOTED_TYPE. */
23561 static tree
23562 arm_promoted_type (const_tree t)
23564 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23565 return float_type_node;
23566 return NULL_TREE;
23569 /* Implement TARGET_CONVERT_TO_TYPE.
23570 Specifically, this hook implements the peculiarity of the ARM
23571 half-precision floating-point C semantics that requires conversions between
23572 __fp16 to or from double to do an intermediate conversion to float. */
23574 static tree
23575 arm_convert_to_type (tree type, tree expr)
23577 tree fromtype = TREE_TYPE (expr);
23578 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23579 return NULL_TREE;
23580 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23581 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23582 return convert (type, convert (float_type_node, expr));
23583 return NULL_TREE;
23586 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23587 This simply adds HFmode as a supported mode; even though we don't
23588 implement arithmetic on this type directly, it's supported by
23589 optabs conversions, much the way the double-word arithmetic is
23590 special-cased in the default hook. */
23592 static bool
23593 arm_scalar_mode_supported_p (machine_mode mode)
23595 if (mode == HFmode)
23596 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23597 else if (ALL_FIXED_POINT_MODE_P (mode))
23598 return true;
23599 else
23600 return default_scalar_mode_supported_p (mode);
23603 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23604 void
23605 neon_reinterpret (rtx dest, rtx src)
23607 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23610 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23611 not to early-clobber SRC registers in the process.
23613 We assume that the operands described by SRC and DEST represent a
23614 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23615 number of components into which the copy has been decomposed. */
23616 void
23617 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23619 unsigned int i;
23621 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23622 || REGNO (operands[0]) < REGNO (operands[1]))
23624 for (i = 0; i < count; i++)
23626 operands[2 * i] = dest[i];
23627 operands[2 * i + 1] = src[i];
23630 else
23632 for (i = 0; i < count; i++)
23634 operands[2 * i] = dest[count - i - 1];
23635 operands[2 * i + 1] = src[count - i - 1];
23640 /* Split operands into moves from op[1] + op[2] into op[0]. */
23642 void
23643 neon_split_vcombine (rtx operands[3])
23645 unsigned int dest = REGNO (operands[0]);
23646 unsigned int src1 = REGNO (operands[1]);
23647 unsigned int src2 = REGNO (operands[2]);
23648 machine_mode halfmode = GET_MODE (operands[1]);
23649 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23650 rtx destlo, desthi;
23652 if (src1 == dest && src2 == dest + halfregs)
23654 /* No-op move. Can't split to nothing; emit something. */
23655 emit_note (NOTE_INSN_DELETED);
23656 return;
23659 /* Preserve register attributes for variable tracking. */
23660 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23661 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23662 GET_MODE_SIZE (halfmode));
23664 /* Special case of reversed high/low parts. Use VSWP. */
23665 if (src2 == dest && src1 == dest + halfregs)
23667 rtx x = gen_rtx_SET (destlo, operands[1]);
23668 rtx y = gen_rtx_SET (desthi, operands[2]);
23669 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23670 return;
23673 if (!reg_overlap_mentioned_p (operands[2], destlo))
23675 /* Try to avoid unnecessary moves if part of the result
23676 is in the right place already. */
23677 if (src1 != dest)
23678 emit_move_insn (destlo, operands[1]);
23679 if (src2 != dest + halfregs)
23680 emit_move_insn (desthi, operands[2]);
23682 else
23684 if (src2 != dest + halfregs)
23685 emit_move_insn (desthi, operands[2]);
23686 if (src1 != dest)
23687 emit_move_insn (destlo, operands[1]);
23691 /* Return the number (counting from 0) of
23692 the least significant set bit in MASK. */
23694 inline static int
23695 number_of_first_bit_set (unsigned mask)
23697 return ctz_hwi (mask);
23700 /* Like emit_multi_reg_push, but allowing for a different set of
23701 registers to be described as saved. MASK is the set of registers
23702 to be saved; REAL_REGS is the set of registers to be described as
23703 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23705 static rtx_insn *
23706 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23708 unsigned long regno;
23709 rtx par[10], tmp, reg;
23710 rtx_insn *insn;
23711 int i, j;
23713 /* Build the parallel of the registers actually being stored. */
23714 for (i = 0; mask; ++i, mask &= mask - 1)
23716 regno = ctz_hwi (mask);
23717 reg = gen_rtx_REG (SImode, regno);
23719 if (i == 0)
23720 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23721 else
23722 tmp = gen_rtx_USE (VOIDmode, reg);
23724 par[i] = tmp;
23727 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23728 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23729 tmp = gen_frame_mem (BLKmode, tmp);
23730 tmp = gen_rtx_SET (tmp, par[0]);
23731 par[0] = tmp;
23733 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23734 insn = emit_insn (tmp);
23736 /* Always build the stack adjustment note for unwind info. */
23737 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23738 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23739 par[0] = tmp;
23741 /* Build the parallel of the registers recorded as saved for unwind. */
23742 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23744 regno = ctz_hwi (real_regs);
23745 reg = gen_rtx_REG (SImode, regno);
23747 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23748 tmp = gen_frame_mem (SImode, tmp);
23749 tmp = gen_rtx_SET (tmp, reg);
23750 RTX_FRAME_RELATED_P (tmp) = 1;
23751 par[j + 1] = tmp;
23754 if (j == 0)
23755 tmp = par[0];
23756 else
23758 RTX_FRAME_RELATED_P (par[0]) = 1;
23759 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23762 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23764 return insn;
23767 /* Emit code to push or pop registers to or from the stack. F is the
23768 assembly file. MASK is the registers to pop. */
23769 static void
23770 thumb_pop (FILE *f, unsigned long mask)
23772 int regno;
23773 int lo_mask = mask & 0xFF;
23774 int pushed_words = 0;
23776 gcc_assert (mask);
23778 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23780 /* Special case. Do not generate a POP PC statement here, do it in
23781 thumb_exit() */
23782 thumb_exit (f, -1);
23783 return;
23786 fprintf (f, "\tpop\t{");
23788 /* Look at the low registers first. */
23789 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23791 if (lo_mask & 1)
23793 asm_fprintf (f, "%r", regno);
23795 if ((lo_mask & ~1) != 0)
23796 fprintf (f, ", ");
23798 pushed_words++;
23802 if (mask & (1 << PC_REGNUM))
23804 /* Catch popping the PC. */
23805 if (TARGET_INTERWORK || TARGET_BACKTRACE
23806 || crtl->calls_eh_return)
23808 /* The PC is never poped directly, instead
23809 it is popped into r3 and then BX is used. */
23810 fprintf (f, "}\n");
23812 thumb_exit (f, -1);
23814 return;
23816 else
23818 if (mask & 0xFF)
23819 fprintf (f, ", ");
23821 asm_fprintf (f, "%r", PC_REGNUM);
23825 fprintf (f, "}\n");
23828 /* Generate code to return from a thumb function.
23829 If 'reg_containing_return_addr' is -1, then the return address is
23830 actually on the stack, at the stack pointer. */
23831 static void
23832 thumb_exit (FILE *f, int reg_containing_return_addr)
23834 unsigned regs_available_for_popping;
23835 unsigned regs_to_pop;
23836 int pops_needed;
23837 unsigned available;
23838 unsigned required;
23839 machine_mode mode;
23840 int size;
23841 int restore_a4 = FALSE;
23843 /* Compute the registers we need to pop. */
23844 regs_to_pop = 0;
23845 pops_needed = 0;
23847 if (reg_containing_return_addr == -1)
23849 regs_to_pop |= 1 << LR_REGNUM;
23850 ++pops_needed;
23853 if (TARGET_BACKTRACE)
23855 /* Restore the (ARM) frame pointer and stack pointer. */
23856 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23857 pops_needed += 2;
23860 /* If there is nothing to pop then just emit the BX instruction and
23861 return. */
23862 if (pops_needed == 0)
23864 if (crtl->calls_eh_return)
23865 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23867 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23868 return;
23870 /* Otherwise if we are not supporting interworking and we have not created
23871 a backtrace structure and the function was not entered in ARM mode then
23872 just pop the return address straight into the PC. */
23873 else if (!TARGET_INTERWORK
23874 && !TARGET_BACKTRACE
23875 && !is_called_in_ARM_mode (current_function_decl)
23876 && !crtl->calls_eh_return)
23878 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23879 return;
23882 /* Find out how many of the (return) argument registers we can corrupt. */
23883 regs_available_for_popping = 0;
23885 /* If returning via __builtin_eh_return, the bottom three registers
23886 all contain information needed for the return. */
23887 if (crtl->calls_eh_return)
23888 size = 12;
23889 else
23891 /* If we can deduce the registers used from the function's
23892 return value. This is more reliable that examining
23893 df_regs_ever_live_p () because that will be set if the register is
23894 ever used in the function, not just if the register is used
23895 to hold a return value. */
23897 if (crtl->return_rtx != 0)
23898 mode = GET_MODE (crtl->return_rtx);
23899 else
23900 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23902 size = GET_MODE_SIZE (mode);
23904 if (size == 0)
23906 /* In a void function we can use any argument register.
23907 In a function that returns a structure on the stack
23908 we can use the second and third argument registers. */
23909 if (mode == VOIDmode)
23910 regs_available_for_popping =
23911 (1 << ARG_REGISTER (1))
23912 | (1 << ARG_REGISTER (2))
23913 | (1 << ARG_REGISTER (3));
23914 else
23915 regs_available_for_popping =
23916 (1 << ARG_REGISTER (2))
23917 | (1 << ARG_REGISTER (3));
23919 else if (size <= 4)
23920 regs_available_for_popping =
23921 (1 << ARG_REGISTER (2))
23922 | (1 << ARG_REGISTER (3));
23923 else if (size <= 8)
23924 regs_available_for_popping =
23925 (1 << ARG_REGISTER (3));
23928 /* Match registers to be popped with registers into which we pop them. */
23929 for (available = regs_available_for_popping,
23930 required = regs_to_pop;
23931 required != 0 && available != 0;
23932 available &= ~(available & - available),
23933 required &= ~(required & - required))
23934 -- pops_needed;
23936 /* If we have any popping registers left over, remove them. */
23937 if (available > 0)
23938 regs_available_for_popping &= ~available;
23940 /* Otherwise if we need another popping register we can use
23941 the fourth argument register. */
23942 else if (pops_needed)
23944 /* If we have not found any free argument registers and
23945 reg a4 contains the return address, we must move it. */
23946 if (regs_available_for_popping == 0
23947 && reg_containing_return_addr == LAST_ARG_REGNUM)
23949 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23950 reg_containing_return_addr = LR_REGNUM;
23952 else if (size > 12)
23954 /* Register a4 is being used to hold part of the return value,
23955 but we have dire need of a free, low register. */
23956 restore_a4 = TRUE;
23958 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23961 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23963 /* The fourth argument register is available. */
23964 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23966 --pops_needed;
23970 /* Pop as many registers as we can. */
23971 thumb_pop (f, regs_available_for_popping);
23973 /* Process the registers we popped. */
23974 if (reg_containing_return_addr == -1)
23976 /* The return address was popped into the lowest numbered register. */
23977 regs_to_pop &= ~(1 << LR_REGNUM);
23979 reg_containing_return_addr =
23980 number_of_first_bit_set (regs_available_for_popping);
23982 /* Remove this register for the mask of available registers, so that
23983 the return address will not be corrupted by further pops. */
23984 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23987 /* If we popped other registers then handle them here. */
23988 if (regs_available_for_popping)
23990 int frame_pointer;
23992 /* Work out which register currently contains the frame pointer. */
23993 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23995 /* Move it into the correct place. */
23996 asm_fprintf (f, "\tmov\t%r, %r\n",
23997 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23999 /* (Temporarily) remove it from the mask of popped registers. */
24000 regs_available_for_popping &= ~(1 << frame_pointer);
24001 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24003 if (regs_available_for_popping)
24005 int stack_pointer;
24007 /* We popped the stack pointer as well,
24008 find the register that contains it. */
24009 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24011 /* Move it into the stack register. */
24012 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24014 /* At this point we have popped all necessary registers, so
24015 do not worry about restoring regs_available_for_popping
24016 to its correct value:
24018 assert (pops_needed == 0)
24019 assert (regs_available_for_popping == (1 << frame_pointer))
24020 assert (regs_to_pop == (1 << STACK_POINTER)) */
24022 else
24024 /* Since we have just move the popped value into the frame
24025 pointer, the popping register is available for reuse, and
24026 we know that we still have the stack pointer left to pop. */
24027 regs_available_for_popping |= (1 << frame_pointer);
24031 /* If we still have registers left on the stack, but we no longer have
24032 any registers into which we can pop them, then we must move the return
24033 address into the link register and make available the register that
24034 contained it. */
24035 if (regs_available_for_popping == 0 && pops_needed > 0)
24037 regs_available_for_popping |= 1 << reg_containing_return_addr;
24039 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24040 reg_containing_return_addr);
24042 reg_containing_return_addr = LR_REGNUM;
24045 /* If we have registers left on the stack then pop some more.
24046 We know that at most we will want to pop FP and SP. */
24047 if (pops_needed > 0)
24049 int popped_into;
24050 int move_to;
24052 thumb_pop (f, regs_available_for_popping);
24054 /* We have popped either FP or SP.
24055 Move whichever one it is into the correct register. */
24056 popped_into = number_of_first_bit_set (regs_available_for_popping);
24057 move_to = number_of_first_bit_set (regs_to_pop);
24059 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24061 regs_to_pop &= ~(1 << move_to);
24063 --pops_needed;
24066 /* If we still have not popped everything then we must have only
24067 had one register available to us and we are now popping the SP. */
24068 if (pops_needed > 0)
24070 int popped_into;
24072 thumb_pop (f, regs_available_for_popping);
24074 popped_into = number_of_first_bit_set (regs_available_for_popping);
24076 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24078 assert (regs_to_pop == (1 << STACK_POINTER))
24079 assert (pops_needed == 1)
24083 /* If necessary restore the a4 register. */
24084 if (restore_a4)
24086 if (reg_containing_return_addr != LR_REGNUM)
24088 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24089 reg_containing_return_addr = LR_REGNUM;
24092 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24095 if (crtl->calls_eh_return)
24096 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24098 /* Return to caller. */
24099 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24102 /* Scan INSN just before assembler is output for it.
24103 For Thumb-1, we track the status of the condition codes; this
24104 information is used in the cbranchsi4_insn pattern. */
24105 void
24106 thumb1_final_prescan_insn (rtx_insn *insn)
24108 if (flag_print_asm_name)
24109 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24110 INSN_ADDRESSES (INSN_UID (insn)));
24111 /* Don't overwrite the previous setter when we get to a cbranch. */
24112 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24114 enum attr_conds conds;
24116 if (cfun->machine->thumb1_cc_insn)
24118 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24119 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24120 CC_STATUS_INIT;
24122 conds = get_attr_conds (insn);
24123 if (conds == CONDS_SET)
24125 rtx set = single_set (insn);
24126 cfun->machine->thumb1_cc_insn = insn;
24127 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24128 cfun->machine->thumb1_cc_op1 = const0_rtx;
24129 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24130 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24132 rtx src1 = XEXP (SET_SRC (set), 1);
24133 if (src1 == const0_rtx)
24134 cfun->machine->thumb1_cc_mode = CCmode;
24136 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24138 /* Record the src register operand instead of dest because
24139 cprop_hardreg pass propagates src. */
24140 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24143 else if (conds != CONDS_NOCOND)
24144 cfun->machine->thumb1_cc_insn = NULL_RTX;
24147 /* Check if unexpected far jump is used. */
24148 if (cfun->machine->lr_save_eliminated
24149 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24150 internal_error("Unexpected thumb1 far jump");
24154 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24156 unsigned HOST_WIDE_INT mask = 0xff;
24157 int i;
24159 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24160 if (val == 0) /* XXX */
24161 return 0;
24163 for (i = 0; i < 25; i++)
24164 if ((val & (mask << i)) == val)
24165 return 1;
24167 return 0;
24170 /* Returns nonzero if the current function contains,
24171 or might contain a far jump. */
24172 static int
24173 thumb_far_jump_used_p (void)
24175 rtx_insn *insn;
24176 bool far_jump = false;
24177 unsigned int func_size = 0;
24179 /* This test is only important for leaf functions. */
24180 /* assert (!leaf_function_p ()); */
24182 /* If we have already decided that far jumps may be used,
24183 do not bother checking again, and always return true even if
24184 it turns out that they are not being used. Once we have made
24185 the decision that far jumps are present (and that hence the link
24186 register will be pushed onto the stack) we cannot go back on it. */
24187 if (cfun->machine->far_jump_used)
24188 return 1;
24190 /* If this function is not being called from the prologue/epilogue
24191 generation code then it must be being called from the
24192 INITIAL_ELIMINATION_OFFSET macro. */
24193 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24195 /* In this case we know that we are being asked about the elimination
24196 of the arg pointer register. If that register is not being used,
24197 then there are no arguments on the stack, and we do not have to
24198 worry that a far jump might force the prologue to push the link
24199 register, changing the stack offsets. In this case we can just
24200 return false, since the presence of far jumps in the function will
24201 not affect stack offsets.
24203 If the arg pointer is live (or if it was live, but has now been
24204 eliminated and so set to dead) then we do have to test to see if
24205 the function might contain a far jump. This test can lead to some
24206 false negatives, since before reload is completed, then length of
24207 branch instructions is not known, so gcc defaults to returning their
24208 longest length, which in turn sets the far jump attribute to true.
24210 A false negative will not result in bad code being generated, but it
24211 will result in a needless push and pop of the link register. We
24212 hope that this does not occur too often.
24214 If we need doubleword stack alignment this could affect the other
24215 elimination offsets so we can't risk getting it wrong. */
24216 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24217 cfun->machine->arg_pointer_live = 1;
24218 else if (!cfun->machine->arg_pointer_live)
24219 return 0;
24222 /* We should not change far_jump_used during or after reload, as there is
24223 no chance to change stack frame layout. */
24224 if (reload_in_progress || reload_completed)
24225 return 0;
24227 /* Check to see if the function contains a branch
24228 insn with the far jump attribute set. */
24229 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24231 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24233 far_jump = true;
24235 func_size += get_attr_length (insn);
24238 /* Attribute far_jump will always be true for thumb1 before
24239 shorten_branch pass. So checking far_jump attribute before
24240 shorten_branch isn't much useful.
24242 Following heuristic tries to estimate more accurately if a far jump
24243 may finally be used. The heuristic is very conservative as there is
24244 no chance to roll-back the decision of not to use far jump.
24246 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24247 2-byte insn is associated with a 4 byte constant pool. Using
24248 function size 2048/3 as the threshold is conservative enough. */
24249 if (far_jump)
24251 if ((func_size * 3) >= 2048)
24253 /* Record the fact that we have decided that
24254 the function does use far jumps. */
24255 cfun->machine->far_jump_used = 1;
24256 return 1;
24260 return 0;
24263 /* Return nonzero if FUNC must be entered in ARM mode. */
24264 static bool
24265 is_called_in_ARM_mode (tree func)
24267 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24269 /* Ignore the problem about functions whose address is taken. */
24270 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24271 return true;
24273 #ifdef ARM_PE
24274 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24275 #else
24276 return false;
24277 #endif
24280 /* Given the stack offsets and register mask in OFFSETS, decide how
24281 many additional registers to push instead of subtracting a constant
24282 from SP. For epilogues the principle is the same except we use pop.
24283 FOR_PROLOGUE indicates which we're generating. */
24284 static int
24285 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24287 HOST_WIDE_INT amount;
24288 unsigned long live_regs_mask = offsets->saved_regs_mask;
24289 /* Extract a mask of the ones we can give to the Thumb's push/pop
24290 instruction. */
24291 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24292 /* Then count how many other high registers will need to be pushed. */
24293 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24294 int n_free, reg_base, size;
24296 if (!for_prologue && frame_pointer_needed)
24297 amount = offsets->locals_base - offsets->saved_regs;
24298 else
24299 amount = offsets->outgoing_args - offsets->saved_regs;
24301 /* If the stack frame size is 512 exactly, we can save one load
24302 instruction, which should make this a win even when optimizing
24303 for speed. */
24304 if (!optimize_size && amount != 512)
24305 return 0;
24307 /* Can't do this if there are high registers to push. */
24308 if (high_regs_pushed != 0)
24309 return 0;
24311 /* Shouldn't do it in the prologue if no registers would normally
24312 be pushed at all. In the epilogue, also allow it if we'll have
24313 a pop insn for the PC. */
24314 if (l_mask == 0
24315 && (for_prologue
24316 || TARGET_BACKTRACE
24317 || (live_regs_mask & 1 << LR_REGNUM) == 0
24318 || TARGET_INTERWORK
24319 || crtl->args.pretend_args_size != 0))
24320 return 0;
24322 /* Don't do this if thumb_expand_prologue wants to emit instructions
24323 between the push and the stack frame allocation. */
24324 if (for_prologue
24325 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24326 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24327 return 0;
24329 reg_base = 0;
24330 n_free = 0;
24331 if (!for_prologue)
24333 size = arm_size_return_regs ();
24334 reg_base = ARM_NUM_INTS (size);
24335 live_regs_mask >>= reg_base;
24338 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24339 && (for_prologue || call_used_regs[reg_base + n_free]))
24341 live_regs_mask >>= 1;
24342 n_free++;
24345 if (n_free == 0)
24346 return 0;
24347 gcc_assert (amount / 4 * 4 == amount);
24349 if (amount >= 512 && (amount - n_free * 4) < 512)
24350 return (amount - 508) / 4;
24351 if (amount <= n_free * 4)
24352 return amount / 4;
24353 return 0;
24356 /* The bits which aren't usefully expanded as rtl. */
24357 const char *
24358 thumb1_unexpanded_epilogue (void)
24360 arm_stack_offsets *offsets;
24361 int regno;
24362 unsigned long live_regs_mask = 0;
24363 int high_regs_pushed = 0;
24364 int extra_pop;
24365 int had_to_push_lr;
24366 int size;
24368 if (cfun->machine->return_used_this_function != 0)
24369 return "";
24371 if (IS_NAKED (arm_current_func_type ()))
24372 return "";
24374 offsets = arm_get_frame_offsets ();
24375 live_regs_mask = offsets->saved_regs_mask;
24376 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24378 /* If we can deduce the registers used from the function's return value.
24379 This is more reliable that examining df_regs_ever_live_p () because that
24380 will be set if the register is ever used in the function, not just if
24381 the register is used to hold a return value. */
24382 size = arm_size_return_regs ();
24384 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24385 if (extra_pop > 0)
24387 unsigned long extra_mask = (1 << extra_pop) - 1;
24388 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24391 /* The prolog may have pushed some high registers to use as
24392 work registers. e.g. the testsuite file:
24393 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24394 compiles to produce:
24395 push {r4, r5, r6, r7, lr}
24396 mov r7, r9
24397 mov r6, r8
24398 push {r6, r7}
24399 as part of the prolog. We have to undo that pushing here. */
24401 if (high_regs_pushed)
24403 unsigned long mask = live_regs_mask & 0xff;
24404 int next_hi_reg;
24406 /* The available low registers depend on the size of the value we are
24407 returning. */
24408 if (size <= 12)
24409 mask |= 1 << 3;
24410 if (size <= 8)
24411 mask |= 1 << 2;
24413 if (mask == 0)
24414 /* Oh dear! We have no low registers into which we can pop
24415 high registers! */
24416 internal_error
24417 ("no low registers available for popping high registers");
24419 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24420 if (live_regs_mask & (1 << next_hi_reg))
24421 break;
24423 while (high_regs_pushed)
24425 /* Find lo register(s) into which the high register(s) can
24426 be popped. */
24427 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24429 if (mask & (1 << regno))
24430 high_regs_pushed--;
24431 if (high_regs_pushed == 0)
24432 break;
24435 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24437 /* Pop the values into the low register(s). */
24438 thumb_pop (asm_out_file, mask);
24440 /* Move the value(s) into the high registers. */
24441 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24443 if (mask & (1 << regno))
24445 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24446 regno);
24448 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24449 if (live_regs_mask & (1 << next_hi_reg))
24450 break;
24454 live_regs_mask &= ~0x0f00;
24457 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24458 live_regs_mask &= 0xff;
24460 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24462 /* Pop the return address into the PC. */
24463 if (had_to_push_lr)
24464 live_regs_mask |= 1 << PC_REGNUM;
24466 /* Either no argument registers were pushed or a backtrace
24467 structure was created which includes an adjusted stack
24468 pointer, so just pop everything. */
24469 if (live_regs_mask)
24470 thumb_pop (asm_out_file, live_regs_mask);
24472 /* We have either just popped the return address into the
24473 PC or it is was kept in LR for the entire function.
24474 Note that thumb_pop has already called thumb_exit if the
24475 PC was in the list. */
24476 if (!had_to_push_lr)
24477 thumb_exit (asm_out_file, LR_REGNUM);
24479 else
24481 /* Pop everything but the return address. */
24482 if (live_regs_mask)
24483 thumb_pop (asm_out_file, live_regs_mask);
24485 if (had_to_push_lr)
24487 if (size > 12)
24489 /* We have no free low regs, so save one. */
24490 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24491 LAST_ARG_REGNUM);
24494 /* Get the return address into a temporary register. */
24495 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24497 if (size > 12)
24499 /* Move the return address to lr. */
24500 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24501 LAST_ARG_REGNUM);
24502 /* Restore the low register. */
24503 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24504 IP_REGNUM);
24505 regno = LR_REGNUM;
24507 else
24508 regno = LAST_ARG_REGNUM;
24510 else
24511 regno = LR_REGNUM;
24513 /* Remove the argument registers that were pushed onto the stack. */
24514 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24515 SP_REGNUM, SP_REGNUM,
24516 crtl->args.pretend_args_size);
24518 thumb_exit (asm_out_file, regno);
24521 return "";
24524 /* Functions to save and restore machine-specific function data. */
24525 static struct machine_function *
24526 arm_init_machine_status (void)
24528 struct machine_function *machine;
24529 machine = ggc_cleared_alloc<machine_function> ();
24531 #if ARM_FT_UNKNOWN != 0
24532 machine->func_type = ARM_FT_UNKNOWN;
24533 #endif
24534 return machine;
24537 /* Return an RTX indicating where the return address to the
24538 calling function can be found. */
24540 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24542 if (count != 0)
24543 return NULL_RTX;
24545 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24548 /* Do anything needed before RTL is emitted for each function. */
24549 void
24550 arm_init_expanders (void)
24552 /* Arrange to initialize and mark the machine per-function status. */
24553 init_machine_status = arm_init_machine_status;
24555 /* This is to stop the combine pass optimizing away the alignment
24556 adjustment of va_arg. */
24557 /* ??? It is claimed that this should not be necessary. */
24558 if (cfun)
24559 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24562 /* Check that FUNC is called with a different mode. */
24564 bool
24565 arm_change_mode_p (tree func)
24567 if (TREE_CODE (func) != FUNCTION_DECL)
24568 return false;
24570 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24572 if (!callee_tree)
24573 callee_tree = target_option_default_node;
24575 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24576 int flags = callee_opts->x_target_flags;
24578 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24581 /* Like arm_compute_initial_elimination offset. Simpler because there
24582 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24583 to point at the base of the local variables after static stack
24584 space for a function has been allocated. */
24586 HOST_WIDE_INT
24587 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24589 arm_stack_offsets *offsets;
24591 offsets = arm_get_frame_offsets ();
24593 switch (from)
24595 case ARG_POINTER_REGNUM:
24596 switch (to)
24598 case STACK_POINTER_REGNUM:
24599 return offsets->outgoing_args - offsets->saved_args;
24601 case FRAME_POINTER_REGNUM:
24602 return offsets->soft_frame - offsets->saved_args;
24604 case ARM_HARD_FRAME_POINTER_REGNUM:
24605 return offsets->saved_regs - offsets->saved_args;
24607 case THUMB_HARD_FRAME_POINTER_REGNUM:
24608 return offsets->locals_base - offsets->saved_args;
24610 default:
24611 gcc_unreachable ();
24613 break;
24615 case FRAME_POINTER_REGNUM:
24616 switch (to)
24618 case STACK_POINTER_REGNUM:
24619 return offsets->outgoing_args - offsets->soft_frame;
24621 case ARM_HARD_FRAME_POINTER_REGNUM:
24622 return offsets->saved_regs - offsets->soft_frame;
24624 case THUMB_HARD_FRAME_POINTER_REGNUM:
24625 return offsets->locals_base - offsets->soft_frame;
24627 default:
24628 gcc_unreachable ();
24630 break;
24632 default:
24633 gcc_unreachable ();
24637 /* Generate the function's prologue. */
24639 void
24640 thumb1_expand_prologue (void)
24642 rtx_insn *insn;
24644 HOST_WIDE_INT amount;
24645 HOST_WIDE_INT size;
24646 arm_stack_offsets *offsets;
24647 unsigned long func_type;
24648 int regno;
24649 unsigned long live_regs_mask;
24650 unsigned long l_mask;
24651 unsigned high_regs_pushed = 0;
24653 func_type = arm_current_func_type ();
24655 /* Naked functions don't have prologues. */
24656 if (IS_NAKED (func_type))
24657 return;
24659 if (IS_INTERRUPT (func_type))
24661 error ("interrupt Service Routines cannot be coded in Thumb mode");
24662 return;
24665 if (is_called_in_ARM_mode (current_function_decl))
24666 emit_insn (gen_prologue_thumb1_interwork ());
24668 offsets = arm_get_frame_offsets ();
24669 live_regs_mask = offsets->saved_regs_mask;
24671 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24672 l_mask = live_regs_mask & 0x40ff;
24673 /* Then count how many other high registers will need to be pushed. */
24674 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24676 if (crtl->args.pretend_args_size)
24678 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24680 if (cfun->machine->uses_anonymous_args)
24682 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24683 unsigned long mask;
24685 mask = 1ul << (LAST_ARG_REGNUM + 1);
24686 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24688 insn = thumb1_emit_multi_reg_push (mask, 0);
24690 else
24692 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24693 stack_pointer_rtx, x));
24695 RTX_FRAME_RELATED_P (insn) = 1;
24698 if (TARGET_BACKTRACE)
24700 HOST_WIDE_INT offset = 0;
24701 unsigned work_register;
24702 rtx work_reg, x, arm_hfp_rtx;
24704 /* We have been asked to create a stack backtrace structure.
24705 The code looks like this:
24707 0 .align 2
24708 0 func:
24709 0 sub SP, #16 Reserve space for 4 registers.
24710 2 push {R7} Push low registers.
24711 4 add R7, SP, #20 Get the stack pointer before the push.
24712 6 str R7, [SP, #8] Store the stack pointer
24713 (before reserving the space).
24714 8 mov R7, PC Get hold of the start of this code + 12.
24715 10 str R7, [SP, #16] Store it.
24716 12 mov R7, FP Get hold of the current frame pointer.
24717 14 str R7, [SP, #4] Store it.
24718 16 mov R7, LR Get hold of the current return address.
24719 18 str R7, [SP, #12] Store it.
24720 20 add R7, SP, #16 Point at the start of the
24721 backtrace structure.
24722 22 mov FP, R7 Put this value into the frame pointer. */
24724 work_register = thumb_find_work_register (live_regs_mask);
24725 work_reg = gen_rtx_REG (SImode, work_register);
24726 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24728 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24729 stack_pointer_rtx, GEN_INT (-16)));
24730 RTX_FRAME_RELATED_P (insn) = 1;
24732 if (l_mask)
24734 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24735 RTX_FRAME_RELATED_P (insn) = 1;
24737 offset = bit_count (l_mask) * UNITS_PER_WORD;
24740 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24741 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24743 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24744 x = gen_frame_mem (SImode, x);
24745 emit_move_insn (x, work_reg);
24747 /* Make sure that the instruction fetching the PC is in the right place
24748 to calculate "start of backtrace creation code + 12". */
24749 /* ??? The stores using the common WORK_REG ought to be enough to
24750 prevent the scheduler from doing anything weird. Failing that
24751 we could always move all of the following into an UNSPEC_VOLATILE. */
24752 if (l_mask)
24754 x = gen_rtx_REG (SImode, PC_REGNUM);
24755 emit_move_insn (work_reg, x);
24757 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24758 x = gen_frame_mem (SImode, x);
24759 emit_move_insn (x, work_reg);
24761 emit_move_insn (work_reg, arm_hfp_rtx);
24763 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24764 x = gen_frame_mem (SImode, x);
24765 emit_move_insn (x, work_reg);
24767 else
24769 emit_move_insn (work_reg, arm_hfp_rtx);
24771 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24772 x = gen_frame_mem (SImode, x);
24773 emit_move_insn (x, work_reg);
24775 x = gen_rtx_REG (SImode, PC_REGNUM);
24776 emit_move_insn (work_reg, x);
24778 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24779 x = gen_frame_mem (SImode, x);
24780 emit_move_insn (x, work_reg);
24783 x = gen_rtx_REG (SImode, LR_REGNUM);
24784 emit_move_insn (work_reg, x);
24786 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24787 x = gen_frame_mem (SImode, x);
24788 emit_move_insn (x, work_reg);
24790 x = GEN_INT (offset + 12);
24791 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24793 emit_move_insn (arm_hfp_rtx, work_reg);
24795 /* Optimization: If we are not pushing any low registers but we are going
24796 to push some high registers then delay our first push. This will just
24797 be a push of LR and we can combine it with the push of the first high
24798 register. */
24799 else if ((l_mask & 0xff) != 0
24800 || (high_regs_pushed == 0 && l_mask))
24802 unsigned long mask = l_mask;
24803 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24804 insn = thumb1_emit_multi_reg_push (mask, mask);
24805 RTX_FRAME_RELATED_P (insn) = 1;
24808 if (high_regs_pushed)
24810 unsigned pushable_regs;
24811 unsigned next_hi_reg;
24812 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24813 : crtl->args.info.nregs;
24814 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24816 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24817 if (live_regs_mask & (1 << next_hi_reg))
24818 break;
24820 /* Here we need to mask out registers used for passing arguments
24821 even if they can be pushed. This is to avoid using them to stash the high
24822 registers. Such kind of stash may clobber the use of arguments. */
24823 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24825 if (pushable_regs == 0)
24826 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24828 while (high_regs_pushed > 0)
24830 unsigned long real_regs_mask = 0;
24832 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24834 if (pushable_regs & (1 << regno))
24836 emit_move_insn (gen_rtx_REG (SImode, regno),
24837 gen_rtx_REG (SImode, next_hi_reg));
24839 high_regs_pushed --;
24840 real_regs_mask |= (1 << next_hi_reg);
24842 if (high_regs_pushed)
24844 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24845 next_hi_reg --)
24846 if (live_regs_mask & (1 << next_hi_reg))
24847 break;
24849 else
24851 pushable_regs &= ~((1 << regno) - 1);
24852 break;
24857 /* If we had to find a work register and we have not yet
24858 saved the LR then add it to the list of regs to push. */
24859 if (l_mask == (1 << LR_REGNUM))
24861 pushable_regs |= l_mask;
24862 real_regs_mask |= l_mask;
24863 l_mask = 0;
24866 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24867 RTX_FRAME_RELATED_P (insn) = 1;
24871 /* Load the pic register before setting the frame pointer,
24872 so we can use r7 as a temporary work register. */
24873 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24874 arm_load_pic_register (live_regs_mask);
24876 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24877 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24878 stack_pointer_rtx);
24880 size = offsets->outgoing_args - offsets->saved_args;
24881 if (flag_stack_usage_info)
24882 current_function_static_stack_size = size;
24884 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24885 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24886 sorry ("-fstack-check=specific for Thumb-1");
24888 amount = offsets->outgoing_args - offsets->saved_regs;
24889 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24890 if (amount)
24892 if (amount < 512)
24894 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24895 GEN_INT (- amount)));
24896 RTX_FRAME_RELATED_P (insn) = 1;
24898 else
24900 rtx reg, dwarf;
24902 /* The stack decrement is too big for an immediate value in a single
24903 insn. In theory we could issue multiple subtracts, but after
24904 three of them it becomes more space efficient to place the full
24905 value in the constant pool and load into a register. (Also the
24906 ARM debugger really likes to see only one stack decrement per
24907 function). So instead we look for a scratch register into which
24908 we can load the decrement, and then we subtract this from the
24909 stack pointer. Unfortunately on the thumb the only available
24910 scratch registers are the argument registers, and we cannot use
24911 these as they may hold arguments to the function. Instead we
24912 attempt to locate a call preserved register which is used by this
24913 function. If we can find one, then we know that it will have
24914 been pushed at the start of the prologue and so we can corrupt
24915 it now. */
24916 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24917 if (live_regs_mask & (1 << regno))
24918 break;
24920 gcc_assert(regno <= LAST_LO_REGNUM);
24922 reg = gen_rtx_REG (SImode, regno);
24924 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24926 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24927 stack_pointer_rtx, reg));
24929 dwarf = gen_rtx_SET (stack_pointer_rtx,
24930 plus_constant (Pmode, stack_pointer_rtx,
24931 -amount));
24932 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24933 RTX_FRAME_RELATED_P (insn) = 1;
24937 if (frame_pointer_needed)
24938 thumb_set_frame_pointer (offsets);
24940 /* If we are profiling, make sure no instructions are scheduled before
24941 the call to mcount. Similarly if the user has requested no
24942 scheduling in the prolog. Similarly if we want non-call exceptions
24943 using the EABI unwinder, to prevent faulting instructions from being
24944 swapped with a stack adjustment. */
24945 if (crtl->profile || !TARGET_SCHED_PROLOG
24946 || (arm_except_unwind_info (&global_options) == UI_TARGET
24947 && cfun->can_throw_non_call_exceptions))
24948 emit_insn (gen_blockage ());
24950 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24951 if (live_regs_mask & 0xff)
24952 cfun->machine->lr_save_eliminated = 0;
24955 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
24956 POP instruction can be generated. LR should be replaced by PC. All
24957 the checks required are already done by USE_RETURN_INSN (). Hence,
24958 all we really need to check here is if single register is to be
24959 returned, or multiple register return. */
24960 void
24961 thumb2_expand_return (bool simple_return)
24963 int i, num_regs;
24964 unsigned long saved_regs_mask;
24965 arm_stack_offsets *offsets;
24967 offsets = arm_get_frame_offsets ();
24968 saved_regs_mask = offsets->saved_regs_mask;
24970 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24971 if (saved_regs_mask & (1 << i))
24972 num_regs++;
24974 if (!simple_return && saved_regs_mask)
24976 if (num_regs == 1)
24978 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24979 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24980 rtx addr = gen_rtx_MEM (SImode,
24981 gen_rtx_POST_INC (SImode,
24982 stack_pointer_rtx));
24983 set_mem_alias_set (addr, get_frame_alias_set ());
24984 XVECEXP (par, 0, 0) = ret_rtx;
24985 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
24986 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24987 emit_jump_insn (par);
24989 else
24991 saved_regs_mask &= ~ (1 << LR_REGNUM);
24992 saved_regs_mask |= (1 << PC_REGNUM);
24993 arm_emit_multi_reg_pop (saved_regs_mask);
24996 else
24998 emit_jump_insn (simple_return_rtx);
25002 void
25003 thumb1_expand_epilogue (void)
25005 HOST_WIDE_INT amount;
25006 arm_stack_offsets *offsets;
25007 int regno;
25009 /* Naked functions don't have prologues. */
25010 if (IS_NAKED (arm_current_func_type ()))
25011 return;
25013 offsets = arm_get_frame_offsets ();
25014 amount = offsets->outgoing_args - offsets->saved_regs;
25016 if (frame_pointer_needed)
25018 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25019 amount = offsets->locals_base - offsets->saved_regs;
25021 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25023 gcc_assert (amount >= 0);
25024 if (amount)
25026 emit_insn (gen_blockage ());
25028 if (amount < 512)
25029 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25030 GEN_INT (amount)));
25031 else
25033 /* r3 is always free in the epilogue. */
25034 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25036 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25037 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25041 /* Emit a USE (stack_pointer_rtx), so that
25042 the stack adjustment will not be deleted. */
25043 emit_insn (gen_force_register_use (stack_pointer_rtx));
25045 if (crtl->profile || !TARGET_SCHED_PROLOG)
25046 emit_insn (gen_blockage ());
25048 /* Emit a clobber for each insn that will be restored in the epilogue,
25049 so that flow2 will get register lifetimes correct. */
25050 for (regno = 0; regno < 13; regno++)
25051 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25052 emit_clobber (gen_rtx_REG (SImode, regno));
25054 if (! df_regs_ever_live_p (LR_REGNUM))
25055 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25058 /* Epilogue code for APCS frame. */
25059 static void
25060 arm_expand_epilogue_apcs_frame (bool really_return)
25062 unsigned long func_type;
25063 unsigned long saved_regs_mask;
25064 int num_regs = 0;
25065 int i;
25066 int floats_from_frame = 0;
25067 arm_stack_offsets *offsets;
25069 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25070 func_type = arm_current_func_type ();
25072 /* Get frame offsets for ARM. */
25073 offsets = arm_get_frame_offsets ();
25074 saved_regs_mask = offsets->saved_regs_mask;
25076 /* Find the offset of the floating-point save area in the frame. */
25077 floats_from_frame
25078 = (offsets->saved_args
25079 + arm_compute_static_chain_stack_bytes ()
25080 - offsets->frame);
25082 /* Compute how many core registers saved and how far away the floats are. */
25083 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25084 if (saved_regs_mask & (1 << i))
25086 num_regs++;
25087 floats_from_frame += 4;
25090 if (TARGET_HARD_FLOAT && TARGET_VFP)
25092 int start_reg;
25093 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25095 /* The offset is from IP_REGNUM. */
25096 int saved_size = arm_get_vfp_saved_size ();
25097 if (saved_size > 0)
25099 rtx_insn *insn;
25100 floats_from_frame += saved_size;
25101 insn = emit_insn (gen_addsi3 (ip_rtx,
25102 hard_frame_pointer_rtx,
25103 GEN_INT (-floats_from_frame)));
25104 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25105 ip_rtx, hard_frame_pointer_rtx);
25108 /* Generate VFP register multi-pop. */
25109 start_reg = FIRST_VFP_REGNUM;
25111 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25112 /* Look for a case where a reg does not need restoring. */
25113 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25114 && (!df_regs_ever_live_p (i + 1)
25115 || call_used_regs[i + 1]))
25117 if (start_reg != i)
25118 arm_emit_vfp_multi_reg_pop (start_reg,
25119 (i - start_reg) / 2,
25120 gen_rtx_REG (SImode,
25121 IP_REGNUM));
25122 start_reg = i + 2;
25125 /* Restore the remaining regs that we have discovered (or possibly
25126 even all of them, if the conditional in the for loop never
25127 fired). */
25128 if (start_reg != i)
25129 arm_emit_vfp_multi_reg_pop (start_reg,
25130 (i - start_reg) / 2,
25131 gen_rtx_REG (SImode, IP_REGNUM));
25134 if (TARGET_IWMMXT)
25136 /* The frame pointer is guaranteed to be non-double-word aligned, as
25137 it is set to double-word-aligned old_stack_pointer - 4. */
25138 rtx_insn *insn;
25139 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25141 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25142 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25144 rtx addr = gen_frame_mem (V2SImode,
25145 plus_constant (Pmode, hard_frame_pointer_rtx,
25146 - lrm_count * 4));
25147 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25148 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25149 gen_rtx_REG (V2SImode, i),
25150 NULL_RTX);
25151 lrm_count += 2;
25155 /* saved_regs_mask should contain IP which contains old stack pointer
25156 at the time of activation creation. Since SP and IP are adjacent registers,
25157 we can restore the value directly into SP. */
25158 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25159 saved_regs_mask &= ~(1 << IP_REGNUM);
25160 saved_regs_mask |= (1 << SP_REGNUM);
25162 /* There are two registers left in saved_regs_mask - LR and PC. We
25163 only need to restore LR (the return address), but to
25164 save time we can load it directly into PC, unless we need a
25165 special function exit sequence, or we are not really returning. */
25166 if (really_return
25167 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25168 && !crtl->calls_eh_return)
25169 /* Delete LR from the register mask, so that LR on
25170 the stack is loaded into the PC in the register mask. */
25171 saved_regs_mask &= ~(1 << LR_REGNUM);
25172 else
25173 saved_regs_mask &= ~(1 << PC_REGNUM);
25175 num_regs = bit_count (saved_regs_mask);
25176 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25178 rtx_insn *insn;
25179 emit_insn (gen_blockage ());
25180 /* Unwind the stack to just below the saved registers. */
25181 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25182 hard_frame_pointer_rtx,
25183 GEN_INT (- 4 * num_regs)));
25185 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25186 stack_pointer_rtx, hard_frame_pointer_rtx);
25189 arm_emit_multi_reg_pop (saved_regs_mask);
25191 if (IS_INTERRUPT (func_type))
25193 /* Interrupt handlers will have pushed the
25194 IP onto the stack, so restore it now. */
25195 rtx_insn *insn;
25196 rtx addr = gen_rtx_MEM (SImode,
25197 gen_rtx_POST_INC (SImode,
25198 stack_pointer_rtx));
25199 set_mem_alias_set (addr, get_frame_alias_set ());
25200 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25201 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25202 gen_rtx_REG (SImode, IP_REGNUM),
25203 NULL_RTX);
25206 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25207 return;
25209 if (crtl->calls_eh_return)
25210 emit_insn (gen_addsi3 (stack_pointer_rtx,
25211 stack_pointer_rtx,
25212 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25214 if (IS_STACKALIGN (func_type))
25215 /* Restore the original stack pointer. Before prologue, the stack was
25216 realigned and the original stack pointer saved in r0. For details,
25217 see comment in arm_expand_prologue. */
25218 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25220 emit_jump_insn (simple_return_rtx);
25223 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25224 function is not a sibcall. */
25225 void
25226 arm_expand_epilogue (bool really_return)
25228 unsigned long func_type;
25229 unsigned long saved_regs_mask;
25230 int num_regs = 0;
25231 int i;
25232 int amount;
25233 arm_stack_offsets *offsets;
25235 func_type = arm_current_func_type ();
25237 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25238 let output_return_instruction take care of instruction emission if any. */
25239 if (IS_NAKED (func_type)
25240 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25242 if (really_return)
25243 emit_jump_insn (simple_return_rtx);
25244 return;
25247 /* If we are throwing an exception, then we really must be doing a
25248 return, so we can't tail-call. */
25249 gcc_assert (!crtl->calls_eh_return || really_return);
25251 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25253 arm_expand_epilogue_apcs_frame (really_return);
25254 return;
25257 /* Get frame offsets for ARM. */
25258 offsets = arm_get_frame_offsets ();
25259 saved_regs_mask = offsets->saved_regs_mask;
25260 num_regs = bit_count (saved_regs_mask);
25262 if (frame_pointer_needed)
25264 rtx_insn *insn;
25265 /* Restore stack pointer if necessary. */
25266 if (TARGET_ARM)
25268 /* In ARM mode, frame pointer points to first saved register.
25269 Restore stack pointer to last saved register. */
25270 amount = offsets->frame - offsets->saved_regs;
25272 /* Force out any pending memory operations that reference stacked data
25273 before stack de-allocation occurs. */
25274 emit_insn (gen_blockage ());
25275 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25276 hard_frame_pointer_rtx,
25277 GEN_INT (amount)));
25278 arm_add_cfa_adjust_cfa_note (insn, amount,
25279 stack_pointer_rtx,
25280 hard_frame_pointer_rtx);
25282 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25283 deleted. */
25284 emit_insn (gen_force_register_use (stack_pointer_rtx));
25286 else
25288 /* In Thumb-2 mode, the frame pointer points to the last saved
25289 register. */
25290 amount = offsets->locals_base - offsets->saved_regs;
25291 if (amount)
25293 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25294 hard_frame_pointer_rtx,
25295 GEN_INT (amount)));
25296 arm_add_cfa_adjust_cfa_note (insn, amount,
25297 hard_frame_pointer_rtx,
25298 hard_frame_pointer_rtx);
25301 /* Force out any pending memory operations that reference stacked data
25302 before stack de-allocation occurs. */
25303 emit_insn (gen_blockage ());
25304 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25305 hard_frame_pointer_rtx));
25306 arm_add_cfa_adjust_cfa_note (insn, 0,
25307 stack_pointer_rtx,
25308 hard_frame_pointer_rtx);
25309 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25310 deleted. */
25311 emit_insn (gen_force_register_use (stack_pointer_rtx));
25314 else
25316 /* Pop off outgoing args and local frame to adjust stack pointer to
25317 last saved register. */
25318 amount = offsets->outgoing_args - offsets->saved_regs;
25319 if (amount)
25321 rtx_insn *tmp;
25322 /* Force out any pending memory operations that reference stacked data
25323 before stack de-allocation occurs. */
25324 emit_insn (gen_blockage ());
25325 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25326 stack_pointer_rtx,
25327 GEN_INT (amount)));
25328 arm_add_cfa_adjust_cfa_note (tmp, amount,
25329 stack_pointer_rtx, stack_pointer_rtx);
25330 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25331 not deleted. */
25332 emit_insn (gen_force_register_use (stack_pointer_rtx));
25336 if (TARGET_HARD_FLOAT && TARGET_VFP)
25338 /* Generate VFP register multi-pop. */
25339 int end_reg = LAST_VFP_REGNUM + 1;
25341 /* Scan the registers in reverse order. We need to match
25342 any groupings made in the prologue and generate matching
25343 vldm operations. The need to match groups is because,
25344 unlike pop, vldm can only do consecutive regs. */
25345 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25346 /* Look for a case where a reg does not need restoring. */
25347 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25348 && (!df_regs_ever_live_p (i + 1)
25349 || call_used_regs[i + 1]))
25351 /* Restore the regs discovered so far (from reg+2 to
25352 end_reg). */
25353 if (end_reg > i + 2)
25354 arm_emit_vfp_multi_reg_pop (i + 2,
25355 (end_reg - (i + 2)) / 2,
25356 stack_pointer_rtx);
25357 end_reg = i;
25360 /* Restore the remaining regs that we have discovered (or possibly
25361 even all of them, if the conditional in the for loop never
25362 fired). */
25363 if (end_reg > i + 2)
25364 arm_emit_vfp_multi_reg_pop (i + 2,
25365 (end_reg - (i + 2)) / 2,
25366 stack_pointer_rtx);
25369 if (TARGET_IWMMXT)
25370 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25371 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25373 rtx_insn *insn;
25374 rtx addr = gen_rtx_MEM (V2SImode,
25375 gen_rtx_POST_INC (SImode,
25376 stack_pointer_rtx));
25377 set_mem_alias_set (addr, get_frame_alias_set ());
25378 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25379 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25380 gen_rtx_REG (V2SImode, i),
25381 NULL_RTX);
25382 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25383 stack_pointer_rtx, stack_pointer_rtx);
25386 if (saved_regs_mask)
25388 rtx insn;
25389 bool return_in_pc = false;
25391 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25392 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25393 && !IS_STACKALIGN (func_type)
25394 && really_return
25395 && crtl->args.pretend_args_size == 0
25396 && saved_regs_mask & (1 << LR_REGNUM)
25397 && !crtl->calls_eh_return)
25399 saved_regs_mask &= ~(1 << LR_REGNUM);
25400 saved_regs_mask |= (1 << PC_REGNUM);
25401 return_in_pc = true;
25404 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25406 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25407 if (saved_regs_mask & (1 << i))
25409 rtx addr = gen_rtx_MEM (SImode,
25410 gen_rtx_POST_INC (SImode,
25411 stack_pointer_rtx));
25412 set_mem_alias_set (addr, get_frame_alias_set ());
25414 if (i == PC_REGNUM)
25416 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25417 XVECEXP (insn, 0, 0) = ret_rtx;
25418 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25419 addr);
25420 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25421 insn = emit_jump_insn (insn);
25423 else
25425 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25426 addr));
25427 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25428 gen_rtx_REG (SImode, i),
25429 NULL_RTX);
25430 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25431 stack_pointer_rtx,
25432 stack_pointer_rtx);
25436 else
25438 if (TARGET_LDRD
25439 && current_tune->prefer_ldrd_strd
25440 && !optimize_function_for_size_p (cfun))
25442 if (TARGET_THUMB2)
25443 thumb2_emit_ldrd_pop (saved_regs_mask);
25444 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25445 arm_emit_ldrd_pop (saved_regs_mask);
25446 else
25447 arm_emit_multi_reg_pop (saved_regs_mask);
25449 else
25450 arm_emit_multi_reg_pop (saved_regs_mask);
25453 if (return_in_pc)
25454 return;
25457 amount
25458 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25459 if (amount)
25461 int i, j;
25462 rtx dwarf = NULL_RTX;
25463 rtx_insn *tmp =
25464 emit_insn (gen_addsi3 (stack_pointer_rtx,
25465 stack_pointer_rtx,
25466 GEN_INT (amount)));
25468 RTX_FRAME_RELATED_P (tmp) = 1;
25470 if (cfun->machine->uses_anonymous_args)
25472 /* Restore pretend args. Refer arm_expand_prologue on how to save
25473 pretend_args in stack. */
25474 int num_regs = crtl->args.pretend_args_size / 4;
25475 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25476 for (j = 0, i = 0; j < num_regs; i++)
25477 if (saved_regs_mask & (1 << i))
25479 rtx reg = gen_rtx_REG (SImode, i);
25480 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25481 j++;
25483 REG_NOTES (tmp) = dwarf;
25485 arm_add_cfa_adjust_cfa_note (tmp, amount,
25486 stack_pointer_rtx, stack_pointer_rtx);
25489 if (!really_return)
25490 return;
25492 if (crtl->calls_eh_return)
25493 emit_insn (gen_addsi3 (stack_pointer_rtx,
25494 stack_pointer_rtx,
25495 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25497 if (IS_STACKALIGN (func_type))
25498 /* Restore the original stack pointer. Before prologue, the stack was
25499 realigned and the original stack pointer saved in r0. For details,
25500 see comment in arm_expand_prologue. */
25501 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25503 emit_jump_insn (simple_return_rtx);
25506 /* Implementation of insn prologue_thumb1_interwork. This is the first
25507 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25509 const char *
25510 thumb1_output_interwork (void)
25512 const char * name;
25513 FILE *f = asm_out_file;
25515 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25516 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25517 == SYMBOL_REF);
25518 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25520 /* Generate code sequence to switch us into Thumb mode. */
25521 /* The .code 32 directive has already been emitted by
25522 ASM_DECLARE_FUNCTION_NAME. */
25523 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25524 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25526 /* Generate a label, so that the debugger will notice the
25527 change in instruction sets. This label is also used by
25528 the assembler to bypass the ARM code when this function
25529 is called from a Thumb encoded function elsewhere in the
25530 same file. Hence the definition of STUB_NAME here must
25531 agree with the definition in gas/config/tc-arm.c. */
25533 #define STUB_NAME ".real_start_of"
25535 fprintf (f, "\t.code\t16\n");
25536 #ifdef ARM_PE
25537 if (arm_dllexport_name_p (name))
25538 name = arm_strip_name_encoding (name);
25539 #endif
25540 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25541 fprintf (f, "\t.thumb_func\n");
25542 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25544 return "";
25547 /* Handle the case of a double word load into a low register from
25548 a computed memory address. The computed address may involve a
25549 register which is overwritten by the load. */
25550 const char *
25551 thumb_load_double_from_address (rtx *operands)
25553 rtx addr;
25554 rtx base;
25555 rtx offset;
25556 rtx arg1;
25557 rtx arg2;
25559 gcc_assert (REG_P (operands[0]));
25560 gcc_assert (MEM_P (operands[1]));
25562 /* Get the memory address. */
25563 addr = XEXP (operands[1], 0);
25565 /* Work out how the memory address is computed. */
25566 switch (GET_CODE (addr))
25568 case REG:
25569 operands[2] = adjust_address (operands[1], SImode, 4);
25571 if (REGNO (operands[0]) == REGNO (addr))
25573 output_asm_insn ("ldr\t%H0, %2", operands);
25574 output_asm_insn ("ldr\t%0, %1", operands);
25576 else
25578 output_asm_insn ("ldr\t%0, %1", operands);
25579 output_asm_insn ("ldr\t%H0, %2", operands);
25581 break;
25583 case CONST:
25584 /* Compute <address> + 4 for the high order load. */
25585 operands[2] = adjust_address (operands[1], SImode, 4);
25587 output_asm_insn ("ldr\t%0, %1", operands);
25588 output_asm_insn ("ldr\t%H0, %2", operands);
25589 break;
25591 case PLUS:
25592 arg1 = XEXP (addr, 0);
25593 arg2 = XEXP (addr, 1);
25595 if (CONSTANT_P (arg1))
25596 base = arg2, offset = arg1;
25597 else
25598 base = arg1, offset = arg2;
25600 gcc_assert (REG_P (base));
25602 /* Catch the case of <address> = <reg> + <reg> */
25603 if (REG_P (offset))
25605 int reg_offset = REGNO (offset);
25606 int reg_base = REGNO (base);
25607 int reg_dest = REGNO (operands[0]);
25609 /* Add the base and offset registers together into the
25610 higher destination register. */
25611 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25612 reg_dest + 1, reg_base, reg_offset);
25614 /* Load the lower destination register from the address in
25615 the higher destination register. */
25616 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25617 reg_dest, reg_dest + 1);
25619 /* Load the higher destination register from its own address
25620 plus 4. */
25621 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25622 reg_dest + 1, reg_dest + 1);
25624 else
25626 /* Compute <address> + 4 for the high order load. */
25627 operands[2] = adjust_address (operands[1], SImode, 4);
25629 /* If the computed address is held in the low order register
25630 then load the high order register first, otherwise always
25631 load the low order register first. */
25632 if (REGNO (operands[0]) == REGNO (base))
25634 output_asm_insn ("ldr\t%H0, %2", operands);
25635 output_asm_insn ("ldr\t%0, %1", operands);
25637 else
25639 output_asm_insn ("ldr\t%0, %1", operands);
25640 output_asm_insn ("ldr\t%H0, %2", operands);
25643 break;
25645 case LABEL_REF:
25646 /* With no registers to worry about we can just load the value
25647 directly. */
25648 operands[2] = adjust_address (operands[1], SImode, 4);
25650 output_asm_insn ("ldr\t%H0, %2", operands);
25651 output_asm_insn ("ldr\t%0, %1", operands);
25652 break;
25654 default:
25655 gcc_unreachable ();
25658 return "";
25661 const char *
25662 thumb_output_move_mem_multiple (int n, rtx *operands)
25664 switch (n)
25666 case 2:
25667 if (REGNO (operands[4]) > REGNO (operands[5]))
25668 std::swap (operands[4], operands[5]);
25670 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25671 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25672 break;
25674 case 3:
25675 if (REGNO (operands[4]) > REGNO (operands[5]))
25676 std::swap (operands[4], operands[5]);
25677 if (REGNO (operands[5]) > REGNO (operands[6]))
25678 std::swap (operands[5], operands[6]);
25679 if (REGNO (operands[4]) > REGNO (operands[5]))
25680 std::swap (operands[4], operands[5]);
25682 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25683 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25684 break;
25686 default:
25687 gcc_unreachable ();
25690 return "";
25693 /* Output a call-via instruction for thumb state. */
25694 const char *
25695 thumb_call_via_reg (rtx reg)
25697 int regno = REGNO (reg);
25698 rtx *labelp;
25700 gcc_assert (regno < LR_REGNUM);
25702 /* If we are in the normal text section we can use a single instance
25703 per compilation unit. If we are doing function sections, then we need
25704 an entry per section, since we can't rely on reachability. */
25705 if (in_section == text_section)
25707 thumb_call_reg_needed = 1;
25709 if (thumb_call_via_label[regno] == NULL)
25710 thumb_call_via_label[regno] = gen_label_rtx ();
25711 labelp = thumb_call_via_label + regno;
25713 else
25715 if (cfun->machine->call_via[regno] == NULL)
25716 cfun->machine->call_via[regno] = gen_label_rtx ();
25717 labelp = cfun->machine->call_via + regno;
25720 output_asm_insn ("bl\t%a0", labelp);
25721 return "";
25724 /* Routines for generating rtl. */
25725 void
25726 thumb_expand_movmemqi (rtx *operands)
25728 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25729 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25730 HOST_WIDE_INT len = INTVAL (operands[2]);
25731 HOST_WIDE_INT offset = 0;
25733 while (len >= 12)
25735 emit_insn (gen_movmem12b (out, in, out, in));
25736 len -= 12;
25739 if (len >= 8)
25741 emit_insn (gen_movmem8b (out, in, out, in));
25742 len -= 8;
25745 if (len >= 4)
25747 rtx reg = gen_reg_rtx (SImode);
25748 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25749 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25750 len -= 4;
25751 offset += 4;
25754 if (len >= 2)
25756 rtx reg = gen_reg_rtx (HImode);
25757 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25758 plus_constant (Pmode, in,
25759 offset))));
25760 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25761 offset)),
25762 reg));
25763 len -= 2;
25764 offset += 2;
25767 if (len)
25769 rtx reg = gen_reg_rtx (QImode);
25770 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25771 plus_constant (Pmode, in,
25772 offset))));
25773 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25774 offset)),
25775 reg));
25779 void
25780 thumb_reload_out_hi (rtx *operands)
25782 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25785 /* Handle reading a half-word from memory during reload. */
25786 void
25787 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25789 gcc_unreachable ();
25792 /* Return the length of a function name prefix
25793 that starts with the character 'c'. */
25794 static int
25795 arm_get_strip_length (int c)
25797 switch (c)
25799 ARM_NAME_ENCODING_LENGTHS
25800 default: return 0;
25804 /* Return a pointer to a function's name with any
25805 and all prefix encodings stripped from it. */
25806 const char *
25807 arm_strip_name_encoding (const char *name)
25809 int skip;
25811 while ((skip = arm_get_strip_length (* name)))
25812 name += skip;
25814 return name;
25817 /* If there is a '*' anywhere in the name's prefix, then
25818 emit the stripped name verbatim, otherwise prepend an
25819 underscore if leading underscores are being used. */
25820 void
25821 arm_asm_output_labelref (FILE *stream, const char *name)
25823 int skip;
25824 int verbatim = 0;
25826 while ((skip = arm_get_strip_length (* name)))
25828 verbatim |= (*name == '*');
25829 name += skip;
25832 if (verbatim)
25833 fputs (name, stream);
25834 else
25835 asm_fprintf (stream, "%U%s", name);
25838 /* This function is used to emit an EABI tag and its associated value.
25839 We emit the numerical value of the tag in case the assembler does not
25840 support textual tags. (Eg gas prior to 2.20). If requested we include
25841 the tag name in a comment so that anyone reading the assembler output
25842 will know which tag is being set.
25844 This function is not static because arm-c.c needs it too. */
25846 void
25847 arm_emit_eabi_attribute (const char *name, int num, int val)
25849 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25850 if (flag_verbose_asm || flag_debug_asm)
25851 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25852 asm_fprintf (asm_out_file, "\n");
25855 /* This function is used to print CPU tuning information as comment
25856 in assembler file. Pointers are not printed for now. */
25858 void
25859 arm_print_tune_info (void)
25861 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25862 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25863 current_tune->constant_limit);
25864 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25865 current_tune->max_insns_skipped);
25866 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
25867 current_tune->prefetch.num_slots);
25868 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
25869 current_tune->prefetch.l1_cache_size);
25870 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25871 current_tune->prefetch.l1_cache_line_size);
25872 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25873 (int) current_tune->prefer_constant_pool);
25874 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25875 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25876 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25877 current_tune->branch_cost (false, false));
25878 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25879 current_tune->branch_cost (false, true));
25880 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25881 current_tune->branch_cost (true, false));
25882 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25883 current_tune->branch_cost (true, true));
25884 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25885 (int) current_tune->prefer_ldrd_strd);
25886 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25887 (int) current_tune->logical_op_non_short_circuit_thumb,
25888 (int) current_tune->logical_op_non_short_circuit_arm);
25889 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25890 (int) current_tune->prefer_neon_for_64bits);
25891 asm_fprintf (asm_out_file,
25892 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25893 (int) current_tune->disparage_flag_setting_t16_encodings);
25894 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25895 (int) current_tune->string_ops_prefer_neon);
25896 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25897 current_tune->max_insns_inline_memset);
25898 asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
25899 current_tune->fusible_ops);
25900 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25901 (int) current_tune->sched_autopref);
25904 static void
25905 arm_file_start (void)
25907 int val;
25909 if (TARGET_BPABI)
25911 const char *fpu_name;
25912 if (arm_selected_arch)
25914 /* armv7ve doesn't support any extensions. */
25915 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25917 /* Keep backward compatability for assemblers
25918 which don't support armv7ve. */
25919 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25920 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25921 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25922 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25923 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25925 else
25927 const char* pos = strchr (arm_selected_arch->name, '+');
25928 if (pos)
25930 char buf[15];
25931 gcc_assert (strlen (arm_selected_arch->name)
25932 <= sizeof (buf) / sizeof (*pos));
25933 strncpy (buf, arm_selected_arch->name,
25934 (pos - arm_selected_arch->name) * sizeof (*pos));
25935 buf[pos - arm_selected_arch->name] = '\0';
25936 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25937 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25939 else
25940 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25943 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25944 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25945 else
25947 const char* truncated_name
25948 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25949 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25952 if (print_tune_info)
25953 arm_print_tune_info ();
25955 if (TARGET_SOFT_FLOAT)
25957 fpu_name = "softvfp";
25959 else
25961 fpu_name = arm_fpu_desc->name;
25962 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25964 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
25965 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25967 if (TARGET_HARD_FLOAT_ABI)
25968 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25971 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25973 /* Some of these attributes only apply when the corresponding features
25974 are used. However we don't have any easy way of figuring this out.
25975 Conservatively record the setting that would have been used. */
25977 if (flag_rounding_math)
25978 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25980 if (!flag_unsafe_math_optimizations)
25982 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25983 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25985 if (flag_signaling_nans)
25986 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25988 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25989 flag_finite_math_only ? 1 : 3);
25991 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25992 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25993 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25994 flag_short_enums ? 1 : 2);
25996 /* Tag_ABI_optimization_goals. */
25997 if (optimize_size)
25998 val = 4;
25999 else if (optimize >= 2)
26000 val = 2;
26001 else if (optimize)
26002 val = 1;
26003 else
26004 val = 6;
26005 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26007 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26008 unaligned_access);
26010 if (arm_fp16_format)
26011 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26012 (int) arm_fp16_format);
26014 if (arm_lang_output_object_attributes_hook)
26015 arm_lang_output_object_attributes_hook();
26018 default_file_start ();
26021 static void
26022 arm_file_end (void)
26024 int regno;
26026 if (NEED_INDICATE_EXEC_STACK)
26027 /* Add .note.GNU-stack. */
26028 file_end_indicate_exec_stack ();
26030 if (! thumb_call_reg_needed)
26031 return;
26033 switch_to_section (text_section);
26034 asm_fprintf (asm_out_file, "\t.code 16\n");
26035 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26037 for (regno = 0; regno < LR_REGNUM; regno++)
26039 rtx label = thumb_call_via_label[regno];
26041 if (label != 0)
26043 targetm.asm_out.internal_label (asm_out_file, "L",
26044 CODE_LABEL_NUMBER (label));
26045 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26050 #ifndef ARM_PE
26051 /* Symbols in the text segment can be accessed without indirecting via the
26052 constant pool; it may take an extra binary operation, but this is still
26053 faster than indirecting via memory. Don't do this when not optimizing,
26054 since we won't be calculating al of the offsets necessary to do this
26055 simplification. */
26057 static void
26058 arm_encode_section_info (tree decl, rtx rtl, int first)
26060 if (optimize > 0 && TREE_CONSTANT (decl))
26061 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26063 default_encode_section_info (decl, rtl, first);
26065 #endif /* !ARM_PE */
26067 static void
26068 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26070 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26071 && !strcmp (prefix, "L"))
26073 arm_ccfsm_state = 0;
26074 arm_target_insn = NULL;
26076 default_internal_label (stream, prefix, labelno);
26079 /* Output code to add DELTA to the first argument, and then jump
26080 to FUNCTION. Used for C++ multiple inheritance. */
26081 static void
26082 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
26083 HOST_WIDE_INT delta,
26084 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
26085 tree function)
26087 static int thunk_label = 0;
26088 char label[256];
26089 char labelpc[256];
26090 int mi_delta = delta;
26091 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26092 int shift = 0;
26093 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26094 ? 1 : 0);
26095 if (mi_delta < 0)
26096 mi_delta = - mi_delta;
26098 final_start_function (emit_barrier (), file, 1);
26100 if (TARGET_THUMB1)
26102 int labelno = thunk_label++;
26103 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26104 /* Thunks are entered in arm mode when avaiable. */
26105 if (TARGET_THUMB1_ONLY)
26107 /* push r3 so we can use it as a temporary. */
26108 /* TODO: Omit this save if r3 is not used. */
26109 fputs ("\tpush {r3}\n", file);
26110 fputs ("\tldr\tr3, ", file);
26112 else
26114 fputs ("\tldr\tr12, ", file);
26116 assemble_name (file, label);
26117 fputc ('\n', file);
26118 if (flag_pic)
26120 /* If we are generating PIC, the ldr instruction below loads
26121 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26122 the address of the add + 8, so we have:
26124 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26125 = target + 1.
26127 Note that we have "+ 1" because some versions of GNU ld
26128 don't set the low bit of the result for R_ARM_REL32
26129 relocations against thumb function symbols.
26130 On ARMv6M this is +4, not +8. */
26131 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26132 assemble_name (file, labelpc);
26133 fputs (":\n", file);
26134 if (TARGET_THUMB1_ONLY)
26136 /* This is 2 insns after the start of the thunk, so we know it
26137 is 4-byte aligned. */
26138 fputs ("\tadd\tr3, pc, r3\n", file);
26139 fputs ("\tmov r12, r3\n", file);
26141 else
26142 fputs ("\tadd\tr12, pc, r12\n", file);
26144 else if (TARGET_THUMB1_ONLY)
26145 fputs ("\tmov r12, r3\n", file);
26147 if (TARGET_THUMB1_ONLY)
26149 if (mi_delta > 255)
26151 fputs ("\tldr\tr3, ", file);
26152 assemble_name (file, label);
26153 fputs ("+4\n", file);
26154 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26155 mi_op, this_regno, this_regno);
26157 else if (mi_delta != 0)
26159 /* Thumb1 unified syntax requires s suffix in instruction name when
26160 one of the operands is immediate. */
26161 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26162 mi_op, this_regno, this_regno,
26163 mi_delta);
26166 else
26168 /* TODO: Use movw/movt for large constants when available. */
26169 while (mi_delta != 0)
26171 if ((mi_delta & (3 << shift)) == 0)
26172 shift += 2;
26173 else
26175 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26176 mi_op, this_regno, this_regno,
26177 mi_delta & (0xff << shift));
26178 mi_delta &= ~(0xff << shift);
26179 shift += 8;
26183 if (TARGET_THUMB1)
26185 if (TARGET_THUMB1_ONLY)
26186 fputs ("\tpop\t{r3}\n", file);
26188 fprintf (file, "\tbx\tr12\n");
26189 ASM_OUTPUT_ALIGN (file, 2);
26190 assemble_name (file, label);
26191 fputs (":\n", file);
26192 if (flag_pic)
26194 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26195 rtx tem = XEXP (DECL_RTL (function), 0);
26196 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26197 pipeline offset is four rather than eight. Adjust the offset
26198 accordingly. */
26199 tem = plus_constant (GET_MODE (tem), tem,
26200 TARGET_THUMB1_ONLY ? -3 : -7);
26201 tem = gen_rtx_MINUS (GET_MODE (tem),
26202 tem,
26203 gen_rtx_SYMBOL_REF (Pmode,
26204 ggc_strdup (labelpc)));
26205 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26207 else
26208 /* Output ".word .LTHUNKn". */
26209 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26211 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26212 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26214 else
26216 fputs ("\tb\t", file);
26217 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26218 if (NEED_PLT_RELOC)
26219 fputs ("(PLT)", file);
26220 fputc ('\n', file);
26223 final_end_function ();
26227 arm_emit_vector_const (FILE *file, rtx x)
26229 int i;
26230 const char * pattern;
26232 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26234 switch (GET_MODE (x))
26236 case V2SImode: pattern = "%08x"; break;
26237 case V4HImode: pattern = "%04x"; break;
26238 case V8QImode: pattern = "%02x"; break;
26239 default: gcc_unreachable ();
26242 fprintf (file, "0x");
26243 for (i = CONST_VECTOR_NUNITS (x); i--;)
26245 rtx element;
26247 element = CONST_VECTOR_ELT (x, i);
26248 fprintf (file, pattern, INTVAL (element));
26251 return 1;
26254 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26255 HFmode constant pool entries are actually loaded with ldr. */
26256 void
26257 arm_emit_fp16_const (rtx c)
26259 long bits;
26261 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26262 if (WORDS_BIG_ENDIAN)
26263 assemble_zeros (2);
26264 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26265 if (!WORDS_BIG_ENDIAN)
26266 assemble_zeros (2);
26269 const char *
26270 arm_output_load_gr (rtx *operands)
26272 rtx reg;
26273 rtx offset;
26274 rtx wcgr;
26275 rtx sum;
26277 if (!MEM_P (operands [1])
26278 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26279 || !REG_P (reg = XEXP (sum, 0))
26280 || !CONST_INT_P (offset = XEXP (sum, 1))
26281 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26282 return "wldrw%?\t%0, %1";
26284 /* Fix up an out-of-range load of a GR register. */
26285 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26286 wcgr = operands[0];
26287 operands[0] = reg;
26288 output_asm_insn ("ldr%?\t%0, %1", operands);
26290 operands[0] = wcgr;
26291 operands[1] = reg;
26292 output_asm_insn ("tmcr%?\t%0, %1", operands);
26293 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26295 return "";
26298 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26300 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26301 named arg and all anonymous args onto the stack.
26302 XXX I know the prologue shouldn't be pushing registers, but it is faster
26303 that way. */
26305 static void
26306 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26307 machine_mode mode,
26308 tree type,
26309 int *pretend_size,
26310 int second_time ATTRIBUTE_UNUSED)
26312 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26313 int nregs;
26315 cfun->machine->uses_anonymous_args = 1;
26316 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26318 nregs = pcum->aapcs_ncrn;
26319 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26320 nregs++;
26322 else
26323 nregs = pcum->nregs;
26325 if (nregs < NUM_ARG_REGS)
26326 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26329 /* We can't rely on the caller doing the proper promotion when
26330 using APCS or ATPCS. */
26332 static bool
26333 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26335 return !TARGET_AAPCS_BASED;
26338 static machine_mode
26339 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26340 machine_mode mode,
26341 int *punsignedp ATTRIBUTE_UNUSED,
26342 const_tree fntype ATTRIBUTE_UNUSED,
26343 int for_return ATTRIBUTE_UNUSED)
26345 if (GET_MODE_CLASS (mode) == MODE_INT
26346 && GET_MODE_SIZE (mode) < 4)
26347 return SImode;
26349 return mode;
26352 /* AAPCS based ABIs use short enums by default. */
26354 static bool
26355 arm_default_short_enums (void)
26357 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26361 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26363 static bool
26364 arm_align_anon_bitfield (void)
26366 return TARGET_AAPCS_BASED;
26370 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26372 static tree
26373 arm_cxx_guard_type (void)
26375 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26379 /* The EABI says test the least significant bit of a guard variable. */
26381 static bool
26382 arm_cxx_guard_mask_bit (void)
26384 return TARGET_AAPCS_BASED;
26388 /* The EABI specifies that all array cookies are 8 bytes long. */
26390 static tree
26391 arm_get_cookie_size (tree type)
26393 tree size;
26395 if (!TARGET_AAPCS_BASED)
26396 return default_cxx_get_cookie_size (type);
26398 size = build_int_cst (sizetype, 8);
26399 return size;
26403 /* The EABI says that array cookies should also contain the element size. */
26405 static bool
26406 arm_cookie_has_size (void)
26408 return TARGET_AAPCS_BASED;
26412 /* The EABI says constructors and destructors should return a pointer to
26413 the object constructed/destroyed. */
26415 static bool
26416 arm_cxx_cdtor_returns_this (void)
26418 return TARGET_AAPCS_BASED;
26421 /* The EABI says that an inline function may never be the key
26422 method. */
26424 static bool
26425 arm_cxx_key_method_may_be_inline (void)
26427 return !TARGET_AAPCS_BASED;
26430 static void
26431 arm_cxx_determine_class_data_visibility (tree decl)
26433 if (!TARGET_AAPCS_BASED
26434 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26435 return;
26437 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26438 is exported. However, on systems without dynamic vague linkage,
26439 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26440 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26441 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26442 else
26443 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26444 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26447 static bool
26448 arm_cxx_class_data_always_comdat (void)
26450 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26451 vague linkage if the class has no key function. */
26452 return !TARGET_AAPCS_BASED;
26456 /* The EABI says __aeabi_atexit should be used to register static
26457 destructors. */
26459 static bool
26460 arm_cxx_use_aeabi_atexit (void)
26462 return TARGET_AAPCS_BASED;
26466 void
26467 arm_set_return_address (rtx source, rtx scratch)
26469 arm_stack_offsets *offsets;
26470 HOST_WIDE_INT delta;
26471 rtx addr;
26472 unsigned long saved_regs;
26474 offsets = arm_get_frame_offsets ();
26475 saved_regs = offsets->saved_regs_mask;
26477 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26478 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26479 else
26481 if (frame_pointer_needed)
26482 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26483 else
26485 /* LR will be the first saved register. */
26486 delta = offsets->outgoing_args - (offsets->frame + 4);
26489 if (delta >= 4096)
26491 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26492 GEN_INT (delta & ~4095)));
26493 addr = scratch;
26494 delta &= 4095;
26496 else
26497 addr = stack_pointer_rtx;
26499 addr = plus_constant (Pmode, addr, delta);
26501 /* The store needs to be marked as frame related in order to prevent
26502 DSE from deleting it as dead if it is based on fp. */
26503 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26504 RTX_FRAME_RELATED_P (insn) = 1;
26505 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26510 void
26511 thumb_set_return_address (rtx source, rtx scratch)
26513 arm_stack_offsets *offsets;
26514 HOST_WIDE_INT delta;
26515 HOST_WIDE_INT limit;
26516 int reg;
26517 rtx addr;
26518 unsigned long mask;
26520 emit_use (source);
26522 offsets = arm_get_frame_offsets ();
26523 mask = offsets->saved_regs_mask;
26524 if (mask & (1 << LR_REGNUM))
26526 limit = 1024;
26527 /* Find the saved regs. */
26528 if (frame_pointer_needed)
26530 delta = offsets->soft_frame - offsets->saved_args;
26531 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26532 if (TARGET_THUMB1)
26533 limit = 128;
26535 else
26537 delta = offsets->outgoing_args - offsets->saved_args;
26538 reg = SP_REGNUM;
26540 /* Allow for the stack frame. */
26541 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26542 delta -= 16;
26543 /* The link register is always the first saved register. */
26544 delta -= 4;
26546 /* Construct the address. */
26547 addr = gen_rtx_REG (SImode, reg);
26548 if (delta > limit)
26550 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26551 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26552 addr = scratch;
26554 else
26555 addr = plus_constant (Pmode, addr, delta);
26557 /* The store needs to be marked as frame related in order to prevent
26558 DSE from deleting it as dead if it is based on fp. */
26559 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26560 RTX_FRAME_RELATED_P (insn) = 1;
26561 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26563 else
26564 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26567 /* Implements target hook vector_mode_supported_p. */
26568 bool
26569 arm_vector_mode_supported_p (machine_mode mode)
26571 /* Neon also supports V2SImode, etc. listed in the clause below. */
26572 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26573 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26574 || mode == V2DImode || mode == V8HFmode))
26575 return true;
26577 if ((TARGET_NEON || TARGET_IWMMXT)
26578 && ((mode == V2SImode)
26579 || (mode == V4HImode)
26580 || (mode == V8QImode)))
26581 return true;
26583 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26584 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26585 || mode == V2HAmode))
26586 return true;
26588 return false;
26591 /* Implements target hook array_mode_supported_p. */
26593 static bool
26594 arm_array_mode_supported_p (machine_mode mode,
26595 unsigned HOST_WIDE_INT nelems)
26597 if (TARGET_NEON
26598 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26599 && (nelems >= 2 && nelems <= 4))
26600 return true;
26602 return false;
26605 /* Use the option -mvectorize-with-neon-double to override the use of quardword
26606 registers when autovectorizing for Neon, at least until multiple vector
26607 widths are supported properly by the middle-end. */
26609 static machine_mode
26610 arm_preferred_simd_mode (machine_mode mode)
26612 if (TARGET_NEON)
26613 switch (mode)
26615 case SFmode:
26616 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26617 case SImode:
26618 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26619 case HImode:
26620 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26621 case QImode:
26622 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26623 case DImode:
26624 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26625 return V2DImode;
26626 break;
26628 default:;
26631 if (TARGET_REALLY_IWMMXT)
26632 switch (mode)
26634 case SImode:
26635 return V2SImode;
26636 case HImode:
26637 return V4HImode;
26638 case QImode:
26639 return V8QImode;
26641 default:;
26644 return word_mode;
26647 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26649 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26650 using r0-r4 for function arguments, r7 for the stack frame and don't have
26651 enough left over to do doubleword arithmetic. For Thumb-2 all the
26652 potentially problematic instructions accept high registers so this is not
26653 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26654 that require many low registers. */
26655 static bool
26656 arm_class_likely_spilled_p (reg_class_t rclass)
26658 if ((TARGET_THUMB1 && rclass == LO_REGS)
26659 || rclass == CC_REG)
26660 return true;
26662 return false;
26665 /* Implements target hook small_register_classes_for_mode_p. */
26666 bool
26667 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26669 return TARGET_THUMB1;
26672 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26673 ARM insns and therefore guarantee that the shift count is modulo 256.
26674 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26675 guarantee no particular behavior for out-of-range counts. */
26677 static unsigned HOST_WIDE_INT
26678 arm_shift_truncation_mask (machine_mode mode)
26680 return mode == SImode ? 255 : 0;
26684 /* Map internal gcc register numbers to DWARF2 register numbers. */
26686 unsigned int
26687 arm_dbx_register_number (unsigned int regno)
26689 if (regno < 16)
26690 return regno;
26692 if (IS_VFP_REGNUM (regno))
26694 /* See comment in arm_dwarf_register_span. */
26695 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26696 return 64 + regno - FIRST_VFP_REGNUM;
26697 else
26698 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26701 if (IS_IWMMXT_GR_REGNUM (regno))
26702 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26704 if (IS_IWMMXT_REGNUM (regno))
26705 return 112 + regno - FIRST_IWMMXT_REGNUM;
26707 return DWARF_FRAME_REGISTERS;
26710 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26711 GCC models tham as 64 32-bit registers, so we need to describe this to
26712 the DWARF generation code. Other registers can use the default. */
26713 static rtx
26714 arm_dwarf_register_span (rtx rtl)
26716 machine_mode mode;
26717 unsigned regno;
26718 rtx parts[16];
26719 int nregs;
26720 int i;
26722 regno = REGNO (rtl);
26723 if (!IS_VFP_REGNUM (regno))
26724 return NULL_RTX;
26726 /* XXX FIXME: The EABI defines two VFP register ranges:
26727 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26728 256-287: D0-D31
26729 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26730 corresponding D register. Until GDB supports this, we shall use the
26731 legacy encodings. We also use these encodings for D0-D15 for
26732 compatibility with older debuggers. */
26733 mode = GET_MODE (rtl);
26734 if (GET_MODE_SIZE (mode) < 8)
26735 return NULL_RTX;
26737 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26739 nregs = GET_MODE_SIZE (mode) / 4;
26740 for (i = 0; i < nregs; i += 2)
26741 if (TARGET_BIG_END)
26743 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26744 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26746 else
26748 parts[i] = gen_rtx_REG (SImode, regno + i);
26749 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26752 else
26754 nregs = GET_MODE_SIZE (mode) / 8;
26755 for (i = 0; i < nregs; i++)
26756 parts[i] = gen_rtx_REG (DImode, regno + i);
26759 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26762 #if ARM_UNWIND_INFO
26763 /* Emit unwind directives for a store-multiple instruction or stack pointer
26764 push during alignment.
26765 These should only ever be generated by the function prologue code, so
26766 expect them to have a particular form.
26767 The store-multiple instruction sometimes pushes pc as the last register,
26768 although it should not be tracked into unwind information, or for -Os
26769 sometimes pushes some dummy registers before first register that needs
26770 to be tracked in unwind information; such dummy registers are there just
26771 to avoid separate stack adjustment, and will not be restored in the
26772 epilogue. */
26774 static void
26775 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26777 int i;
26778 HOST_WIDE_INT offset;
26779 HOST_WIDE_INT nregs;
26780 int reg_size;
26781 unsigned reg;
26782 unsigned lastreg;
26783 unsigned padfirst = 0, padlast = 0;
26784 rtx e;
26786 e = XVECEXP (p, 0, 0);
26787 gcc_assert (GET_CODE (e) == SET);
26789 /* First insn will adjust the stack pointer. */
26790 gcc_assert (GET_CODE (e) == SET
26791 && REG_P (SET_DEST (e))
26792 && REGNO (SET_DEST (e)) == SP_REGNUM
26793 && GET_CODE (SET_SRC (e)) == PLUS);
26795 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26796 nregs = XVECLEN (p, 0) - 1;
26797 gcc_assert (nregs);
26799 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26800 if (reg < 16)
26802 /* For -Os dummy registers can be pushed at the beginning to
26803 avoid separate stack pointer adjustment. */
26804 e = XVECEXP (p, 0, 1);
26805 e = XEXP (SET_DEST (e), 0);
26806 if (GET_CODE (e) == PLUS)
26807 padfirst = INTVAL (XEXP (e, 1));
26808 gcc_assert (padfirst == 0 || optimize_size);
26809 /* The function prologue may also push pc, but not annotate it as it is
26810 never restored. We turn this into a stack pointer adjustment. */
26811 e = XVECEXP (p, 0, nregs);
26812 e = XEXP (SET_DEST (e), 0);
26813 if (GET_CODE (e) == PLUS)
26814 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26815 else
26816 padlast = offset - 4;
26817 gcc_assert (padlast == 0 || padlast == 4);
26818 if (padlast == 4)
26819 fprintf (asm_out_file, "\t.pad #4\n");
26820 reg_size = 4;
26821 fprintf (asm_out_file, "\t.save {");
26823 else if (IS_VFP_REGNUM (reg))
26825 reg_size = 8;
26826 fprintf (asm_out_file, "\t.vsave {");
26828 else
26829 /* Unknown register type. */
26830 gcc_unreachable ();
26832 /* If the stack increment doesn't match the size of the saved registers,
26833 something has gone horribly wrong. */
26834 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26836 offset = padfirst;
26837 lastreg = 0;
26838 /* The remaining insns will describe the stores. */
26839 for (i = 1; i <= nregs; i++)
26841 /* Expect (set (mem <addr>) (reg)).
26842 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26843 e = XVECEXP (p, 0, i);
26844 gcc_assert (GET_CODE (e) == SET
26845 && MEM_P (SET_DEST (e))
26846 && REG_P (SET_SRC (e)));
26848 reg = REGNO (SET_SRC (e));
26849 gcc_assert (reg >= lastreg);
26851 if (i != 1)
26852 fprintf (asm_out_file, ", ");
26853 /* We can't use %r for vfp because we need to use the
26854 double precision register names. */
26855 if (IS_VFP_REGNUM (reg))
26856 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26857 else
26858 asm_fprintf (asm_out_file, "%r", reg);
26860 #ifdef ENABLE_CHECKING
26861 /* Check that the addresses are consecutive. */
26862 e = XEXP (SET_DEST (e), 0);
26863 if (GET_CODE (e) == PLUS)
26864 gcc_assert (REG_P (XEXP (e, 0))
26865 && REGNO (XEXP (e, 0)) == SP_REGNUM
26866 && CONST_INT_P (XEXP (e, 1))
26867 && offset == INTVAL (XEXP (e, 1)));
26868 else
26869 gcc_assert (i == 1
26870 && REG_P (e)
26871 && REGNO (e) == SP_REGNUM);
26872 offset += reg_size;
26873 #endif
26875 fprintf (asm_out_file, "}\n");
26876 if (padfirst)
26877 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26880 /* Emit unwind directives for a SET. */
26882 static void
26883 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26885 rtx e0;
26886 rtx e1;
26887 unsigned reg;
26889 e0 = XEXP (p, 0);
26890 e1 = XEXP (p, 1);
26891 switch (GET_CODE (e0))
26893 case MEM:
26894 /* Pushing a single register. */
26895 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26896 || !REG_P (XEXP (XEXP (e0, 0), 0))
26897 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26898 abort ();
26900 asm_fprintf (asm_out_file, "\t.save ");
26901 if (IS_VFP_REGNUM (REGNO (e1)))
26902 asm_fprintf(asm_out_file, "{d%d}\n",
26903 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26904 else
26905 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26906 break;
26908 case REG:
26909 if (REGNO (e0) == SP_REGNUM)
26911 /* A stack increment. */
26912 if (GET_CODE (e1) != PLUS
26913 || !REG_P (XEXP (e1, 0))
26914 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26915 || !CONST_INT_P (XEXP (e1, 1)))
26916 abort ();
26918 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26919 -INTVAL (XEXP (e1, 1)));
26921 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26923 HOST_WIDE_INT offset;
26925 if (GET_CODE (e1) == PLUS)
26927 if (!REG_P (XEXP (e1, 0))
26928 || !CONST_INT_P (XEXP (e1, 1)))
26929 abort ();
26930 reg = REGNO (XEXP (e1, 0));
26931 offset = INTVAL (XEXP (e1, 1));
26932 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26933 HARD_FRAME_POINTER_REGNUM, reg,
26934 offset);
26936 else if (REG_P (e1))
26938 reg = REGNO (e1);
26939 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26940 HARD_FRAME_POINTER_REGNUM, reg);
26942 else
26943 abort ();
26945 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26947 /* Move from sp to reg. */
26948 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26950 else if (GET_CODE (e1) == PLUS
26951 && REG_P (XEXP (e1, 0))
26952 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26953 && CONST_INT_P (XEXP (e1, 1)))
26955 /* Set reg to offset from sp. */
26956 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26957 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26959 else
26960 abort ();
26961 break;
26963 default:
26964 abort ();
26969 /* Emit unwind directives for the given insn. */
26971 static void
26972 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26974 rtx note, pat;
26975 bool handled_one = false;
26977 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26978 return;
26980 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26981 && (TREE_NOTHROW (current_function_decl)
26982 || crtl->all_throwers_are_sibcalls))
26983 return;
26985 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26986 return;
26988 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26990 switch (REG_NOTE_KIND (note))
26992 case REG_FRAME_RELATED_EXPR:
26993 pat = XEXP (note, 0);
26994 goto found;
26996 case REG_CFA_REGISTER:
26997 pat = XEXP (note, 0);
26998 if (pat == NULL)
27000 pat = PATTERN (insn);
27001 if (GET_CODE (pat) == PARALLEL)
27002 pat = XVECEXP (pat, 0, 0);
27005 /* Only emitted for IS_STACKALIGN re-alignment. */
27007 rtx dest, src;
27008 unsigned reg;
27010 src = SET_SRC (pat);
27011 dest = SET_DEST (pat);
27013 gcc_assert (src == stack_pointer_rtx);
27014 reg = REGNO (dest);
27015 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27016 reg + 0x90, reg);
27018 handled_one = true;
27019 break;
27021 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27022 to get correct dwarf information for shrink-wrap. We should not
27023 emit unwind information for it because these are used either for
27024 pretend arguments or notes to adjust sp and restore registers from
27025 stack. */
27026 case REG_CFA_DEF_CFA:
27027 case REG_CFA_ADJUST_CFA:
27028 case REG_CFA_RESTORE:
27029 return;
27031 case REG_CFA_EXPRESSION:
27032 case REG_CFA_OFFSET:
27033 /* ??? Only handling here what we actually emit. */
27034 gcc_unreachable ();
27036 default:
27037 break;
27040 if (handled_one)
27041 return;
27042 pat = PATTERN (insn);
27043 found:
27045 switch (GET_CODE (pat))
27047 case SET:
27048 arm_unwind_emit_set (asm_out_file, pat);
27049 break;
27051 case SEQUENCE:
27052 /* Store multiple. */
27053 arm_unwind_emit_sequence (asm_out_file, pat);
27054 break;
27056 default:
27057 abort();
27062 /* Output a reference from a function exception table to the type_info
27063 object X. The EABI specifies that the symbol should be relocated by
27064 an R_ARM_TARGET2 relocation. */
27066 static bool
27067 arm_output_ttype (rtx x)
27069 fputs ("\t.word\t", asm_out_file);
27070 output_addr_const (asm_out_file, x);
27071 /* Use special relocations for symbol references. */
27072 if (!CONST_INT_P (x))
27073 fputs ("(TARGET2)", asm_out_file);
27074 fputc ('\n', asm_out_file);
27076 return TRUE;
27079 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27081 static void
27082 arm_asm_emit_except_personality (rtx personality)
27084 fputs ("\t.personality\t", asm_out_file);
27085 output_addr_const (asm_out_file, personality);
27086 fputc ('\n', asm_out_file);
27089 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27091 static void
27092 arm_asm_init_sections (void)
27094 exception_section = get_unnamed_section (0, output_section_asm_op,
27095 "\t.handlerdata");
27097 #endif /* ARM_UNWIND_INFO */
27099 /* Output unwind directives for the start/end of a function. */
27101 void
27102 arm_output_fn_unwind (FILE * f, bool prologue)
27104 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27105 return;
27107 if (prologue)
27108 fputs ("\t.fnstart\n", f);
27109 else
27111 /* If this function will never be unwound, then mark it as such.
27112 The came condition is used in arm_unwind_emit to suppress
27113 the frame annotations. */
27114 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27115 && (TREE_NOTHROW (current_function_decl)
27116 || crtl->all_throwers_are_sibcalls))
27117 fputs("\t.cantunwind\n", f);
27119 fputs ("\t.fnend\n", f);
27123 static bool
27124 arm_emit_tls_decoration (FILE *fp, rtx x)
27126 enum tls_reloc reloc;
27127 rtx val;
27129 val = XVECEXP (x, 0, 0);
27130 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27132 output_addr_const (fp, val);
27134 switch (reloc)
27136 case TLS_GD32:
27137 fputs ("(tlsgd)", fp);
27138 break;
27139 case TLS_LDM32:
27140 fputs ("(tlsldm)", fp);
27141 break;
27142 case TLS_LDO32:
27143 fputs ("(tlsldo)", fp);
27144 break;
27145 case TLS_IE32:
27146 fputs ("(gottpoff)", fp);
27147 break;
27148 case TLS_LE32:
27149 fputs ("(tpoff)", fp);
27150 break;
27151 case TLS_DESCSEQ:
27152 fputs ("(tlsdesc)", fp);
27153 break;
27154 default:
27155 gcc_unreachable ();
27158 switch (reloc)
27160 case TLS_GD32:
27161 case TLS_LDM32:
27162 case TLS_IE32:
27163 case TLS_DESCSEQ:
27164 fputs (" + (. - ", fp);
27165 output_addr_const (fp, XVECEXP (x, 0, 2));
27166 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27167 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27168 output_addr_const (fp, XVECEXP (x, 0, 3));
27169 fputc (')', fp);
27170 break;
27171 default:
27172 break;
27175 return TRUE;
27178 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27180 static void
27181 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27183 gcc_assert (size == 4);
27184 fputs ("\t.word\t", file);
27185 output_addr_const (file, x);
27186 fputs ("(tlsldo)", file);
27189 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27191 static bool
27192 arm_output_addr_const_extra (FILE *fp, rtx x)
27194 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27195 return arm_emit_tls_decoration (fp, x);
27196 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27198 char label[256];
27199 int labelno = INTVAL (XVECEXP (x, 0, 0));
27201 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27202 assemble_name_raw (fp, label);
27204 return TRUE;
27206 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27208 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27209 if (GOT_PCREL)
27210 fputs ("+.", fp);
27211 fputs ("-(", fp);
27212 output_addr_const (fp, XVECEXP (x, 0, 0));
27213 fputc (')', fp);
27214 return TRUE;
27216 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27218 output_addr_const (fp, XVECEXP (x, 0, 0));
27219 if (GOT_PCREL)
27220 fputs ("+.", fp);
27221 fputs ("-(", fp);
27222 output_addr_const (fp, XVECEXP (x, 0, 1));
27223 fputc (')', fp);
27224 return TRUE;
27226 else if (GET_CODE (x) == CONST_VECTOR)
27227 return arm_emit_vector_const (fp, x);
27229 return FALSE;
27232 /* Output assembly for a shift instruction.
27233 SET_FLAGS determines how the instruction modifies the condition codes.
27234 0 - Do not set condition codes.
27235 1 - Set condition codes.
27236 2 - Use smallest instruction. */
27237 const char *
27238 arm_output_shift(rtx * operands, int set_flags)
27240 char pattern[100];
27241 static const char flag_chars[3] = {'?', '.', '!'};
27242 const char *shift;
27243 HOST_WIDE_INT val;
27244 char c;
27246 c = flag_chars[set_flags];
27247 if (TARGET_UNIFIED_ASM)
27249 shift = shift_op(operands[3], &val);
27250 if (shift)
27252 if (val != -1)
27253 operands[2] = GEN_INT(val);
27254 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27256 else
27257 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27259 else
27260 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
27261 output_asm_insn (pattern, operands);
27262 return "";
27265 /* Output assembly for a WMMX immediate shift instruction. */
27266 const char *
27267 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27269 int shift = INTVAL (operands[2]);
27270 char templ[50];
27271 machine_mode opmode = GET_MODE (operands[0]);
27273 gcc_assert (shift >= 0);
27275 /* If the shift value in the register versions is > 63 (for D qualifier),
27276 31 (for W qualifier) or 15 (for H qualifier). */
27277 if (((opmode == V4HImode) && (shift > 15))
27278 || ((opmode == V2SImode) && (shift > 31))
27279 || ((opmode == DImode) && (shift > 63)))
27281 if (wror_or_wsra)
27283 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27284 output_asm_insn (templ, operands);
27285 if (opmode == DImode)
27287 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27288 output_asm_insn (templ, operands);
27291 else
27293 /* The destination register will contain all zeros. */
27294 sprintf (templ, "wzero\t%%0");
27295 output_asm_insn (templ, operands);
27297 return "";
27300 if ((opmode == DImode) && (shift > 32))
27302 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27303 output_asm_insn (templ, operands);
27304 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27305 output_asm_insn (templ, operands);
27307 else
27309 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27310 output_asm_insn (templ, operands);
27312 return "";
27315 /* Output assembly for a WMMX tinsr instruction. */
27316 const char *
27317 arm_output_iwmmxt_tinsr (rtx *operands)
27319 int mask = INTVAL (operands[3]);
27320 int i;
27321 char templ[50];
27322 int units = mode_nunits[GET_MODE (operands[0])];
27323 gcc_assert ((mask & (mask - 1)) == 0);
27324 for (i = 0; i < units; ++i)
27326 if ((mask & 0x01) == 1)
27328 break;
27330 mask >>= 1;
27332 gcc_assert (i < units);
27334 switch (GET_MODE (operands[0]))
27336 case V8QImode:
27337 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27338 break;
27339 case V4HImode:
27340 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27341 break;
27342 case V2SImode:
27343 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27344 break;
27345 default:
27346 gcc_unreachable ();
27347 break;
27349 output_asm_insn (templ, operands);
27351 return "";
27354 /* Output a Thumb-1 casesi dispatch sequence. */
27355 const char *
27356 thumb1_output_casesi (rtx *operands)
27358 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27360 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27362 switch (GET_MODE(diff_vec))
27364 case QImode:
27365 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27366 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27367 case HImode:
27368 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27369 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27370 case SImode:
27371 return "bl\t%___gnu_thumb1_case_si";
27372 default:
27373 gcc_unreachable ();
27377 /* Output a Thumb-2 casesi instruction. */
27378 const char *
27379 thumb2_output_casesi (rtx *operands)
27381 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27383 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27385 output_asm_insn ("cmp\t%0, %1", operands);
27386 output_asm_insn ("bhi\t%l3", operands);
27387 switch (GET_MODE(diff_vec))
27389 case QImode:
27390 return "tbb\t[%|pc, %0]";
27391 case HImode:
27392 return "tbh\t[%|pc, %0, lsl #1]";
27393 case SImode:
27394 if (flag_pic)
27396 output_asm_insn ("adr\t%4, %l2", operands);
27397 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27398 output_asm_insn ("add\t%4, %4, %5", operands);
27399 return "bx\t%4";
27401 else
27403 output_asm_insn ("adr\t%4, %l2", operands);
27404 return "ldr\t%|pc, [%4, %0, lsl #2]";
27406 default:
27407 gcc_unreachable ();
27411 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27412 per-core tuning structs. */
27413 static int
27414 arm_issue_rate (void)
27416 return current_tune->issue_rate;
27419 /* Return how many instructions should scheduler lookahead to choose the
27420 best one. */
27421 static int
27422 arm_first_cycle_multipass_dfa_lookahead (void)
27424 int issue_rate = arm_issue_rate ();
27426 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27429 /* Enable modeling of L2 auto-prefetcher. */
27430 static int
27431 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27433 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27436 const char *
27437 arm_mangle_type (const_tree type)
27439 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27440 has to be managled as if it is in the "std" namespace. */
27441 if (TARGET_AAPCS_BASED
27442 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27443 return "St9__va_list";
27445 /* Half-precision float. */
27446 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27447 return "Dh";
27449 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27450 builtin type. */
27451 if (TYPE_NAME (type) != NULL)
27452 return arm_mangle_builtin_type (type);
27454 /* Use the default mangling. */
27455 return NULL;
27458 /* Order of allocation of core registers for Thumb: this allocation is
27459 written over the corresponding initial entries of the array
27460 initialized with REG_ALLOC_ORDER. We allocate all low registers
27461 first. Saving and restoring a low register is usually cheaper than
27462 using a call-clobbered high register. */
27464 static const int thumb_core_reg_alloc_order[] =
27466 3, 2, 1, 0, 4, 5, 6, 7,
27467 14, 12, 8, 9, 10, 11
27470 /* Adjust register allocation order when compiling for Thumb. */
27472 void
27473 arm_order_regs_for_local_alloc (void)
27475 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27476 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27477 if (TARGET_THUMB)
27478 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27479 sizeof (thumb_core_reg_alloc_order));
27482 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27484 bool
27485 arm_frame_pointer_required (void)
27487 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27488 return true;
27490 /* If the function receives nonlocal gotos, it needs to save the frame
27491 pointer in the nonlocal_goto_save_area object. */
27492 if (cfun->has_nonlocal_label)
27493 return true;
27495 /* The frame pointer is required for non-leaf APCS frames. */
27496 if (TARGET_ARM && TARGET_APCS_FRAME && !leaf_function_p ())
27497 return true;
27499 /* If we are probing the stack in the prologue, we will have a faulting
27500 instruction prior to the stack adjustment and this requires a frame
27501 pointer if we want to catch the exception using the EABI unwinder. */
27502 if (!IS_INTERRUPT (arm_current_func_type ())
27503 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27504 && arm_except_unwind_info (&global_options) == UI_TARGET
27505 && cfun->can_throw_non_call_exceptions)
27507 HOST_WIDE_INT size = get_frame_size ();
27509 /* That's irrelevant if there is no stack adjustment. */
27510 if (size <= 0)
27511 return false;
27513 /* That's relevant only if there is a stack probe. */
27514 if (crtl->is_leaf && !cfun->calls_alloca)
27516 /* We don't have the final size of the frame so adjust. */
27517 size += 32 * UNITS_PER_WORD;
27518 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27519 return true;
27521 else
27522 return true;
27525 return false;
27528 /* Only thumb1 can't support conditional execution, so return true if
27529 the target is not thumb1. */
27530 static bool
27531 arm_have_conditional_execution (void)
27533 return !TARGET_THUMB1;
27536 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27537 static HOST_WIDE_INT
27538 arm_vector_alignment (const_tree type)
27540 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27542 if (TARGET_AAPCS_BASED)
27543 align = MIN (align, 64);
27545 return align;
27548 static unsigned int
27549 arm_autovectorize_vector_sizes (void)
27551 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27554 static bool
27555 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27557 /* Vectors which aren't in packed structures will not be less aligned than
27558 the natural alignment of their element type, so this is safe. */
27559 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27560 return !is_packed;
27562 return default_builtin_vector_alignment_reachable (type, is_packed);
27565 static bool
27566 arm_builtin_support_vector_misalignment (machine_mode mode,
27567 const_tree type, int misalignment,
27568 bool is_packed)
27570 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27572 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27574 if (is_packed)
27575 return align == 1;
27577 /* If the misalignment is unknown, we should be able to handle the access
27578 so long as it is not to a member of a packed data structure. */
27579 if (misalignment == -1)
27580 return true;
27582 /* Return true if the misalignment is a multiple of the natural alignment
27583 of the vector's element type. This is probably always going to be
27584 true in practice, since we've already established that this isn't a
27585 packed access. */
27586 return ((misalignment % align) == 0);
27589 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27590 is_packed);
27593 static void
27594 arm_conditional_register_usage (void)
27596 int regno;
27598 if (TARGET_THUMB1 && optimize_size)
27600 /* When optimizing for size on Thumb-1, it's better not
27601 to use the HI regs, because of the overhead of
27602 stacking them. */
27603 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27604 fixed_regs[regno] = call_used_regs[regno] = 1;
27607 /* The link register can be clobbered by any branch insn,
27608 but we have no way to track that at present, so mark
27609 it as unavailable. */
27610 if (TARGET_THUMB1)
27611 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27613 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27615 /* VFPv3 registers are disabled when earlier VFP
27616 versions are selected due to the definition of
27617 LAST_VFP_REGNUM. */
27618 for (regno = FIRST_VFP_REGNUM;
27619 regno <= LAST_VFP_REGNUM; ++ regno)
27621 fixed_regs[regno] = 0;
27622 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27623 || regno >= FIRST_VFP_REGNUM + 32;
27627 if (TARGET_REALLY_IWMMXT)
27629 regno = FIRST_IWMMXT_GR_REGNUM;
27630 /* The 2002/10/09 revision of the XScale ABI has wCG0
27631 and wCG1 as call-preserved registers. The 2002/11/21
27632 revision changed this so that all wCG registers are
27633 scratch registers. */
27634 for (regno = FIRST_IWMMXT_GR_REGNUM;
27635 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27636 fixed_regs[regno] = 0;
27637 /* The XScale ABI has wR0 - wR9 as scratch registers,
27638 the rest as call-preserved registers. */
27639 for (regno = FIRST_IWMMXT_REGNUM;
27640 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27642 fixed_regs[regno] = 0;
27643 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27647 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27649 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27650 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27652 else if (TARGET_APCS_STACK)
27654 fixed_regs[10] = 1;
27655 call_used_regs[10] = 1;
27657 /* -mcaller-super-interworking reserves r11 for calls to
27658 _interwork_r11_call_via_rN(). Making the register global
27659 is an easy way of ensuring that it remains valid for all
27660 calls. */
27661 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27662 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27664 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27665 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27666 if (TARGET_CALLER_INTERWORKING)
27667 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27669 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27672 static reg_class_t
27673 arm_preferred_rename_class (reg_class_t rclass)
27675 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27676 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27677 and code size can be reduced. */
27678 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27679 return LO_REGS;
27680 else
27681 return NO_REGS;
27684 /* Compute the atrribute "length" of insn "*push_multi".
27685 So this function MUST be kept in sync with that insn pattern. */
27687 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27689 int i, regno, hi_reg;
27690 int num_saves = XVECLEN (parallel_op, 0);
27692 /* ARM mode. */
27693 if (TARGET_ARM)
27694 return 4;
27695 /* Thumb1 mode. */
27696 if (TARGET_THUMB1)
27697 return 2;
27699 /* Thumb2 mode. */
27700 regno = REGNO (first_op);
27701 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27702 for (i = 1; i < num_saves && !hi_reg; i++)
27704 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27705 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27708 if (!hi_reg)
27709 return 2;
27710 return 4;
27713 /* Compute the number of instructions emitted by output_move_double. */
27715 arm_count_output_move_double_insns (rtx *operands)
27717 int count;
27718 rtx ops[2];
27719 /* output_move_double may modify the operands array, so call it
27720 here on a copy of the array. */
27721 ops[0] = operands[0];
27722 ops[1] = operands[1];
27723 output_move_double (ops, false, &count);
27724 return count;
27728 vfp3_const_double_for_fract_bits (rtx operand)
27730 REAL_VALUE_TYPE r0;
27732 if (!CONST_DOUBLE_P (operand))
27733 return 0;
27735 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
27736 if (exact_real_inverse (DFmode, &r0)
27737 && !REAL_VALUE_NEGATIVE (r0))
27739 if (exact_real_truncate (DFmode, &r0))
27741 HOST_WIDE_INT value = real_to_integer (&r0);
27742 value = value & 0xffffffff;
27743 if ((value != 0) && ( (value & (value - 1)) == 0))
27744 return int_log2 (value);
27747 return 0;
27750 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
27751 log2 is in [1, 32], return that log2. Otherwise return -1.
27752 This is used in the patterns for vcvt.s32.f32 floating-point to
27753 fixed-point conversions. */
27756 vfp3_const_double_for_bits (rtx x)
27758 const REAL_VALUE_TYPE *r;
27760 if (!CONST_DOUBLE_P (x))
27761 return -1;
27763 r = CONST_DOUBLE_REAL_VALUE (x);
27765 if (REAL_VALUE_NEGATIVE (*r)
27766 || REAL_VALUE_ISNAN (*r)
27767 || REAL_VALUE_ISINF (*r)
27768 || !real_isinteger (r, SFmode))
27769 return -1;
27771 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
27773 /* The exact_log2 above will have returned -1 if this is
27774 not an exact log2. */
27775 if (!IN_RANGE (hwint, 1, 32))
27776 return -1;
27778 return hwint;
27782 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27784 static void
27785 arm_pre_atomic_barrier (enum memmodel model)
27787 if (need_atomic_barrier_p (model, true))
27788 emit_insn (gen_memory_barrier ());
27791 static void
27792 arm_post_atomic_barrier (enum memmodel model)
27794 if (need_atomic_barrier_p (model, false))
27795 emit_insn (gen_memory_barrier ());
27798 /* Emit the load-exclusive and store-exclusive instructions.
27799 Use acquire and release versions if necessary. */
27801 static void
27802 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27804 rtx (*gen) (rtx, rtx);
27806 if (acq)
27808 switch (mode)
27810 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27811 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27812 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27813 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27814 default:
27815 gcc_unreachable ();
27818 else
27820 switch (mode)
27822 case QImode: gen = gen_arm_load_exclusiveqi; break;
27823 case HImode: gen = gen_arm_load_exclusivehi; break;
27824 case SImode: gen = gen_arm_load_exclusivesi; break;
27825 case DImode: gen = gen_arm_load_exclusivedi; break;
27826 default:
27827 gcc_unreachable ();
27831 emit_insn (gen (rval, mem));
27834 static void
27835 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27836 rtx mem, bool rel)
27838 rtx (*gen) (rtx, rtx, rtx);
27840 if (rel)
27842 switch (mode)
27844 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27845 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27846 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27847 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27848 default:
27849 gcc_unreachable ();
27852 else
27854 switch (mode)
27856 case QImode: gen = gen_arm_store_exclusiveqi; break;
27857 case HImode: gen = gen_arm_store_exclusivehi; break;
27858 case SImode: gen = gen_arm_store_exclusivesi; break;
27859 case DImode: gen = gen_arm_store_exclusivedi; break;
27860 default:
27861 gcc_unreachable ();
27865 emit_insn (gen (bval, rval, mem));
27868 /* Mark the previous jump instruction as unlikely. */
27870 static void
27871 emit_unlikely_jump (rtx insn)
27873 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27875 insn = emit_jump_insn (insn);
27876 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27879 /* Expand a compare and swap pattern. */
27881 void
27882 arm_expand_compare_and_swap (rtx operands[])
27884 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27885 machine_mode mode;
27886 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27888 bval = operands[0];
27889 rval = operands[1];
27890 mem = operands[2];
27891 oldval = operands[3];
27892 newval = operands[4];
27893 is_weak = operands[5];
27894 mod_s = operands[6];
27895 mod_f = operands[7];
27896 mode = GET_MODE (mem);
27898 /* Normally the succ memory model must be stronger than fail, but in the
27899 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27900 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27902 if (TARGET_HAVE_LDACQ
27903 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
27904 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
27905 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27907 switch (mode)
27909 case QImode:
27910 case HImode:
27911 /* For narrow modes, we're going to perform the comparison in SImode,
27912 so do the zero-extension now. */
27913 rval = gen_reg_rtx (SImode);
27914 oldval = convert_modes (SImode, mode, oldval, true);
27915 /* FALLTHRU */
27917 case SImode:
27918 /* Force the value into a register if needed. We waited until after
27919 the zero-extension above to do this properly. */
27920 if (!arm_add_operand (oldval, SImode))
27921 oldval = force_reg (SImode, oldval);
27922 break;
27924 case DImode:
27925 if (!cmpdi_operand (oldval, mode))
27926 oldval = force_reg (mode, oldval);
27927 break;
27929 default:
27930 gcc_unreachable ();
27933 switch (mode)
27935 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27936 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27937 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27938 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27939 default:
27940 gcc_unreachable ();
27943 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27945 if (mode == QImode || mode == HImode)
27946 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27948 /* In all cases, we arrange for success to be signaled by Z set.
27949 This arrangement allows for the boolean result to be used directly
27950 in a subsequent branch, post optimization. */
27951 x = gen_rtx_REG (CCmode, CC_REGNUM);
27952 x = gen_rtx_EQ (SImode, x, const0_rtx);
27953 emit_insn (gen_rtx_SET (bval, x));
27956 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27957 another memory store between the load-exclusive and store-exclusive can
27958 reset the monitor from Exclusive to Open state. This means we must wait
27959 until after reload to split the pattern, lest we get a register spill in
27960 the middle of the atomic sequence. */
27962 void
27963 arm_split_compare_and_swap (rtx operands[])
27965 rtx rval, mem, oldval, newval, scratch;
27966 machine_mode mode;
27967 enum memmodel mod_s, mod_f;
27968 bool is_weak;
27969 rtx_code_label *label1, *label2;
27970 rtx x, cond;
27972 rval = operands[0];
27973 mem = operands[1];
27974 oldval = operands[2];
27975 newval = operands[3];
27976 is_weak = (operands[4] != const0_rtx);
27977 mod_s = memmodel_from_int (INTVAL (operands[5]));
27978 mod_f = memmodel_from_int (INTVAL (operands[6]));
27979 scratch = operands[7];
27980 mode = GET_MODE (mem);
27982 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
27984 bool use_acquire = TARGET_HAVE_LDACQ
27985 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27986 || is_mm_release (mod_s));
27988 bool use_release = TARGET_HAVE_LDACQ
27989 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27990 || is_mm_acquire (mod_s));
27992 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
27993 a full barrier is emitted after the store-release. */
27994 if (is_armv8_sync)
27995 use_acquire = false;
27997 /* Checks whether a barrier is needed and emits one accordingly. */
27998 if (!(use_acquire || use_release))
27999 arm_pre_atomic_barrier (mod_s);
28001 label1 = NULL;
28002 if (!is_weak)
28004 label1 = gen_label_rtx ();
28005 emit_label (label1);
28007 label2 = gen_label_rtx ();
28009 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28011 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
28012 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28013 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28014 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28015 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28017 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
28019 /* Weak or strong, we want EQ to be true for success, so that we
28020 match the flags that we got from the compare above. */
28021 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28022 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
28023 emit_insn (gen_rtx_SET (cond, x));
28025 if (!is_weak)
28027 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28028 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28029 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
28030 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28033 if (!is_mm_relaxed (mod_f))
28034 emit_label (label2);
28036 /* Checks whether a barrier is needed and emits one accordingly. */
28037 if (is_armv8_sync
28038 || !(use_acquire || use_release))
28039 arm_post_atomic_barrier (mod_s);
28041 if (is_mm_relaxed (mod_f))
28042 emit_label (label2);
28045 void
28046 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28047 rtx value, rtx model_rtx, rtx cond)
28049 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28050 machine_mode mode = GET_MODE (mem);
28051 machine_mode wmode = (mode == DImode ? DImode : SImode);
28052 rtx_code_label *label;
28053 rtx x;
28055 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28057 bool use_acquire = TARGET_HAVE_LDACQ
28058 && !(is_mm_relaxed (model) || is_mm_consume (model)
28059 || is_mm_release (model));
28061 bool use_release = TARGET_HAVE_LDACQ
28062 && !(is_mm_relaxed (model) || is_mm_consume (model)
28063 || is_mm_acquire (model));
28065 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28066 a full barrier is emitted after the store-release. */
28067 if (is_armv8_sync)
28068 use_acquire = false;
28070 /* Checks whether a barrier is needed and emits one accordingly. */
28071 if (!(use_acquire || use_release))
28072 arm_pre_atomic_barrier (model);
28074 label = gen_label_rtx ();
28075 emit_label (label);
28077 if (new_out)
28078 new_out = gen_lowpart (wmode, new_out);
28079 if (old_out)
28080 old_out = gen_lowpart (wmode, old_out);
28081 else
28082 old_out = new_out;
28083 value = simplify_gen_subreg (wmode, value, mode, 0);
28085 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28087 switch (code)
28089 case SET:
28090 new_out = value;
28091 break;
28093 case NOT:
28094 x = gen_rtx_AND (wmode, old_out, value);
28095 emit_insn (gen_rtx_SET (new_out, x));
28096 x = gen_rtx_NOT (wmode, new_out);
28097 emit_insn (gen_rtx_SET (new_out, x));
28098 break;
28100 case MINUS:
28101 if (CONST_INT_P (value))
28103 value = GEN_INT (-INTVAL (value));
28104 code = PLUS;
28106 /* FALLTHRU */
28108 case PLUS:
28109 if (mode == DImode)
28111 /* DImode plus/minus need to clobber flags. */
28112 /* The adddi3 and subdi3 patterns are incorrectly written so that
28113 they require matching operands, even when we could easily support
28114 three operands. Thankfully, this can be fixed up post-splitting,
28115 as the individual add+adc patterns do accept three operands and
28116 post-reload cprop can make these moves go away. */
28117 emit_move_insn (new_out, old_out);
28118 if (code == PLUS)
28119 x = gen_adddi3 (new_out, new_out, value);
28120 else
28121 x = gen_subdi3 (new_out, new_out, value);
28122 emit_insn (x);
28123 break;
28125 /* FALLTHRU */
28127 default:
28128 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28129 emit_insn (gen_rtx_SET (new_out, x));
28130 break;
28133 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28134 use_release);
28136 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28137 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28139 /* Checks whether a barrier is needed and emits one accordingly. */
28140 if (is_armv8_sync
28141 || !(use_acquire || use_release))
28142 arm_post_atomic_barrier (model);
28145 #define MAX_VECT_LEN 16
28147 struct expand_vec_perm_d
28149 rtx target, op0, op1;
28150 unsigned char perm[MAX_VECT_LEN];
28151 machine_mode vmode;
28152 unsigned char nelt;
28153 bool one_vector_p;
28154 bool testing_p;
28157 /* Generate a variable permutation. */
28159 static void
28160 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28162 machine_mode vmode = GET_MODE (target);
28163 bool one_vector_p = rtx_equal_p (op0, op1);
28165 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28166 gcc_checking_assert (GET_MODE (op0) == vmode);
28167 gcc_checking_assert (GET_MODE (op1) == vmode);
28168 gcc_checking_assert (GET_MODE (sel) == vmode);
28169 gcc_checking_assert (TARGET_NEON);
28171 if (one_vector_p)
28173 if (vmode == V8QImode)
28174 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28175 else
28176 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28178 else
28180 rtx pair;
28182 if (vmode == V8QImode)
28184 pair = gen_reg_rtx (V16QImode);
28185 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28186 pair = gen_lowpart (TImode, pair);
28187 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28189 else
28191 pair = gen_reg_rtx (OImode);
28192 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28193 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28198 void
28199 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28201 machine_mode vmode = GET_MODE (target);
28202 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28203 bool one_vector_p = rtx_equal_p (op0, op1);
28204 rtx rmask[MAX_VECT_LEN], mask;
28206 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28207 numbering of elements for big-endian, we must reverse the order. */
28208 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28210 /* The VTBL instruction does not use a modulo index, so we must take care
28211 of that ourselves. */
28212 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28213 for (i = 0; i < nelt; ++i)
28214 rmask[i] = mask;
28215 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28216 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28218 arm_expand_vec_perm_1 (target, op0, op1, sel);
28221 /* Generate or test for an insn that supports a constant permutation. */
28223 /* Recognize patterns for the VUZP insns. */
28225 static bool
28226 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28228 unsigned int i, odd, mask, nelt = d->nelt;
28229 rtx out0, out1, in0, in1;
28230 rtx (*gen)(rtx, rtx, rtx, rtx);
28232 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28233 return false;
28235 /* Note that these are little-endian tests. Adjust for big-endian later. */
28236 if (d->perm[0] == 0)
28237 odd = 0;
28238 else if (d->perm[0] == 1)
28239 odd = 1;
28240 else
28241 return false;
28242 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28244 for (i = 0; i < nelt; i++)
28246 unsigned elt = (i * 2 + odd) & mask;
28247 if (d->perm[i] != elt)
28248 return false;
28251 /* Success! */
28252 if (d->testing_p)
28253 return true;
28255 switch (d->vmode)
28257 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28258 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28259 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28260 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28261 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28262 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28263 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28264 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28265 default:
28266 gcc_unreachable ();
28269 in0 = d->op0;
28270 in1 = d->op1;
28271 if (BYTES_BIG_ENDIAN)
28273 std::swap (in0, in1);
28274 odd = !odd;
28277 out0 = d->target;
28278 out1 = gen_reg_rtx (d->vmode);
28279 if (odd)
28280 std::swap (out0, out1);
28282 emit_insn (gen (out0, in0, in1, out1));
28283 return true;
28286 /* Recognize patterns for the VZIP insns. */
28288 static bool
28289 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28291 unsigned int i, high, mask, nelt = d->nelt;
28292 rtx out0, out1, in0, in1;
28293 rtx (*gen)(rtx, rtx, rtx, rtx);
28295 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28296 return false;
28298 /* Note that these are little-endian tests. Adjust for big-endian later. */
28299 high = nelt / 2;
28300 if (d->perm[0] == high)
28302 else if (d->perm[0] == 0)
28303 high = 0;
28304 else
28305 return false;
28306 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28308 for (i = 0; i < nelt / 2; i++)
28310 unsigned elt = (i + high) & mask;
28311 if (d->perm[i * 2] != elt)
28312 return false;
28313 elt = (elt + nelt) & mask;
28314 if (d->perm[i * 2 + 1] != elt)
28315 return false;
28318 /* Success! */
28319 if (d->testing_p)
28320 return true;
28322 switch (d->vmode)
28324 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28325 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28326 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28327 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28328 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28329 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28330 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28331 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28332 default:
28333 gcc_unreachable ();
28336 in0 = d->op0;
28337 in1 = d->op1;
28338 if (BYTES_BIG_ENDIAN)
28340 std::swap (in0, in1);
28341 high = !high;
28344 out0 = d->target;
28345 out1 = gen_reg_rtx (d->vmode);
28346 if (high)
28347 std::swap (out0, out1);
28349 emit_insn (gen (out0, in0, in1, out1));
28350 return true;
28353 /* Recognize patterns for the VREV insns. */
28355 static bool
28356 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28358 unsigned int i, j, diff, nelt = d->nelt;
28359 rtx (*gen)(rtx, rtx);
28361 if (!d->one_vector_p)
28362 return false;
28364 diff = d->perm[0];
28365 switch (diff)
28367 case 7:
28368 switch (d->vmode)
28370 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28371 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28372 default:
28373 return false;
28375 break;
28376 case 3:
28377 switch (d->vmode)
28379 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28380 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28381 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28382 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28383 default:
28384 return false;
28386 break;
28387 case 1:
28388 switch (d->vmode)
28390 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28391 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28392 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28393 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28394 case V4SImode: gen = gen_neon_vrev64v4si; break;
28395 case V2SImode: gen = gen_neon_vrev64v2si; break;
28396 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28397 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28398 default:
28399 return false;
28401 break;
28402 default:
28403 return false;
28406 for (i = 0; i < nelt ; i += diff + 1)
28407 for (j = 0; j <= diff; j += 1)
28409 /* This is guaranteed to be true as the value of diff
28410 is 7, 3, 1 and we should have enough elements in the
28411 queue to generate this. Getting a vector mask with a
28412 value of diff other than these values implies that
28413 something is wrong by the time we get here. */
28414 gcc_assert (i + j < nelt);
28415 if (d->perm[i + j] != i + diff - j)
28416 return false;
28419 /* Success! */
28420 if (d->testing_p)
28421 return true;
28423 emit_insn (gen (d->target, d->op0));
28424 return true;
28427 /* Recognize patterns for the VTRN insns. */
28429 static bool
28430 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28432 unsigned int i, odd, mask, nelt = d->nelt;
28433 rtx out0, out1, in0, in1;
28434 rtx (*gen)(rtx, rtx, rtx, rtx);
28436 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28437 return false;
28439 /* Note that these are little-endian tests. Adjust for big-endian later. */
28440 if (d->perm[0] == 0)
28441 odd = 0;
28442 else if (d->perm[0] == 1)
28443 odd = 1;
28444 else
28445 return false;
28446 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28448 for (i = 0; i < nelt; i += 2)
28450 if (d->perm[i] != i + odd)
28451 return false;
28452 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28453 return false;
28456 /* Success! */
28457 if (d->testing_p)
28458 return true;
28460 switch (d->vmode)
28462 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28463 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28464 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28465 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28466 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28467 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28468 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28469 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28470 default:
28471 gcc_unreachable ();
28474 in0 = d->op0;
28475 in1 = d->op1;
28476 if (BYTES_BIG_ENDIAN)
28478 std::swap (in0, in1);
28479 odd = !odd;
28482 out0 = d->target;
28483 out1 = gen_reg_rtx (d->vmode);
28484 if (odd)
28485 std::swap (out0, out1);
28487 emit_insn (gen (out0, in0, in1, out1));
28488 return true;
28491 /* Recognize patterns for the VEXT insns. */
28493 static bool
28494 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28496 unsigned int i, nelt = d->nelt;
28497 rtx (*gen) (rtx, rtx, rtx, rtx);
28498 rtx offset;
28500 unsigned int location;
28502 unsigned int next = d->perm[0] + 1;
28504 /* TODO: Handle GCC's numbering of elements for big-endian. */
28505 if (BYTES_BIG_ENDIAN)
28506 return false;
28508 /* Check if the extracted indexes are increasing by one. */
28509 for (i = 1; i < nelt; next++, i++)
28511 /* If we hit the most significant element of the 2nd vector in
28512 the previous iteration, no need to test further. */
28513 if (next == 2 * nelt)
28514 return false;
28516 /* If we are operating on only one vector: it could be a
28517 rotation. If there are only two elements of size < 64, let
28518 arm_evpc_neon_vrev catch it. */
28519 if (d->one_vector_p && (next == nelt))
28521 if ((nelt == 2) && (d->vmode != V2DImode))
28522 return false;
28523 else
28524 next = 0;
28527 if (d->perm[i] != next)
28528 return false;
28531 location = d->perm[0];
28533 switch (d->vmode)
28535 case V16QImode: gen = gen_neon_vextv16qi; break;
28536 case V8QImode: gen = gen_neon_vextv8qi; break;
28537 case V4HImode: gen = gen_neon_vextv4hi; break;
28538 case V8HImode: gen = gen_neon_vextv8hi; break;
28539 case V2SImode: gen = gen_neon_vextv2si; break;
28540 case V4SImode: gen = gen_neon_vextv4si; break;
28541 case V2SFmode: gen = gen_neon_vextv2sf; break;
28542 case V4SFmode: gen = gen_neon_vextv4sf; break;
28543 case V2DImode: gen = gen_neon_vextv2di; break;
28544 default:
28545 return false;
28548 /* Success! */
28549 if (d->testing_p)
28550 return true;
28552 offset = GEN_INT (location);
28553 emit_insn (gen (d->target, d->op0, d->op1, offset));
28554 return true;
28557 /* The NEON VTBL instruction is a fully variable permuation that's even
28558 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28559 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28560 can do slightly better by expanding this as a constant where we don't
28561 have to apply a mask. */
28563 static bool
28564 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28566 rtx rperm[MAX_VECT_LEN], sel;
28567 machine_mode vmode = d->vmode;
28568 unsigned int i, nelt = d->nelt;
28570 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28571 numbering of elements for big-endian, we must reverse the order. */
28572 if (BYTES_BIG_ENDIAN)
28573 return false;
28575 if (d->testing_p)
28576 return true;
28578 /* Generic code will try constant permutation twice. Once with the
28579 original mode and again with the elements lowered to QImode.
28580 So wait and don't do the selector expansion ourselves. */
28581 if (vmode != V8QImode && vmode != V16QImode)
28582 return false;
28584 for (i = 0; i < nelt; ++i)
28585 rperm[i] = GEN_INT (d->perm[i]);
28586 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28587 sel = force_reg (vmode, sel);
28589 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28590 return true;
28593 static bool
28594 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28596 /* Check if the input mask matches vext before reordering the
28597 operands. */
28598 if (TARGET_NEON)
28599 if (arm_evpc_neon_vext (d))
28600 return true;
28602 /* The pattern matching functions above are written to look for a small
28603 number to begin the sequence (0, 1, N/2). If we begin with an index
28604 from the second operand, we can swap the operands. */
28605 if (d->perm[0] >= d->nelt)
28607 unsigned i, nelt = d->nelt;
28609 for (i = 0; i < nelt; ++i)
28610 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28612 std::swap (d->op0, d->op1);
28615 if (TARGET_NEON)
28617 if (arm_evpc_neon_vuzp (d))
28618 return true;
28619 if (arm_evpc_neon_vzip (d))
28620 return true;
28621 if (arm_evpc_neon_vrev (d))
28622 return true;
28623 if (arm_evpc_neon_vtrn (d))
28624 return true;
28625 return arm_evpc_neon_vtbl (d);
28627 return false;
28630 /* Expand a vec_perm_const pattern. */
28632 bool
28633 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28635 struct expand_vec_perm_d d;
28636 int i, nelt, which;
28638 d.target = target;
28639 d.op0 = op0;
28640 d.op1 = op1;
28642 d.vmode = GET_MODE (target);
28643 gcc_assert (VECTOR_MODE_P (d.vmode));
28644 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28645 d.testing_p = false;
28647 for (i = which = 0; i < nelt; ++i)
28649 rtx e = XVECEXP (sel, 0, i);
28650 int ei = INTVAL (e) & (2 * nelt - 1);
28651 which |= (ei < nelt ? 1 : 2);
28652 d.perm[i] = ei;
28655 switch (which)
28657 default:
28658 gcc_unreachable();
28660 case 3:
28661 d.one_vector_p = false;
28662 if (!rtx_equal_p (op0, op1))
28663 break;
28665 /* The elements of PERM do not suggest that only the first operand
28666 is used, but both operands are identical. Allow easier matching
28667 of the permutation by folding the permutation into the single
28668 input vector. */
28669 /* FALLTHRU */
28670 case 2:
28671 for (i = 0; i < nelt; ++i)
28672 d.perm[i] &= nelt - 1;
28673 d.op0 = op1;
28674 d.one_vector_p = true;
28675 break;
28677 case 1:
28678 d.op1 = op0;
28679 d.one_vector_p = true;
28680 break;
28683 return arm_expand_vec_perm_const_1 (&d);
28686 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28688 static bool
28689 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28690 const unsigned char *sel)
28692 struct expand_vec_perm_d d;
28693 unsigned int i, nelt, which;
28694 bool ret;
28696 d.vmode = vmode;
28697 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28698 d.testing_p = true;
28699 memcpy (d.perm, sel, nelt);
28701 /* Categorize the set of elements in the selector. */
28702 for (i = which = 0; i < nelt; ++i)
28704 unsigned char e = d.perm[i];
28705 gcc_assert (e < 2 * nelt);
28706 which |= (e < nelt ? 1 : 2);
28709 /* For all elements from second vector, fold the elements to first. */
28710 if (which == 2)
28711 for (i = 0; i < nelt; ++i)
28712 d.perm[i] -= nelt;
28714 /* Check whether the mask can be applied to the vector type. */
28715 d.one_vector_p = (which != 3);
28717 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28718 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28719 if (!d.one_vector_p)
28720 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28722 start_sequence ();
28723 ret = arm_expand_vec_perm_const_1 (&d);
28724 end_sequence ();
28726 return ret;
28729 bool
28730 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28732 /* If we are soft float and we do not have ldrd
28733 then all auto increment forms are ok. */
28734 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28735 return true;
28737 switch (code)
28739 /* Post increment and Pre Decrement are supported for all
28740 instruction forms except for vector forms. */
28741 case ARM_POST_INC:
28742 case ARM_PRE_DEC:
28743 if (VECTOR_MODE_P (mode))
28745 if (code != ARM_PRE_DEC)
28746 return true;
28747 else
28748 return false;
28751 return true;
28753 case ARM_POST_DEC:
28754 case ARM_PRE_INC:
28755 /* Without LDRD and mode size greater than
28756 word size, there is no point in auto-incrementing
28757 because ldm and stm will not have these forms. */
28758 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28759 return false;
28761 /* Vector and floating point modes do not support
28762 these auto increment forms. */
28763 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28764 return false;
28766 return true;
28768 default:
28769 return false;
28773 return false;
28776 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28777 on ARM, since we know that shifts by negative amounts are no-ops.
28778 Additionally, the default expansion code is not available or suitable
28779 for post-reload insn splits (this can occur when the register allocator
28780 chooses not to do a shift in NEON).
28782 This function is used in both initial expand and post-reload splits, and
28783 handles all kinds of 64-bit shifts.
28785 Input requirements:
28786 - It is safe for the input and output to be the same register, but
28787 early-clobber rules apply for the shift amount and scratch registers.
28788 - Shift by register requires both scratch registers. In all other cases
28789 the scratch registers may be NULL.
28790 - Ashiftrt by a register also clobbers the CC register. */
28791 void
28792 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28793 rtx amount, rtx scratch1, rtx scratch2)
28795 rtx out_high = gen_highpart (SImode, out);
28796 rtx out_low = gen_lowpart (SImode, out);
28797 rtx in_high = gen_highpart (SImode, in);
28798 rtx in_low = gen_lowpart (SImode, in);
28800 /* Terminology:
28801 in = the register pair containing the input value.
28802 out = the destination register pair.
28803 up = the high- or low-part of each pair.
28804 down = the opposite part to "up".
28805 In a shift, we can consider bits to shift from "up"-stream to
28806 "down"-stream, so in a left-shift "up" is the low-part and "down"
28807 is the high-part of each register pair. */
28809 rtx out_up = code == ASHIFT ? out_low : out_high;
28810 rtx out_down = code == ASHIFT ? out_high : out_low;
28811 rtx in_up = code == ASHIFT ? in_low : in_high;
28812 rtx in_down = code == ASHIFT ? in_high : in_low;
28814 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28815 gcc_assert (out
28816 && (REG_P (out) || GET_CODE (out) == SUBREG)
28817 && GET_MODE (out) == DImode);
28818 gcc_assert (in
28819 && (REG_P (in) || GET_CODE (in) == SUBREG)
28820 && GET_MODE (in) == DImode);
28821 gcc_assert (amount
28822 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28823 && GET_MODE (amount) == SImode)
28824 || CONST_INT_P (amount)));
28825 gcc_assert (scratch1 == NULL
28826 || (GET_CODE (scratch1) == SCRATCH)
28827 || (GET_MODE (scratch1) == SImode
28828 && REG_P (scratch1)));
28829 gcc_assert (scratch2 == NULL
28830 || (GET_CODE (scratch2) == SCRATCH)
28831 || (GET_MODE (scratch2) == SImode
28832 && REG_P (scratch2)));
28833 gcc_assert (!REG_P (out) || !REG_P (amount)
28834 || !HARD_REGISTER_P (out)
28835 || (REGNO (out) != REGNO (amount)
28836 && REGNO (out) + 1 != REGNO (amount)));
28838 /* Macros to make following code more readable. */
28839 #define SUB_32(DEST,SRC) \
28840 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28841 #define RSB_32(DEST,SRC) \
28842 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28843 #define SUB_S_32(DEST,SRC) \
28844 gen_addsi3_compare0 ((DEST), (SRC), \
28845 GEN_INT (-32))
28846 #define SET(DEST,SRC) \
28847 gen_rtx_SET ((DEST), (SRC))
28848 #define SHIFT(CODE,SRC,AMOUNT) \
28849 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28850 #define LSHIFT(CODE,SRC,AMOUNT) \
28851 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28852 SImode, (SRC), (AMOUNT))
28853 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28854 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28855 SImode, (SRC), (AMOUNT))
28856 #define ORR(A,B) \
28857 gen_rtx_IOR (SImode, (A), (B))
28858 #define BRANCH(COND,LABEL) \
28859 gen_arm_cond_branch ((LABEL), \
28860 gen_rtx_ ## COND (CCmode, cc_reg, \
28861 const0_rtx), \
28862 cc_reg)
28864 /* Shifts by register and shifts by constant are handled separately. */
28865 if (CONST_INT_P (amount))
28867 /* We have a shift-by-constant. */
28869 /* First, handle out-of-range shift amounts.
28870 In both cases we try to match the result an ARM instruction in a
28871 shift-by-register would give. This helps reduce execution
28872 differences between optimization levels, but it won't stop other
28873 parts of the compiler doing different things. This is "undefined
28874 behaviour, in any case. */
28875 if (INTVAL (amount) <= 0)
28876 emit_insn (gen_movdi (out, in));
28877 else if (INTVAL (amount) >= 64)
28879 if (code == ASHIFTRT)
28881 rtx const31_rtx = GEN_INT (31);
28882 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28883 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28885 else
28886 emit_insn (gen_movdi (out, const0_rtx));
28889 /* Now handle valid shifts. */
28890 else if (INTVAL (amount) < 32)
28892 /* Shifts by a constant less than 32. */
28893 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28895 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28896 emit_insn (SET (out_down,
28897 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28898 out_down)));
28899 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28901 else
28903 /* Shifts by a constant greater than 31. */
28904 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28906 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28907 if (code == ASHIFTRT)
28908 emit_insn (gen_ashrsi3 (out_up, in_up,
28909 GEN_INT (31)));
28910 else
28911 emit_insn (SET (out_up, const0_rtx));
28914 else
28916 /* We have a shift-by-register. */
28917 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28919 /* This alternative requires the scratch registers. */
28920 gcc_assert (scratch1 && REG_P (scratch1));
28921 gcc_assert (scratch2 && REG_P (scratch2));
28923 /* We will need the values "amount-32" and "32-amount" later.
28924 Swapping them around now allows the later code to be more general. */
28925 switch (code)
28927 case ASHIFT:
28928 emit_insn (SUB_32 (scratch1, amount));
28929 emit_insn (RSB_32 (scratch2, amount));
28930 break;
28931 case ASHIFTRT:
28932 emit_insn (RSB_32 (scratch1, amount));
28933 /* Also set CC = amount > 32. */
28934 emit_insn (SUB_S_32 (scratch2, amount));
28935 break;
28936 case LSHIFTRT:
28937 emit_insn (RSB_32 (scratch1, amount));
28938 emit_insn (SUB_32 (scratch2, amount));
28939 break;
28940 default:
28941 gcc_unreachable ();
28944 /* Emit code like this:
28946 arithmetic-left:
28947 out_down = in_down << amount;
28948 out_down = (in_up << (amount - 32)) | out_down;
28949 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28950 out_up = in_up << amount;
28952 arithmetic-right:
28953 out_down = in_down >> amount;
28954 out_down = (in_up << (32 - amount)) | out_down;
28955 if (amount < 32)
28956 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28957 out_up = in_up << amount;
28959 logical-right:
28960 out_down = in_down >> amount;
28961 out_down = (in_up << (32 - amount)) | out_down;
28962 if (amount < 32)
28963 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28964 out_up = in_up << amount;
28966 The ARM and Thumb2 variants are the same but implemented slightly
28967 differently. If this were only called during expand we could just
28968 use the Thumb2 case and let combine do the right thing, but this
28969 can also be called from post-reload splitters. */
28971 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28973 if (!TARGET_THUMB2)
28975 /* Emit code for ARM mode. */
28976 emit_insn (SET (out_down,
28977 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28978 if (code == ASHIFTRT)
28980 rtx_code_label *done_label = gen_label_rtx ();
28981 emit_jump_insn (BRANCH (LT, done_label));
28982 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28983 out_down)));
28984 emit_label (done_label);
28986 else
28987 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28988 out_down)));
28990 else
28992 /* Emit code for Thumb2 mode.
28993 Thumb2 can't do shift and or in one insn. */
28994 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28995 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28997 if (code == ASHIFTRT)
28999 rtx_code_label *done_label = gen_label_rtx ();
29000 emit_jump_insn (BRANCH (LT, done_label));
29001 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29002 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29003 emit_label (done_label);
29005 else
29007 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29008 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29012 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29015 #undef SUB_32
29016 #undef RSB_32
29017 #undef SUB_S_32
29018 #undef SET
29019 #undef SHIFT
29020 #undef LSHIFT
29021 #undef REV_LSHIFT
29022 #undef ORR
29023 #undef BRANCH
29026 /* Returns true if the pattern is a valid symbolic address, which is either a
29027 symbol_ref or (symbol_ref + addend).
29029 According to the ARM ELF ABI, the initial addend of REL-type relocations
29030 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29031 literal field of the instruction as a 16-bit signed value in the range
29032 -32768 <= A < 32768. */
29034 bool
29035 arm_valid_symbolic_address_p (rtx addr)
29037 rtx xop0, xop1 = NULL_RTX;
29038 rtx tmp = addr;
29040 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29041 return true;
29043 /* (const (plus: symbol_ref const_int)) */
29044 if (GET_CODE (addr) == CONST)
29045 tmp = XEXP (addr, 0);
29047 if (GET_CODE (tmp) == PLUS)
29049 xop0 = XEXP (tmp, 0);
29050 xop1 = XEXP (tmp, 1);
29052 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29053 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29056 return false;
29059 /* Returns true if a valid comparison operation and makes
29060 the operands in a form that is valid. */
29061 bool
29062 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29064 enum rtx_code code = GET_CODE (*comparison);
29065 int code_int;
29066 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29067 ? GET_MODE (*op2) : GET_MODE (*op1);
29069 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29071 if (code == UNEQ || code == LTGT)
29072 return false;
29074 code_int = (int)code;
29075 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29076 PUT_CODE (*comparison, (enum rtx_code)code_int);
29078 switch (mode)
29080 case SImode:
29081 if (!arm_add_operand (*op1, mode))
29082 *op1 = force_reg (mode, *op1);
29083 if (!arm_add_operand (*op2, mode))
29084 *op2 = force_reg (mode, *op2);
29085 return true;
29087 case DImode:
29088 if (!cmpdi_operand (*op1, mode))
29089 *op1 = force_reg (mode, *op1);
29090 if (!cmpdi_operand (*op2, mode))
29091 *op2 = force_reg (mode, *op2);
29092 return true;
29094 case SFmode:
29095 case DFmode:
29096 if (!arm_float_compare_operand (*op1, mode))
29097 *op1 = force_reg (mode, *op1);
29098 if (!arm_float_compare_operand (*op2, mode))
29099 *op2 = force_reg (mode, *op2);
29100 return true;
29101 default:
29102 break;
29105 return false;
29109 /* Maximum number of instructions to set block of memory. */
29110 static int
29111 arm_block_set_max_insns (void)
29113 if (optimize_function_for_size_p (cfun))
29114 return 4;
29115 else
29116 return current_tune->max_insns_inline_memset;
29119 /* Return TRUE if it's profitable to set block of memory for
29120 non-vectorized case. VAL is the value to set the memory
29121 with. LENGTH is the number of bytes to set. ALIGN is the
29122 alignment of the destination memory in bytes. UNALIGNED_P
29123 is TRUE if we can only set the memory with instructions
29124 meeting alignment requirements. USE_STRD_P is TRUE if we
29125 can use strd to set the memory. */
29126 static bool
29127 arm_block_set_non_vect_profit_p (rtx val,
29128 unsigned HOST_WIDE_INT length,
29129 unsigned HOST_WIDE_INT align,
29130 bool unaligned_p, bool use_strd_p)
29132 int num = 0;
29133 /* For leftovers in bytes of 0-7, we can set the memory block using
29134 strb/strh/str with minimum instruction number. */
29135 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29137 if (unaligned_p)
29139 num = arm_const_inline_cost (SET, val);
29140 num += length / align + length % align;
29142 else if (use_strd_p)
29144 num = arm_const_double_inline_cost (val);
29145 num += (length >> 3) + leftover[length & 7];
29147 else
29149 num = arm_const_inline_cost (SET, val);
29150 num += (length >> 2) + leftover[length & 3];
29153 /* We may be able to combine last pair STRH/STRB into a single STR
29154 by shifting one byte back. */
29155 if (unaligned_access && length > 3 && (length & 3) == 3)
29156 num--;
29158 return (num <= arm_block_set_max_insns ());
29161 /* Return TRUE if it's profitable to set block of memory for
29162 vectorized case. LENGTH is the number of bytes to set.
29163 ALIGN is the alignment of destination memory in bytes.
29164 MODE is the vector mode used to set the memory. */
29165 static bool
29166 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29167 unsigned HOST_WIDE_INT align,
29168 machine_mode mode)
29170 int num;
29171 bool unaligned_p = ((align & 3) != 0);
29172 unsigned int nelt = GET_MODE_NUNITS (mode);
29174 /* Instruction loading constant value. */
29175 num = 1;
29176 /* Instructions storing the memory. */
29177 num += (length + nelt - 1) / nelt;
29178 /* Instructions adjusting the address expression. Only need to
29179 adjust address expression if it's 4 bytes aligned and bytes
29180 leftover can only be stored by mis-aligned store instruction. */
29181 if (!unaligned_p && (length & 3) != 0)
29182 num++;
29184 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29185 if (!unaligned_p && mode == V16QImode)
29186 num--;
29188 return (num <= arm_block_set_max_insns ());
29191 /* Set a block of memory using vectorization instructions for the
29192 unaligned case. We fill the first LENGTH bytes of the memory
29193 area starting from DSTBASE with byte constant VALUE. ALIGN is
29194 the alignment requirement of memory. Return TRUE if succeeded. */
29195 static bool
29196 arm_block_set_unaligned_vect (rtx dstbase,
29197 unsigned HOST_WIDE_INT length,
29198 unsigned HOST_WIDE_INT value,
29199 unsigned HOST_WIDE_INT align)
29201 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29202 rtx dst, mem;
29203 rtx val_elt, val_vec, reg;
29204 rtx rval[MAX_VECT_LEN];
29205 rtx (*gen_func) (rtx, rtx);
29206 machine_mode mode;
29207 unsigned HOST_WIDE_INT v = value;
29209 gcc_assert ((align & 0x3) != 0);
29210 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29211 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29212 if (length >= nelt_v16)
29214 mode = V16QImode;
29215 gen_func = gen_movmisalignv16qi;
29217 else
29219 mode = V8QImode;
29220 gen_func = gen_movmisalignv8qi;
29222 nelt_mode = GET_MODE_NUNITS (mode);
29223 gcc_assert (length >= nelt_mode);
29224 /* Skip if it isn't profitable. */
29225 if (!arm_block_set_vect_profit_p (length, align, mode))
29226 return false;
29228 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29229 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29231 v = sext_hwi (v, BITS_PER_WORD);
29232 val_elt = GEN_INT (v);
29233 for (j = 0; j < nelt_mode; j++)
29234 rval[j] = val_elt;
29236 reg = gen_reg_rtx (mode);
29237 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29238 /* Emit instruction loading the constant value. */
29239 emit_move_insn (reg, val_vec);
29241 /* Handle nelt_mode bytes in a vector. */
29242 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29244 emit_insn ((*gen_func) (mem, reg));
29245 if (i + 2 * nelt_mode <= length)
29246 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29249 /* If there are not less than nelt_v8 bytes leftover, we must be in
29250 V16QI mode. */
29251 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29253 /* Handle (8, 16) bytes leftover. */
29254 if (i + nelt_v8 < length)
29256 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29257 /* We are shifting bytes back, set the alignment accordingly. */
29258 if ((length & 1) != 0 && align >= 2)
29259 set_mem_align (mem, BITS_PER_UNIT);
29261 emit_insn (gen_movmisalignv16qi (mem, reg));
29263 /* Handle (0, 8] bytes leftover. */
29264 else if (i < length && i + nelt_v8 >= length)
29266 if (mode == V16QImode)
29268 reg = gen_lowpart (V8QImode, reg);
29269 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
29271 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29272 + (nelt_mode - nelt_v8))));
29273 /* We are shifting bytes back, set the alignment accordingly. */
29274 if ((length & 1) != 0 && align >= 2)
29275 set_mem_align (mem, BITS_PER_UNIT);
29277 emit_insn (gen_movmisalignv8qi (mem, reg));
29280 return true;
29283 /* Set a block of memory using vectorization instructions for the
29284 aligned case. We fill the first LENGTH bytes of the memory area
29285 starting from DSTBASE with byte constant VALUE. ALIGN is the
29286 alignment requirement of memory. Return TRUE if succeeded. */
29287 static bool
29288 arm_block_set_aligned_vect (rtx dstbase,
29289 unsigned HOST_WIDE_INT length,
29290 unsigned HOST_WIDE_INT value,
29291 unsigned HOST_WIDE_INT align)
29293 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29294 rtx dst, addr, mem;
29295 rtx val_elt, val_vec, reg;
29296 rtx rval[MAX_VECT_LEN];
29297 machine_mode mode;
29298 unsigned HOST_WIDE_INT v = value;
29300 gcc_assert ((align & 0x3) == 0);
29301 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29302 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29303 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29304 mode = V16QImode;
29305 else
29306 mode = V8QImode;
29308 nelt_mode = GET_MODE_NUNITS (mode);
29309 gcc_assert (length >= nelt_mode);
29310 /* Skip if it isn't profitable. */
29311 if (!arm_block_set_vect_profit_p (length, align, mode))
29312 return false;
29314 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29316 v = sext_hwi (v, BITS_PER_WORD);
29317 val_elt = GEN_INT (v);
29318 for (j = 0; j < nelt_mode; j++)
29319 rval[j] = val_elt;
29321 reg = gen_reg_rtx (mode);
29322 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29323 /* Emit instruction loading the constant value. */
29324 emit_move_insn (reg, val_vec);
29326 i = 0;
29327 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29328 if (mode == V16QImode)
29330 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29331 emit_insn (gen_movmisalignv16qi (mem, reg));
29332 i += nelt_mode;
29333 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29334 if (i + nelt_v8 < length && i + nelt_v16 > length)
29336 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29337 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29338 /* We are shifting bytes back, set the alignment accordingly. */
29339 if ((length & 0x3) == 0)
29340 set_mem_align (mem, BITS_PER_UNIT * 4);
29341 else if ((length & 0x1) == 0)
29342 set_mem_align (mem, BITS_PER_UNIT * 2);
29343 else
29344 set_mem_align (mem, BITS_PER_UNIT);
29346 emit_insn (gen_movmisalignv16qi (mem, reg));
29347 return true;
29349 /* Fall through for bytes leftover. */
29350 mode = V8QImode;
29351 nelt_mode = GET_MODE_NUNITS (mode);
29352 reg = gen_lowpart (V8QImode, reg);
29355 /* Handle 8 bytes in a vector. */
29356 for (; (i + nelt_mode <= length); i += nelt_mode)
29358 addr = plus_constant (Pmode, dst, i);
29359 mem = adjust_automodify_address (dstbase, mode, addr, i);
29360 emit_move_insn (mem, reg);
29363 /* Handle single word leftover by shifting 4 bytes back. We can
29364 use aligned access for this case. */
29365 if (i + UNITS_PER_WORD == length)
29367 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29368 mem = adjust_automodify_address (dstbase, mode,
29369 addr, i - UNITS_PER_WORD);
29370 /* We are shifting 4 bytes back, set the alignment accordingly. */
29371 if (align > UNITS_PER_WORD)
29372 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29374 emit_move_insn (mem, reg);
29376 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29377 We have to use unaligned access for this case. */
29378 else if (i < length)
29380 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29381 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29382 /* We are shifting bytes back, set the alignment accordingly. */
29383 if ((length & 1) == 0)
29384 set_mem_align (mem, BITS_PER_UNIT * 2);
29385 else
29386 set_mem_align (mem, BITS_PER_UNIT);
29388 emit_insn (gen_movmisalignv8qi (mem, reg));
29391 return true;
29394 /* Set a block of memory using plain strh/strb instructions, only
29395 using instructions allowed by ALIGN on processor. We fill the
29396 first LENGTH bytes of the memory area starting from DSTBASE
29397 with byte constant VALUE. ALIGN is the alignment requirement
29398 of memory. */
29399 static bool
29400 arm_block_set_unaligned_non_vect (rtx dstbase,
29401 unsigned HOST_WIDE_INT length,
29402 unsigned HOST_WIDE_INT value,
29403 unsigned HOST_WIDE_INT align)
29405 unsigned int i;
29406 rtx dst, addr, mem;
29407 rtx val_exp, val_reg, reg;
29408 machine_mode mode;
29409 HOST_WIDE_INT v = value;
29411 gcc_assert (align == 1 || align == 2);
29413 if (align == 2)
29414 v |= (value << BITS_PER_UNIT);
29416 v = sext_hwi (v, BITS_PER_WORD);
29417 val_exp = GEN_INT (v);
29418 /* Skip if it isn't profitable. */
29419 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29420 align, true, false))
29421 return false;
29423 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29424 mode = (align == 2 ? HImode : QImode);
29425 val_reg = force_reg (SImode, val_exp);
29426 reg = gen_lowpart (mode, val_reg);
29428 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29430 addr = plus_constant (Pmode, dst, i);
29431 mem = adjust_automodify_address (dstbase, mode, addr, i);
29432 emit_move_insn (mem, reg);
29435 /* Handle single byte leftover. */
29436 if (i + 1 == length)
29438 reg = gen_lowpart (QImode, val_reg);
29439 addr = plus_constant (Pmode, dst, i);
29440 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29441 emit_move_insn (mem, reg);
29442 i++;
29445 gcc_assert (i == length);
29446 return true;
29449 /* Set a block of memory using plain strd/str/strh/strb instructions,
29450 to permit unaligned copies on processors which support unaligned
29451 semantics for those instructions. We fill the first LENGTH bytes
29452 of the memory area starting from DSTBASE with byte constant VALUE.
29453 ALIGN is the alignment requirement of memory. */
29454 static bool
29455 arm_block_set_aligned_non_vect (rtx dstbase,
29456 unsigned HOST_WIDE_INT length,
29457 unsigned HOST_WIDE_INT value,
29458 unsigned HOST_WIDE_INT align)
29460 unsigned int i;
29461 rtx dst, addr, mem;
29462 rtx val_exp, val_reg, reg;
29463 unsigned HOST_WIDE_INT v;
29464 bool use_strd_p;
29466 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29467 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29469 v = (value | (value << 8) | (value << 16) | (value << 24));
29470 if (length < UNITS_PER_WORD)
29471 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29473 if (use_strd_p)
29474 v |= (v << BITS_PER_WORD);
29475 else
29476 v = sext_hwi (v, BITS_PER_WORD);
29478 val_exp = GEN_INT (v);
29479 /* Skip if it isn't profitable. */
29480 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29481 align, false, use_strd_p))
29483 if (!use_strd_p)
29484 return false;
29486 /* Try without strd. */
29487 v = (v >> BITS_PER_WORD);
29488 v = sext_hwi (v, BITS_PER_WORD);
29489 val_exp = GEN_INT (v);
29490 use_strd_p = false;
29491 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29492 align, false, use_strd_p))
29493 return false;
29496 i = 0;
29497 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29498 /* Handle double words using strd if possible. */
29499 if (use_strd_p)
29501 val_reg = force_reg (DImode, val_exp);
29502 reg = val_reg;
29503 for (; (i + 8 <= length); i += 8)
29505 addr = plus_constant (Pmode, dst, i);
29506 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29507 emit_move_insn (mem, reg);
29510 else
29511 val_reg = force_reg (SImode, val_exp);
29513 /* Handle words. */
29514 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29515 for (; (i + 4 <= length); i += 4)
29517 addr = plus_constant (Pmode, dst, i);
29518 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29519 if ((align & 3) == 0)
29520 emit_move_insn (mem, reg);
29521 else
29522 emit_insn (gen_unaligned_storesi (mem, reg));
29525 /* Merge last pair of STRH and STRB into a STR if possible. */
29526 if (unaligned_access && i > 0 && (i + 3) == length)
29528 addr = plus_constant (Pmode, dst, i - 1);
29529 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29530 /* We are shifting one byte back, set the alignment accordingly. */
29531 if ((align & 1) == 0)
29532 set_mem_align (mem, BITS_PER_UNIT);
29534 /* Most likely this is an unaligned access, and we can't tell at
29535 compilation time. */
29536 emit_insn (gen_unaligned_storesi (mem, reg));
29537 return true;
29540 /* Handle half word leftover. */
29541 if (i + 2 <= length)
29543 reg = gen_lowpart (HImode, val_reg);
29544 addr = plus_constant (Pmode, dst, i);
29545 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29546 if ((align & 1) == 0)
29547 emit_move_insn (mem, reg);
29548 else
29549 emit_insn (gen_unaligned_storehi (mem, reg));
29551 i += 2;
29554 /* Handle single byte leftover. */
29555 if (i + 1 == length)
29557 reg = gen_lowpart (QImode, val_reg);
29558 addr = plus_constant (Pmode, dst, i);
29559 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29560 emit_move_insn (mem, reg);
29563 return true;
29566 /* Set a block of memory using vectorization instructions for both
29567 aligned and unaligned cases. We fill the first LENGTH bytes of
29568 the memory area starting from DSTBASE with byte constant VALUE.
29569 ALIGN is the alignment requirement of memory. */
29570 static bool
29571 arm_block_set_vect (rtx dstbase,
29572 unsigned HOST_WIDE_INT length,
29573 unsigned HOST_WIDE_INT value,
29574 unsigned HOST_WIDE_INT align)
29576 /* Check whether we need to use unaligned store instruction. */
29577 if (((align & 3) != 0 || (length & 3) != 0)
29578 /* Check whether unaligned store instruction is available. */
29579 && (!unaligned_access || BYTES_BIG_ENDIAN))
29580 return false;
29582 if ((align & 3) == 0)
29583 return arm_block_set_aligned_vect (dstbase, length, value, align);
29584 else
29585 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29588 /* Expand string store operation. Firstly we try to do that by using
29589 vectorization instructions, then try with ARM unaligned access and
29590 double-word store if profitable. OPERANDS[0] is the destination,
29591 OPERANDS[1] is the number of bytes, operands[2] is the value to
29592 initialize the memory, OPERANDS[3] is the known alignment of the
29593 destination. */
29594 bool
29595 arm_gen_setmem (rtx *operands)
29597 rtx dstbase = operands[0];
29598 unsigned HOST_WIDE_INT length;
29599 unsigned HOST_WIDE_INT value;
29600 unsigned HOST_WIDE_INT align;
29602 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29603 return false;
29605 length = UINTVAL (operands[1]);
29606 if (length > 64)
29607 return false;
29609 value = (UINTVAL (operands[2]) & 0xFF);
29610 align = UINTVAL (operands[3]);
29611 if (TARGET_NEON && length >= 8
29612 && current_tune->string_ops_prefer_neon
29613 && arm_block_set_vect (dstbase, length, value, align))
29614 return true;
29616 if (!unaligned_access && (align & 3) != 0)
29617 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29619 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29623 static bool
29624 arm_macro_fusion_p (void)
29626 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
29630 static bool
29631 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29633 rtx set_dest;
29634 rtx prev_set = single_set (prev);
29635 rtx curr_set = single_set (curr);
29637 if (!prev_set
29638 || !curr_set)
29639 return false;
29641 if (any_condjump_p (curr))
29642 return false;
29644 if (!arm_macro_fusion_p ())
29645 return false;
29647 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT)
29649 /* We are trying to fuse
29650 movw imm / movt imm
29651 instructions as a group that gets scheduled together. */
29653 set_dest = SET_DEST (curr_set);
29655 if (GET_MODE (set_dest) != SImode)
29656 return false;
29658 /* We are trying to match:
29659 prev (movw) == (set (reg r0) (const_int imm16))
29660 curr (movt) == (set (zero_extract (reg r0)
29661 (const_int 16)
29662 (const_int 16))
29663 (const_int imm16_1))
29665 prev (movw) == (set (reg r1)
29666 (high (symbol_ref ("SYM"))))
29667 curr (movt) == (set (reg r0)
29668 (lo_sum (reg r1)
29669 (symbol_ref ("SYM")))) */
29670 if (GET_CODE (set_dest) == ZERO_EXTRACT)
29672 if (CONST_INT_P (SET_SRC (curr_set))
29673 && CONST_INT_P (SET_SRC (prev_set))
29674 && REG_P (XEXP (set_dest, 0))
29675 && REG_P (SET_DEST (prev_set))
29676 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29677 return true;
29679 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29680 && REG_P (SET_DEST (curr_set))
29681 && REG_P (SET_DEST (prev_set))
29682 && GET_CODE (SET_SRC (prev_set)) == HIGH
29683 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29684 return true;
29686 return false;
29689 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29691 static unsigned HOST_WIDE_INT
29692 arm_asan_shadow_offset (void)
29694 return (unsigned HOST_WIDE_INT) 1 << 29;
29698 /* This is a temporary fix for PR60655. Ideally we need
29699 to handle most of these cases in the generic part but
29700 currently we reject minus (..) (sym_ref). We try to
29701 ameliorate the case with minus (sym_ref1) (sym_ref2)
29702 where they are in the same section. */
29704 static bool
29705 arm_const_not_ok_for_debug_p (rtx p)
29707 tree decl_op0 = NULL;
29708 tree decl_op1 = NULL;
29710 if (GET_CODE (p) == MINUS)
29712 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29714 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29715 if (decl_op1
29716 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29717 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29719 if ((TREE_CODE (decl_op1) == VAR_DECL
29720 || TREE_CODE (decl_op1) == CONST_DECL)
29721 && (TREE_CODE (decl_op0) == VAR_DECL
29722 || TREE_CODE (decl_op0) == CONST_DECL))
29723 return (get_variable_section (decl_op1, false)
29724 != get_variable_section (decl_op0, false));
29726 if (TREE_CODE (decl_op1) == LABEL_DECL
29727 && TREE_CODE (decl_op0) == LABEL_DECL)
29728 return (DECL_CONTEXT (decl_op1)
29729 != DECL_CONTEXT (decl_op0));
29732 return true;
29736 return false;
29739 /* return TRUE if x is a reference to a value in a constant pool */
29740 extern bool
29741 arm_is_constant_pool_ref (rtx x)
29743 return (MEM_P (x)
29744 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29745 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29748 /* Remember the last target of arm_set_current_function. */
29749 static GTY(()) tree arm_previous_fndecl;
29751 /* Invalidate arm_previous_fndecl. */
29752 void
29753 arm_reset_previous_fndecl (void)
29755 arm_previous_fndecl = NULL_TREE;
29758 /* Establish appropriate back-end context for processing the function
29759 FNDECL. The argument might be NULL to indicate processing at top
29760 level, outside of any function scope. */
29761 static void
29762 arm_set_current_function (tree fndecl)
29764 if (!fndecl || fndecl == arm_previous_fndecl)
29765 return;
29767 tree old_tree = (arm_previous_fndecl
29768 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
29769 : NULL_TREE);
29771 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
29773 arm_previous_fndecl = fndecl;
29774 if (old_tree == new_tree)
29775 return;
29777 if (new_tree && new_tree != target_option_default_node)
29779 cl_target_option_restore (&global_options,
29780 TREE_TARGET_OPTION (new_tree));
29782 if (TREE_TARGET_GLOBALS (new_tree))
29783 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
29784 else
29785 TREE_TARGET_GLOBALS (new_tree)
29786 = save_target_globals_default_opts ();
29789 else if (old_tree && old_tree != target_option_default_node)
29791 new_tree = target_option_current_node;
29793 cl_target_option_restore (&global_options,
29794 TREE_TARGET_OPTION (new_tree));
29795 if (TREE_TARGET_GLOBALS (new_tree))
29796 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
29797 else if (new_tree == target_option_default_node)
29798 restore_target_globals (&default_target_globals);
29799 else
29800 TREE_TARGET_GLOBALS (new_tree)
29801 = save_target_globals_default_opts ();
29804 arm_option_params_internal ();
29807 /* Implement TARGET_OPTION_PRINT. */
29809 static void
29810 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
29812 int flags = ptr->x_target_flags;
29814 fprintf (file, "%*sselected arch %s\n", indent, "",
29815 TARGET_THUMB2_P (flags) ? "thumb2" :
29816 TARGET_THUMB_P (flags) ? "thumb1" :
29817 "arm");
29820 /* Hook to determine if one function can safely inline another. */
29822 static bool
29823 arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED)
29825 /* Overidde default hook: Always OK to inline between different modes.
29826 Function with mode specific instructions, e.g using asm, must be explicitely
29827 protected with noinline. */
29828 return true;
29831 /* Hook to fix function's alignment affected by target attribute. */
29833 static void
29834 arm_relayout_function (tree fndecl)
29836 if (DECL_USER_ALIGN (fndecl))
29837 return;
29839 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
29841 if (!callee_tree)
29842 callee_tree = target_option_default_node;
29844 DECL_ALIGN (fndecl) =
29845 FUNCTION_BOUNDARY_P (TREE_TARGET_OPTION (callee_tree)->x_target_flags);
29848 /* Inner function to process the attribute((target(...))), take an argument and
29849 set the current options from the argument. If we have a list, recursively
29850 go over the list. */
29852 static bool
29853 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
29855 if (TREE_CODE (args) == TREE_LIST)
29857 bool ret = true;
29858 for (; args; args = TREE_CHAIN (args))
29859 if (TREE_VALUE (args)
29860 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
29861 ret = false;
29862 return ret;
29865 else if (TREE_CODE (args) != STRING_CST)
29867 error ("attribute %<target%> argument not a string");
29868 return false;
29871 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
29872 while (argstr && *argstr != '\0')
29874 while (ISSPACE (*argstr))
29875 argstr++;
29877 if (!strcmp (argstr, "thumb"))
29879 opts->x_target_flags |= MASK_THUMB;
29880 arm_option_check_internal (opts);
29881 return true;
29884 if (!strcmp (argstr, "arm"))
29886 opts->x_target_flags &= ~MASK_THUMB;
29887 arm_option_check_internal (opts);
29888 return true;
29891 warning (0, "attribute(target(\"%s\")) is unknown", argstr);
29892 return false;
29895 return false;
29898 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
29900 tree
29901 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
29902 struct gcc_options *opts_set)
29904 if (!arm_valid_target_attribute_rec (args, opts))
29905 return NULL_TREE;
29907 /* Do any overrides, such as global options arch=xxx. */
29908 arm_option_override_internal (opts, opts_set);
29910 return build_target_option_node (opts);
29913 static void
29914 add_attribute (const char * mode, tree *attributes)
29916 size_t len = strlen (mode);
29917 tree value = build_string (len, mode);
29919 TREE_TYPE (value) = build_array_type (char_type_node,
29920 build_index_type (size_int (len)));
29922 *attributes = tree_cons (get_identifier ("target"),
29923 build_tree_list (NULL_TREE, value),
29924 *attributes);
29927 /* For testing. Insert thumb or arm modes alternatively on functions. */
29929 static void
29930 arm_insert_attributes (tree fndecl, tree * attributes)
29932 const char *mode;
29934 if (! TARGET_FLIP_THUMB)
29935 return;
29937 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
29938 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
29939 return;
29941 /* Nested definitions must inherit mode. */
29942 if (current_function_decl)
29944 mode = TARGET_THUMB ? "thumb" : "arm";
29945 add_attribute (mode, attributes);
29946 return;
29949 /* If there is already a setting don't change it. */
29950 if (lookup_attribute ("target", *attributes) != NULL)
29951 return;
29953 mode = thumb_flipper ? "thumb" : "arm";
29954 add_attribute (mode, attributes);
29956 thumb_flipper = !thumb_flipper;
29959 /* Hook to validate attribute((target("string"))). */
29961 static bool
29962 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
29963 tree args, int ARG_UNUSED (flags))
29965 bool ret = true;
29966 struct gcc_options func_options;
29967 tree cur_tree, new_optimize;
29968 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
29970 /* Get the optimization options of the current function. */
29971 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
29973 /* If the function changed the optimization levels as well as setting target
29974 options, start with the optimizations specified. */
29975 if (!func_optimize)
29976 func_optimize = optimization_default_node;
29978 /* Init func_options. */
29979 memset (&func_options, 0, sizeof (func_options));
29980 init_options_struct (&func_options, NULL);
29981 lang_hooks.init_options_struct (&func_options);
29983 /* Initialize func_options to the defaults. */
29984 cl_optimization_restore (&func_options,
29985 TREE_OPTIMIZATION (func_optimize));
29987 cl_target_option_restore (&func_options,
29988 TREE_TARGET_OPTION (target_option_default_node));
29990 /* Set func_options flags with new target mode. */
29991 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
29992 &global_options_set);
29994 if (cur_tree == NULL_TREE)
29995 ret = false;
29997 new_optimize = build_optimization_node (&func_options);
29999 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30001 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30003 return ret;
30006 void
30007 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30009 if (TARGET_UNIFIED_ASM)
30010 fprintf (stream, "\t.syntax unified\n");
30011 else
30012 fprintf (stream, "\t.syntax divided\n");
30014 if (TARGET_THUMB)
30016 if (is_called_in_ARM_mode (decl)
30017 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30018 && cfun->is_thunk))
30019 fprintf (stream, "\t.code 32\n");
30020 else if (TARGET_THUMB1)
30021 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30022 else
30023 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30025 else
30026 fprintf (stream, "\t.arm\n");
30028 if (TARGET_POKE_FUNCTION_NAME)
30029 arm_poke_function_name (stream, (const char *) name);
30032 /* If MEM is in the form of [base+offset], extract the two parts
30033 of address and set to BASE and OFFSET, otherwise return false
30034 after clearing BASE and OFFSET. */
30036 static bool
30037 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30039 rtx addr;
30041 gcc_assert (MEM_P (mem));
30043 addr = XEXP (mem, 0);
30045 /* Strip off const from addresses like (const (addr)). */
30046 if (GET_CODE (addr) == CONST)
30047 addr = XEXP (addr, 0);
30049 if (GET_CODE (addr) == REG)
30051 *base = addr;
30052 *offset = const0_rtx;
30053 return true;
30056 if (GET_CODE (addr) == PLUS
30057 && GET_CODE (XEXP (addr, 0)) == REG
30058 && CONST_INT_P (XEXP (addr, 1)))
30060 *base = XEXP (addr, 0);
30061 *offset = XEXP (addr, 1);
30062 return true;
30065 *base = NULL_RTX;
30066 *offset = NULL_RTX;
30068 return false;
30071 /* If INSN is a load or store of address in the form of [base+offset],
30072 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
30073 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30074 otherwise return FALSE. */
30076 static bool
30077 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30079 rtx x, dest, src;
30081 gcc_assert (INSN_P (insn));
30082 x = PATTERN (insn);
30083 if (GET_CODE (x) != SET)
30084 return false;
30086 src = SET_SRC (x);
30087 dest = SET_DEST (x);
30088 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30090 *is_load = false;
30091 extract_base_offset_in_addr (dest, base, offset);
30093 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30095 *is_load = true;
30096 extract_base_offset_in_addr (src, base, offset);
30098 else
30099 return false;
30101 return (*base != NULL_RTX && *offset != NULL_RTX);
30104 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30106 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
30107 and PRI are only calculated for these instructions. For other instruction,
30108 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
30109 instruction fusion can be supported by returning different priorities.
30111 It's important that irrelevant instructions get the largest FUSION_PRI. */
30113 static void
30114 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30115 int *fusion_pri, int *pri)
30117 int tmp, off_val;
30118 bool is_load;
30119 rtx base, offset;
30121 gcc_assert (INSN_P (insn));
30123 tmp = max_pri - 1;
30124 if (!fusion_load_store (insn, &base, &offset, &is_load))
30126 *pri = tmp;
30127 *fusion_pri = tmp;
30128 return;
30131 /* Load goes first. */
30132 if (is_load)
30133 *fusion_pri = tmp - 1;
30134 else
30135 *fusion_pri = tmp - 2;
30137 tmp /= 2;
30139 /* INSN with smaller base register goes first. */
30140 tmp -= ((REGNO (base) & 0xff) << 20);
30142 /* INSN with smaller offset goes first. */
30143 off_val = (int)(INTVAL (offset));
30144 if (off_val >= 0)
30145 tmp -= (off_val & 0xfffff);
30146 else
30147 tmp += ((- off_val) & 0xfffff);
30149 *pri = tmp;
30150 return;
30152 #include "gt-arm.h"