1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "cfghooks.h"
28 #include "tree.h"
29 #include "rtl.h"
30 #include "df.h"
31 #include "alias.h"
32 #include "fold-const.h"
33 #include "stringpool.h"
34 #include "stor-layout.h"
35 #include "calls.h"
36 #include "varasm.h"
37 #include "regs.h"
38 #include "insn-config.h"
39 #include "conditions.h"
40 #include "output.h"
41 #include "insn-attr.h"
42 #include "flags.h"
43 #include "reload.h"
44 #include "expmed.h"
45 #include "dojump.h"
46 #include "explow.h"
47 #include "emit-rtl.h"
48 #include "stmt.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "diagnostic-core.h"
53 #include "recog.h"
54 #include "cfgrtl.h"
55 #include "cfganal.h"
56 #include "lcm.h"
57 #include "cfgbuild.h"
58 #include "cfgcleanup.h"
59 #include "cgraph.h"
60 #include "except.h"
61 #include "tm_p.h"
62 #include "target.h"
63 #include "sched-int.h"
64 #include "common/common-target.h"
65 #include "debug.h"
66 #include "langhooks.h"
67 #include "intl.h"
68 #include "libfuncs.h"
69 #include "params.h"
70 #include "opts.h"
71 #include "dumpfile.h"
72 #include "gimple-expr.h"
73 #include "target-globals.h"
74 #include "builtins.h"
75 #include "tm-constrs.h"
76 #include "rtl-iter.h"
78 /* This file should be included last. */
79 #include "target-def.h"
81 /* Forward definitions of types. */
82 typedef struct minipool_node Mnode;
83 typedef struct minipool_fixup Mfix;
85 void (*arm_lang_output_object_attributes_hook)(void);
87 struct four_ints
88 {
89 int i[4];
90 };
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static bool arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_add_gc_roots (void);
98 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
99 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
100 static unsigned bit_count (unsigned long);
101 static unsigned feature_count (const arm_feature_set*);
102 static int arm_address_register_rtx_p (rtx, int);
103 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
104 static bool is_called_in_ARM_mode (tree);
105 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
106 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
107 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
108 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
109 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
110 inline static int thumb1_index_register_rtx_p (rtx, int);
111 static int thumb_far_jump_used_p (void);
112 static bool thumb_force_lr_save (void);
113 static unsigned arm_size_return_regs (void);
114 static bool arm_assemble_integer (rtx, unsigned int, int);
115 static void arm_print_operand (FILE *, rtx, int);
116 static void arm_print_operand_address (FILE *, rtx);
117 static bool arm_print_operand_punct_valid_p (unsigned char code);
118 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
119 static arm_cc get_arm_condition_code (rtx);
120 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
121 static const char *output_multi_immediate (rtx *, const char *, const char *,
122 int, HOST_WIDE_INT);
123 static const char *shift_op (rtx, HOST_WIDE_INT *);
124 static struct machine_function *arm_init_machine_status (void);
125 static void thumb_exit (FILE *, int);
126 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
127 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
128 static Mnode *add_minipool_forward_ref (Mfix *);
129 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
130 static Mnode *add_minipool_backward_ref (Mfix *);
131 static void assign_minipool_offsets (Mfix *);
132 static void arm_print_value (FILE *, rtx);
133 static void dump_minipool (rtx_insn *);
134 static int arm_barrier_cost (rtx_insn *);
135 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
136 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
137 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
138 machine_mode, rtx);
139 static void arm_reorg (void);
140 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
141 static unsigned long arm_compute_save_reg0_reg12_mask (void);
142 static unsigned long arm_compute_save_reg_mask (void);
143 static unsigned long arm_isr_value (tree);
144 static unsigned long arm_compute_func_type (void);
145 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
146 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
148 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
149 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
150 #endif
151 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
152 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
153 static int arm_comp_type_attributes (const_tree, const_tree);
154 static void arm_set_default_type_attributes (tree);
155 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
156 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
157 static int optimal_immediate_sequence (enum rtx_code code,
158 unsigned HOST_WIDE_INT val,
159 struct four_ints *return_sequence);
160 static int optimal_immediate_sequence_1 (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence,
163 int i);
164 static int arm_get_strip_length (int);
165 static bool arm_function_ok_for_sibcall (tree, tree);
166 static machine_mode arm_promote_function_mode (const_tree,
167 machine_mode, int *,
168 const_tree, int);
169 static bool arm_return_in_memory (const_tree, const_tree);
170 static rtx arm_function_value (const_tree, const_tree, bool);
171 static rtx arm_libcall_value_1 (machine_mode);
172 static rtx arm_libcall_value (machine_mode, const_rtx);
173 static bool arm_function_value_regno_p (const unsigned int);
174 static void arm_internal_label (FILE *, const char *, unsigned long);
175 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
176 tree);
177 static bool arm_have_conditional_execution (void);
178 static bool arm_cannot_force_const_mem (machine_mode, rtx);
179 static bool arm_legitimate_constant_p (machine_mode, rtx);
180 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
181 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
182 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
183 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
184 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
185 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
186 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
187 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
188 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
189 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
190 static void emit_constant_insn (rtx cond, rtx pattern);
191 static rtx_insn *emit_set_insn (rtx, rtx);
192 static rtx emit_multi_reg_push (unsigned long, unsigned long);
193 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
194 tree, bool);
195 static rtx arm_function_arg (cumulative_args_t, machine_mode,
196 const_tree, bool);
197 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
198 const_tree, bool);
199 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
200 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
201 const_tree);
202 static rtx aapcs_libcall_value (machine_mode);
203 static int aapcs_select_return_coproc (const_tree, const_tree);
205 #ifdef OBJECT_FORMAT_ELF
206 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
207 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
208 #endif
209 #ifndef ARM_PE
210 static void arm_encode_section_info (tree, rtx, int);
211 #endif
213 static void arm_file_end (void);
214 static void arm_file_start (void);
215 static void arm_insert_attributes (tree, tree *);
217 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
218 tree, int *, int);
219 static bool arm_pass_by_reference (cumulative_args_t,
220 machine_mode, const_tree, bool);
221 static bool arm_promote_prototypes (const_tree);
222 static bool arm_default_short_enums (void);
223 static bool arm_align_anon_bitfield (void);
224 static bool arm_return_in_msb (const_tree);
225 static bool arm_must_pass_in_stack (machine_mode, const_tree);
226 static bool arm_return_in_memory (const_tree, const_tree);
227 #if ARM_UNWIND_INFO
228 static void arm_unwind_emit (FILE *, rtx_insn *);
229 static bool arm_output_ttype (rtx);
230 static void arm_asm_emit_except_personality (rtx);
231 static void arm_asm_init_sections (void);
232 #endif
233 static rtx arm_dwarf_register_span (rtx);
235 static tree arm_cxx_guard_type (void);
236 static bool arm_cxx_guard_mask_bit (void);
237 static tree arm_get_cookie_size (tree);
238 static bool arm_cookie_has_size (void);
239 static bool arm_cxx_cdtor_returns_this (void);
240 static bool arm_cxx_key_method_may_be_inline (void);
241 static void arm_cxx_determine_class_data_visibility (tree);
242 static bool arm_cxx_class_data_always_comdat (void);
243 static bool arm_cxx_use_aeabi_atexit (void);
244 static void arm_init_libfuncs (void);
245 static tree arm_build_builtin_va_list (void);
246 static void arm_expand_builtin_va_start (tree, rtx);
247 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
248 static void arm_option_override (void);
249 static void arm_option_print (FILE *, int, struct cl_target_option *);
250 static void arm_set_current_function (tree);
251 static bool arm_can_inline_p (tree, tree);
252 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
253 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
254 static bool arm_macro_fusion_p (void);
255 static bool arm_cannot_copy_insn_p (rtx_insn *);
256 static int arm_issue_rate (void);
257 static int arm_first_cycle_multipass_dfa_lookahead (void);
258 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
259 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
260 static bool arm_output_addr_const_extra (FILE *, rtx);
261 static bool arm_allocate_stack_slots_for_args (void);
262 static bool arm_warn_func_return (tree);
263 static const char *arm_invalid_parameter_type (const_tree t);
264 static const char *arm_invalid_return_type (const_tree t);
265 static tree arm_promoted_type (const_tree t);
266 static tree arm_convert_to_type (tree type, tree expr);
267 static bool arm_scalar_mode_supported_p (machine_mode);
268 static bool arm_frame_pointer_required (void);
269 static bool arm_can_eliminate (const int, const int);
270 static void arm_asm_trampoline_template (FILE *);
271 static void arm_trampoline_init (rtx, tree, rtx);
272 static rtx arm_trampoline_adjust_address (rtx);
273 static rtx arm_pic_static_addr (rtx orig, rtx reg);
274 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
275 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
276 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
277 static bool arm_array_mode_supported_p (machine_mode,
278 unsigned HOST_WIDE_INT);
279 static machine_mode arm_preferred_simd_mode (machine_mode);
280 static bool arm_class_likely_spilled_p (reg_class_t);
281 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
282 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
283 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
284 const_tree type,
285 int misalignment,
286 bool is_packed);
287 static void arm_conditional_register_usage (void);
288 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
289 static unsigned int arm_autovectorize_vector_sizes (void);
290 static int arm_default_branch_cost (bool, bool);
291 static int arm_cortex_a5_branch_cost (bool, bool);
292 static int arm_cortex_m_branch_cost (bool, bool);
293 static int arm_cortex_m7_branch_cost (bool, bool);
295 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
296 const unsigned char *sel);
298 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
300 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
301 tree vectype,
302 int misalign ATTRIBUTE_UNUSED);
303 static unsigned arm_add_stmt_cost (void *data, int count,
304 enum vect_cost_for_stmt kind,
305 struct _stmt_vec_info *stmt_info,
306 int misalign,
307 enum vect_cost_model_location where);
309 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
310 bool op0_preserve_value);
311 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
313 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
315 /* Table of machine attributes. */
316 static const struct attribute_spec arm_attribute_table[] =
317 {
318 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
319 affects_type_identity } */
320 /* Function calls made to this symbol must be done indirectly, because
321 it may lie outside of the 26 bit addressing range of a normal function
322 call. */
323 { "long_call", 0, 0, false, true, true, NULL, false },
324 /* Whereas these functions are always known to reside within the 26 bit
325 addressing range. */
326 { "short_call", 0, 0, false, true, true, NULL, false },
327 /* Specify the procedure call conventions for a function. */
328 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
329 false },
330 /* Interrupt Service Routines have special prologue and epilogue requirements. */
331 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
332 false },
333 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
334 false },
335 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
336 false },
337 #ifdef ARM_PE
338 /* ARM/PE has three new attributes:
339 interfacearm - ?
340 dllexport - for exporting a function/variable that will live in a dll
341 dllimport - for importing a function/variable from a dll
343 Microsoft allows multiple declspecs in one __declspec, separating
344 them with spaces. We do NOT support this. Instead, use __declspec
345 multiple times.
346 */
347 { "dllimport", 0, 0, true, false, false, NULL, false },
348 { "dllexport", 0, 0, true, false, false, NULL, false },
349 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
350 false },
351 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
352 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
353 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
354 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
355 false },
356 #endif
357 { NULL, 0, 0, false, false, false, NULL, false }
358 };
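/* Illustrative usage sketch (not part of this file; the declarations below
   are hypothetical examples): how the attributes registered in
   arm_attribute_table above are typically spelled in user code.  */
#if 0
/* Must be called indirectly; the callee may lie outside the 26-bit branch
   range.  */
extern void far_away_handler (void) __attribute__ ((long_call));

/* Known to reside within the 26-bit range, so a direct call is fine.  */
extern void nearby_helper (void) __attribute__ ((short_call));

/* Interrupt service routine with the special prologue/epilogue.  */
void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

/* No compiler-generated prologue/epilogue; the body must save and restore
   whatever it touches.  */
void bare_entry (void) __attribute__ ((naked));
#endif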
360 /* Initialize the GCC target structure. */
361 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
362 #undef TARGET_MERGE_DECL_ATTRIBUTES
363 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
364 #endif
366 #undef TARGET_LEGITIMIZE_ADDRESS
367 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
369 #undef TARGET_LRA_P
370 #define TARGET_LRA_P hook_bool_void_true
372 #undef TARGET_ATTRIBUTE_TABLE
373 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
375 #undef TARGET_INSERT_ATTRIBUTES
376 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
378 #undef TARGET_ASM_FILE_START
379 #define TARGET_ASM_FILE_START arm_file_start
380 #undef TARGET_ASM_FILE_END
381 #define TARGET_ASM_FILE_END arm_file_end
383 #undef TARGET_ASM_ALIGNED_SI_OP
384 #define TARGET_ASM_ALIGNED_SI_OP NULL
385 #undef TARGET_ASM_INTEGER
386 #define TARGET_ASM_INTEGER arm_assemble_integer
388 #undef TARGET_PRINT_OPERAND
389 #define TARGET_PRINT_OPERAND arm_print_operand
390 #undef TARGET_PRINT_OPERAND_ADDRESS
391 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
392 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
393 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
395 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
396 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
398 #undef TARGET_ASM_FUNCTION_PROLOGUE
399 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
401 #undef TARGET_ASM_FUNCTION_EPILOGUE
402 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
404 #undef TARGET_CAN_INLINE_P
405 #define TARGET_CAN_INLINE_P arm_can_inline_p
407 #undef TARGET_OPTION_OVERRIDE
408 #define TARGET_OPTION_OVERRIDE arm_option_override
410 #undef TARGET_OPTION_PRINT
411 #define TARGET_OPTION_PRINT arm_option_print
413 #undef TARGET_COMP_TYPE_ATTRIBUTES
414 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
416 #undef TARGET_SCHED_MACRO_FUSION_P
417 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
419 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
420 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
422 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
423 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
425 #undef TARGET_SCHED_ADJUST_COST
426 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
428 #undef TARGET_SET_CURRENT_FUNCTION
429 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
431 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
432 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
434 #undef TARGET_SCHED_REORDER
435 #define TARGET_SCHED_REORDER arm_sched_reorder
437 #undef TARGET_REGISTER_MOVE_COST
438 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
440 #undef TARGET_MEMORY_MOVE_COST
441 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
443 #undef TARGET_ENCODE_SECTION_INFO
444 #ifdef ARM_PE
445 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
446 #else
447 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
448 #endif
450 #undef TARGET_STRIP_NAME_ENCODING
451 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
453 #undef TARGET_ASM_INTERNAL_LABEL
454 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
456 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
457 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
459 #undef TARGET_FUNCTION_VALUE
460 #define TARGET_FUNCTION_VALUE arm_function_value
462 #undef TARGET_LIBCALL_VALUE
463 #define TARGET_LIBCALL_VALUE arm_libcall_value
465 #undef TARGET_FUNCTION_VALUE_REGNO_P
466 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
468 #undef TARGET_ASM_OUTPUT_MI_THUNK
469 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
470 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
471 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
473 #undef TARGET_RTX_COSTS
474 #define TARGET_RTX_COSTS arm_rtx_costs
475 #undef TARGET_ADDRESS_COST
476 #define TARGET_ADDRESS_COST arm_address_cost
478 #undef TARGET_SHIFT_TRUNCATION_MASK
479 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
480 #undef TARGET_VECTOR_MODE_SUPPORTED_P
481 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
482 #undef TARGET_ARRAY_MODE_SUPPORTED_P
483 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
484 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
485 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
486 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
487 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
488 arm_autovectorize_vector_sizes
490 #undef TARGET_MACHINE_DEPENDENT_REORG
491 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
493 #undef TARGET_INIT_BUILTINS
494 #define TARGET_INIT_BUILTINS arm_init_builtins
495 #undef TARGET_EXPAND_BUILTIN
496 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
497 #undef TARGET_BUILTIN_DECL
498 #define TARGET_BUILTIN_DECL arm_builtin_decl
500 #undef TARGET_INIT_LIBFUNCS
501 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
503 #undef TARGET_PROMOTE_FUNCTION_MODE
504 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
505 #undef TARGET_PROMOTE_PROTOTYPES
506 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
507 #undef TARGET_PASS_BY_REFERENCE
508 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
509 #undef TARGET_ARG_PARTIAL_BYTES
510 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
511 #undef TARGET_FUNCTION_ARG
512 #define TARGET_FUNCTION_ARG arm_function_arg
513 #undef TARGET_FUNCTION_ARG_ADVANCE
514 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
515 #undef TARGET_FUNCTION_ARG_BOUNDARY
516 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
518 #undef TARGET_SETUP_INCOMING_VARARGS
519 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
521 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
522 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
524 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
525 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
526 #undef TARGET_TRAMPOLINE_INIT
527 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
528 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
529 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
531 #undef TARGET_WARN_FUNC_RETURN
532 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
534 #undef TARGET_DEFAULT_SHORT_ENUMS
535 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
537 #undef TARGET_ALIGN_ANON_BITFIELD
538 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
540 #undef TARGET_NARROW_VOLATILE_BITFIELD
541 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
543 #undef TARGET_CXX_GUARD_TYPE
544 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
546 #undef TARGET_CXX_GUARD_MASK_BIT
547 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
549 #undef TARGET_CXX_GET_COOKIE_SIZE
550 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
552 #undef TARGET_CXX_COOKIE_HAS_SIZE
553 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
555 #undef TARGET_CXX_CDTOR_RETURNS_THIS
556 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
558 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
559 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
561 #undef TARGET_CXX_USE_AEABI_ATEXIT
562 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
564 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
565 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
566 arm_cxx_determine_class_data_visibility
568 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
569 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
571 #undef TARGET_RETURN_IN_MSB
572 #define TARGET_RETURN_IN_MSB arm_return_in_msb
574 #undef TARGET_RETURN_IN_MEMORY
575 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
577 #undef TARGET_MUST_PASS_IN_STACK
578 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
580 #if ARM_UNWIND_INFO
581 #undef TARGET_ASM_UNWIND_EMIT
582 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
584 /* EABI unwinding tables use a different format for the typeinfo tables. */
585 #undef TARGET_ASM_TTYPE
586 #define TARGET_ASM_TTYPE arm_output_ttype
588 #undef TARGET_ARM_EABI_UNWINDER
589 #define TARGET_ARM_EABI_UNWINDER true
591 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
592 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
594 #undef TARGET_ASM_INIT_SECTIONS
595 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
596 #endif /* ARM_UNWIND_INFO */
598 #undef TARGET_DWARF_REGISTER_SPAN
599 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
601 #undef TARGET_CANNOT_COPY_INSN_P
602 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
604 #ifdef HAVE_AS_TLS
605 #undef TARGET_HAVE_TLS
606 #define TARGET_HAVE_TLS true
607 #endif
609 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
610 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
612 #undef TARGET_LEGITIMATE_CONSTANT_P
613 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
615 #undef TARGET_CANNOT_FORCE_CONST_MEM
616 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
618 #undef TARGET_MAX_ANCHOR_OFFSET
619 #define TARGET_MAX_ANCHOR_OFFSET 4095
621 /* The minimum is set such that the total size of the block
622 for a particular anchor is -4088 + 1 + 4095 bytes, which is
623 divisible by eight, ensuring natural spacing of anchors. */
624 #undef TARGET_MIN_ANCHOR_OFFSET
625 #define TARGET_MIN_ANCHOR_OFFSET -4088
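/* Worked example of the arithmetic above (illustrative, assuming the offset
   range is inclusive): an anchor covers offsets -4088 .. +4095, i.e.
   4088 + 1 + 4095 = 8184 bytes in total, and 8184 = 8 * 1023, so the
   covered block is a multiple of eight bytes.  */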
627 #undef TARGET_SCHED_ISSUE_RATE
628 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
630 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
631 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
632 arm_first_cycle_multipass_dfa_lookahead
634 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
635 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
636 arm_first_cycle_multipass_dfa_lookahead_guard
638 #undef TARGET_MANGLE_TYPE
639 #define TARGET_MANGLE_TYPE arm_mangle_type
641 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
642 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
644 #undef TARGET_BUILD_BUILTIN_VA_LIST
645 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
646 #undef TARGET_EXPAND_BUILTIN_VA_START
647 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
648 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
649 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
651 #ifdef HAVE_AS_TLS
652 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
653 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
654 #endif
656 #undef TARGET_LEGITIMATE_ADDRESS_P
657 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
659 #undef TARGET_PREFERRED_RELOAD_CLASS
660 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
662 #undef TARGET_INVALID_PARAMETER_TYPE
663 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
665 #undef TARGET_INVALID_RETURN_TYPE
666 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
668 #undef TARGET_PROMOTED_TYPE
669 #define TARGET_PROMOTED_TYPE arm_promoted_type
671 #undef TARGET_CONVERT_TO_TYPE
672 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
674 #undef TARGET_SCALAR_MODE_SUPPORTED_P
675 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
677 #undef TARGET_FRAME_POINTER_REQUIRED
678 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
680 #undef TARGET_CAN_ELIMINATE
681 #define TARGET_CAN_ELIMINATE arm_can_eliminate
683 #undef TARGET_CONDITIONAL_REGISTER_USAGE
684 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
686 #undef TARGET_CLASS_LIKELY_SPILLED_P
687 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
689 #undef TARGET_VECTORIZE_BUILTINS
690 #define TARGET_VECTORIZE_BUILTINS
692 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
693 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
694 arm_builtin_vectorized_function
696 #undef TARGET_VECTOR_ALIGNMENT
697 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
699 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
700 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
701 arm_vector_alignment_reachable
703 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
704 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
705 arm_builtin_support_vector_misalignment
707 #undef TARGET_PREFERRED_RENAME_CLASS
708 #define TARGET_PREFERRED_RENAME_CLASS \
709 arm_preferred_rename_class
711 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
712 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
713 arm_vectorize_vec_perm_const_ok
715 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
716 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
717 arm_builtin_vectorization_cost
718 #undef TARGET_VECTORIZE_ADD_STMT_COST
719 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
721 #undef TARGET_CANONICALIZE_COMPARISON
722 #define TARGET_CANONICALIZE_COMPARISON \
723 arm_canonicalize_comparison
725 #undef TARGET_ASAN_SHADOW_OFFSET
726 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
728 #undef MAX_INSN_PER_IT_BLOCK
729 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
731 #undef TARGET_CAN_USE_DOLOOP_P
732 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
734 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
735 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
737 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
738 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
740 #undef TARGET_SCHED_FUSION_PRIORITY
741 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
743 struct gcc_target targetm = TARGET_INITIALIZER;
745 /* Obstack for minipool constant handling. */
746 static struct obstack minipool_obstack;
747 static char * minipool_startobj;
749 /* The maximum number of insns skipped which
750 will be conditionalised if possible. */
751 static int max_insns_skipped = 5;
753 extern FILE * asm_out_file;
755 /* True if we are currently building a constant table. */
756 int making_const_table;
758 /* The processor for which instructions should be scheduled. */
759 enum processor_type arm_tune = arm_none;
761 /* The current tuning set. */
762 const struct tune_params *current_tune;
764 /* Which floating point hardware to schedule for. */
765 int arm_fpu_attr;
767 /* Which floating point hardware to use. */
768 const struct arm_fpu_desc *arm_fpu_desc;
770 /* Used for Thumb call_via trampolines. */
771 rtx thumb_call_via_label[14];
772 static int thumb_call_reg_needed;
774 /* The bits in this mask specify which
775 instructions we are allowed to generate. */
776 arm_feature_set insn_flags = ARM_FSET_EMPTY;
778 /* The bits in this mask specify which instruction scheduling options should
779 be used. */
780 arm_feature_set tune_flags = ARM_FSET_EMPTY;
782 /* The highest ARM architecture version supported by the
783 target. */
784 enum base_architecture arm_base_arch = BASE_ARCH_0;
786 /* The following are used in the arm.md file as equivalents to bits
787 in the above two flag variables. */
789 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
790 int arm_arch3m = 0;
792 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
793 int arm_arch4 = 0;
795 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
796 int arm_arch4t = 0;
798 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
799 int arm_arch5 = 0;
801 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
802 int arm_arch5e = 0;
804 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
805 int arm_arch6 = 0;
807 /* Nonzero if this chip supports the ARM 6K extensions. */
808 int arm_arch6k = 0;
810 /* Nonzero if this chip supports the ARM 6KZ extensions. */
811 int arm_arch6kz = 0;
813 /* Nonzero if instructions present in ARMv6-M can be used. */
814 int arm_arch6m = 0;
816 /* Nonzero if this chip supports the ARM 7 extensions. */
817 int arm_arch7 = 0;
819 /* Nonzero if instructions not present in the 'M' profile can be used. */
820 int arm_arch_notm = 0;
822 /* Nonzero if instructions present in ARMv7E-M can be used. */
823 int arm_arch7em = 0;
825 /* Nonzero if instructions present in ARMv8 can be used. */
826 int arm_arch8 = 0;
828 /* Nonzero if this chip can benefit from load scheduling. */
829 int arm_ld_sched = 0;
831 /* Nonzero if this chip is a StrongARM. */
832 int arm_tune_strongarm = 0;
834 /* Nonzero if this chip supports Intel Wireless MMX technology. */
835 int arm_arch_iwmmxt = 0;
837 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
838 int arm_arch_iwmmxt2 = 0;
840 /* Nonzero if this chip is an XScale. */
841 int arm_arch_xscale = 0;
843 /* Nonzero if tuning for XScale. */
844 int arm_tune_xscale = 0;
846 /* Nonzero if we want to tune for stores that access the write-buffer.
847 This typically means an ARM6 or ARM7 with MMU or MPU. */
848 int arm_tune_wbuf = 0;
850 /* Nonzero if tuning for Cortex-A9. */
851 int arm_tune_cortex_a9 = 0;
853 /* Nonzero if we should define __THUMB_INTERWORK__ in the
854 preprocessor.
855 XXX This is a bit of a hack; it's intended to help work around
856 problems in GLD, which doesn't understand that armv5t code is
857 interworking clean. */
858 int arm_cpp_interwork = 0;
860 /* Nonzero if chip supports Thumb 2. */
861 int arm_arch_thumb2;
863 /* Nonzero if chip supports integer division instruction. */
864 int arm_arch_arm_hwdiv;
865 int arm_arch_thumb_hwdiv;
867 /* Nonzero if chip disallows volatile memory access in IT block. */
868 int arm_arch_no_volatile_ce;
870 /* Nonzero if we should use Neon to handle 64-bit operations rather
871 than core registers. */
872 int prefer_neon_for_64bits = 0;
874 /* Nonzero if we shouldn't use literal pools. */
875 bool arm_disable_literal_pool = false;
877 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
878 we must report the mode of the memory reference from
879 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
880 machine_mode output_memory_reference_mode;
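/* Minimal sketch of the hand-off described above (illustrative; assumes the
   MEM rtx is available as X in the operand printer and that the address
   printer sizes auto-increments with GET_MODE_SIZE):  */
#if 0
/* In the MEM case of the operand printer:  */
output_memory_reference_mode = GET_MODE (x);
output_address (XEXP (x, 0));

/* In the address printer, a POST_INC of register Rn can then be printed as
   "[Rn], #N" where N = GET_MODE_SIZE (output_memory_reference_mode).  */
#endif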
882 /* The register number to be used for the PIC offset register. */
883 unsigned arm_pic_register = INVALID_REGNUM;
885 enum arm_pcs arm_pcs_default;
887 /* For an explanation of these variables, see final_prescan_insn below. */
888 int arm_ccfsm_state;
889 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
890 enum arm_cond_code arm_current_cc;
892 rtx arm_target_insn;
893 int arm_target_label;
894 /* The number of conditionally executed insns, including the current insn. */
895 int arm_condexec_count = 0;
896 /* A bitmask specifying the patterns for the IT block.
897 Zero means do not output an IT block before this insn. */
898 int arm_condexec_mask = 0;
899 /* The number of bits used in arm_condexec_mask. */
900 int arm_condexec_masklen = 0;
902 /* Nonzero if chip supports the ARMv8 CRC instructions. */
903 int arm_arch_crc = 0;
905 /* Nonzero if the core has a very small, high-latency multiply unit. */
906 int arm_m_profile_small_mul = 0;
908 /* The condition codes of the ARM, and the inverse function. */
909 static const char * const arm_condition_codes[] =
910 {
911 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
912 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
913 };
915 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
916 int arm_regs_in_sequence[] =
917 {
918 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
919 };
921 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
922 #define streq(string1, string2) (strcmp (string1, string2) == 0)
924 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
925 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
926 | (1 << PIC_OFFSET_TABLE_REGNUM)))
928 /* Initialization code. */
930 struct processors
931 {
932 const char *const name;
933 enum processor_type core;
934 const char *arch;
935 enum base_architecture base_arch;
936 const arm_feature_set flags;
937 const struct tune_params *const tune;
938 };
941 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
942 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
943 { \
944 num_slots, \
945 l1_size, \
946 l1_line_size \
947 }
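/* Illustrative note: a tuning entry that benefits from prefetching would use
   something like ARM_PREFETCH_BENEFICIAL (4, 32768, 32), which expands to
   the initializer { 4, 32768, 32 } (prefetch slots, L1 cache size, L1 line
   size); ARM_PREFETCH_NOT_BENEFICIAL above expands to { 0, -1, -1 }.  */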
949 /* arm generic vectorizer costs. */
950 static const
951 struct cpu_vec_costs arm_default_vec_cost = {
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 1, /* vec_unalign_load_cost. */
960 1, /* vec_unalign_store_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
964 };
966 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
967 #include "aarch-cost-tables.h"
971 const struct cpu_cost_table cortexa9_extra_costs =
973 /* ALU */
975 0, /* arith. */
976 0, /* logical. */
977 0, /* shift. */
978 COSTS_N_INSNS (1), /* shift_reg. */
979 COSTS_N_INSNS (1), /* arith_shift. */
980 COSTS_N_INSNS (2), /* arith_shift_reg. */
981 0, /* log_shift. */
982 COSTS_N_INSNS (1), /* log_shift_reg. */
983 COSTS_N_INSNS (1), /* extend. */
984 COSTS_N_INSNS (2), /* extend_arith. */
985 COSTS_N_INSNS (1), /* bfi. */
986 COSTS_N_INSNS (1), /* bfx. */
987 0, /* clz. */
988 0, /* rev. */
989 0, /* non_exec. */
990 true /* non_exec_costs_exec. */
993 /* MULT SImode */
995 COSTS_N_INSNS (3), /* simple. */
996 COSTS_N_INSNS (3), /* flag_setting. */
997 COSTS_N_INSNS (2), /* extend. */
998 COSTS_N_INSNS (3), /* add. */
999 COSTS_N_INSNS (2), /* extend_add. */
1000 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1002 /* MULT DImode */
1004 0, /* simple (N/A). */
1005 0, /* flag_setting (N/A). */
1006 COSTS_N_INSNS (4), /* extend. */
1007 0, /* add (N/A). */
1008 COSTS_N_INSNS (4), /* extend_add. */
1009 0 /* idiv (N/A). */
1012 /* LD/ST */
1014 COSTS_N_INSNS (2), /* load. */
1015 COSTS_N_INSNS (2), /* load_sign_extend. */
1016 COSTS_N_INSNS (2), /* ldrd. */
1017 COSTS_N_INSNS (2), /* ldm_1st. */
1018 1, /* ldm_regs_per_insn_1st. */
1019 2, /* ldm_regs_per_insn_subsequent. */
1020 COSTS_N_INSNS (5), /* loadf. */
1021 COSTS_N_INSNS (5), /* loadd. */
1022 COSTS_N_INSNS (1), /* load_unaligned. */
1023 COSTS_N_INSNS (2), /* store. */
1024 COSTS_N_INSNS (2), /* strd. */
1025 COSTS_N_INSNS (2), /* stm_1st. */
1026 1, /* stm_regs_per_insn_1st. */
1027 2, /* stm_regs_per_insn_subsequent. */
1028 COSTS_N_INSNS (1), /* storef. */
1029 COSTS_N_INSNS (1), /* stored. */
1030 COSTS_N_INSNS (1), /* store_unaligned. */
1031 COSTS_N_INSNS (1), /* loadv. */
1032 COSTS_N_INSNS (1) /* storev. */
1035 /* FP SFmode */
1037 COSTS_N_INSNS (14), /* div. */
1038 COSTS_N_INSNS (4), /* mult. */
1039 COSTS_N_INSNS (7), /* mult_addsub. */
1040 COSTS_N_INSNS (30), /* fma. */
1041 COSTS_N_INSNS (3), /* addsub. */
1042 COSTS_N_INSNS (1), /* fpconst. */
1043 COSTS_N_INSNS (1), /* neg. */
1044 COSTS_N_INSNS (3), /* compare. */
1045 COSTS_N_INSNS (3), /* widen. */
1046 COSTS_N_INSNS (3), /* narrow. */
1047 COSTS_N_INSNS (3), /* toint. */
1048 COSTS_N_INSNS (3), /* fromint. */
1049 COSTS_N_INSNS (3) /* roundint. */
1051 /* FP DFmode */
1053 COSTS_N_INSNS (24), /* div. */
1054 COSTS_N_INSNS (5), /* mult. */
1055 COSTS_N_INSNS (8), /* mult_addsub. */
1056 COSTS_N_INSNS (30), /* fma. */
1057 COSTS_N_INSNS (3), /* addsub. */
1058 COSTS_N_INSNS (1), /* fpconst. */
1059 COSTS_N_INSNS (1), /* neg. */
1060 COSTS_N_INSNS (3), /* compare. */
1061 COSTS_N_INSNS (3), /* widen. */
1062 COSTS_N_INSNS (3), /* narrow. */
1063 COSTS_N_INSNS (3), /* toint. */
1064 COSTS_N_INSNS (3), /* fromint. */
1065 COSTS_N_INSNS (3) /* roundint. */
1068 /* Vector */
1070 COSTS_N_INSNS (1) /* alu. */
1074 const struct cpu_cost_table cortexa8_extra_costs =
1076 /* ALU */
1078 0, /* arith. */
1079 0, /* logical. */
1080 COSTS_N_INSNS (1), /* shift. */
1081 0, /* shift_reg. */
1082 COSTS_N_INSNS (1), /* arith_shift. */
1083 0, /* arith_shift_reg. */
1084 COSTS_N_INSNS (1), /* log_shift. */
1085 0, /* log_shift_reg. */
1086 0, /* extend. */
1087 0, /* extend_arith. */
1088 0, /* bfi. */
1089 0, /* bfx. */
1090 0, /* clz. */
1091 0, /* rev. */
1092 0, /* non_exec. */
1093 true /* non_exec_costs_exec. */
1096 /* MULT SImode */
1098 COSTS_N_INSNS (1), /* simple. */
1099 COSTS_N_INSNS (1), /* flag_setting. */
1100 COSTS_N_INSNS (1), /* extend. */
1101 COSTS_N_INSNS (1), /* add. */
1102 COSTS_N_INSNS (1), /* extend_add. */
1103 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1105 /* MULT DImode */
1107 0, /* simple (N/A). */
1108 0, /* flag_setting (N/A). */
1109 COSTS_N_INSNS (2), /* extend. */
1110 0, /* add (N/A). */
1111 COSTS_N_INSNS (2), /* extend_add. */
1112 0 /* idiv (N/A). */
1115 /* LD/ST */
1117 COSTS_N_INSNS (1), /* load. */
1118 COSTS_N_INSNS (1), /* load_sign_extend. */
1119 COSTS_N_INSNS (1), /* ldrd. */
1120 COSTS_N_INSNS (1), /* ldm_1st. */
1121 1, /* ldm_regs_per_insn_1st. */
1122 2, /* ldm_regs_per_insn_subsequent. */
1123 COSTS_N_INSNS (1), /* loadf. */
1124 COSTS_N_INSNS (1), /* loadd. */
1125 COSTS_N_INSNS (1), /* load_unaligned. */
1126 COSTS_N_INSNS (1), /* store. */
1127 COSTS_N_INSNS (1), /* strd. */
1128 COSTS_N_INSNS (1), /* stm_1st. */
1129 1, /* stm_regs_per_insn_1st. */
1130 2, /* stm_regs_per_insn_subsequent. */
1131 COSTS_N_INSNS (1), /* storef. */
1132 COSTS_N_INSNS (1), /* stored. */
1133 COSTS_N_INSNS (1), /* store_unaligned. */
1134 COSTS_N_INSNS (1), /* loadv. */
1135 COSTS_N_INSNS (1) /* storev. */
1138 /* FP SFmode */
1140 COSTS_N_INSNS (36), /* div. */
1141 COSTS_N_INSNS (11), /* mult. */
1142 COSTS_N_INSNS (20), /* mult_addsub. */
1143 COSTS_N_INSNS (30), /* fma. */
1144 COSTS_N_INSNS (9), /* addsub. */
1145 COSTS_N_INSNS (3), /* fpconst. */
1146 COSTS_N_INSNS (3), /* neg. */
1147 COSTS_N_INSNS (6), /* compare. */
1148 COSTS_N_INSNS (4), /* widen. */
1149 COSTS_N_INSNS (4), /* narrow. */
1150 COSTS_N_INSNS (8), /* toint. */
1151 COSTS_N_INSNS (8), /* fromint. */
1152 COSTS_N_INSNS (8) /* roundint. */
1154 /* FP DFmode */
1156 COSTS_N_INSNS (64), /* div. */
1157 COSTS_N_INSNS (16), /* mult. */
1158 COSTS_N_INSNS (25), /* mult_addsub. */
1159 COSTS_N_INSNS (30), /* fma. */
1160 COSTS_N_INSNS (9), /* addsub. */
1161 COSTS_N_INSNS (3), /* fpconst. */
1162 COSTS_N_INSNS (3), /* neg. */
1163 COSTS_N_INSNS (6), /* compare. */
1164 COSTS_N_INSNS (6), /* widen. */
1165 COSTS_N_INSNS (6), /* narrow. */
1166 COSTS_N_INSNS (8), /* toint. */
1167 COSTS_N_INSNS (8), /* fromint. */
1168 COSTS_N_INSNS (8) /* roundint. */
1171 /* Vector */
1173 COSTS_N_INSNS (1) /* alu. */
1177 const struct cpu_cost_table cortexa5_extra_costs =
1179 /* ALU */
1181 0, /* arith. */
1182 0, /* logical. */
1183 COSTS_N_INSNS (1), /* shift. */
1184 COSTS_N_INSNS (1), /* shift_reg. */
1185 COSTS_N_INSNS (1), /* arith_shift. */
1186 COSTS_N_INSNS (1), /* arith_shift_reg. */
1187 COSTS_N_INSNS (1), /* log_shift. */
1188 COSTS_N_INSNS (1), /* log_shift_reg. */
1189 COSTS_N_INSNS (1), /* extend. */
1190 COSTS_N_INSNS (1), /* extend_arith. */
1191 COSTS_N_INSNS (1), /* bfi. */
1192 COSTS_N_INSNS (1), /* bfx. */
1193 COSTS_N_INSNS (1), /* clz. */
1194 COSTS_N_INSNS (1), /* rev. */
1195 0, /* non_exec. */
1196 true /* non_exec_costs_exec. */
1200 /* MULT SImode */
1202 0, /* simple. */
1203 COSTS_N_INSNS (1), /* flag_setting. */
1204 COSTS_N_INSNS (1), /* extend. */
1205 COSTS_N_INSNS (1), /* add. */
1206 COSTS_N_INSNS (1), /* extend_add. */
1207 COSTS_N_INSNS (7) /* idiv. */
1209 /* MULT DImode */
1211 0, /* simple (N/A). */
1212 0, /* flag_setting (N/A). */
1213 COSTS_N_INSNS (1), /* extend. */
1214 0, /* add. */
1215 COSTS_N_INSNS (2), /* extend_add. */
1216 0 /* idiv (N/A). */
1219 /* LD/ST */
1221 COSTS_N_INSNS (1), /* load. */
1222 COSTS_N_INSNS (1), /* load_sign_extend. */
1223 COSTS_N_INSNS (6), /* ldrd. */
1224 COSTS_N_INSNS (1), /* ldm_1st. */
1225 1, /* ldm_regs_per_insn_1st. */
1226 2, /* ldm_regs_per_insn_subsequent. */
1227 COSTS_N_INSNS (2), /* loadf. */
1228 COSTS_N_INSNS (4), /* loadd. */
1229 COSTS_N_INSNS (1), /* load_unaligned. */
1230 COSTS_N_INSNS (1), /* store. */
1231 COSTS_N_INSNS (3), /* strd. */
1232 COSTS_N_INSNS (1), /* stm_1st. */
1233 1, /* stm_regs_per_insn_1st. */
1234 2, /* stm_regs_per_insn_subsequent. */
1235 COSTS_N_INSNS (2), /* storef. */
1236 COSTS_N_INSNS (2), /* stored. */
1237 COSTS_N_INSNS (1), /* store_unaligned. */
1238 COSTS_N_INSNS (1), /* loadv. */
1239 COSTS_N_INSNS (1) /* storev. */
1242 /* FP SFmode */
1244 COSTS_N_INSNS (15), /* div. */
1245 COSTS_N_INSNS (3), /* mult. */
1246 COSTS_N_INSNS (7), /* mult_addsub. */
1247 COSTS_N_INSNS (7), /* fma. */
1248 COSTS_N_INSNS (3), /* addsub. */
1249 COSTS_N_INSNS (3), /* fpconst. */
1250 COSTS_N_INSNS (3), /* neg. */
1251 COSTS_N_INSNS (3), /* compare. */
1252 COSTS_N_INSNS (3), /* widen. */
1253 COSTS_N_INSNS (3), /* narrow. */
1254 COSTS_N_INSNS (3), /* toint. */
1255 COSTS_N_INSNS (3), /* fromint. */
1256 COSTS_N_INSNS (3) /* roundint. */
1258 /* FP DFmode */
1260 COSTS_N_INSNS (30), /* div. */
1261 COSTS_N_INSNS (6), /* mult. */
1262 COSTS_N_INSNS (10), /* mult_addsub. */
1263 COSTS_N_INSNS (7), /* fma. */
1264 COSTS_N_INSNS (3), /* addsub. */
1265 COSTS_N_INSNS (3), /* fpconst. */
1266 COSTS_N_INSNS (3), /* neg. */
1267 COSTS_N_INSNS (3), /* compare. */
1268 COSTS_N_INSNS (3), /* widen. */
1269 COSTS_N_INSNS (3), /* narrow. */
1270 COSTS_N_INSNS (3), /* toint. */
1271 COSTS_N_INSNS (3), /* fromint. */
1272 COSTS_N_INSNS (3) /* roundint. */
1275 /* Vector */
1277 COSTS_N_INSNS (1) /* alu. */
1282 const struct cpu_cost_table cortexa7_extra_costs =
1284 /* ALU */
1286 0, /* arith. */
1287 0, /* logical. */
1288 COSTS_N_INSNS (1), /* shift. */
1289 COSTS_N_INSNS (1), /* shift_reg. */
1290 COSTS_N_INSNS (1), /* arith_shift. */
1291 COSTS_N_INSNS (1), /* arith_shift_reg. */
1292 COSTS_N_INSNS (1), /* log_shift. */
1293 COSTS_N_INSNS (1), /* log_shift_reg. */
1294 COSTS_N_INSNS (1), /* extend. */
1295 COSTS_N_INSNS (1), /* extend_arith. */
1296 COSTS_N_INSNS (1), /* bfi. */
1297 COSTS_N_INSNS (1), /* bfx. */
1298 COSTS_N_INSNS (1), /* clz. */
1299 COSTS_N_INSNS (1), /* rev. */
1300 0, /* non_exec. */
1301 true /* non_exec_costs_exec. */
1305 /* MULT SImode */
1307 0, /* simple. */
1308 COSTS_N_INSNS (1), /* flag_setting. */
1309 COSTS_N_INSNS (1), /* extend. */
1310 COSTS_N_INSNS (1), /* add. */
1311 COSTS_N_INSNS (1), /* extend_add. */
1312 COSTS_N_INSNS (7) /* idiv. */
1314 /* MULT DImode */
1316 0, /* simple (N/A). */
1317 0, /* flag_setting (N/A). */
1318 COSTS_N_INSNS (1), /* extend. */
1319 0, /* add. */
1320 COSTS_N_INSNS (2), /* extend_add. */
1321 0 /* idiv (N/A). */
1324 /* LD/ST */
1326 COSTS_N_INSNS (1), /* load. */
1327 COSTS_N_INSNS (1), /* load_sign_extend. */
1328 COSTS_N_INSNS (3), /* ldrd. */
1329 COSTS_N_INSNS (1), /* ldm_1st. */
1330 1, /* ldm_regs_per_insn_1st. */
1331 2, /* ldm_regs_per_insn_subsequent. */
1332 COSTS_N_INSNS (2), /* loadf. */
1333 COSTS_N_INSNS (2), /* loadd. */
1334 COSTS_N_INSNS (1), /* load_unaligned. */
1335 COSTS_N_INSNS (1), /* store. */
1336 COSTS_N_INSNS (3), /* strd. */
1337 COSTS_N_INSNS (1), /* stm_1st. */
1338 1, /* stm_regs_per_insn_1st. */
1339 2, /* stm_regs_per_insn_subsequent. */
1340 COSTS_N_INSNS (2), /* storef. */
1341 COSTS_N_INSNS (2), /* stored. */
1342 COSTS_N_INSNS (1), /* store_unaligned. */
1343 COSTS_N_INSNS (1), /* loadv. */
1344 COSTS_N_INSNS (1) /* storev. */
1347 /* FP SFmode */
1349 COSTS_N_INSNS (15), /* div. */
1350 COSTS_N_INSNS (3), /* mult. */
1351 COSTS_N_INSNS (7), /* mult_addsub. */
1352 COSTS_N_INSNS (7), /* fma. */
1353 COSTS_N_INSNS (3), /* addsub. */
1354 COSTS_N_INSNS (3), /* fpconst. */
1355 COSTS_N_INSNS (3), /* neg. */
1356 COSTS_N_INSNS (3), /* compare. */
1357 COSTS_N_INSNS (3), /* widen. */
1358 COSTS_N_INSNS (3), /* narrow. */
1359 COSTS_N_INSNS (3), /* toint. */
1360 COSTS_N_INSNS (3), /* fromint. */
1361 COSTS_N_INSNS (3) /* roundint. */
1363 /* FP DFmode */
1365 COSTS_N_INSNS (30), /* div. */
1366 COSTS_N_INSNS (6), /* mult. */
1367 COSTS_N_INSNS (10), /* mult_addsub. */
1368 COSTS_N_INSNS (7), /* fma. */
1369 COSTS_N_INSNS (3), /* addsub. */
1370 COSTS_N_INSNS (3), /* fpconst. */
1371 COSTS_N_INSNS (3), /* neg. */
1372 COSTS_N_INSNS (3), /* compare. */
1373 COSTS_N_INSNS (3), /* widen. */
1374 COSTS_N_INSNS (3), /* narrow. */
1375 COSTS_N_INSNS (3), /* toint. */
1376 COSTS_N_INSNS (3), /* fromint. */
1377 COSTS_N_INSNS (3) /* roundint. */
1380 /* Vector */
1382 COSTS_N_INSNS (1) /* alu. */
1386 const struct cpu_cost_table cortexa12_extra_costs =
1388 /* ALU */
1390 0, /* arith. */
1391 0, /* logical. */
1392 0, /* shift. */
1393 COSTS_N_INSNS (1), /* shift_reg. */
1394 COSTS_N_INSNS (1), /* arith_shift. */
1395 COSTS_N_INSNS (1), /* arith_shift_reg. */
1396 COSTS_N_INSNS (1), /* log_shift. */
1397 COSTS_N_INSNS (1), /* log_shift_reg. */
1398 0, /* extend. */
1399 COSTS_N_INSNS (1), /* extend_arith. */
1400 0, /* bfi. */
1401 COSTS_N_INSNS (1), /* bfx. */
1402 COSTS_N_INSNS (1), /* clz. */
1403 COSTS_N_INSNS (1), /* rev. */
1404 0, /* non_exec. */
1405 true /* non_exec_costs_exec. */
1407 /* MULT SImode */
1410 COSTS_N_INSNS (2), /* simple. */
1411 COSTS_N_INSNS (3), /* flag_setting. */
1412 COSTS_N_INSNS (2), /* extend. */
1413 COSTS_N_INSNS (3), /* add. */
1414 COSTS_N_INSNS (2), /* extend_add. */
1415 COSTS_N_INSNS (18) /* idiv. */
1417 /* MULT DImode */
1419 0, /* simple (N/A). */
1420 0, /* flag_setting (N/A). */
1421 COSTS_N_INSNS (3), /* extend. */
1422 0, /* add (N/A). */
1423 COSTS_N_INSNS (3), /* extend_add. */
1424 0 /* idiv (N/A). */
1427 /* LD/ST */
1429 COSTS_N_INSNS (3), /* load. */
1430 COSTS_N_INSNS (3), /* load_sign_extend. */
1431 COSTS_N_INSNS (3), /* ldrd. */
1432 COSTS_N_INSNS (3), /* ldm_1st. */
1433 1, /* ldm_regs_per_insn_1st. */
1434 2, /* ldm_regs_per_insn_subsequent. */
1435 COSTS_N_INSNS (3), /* loadf. */
1436 COSTS_N_INSNS (3), /* loadd. */
1437 0, /* load_unaligned. */
1438 0, /* store. */
1439 0, /* strd. */
1440 0, /* stm_1st. */
1441 1, /* stm_regs_per_insn_1st. */
1442 2, /* stm_regs_per_insn_subsequent. */
1443 COSTS_N_INSNS (2), /* storef. */
1444 COSTS_N_INSNS (2), /* stored. */
1445 0, /* store_unaligned. */
1446 COSTS_N_INSNS (1), /* loadv. */
1447 COSTS_N_INSNS (1) /* storev. */
1450 /* FP SFmode */
1452 COSTS_N_INSNS (17), /* div. */
1453 COSTS_N_INSNS (4), /* mult. */
1454 COSTS_N_INSNS (8), /* mult_addsub. */
1455 COSTS_N_INSNS (8), /* fma. */
1456 COSTS_N_INSNS (4), /* addsub. */
1457 COSTS_N_INSNS (2), /* fpconst. */
1458 COSTS_N_INSNS (2), /* neg. */
1459 COSTS_N_INSNS (2), /* compare. */
1460 COSTS_N_INSNS (4), /* widen. */
1461 COSTS_N_INSNS (4), /* narrow. */
1462 COSTS_N_INSNS (4), /* toint. */
1463 COSTS_N_INSNS (4), /* fromint. */
1464 COSTS_N_INSNS (4) /* roundint. */
1466 /* FP DFmode */
1468 COSTS_N_INSNS (31), /* div. */
1469 COSTS_N_INSNS (4), /* mult. */
1470 COSTS_N_INSNS (8), /* mult_addsub. */
1471 COSTS_N_INSNS (8), /* fma. */
1472 COSTS_N_INSNS (4), /* addsub. */
1473 COSTS_N_INSNS (2), /* fpconst. */
1474 COSTS_N_INSNS (2), /* neg. */
1475 COSTS_N_INSNS (2), /* compare. */
1476 COSTS_N_INSNS (4), /* widen. */
1477 COSTS_N_INSNS (4), /* narrow. */
1478 COSTS_N_INSNS (4), /* toint. */
1479 COSTS_N_INSNS (4), /* fromint. */
1480 COSTS_N_INSNS (4) /* roundint. */
1483 /* Vector */
1485 COSTS_N_INSNS (1) /* alu. */
1489 const struct cpu_cost_table cortexa15_extra_costs =
1491 /* ALU */
1493 0, /* arith. */
1494 0, /* logical. */
1495 0, /* shift. */
1496 0, /* shift_reg. */
1497 COSTS_N_INSNS (1), /* arith_shift. */
1498 COSTS_N_INSNS (1), /* arith_shift_reg. */
1499 COSTS_N_INSNS (1), /* log_shift. */
1500 COSTS_N_INSNS (1), /* log_shift_reg. */
1501 0, /* extend. */
1502 COSTS_N_INSNS (1), /* extend_arith. */
1503 COSTS_N_INSNS (1), /* bfi. */
1504 0, /* bfx. */
1505 0, /* clz. */
1506 0, /* rev. */
1507 0, /* non_exec. */
1508 true /* non_exec_costs_exec. */
1510 /* MULT SImode */
1513 COSTS_N_INSNS (2), /* simple. */
1514 COSTS_N_INSNS (3), /* flag_setting. */
1515 COSTS_N_INSNS (2), /* extend. */
1516 COSTS_N_INSNS (2), /* add. */
1517 COSTS_N_INSNS (2), /* extend_add. */
1518 COSTS_N_INSNS (18) /* idiv. */
1520 /* MULT DImode */
1522 0, /* simple (N/A). */
1523 0, /* flag_setting (N/A). */
1524 COSTS_N_INSNS (3), /* extend. */
1525 0, /* add (N/A). */
1526 COSTS_N_INSNS (3), /* extend_add. */
1527 0 /* idiv (N/A). */
1530 /* LD/ST */
1532 COSTS_N_INSNS (3), /* load. */
1533 COSTS_N_INSNS (3), /* load_sign_extend. */
1534 COSTS_N_INSNS (3), /* ldrd. */
1535 COSTS_N_INSNS (4), /* ldm_1st. */
1536 1, /* ldm_regs_per_insn_1st. */
1537 2, /* ldm_regs_per_insn_subsequent. */
1538 COSTS_N_INSNS (4), /* loadf. */
1539 COSTS_N_INSNS (4), /* loadd. */
1540 0, /* load_unaligned. */
1541 0, /* store. */
1542 0, /* strd. */
1543 COSTS_N_INSNS (1), /* stm_1st. */
1544 1, /* stm_regs_per_insn_1st. */
1545 2, /* stm_regs_per_insn_subsequent. */
1546 0, /* storef. */
1547 0, /* stored. */
1548 0, /* store_unaligned. */
1549 COSTS_N_INSNS (1), /* loadv. */
1550 COSTS_N_INSNS (1) /* storev. */
1553 /* FP SFmode */
1555 COSTS_N_INSNS (17), /* div. */
1556 COSTS_N_INSNS (4), /* mult. */
1557 COSTS_N_INSNS (8), /* mult_addsub. */
1558 COSTS_N_INSNS (8), /* fma. */
1559 COSTS_N_INSNS (4), /* addsub. */
1560 COSTS_N_INSNS (2), /* fpconst. */
1561 COSTS_N_INSNS (2), /* neg. */
1562 COSTS_N_INSNS (5), /* compare. */
1563 COSTS_N_INSNS (4), /* widen. */
1564 COSTS_N_INSNS (4), /* narrow. */
1565 COSTS_N_INSNS (4), /* toint. */
1566 COSTS_N_INSNS (4), /* fromint. */
1567 COSTS_N_INSNS (4) /* roundint. */
1569 /* FP DFmode */
1571 COSTS_N_INSNS (31), /* div. */
1572 COSTS_N_INSNS (4), /* mult. */
1573 COSTS_N_INSNS (8), /* mult_addsub. */
1574 COSTS_N_INSNS (8), /* fma. */
1575 COSTS_N_INSNS (4), /* addsub. */
1576 COSTS_N_INSNS (2), /* fpconst. */
1577 COSTS_N_INSNS (2), /* neg. */
1578 COSTS_N_INSNS (2), /* compare. */
1579 COSTS_N_INSNS (4), /* widen. */
1580 COSTS_N_INSNS (4), /* narrow. */
1581 COSTS_N_INSNS (4), /* toint. */
1582 COSTS_N_INSNS (4), /* fromint. */
1583 COSTS_N_INSNS (4) /* roundint. */
1586 /* Vector */
1588 COSTS_N_INSNS (1) /* alu. */
1592 const struct cpu_cost_table v7m_extra_costs =
1594 /* ALU */
1596 0, /* arith. */
1597 0, /* logical. */
1598 0, /* shift. */
1599 0, /* shift_reg. */
1600 0, /* arith_shift. */
1601 COSTS_N_INSNS (1), /* arith_shift_reg. */
1602 0, /* log_shift. */
1603 COSTS_N_INSNS (1), /* log_shift_reg. */
1604 0, /* extend. */
1605 COSTS_N_INSNS (1), /* extend_arith. */
1606 0, /* bfi. */
1607 0, /* bfx. */
1608 0, /* clz. */
1609 0, /* rev. */
1610 COSTS_N_INSNS (1), /* non_exec. */
1611 false /* non_exec_costs_exec. */
1614 /* MULT SImode */
1616 COSTS_N_INSNS (1), /* simple. */
1617 COSTS_N_INSNS (1), /* flag_setting. */
1618 COSTS_N_INSNS (2), /* extend. */
1619 COSTS_N_INSNS (1), /* add. */
1620 COSTS_N_INSNS (3), /* extend_add. */
1621 COSTS_N_INSNS (8) /* idiv. */
1623 /* MULT DImode */
1625 0, /* simple (N/A). */
1626 0, /* flag_setting (N/A). */
1627 COSTS_N_INSNS (2), /* extend. */
1628 0, /* add (N/A). */
1629 COSTS_N_INSNS (3), /* extend_add. */
1630 0 /* idiv (N/A). */
1633 /* LD/ST */
1635 COSTS_N_INSNS (2), /* load. */
1636 0, /* load_sign_extend. */
1637 COSTS_N_INSNS (3), /* ldrd. */
1638 COSTS_N_INSNS (2), /* ldm_1st. */
1639 1, /* ldm_regs_per_insn_1st. */
1640 1, /* ldm_regs_per_insn_subsequent. */
1641 COSTS_N_INSNS (2), /* loadf. */
1642 COSTS_N_INSNS (3), /* loadd. */
1643 COSTS_N_INSNS (1), /* load_unaligned. */
1644 COSTS_N_INSNS (2), /* store. */
1645 COSTS_N_INSNS (3), /* strd. */
1646 COSTS_N_INSNS (2), /* stm_1st. */
1647 1, /* stm_regs_per_insn_1st. */
1648 1, /* stm_regs_per_insn_subsequent. */
1649 COSTS_N_INSNS (2), /* storef. */
1650 COSTS_N_INSNS (3), /* stored. */
1651 COSTS_N_INSNS (1), /* store_unaligned. */
1652 COSTS_N_INSNS (1), /* loadv. */
1653 COSTS_N_INSNS (1) /* storev. */
1656 /* FP SFmode */
1658 COSTS_N_INSNS (7), /* div. */
1659 COSTS_N_INSNS (2), /* mult. */
1660 COSTS_N_INSNS (5), /* mult_addsub. */
1661 COSTS_N_INSNS (3), /* fma. */
1662 COSTS_N_INSNS (1), /* addsub. */
1663 0, /* fpconst. */
1664 0, /* neg. */
1665 0, /* compare. */
1666 0, /* widen. */
1667 0, /* narrow. */
1668 0, /* toint. */
1669 0, /* fromint. */
1670 0 /* roundint. */
1672 /* FP DFmode */
1674 COSTS_N_INSNS (15), /* div. */
1675 COSTS_N_INSNS (5), /* mult. */
1676 COSTS_N_INSNS (7), /* mult_addsub. */
1677 COSTS_N_INSNS (7), /* fma. */
1678 COSTS_N_INSNS (3), /* addsub. */
1679 0, /* fpconst. */
1680 0, /* neg. */
1681 0, /* compare. */
1682 0, /* widen. */
1683 0, /* narrow. */
1684 0, /* toint. */
1685 0, /* fromint. */
1686 0 /* roundint. */
1689 /* Vector */
1691 COSTS_N_INSNS (1) /* alu. */
1695 const struct tune_params arm_slowmul_tune =
1697 arm_slowmul_rtx_costs,
1698 NULL, /* Insn extra costs. */
1699 NULL, /* Sched adj cost. */
1700 arm_default_branch_cost,
1701 &arm_default_vec_cost,
1702 3, /* Constant limit. */
1703 5, /* Max cond insns. */
1704 8, /* Memset max inline. */
1705 1, /* Issue rate. */
1706 ARM_PREFETCH_NOT_BENEFICIAL,
1707 tune_params::PREF_CONST_POOL_TRUE,
1708 tune_params::PREF_LDRD_FALSE,
1709 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1710 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1711 tune_params::DISPARAGE_FLAGS_NEITHER,
1712 tune_params::PREF_NEON_64_FALSE,
1713 tune_params::PREF_NEON_STRINGOPS_FALSE,
1714 tune_params::FUSE_NOTHING,
1715 tune_params::SCHED_AUTOPREF_OFF
1718 const struct tune_params arm_fastmul_tune =
1720 arm_fastmul_rtx_costs,
1721 NULL, /* Insn extra costs. */
1722 NULL, /* Sched adj cost. */
1723 arm_default_branch_cost,
1724 &arm_default_vec_cost,
1725 1, /* Constant limit. */
1726 5, /* Max cond insns. */
1727 8, /* Memset max inline. */
1728 1, /* Issue rate. */
1729 ARM_PREFETCH_NOT_BENEFICIAL,
1730 tune_params::PREF_CONST_POOL_TRUE,
1731 tune_params::PREF_LDRD_FALSE,
1732 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1733 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1734 tune_params::DISPARAGE_FLAGS_NEITHER,
1735 tune_params::PREF_NEON_64_FALSE,
1736 tune_params::PREF_NEON_STRINGOPS_FALSE,
1737 tune_params::FUSE_NOTHING,
1738 tune_params::SCHED_AUTOPREF_OFF
1741 /* StrongARM has early execution of branches, so a sequence that is worth
1742 skipping is shorter. Set max_insns_skipped to a lower value. */
1744 const struct tune_params arm_strongarm_tune =
1746 arm_fastmul_rtx_costs,
1747 NULL, /* Insn extra costs. */
1748 NULL, /* Sched adj cost. */
1749 arm_default_branch_cost,
1750 &arm_default_vec_cost,
1751 1, /* Constant limit. */
1752 3, /* Max cond insns. */
1753 8, /* Memset max inline. */
1754 1, /* Issue rate. */
1755 ARM_PREFETCH_NOT_BENEFICIAL,
1756 tune_params::PREF_CONST_POOL_TRUE,
1757 tune_params::PREF_LDRD_FALSE,
1758 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1759 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1760 tune_params::DISPARAGE_FLAGS_NEITHER,
1761 tune_params::PREF_NEON_64_FALSE,
1762 tune_params::PREF_NEON_STRINGOPS_FALSE,
1763 tune_params::FUSE_NOTHING,
1764 tune_params::SCHED_AUTOPREF_OFF
1767 const struct tune_params arm_xscale_tune =
1769 arm_xscale_rtx_costs,
1770 NULL, /* Insn extra costs. */
1771 xscale_sched_adjust_cost,
1772 arm_default_branch_cost,
1773 &arm_default_vec_cost,
1774 2, /* Constant limit. */
1775 3, /* Max cond insns. */
1776 8, /* Memset max inline. */
1777 1, /* Issue rate. */
1778 ARM_PREFETCH_NOT_BENEFICIAL,
1779 tune_params::PREF_CONST_POOL_TRUE,
1780 tune_params::PREF_LDRD_FALSE,
1781 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1782 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1783 tune_params::DISPARAGE_FLAGS_NEITHER,
1784 tune_params::PREF_NEON_64_FALSE,
1785 tune_params::PREF_NEON_STRINGOPS_FALSE,
1786 tune_params::FUSE_NOTHING,
1787 tune_params::SCHED_AUTOPREF_OFF
1790 const struct tune_params arm_9e_tune =
1792 arm_9e_rtx_costs,
1793 NULL, /* Insn extra costs. */
1794 NULL, /* Sched adj cost. */
1795 arm_default_branch_cost,
1796 &arm_default_vec_cost,
1797 1, /* Constant limit. */
1798 5, /* Max cond insns. */
1799 8, /* Memset max inline. */
1800 1, /* Issue rate. */
1801 ARM_PREFETCH_NOT_BENEFICIAL,
1802 tune_params::PREF_CONST_POOL_TRUE,
1803 tune_params::PREF_LDRD_FALSE,
1804 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1805 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1806 tune_params::DISPARAGE_FLAGS_NEITHER,
1807 tune_params::PREF_NEON_64_FALSE,
1808 tune_params::PREF_NEON_STRINGOPS_FALSE,
1809 tune_params::FUSE_NOTHING,
1810 tune_params::SCHED_AUTOPREF_OFF
1813 const struct tune_params arm_marvell_pj4_tune =
1815 arm_9e_rtx_costs,
1816 NULL, /* Insn extra costs. */
1817 NULL, /* Sched adj cost. */
1818 arm_default_branch_cost,
1819 &arm_default_vec_cost,
1820 1, /* Constant limit. */
1821 5, /* Max cond insns. */
1822 8, /* Memset max inline. */
1823 2, /* Issue rate. */
1824 ARM_PREFETCH_NOT_BENEFICIAL,
1825 tune_params::PREF_CONST_POOL_TRUE,
1826 tune_params::PREF_LDRD_FALSE,
1827 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1828 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1829 tune_params::DISPARAGE_FLAGS_NEITHER,
1830 tune_params::PREF_NEON_64_FALSE,
1831 tune_params::PREF_NEON_STRINGOPS_FALSE,
1832 tune_params::FUSE_NOTHING,
1833 tune_params::SCHED_AUTOPREF_OFF
1836 const struct tune_params arm_v6t2_tune =
1838 arm_9e_rtx_costs,
1839 NULL, /* Insn extra costs. */
1840 NULL, /* Sched adj cost. */
1841 arm_default_branch_cost,
1842 &arm_default_vec_cost,
1843 1, /* Constant limit. */
1844 5, /* Max cond insns. */
1845 8, /* Memset max inline. */
1846 1, /* Issue rate. */
1847 ARM_PREFETCH_NOT_BENEFICIAL,
1848 tune_params::PREF_CONST_POOL_FALSE,
1849 tune_params::PREF_LDRD_FALSE,
1850 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1851 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1852 tune_params::DISPARAGE_FLAGS_NEITHER,
1853 tune_params::PREF_NEON_64_FALSE,
1854 tune_params::PREF_NEON_STRINGOPS_FALSE,
1855 tune_params::FUSE_NOTHING,
1856 tune_params::SCHED_AUTOPREF_OFF
1860 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1861 const struct tune_params arm_cortex_tune =
1863 arm_9e_rtx_costs,
1864 &generic_extra_costs,
1865 NULL, /* Sched adj cost. */
1866 arm_default_branch_cost,
1867 &arm_default_vec_cost,
1868 1, /* Constant limit. */
1869 5, /* Max cond insns. */
1870 8, /* Memset max inline. */
1871 2, /* Issue rate. */
1872 ARM_PREFETCH_NOT_BENEFICIAL,
1873 tune_params::PREF_CONST_POOL_FALSE,
1874 tune_params::PREF_LDRD_FALSE,
1875 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1876 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1877 tune_params::DISPARAGE_FLAGS_NEITHER,
1878 tune_params::PREF_NEON_64_FALSE,
1879 tune_params::PREF_NEON_STRINGOPS_FALSE,
1880 tune_params::FUSE_NOTHING,
1881 tune_params::SCHED_AUTOPREF_OFF
1884 const struct tune_params arm_cortex_a8_tune =
1886 arm_9e_rtx_costs,
1887 &cortexa8_extra_costs,
1888 NULL, /* Sched adj cost. */
1889 arm_default_branch_cost,
1890 &arm_default_vec_cost,
1891 1, /* Constant limit. */
1892 5, /* Max cond insns. */
1893 8, /* Memset max inline. */
1894 2, /* Issue rate. */
1895 ARM_PREFETCH_NOT_BENEFICIAL,
1896 tune_params::PREF_CONST_POOL_FALSE,
1897 tune_params::PREF_LDRD_FALSE,
1898 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1899 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1900 tune_params::DISPARAGE_FLAGS_NEITHER,
1901 tune_params::PREF_NEON_64_FALSE,
1902 tune_params::PREF_NEON_STRINGOPS_TRUE,
1903 tune_params::FUSE_NOTHING,
1904 tune_params::SCHED_AUTOPREF_OFF
1907 const struct tune_params arm_cortex_a7_tune =
1909 arm_9e_rtx_costs,
1910 &cortexa7_extra_costs,
1911 NULL, /* Sched adj cost. */
1912 arm_default_branch_cost,
1913 &arm_default_vec_cost,
1914 1, /* Constant limit. */
1915 5, /* Max cond insns. */
1916 8, /* Memset max inline. */
1917 2, /* Issue rate. */
1918 ARM_PREFETCH_NOT_BENEFICIAL,
1919 tune_params::PREF_CONST_POOL_FALSE,
1920 tune_params::PREF_LDRD_FALSE,
1921 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1922 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1923 tune_params::DISPARAGE_FLAGS_NEITHER,
1924 tune_params::PREF_NEON_64_FALSE,
1925 tune_params::PREF_NEON_STRINGOPS_TRUE,
1926 tune_params::FUSE_NOTHING,
1927 tune_params::SCHED_AUTOPREF_OFF
1930 const struct tune_params arm_cortex_a15_tune =
1932 arm_9e_rtx_costs,
1933 &cortexa15_extra_costs,
1934 NULL, /* Sched adj cost. */
1935 arm_default_branch_cost,
1936 &arm_default_vec_cost,
1937 1, /* Constant limit. */
1938 2, /* Max cond insns. */
1939 8, /* Memset max inline. */
1940 3, /* Issue rate. */
1941 ARM_PREFETCH_NOT_BENEFICIAL,
1942 tune_params::PREF_CONST_POOL_FALSE,
1943 tune_params::PREF_LDRD_TRUE,
1944 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1945 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1946 tune_params::DISPARAGE_FLAGS_ALL,
1947 tune_params::PREF_NEON_64_FALSE,
1948 tune_params::PREF_NEON_STRINGOPS_TRUE,
1949 tune_params::FUSE_NOTHING,
1950 tune_params::SCHED_AUTOPREF_FULL
1953 const struct tune_params arm_cortex_a53_tune =
1955 arm_9e_rtx_costs,
1956 &cortexa53_extra_costs,
1957 NULL, /* Sched adj cost. */
1958 arm_default_branch_cost,
1959 &arm_default_vec_cost,
1960 1, /* Constant limit. */
1961 5, /* Max cond insns. */
1962 8, /* Memset max inline. */
1963 2, /* Issue rate. */
1964 ARM_PREFETCH_NOT_BENEFICIAL,
1965 tune_params::PREF_CONST_POOL_FALSE,
1966 tune_params::PREF_LDRD_FALSE,
1967 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1968 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1969 tune_params::DISPARAGE_FLAGS_NEITHER,
1970 tune_params::PREF_NEON_64_FALSE,
1971 tune_params::PREF_NEON_STRINGOPS_TRUE,
1972 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1973 tune_params::SCHED_AUTOPREF_OFF
1976 const struct tune_params arm_cortex_a57_tune =
1978 arm_9e_rtx_costs,
1979 &cortexa57_extra_costs,
1980 NULL, /* Sched adj cost. */
1981 arm_default_branch_cost,
1982 &arm_default_vec_cost,
1983 1, /* Constant limit. */
1984 2, /* Max cond insns. */
1985 8, /* Memset max inline. */
1986 3, /* Issue rate. */
1987 ARM_PREFETCH_NOT_BENEFICIAL,
1988 tune_params::PREF_CONST_POOL_FALSE,
1989 tune_params::PREF_LDRD_TRUE,
1990 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1991 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1992 tune_params::DISPARAGE_FLAGS_ALL,
1993 tune_params::PREF_NEON_64_FALSE,
1994 tune_params::PREF_NEON_STRINGOPS_TRUE,
1995 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1996 tune_params::SCHED_AUTOPREF_FULL
1999 const struct tune_params arm_xgene1_tune =
2001 arm_9e_rtx_costs,
2002 &xgene1_extra_costs,
2003 NULL, /* Sched adj cost. */
2004 arm_default_branch_cost,
2005 &arm_default_vec_cost,
2006 1, /* Constant limit. */
2007 2, /* Max cond insns. */
2008 32, /* Memset max inline. */
2009 4, /* Issue rate. */
2010 ARM_PREFETCH_NOT_BENEFICIAL,
2011 tune_params::PREF_CONST_POOL_FALSE,
2012 tune_params::PREF_LDRD_TRUE,
2013 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2014 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2015 tune_params::DISPARAGE_FLAGS_ALL,
2016 tune_params::PREF_NEON_64_FALSE,
2017 tune_params::PREF_NEON_STRINGOPS_FALSE,
2018 tune_params::FUSE_NOTHING,
2019 tune_params::SCHED_AUTOPREF_OFF
2022 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2023 less appealing. Set max_insns_skipped to a low value. */
2025 const struct tune_params arm_cortex_a5_tune =
2027 arm_9e_rtx_costs,
2028 &cortexa5_extra_costs,
2029 NULL, /* Sched adj cost. */
2030 arm_cortex_a5_branch_cost,
2031 &arm_default_vec_cost,
2032 1, /* Constant limit. */
2033 1, /* Max cond insns. */
2034 8, /* Memset max inline. */
2035 2, /* Issue rate. */
2036 ARM_PREFETCH_NOT_BENEFICIAL,
2037 tune_params::PREF_CONST_POOL_FALSE,
2038 tune_params::PREF_LDRD_FALSE,
2039 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2041 tune_params::DISPARAGE_FLAGS_NEITHER,
2042 tune_params::PREF_NEON_64_FALSE,
2043 tune_params::PREF_NEON_STRINGOPS_TRUE,
2044 tune_params::FUSE_NOTHING,
2045 tune_params::SCHED_AUTOPREF_OFF
2048 const struct tune_params arm_cortex_a9_tune =
2050 arm_9e_rtx_costs,
2051 &cortexa9_extra_costs,
2052 cortex_a9_sched_adjust_cost,
2053 arm_default_branch_cost,
2054 &arm_default_vec_cost,
2055 1, /* Constant limit. */
2056 5, /* Max cond insns. */
2057 8, /* Memset max inline. */
2058 2, /* Issue rate. */
2059 ARM_PREFETCH_BENEFICIAL(4,32,32),
2060 tune_params::PREF_CONST_POOL_FALSE,
2061 tune_params::PREF_LDRD_FALSE,
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2064 tune_params::DISPARAGE_FLAGS_NEITHER,
2065 tune_params::PREF_NEON_64_FALSE,
2066 tune_params::PREF_NEON_STRINGOPS_FALSE,
2067 tune_params::FUSE_NOTHING,
2068 tune_params::SCHED_AUTOPREF_OFF
2071 const struct tune_params arm_cortex_a12_tune =
2073 arm_9e_rtx_costs,
2074 &cortexa12_extra_costs,
2075 NULL, /* Sched adj cost. */
2076 arm_default_branch_cost,
2077 &arm_default_vec_cost, /* Vectorizer costs. */
2078 1, /* Constant limit. */
2079 2, /* Max cond insns. */
2080 8, /* Memset max inline. */
2081 2, /* Issue rate. */
2082 ARM_PREFETCH_NOT_BENEFICIAL,
2083 tune_params::PREF_CONST_POOL_FALSE,
2084 tune_params::PREF_LDRD_TRUE,
2085 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2087 tune_params::DISPARAGE_FLAGS_ALL,
2088 tune_params::PREF_NEON_64_FALSE,
2089 tune_params::PREF_NEON_STRINGOPS_TRUE,
2090 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2091 tune_params::SCHED_AUTOPREF_OFF
2094 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT each take a
2095 single cycle, so the pair costs two cycles. An LDR from the constant pool
2096 also takes two cycles, but mildly increases pipelining opportunity (consecutive
2097 loads/stores can be pipelined together, saving one cycle), and may also
2098 improve icache utilisation. Hence we prefer the constant pool for such
2099 processors. */
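/* A minimal sketch of the two alternatives discussed above (illustrative
   encodings; the cycle counts are the assumptions stated in the comment, not
   figures taken from a particular core's manual):

       movw  r0, #0x5678          @ 1 cycle
       movt  r0, #0x1234          @ 1 cycle   -> r0 = 0x12345678

   versus a literal-pool load:

       ldr   r0, .LC0             @ 2 cycles, constant stored out of line
     .LC0:
       .word 0x12345678

   Both cost roughly two cycles, but the literal-pool form can pipeline with
   neighbouring loads/stores and keeps the constant out of the instruction
   stream, which is why PREF_CONST_POOL_TRUE is chosen below.  */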
2101 const struct tune_params arm_v7m_tune =
2103 arm_9e_rtx_costs,
2104 &v7m_extra_costs,
2105 NULL, /* Sched adj cost. */
2106 arm_cortex_m_branch_cost,
2107 &arm_default_vec_cost,
2108 1, /* Constant limit. */
2109 2, /* Max cond insns. */
2110 8, /* Memset max inline. */
2111 1, /* Issue rate. */
2112 ARM_PREFETCH_NOT_BENEFICIAL,
2113 tune_params::PREF_CONST_POOL_TRUE,
2114 tune_params::PREF_LDRD_FALSE,
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2116 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2117 tune_params::DISPARAGE_FLAGS_NEITHER,
2118 tune_params::PREF_NEON_64_FALSE,
2119 tune_params::PREF_NEON_STRINGOPS_FALSE,
2120 tune_params::FUSE_NOTHING,
2121 tune_params::SCHED_AUTOPREF_OFF
2124 /* Cortex-M7 tuning. */
2126 const struct tune_params arm_cortex_m7_tune =
2128 arm_9e_rtx_costs,
2129 &v7m_extra_costs,
2130 NULL, /* Sched adj cost. */
2131 arm_cortex_m7_branch_cost,
2132 &arm_default_vec_cost,
2133 0, /* Constant limit. */
2134 1, /* Max cond insns. */
2135 8, /* Memset max inline. */
2136 2, /* Issue rate. */
2137 ARM_PREFETCH_NOT_BENEFICIAL,
2138 tune_params::PREF_CONST_POOL_TRUE,
2139 tune_params::PREF_LDRD_FALSE,
2140 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2141 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2142 tune_params::DISPARAGE_FLAGS_NEITHER,
2143 tune_params::PREF_NEON_64_FALSE,
2144 tune_params::PREF_NEON_STRINGOPS_FALSE,
2145 tune_params::FUSE_NOTHING,
2146 tune_params::SCHED_AUTOPREF_OFF
2149 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2150 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2151 const struct tune_params arm_v6m_tune =
2153 arm_9e_rtx_costs,
2154 NULL, /* Insn extra costs. */
2155 NULL, /* Sched adj cost. */
2156 arm_default_branch_cost,
2157 &arm_default_vec_cost, /* Vectorizer costs. */
2158 1, /* Constant limit. */
2159 5, /* Max cond insns. */
2160 8, /* Memset max inline. */
2161 1, /* Issue rate. */
2162 ARM_PREFETCH_NOT_BENEFICIAL,
2163 tune_params::PREF_CONST_POOL_FALSE,
2164 tune_params::PREF_LDRD_FALSE,
2165 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2166 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2167 tune_params::DISPARAGE_FLAGS_NEITHER,
2168 tune_params::PREF_NEON_64_FALSE,
2169 tune_params::PREF_NEON_STRINGOPS_FALSE,
2170 tune_params::FUSE_NOTHING,
2171 tune_params::SCHED_AUTOPREF_OFF
2174 const struct tune_params arm_fa726te_tune =
2176 arm_9e_rtx_costs,
2177 NULL, /* Insn extra costs. */
2178 fa726te_sched_adjust_cost,
2179 arm_default_branch_cost,
2180 &arm_default_vec_cost,
2181 1, /* Constant limit. */
2182 5, /* Max cond insns. */
2183 8, /* Memset max inline. */
2184 2, /* Issue rate. */
2185 ARM_PREFETCH_NOT_BENEFICIAL,
2186 tune_params::PREF_CONST_POOL_TRUE,
2187 tune_params::PREF_LDRD_FALSE,
2188 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2189 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2190 tune_params::DISPARAGE_FLAGS_NEITHER,
2191 tune_params::PREF_NEON_64_FALSE,
2192 tune_params::PREF_NEON_STRINGOPS_FALSE,
2193 tune_params::FUSE_NOTHING,
2194 tune_params::SCHED_AUTOPREF_OFF
2198 /* Not all of these give usefully different compilation alternatives,
2199 but there is no simple way of generalizing them. */
2200 static const struct processors all_cores[] =
2202 /* ARM Cores */
2203 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2204 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2205 FLAGS, &arm_##COSTS##_tune},
2206 #include "arm-cores.def"
2207 #undef ARM_CORE
2208 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
2211 static const struct processors all_architectures[] =
2213 /* ARM Architectures */
2214 /* We don't specify tuning costs here as it will be figured out
2215 from the core. */
2217 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2218 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2219 #include "arm-arches.def"
2220 #undef ARM_ARCH
2221 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
2225 /* These are populated as command-line arguments are processed, or NULL
2226 if not specified. */
2227 static const struct processors *arm_selected_arch;
2228 static const struct processors *arm_selected_cpu;
2229 static const struct processors *arm_selected_tune;
2231 /* The name of the preprocessor macro to define for this architecture. */
2233 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2235 /* Available values for -mfpu=. */
2237 static const struct arm_fpu_desc all_fpus[] =
2239 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, FEATURES) \
2240 { NAME, MODEL, REV, VFP_REGS, FEATURES },
2241 #include "arm-fpus.def"
2242 #undef ARM_FPU
2246 /* Supported TLS relocations. */
2248 enum tls_reloc {
2249 TLS_GD32,
2250 TLS_LDM32,
2251 TLS_LDO32,
2252 TLS_IE32,
2253 TLS_LE32,
2254 TLS_DESCSEQ /* GNU scheme */
2257 /* The maximum number of insns to be used when loading a constant. */
2258 inline static int
2259 arm_constant_limit (bool size_p)
2261 return size_p ? 1 : current_tune->constant_limit;
2264 /* Emit an insn that's a simple single-set. Both the operands must be known
2265 to be valid. */
2266 inline static rtx_insn *
2267 emit_set_insn (rtx x, rtx y)
2269 return emit_insn (gen_rtx_SET (x, y));
2272 /* Return the number of bits set in VALUE. */
2273 static unsigned
2274 bit_count (unsigned long value)
2276 unsigned long count = 0;
2278 while (value)
2280 count++;
2281 value &= value - 1; /* Clear the least-significant set bit. */
2284 return count;
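/* Illustrative trace (not from the sources): for value = 0x28 (binary 101000)
   the "value &= value - 1" step clears one set bit per iteration,
   0x28 -> 0x20 -> 0x0, so bit_count returns 2.  The result matches
   __builtin_popcountl (value) on hosts where that built-in is available.  */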
2287 /* Return the number of features in feature-set SET. */
2288 static unsigned
2289 feature_count (const arm_feature_set * set)
2291 return (bit_count (ARM_FSET_CPU1 (*set))
2292 + bit_count (ARM_FSET_CPU2 (*set)));
2295 typedef struct
2297 machine_mode mode;
2298 const char *name;
2299 } arm_fixed_mode_set;
2301 /* A small helper for setting fixed-point library libfuncs. */
2303 static void
2304 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2305 const char *funcname, const char *modename,
2306 int num_suffix)
2308 char buffer[50];
2310 if (num_suffix == 0)
2311 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2312 else
2313 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2315 set_optab_libfunc (optable, mode, buffer);
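/* Example of the names built by the sprintf above (illustration only):

     arm_set_fixed_optab_libfunc (ssadd_optab, SAmode, "ssadd", "sa", 3);

   registers "__gnu_ssaddsa3" for saturating SAmode addition, exactly as the
   loops in arm_init_libfuncs below request; with num_suffix == 0 the
   trailing digit is omitted.  */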
2318 static void
2319 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2320 machine_mode from, const char *funcname,
2321 const char *toname, const char *fromname)
2323 char buffer[50];
2324 const char *maybe_suffix_2 = "";
2326 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2327 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2328 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2329 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2330 maybe_suffix_2 = "2";
2332 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2333 maybe_suffix_2);
2335 set_conv_libfunc (optable, to, from, buffer);
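/* Example of the names built here (illustration only): converting from
   HQmode to SQmode ("hq" -> "sq", both signed fractional modes) satisfies
   the fixed-bit.h rule above and registers "__gnu_fracthqsq2", while a
   conversion from SFmode to SAmode has a non-fixed-point source and so keeps
   the plain "__gnu_fractsfsa".  */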
2338 /* Set up library functions unique to ARM. */
2340 static void
2341 arm_init_libfuncs (void)
2343 /* For Linux, we have access to kernel support for atomic operations. */
2344 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2345 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2347 /* There are no special library functions unless we are using the
2348 ARM BPABI. */
2349 if (!TARGET_BPABI)
2350 return;
2352 /* The functions below are described in Section 4 of the "Run-Time
2353 ABI for the ARM architecture", Version 1.0. */
2355 /* Double-precision floating-point arithmetic. Table 2. */
2356 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2357 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2358 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2359 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2360 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2362 /* Double-precision comparisons. Table 3. */
2363 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2364 set_optab_libfunc (ne_optab, DFmode, NULL);
2365 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2366 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2367 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2368 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2369 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2371 /* Single-precision floating-point arithmetic. Table 4. */
2372 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2373 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2374 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2375 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2376 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2378 /* Single-precision comparisons. Table 5. */
2379 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2380 set_optab_libfunc (ne_optab, SFmode, NULL);
2381 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2382 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2383 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2384 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2385 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2387 /* Floating-point to integer conversions. Table 6. */
2388 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2389 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2390 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2391 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2392 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2393 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2394 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2395 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2397 /* Conversions between floating types. Table 7. */
2398 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2399 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2401 /* Integer to floating-point conversions. Table 8. */
2402 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2403 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2404 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2405 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2406 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2407 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2408 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2409 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2411 /* Long long. Table 9. */
2412 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2413 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2414 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2415 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2416 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2417 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2418 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2419 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2421 /* Integer (32/32->32) division. \S 4.3.1. */
2422 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2423 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2425 /* The divmod functions are designed so that they can be used for
2426 plain division, even though they return both the quotient and the
2427 remainder. The quotient is returned in the usual location (i.e.,
2428 r0 for SImode, {r0, r1} for DImode), just as would be expected
2429 for an ordinary division routine. Because the AAPCS calling
2430 conventions specify that all of { r0, r1, r2, r3 } are
2431 call-clobbered registers, there is no need to tell the compiler
2432 explicitly that those registers are clobbered by these
2433 routines. */
2434 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2435 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
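/* Worked example of the point above (illustrative, relying only on the AEABI
   behaviour just described): for a DImode "q = n / d" the compiler emits
   "bl __aeabi_ldivmod"; the quotient arrives in {r0, r1}, exactly where an
   ordinary DImode division routine would leave it, and the remainder in
   {r2, r3} is simply ignored.  A DImode "n % d" uses the same call and takes
   the remainder registers instead.  */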
2437 /* For SImode division the ABI provides div-without-mod routines,
2438 which are faster. */
2439 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2440 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2442 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2443 divmod libcalls instead. */
2444 set_optab_libfunc (smod_optab, DImode, NULL);
2445 set_optab_libfunc (umod_optab, DImode, NULL);
2446 set_optab_libfunc (smod_optab, SImode, NULL);
2447 set_optab_libfunc (umod_optab, SImode, NULL);
2449 /* Half-precision float operations. The compiler handles all operations
2450 with NULL libfuncs by converting to SFmode. */
2451 switch (arm_fp16_format)
2453 case ARM_FP16_FORMAT_IEEE:
2454 case ARM_FP16_FORMAT_ALTERNATIVE:
2456 /* Conversions. */
2457 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2458 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2459 ? "__gnu_f2h_ieee"
2460 : "__gnu_f2h_alternative"));
2461 set_conv_libfunc (sext_optab, SFmode, HFmode,
2462 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2463 ? "__gnu_h2f_ieee"
2464 : "__gnu_h2f_alternative"));
2466 /* Arithmetic. */
2467 set_optab_libfunc (add_optab, HFmode, NULL);
2468 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2469 set_optab_libfunc (smul_optab, HFmode, NULL);
2470 set_optab_libfunc (neg_optab, HFmode, NULL);
2471 set_optab_libfunc (sub_optab, HFmode, NULL);
2473 /* Comparisons. */
2474 set_optab_libfunc (eq_optab, HFmode, NULL);
2475 set_optab_libfunc (ne_optab, HFmode, NULL);
2476 set_optab_libfunc (lt_optab, HFmode, NULL);
2477 set_optab_libfunc (le_optab, HFmode, NULL);
2478 set_optab_libfunc (ge_optab, HFmode, NULL);
2479 set_optab_libfunc (gt_optab, HFmode, NULL);
2480 set_optab_libfunc (unord_optab, HFmode, NULL);
2481 break;
2483 default:
2484 break;
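/* Illustrative sketch of the effect (not code from this file): with the
   arithmetic and comparison libfuncs cleared above, an HFmode operation such
   as "c = a + b" on __fp16 operands is expanded roughly as

     tmp = __gnu_h2f_ieee (a) + __gnu_h2f_ieee (b);   -- SFmode addition
     c   = __gnu_f2h_ieee (tmp);                      -- narrow back to HFmode

   (or the _alternative variants under ARM_FP16_FORMAT_ALTERNATIVE), so only
   the two conversion helpers need real library code.  */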
2487 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2489 const arm_fixed_mode_set fixed_arith_modes[] =
2491 { QQmode, "qq" },
2492 { UQQmode, "uqq" },
2493 { HQmode, "hq" },
2494 { UHQmode, "uhq" },
2495 { SQmode, "sq" },
2496 { USQmode, "usq" },
2497 { DQmode, "dq" },
2498 { UDQmode, "udq" },
2499 { TQmode, "tq" },
2500 { UTQmode, "utq" },
2501 { HAmode, "ha" },
2502 { UHAmode, "uha" },
2503 { SAmode, "sa" },
2504 { USAmode, "usa" },
2505 { DAmode, "da" },
2506 { UDAmode, "uda" },
2507 { TAmode, "ta" },
2508 { UTAmode, "uta" }
2510 const arm_fixed_mode_set fixed_conv_modes[] =
2512 { QQmode, "qq" },
2513 { UQQmode, "uqq" },
2514 { HQmode, "hq" },
2515 { UHQmode, "uhq" },
2516 { SQmode, "sq" },
2517 { USQmode, "usq" },
2518 { DQmode, "dq" },
2519 { UDQmode, "udq" },
2520 { TQmode, "tq" },
2521 { UTQmode, "utq" },
2522 { HAmode, "ha" },
2523 { UHAmode, "uha" },
2524 { SAmode, "sa" },
2525 { USAmode, "usa" },
2526 { DAmode, "da" },
2527 { UDAmode, "uda" },
2528 { TAmode, "ta" },
2529 { UTAmode, "uta" },
2530 { QImode, "qi" },
2531 { HImode, "hi" },
2532 { SImode, "si" },
2533 { DImode, "di" },
2534 { TImode, "ti" },
2535 { SFmode, "sf" },
2536 { DFmode, "df" }
2538 unsigned int i, j;
2540 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2542 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2543 "add", fixed_arith_modes[i].name, 3);
2544 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2545 "ssadd", fixed_arith_modes[i].name, 3);
2546 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2547 "usadd", fixed_arith_modes[i].name, 3);
2548 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2549 "sub", fixed_arith_modes[i].name, 3);
2550 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2551 "sssub", fixed_arith_modes[i].name, 3);
2552 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2553 "ussub", fixed_arith_modes[i].name, 3);
2554 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2555 "mul", fixed_arith_modes[i].name, 3);
2556 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2557 "ssmul", fixed_arith_modes[i].name, 3);
2558 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2559 "usmul", fixed_arith_modes[i].name, 3);
2560 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2561 "div", fixed_arith_modes[i].name, 3);
2562 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2563 "udiv", fixed_arith_modes[i].name, 3);
2564 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2565 "ssdiv", fixed_arith_modes[i].name, 3);
2566 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2567 "usdiv", fixed_arith_modes[i].name, 3);
2568 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2569 "neg", fixed_arith_modes[i].name, 2);
2570 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2571 "ssneg", fixed_arith_modes[i].name, 2);
2572 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2573 "usneg", fixed_arith_modes[i].name, 2);
2574 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2575 "ashl", fixed_arith_modes[i].name, 3);
2576 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2577 "ashr", fixed_arith_modes[i].name, 3);
2578 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2579 "lshr", fixed_arith_modes[i].name, 3);
2580 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2581 "ssashl", fixed_arith_modes[i].name, 3);
2582 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2583 "usashl", fixed_arith_modes[i].name, 3);
2584 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2585 "cmp", fixed_arith_modes[i].name, 2);
2588 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2589 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2591 if (i == j
2592 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2593 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2594 continue;
2596 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2597 fixed_conv_modes[j].mode, "fract",
2598 fixed_conv_modes[i].name,
2599 fixed_conv_modes[j].name);
2600 arm_set_fixed_conv_libfunc (satfract_optab,
2601 fixed_conv_modes[i].mode,
2602 fixed_conv_modes[j].mode, "satfract",
2603 fixed_conv_modes[i].name,
2604 fixed_conv_modes[j].name);
2605 arm_set_fixed_conv_libfunc (fractuns_optab,
2606 fixed_conv_modes[i].mode,
2607 fixed_conv_modes[j].mode, "fractuns",
2608 fixed_conv_modes[i].name,
2609 fixed_conv_modes[j].name);
2610 arm_set_fixed_conv_libfunc (satfractuns_optab,
2611 fixed_conv_modes[i].mode,
2612 fixed_conv_modes[j].mode, "satfractuns",
2613 fixed_conv_modes[i].name,
2614 fixed_conv_modes[j].name);
2618 if (TARGET_AAPCS_BASED)
2619 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2622 /* On AAPCS systems, this is the "struct __va_list". */
2623 static GTY(()) tree va_list_type;
2625 /* Return the type to use as __builtin_va_list. */
2626 static tree
2627 arm_build_builtin_va_list (void)
2629 tree va_list_name;
2630 tree ap_field;
2632 if (!TARGET_AAPCS_BASED)
2633 return std_build_builtin_va_list ();
2635 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2636 defined as:
2638 struct __va_list
2640 void *__ap;
2643 The C Library ABI further reinforces this definition in \S
2644 4.1.
2646 We must follow this definition exactly. The structure tag
2647 name is visible in C++ mangled names, and thus forms a part
2648 of the ABI. The field name may be used by people who
2649 #include <stdarg.h>. */
2650 /* Create the type. */
2651 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2652 /* Give it the required name. */
2653 va_list_name = build_decl (BUILTINS_LOCATION,
2654 TYPE_DECL,
2655 get_identifier ("__va_list"),
2656 va_list_type);
2657 DECL_ARTIFICIAL (va_list_name) = 1;
2658 TYPE_NAME (va_list_type) = va_list_name;
2659 TYPE_STUB_DECL (va_list_type) = va_list_name;
2660 /* Create the __ap field. */
2661 ap_field = build_decl (BUILTINS_LOCATION,
2662 FIELD_DECL,
2663 get_identifier ("__ap"),
2664 ptr_type_node);
2665 DECL_ARTIFICIAL (ap_field) = 1;
2666 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2667 TYPE_FIELDS (va_list_type) = ap_field;
2668 /* Compute its layout. */
2669 layout_type (va_list_type);
2671 return va_list_type;
2674 /* Return an expression of type "void *" pointing to the next
2675 available argument in a variable-argument list. VALIST is the
2676 user-level va_list object, of type __builtin_va_list. */
2677 static tree
2678 arm_extract_valist_ptr (tree valist)
2680 if (TREE_TYPE (valist) == error_mark_node)
2681 return error_mark_node;
2683 /* On an AAPCS target, the pointer is stored within "struct
2684 va_list". */
2685 if (TARGET_AAPCS_BASED)
2687 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2688 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2689 valist, ap_field, NULL_TREE);
2692 return valist;
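/* Illustrative effect (sketch): for an AAPCS "va_list ap" the tree built
   above is the COMPONENT_REF "ap.__ap", so the standard va_start/va_arg
   expanders used below only ever see a plain "void *" slot.  */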
2695 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2696 static void
2697 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2699 valist = arm_extract_valist_ptr (valist);
2700 std_expand_builtin_va_start (valist, nextarg);
2703 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2704 static tree
2705 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2706 gimple_seq *post_p)
2708 valist = arm_extract_valist_ptr (valist);
2709 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2712 /* Check any incompatible options that the user has specified. */
2713 static void
2714 arm_option_check_internal (struct gcc_options *opts)
2716 int flags = opts->x_target_flags;
2718 /* Make sure that the processor choice does not conflict with any of the
2719 other command line choices. */
2720 if (TARGET_ARM_P (flags) && !ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM))
2721 error ("target CPU does not support ARM mode");
2723 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2724 from here where no function is being compiled currently. */
2725 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2726 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2728 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2729 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2731 /* If this target is normally configured to use APCS frames, warn if they
2732 are turned off and debugging is turned on. */
2733 if (TARGET_ARM_P (flags)
2734 && write_symbols != NO_DEBUG
2735 && !TARGET_APCS_FRAME
2736 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2737 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2739 /* iWMMXt unsupported under Thumb mode. */
2740 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2741 error ("iWMMXt unsupported under Thumb mode");
2743 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2744 error ("can not use -mtp=cp15 with 16-bit Thumb");
2746 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2748 error ("RTP PIC is incompatible with Thumb");
2749 flag_pic = 0;
2752 /* We only support -mslow-flash-data on armv7-m targets. */
2753 if (target_slow_flash_data
2754 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2755 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2756 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2759 /* Recompute the global settings depending on target attribute options. */
2761 static void
2762 arm_option_params_internal (void)
2764 /* If we are not using the default (ARM mode) section anchor offset
2765 ranges, then set the correct ranges now. */
2766 if (TARGET_THUMB1)
2768 /* Thumb-1 LDR instructions cannot have negative offsets.
2769 Permissible positive offset ranges are 5-bit (for byte loads),
2770 6-bit (for halfword loads), or 7-bit (for word loads).
2771 Empirical results suggest a 7-bit anchor range gives the best
2772 overall code size. */
2773 targetm.min_anchor_offset = 0;
2774 targetm.max_anchor_offset = 127;
2776 else if (TARGET_THUMB2)
2778 /* The minimum is set such that the total size of the block
2779 for a particular anchor is 248 + 1 + 4095 bytes, which is
2780 divisible by eight, ensuring natural spacing of anchors. */
2781 targetm.min_anchor_offset = -248;
2782 targetm.max_anchor_offset = 4095;
2784 else
2786 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2787 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2790 if (optimize_size)
2792 /* If optimizing for size, bump the number of instructions that we
2793 are prepared to conditionally execute (even on a StrongARM). */
2794 max_insns_skipped = 6;
2796 /* For THUMB2, we limit the conditional sequence to one IT block. */
2797 if (TARGET_THUMB2)
2798 max_insns_skipped = arm_restrict_it ? 1 : 4;
2800 else
2801 /* When -mrestrict-it is in use, tone down the if-conversion. */
2802 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2803 ? 1 : current_tune->max_insns_skipped;
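/* Worked example of the logic above (illustration only): -Os for Thumb-2
   gives max_insns_skipped = 4, or 1 under -mrestrict-it; -Os for ARM or
   Thumb-1 gives 6; without -Os the value comes from the active tuning's
   "Max cond insns" entry, again forced to 1 when -mrestrict-it applies on
   Thumb-2.  */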
2806 /* True if -mflip-thumb should next add an attribute for the default
2807 mode, false if it should next add an attribute for the opposite mode. */
2808 static GTY(()) bool thumb_flipper;
2810 /* Options after initial target override. */
2811 static GTY(()) tree init_optimize;
2813 /* Reset options between modes that the user has specified. */
2814 static void
2815 arm_option_override_internal (struct gcc_options *opts,
2816 struct gcc_options *opts_set)
2818 if (TARGET_THUMB_P (opts->x_target_flags)
2819 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB)))
2821 warning (0, "target CPU does not support THUMB instructions");
2822 opts->x_target_flags &= ~MASK_THUMB;
2825 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2827 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2828 opts->x_target_flags &= ~MASK_APCS_FRAME;
2831 /* Callee super interworking implies thumb interworking. Adding
2832 this to the flags here simplifies the logic elsewhere. */
2833 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2834 opts->x_target_flags |= MASK_INTERWORK;
2836 /* Need to remember initial values so combinations of options like
2837 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2838 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2840 if (! opts_set->x_arm_restrict_it)
2841 opts->x_arm_restrict_it = arm_arch8;
2843 if (!TARGET_THUMB2_P (opts->x_target_flags))
2844 opts->x_arm_restrict_it = 0;
2846 /* Don't warn since it's on by default in -O2. */
2847 if (TARGET_THUMB1_P (opts->x_target_flags))
2848 opts->x_flag_schedule_insns = 0;
2849 else
2850 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
2852 /* Disable shrink-wrap when optimizing function for size, since it tends to
2853 generate additional returns. */
2854 if (optimize_function_for_size_p (cfun)
2855 && TARGET_THUMB2_P (opts->x_target_flags))
2856 opts->x_flag_shrink_wrap = false;
2857 else
2858 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
2860 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2861 - epilogue_insns - does not accurately model the corresponding insns
2862 emitted in the asm file. In particular, see the comment in thumb_exit
2863 'Find out how many of the (return) argument registers we can corrupt'.
2864 As a consequence, the epilogue may clobber registers without fipa-ra
2865 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
2866 TODO: Accurately model clobbers for epilogue_insns and reenable
2867 fipa-ra. */
2868 if (TARGET_THUMB1_P (opts->x_target_flags))
2869 opts->x_flag_ipa_ra = 0;
2870 else
2871 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
2873 /* Thumb2 inline assembly code should always use unified syntax.
2874 This will apply to ARM and Thumb1 eventually. */
2875 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
2878 /* Fix up any incompatible options that the user has specified. */
2879 static void
2880 arm_option_override (void)
2882 arm_selected_arch = NULL;
2883 arm_selected_cpu = NULL;
2884 arm_selected_tune = NULL;
2886 if (global_options_set.x_arm_arch_option)
2887 arm_selected_arch = &all_architectures[arm_arch_option];
2889 if (global_options_set.x_arm_cpu_option)
2891 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2892 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2895 if (global_options_set.x_arm_tune_option)
2896 arm_selected_tune = &all_cores[(int) arm_tune_option];
2898 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2899 SUBTARGET_OVERRIDE_OPTIONS;
2900 #endif
2902 if (arm_selected_arch)
2904 if (arm_selected_cpu)
2906 const arm_feature_set tuning_flags = ARM_FSET_MAKE_CPU1 (FL_TUNE);
2907 arm_feature_set selected_flags;
2908 ARM_FSET_XOR (selected_flags, arm_selected_cpu->flags,
2909 arm_selected_arch->flags);
2910 ARM_FSET_EXCLUDE (selected_flags, selected_flags, tuning_flags);
2911 /* Check for conflict between mcpu and march. */
2912 if (!ARM_FSET_IS_EMPTY (selected_flags))
2914 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2915 arm_selected_cpu->name, arm_selected_arch->name);
2916 /* -march wins for code generation.
2917 -mcpu wins for default tuning. */
2918 if (!arm_selected_tune)
2919 arm_selected_tune = arm_selected_cpu;
2921 arm_selected_cpu = arm_selected_arch;
2923 else
2924 /* -mcpu wins. */
2925 arm_selected_arch = NULL;
2927 else
2928 /* Pick a CPU based on the architecture. */
2929 arm_selected_cpu = arm_selected_arch;
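/* Worked example of the precedence above (hypothetical command lines, for
   illustration only): "-mcpu=arm7tdmi -march=armv7-a" has conflicting
   feature sets, so the warning fires, armv7-a drives code generation
   (arm_selected_cpu) and arm7tdmi is kept for default tuning
   (arm_selected_tune).  When the CPU's non-tuning features match the
   architecture exactly, -march is simply dropped in favour of -mcpu.  */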
2932 /* If the user did not specify a processor, choose one for them. */
2933 if (!arm_selected_cpu)
2935 const struct processors * sel;
2936 arm_feature_set sought = ARM_FSET_EMPTY;
2938 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2939 if (!arm_selected_cpu->name)
2941 #ifdef SUBTARGET_CPU_DEFAULT
2942 /* Use the subtarget default CPU if none was specified by
2943 configure. */
2944 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2945 #endif
2946 /* Default to ARM6. */
2947 if (!arm_selected_cpu->name)
2948 arm_selected_cpu = &all_cores[arm6];
2951 sel = arm_selected_cpu;
2952 insn_flags = sel->flags;
2954 /* Now check to see if the user has specified some command line
2955 switches that require certain abilities from the cpu. */
2957 if (TARGET_INTERWORK || TARGET_THUMB)
2959 ARM_FSET_ADD_CPU1 (sought, FL_THUMB);
2960 ARM_FSET_ADD_CPU1 (sought, FL_MODE32);
2962 /* There are no ARM processors that support both APCS-26 and
2963 interworking. Therefore we force FL_MODE26 to be removed
2964 from insn_flags here (if it was set), so that the search
2965 below will always be able to find a compatible processor. */
2966 ARM_FSET_DEL_CPU1 (insn_flags, FL_MODE26);
2969 if (!ARM_FSET_IS_EMPTY (sought)
2970 && !(ARM_FSET_CPU_SUBSET (sought, insn_flags)))
2972 /* Try to locate a CPU type that supports all of the abilities
2973 of the default CPU, plus the extra abilities requested by
2974 the user. */
2975 for (sel = all_cores; sel->name != NULL; sel++)
2976 if (ARM_FSET_CPU_SUBSET (sought, sel->flags))
2977 break;
2979 if (sel->name == NULL)
2981 unsigned current_bit_count = 0;
2982 const struct processors * best_fit = NULL;
2984 /* Ideally we would like to issue an error message here
2985 saying that it was not possible to find a CPU compatible
2986 with the default CPU, but which also supports the command
2987 line options specified by the programmer, and so they
2988 ought to use the -mcpu=<name> command line option to
2989 override the default CPU type.
2991 If we cannot find a cpu that has both the
2992 characteristics of the default cpu and the given
2993 command line options, we scan the array again looking
2994 for a best match. */
2995 for (sel = all_cores; sel->name != NULL; sel++)
2997 arm_feature_set required = ARM_FSET_EMPTY;
2998 ARM_FSET_UNION (required, sought, insn_flags);
2999 if (ARM_FSET_CPU_SUBSET (required, sel->flags))
3001 unsigned count;
3002 arm_feature_set flags;
3003 ARM_FSET_INTER (flags, sel->flags, insn_flags);
3004 count = feature_count (&flags);
3006 if (count >= current_bit_count)
3008 best_fit = sel;
3009 current_bit_count = count;
3013 gcc_assert (best_fit);
3014 sel = best_fit;
3017 arm_selected_cpu = sel;
3021 gcc_assert (arm_selected_cpu);
3022 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3023 if (!arm_selected_tune)
3024 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3026 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
3027 insn_flags = arm_selected_cpu->flags;
3028 arm_base_arch = arm_selected_cpu->base_arch;
3030 arm_tune = arm_selected_tune->core;
3031 tune_flags = arm_selected_tune->flags;
3032 current_tune = arm_selected_tune->tune;
3034 /* TBD: Dwarf info for apcs frame is not handled yet. */
3035 if (TARGET_APCS_FRAME)
3036 flag_shrink_wrap = false;
3038 /* BPABI targets use linker tricks to allow interworking on cores
3039 without thumb support. */
3040 if (TARGET_INTERWORK
3041 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB) || TARGET_BPABI))
3043 warning (0, "target CPU does not support interworking" );
3044 target_flags &= ~MASK_INTERWORK;
3047 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3049 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3050 target_flags |= MASK_APCS_FRAME;
3053 if (TARGET_POKE_FUNCTION_NAME)
3054 target_flags |= MASK_APCS_FRAME;
3056 if (TARGET_APCS_REENT && flag_pic)
3057 error ("-fpic and -mapcs-reent are incompatible");
3059 if (TARGET_APCS_REENT)
3060 warning (0, "APCS reentrant code not supported. Ignored");
3062 if (TARGET_APCS_FLOAT)
3063 warning (0, "passing floating point arguments in fp regs not yet supported");
3065 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3066 arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M);
3067 arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4);
3068 arm_arch4t = arm_arch4 && (ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB));
3069 arm_arch5 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5);
3070 arm_arch5e = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5E);
3071 arm_arch6 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6);
3072 arm_arch6k = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6K);
3073 arm_arch6kz = arm_arch6k && ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6KZ);
3074 arm_arch_notm = ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM);
3075 arm_arch6m = arm_arch6 && !arm_arch_notm;
3076 arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7);
3077 arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM);
3078 arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8);
3079 arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2);
3080 arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE);
3082 arm_ld_sched = ARM_FSET_HAS_CPU1 (tune_flags, FL_LDSCHED);
3083 arm_tune_strongarm = ARM_FSET_HAS_CPU1 (tune_flags, FL_STRONG);
3084 arm_tune_wbuf = ARM_FSET_HAS_CPU1 (tune_flags, FL_WBUF);
3085 arm_tune_xscale = ARM_FSET_HAS_CPU1 (tune_flags, FL_XSCALE);
3086 arm_arch_iwmmxt = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT);
3087 arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2);
3088 arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV);
3089 arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV);
3090 arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
3091 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
3092 arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
3093 arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL);
3095 /* V5 code we generate is completely interworking capable, so we turn off
3096 TARGET_INTERWORK here to avoid many tests later on. */
3098 /* XXX However, we must pass the right pre-processor defines to CPP,
3099 or GLD can get confused. This is a hack. */
3100 if (TARGET_INTERWORK)
3101 arm_cpp_interwork = 1;
3103 if (arm_arch5)
3104 target_flags &= ~MASK_INTERWORK;
3106 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3107 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3109 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3110 error ("iwmmxt abi requires an iwmmxt capable cpu");
3112 if (!global_options_set.x_arm_fpu_index)
3114 const char *target_fpu_name;
3115 bool ok;
3117 #ifdef FPUTYPE_DEFAULT
3118 target_fpu_name = FPUTYPE_DEFAULT;
3119 #else
3120 target_fpu_name = "vfp";
3121 #endif
3123 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
3124 CL_TARGET);
3125 gcc_assert (ok);
3128 arm_fpu_desc = &all_fpus[arm_fpu_index];
3130 switch (arm_fpu_desc->model)
3132 case ARM_FP_MODEL_VFP:
3133 arm_fpu_attr = FPU_VFP;
3134 break;
3136 default:
3137 gcc_unreachable();
3140 if (TARGET_AAPCS_BASED)
3142 if (TARGET_CALLER_INTERWORKING)
3143 error ("AAPCS does not support -mcaller-super-interworking");
3144 else
3145 if (TARGET_CALLEE_INTERWORKING)
3146 error ("AAPCS does not support -mcallee-super-interworking");
3149 /* iWMMXt and NEON are incompatible. */
3150 if (TARGET_IWMMXT && TARGET_NEON)
3151 error ("iWMMXt and NEON are incompatible");
3153 /* __fp16 support currently assumes the core has ldrh. */
3154 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3155 sorry ("__fp16 and no ldrh");
3157 /* If soft-float is specified then don't use FPU. */
3158 if (TARGET_SOFT_FLOAT)
3159 arm_fpu_attr = FPU_NONE;
3161 if (TARGET_AAPCS_BASED)
3163 if (arm_abi == ARM_ABI_IWMMXT)
3164 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3165 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3166 && TARGET_HARD_FLOAT
3167 && TARGET_VFP)
3168 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3169 else
3170 arm_pcs_default = ARM_PCS_AAPCS;
3172 else
3174 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
3175 sorry ("-mfloat-abi=hard and VFP");
3177 if (arm_abi == ARM_ABI_APCS)
3178 arm_pcs_default = ARM_PCS_APCS;
3179 else
3180 arm_pcs_default = ARM_PCS_ATPCS;
3183 /* For arm2/3 there is no need to do any scheduling if we are doing
3184 software floating-point. */
3185 if (TARGET_SOFT_FLOAT && !ARM_FSET_HAS_CPU1 (tune_flags, FL_MODE32))
3186 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3188 /* Use the cp15 method if it is available. */
3189 if (target_thread_pointer == TP_AUTO)
3191 if (arm_arch6k && !TARGET_THUMB1)
3192 target_thread_pointer = TP_CP15;
3193 else
3194 target_thread_pointer = TP_SOFT;
3197 /* Override the default structure alignment for AAPCS ABI. */
3198 if (!global_options_set.x_arm_structure_size_boundary)
3200 if (TARGET_AAPCS_BASED)
3201 arm_structure_size_boundary = 8;
3203 else
3205 if (arm_structure_size_boundary != 8
3206 && arm_structure_size_boundary != 32
3207 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3209 if (ARM_DOUBLEWORD_ALIGN)
3210 warning (0,
3211 "structure size boundary can only be set to 8, 32 or 64");
3212 else
3213 warning (0, "structure size boundary can only be set to 8 or 32");
3214 arm_structure_size_boundary
3215 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3219 /* If stack checking is disabled, we can use r10 as the PIC register,
3220 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3221 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3223 if (TARGET_VXWORKS_RTP)
3224 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3225 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3228 if (flag_pic && TARGET_VXWORKS_RTP)
3229 arm_pic_register = 9;
3231 if (arm_pic_register_string != NULL)
3233 int pic_register = decode_reg_name (arm_pic_register_string);
3235 if (!flag_pic)
3236 warning (0, "-mpic-register= is useless without -fpic");
3238 /* Prevent the user from choosing an obviously stupid PIC register. */
3239 else if (pic_register < 0 || call_used_regs[pic_register]
3240 || pic_register == HARD_FRAME_POINTER_REGNUM
3241 || pic_register == STACK_POINTER_REGNUM
3242 || pic_register >= PC_REGNUM
3243 || (TARGET_VXWORKS_RTP
3244 && (unsigned int) pic_register != arm_pic_register))
3245 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3246 else
3247 arm_pic_register = pic_register;
3250 if (TARGET_VXWORKS_RTP
3251 && !global_options_set.x_arm_pic_data_is_text_relative)
3252 arm_pic_data_is_text_relative = 0;
3254 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3255 if (fix_cm3_ldrd == 2)
3257 if (arm_selected_cpu->core == cortexm3)
3258 fix_cm3_ldrd = 1;
3259 else
3260 fix_cm3_ldrd = 0;
3263 /* Enable -munaligned-access by default for
3264 - all ARMv6 architecture-based processors
3265 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3266 - ARMv8 architecture-based processors.
3268 Disable -munaligned-access by default for
3269 - all pre-ARMv6 architecture-based processors
3270 - ARMv6-M architecture-based processors. */
3272 if (unaligned_access == 2)
3274 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3275 unaligned_access = 1;
3276 else
3277 unaligned_access = 0;
3279 else if (unaligned_access == 1
3280 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3282 warning (0, "target CPU does not support unaligned accesses");
3283 unaligned_access = 0;
3286 /* Hot/Cold partitioning is not currently supported, since we can't
3287 handle literal pool placement in that case. */
3288 if (flag_reorder_blocks_and_partition)
3290 inform (input_location,
3291 "-freorder-blocks-and-partition not supported on this architecture");
3292 flag_reorder_blocks_and_partition = 0;
3293 flag_reorder_blocks = 1;
3296 if (flag_pic)
3297 /* Hoisting PIC address calculations more aggressively provides a small,
3298 but measurable, size reduction for PIC code. Therefore, we decrease
3299 the bar for unrestricted expression hoisting to the cost of PIC address
3300 calculation, which is 2 instructions. */
3301 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3302 global_options.x_param_values,
3303 global_options_set.x_param_values);
3305 /* ARM EABI defaults to strict volatile bitfields. */
3306 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3307 && abi_version_at_least(2))
3308 flag_strict_volatile_bitfields = 1;
3310 /* Enable software prefetching at -O3 for CPUs that have prefetch and
3311 for which we have deemed it beneficial (signified by setting
3312 prefetch.num_slots to 1 or more). */
3313 if (flag_prefetch_loop_arrays < 0
3314 && HAVE_prefetch
3315 && optimize >= 3
3316 && current_tune->prefetch.num_slots > 0)
3317 flag_prefetch_loop_arrays = 1;
3319 /* Set up parameters to be used in prefetching algorithm. Do not
3320 override the defaults unless we are tuning for a core we have
3321 researched values for. */
3322 if (current_tune->prefetch.num_slots > 0)
3323 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3324 current_tune->prefetch.num_slots,
3325 global_options.x_param_values,
3326 global_options_set.x_param_values);
3327 if (current_tune->prefetch.l1_cache_line_size >= 0)
3328 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3329 current_tune->prefetch.l1_cache_line_size,
3330 global_options.x_param_values,
3331 global_options_set.x_param_values);
3332 if (current_tune->prefetch.l1_cache_size >= 0)
3333 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3334 current_tune->prefetch.l1_cache_size,
3335 global_options.x_param_values,
3336 global_options_set.x_param_values);
3338 /* Use Neon to perform 64-bit operations rather than core
3339 registers. */
3340 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3341 if (use_neon_for_64bits == 1)
3342 prefer_neon_for_64bits = true;
3344 /* Use the alternative scheduling-pressure algorithm by default. */
3345 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3346 global_options.x_param_values,
3347 global_options_set.x_param_values);
3349 /* Look through ready list and all of queue for instructions
3350 relevant for L2 auto-prefetcher. */
3351 int param_sched_autopref_queue_depth;
3353 switch (current_tune->sched_autopref)
3355 case tune_params::SCHED_AUTOPREF_OFF:
3356 param_sched_autopref_queue_depth = -1;
3357 break;
3359 case tune_params::SCHED_AUTOPREF_RANK:
3360 param_sched_autopref_queue_depth = 0;
3361 break;
3363 case tune_params::SCHED_AUTOPREF_FULL:
3364 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3365 break;
3367 default:
3368 gcc_unreachable ();
3371 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3372 param_sched_autopref_queue_depth,
3373 global_options.x_param_values,
3374 global_options_set.x_param_values);
3376 /* Currently, for slow flash data, we just disable literal pools. */
3377 if (target_slow_flash_data)
3378 arm_disable_literal_pool = true;
3380 /* Disable scheduling fusion by default if it's not armv7 processor
3381 or doesn't prefer ldrd/strd. */
3382 if (flag_schedule_fusion == 2
3383 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3384 flag_schedule_fusion = 0;
3386 /* Need to remember the initial options before they are overridden. */
3387 init_optimize = build_optimization_node (&global_options);
3389 arm_option_override_internal (&global_options, &global_options_set);
3390 arm_option_check_internal (&global_options);
3391 arm_option_params_internal ();
3393 /* Register global variables with the garbage collector. */
3394 arm_add_gc_roots ();
3396 /* Save the initial options in case the user does function specific
3397 options. */
3398 target_option_default_node = target_option_current_node
3399 = build_target_option_node (&global_options);
3401 /* Init initial mode for testing. */
3402 thumb_flipper = TARGET_THUMB;
3405 static void
3406 arm_add_gc_roots (void)
3408 gcc_obstack_init(&minipool_obstack);
3409 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3412 /* A table of known ARM exception types.
3413 For use with the interrupt function attribute. */
3415 typedef struct
3417 const char *const arg;
3418 const unsigned long return_value;
3420 isr_attribute_arg;
3422 static const isr_attribute_arg isr_attribute_args [] =
3424 { "IRQ", ARM_FT_ISR },
3425 { "irq", ARM_FT_ISR },
3426 { "FIQ", ARM_FT_FIQ },
3427 { "fiq", ARM_FT_FIQ },
3428 { "ABORT", ARM_FT_ISR },
3429 { "abort", ARM_FT_ISR },
3430 { "ABORT", ARM_FT_ISR },
3431 { "abort", ARM_FT_ISR },
3432 { "UNDEF", ARM_FT_EXCEPTION },
3433 { "undef", ARM_FT_EXCEPTION },
3434 { "SWI", ARM_FT_EXCEPTION },
3435 { "swi", ARM_FT_EXCEPTION },
3436 { NULL, ARM_FT_NORMAL }
3439 /* Returns the (interrupt) function type of the current
3440 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3442 static unsigned long
3443 arm_isr_value (tree argument)
3445 const isr_attribute_arg * ptr;
3446 const char * arg;
3448 if (!arm_arch_notm)
3449 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3451 /* No argument - default to IRQ. */
3452 if (argument == NULL_TREE)
3453 return ARM_FT_ISR;
3455 /* Get the value of the argument. */
3456 if (TREE_VALUE (argument) == NULL_TREE
3457 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3458 return ARM_FT_UNKNOWN;
3460 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3462 /* Check it against the list of known arguments. */
3463 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3464 if (streq (arg, ptr->arg))
3465 return ptr->return_value;
3467 /* An unrecognized interrupt type. */
3468 return ARM_FT_UNKNOWN;
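/* Illustrative example, added for this annotated listing (not part of the
   original source): a handler declared as

     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));

   reaches arm_isr_value with the STRING_CST "FIQ", which the table above
   maps to ARM_FT_FIQ.  With no argument the attribute defaults to
   ARM_FT_ISR, and an unrecognized string yields ARM_FT_UNKNOWN.  */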
3471 /* Computes the type of the current function. */
3473 static unsigned long
3474 arm_compute_func_type (void)
3476 unsigned long type = ARM_FT_UNKNOWN;
3477 tree a;
3478 tree attr;
3480 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3482 /* Decide if the current function is volatile. Such functions
3483 never return, and many memory cycles can be saved by not storing
3484 register values that will never be needed again. This optimization
3485 was added to speed up context switching in a kernel application. */
3486 if (optimize > 0
3487 && (TREE_NOTHROW (current_function_decl)
3488 || !(flag_unwind_tables
3489 || (flag_exceptions
3490 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3491 && TREE_THIS_VOLATILE (current_function_decl))
3492 type |= ARM_FT_VOLATILE;
3494 if (cfun->static_chain_decl != NULL)
3495 type |= ARM_FT_NESTED;
3497 attr = DECL_ATTRIBUTES (current_function_decl);
3499 a = lookup_attribute ("naked", attr);
3500 if (a != NULL_TREE)
3501 type |= ARM_FT_NAKED;
3503 a = lookup_attribute ("isr", attr);
3504 if (a == NULL_TREE)
3505 a = lookup_attribute ("interrupt", attr);
3507 if (a == NULL_TREE)
3508 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3509 else
3510 type |= arm_isr_value (TREE_VALUE (a));
3512 return type;
3515 /* Returns the type of the current function. */
3517 unsigned long
3518 arm_current_func_type (void)
3520 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3521 cfun->machine->func_type = arm_compute_func_type ();
3523 return cfun->machine->func_type;
3526 bool
3527 arm_allocate_stack_slots_for_args (void)
3529 /* Naked functions should not allocate stack slots for arguments. */
3530 return !IS_NAKED (arm_current_func_type ());
3533 static bool
3534 arm_warn_func_return (tree decl)
3536 /* Naked functions are implemented entirely in assembly, including the
3537 return sequence, so suppress warnings about this. */
3538 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
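/* Illustrative example (not part of the original source): a function such as

     void __attribute__ ((naked)) reset_handler (void)
     {
       __asm__ volatile ("b main");
     }

   is given ARM_FT_NAKED by arm_compute_func_type, so no prologue or epilogue
   is emitted, arm_allocate_stack_slots_for_args refuses stack slots for its
   arguments, and arm_warn_func_return suppresses the warning about the
   missing return statement.  */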
3542 /* Output assembler code for a block containing the constant parts
3543 of a trampoline, leaving space for the variable parts.
3545 On the ARM (if r8 is the static chain regnum, and remembering that
3546 referencing pc adds an offset of 8), the trampoline looks like:
3547 ldr r8, [pc, #0]
3548 ldr pc, [pc]
3549 .word static chain value
3550 .word function's address
3551 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3553 static void
3554 arm_asm_trampoline_template (FILE *f)
3556 if (TARGET_UNIFIED_ASM)
3557 fprintf (f, "\t.syntax unified\n");
3558 else
3559 fprintf (f, "\t.syntax divided\n");
3561 if (TARGET_ARM)
3563 fprintf (f, "\t.arm\n");
3564 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3565 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3567 else if (TARGET_THUMB2)
3569 fprintf (f, "\t.thumb\n");
3570 /* The Thumb-2 trampoline is similar to the ARM implementation.
3571 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3572 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3573 STATIC_CHAIN_REGNUM, PC_REGNUM);
3574 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3576 else
3578 ASM_OUTPUT_ALIGN (f, 2);
3579 fprintf (f, "\t.code\t16\n");
3580 fprintf (f, ".Ltrampoline_start:\n");
3581 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3582 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3583 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3584 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3585 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3586 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3588 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3589 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3592 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3594 static void
3595 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3597 rtx fnaddr, mem, a_tramp;
3599 emit_block_move (m_tramp, assemble_trampoline_template (),
3600 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3602 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3603 emit_move_insn (mem, chain_value);
3605 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3606 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3607 emit_move_insn (mem, fnaddr);
3609 a_tramp = XEXP (m_tramp, 0);
3610 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3611 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3612 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
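/* Sketch of the initialized trampoline on a 32-bit target (illustrative;
   offsets follow the adjust_address calls above, and <chain reg> stands for
   whatever STATIC_CHAIN_REGNUM is on the target):

     offset  0:  ldr  <chain reg>, [pc, #0]   ;; copied from the template
     offset  4:  ldr  pc, [pc]
     offset  8:  <static chain value>         ;; first emit_move_insn
     offset 12:  <function address>           ;; second emit_move_insn

   followed by a __clear_cache library call covering
   [a_tramp, a_tramp + TRAMPOLINE_SIZE).  */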
3615 /* Thumb trampolines should be entered in thumb mode, so set
3616 the bottom bit of the address. */
3618 static rtx
3619 arm_trampoline_adjust_address (rtx addr)
3621 if (TARGET_THUMB)
3622 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3623 NULL, 0, OPTAB_LIB_WIDEN);
3624 return addr;
3627 /* Return 1 if it is possible to return using a single instruction.
3628 If SIBLING is non-null, this is a test for a return before a sibling
3629 call. SIBLING is the call insn, so we can examine its register usage. */
3632 use_return_insn (int iscond, rtx sibling)
3634 int regno;
3635 unsigned int func_type;
3636 unsigned long saved_int_regs;
3637 unsigned HOST_WIDE_INT stack_adjust;
3638 arm_stack_offsets *offsets;
3640 /* Never use a return instruction before reload has run. */
3641 if (!reload_completed)
3642 return 0;
3644 func_type = arm_current_func_type ();
3646 /* Naked, volatile and stack alignment functions need special
3647 consideration. */
3648 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3649 return 0;
3651 /* So do interrupt functions that use the frame pointer and Thumb
3652 interrupt functions. */
3653 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3654 return 0;
3656 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3657 && !optimize_function_for_size_p (cfun))
3658 return 0;
3660 offsets = arm_get_frame_offsets ();
3661 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3663 /* As do variadic functions. */
3664 if (crtl->args.pretend_args_size
3665 || cfun->machine->uses_anonymous_args
3666 /* Or if the function calls __builtin_eh_return () */
3667 || crtl->calls_eh_return
3668 /* Or if the function calls alloca */
3669 || cfun->calls_alloca
3670 /* Or if there is a stack adjustment. However, if the stack pointer
3671 is saved on the stack, we can use a pre-incrementing stack load. */
3672 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3673 && stack_adjust == 4))
3674 /* Or if the static chain register was saved above the frame, under the
3675 assumption that the stack pointer isn't saved on the stack. */
3676 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3677 && arm_compute_static_chain_stack_bytes() != 0))
3678 return 0;
3680 saved_int_regs = offsets->saved_regs_mask;
3682 /* Unfortunately, the insn
3684 ldmib sp, {..., sp, ...}
3686 triggers a bug on most SA-110 based devices, such that the stack
3687 pointer won't be correctly restored if the instruction takes a
3688 page fault. We work around this problem by popping r3 along with
3689 the other registers, since that is never slower than executing
3690 another instruction.
3692 We test for !arm_arch5 here, because code for any architecture
3693 less than this could potentially be run on one of the buggy
3694 chips. */
3695 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3697 /* Validate that r3 is a call-clobbered register (always true in
3698 the default abi) ... */
3699 if (!call_used_regs[3])
3700 return 0;
3702 /* ... that it isn't being used for a return value ... */
3703 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3704 return 0;
3706 /* ... or for a tail-call argument ... */
3707 if (sibling)
3709 gcc_assert (CALL_P (sibling));
3711 if (find_regno_fusage (sibling, USE, 3))
3712 return 0;
3715 /* ... and that there are no call-saved registers in r0-r2
3716 (always true in the default ABI). */
3717 if (saved_int_regs & 0x7)
3718 return 0;
3721 /* Can't be done if interworking with Thumb, and any registers have been
3722 stacked. */
3723 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3724 return 0;
3726 /* On StrongARM, conditional returns are expensive if they aren't
3727 taken and multiple registers have been stacked. */
3728 if (iscond && arm_tune_strongarm)
3730 /* Conditional return when just the LR is stored is a simple
3731 conditional-load instruction, that's not expensive. */
3732 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3733 return 0;
3735 if (flag_pic
3736 && arm_pic_register != INVALID_REGNUM
3737 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3738 return 0;
3741 /* If there are saved registers but the LR isn't saved, then we need
3742 two instructions for the return. */
3743 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3744 return 0;
3746 /* Can't be done if any of the VFP regs are pushed,
3747 since this also requires an insn. */
3748 if (TARGET_HARD_FLOAT && TARGET_VFP)
3749 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3750 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3751 return 0;
3753 if (TARGET_REALLY_IWMMXT)
3754 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3755 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3756 return 0;
3758 return 1;
3761 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3762 shrink-wrapping if possible. This is the case if we need to emit a
3763 prologue, which we can test by looking at the offsets. */
3764 bool
3765 use_simple_return_p (void)
3767 arm_stack_offsets *offsets;
3769 offsets = arm_get_frame_offsets ();
3770 return offsets->outgoing_args != 0;
3773 /* Return TRUE if int I is a valid immediate ARM constant. */
3776 const_ok_for_arm (HOST_WIDE_INT i)
3778 int lowbit;
3780 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3781 be all zero, or all one. */
3782 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3783 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3784 != ((~(unsigned HOST_WIDE_INT) 0)
3785 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3786 return FALSE;
3788 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3790 /* Fast return for 0 and small values. We must do this for zero, since
3791 the code below can't handle that one case. */
3792 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3793 return TRUE;
3795 /* Get the number of trailing zeros. */
3796 lowbit = ffs((int) i) - 1;
3798 /* Only even shifts are allowed in ARM mode so round down to the
3799 nearest even number. */
3800 if (TARGET_ARM)
3801 lowbit &= ~1;
3803 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3804 return TRUE;
3806 if (TARGET_ARM)
3808 /* Allow rotated constants in ARM mode. */
3809 if (lowbit <= 4
3810 && ((i & ~0xc000003f) == 0
3811 || (i & ~0xf000000f) == 0
3812 || (i & ~0xfc000003) == 0))
3813 return TRUE;
3815 else
3817 HOST_WIDE_INT v;
3819 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3820 v = i & 0xff;
3821 v |= v << 16;
3822 if (i == v || i == (v | (v << 8)))
3823 return TRUE;
3825 /* Allow repeated pattern 0xXY00XY00. */
3826 v = i & 0xff00;
3827 v |= v << 16;
3828 if (i == v)
3829 return TRUE;
3832 return FALSE;
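/* Worked examples for the checks above (illustrative, not compiled):

     0x000000ff   valid: 8-bit value, no rotation needed
     0x000003fc   valid: 0xff shifted left by 2
     0xff000000   valid: 0xff rotated into the top byte
     0xc000003f   valid in ARM mode: rotated constant wrapping around bit 0
     0x00000101   invalid: needs 9 significant bits
     0x00ff00ff   invalid in ARM mode, but accepted by the Thumb-2 branch
                  above as the replicated pattern 0x00XY00XY.  */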
3835 /* Return true if I is a valid constant for the operation CODE. */
3837 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3839 if (const_ok_for_arm (i))
3840 return 1;
3842 switch (code)
3844 case SET:
3845 /* See if we can use movw. */
3846 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3847 return 1;
3848 else
3849 /* Otherwise, try mvn. */
3850 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3852 case PLUS:
3853 /* See if we can use addw or subw. */
3854 if (TARGET_THUMB2
3855 && ((i & 0xfffff000) == 0
3856 || ((-i) & 0xfffff000) == 0))
3857 return 1;
3858 /* else fall through. */
3860 case COMPARE:
3861 case EQ:
3862 case NE:
3863 case GT:
3864 case LE:
3865 case LT:
3866 case GE:
3867 case GEU:
3868 case LTU:
3869 case GTU:
3870 case LEU:
3871 case UNORDERED:
3872 case ORDERED:
3873 case UNEQ:
3874 case UNGE:
3875 case UNLT:
3876 case UNGT:
3877 case UNLE:
3878 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3880 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3881 case XOR:
3882 return 0;
3884 case IOR:
3885 if (TARGET_THUMB2)
3886 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3887 return 0;
3889 case AND:
3890 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3892 default:
3893 gcc_unreachable ();
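/* Example (illustrative): for CODE == SET and i == 0xffffff00, const_ok_for_arm
   fails but ARM_SIGN_EXTEND (~i) == 0xff is valid, so the value can be loaded
   with a single mvn; for CODE == PLUS and i == -42 on Thumb-2, the addw/subw
   test succeeds because (-i) fits in 12 bits.  */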
3897 /* Return true if I is a valid DImode constant for the operation CODE. */
3899 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3901 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3902 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3903 rtx hi = GEN_INT (hi_val);
3904 rtx lo = GEN_INT (lo_val);
3906 if (TARGET_THUMB1)
3907 return 0;
3909 switch (code)
3911 case AND:
3912 case IOR:
3913 case XOR:
3914 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3915 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3916 case PLUS:
3917 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3919 default:
3920 return 0;
3924 /* Emit a sequence of insns to handle a large constant.
3925 CODE is the code of the operation required, it can be any of SET, PLUS,
3926 IOR, AND, XOR, MINUS;
3927 MODE is the mode in which the operation is being performed;
3928 VAL is the integer to operate on;
3929 SOURCE is the other operand (a register, or a null-pointer for SET);
3930 SUBTARGETS means it is safe to create scratch registers if that will
3931 either produce a simpler sequence, or we will want to cse the values.
3932 Return value is the number of insns emitted. */
3934 /* ??? Tweak this for thumb2. */
3936 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3937 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3939 rtx cond;
3941 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3942 cond = COND_EXEC_TEST (PATTERN (insn));
3943 else
3944 cond = NULL_RTX;
3946 if (subtargets || code == SET
3947 || (REG_P (target) && REG_P (source)
3948 && REGNO (target) != REGNO (source)))
3950 /* After arm_reorg has been called, we can't fix up expensive
3951 constants by pushing them into memory so we must synthesize
3952 them in-line, regardless of the cost. This is only likely to
3953 be more costly on chips that have load delay slots and we are
3954 compiling without running the scheduler (so no splitting
3955 occurred before the final instruction emission).
3957 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3959 if (!cfun->machine->after_arm_reorg
3960 && !cond
3961 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3962 1, 0)
3963 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3964 + (code != SET))))
3966 if (code == SET)
3968 /* Currently SET is the only monadic value for CODE, all
3969 the rest are dyadic. */
3970 if (TARGET_USE_MOVT)
3971 arm_emit_movpair (target, GEN_INT (val));
3972 else
3973 emit_set_insn (target, GEN_INT (val));
3975 return 1;
3977 else
3979 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3981 if (TARGET_USE_MOVT)
3982 arm_emit_movpair (temp, GEN_INT (val));
3983 else
3984 emit_set_insn (temp, GEN_INT (val));
3986 /* For MINUS, the value is subtracted from, since we never
3987 have subtraction of a constant. */
3988 if (code == MINUS)
3989 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3990 else
3991 emit_set_insn (target,
3992 gen_rtx_fmt_ee (code, mode, source, temp));
3993 return 2;
3998 return arm_gen_constant (code, mode, cond, val, target, source, subtargets, 1);
4002 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4003 ARM/THUMB2 immediates, and add up to VAL.
4004 The function return value gives the number of insns required. */
4005 static int
4006 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4007 struct four_ints *return_sequence)
4009 int best_consecutive_zeros = 0;
4010 int i;
4011 int best_start = 0;
4012 int insns1, insns2;
4013 struct four_ints tmp_sequence;
4015 /* If we aren't targeting ARM, the best place to start is always at
4016 the bottom, otherwise look more closely. */
4017 if (TARGET_ARM)
4019 for (i = 0; i < 32; i += 2)
4021 int consecutive_zeros = 0;
4023 if (!(val & (3 << i)))
4025 while ((i < 32) && !(val & (3 << i)))
4027 consecutive_zeros += 2;
4028 i += 2;
4030 if (consecutive_zeros > best_consecutive_zeros)
4032 best_consecutive_zeros = consecutive_zeros;
4033 best_start = i - consecutive_zeros;
4035 i -= 2;
4040 /* So long as it won't require any more insns to do so, it's
4041 desirable to emit a small constant (in bits 0...9) in the last
4042 insn. This way there is more chance that it can be combined with
4043 a later addressing insn to form a pre-indexed load or store
4044 operation. Consider:
4046 *((volatile int *)0xe0000100) = 1;
4047 *((volatile int *)0xe0000110) = 2;
4049 We want this to wind up as:
4051 mov rA, #0xe0000000
4052 mov rB, #1
4053 str rB, [rA, #0x100]
4054 mov rB, #2
4055 str rB, [rA, #0x110]
4057 rather than having to synthesize both large constants from scratch.
4059 Therefore, we calculate how many insns would be required to emit
4060 the constant starting from `best_start', and also starting from
4061 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4062 yield a shorter sequence, we may as well use zero. */
4063 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4064 if (best_start != 0
4065 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
4067 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4068 if (insns2 <= insns1)
4070 *return_sequence = tmp_sequence;
4071 insns1 = insns2;
4075 return insns1;
4078 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4079 static int
4080 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4081 struct four_ints *return_sequence, int i)
4083 int remainder = val & 0xffffffff;
4084 int insns = 0;
4086 /* Try and find a way of doing the job in either two or three
4087 instructions.
4089 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4090 location. We start at position I. This may be the MSB, or
4091 optimal_immediate_sequence may have positioned it at the largest block
4092 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4093 wrapping around to the top of the word when we drop off the bottom.
4094 In the worst case this code should produce no more than four insns.
4096 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4097 constants, shifted to any arbitrary location. We should always start
4098 at the MSB. */
4101 int end;
4102 unsigned int b1, b2, b3, b4;
4103 unsigned HOST_WIDE_INT result;
4104 int loc;
4106 gcc_assert (insns < 4);
4108 if (i <= 0)
4109 i += 32;
4111 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4112 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4114 loc = i;
4115 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4116 /* We can use addw/subw for the last 12 bits. */
4117 result = remainder;
4118 else
4120 /* Use an 8-bit shifted/rotated immediate. */
4121 end = i - 8;
4122 if (end < 0)
4123 end += 32;
4124 result = remainder & ((0x0ff << end)
4125 | ((i < end) ? (0xff >> (32 - end))
4126 : 0));
4127 i -= 8;
4130 else
4132 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4133 arbitrary shifts. */
4134 i -= TARGET_ARM ? 2 : 1;
4135 continue;
4138 /* Next, see if we can do a better job with a thumb2 replicated
4139 constant.
4141 We do it this way around to catch the cases like 0x01F001E0 where
4142 two 8-bit immediates would work, but a replicated constant would
4143 make it worse.
4145 TODO: 16-bit constants that don't clear all the bits, but still win.
4146 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4147 if (TARGET_THUMB2)
4149 b1 = (remainder & 0xff000000) >> 24;
4150 b2 = (remainder & 0x00ff0000) >> 16;
4151 b3 = (remainder & 0x0000ff00) >> 8;
4152 b4 = remainder & 0xff;
4154 if (loc > 24)
4156 /* The 8-bit immediate already found clears b1 (and maybe b2),
4157 but must leave b3 and b4 alone. */
4159 /* First try to find a 32-bit replicated constant that clears
4160 almost everything. We can assume that we can't do it in one,
4161 or else we wouldn't be here. */
4162 unsigned int tmp = b1 & b2 & b3 & b4;
4163 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4164 + (tmp << 24);
4165 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4166 + (tmp == b3) + (tmp == b4);
4167 if (tmp
4168 && (matching_bytes >= 3
4169 || (matching_bytes == 2
4170 && const_ok_for_op (remainder & ~tmp2, code))))
4172 /* At least 3 of the bytes match, and the fourth has at
4173 least as many bits set, or two of the bytes match
4174 and it will only require one more insn to finish. */
4175 result = tmp2;
4176 i = tmp != b1 ? 32
4177 : tmp != b2 ? 24
4178 : tmp != b3 ? 16
4179 : 8;
4182 /* Second, try to find a 16-bit replicated constant that can
4183 leave three of the bytes clear. If b2 or b4 is already
4184 zero, then we can. If the 8-bit from above would not
4185 clear b2 anyway, then we still win. */
4186 else if (b1 == b3 && (!b2 || !b4
4187 || (remainder & 0x00ff0000 & ~result)))
4189 result = remainder & 0xff00ff00;
4190 i = 24;
4193 else if (loc > 16)
4195 /* The 8-bit immediate already found clears b2 (and maybe b3)
4196 and we don't get here unless b1 is already clear, but it will
4197 leave b4 unchanged. */
4199 /* If we can clear b2 and b4 at once, then we win, since the
4200 8-bits couldn't possibly reach that far. */
4201 if (b2 == b4)
4203 result = remainder & 0x00ff00ff;
4204 i = 16;
4209 return_sequence->i[insns++] = result;
4210 remainder &= ~result;
4212 if (code == SET || code == MINUS)
4213 code = PLUS;
4215 while (remainder);
4217 return insns;
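/* Worked example (illustrative): on an ARM-mode core without movw/movt,
   synthesizing 0x12340078 for CODE == SET needs three rotated 8-bit
   immediates, e.g.

     mov  rD, #0x12000000
     add  rD, rD, #0x00340000
     add  rD, rD, #0x00000078

   so optimal_immediate_sequence returns 3 with those values in
   RETURN_SEQUENCE (the exact split and order depend on best_start).  */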
4220 /* Emit an instruction with the indicated PATTERN. If COND is
4221 non-NULL, conditionalize the execution of the instruction on COND
4222 being true. */
4224 static void
4225 emit_constant_insn (rtx cond, rtx pattern)
4227 if (cond)
4228 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4229 emit_insn (pattern);
4232 /* As above, but extra parameter GENERATE which, if clear, suppresses
4233 RTL generation. */
4235 static int
4236 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4237 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4238 int subtargets, int generate)
4240 int can_invert = 0;
4241 int can_negate = 0;
4242 int final_invert = 0;
4243 int i;
4244 int set_sign_bit_copies = 0;
4245 int clear_sign_bit_copies = 0;
4246 int clear_zero_bit_copies = 0;
4247 int set_zero_bit_copies = 0;
4248 int insns = 0, neg_insns, inv_insns;
4249 unsigned HOST_WIDE_INT temp1, temp2;
4250 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4251 struct four_ints *immediates;
4252 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4254 /* Find out which operations are safe for a given CODE. Also do a quick
4255 check for degenerate cases; these can occur when DImode operations
4256 are split. */
4257 switch (code)
4259 case SET:
4260 can_invert = 1;
4261 break;
4263 case PLUS:
4264 can_negate = 1;
4265 break;
4267 case IOR:
4268 if (remainder == 0xffffffff)
4270 if (generate)
4271 emit_constant_insn (cond,
4272 gen_rtx_SET (target,
4273 GEN_INT (ARM_SIGN_EXTEND (val))));
4274 return 1;
4277 if (remainder == 0)
4279 if (reload_completed && rtx_equal_p (target, source))
4280 return 0;
4282 if (generate)
4283 emit_constant_insn (cond, gen_rtx_SET (target, source));
4284 return 1;
4286 break;
4288 case AND:
4289 if (remainder == 0)
4291 if (generate)
4292 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4293 return 1;
4295 if (remainder == 0xffffffff)
4297 if (reload_completed && rtx_equal_p (target, source))
4298 return 0;
4299 if (generate)
4300 emit_constant_insn (cond, gen_rtx_SET (target, source));
4301 return 1;
4303 can_invert = 1;
4304 break;
4306 case XOR:
4307 if (remainder == 0)
4309 if (reload_completed && rtx_equal_p (target, source))
4310 return 0;
4311 if (generate)
4312 emit_constant_insn (cond, gen_rtx_SET (target, source));
4313 return 1;
4316 if (remainder == 0xffffffff)
4318 if (generate)
4319 emit_constant_insn (cond,
4320 gen_rtx_SET (target,
4321 gen_rtx_NOT (mode, source)));
4322 return 1;
4324 final_invert = 1;
4325 break;
4327 case MINUS:
4328 /* We treat MINUS as (val - source), since (source - val) is always
4329 passed as (source + (-val)). */
4330 if (remainder == 0)
4332 if (generate)
4333 emit_constant_insn (cond,
4334 gen_rtx_SET (target,
4335 gen_rtx_NEG (mode, source)));
4336 return 1;
4338 if (const_ok_for_arm (val))
4340 if (generate)
4341 emit_constant_insn (cond,
4342 gen_rtx_SET (target,
4343 gen_rtx_MINUS (mode, GEN_INT (val),
4344 source)));
4345 return 1;
4348 break;
4350 default:
4351 gcc_unreachable ();
4354 /* If we can do it in one insn get out quickly. */
4355 if (const_ok_for_op (val, code))
4357 if (generate)
4358 emit_constant_insn (cond,
4359 gen_rtx_SET (target,
4360 (source
4361 ? gen_rtx_fmt_ee (code, mode, source,
4362 GEN_INT (val))
4363 : GEN_INT (val))));
4364 return 1;
4367 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4368 insn. */
4369 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4370 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4372 if (generate)
4374 if (mode == SImode && i == 16)
4375 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4376 smaller insn. */
4377 emit_constant_insn (cond,
4378 gen_zero_extendhisi2
4379 (target, gen_lowpart (HImode, source)));
4380 else
4381 /* Extz only supports SImode, but we can coerce the operands
4382 into that mode. */
4383 emit_constant_insn (cond,
4384 gen_extzv_t2 (gen_lowpart (SImode, target),
4385 gen_lowpart (SImode, source),
4386 GEN_INT (i), const0_rtx));
4389 return 1;
4392 /* Calculate a few attributes that may be useful for specific
4393 optimizations. */
4394 /* Count number of leading zeros. */
4395 for (i = 31; i >= 0; i--)
4397 if ((remainder & (1 << i)) == 0)
4398 clear_sign_bit_copies++;
4399 else
4400 break;
4403 /* Count number of leading 1's. */
4404 for (i = 31; i >= 0; i--)
4406 if ((remainder & (1 << i)) != 0)
4407 set_sign_bit_copies++;
4408 else
4409 break;
4412 /* Count number of trailing zeros. */
4413 for (i = 0; i <= 31; i++)
4415 if ((remainder & (1 << i)) == 0)
4416 clear_zero_bit_copies++;
4417 else
4418 break;
4421 /* Count number of trailing 1's. */
4422 for (i = 0; i <= 31; i++)
4424 if ((remainder & (1 << i)) != 0)
4425 set_zero_bit_copies++;
4426 else
4427 break;
4430 switch (code)
4432 case SET:
4433 /* See if we can do this by sign_extending a constant that is known
4434 to be negative. This is a good way of doing it, since the shift
4435 may well merge into a subsequent insn. */
4436 if (set_sign_bit_copies > 1)
4438 if (const_ok_for_arm
4439 (temp1 = ARM_SIGN_EXTEND (remainder
4440 << (set_sign_bit_copies - 1))))
4442 if (generate)
4444 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4445 emit_constant_insn (cond,
4446 gen_rtx_SET (new_src, GEN_INT (temp1)));
4447 emit_constant_insn (cond,
4448 gen_ashrsi3 (target, new_src,
4449 GEN_INT (set_sign_bit_copies - 1)));
4451 return 2;
4453 /* For an inverted constant, we will need to set the low bits,
4454 these will be shifted out of harm's way. */
4455 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4456 if (const_ok_for_arm (~temp1))
4458 if (generate)
4460 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4461 emit_constant_insn (cond,
4462 gen_rtx_SET (new_src, GEN_INT (temp1)));
4463 emit_constant_insn (cond,
4464 gen_ashrsi3 (target, new_src,
4465 GEN_INT (set_sign_bit_copies - 1)));
4467 return 2;
4471 /* See if we can calculate the value as the difference between two
4472 valid immediates. */
4473 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4475 int topshift = clear_sign_bit_copies & ~1;
4477 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4478 & (0xff000000 >> topshift));
4480 /* If temp1 is zero, then that means the 9 most significant
4481 bits of remainder were 1 and we've caused it to overflow.
4482 When topshift is 0 we don't need to do anything since we
4483 can borrow from 'bit 32'. */
4484 if (temp1 == 0 && topshift != 0)
4485 temp1 = 0x80000000 >> (topshift - 1);
4487 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4489 if (const_ok_for_arm (temp2))
4491 if (generate)
4493 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4494 emit_constant_insn (cond,
4495 gen_rtx_SET (new_src, GEN_INT (temp1)));
4496 emit_constant_insn (cond,
4497 gen_addsi3 (target, new_src,
4498 GEN_INT (-temp2)));
4501 return 2;
4505 /* See if we can generate this by setting the bottom (or the top)
4506 16 bits, and then shifting these into the other half of the
4507 word. We only look for the simplest cases, to do more would cost
4508 too much. Be careful, however, not to generate this when the
4509 alternative would take fewer insns. */
4510 if (val & 0xffff0000)
4512 temp1 = remainder & 0xffff0000;
4513 temp2 = remainder & 0x0000ffff;
4515 /* Overlaps outside this range are best done using other methods. */
4516 for (i = 9; i < 24; i++)
4518 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4519 && !const_ok_for_arm (temp2))
4521 rtx new_src = (subtargets
4522 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4523 : target);
4524 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4525 source, subtargets, generate);
4526 source = new_src;
4527 if (generate)
4528 emit_constant_insn
4529 (cond,
4530 gen_rtx_SET
4531 (target,
4532 gen_rtx_IOR (mode,
4533 gen_rtx_ASHIFT (mode, source,
4534 GEN_INT (i)),
4535 source)));
4536 return insns + 1;
4540 /* Don't duplicate cases already considered. */
4541 for (i = 17; i < 24; i++)
4543 if (((temp1 | (temp1 >> i)) == remainder)
4544 && !const_ok_for_arm (temp1))
4546 rtx new_src = (subtargets
4547 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4548 : target);
4549 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4550 source, subtargets, generate);
4551 source = new_src;
4552 if (generate)
4553 emit_constant_insn
4554 (cond,
4555 gen_rtx_SET (target,
4556 gen_rtx_IOR
4557 (mode,
4558 gen_rtx_LSHIFTRT (mode, source,
4559 GEN_INT (i)),
4560 source)));
4561 return insns + 1;
4565 break;
4567 case IOR:
4568 case XOR:
4569 /* If we have IOR or XOR, and the constant can be loaded in a
4570 single instruction, and we can find a temporary to put it in,
4571 then this can be done in two instructions instead of 3-4. */
4572 if (subtargets
4573 /* TARGET can't be NULL if SUBTARGETS is 0 */
4574 || (reload_completed && !reg_mentioned_p (target, source)))
4576 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4578 if (generate)
4580 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4582 emit_constant_insn (cond,
4583 gen_rtx_SET (sub, GEN_INT (val)));
4584 emit_constant_insn (cond,
4585 gen_rtx_SET (target,
4586 gen_rtx_fmt_ee (code, mode,
4587 source, sub)));
4589 return 2;
4593 if (code == XOR)
4594 break;
4596 /* Convert.
4597 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4598 and the remainder 0s for e.g. 0xfff00000)
4599 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4601 This can be done in 2 instructions by using shifts with mov or mvn.
4602 e.g. for
4603 x = x | 0xfff00000;
4604 we generate.
4605 mvn r0, r0, asl #12
4606 mvn r0, r0, lsr #12 */
4607 if (set_sign_bit_copies > 8
4608 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4610 if (generate)
4612 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4613 rtx shift = GEN_INT (set_sign_bit_copies);
4615 emit_constant_insn
4616 (cond,
4617 gen_rtx_SET (sub,
4618 gen_rtx_NOT (mode,
4619 gen_rtx_ASHIFT (mode,
4620 source,
4621 shift))));
4622 emit_constant_insn
4623 (cond,
4624 gen_rtx_SET (target,
4625 gen_rtx_NOT (mode,
4626 gen_rtx_LSHIFTRT (mode, sub,
4627 shift))));
4629 return 2;
4632 /* Convert
4633 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4635 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4637 For eg. r0 = r0 | 0xfff
4638 mvn r0, r0, lsr #12
4639 mvn r0, r0, asl #12
4642 if (set_zero_bit_copies > 8
4643 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4645 if (generate)
4647 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4648 rtx shift = GEN_INT (set_zero_bit_copies);
4650 emit_constant_insn
4651 (cond,
4652 gen_rtx_SET (sub,
4653 gen_rtx_NOT (mode,
4654 gen_rtx_LSHIFTRT (mode,
4655 source,
4656 shift))));
4657 emit_constant_insn
4658 (cond,
4659 gen_rtx_SET (target,
4660 gen_rtx_NOT (mode,
4661 gen_rtx_ASHIFT (mode, sub,
4662 shift))));
4664 return 2;
4667 /* This will never be reached for Thumb-2 because orn is a valid
4668 instruction. This is for Thumb-1 and the 32-bit ARM cases.
4670 x = y | constant (such that ~constant is a valid constant)
4671 Transform this to
4672 x = ~(~y & ~constant).
4674 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4676 if (generate)
4678 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4679 emit_constant_insn (cond,
4680 gen_rtx_SET (sub,
4681 gen_rtx_NOT (mode, source)));
4682 source = sub;
4683 if (subtargets)
4684 sub = gen_reg_rtx (mode);
4685 emit_constant_insn (cond,
4686 gen_rtx_SET (sub,
4687 gen_rtx_AND (mode, source,
4688 GEN_INT (temp1))));
4689 emit_constant_insn (cond,
4690 gen_rtx_SET (target,
4691 gen_rtx_NOT (mode, sub)));
4693 return 3;
4695 break;
4697 case AND:
4698 /* See if two shifts will do 2 or more insn's worth of work. */
4699 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4701 HOST_WIDE_INT shift_mask = ((0xffffffff
4702 << (32 - clear_sign_bit_copies))
4703 & 0xffffffff);
4705 if ((remainder | shift_mask) != 0xffffffff)
4707 HOST_WIDE_INT new_val
4708 = ARM_SIGN_EXTEND (remainder | shift_mask);
4710 if (generate)
4712 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4713 insns = arm_gen_constant (AND, SImode, cond, new_val,
4714 new_src, source, subtargets, 1);
4715 source = new_src;
4717 else
4719 rtx targ = subtargets ? NULL_RTX : target;
4720 insns = arm_gen_constant (AND, mode, cond, new_val,
4721 targ, source, subtargets, 0);
4725 if (generate)
4727 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4728 rtx shift = GEN_INT (clear_sign_bit_copies);
4730 emit_insn (gen_ashlsi3 (new_src, source, shift));
4731 emit_insn (gen_lshrsi3 (target, new_src, shift));
4734 return insns + 2;
4737 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4739 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4741 if ((remainder | shift_mask) != 0xffffffff)
4743 HOST_WIDE_INT new_val
4744 = ARM_SIGN_EXTEND (remainder | shift_mask);
4745 if (generate)
4747 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4749 insns = arm_gen_constant (AND, mode, cond, new_val,
4750 new_src, source, subtargets, 1);
4751 source = new_src;
4753 else
4755 rtx targ = subtargets ? NULL_RTX : target;
4757 insns = arm_gen_constant (AND, mode, cond, new_val,
4758 targ, source, subtargets, 0);
4762 if (generate)
4764 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4765 rtx shift = GEN_INT (clear_zero_bit_copies);
4767 emit_insn (gen_lshrsi3 (new_src, source, shift));
4768 emit_insn (gen_ashlsi3 (target, new_src, shift));
4771 return insns + 2;
4774 break;
4776 default:
4777 break;
4780 /* Calculate what the instruction sequences would be if we generated it
4781 normally, negated, or inverted. */
4782 if (code == AND)
4783 /* AND cannot be split into multiple insns, so invert and use BIC. */
4784 insns = 99;
4785 else
4786 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4788 if (can_negate)
4789 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4790 &neg_immediates);
4791 else
4792 neg_insns = 99;
4794 if (can_invert || final_invert)
4795 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4796 &inv_immediates);
4797 else
4798 inv_insns = 99;
4800 immediates = &pos_immediates;
4802 /* Is the negated immediate sequence more efficient? */
4803 if (neg_insns < insns && neg_insns <= inv_insns)
4805 insns = neg_insns;
4806 immediates = &neg_immediates;
4808 else
4809 can_negate = 0;
4811 /* Is the inverted immediate sequence more efficient?
4812 We must allow for an extra NOT instruction for XOR operations, although
4813 there is some chance that the final 'mvn' will get optimized later. */
4814 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4816 insns = inv_insns;
4817 immediates = &inv_immediates;
4819 else
4821 can_invert = 0;
4822 final_invert = 0;
4825 /* Now output the chosen sequence as instructions. */
4826 if (generate)
4828 for (i = 0; i < insns; i++)
4830 rtx new_src, temp1_rtx;
4832 temp1 = immediates->i[i];
4834 if (code == SET || code == MINUS)
4835 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4836 else if ((final_invert || i < (insns - 1)) && subtargets)
4837 new_src = gen_reg_rtx (mode);
4838 else
4839 new_src = target;
4841 if (can_invert)
4842 temp1 = ~temp1;
4843 else if (can_negate)
4844 temp1 = -temp1;
4846 temp1 = trunc_int_for_mode (temp1, mode);
4847 temp1_rtx = GEN_INT (temp1);
4849 if (code == SET)
4851 else if (code == MINUS)
4852 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4853 else
4854 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4856 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4857 source = new_src;
4859 if (code == SET)
4861 can_negate = can_invert;
4862 can_invert = 0;
4863 code = PLUS;
4865 else if (code == MINUS)
4866 code = PLUS;
4870 if (final_invert)
4872 if (generate)
4873 emit_constant_insn (cond, gen_rtx_SET (target,
4874 gen_rtx_NOT (mode, source)));
4875 insns++;
4878 return insns;
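/* Example of the BIC fallback above (illustrative): for CODE == AND with
   val == 0xfffff0f0 none of the earlier shortcuts apply, so insns is forced
   to 99 and the inverted sequence wins: remainder ^ 0xffffffff == 0x0f0f
   splits into the immediates 0x0f00 and 0x000f, and the generation loop
   (with can_invert set) emits the equivalent of

     bic  rD, rS, #0x0f00
     bic  rD, rD, #0x000f

   rather than an unencodable chain of ANDs.  */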
4881 /* Canonicalize a comparison so that we are more likely to recognize it.
4882 This can be done for a few constant compares, where we can make the
4883 immediate value easier to load. */
4885 static void
4886 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4887 bool op0_preserve_value)
4889 machine_mode mode;
4890 unsigned HOST_WIDE_INT i, maxval;
4892 mode = GET_MODE (*op0);
4893 if (mode == VOIDmode)
4894 mode = GET_MODE (*op1);
4896 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4898 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4899 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4900 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4901 for GTU/LEU in Thumb mode. */
4902 if (mode == DImode)
4905 if (*code == GT || *code == LE
4906 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4908 /* Missing comparison. First try to use an available
4909 comparison. */
4910 if (CONST_INT_P (*op1))
4912 i = INTVAL (*op1);
4913 switch (*code)
4915 case GT:
4916 case LE:
4917 if (i != maxval
4918 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4920 *op1 = GEN_INT (i + 1);
4921 *code = *code == GT ? GE : LT;
4922 return;
4924 break;
4925 case GTU:
4926 case LEU:
4927 if (i != ~((unsigned HOST_WIDE_INT) 0)
4928 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4930 *op1 = GEN_INT (i + 1);
4931 *code = *code == GTU ? GEU : LTU;
4932 return;
4934 break;
4935 default:
4936 gcc_unreachable ();
4940 /* If that did not work, reverse the condition. */
4941 if (!op0_preserve_value)
4943 std::swap (*op0, *op1);
4944 *code = (int)swap_condition ((enum rtx_code)*code);
4947 return;
4950 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4951 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4952 to facilitate possible combining with a cmp into 'ands'. */
4953 if (mode == SImode
4954 && GET_CODE (*op0) == ZERO_EXTEND
4955 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4956 && GET_MODE (XEXP (*op0, 0)) == QImode
4957 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4958 && subreg_lowpart_p (XEXP (*op0, 0))
4959 && *op1 == const0_rtx)
4960 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4961 GEN_INT (255));
4963 /* Comparisons smaller than DImode. Only adjust comparisons against
4964 an out-of-range constant. */
4965 if (!CONST_INT_P (*op1)
4966 || const_ok_for_arm (INTVAL (*op1))
4967 || const_ok_for_arm (- INTVAL (*op1)))
4968 return;
4970 i = INTVAL (*op1);
4972 switch (*code)
4974 case EQ:
4975 case NE:
4976 return;
4978 case GT:
4979 case LE:
4980 if (i != maxval
4981 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4983 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4984 *code = *code == GT ? GE : LT;
4985 return;
4987 break;
4989 case GE:
4990 case LT:
4991 if (i != ~maxval
4992 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4994 *op1 = GEN_INT (i - 1);
4995 *code = *code == GE ? GT : LE;
4996 return;
4998 break;
5000 case GTU:
5001 case LEU:
5002 if (i != ~((unsigned HOST_WIDE_INT) 0)
5003 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5005 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5006 *code = *code == GTU ? GEU : LTU;
5007 return;
5009 break;
5011 case GEU:
5012 case LTU:
5013 if (i != 0
5014 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5016 *op1 = GEN_INT (i - 1);
5017 *code = *code == GEU ? GTU : LEU;
5018 return;
5020 break;
5022 default:
5023 gcc_unreachable ();
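/* Example (illustrative): for an SImode comparison "x > 0xfff", neither 0xfff
   nor -0xfff is a valid ARM immediate, so the GT/LE case above rewrites it as
   "x >= 0x1000"; 0x1000 is a rotated 8-bit immediate, so the resulting cmp
   needs no extra constant-synthesis insns.  */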
5028 /* Define how to find the value returned by a function. */
5030 static rtx
5031 arm_function_value(const_tree type, const_tree func,
5032 bool outgoing ATTRIBUTE_UNUSED)
5034 machine_mode mode;
5035 int unsignedp ATTRIBUTE_UNUSED;
5036 rtx r ATTRIBUTE_UNUSED;
5038 mode = TYPE_MODE (type);
5040 if (TARGET_AAPCS_BASED)
5041 return aapcs_allocate_return_reg (mode, type, func);
5043 /* Promote integer types. */
5044 if (INTEGRAL_TYPE_P (type))
5045 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5047 /* Promotes small structs returned in a register to full-word size
5048 for big-endian AAPCS. */
5049 if (arm_return_in_msb (type))
5051 HOST_WIDE_INT size = int_size_in_bytes (type);
5052 if (size % UNITS_PER_WORD != 0)
5054 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5055 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5059 return arm_libcall_value_1 (mode);
5062 /* libcall hashtable helpers. */
5064 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5066 static inline hashval_t hash (const rtx_def *);
5067 static inline bool equal (const rtx_def *, const rtx_def *);
5068 static inline void remove (rtx_def *);
5071 inline bool
5072 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5074 return rtx_equal_p (p1, p2);
5077 inline hashval_t
5078 libcall_hasher::hash (const rtx_def *p1)
5080 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5083 typedef hash_table<libcall_hasher> libcall_table_type;
5085 static void
5086 add_libcall (libcall_table_type *htab, rtx libcall)
5088 *htab->find_slot (libcall, INSERT) = libcall;
5091 static bool
5092 arm_libcall_uses_aapcs_base (const_rtx libcall)
5094 static bool init_done = false;
5095 static libcall_table_type *libcall_htab = NULL;
5097 if (!init_done)
5099 init_done = true;
5101 libcall_htab = new libcall_table_type (31);
5102 add_libcall (libcall_htab,
5103 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5104 add_libcall (libcall_htab,
5105 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5106 add_libcall (libcall_htab,
5107 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5108 add_libcall (libcall_htab,
5109 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5111 add_libcall (libcall_htab,
5112 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5113 add_libcall (libcall_htab,
5114 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5115 add_libcall (libcall_htab,
5116 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5117 add_libcall (libcall_htab,
5118 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5120 add_libcall (libcall_htab,
5121 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5122 add_libcall (libcall_htab,
5123 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5124 add_libcall (libcall_htab,
5125 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5126 add_libcall (libcall_htab,
5127 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5128 add_libcall (libcall_htab,
5129 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5130 add_libcall (libcall_htab,
5131 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5132 add_libcall (libcall_htab,
5133 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5134 add_libcall (libcall_htab,
5135 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5137 /* Values from double-precision helper functions are returned in core
5138 registers if the selected core only supports single-precision
5139 arithmetic, even if we are using the hard-float ABI. The same is
5140 true for single-precision helpers, but we will never be using the
5141 hard-float ABI on a CPU which doesn't support single-precision
5142 operations in hardware. */
5143 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5144 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5145 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5146 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5147 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5148 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5149 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5150 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5151 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5152 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5153 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5154 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5155 SFmode));
5156 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5157 DFmode));
5160 return libcall && libcall_htab->find (libcall) != NULL;
5163 static rtx
5164 arm_libcall_value_1 (machine_mode mode)
5166 if (TARGET_AAPCS_BASED)
5167 return aapcs_libcall_value (mode);
5168 else if (TARGET_IWMMXT_ABI
5169 && arm_vector_mode_supported_p (mode))
5170 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5171 else
5172 return gen_rtx_REG (mode, ARG_REGISTER (1));
5175 /* Define how to find the value returned by a library function
5176 assuming the value has mode MODE. */
5178 static rtx
5179 arm_libcall_value (machine_mode mode, const_rtx libcall)
5181 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5182 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5184 /* The following libcalls return their result in integer registers,
5185 even though they return a floating point value. */
5186 if (arm_libcall_uses_aapcs_base (libcall))
5187 return gen_rtx_REG (mode, ARG_REGISTER(1));
5191 return arm_libcall_value_1 (mode);
5194 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5196 static bool
5197 arm_function_value_regno_p (const unsigned int regno)
5199 if (regno == ARG_REGISTER (1)
5200 || (TARGET_32BIT
5201 && TARGET_AAPCS_BASED
5202 && TARGET_VFP
5203 && TARGET_HARD_FLOAT
5204 && regno == FIRST_VFP_REGNUM)
5205 || (TARGET_IWMMXT_ABI
5206 && regno == FIRST_IWMMXT_REGNUM))
5207 return true;
5209 return false;
5212 /* Determine the amount of memory needed to store the possible return
5213 registers of an untyped call. */
5215 arm_apply_result_size (void)
5217 int size = 16;
5219 if (TARGET_32BIT)
5221 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5222 size += 32;
5223 if (TARGET_IWMMXT_ABI)
5224 size += 8;
5227 return size;
5230 /* Decide whether TYPE should be returned in memory (true)
5231 or in a register (false). FNTYPE is the type of the function making
5232 the call. */
5233 static bool
5234 arm_return_in_memory (const_tree type, const_tree fntype)
5236 HOST_WIDE_INT size;
5238 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5240 if (TARGET_AAPCS_BASED)
5242 /* Simple, non-aggregate types (i.e. not including vectors and
5243 complex) are always returned in a register (or registers).
5244 We don't care about which register here, so we can short-cut
5245 some of the detail. */
5246 if (!AGGREGATE_TYPE_P (type)
5247 && TREE_CODE (type) != VECTOR_TYPE
5248 && TREE_CODE (type) != COMPLEX_TYPE)
5249 return false;
5251 /* Any return value that is no larger than one word can be
5252 returned in r0. */
5253 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5254 return false;
5256 /* Check any available co-processors to see if they accept the
5257 type as a register candidate (VFP, for example, can return
5258 some aggregates in consecutive registers). These aren't
5259 available if the call is variadic. */
5260 if (aapcs_select_return_coproc (type, fntype) >= 0)
5261 return false;
5263 /* Vector values should be returned using ARM registers, not
5264 memory (unless they're over 16 bytes, which will break since
5265 we only have four call-clobbered registers to play with). */
5266 if (TREE_CODE (type) == VECTOR_TYPE)
5267 return (size < 0 || size > (4 * UNITS_PER_WORD));
5269 /* The rest go in memory. */
5270 return true;
5273 if (TREE_CODE (type) == VECTOR_TYPE)
5274 return (size < 0 || size > (4 * UNITS_PER_WORD));
5276 if (!AGGREGATE_TYPE_P (type) &&
5277 (TREE_CODE (type) != VECTOR_TYPE))
5278 /* All simple types are returned in registers. */
5279 return false;
5281 if (arm_abi != ARM_ABI_APCS)
5283 /* ATPCS and later return aggregate types in memory only if they are
5284 larger than a word (or are variable size). */
5285 return (size < 0 || size > UNITS_PER_WORD);
5288 /* For the arm-wince targets we choose to be compatible with Microsoft's
5289 ARM and Thumb compilers, which always return aggregates in memory. */
5290 #ifndef ARM_WINCE
5291 /* All structures/unions bigger than one word are returned in memory.
5292 Also catch the case where int_size_in_bytes returns -1. In this case
5293 the aggregate is either huge or of variable size, and in either case
5294 we will want to return it via memory and not in a register. */
5295 if (size < 0 || size > UNITS_PER_WORD)
5296 return true;
5298 if (TREE_CODE (type) == RECORD_TYPE)
5300 tree field;
5302 /* For a struct the APCS says that we only return in a register
5303 if the type is 'integer like' and every addressable element
5304 has an offset of zero. For practical purposes this means
5305 that the structure can have at most one non bit-field element
5306 and that this element must be the first one in the structure. */
5308 /* Find the first field, ignoring non FIELD_DECL things which will
5309 have been created by C++. */
5310 for (field = TYPE_FIELDS (type);
5311 field && TREE_CODE (field) != FIELD_DECL;
5312 field = DECL_CHAIN (field))
5313 continue;
5315 if (field == NULL)
5316 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5318 /* Check that the first field is valid for returning in a register. */
5320 /* ... Floats are not allowed */
5321 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5322 return true;
5324 /* ... Aggregates that are not themselves valid for returning in
5325 a register are not allowed. */
5326 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5327 return true;
5329 /* Now check the remaining fields, if any. Only bitfields are allowed,
5330 since they are not addressable. */
5331 for (field = DECL_CHAIN (field);
5332 field;
5333 field = DECL_CHAIN (field))
5335 if (TREE_CODE (field) != FIELD_DECL)
5336 continue;
5338 if (!DECL_BIT_FIELD_TYPE (field))
5339 return true;
5342 return false;
5345 if (TREE_CODE (type) == UNION_TYPE)
5347 tree field;
5349 /* Unions can be returned in registers if every element is
5350 integral, or can be returned in an integer register. */
5351 for (field = TYPE_FIELDS (type);
5352 field;
5353 field = DECL_CHAIN (field))
5355 if (TREE_CODE (field) != FIELD_DECL)
5356 continue;
5358 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5359 return true;
5361 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5362 return true;
5365 return false;
5367 #endif /* not ARM_WINCE */
5369 /* Return all other types in memory. */
5370 return true;
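/* Illustrative examples for the AAPCS path above (not part of the source):

     struct s1 { int a; };        4 bytes  -> returned in a register (r0)
     struct s2 { int a[3]; };     12 bytes -> returned in memory
     struct s3 { float a, b; };   8 bytes  -> returned in VFP registers when
                                  aapcs_select_return_coproc accepts it
                                  (hard-float VFP PCS), otherwise in memory.  */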
5373 const struct pcs_attribute_arg
5375 const char *arg;
5376 enum arm_pcs value;
5377 } pcs_attribute_args[] =
5379 {"aapcs", ARM_PCS_AAPCS},
5380 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5381 #if 0
5382 /* We could recognize these, but changes would be needed elsewhere
5383 * to implement them. */
5384 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5385 {"atpcs", ARM_PCS_ATPCS},
5386 {"apcs", ARM_PCS_APCS},
5387 #endif
5388 {NULL, ARM_PCS_UNKNOWN}
5391 static enum arm_pcs
5392 arm_pcs_from_attribute (tree attr)
5394 const struct pcs_attribute_arg *ptr;
5395 const char *arg;
5397 /* Get the value of the argument. */
5398 if (TREE_VALUE (attr) == NULL_TREE
5399 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5400 return ARM_PCS_UNKNOWN;
5402 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5404 /* Check it against the list of known arguments. */
5405 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5406 if (streq (arg, ptr->arg))
5407 return ptr->value;
5409 /* An unrecognized PCS name. */
5410 return ARM_PCS_UNKNOWN;
5413 /* Get the PCS variant to use for this call. TYPE is the function's type
 5414    specification, DECL is the specific declaration.  DECL may be null if
5415 the call could be indirect or if this is a library call. */
5416 static enum arm_pcs
5417 arm_get_pcs_model (const_tree type, const_tree decl)
5419 bool user_convention = false;
5420 enum arm_pcs user_pcs = arm_pcs_default;
5421 tree attr;
5423 gcc_assert (type);
5425 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5426 if (attr)
5428 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5429 user_convention = true;
5432 if (TARGET_AAPCS_BASED)
5434 /* Detect varargs functions. These always use the base rules
5435 (no argument is ever a candidate for a co-processor
5436 register). */
5437 bool base_rules = stdarg_p (type);
5439 if (user_convention)
5441 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5442 sorry ("non-AAPCS derived PCS variant");
5443 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5444 error ("variadic functions must use the base AAPCS variant");
5447 if (base_rules)
5448 return ARM_PCS_AAPCS;
5449 else if (user_convention)
5450 return user_pcs;
5451 else if (decl && flag_unit_at_a_time)
5453 /* Local functions never leak outside this compilation unit,
5454 so we are free to use whatever conventions are
5455 appropriate. */
5456 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5457 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5458 if (i && i->local)
5459 return ARM_PCS_AAPCS_LOCAL;
5462 else if (user_convention && user_pcs != arm_pcs_default)
5463 sorry ("PCS variant");
5465 /* For everything else we use the target's default. */
5466 return arm_pcs_default;
5470 static void
5471 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5472 const_tree fntype ATTRIBUTE_UNUSED,
5473 rtx libcall ATTRIBUTE_UNUSED,
5474 const_tree fndecl ATTRIBUTE_UNUSED)
5476 /* Record the unallocated VFP registers. */
5477 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5478 pcum->aapcs_vfp_reg_alloc = 0;
5481 /* Walk down the type tree of TYPE counting consecutive base elements.
5482 If *MODEP is VOIDmode, then set it to the first valid floating point
5483 type. If a non-floating point type is found, or if a floating point
5484 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5485 otherwise return the count in the sub-tree. */
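/* Illustrative example (assumption): for  struct { double a; double b; }
   both fields are DFmode REAL_TYPEs, so *MODEP becomes DFmode and 2 is
   returned; for  struct { float a; double b; }  the second field's mode
   differs from *MODEP, so -1 is returned and the type is rejected.  */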
5486 static int
5487 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5489 machine_mode mode;
5490 HOST_WIDE_INT size;
5492 switch (TREE_CODE (type))
5494 case REAL_TYPE:
5495 mode = TYPE_MODE (type);
5496 if (mode != DFmode && mode != SFmode)
5497 return -1;
5499 if (*modep == VOIDmode)
5500 *modep = mode;
5502 if (*modep == mode)
5503 return 1;
5505 break;
5507 case COMPLEX_TYPE:
5508 mode = TYPE_MODE (TREE_TYPE (type));
5509 if (mode != DFmode && mode != SFmode)
5510 return -1;
5512 if (*modep == VOIDmode)
5513 *modep = mode;
5515 if (*modep == mode)
5516 return 2;
5518 break;
5520 case VECTOR_TYPE:
5521 /* Use V2SImode and V4SImode as representatives of all 64-bit
5522 and 128-bit vector types, whether or not those modes are
5523 supported with the present options. */
5524 size = int_size_in_bytes (type);
5525 switch (size)
5527 case 8:
5528 mode = V2SImode;
5529 break;
5530 case 16:
5531 mode = V4SImode;
5532 break;
5533 default:
5534 return -1;
5537 if (*modep == VOIDmode)
5538 *modep = mode;
5540 /* Vector modes are considered to be opaque: two vectors are
5541 equivalent for the purposes of being homogeneous aggregates
5542 if they are the same size. */
5543 if (*modep == mode)
5544 return 1;
5546 break;
5548 case ARRAY_TYPE:
5550 int count;
5551 tree index = TYPE_DOMAIN (type);
5553 /* Can't handle incomplete types nor sizes that are not
5554 fixed. */
5555 if (!COMPLETE_TYPE_P (type)
5556 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5557 return -1;
5559 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5560 if (count == -1
5561 || !index
5562 || !TYPE_MAX_VALUE (index)
5563 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5564 || !TYPE_MIN_VALUE (index)
5565 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5566 || count < 0)
5567 return -1;
5569 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5570 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5572 /* There must be no padding. */
5573 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5574 return -1;
5576 return count;
5579 case RECORD_TYPE:
5581 int count = 0;
5582 int sub_count;
5583 tree field;
5585 /* Can't handle incomplete types nor sizes that are not
5586 fixed. */
5587 if (!COMPLETE_TYPE_P (type)
5588 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5589 return -1;
5591 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5593 if (TREE_CODE (field) != FIELD_DECL)
5594 continue;
5596 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5597 if (sub_count < 0)
5598 return -1;
5599 count += sub_count;
5602 /* There must be no padding. */
5603 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5604 return -1;
5606 return count;
5609 case UNION_TYPE:
5610 case QUAL_UNION_TYPE:
5612 /* These aren't very interesting except in a degenerate case. */
5613 int count = 0;
5614 int sub_count;
5615 tree field;
5617 /* Can't handle incomplete types nor sizes that are not
5618 fixed. */
5619 if (!COMPLETE_TYPE_P (type)
5620 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5621 return -1;
5623 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5625 if (TREE_CODE (field) != FIELD_DECL)
5626 continue;
5628 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5629 if (sub_count < 0)
5630 return -1;
5631 count = count > sub_count ? count : sub_count;
5634 /* There must be no padding. */
5635 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5636 return -1;
5638 return count;
5641 default:
5642 break;
5645 return -1;
5648 /* Return true if PCS_VARIANT should use VFP registers. */
5649 static bool
5650 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5652 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5654 static bool seen_thumb1_vfp = false;
5656 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5658 sorry ("Thumb-1 hard-float VFP ABI");
5659 /* sorry() is not immediately fatal, so only display this once. */
5660 seen_thumb1_vfp = true;
5663 return true;
5666 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5667 return false;
5669 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5670 (TARGET_VFP_DOUBLE || !is_double));
5673 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5674 suitable for passing or returning in VFP registers for the PCS
5675 variant selected. If it is, then *BASE_MODE is updated to contain
5676 a machine mode describing each element of the argument's type and
5677 *COUNT to hold the number of such elements. */
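/* Illustrative example (assumption): _Complex double (DCmode) yields
   *BASE_MODE == DFmode and *COUNT == 2; struct { float v[4]; } yields
   *BASE_MODE == SFmode and *COUNT == 4.  Counts above 4 are rejected.  */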
5678 static bool
5679 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5680 machine_mode mode, const_tree type,
5681 machine_mode *base_mode, int *count)
5683 machine_mode new_mode = VOIDmode;
5685 /* If we have the type information, prefer that to working things
5686 out from the mode. */
5687 if (type)
5689 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5691 if (ag_count > 0 && ag_count <= 4)
5692 *count = ag_count;
5693 else
5694 return false;
5696 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5697 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5698 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5700 *count = 1;
5701 new_mode = mode;
5703 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5705 *count = 2;
5706 new_mode = (mode == DCmode ? DFmode : SFmode);
5708 else
5709 return false;
5712 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5713 return false;
5715 *base_mode = new_mode;
5716 return true;
5719 static bool
5720 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5721 machine_mode mode, const_tree type)
5723 int count ATTRIBUTE_UNUSED;
5724 machine_mode ag_mode ATTRIBUTE_UNUSED;
5726 if (!use_vfp_abi (pcs_variant, false))
5727 return false;
5728 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5729 &ag_mode, &count);
5732 static bool
5733 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5734 const_tree type)
5736 if (!use_vfp_abi (pcum->pcs_variant, false))
5737 return false;
5739 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5740 &pcum->aapcs_vfp_rmode,
5741 &pcum->aapcs_vfp_rcount);
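/* Descriptive note (assumption): scan the VFP argument registers from the
   bottom for the first naturally aligned block of free registers large
   enough for the candidate; BLKmode values and vector modes that are not
   supported without NEON are spread across the block with a PARALLEL so
   that each piece names its own register.  */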
5744 static bool
5745 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5746 const_tree type ATTRIBUTE_UNUSED)
5748 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5749 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5750 int regno;
5752 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5753 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5755 pcum->aapcs_vfp_reg_alloc = mask << regno;
5756 if (mode == BLKmode
5757 || (mode == TImode && ! TARGET_NEON)
5758 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5760 int i;
5761 int rcount = pcum->aapcs_vfp_rcount;
5762 int rshift = shift;
5763 machine_mode rmode = pcum->aapcs_vfp_rmode;
5764 rtx par;
5765 if (!TARGET_NEON)
5767 /* Avoid using unsupported vector modes. */
5768 if (rmode == V2SImode)
5769 rmode = DImode;
5770 else if (rmode == V4SImode)
5772 rmode = DImode;
5773 rcount *= 2;
5774 rshift /= 2;
5777 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5778 for (i = 0; i < rcount; i++)
5780 rtx tmp = gen_rtx_REG (rmode,
5781 FIRST_VFP_REGNUM + regno + i * rshift);
5782 tmp = gen_rtx_EXPR_LIST
5783 (VOIDmode, tmp,
5784 GEN_INT (i * GET_MODE_SIZE (rmode)));
5785 XVECEXP (par, 0, i) = tmp;
5788 pcum->aapcs_reg = par;
5790 else
5791 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5792 return true;
5794 return false;
5797 static rtx
5798 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5799 machine_mode mode,
5800 const_tree type ATTRIBUTE_UNUSED)
5802 if (!use_vfp_abi (pcs_variant, false))
5803 return NULL;
5805 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5807 int count;
5808 machine_mode ag_mode;
5809 int i;
5810 rtx par;
5811 int shift;
5813 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5814 &ag_mode, &count);
5816 if (!TARGET_NEON)
5818 if (ag_mode == V2SImode)
5819 ag_mode = DImode;
5820 else if (ag_mode == V4SImode)
5822 ag_mode = DImode;
5823 count *= 2;
5826 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5827 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5828 for (i = 0; i < count; i++)
5830 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5831 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5832 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5833 XVECEXP (par, 0, i) = tmp;
5836 return par;
5839 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5842 static void
5843 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5844 machine_mode mode ATTRIBUTE_UNUSED,
5845 const_tree type ATTRIBUTE_UNUSED)
5847 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5848 pcum->aapcs_vfp_reg_alloc = 0;
5849 return;
5852 #define AAPCS_CP(X) \
5854 aapcs_ ## X ## _cum_init, \
5855 aapcs_ ## X ## _is_call_candidate, \
5856 aapcs_ ## X ## _allocate, \
5857 aapcs_ ## X ## _is_return_candidate, \
5858 aapcs_ ## X ## _allocate_return_reg, \
5859 aapcs_ ## X ## _advance \
5862 /* Table of co-processors that can be used to pass arguments in
 5863    registers.  Ideally no argument should be a candidate for more than
5864 one co-processor table entry, but the table is processed in order
5865 and stops after the first match. If that entry then fails to put
5866 the argument into a co-processor register, the argument will go on
5867 the stack. */
5868 static struct
5870 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5871 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5873 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5874 BLKmode) is a candidate for this co-processor's registers; this
5875 function should ignore any position-dependent state in
5876 CUMULATIVE_ARGS and only use call-type dependent information. */
5877 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5879 /* Return true if the argument does get a co-processor register; it
5880 should set aapcs_reg to an RTX of the register allocated as is
5881 required for a return from FUNCTION_ARG. */
5882 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5884 /* Return true if a result of mode MODE (or type TYPE if MODE is
 5885      BLKmode) can be returned in this co-processor's registers.  */
5886 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5888 /* Allocate and return an RTX element to hold the return type of a
 5889      call; this routine must not fail and will only be called if
5890 is_return_candidate returned true with the same parameters. */
5891 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5893 /* Finish processing this argument and prepare to start processing
5894 the next one. */
5895 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5896 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5898 AAPCS_CP(vfp)
5901 #undef AAPCS_CP
5903 static int
5904 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5905 const_tree type)
5907 int i;
5909 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5910 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5911 return i;
5913 return -1;
5916 static int
5917 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5919 /* We aren't passed a decl, so we can't check that a call is local.
5920 However, it isn't clear that that would be a win anyway, since it
5921 might limit some tail-calling opportunities. */
5922 enum arm_pcs pcs_variant;
5924 if (fntype)
5926 const_tree fndecl = NULL_TREE;
5928 if (TREE_CODE (fntype) == FUNCTION_DECL)
5930 fndecl = fntype;
5931 fntype = TREE_TYPE (fntype);
5934 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5936 else
5937 pcs_variant = arm_pcs_default;
5939 if (pcs_variant != ARM_PCS_AAPCS)
5941 int i;
5943 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5944 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5945 TYPE_MODE (type),
5946 type))
5947 return i;
5949 return -1;
5952 static rtx
5953 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5954 const_tree fntype)
5956 /* We aren't passed a decl, so we can't check that a call is local.
5957 However, it isn't clear that that would be a win anyway, since it
5958 might limit some tail-calling opportunities. */
5959 enum arm_pcs pcs_variant;
5960 int unsignedp ATTRIBUTE_UNUSED;
5962 if (fntype)
5964 const_tree fndecl = NULL_TREE;
5966 if (TREE_CODE (fntype) == FUNCTION_DECL)
5968 fndecl = fntype;
5969 fntype = TREE_TYPE (fntype);
5972 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5974 else
5975 pcs_variant = arm_pcs_default;
5977 /* Promote integer types. */
5978 if (type && INTEGRAL_TYPE_P (type))
5979 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5981 if (pcs_variant != ARM_PCS_AAPCS)
5983 int i;
5985 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5986 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5987 type))
5988 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5989 mode, type);
5992 /* Promotes small structs returned in a register to full-word size
5993 for big-endian AAPCS. */
5994 if (type && arm_return_in_msb (type))
5996 HOST_WIDE_INT size = int_size_in_bytes (type);
5997 if (size % UNITS_PER_WORD != 0)
5999 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6000 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6004 return gen_rtx_REG (mode, R0_REGNUM);
6007 static rtx
6008 aapcs_libcall_value (machine_mode mode)
6010 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6011 && GET_MODE_SIZE (mode) <= 4)
6012 mode = SImode;
6014 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6017 /* Lay out a function argument using the AAPCS rules. The rule
6018 numbers referred to here are those in the AAPCS. */
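/* Illustrative example (assumption): for  f (int a, long long b, int c)
   the int goes in r0, rule C3 rounds the NCRN up to r2 for the
   doubleword-aligned long long (so r1 stays unused), b occupies r2/r3
   (rule C4), and c goes on the stack (rules C6-C8).  */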
6019 static void
6020 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6021 const_tree type, bool named)
6023 int nregs, nregs2;
6024 int ncrn;
6026 /* We only need to do this once per argument. */
6027 if (pcum->aapcs_arg_processed)
6028 return;
6030 pcum->aapcs_arg_processed = true;
6032 /* Special case: if named is false then we are handling an incoming
6033 anonymous argument which is on the stack. */
6034 if (!named)
6035 return;
6037 /* Is this a potential co-processor register candidate? */
6038 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6040 int slot = aapcs_select_call_coproc (pcum, mode, type);
6041 pcum->aapcs_cprc_slot = slot;
6043 /* We don't have to apply any of the rules from part B of the
6044 preparation phase, these are handled elsewhere in the
6045 compiler. */
6047 if (slot >= 0)
6049 /* A Co-processor register candidate goes either in its own
6050 class of registers or on the stack. */
6051 if (!pcum->aapcs_cprc_failed[slot])
6053 /* C1.cp - Try to allocate the argument to co-processor
6054 registers. */
6055 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6056 return;
6058 /* C2.cp - Put the argument on the stack and note that we
6059 can't assign any more candidates in this slot. We also
6060 need to note that we have allocated stack space, so that
6061 we won't later try to split a non-cprc candidate between
6062 core registers and the stack. */
6063 pcum->aapcs_cprc_failed[slot] = true;
6064 pcum->can_split = false;
6067 /* We didn't get a register, so this argument goes on the
6068 stack. */
6069 gcc_assert (pcum->can_split == false);
6070 return;
6074 /* C3 - For double-word aligned arguments, round the NCRN up to the
6075 next even number. */
6076 ncrn = pcum->aapcs_ncrn;
6077 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6078 ncrn++;
6080 nregs = ARM_NUM_REGS2(mode, type);
6082 /* Sigh, this test should really assert that nregs > 0, but a GCC
6083 extension allows empty structs and then gives them empty size; it
6084 then allows such a structure to be passed by value. For some of
6085 the code below we have to pretend that such an argument has
6086 non-zero size so that we 'locate' it correctly either in
6087 registers or on the stack. */
6088 gcc_assert (nregs >= 0);
6090 nregs2 = nregs ? nregs : 1;
6092 /* C4 - Argument fits entirely in core registers. */
6093 if (ncrn + nregs2 <= NUM_ARG_REGS)
6095 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6096 pcum->aapcs_next_ncrn = ncrn + nregs;
6097 return;
6100 /* C5 - Some core registers left and there are no arguments already
6101 on the stack: split this argument between the remaining core
6102 registers and the stack. */
6103 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6105 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6106 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6107 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6108 return;
6111 /* C6 - NCRN is set to 4. */
6112 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
 6114   /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
6115 return;
6118 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6119 for a call to a function whose data type is FNTYPE.
6120 For a library call, FNTYPE is NULL. */
6121 void
6122 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6123 rtx libname,
6124 tree fndecl ATTRIBUTE_UNUSED)
 6126   /* Determine the calling convention (PCS variant) for this function
         or library call.  */
6127 if (fntype)
6128 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6129 else
6130 pcum->pcs_variant = arm_pcs_default;
6132 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6134 if (arm_libcall_uses_aapcs_base (libname))
6135 pcum->pcs_variant = ARM_PCS_AAPCS;
6137 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6138 pcum->aapcs_reg = NULL_RTX;
6139 pcum->aapcs_partial = 0;
6140 pcum->aapcs_arg_processed = false;
6141 pcum->aapcs_cprc_slot = -1;
6142 pcum->can_split = true;
6144 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6146 int i;
6148 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6150 pcum->aapcs_cprc_failed[i] = false;
6151 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6154 return;
6157 /* Legacy ABIs */
6159 /* On the ARM, the offset starts at 0. */
6160 pcum->nregs = 0;
6161 pcum->iwmmxt_nregs = 0;
6162 pcum->can_split = true;
6164 /* Varargs vectors are treated the same as long long.
6165 named_count avoids having to change the way arm handles 'named' */
6166 pcum->named_count = 0;
6167 pcum->nargs = 0;
6169 if (TARGET_REALLY_IWMMXT && fntype)
6171 tree fn_arg;
6173 for (fn_arg = TYPE_ARG_TYPES (fntype);
6174 fn_arg;
6175 fn_arg = TREE_CHAIN (fn_arg))
6176 pcum->named_count += 1;
6178 if (! pcum->named_count)
6179 pcum->named_count = INT_MAX;
6183 /* Return true if mode/type need doubleword alignment. */
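/* Illustrative examples (assumption): long long and double (64-bit
   alignment) need doubleword alignment, as does any aggregate containing
   such a member; int and a struct of chars do not.  */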
6184 static bool
6185 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6187 if (!type)
6188 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6190 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6191 if (!AGGREGATE_TYPE_P (type))
6192 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6194 /* Array types: Use member alignment of element type. */
6195 if (TREE_CODE (type) == ARRAY_TYPE)
6196 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6198 /* Record/aggregate types: Use greatest member alignment of any member. */
6199 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6200 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6201 return true;
6203 return false;
6207 /* Determine where to put an argument to a function.
6208 Value is zero to push the argument on the stack,
6209 or a hard register in which to store the argument.
6211 MODE is the argument's machine mode.
6212 TYPE is the data type of the argument (as a tree).
6213 This is null for libcalls where that information may
6214 not be available.
6215 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6216 the preceding args and about the function being called.
6217 NAMED is nonzero if this argument is a named parameter
6218 (otherwise it is an extra parameter matching an ellipsis).
6220 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6221 other arguments are passed on the stack. If (NAMED == 0) (which happens
6222 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6223 defined), say it is passed in the stack (function_prologue will
6224 indeed make it pass in the stack if necessary). */
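/* Illustrative example (assumption): for  f (int a, int b, int c, int d,
   int e)  under the AAPCS, a..d are passed in r0-r3 and e is passed on
   the stack.  */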
6226 static rtx
6227 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6228 const_tree type, bool named)
6230 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6231 int nregs;
6233 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6234 a call insn (op3 of a call_value insn). */
6235 if (mode == VOIDmode)
6236 return const0_rtx;
6238 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6240 aapcs_layout_arg (pcum, mode, type, named);
6241 return pcum->aapcs_reg;
6244 /* Varargs vectors are treated the same as long long.
6245 named_count avoids having to change the way arm handles 'named' */
6246 if (TARGET_IWMMXT_ABI
6247 && arm_vector_mode_supported_p (mode)
6248 && pcum->named_count > pcum->nargs + 1)
6250 if (pcum->iwmmxt_nregs <= 9)
6251 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6252 else
6254 pcum->can_split = false;
6255 return NULL_RTX;
6259 /* Put doubleword aligned quantities in even register pairs. */
6260 if (pcum->nregs & 1
6261 && ARM_DOUBLEWORD_ALIGN
6262 && arm_needs_doubleword_align (mode, type))
6263 pcum->nregs++;
6265 /* Only allow splitting an arg between regs and memory if all preceding
6266 args were allocated to regs. For args passed by reference we only count
6267 the reference pointer. */
6268 if (pcum->can_split)
6269 nregs = 1;
6270 else
6271 nregs = ARM_NUM_REGS2 (mode, type);
6273 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6274 return NULL_RTX;
6276 return gen_rtx_REG (mode, pcum->nregs);
6279 static unsigned int
6280 arm_function_arg_boundary (machine_mode mode, const_tree type)
6282 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6283 ? DOUBLEWORD_ALIGNMENT
6284 : PARM_BOUNDARY);
6287 static int
6288 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6289 tree type, bool named)
6291 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6292 int nregs = pcum->nregs;
6294 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6296 aapcs_layout_arg (pcum, mode, type, named);
6297 return pcum->aapcs_partial;
6300 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6301 return 0;
6303 if (NUM_ARG_REGS > nregs
6304 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6305 && pcum->can_split)
6306 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6308 return 0;
6311 /* Update the data in PCUM to advance over an argument
6312 of mode MODE and data type TYPE.
6313 (TYPE is null for libcalls where that information may not be available.) */
6315 static void
6316 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6317 const_tree type, bool named)
6319 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6321 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6323 aapcs_layout_arg (pcum, mode, type, named);
6325 if (pcum->aapcs_cprc_slot >= 0)
6327 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6328 type);
6329 pcum->aapcs_cprc_slot = -1;
6332 /* Generic stuff. */
6333 pcum->aapcs_arg_processed = false;
6334 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6335 pcum->aapcs_reg = NULL_RTX;
6336 pcum->aapcs_partial = 0;
6338 else
6340 pcum->nargs += 1;
6341 if (arm_vector_mode_supported_p (mode)
6342 && pcum->named_count > pcum->nargs
6343 && TARGET_IWMMXT_ABI)
6344 pcum->iwmmxt_nregs += 1;
6345 else
6346 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6350 /* Variable sized types are passed by reference. This is a GCC
6351 extension to the ARM ABI. */
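/* Illustrative note (assumption): only types whose TYPE_SIZE is not a
   compile-time INTEGER_CST are affected, e.g. variably modified types in
   GNU C or dynamically sized types from other front ends; a fixed-size
   struct, however large, is still passed by value.  */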
6353 static bool
6354 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6355 machine_mode mode ATTRIBUTE_UNUSED,
6356 const_tree type, bool named ATTRIBUTE_UNUSED)
6358 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6361 /* Encode the current state of the #pragma [no_]long_calls. */
6362 typedef enum
6364 OFF, /* No #pragma [no_]long_calls is in effect. */
6365 LONG, /* #pragma long_calls is in effect. */
6366 SHORT /* #pragma no_long_calls is in effect. */
6367 } arm_pragma_enum;
6369 static arm_pragma_enum arm_pragma_long_calls = OFF;
6371 void
6372 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6374 arm_pragma_long_calls = LONG;
6377 void
6378 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6380 arm_pragma_long_calls = SHORT;
6383 void
6384 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6386 arm_pragma_long_calls = OFF;
6389 /* Handle an attribute requiring a FUNCTION_DECL;
6390 arguments as in struct attribute_spec.handler. */
6391 static tree
6392 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6393 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6395 if (TREE_CODE (*node) != FUNCTION_DECL)
6397 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6398 name);
6399 *no_add_attrs = true;
6402 return NULL_TREE;
6405 /* Handle an "interrupt" or "isr" attribute;
6406 arguments as in struct attribute_spec.handler. */
6407 static tree
6408 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6409 bool *no_add_attrs)
6411 if (DECL_P (*node))
6413 if (TREE_CODE (*node) != FUNCTION_DECL)
6415 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6416 name);
6417 *no_add_attrs = true;
6419 /* FIXME: the argument if any is checked for type attributes;
6420 should it be checked for decl ones? */
6422 else
6424 if (TREE_CODE (*node) == FUNCTION_TYPE
6425 || TREE_CODE (*node) == METHOD_TYPE)
6427 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6429 warning (OPT_Wattributes, "%qE attribute ignored",
6430 name);
6431 *no_add_attrs = true;
6434 else if (TREE_CODE (*node) == POINTER_TYPE
6435 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6436 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6437 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6439 *node = build_variant_type_copy (*node);
6440 TREE_TYPE (*node) = build_type_attribute_variant
6441 (TREE_TYPE (*node),
6442 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6443 *no_add_attrs = true;
6445 else
6447 /* Possibly pass this attribute on from the type to a decl. */
6448 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6449 | (int) ATTR_FLAG_FUNCTION_NEXT
6450 | (int) ATTR_FLAG_ARRAY_NEXT))
6452 *no_add_attrs = true;
6453 return tree_cons (name, args, NULL_TREE);
6455 else
6457 warning (OPT_Wattributes, "%qE attribute ignored",
6458 name);
6463 return NULL_TREE;
6466 /* Handle a "pcs" attribute; arguments as in struct
6467 attribute_spec.handler. */
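/* Illustrative usage (assumption; the function name is hypothetical):
     double f (double) __attribute__ ((pcs ("aapcs")));
   selects the base (soft-float) AAPCS variant for calls to f.  */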
6468 static tree
6469 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6470 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6472 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6474 warning (OPT_Wattributes, "%qE attribute ignored", name);
6475 *no_add_attrs = true;
6477 return NULL_TREE;
6480 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6481 /* Handle the "notshared" attribute. This attribute is another way of
6482 requesting hidden visibility. ARM's compiler supports
6483 "__declspec(notshared)"; we support the same thing via an
6484 attribute. */
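/* Illustrative usage (assumption, C++; the class name is hypothetical):
     class __attribute__ ((notshared)) Local { ... };
   gives the type's name hidden visibility, mirroring __declspec(notshared).  */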
6486 static tree
6487 arm_handle_notshared_attribute (tree *node,
6488 tree name ATTRIBUTE_UNUSED,
6489 tree args ATTRIBUTE_UNUSED,
6490 int flags ATTRIBUTE_UNUSED,
6491 bool *no_add_attrs)
6493 tree decl = TYPE_NAME (*node);
6495 if (decl)
6497 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6498 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6499 *no_add_attrs = false;
6501 return NULL_TREE;
6503 #endif
6505 /* Return 0 if the attributes for two types are incompatible, 1 if they
6506 are compatible, and 2 if they are nearly compatible (which causes a
6507 warning to be generated). */
6508 static int
6509 arm_comp_type_attributes (const_tree type1, const_tree type2)
6511 int l1, l2, s1, s2;
6513 /* Check for mismatch of non-default calling convention. */
6514 if (TREE_CODE (type1) != FUNCTION_TYPE)
6515 return 1;
6517 /* Check for mismatched call attributes. */
6518 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6519 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6520 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6521 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6523 /* Only bother to check if an attribute is defined. */
6524 if (l1 | l2 | s1 | s2)
6526 /* If one type has an attribute, the other must have the same attribute. */
6527 if ((l1 != l2) || (s1 != s2))
6528 return 0;
6530 /* Disallow mixed attributes. */
6531 if ((l1 & s2) || (l2 & s1))
6532 return 0;
6535 /* Check for mismatched ISR attribute. */
6536 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6537 if (! l1)
6538 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6539 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6540 if (! l2)
 6541     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6542 if (l1 != l2)
6543 return 0;
6545 return 1;
6548 /* Assigns default attributes to newly defined type. This is used to
6549 set short_call/long_call attributes for function types of
6550 functions defined inside corresponding #pragma scopes. */
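/* Illustrative usage (assumption; the function name is hypothetical):
     #pragma long_calls
     extern void far_fn (void);    (its type gets the "long_call" attribute)
     #pragma long_calls_off
   Declarations after the closing pragma revert to the default.  */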
6551 static void
6552 arm_set_default_type_attributes (tree type)
6554 /* Add __attribute__ ((long_call)) to all functions, when
6555 inside #pragma long_calls or __attribute__ ((short_call)),
6556 when inside #pragma no_long_calls. */
6557 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6559 tree type_attr_list, attr_name;
6560 type_attr_list = TYPE_ATTRIBUTES (type);
6562 if (arm_pragma_long_calls == LONG)
6563 attr_name = get_identifier ("long_call");
6564 else if (arm_pragma_long_calls == SHORT)
6565 attr_name = get_identifier ("short_call");
6566 else
6567 return;
6569 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6570 TYPE_ATTRIBUTES (type) = type_attr_list;
6574 /* Return true if DECL is known to be linked into section SECTION. */
6576 static bool
6577 arm_function_in_section_p (tree decl, section *section)
6579 /* We can only be certain about the prevailing symbol definition. */
6580 if (!decl_binds_to_current_def_p (decl))
6581 return false;
6583 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6584 if (!DECL_SECTION_NAME (decl))
6586 /* Make sure that we will not create a unique section for DECL. */
6587 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6588 return false;
6591 return function_section (decl) == section;
6594 /* Return nonzero if a 32-bit "long_call" should be generated for
6595 a call from the current function to DECL. We generate a long_call
6596 if the function:
 6598         a.  has an __attribute__ ((long_call))
6599 or b. is within the scope of a #pragma long_calls
6600 or c. the -mlong-calls command line switch has been specified
6602 However we do not generate a long call if the function:
6604 d. has an __attribute__ ((short_call))
6605 or e. is inside the scope of a #pragma no_long_calls
6606 or f. is defined in the same section as the current function. */
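/* Illustrative example (assumption; the function name is hypothetical):
     extern void far_fn (void) __attribute__ ((long_call));
   makes calls to far_fn use a full 32-bit (indirect) call sequence even
   without -mlong-calls, while __attribute__ ((short_call)) forces a plain
   BL regardless of -mlong-calls.  */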
6608 bool
6609 arm_is_long_call_p (tree decl)
6611 tree attrs;
6613 if (!decl)
6614 return TARGET_LONG_CALLS;
6616 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6617 if (lookup_attribute ("short_call", attrs))
6618 return false;
6620 /* For "f", be conservative, and only cater for cases in which the
6621 whole of the current function is placed in the same section. */
6622 if (!flag_reorder_blocks_and_partition
6623 && TREE_CODE (decl) == FUNCTION_DECL
6624 && arm_function_in_section_p (decl, current_function_section ()))
6625 return false;
6627 if (lookup_attribute ("long_call", attrs))
6628 return true;
6630 return TARGET_LONG_CALLS;
6633 /* Return nonzero if it is ok to make a tail-call to DECL. */
6634 static bool
6635 arm_function_ok_for_sibcall (tree decl, tree exp)
6637 unsigned long func_type;
6639 if (cfun->machine->sibcall_blocked)
6640 return false;
6642 /* Never tailcall something if we are generating code for Thumb-1. */
6643 if (TARGET_THUMB1)
6644 return false;
6646 /* The PIC register is live on entry to VxWorks PLT entries, so we
6647 must make the call before restoring the PIC register. */
6648 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6649 return false;
6651 /* If we are interworking and the function is not declared static
6652 then we can't tail-call it unless we know that it exists in this
6653 compilation unit (since it might be a Thumb routine). */
6654 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6655 && !TREE_ASM_WRITTEN (decl))
6656 return false;
6658 func_type = arm_current_func_type ();
6659 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6660 if (IS_INTERRUPT (func_type))
6661 return false;
6663 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6665 /* Check that the return value locations are the same. For
6666 example that we aren't returning a value from the sibling in
6667 a VFP register but then need to transfer it to a core
6668 register. */
6669 rtx a, b;
6671 a = arm_function_value (TREE_TYPE (exp), decl, false);
6672 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6673 cfun->decl, false);
6674 if (!rtx_equal_p (a, b))
6675 return false;
6678 /* Never tailcall if function may be called with a misaligned SP. */
6679 if (IS_STACKALIGN (func_type))
6680 return false;
6682 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6683 references should become a NOP. Don't convert such calls into
6684 sibling calls. */
6685 if (TARGET_AAPCS_BASED
6686 && arm_abi == ARM_ABI_AAPCS
6687 && decl
6688 && DECL_WEAK (decl))
6689 return false;
6691 /* Everything else is ok. */
6692 return true;
6696 /* Addressing mode support functions. */
6698 /* Return nonzero if X is a legitimate immediate operand when compiling
6699 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
 6700 int
 6701 legitimate_pic_operand_p (rtx x)
6703 if (GET_CODE (x) == SYMBOL_REF
6704 || (GET_CODE (x) == CONST
6705 && GET_CODE (XEXP (x, 0)) == PLUS
6706 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6707 return 0;
6709 return 1;
6712 /* Record that the current function needs a PIC register. Initialize
6713 cfun->machine->pic_reg if we have not already done so. */
6715 static void
6716 require_pic_register (void)
6718 /* A lot of the logic here is made obscure by the fact that this
6719 routine gets called as part of the rtx cost estimation process.
6720 We don't want those calls to affect any assumptions about the real
6721 function; and further, we can't call entry_of_function() until we
6722 start the real expansion process. */
6723 if (!crtl->uses_pic_offset_table)
6725 gcc_assert (can_create_pseudo_p ());
6726 if (arm_pic_register != INVALID_REGNUM
6727 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6729 if (!cfun->machine->pic_reg)
6730 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6732 /* Play games to avoid marking the function as needing pic
6733 if we are being called as part of the cost-estimation
6734 process. */
6735 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6736 crtl->uses_pic_offset_table = 1;
6738 else
6740 rtx_insn *seq, *insn;
6742 if (!cfun->machine->pic_reg)
6743 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6745 /* Play games to avoid marking the function as needing pic
6746 if we are being called as part of the cost-estimation
6747 process. */
6748 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6750 crtl->uses_pic_offset_table = 1;
6751 start_sequence ();
6753 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6754 && arm_pic_register > LAST_LO_REGNUM)
6755 emit_move_insn (cfun->machine->pic_reg,
6756 gen_rtx_REG (Pmode, arm_pic_register));
6757 else
6758 arm_load_pic_register (0UL);
6760 seq = get_insns ();
6761 end_sequence ();
6763 for (insn = seq; insn; insn = NEXT_INSN (insn))
6764 if (INSN_P (insn))
6765 INSN_LOCATION (insn) = prologue_location;
6767 /* We can be called during expansion of PHI nodes, where
6768 we can't yet emit instructions directly in the final
6769 insn stream. Queue the insns on the entry edge, they will
6770 be committed after everything else is expanded. */
6771 insert_insn_on_edge (seq,
6772 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
 6778 rtx
 6779 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6781 if (GET_CODE (orig) == SYMBOL_REF
6782 || GET_CODE (orig) == LABEL_REF)
6784 rtx insn;
6786 if (reg == 0)
6788 gcc_assert (can_create_pseudo_p ());
6789 reg = gen_reg_rtx (Pmode);
6792 /* VxWorks does not impose a fixed gap between segments; the run-time
6793 gap can be different from the object-file gap. We therefore can't
6794 use GOTOFF unless we are absolutely sure that the symbol is in the
6795 same segment as the GOT. Unfortunately, the flexibility of linker
6796 scripts means that we can't be sure of that in general, so assume
6797 that GOTOFF is never valid on VxWorks. */
6798 if ((GET_CODE (orig) == LABEL_REF
6799 || (GET_CODE (orig) == SYMBOL_REF &&
6800 SYMBOL_REF_LOCAL_P (orig)))
6801 && NEED_GOT_RELOC
6802 && arm_pic_data_is_text_relative)
6803 insn = arm_pic_static_addr (orig, reg);
6804 else
6806 rtx pat;
6807 rtx mem;
6809 /* If this function doesn't have a pic register, create one now. */
6810 require_pic_register ();
6812 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6814 /* Make the MEM as close to a constant as possible. */
6815 mem = SET_SRC (pat);
6816 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6817 MEM_READONLY_P (mem) = 1;
6818 MEM_NOTRAP_P (mem) = 1;
6820 insn = emit_insn (pat);
6823 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6824 by loop. */
6825 set_unique_reg_note (insn, REG_EQUAL, orig);
6827 return reg;
6829 else if (GET_CODE (orig) == CONST)
6831 rtx base, offset;
6833 if (GET_CODE (XEXP (orig, 0)) == PLUS
6834 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6835 return orig;
6837 /* Handle the case where we have: const (UNSPEC_TLS). */
6838 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6839 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6840 return orig;
6842 /* Handle the case where we have:
6843 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6844 CONST_INT. */
6845 if (GET_CODE (XEXP (orig, 0)) == PLUS
6846 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6847 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6849 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6850 return orig;
6853 if (reg == 0)
6855 gcc_assert (can_create_pseudo_p ());
6856 reg = gen_reg_rtx (Pmode);
6859 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6861 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6862 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6863 base == reg ? 0 : reg);
6865 if (CONST_INT_P (offset))
6867 /* The base register doesn't really matter, we only want to
6868 test the index for the appropriate mode. */
6869 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6871 gcc_assert (can_create_pseudo_p ());
6872 offset = force_reg (Pmode, offset);
6875 if (CONST_INT_P (offset))
6876 return plus_constant (Pmode, base, INTVAL (offset));
6879 if (GET_MODE_SIZE (mode) > 4
6880 && (GET_MODE_CLASS (mode) == MODE_INT
6881 || TARGET_SOFT_FLOAT))
6883 emit_insn (gen_addsi3 (reg, base, offset));
6884 return reg;
6887 return gen_rtx_PLUS (Pmode, base, offset);
6890 return orig;
6894 /* Find a spare register to use during the prolog of a function. */
6896 static int
6897 thumb_find_work_register (unsigned long pushed_regs_mask)
6899 int reg;
6901 /* Check the argument registers first as these are call-used. The
6902 register allocation order means that sometimes r3 might be used
6903 but earlier argument registers might not, so check them all. */
6904 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6905 if (!df_regs_ever_live_p (reg))
6906 return reg;
6908 /* Before going on to check the call-saved registers we can try a couple
6909 more ways of deducing that r3 is available. The first is when we are
6910 pushing anonymous arguments onto the stack and we have less than 4
6911 registers worth of fixed arguments(*). In this case r3 will be part of
6912 the variable argument list and so we can be sure that it will be
6913 pushed right at the start of the function. Hence it will be available
6914 for the rest of the prologue.
6915 (*): ie crtl->args.pretend_args_size is greater than 0. */
6916 if (cfun->machine->uses_anonymous_args
6917 && crtl->args.pretend_args_size > 0)
6918 return LAST_ARG_REGNUM;
6920 /* The other case is when we have fixed arguments but less than 4 registers
6921 worth. In this case r3 might be used in the body of the function, but
6922 it is not being used to convey an argument into the function. In theory
6923 we could just check crtl->args.size to see how many bytes are
6924 being passed in argument registers, but it seems that it is unreliable.
6925 Sometimes it will have the value 0 when in fact arguments are being
6926 passed. (See testcase execute/20021111-1.c for an example). So we also
6927 check the args_info.nregs field as well. The problem with this field is
6928 that it makes no allowances for arguments that are passed to the
6929 function but which are not used. Hence we could miss an opportunity
6930 when a function has an unused argument in r3. But it is better to be
6931 safe than to be sorry. */
6932 if (! cfun->machine->uses_anonymous_args
6933 && crtl->args.size >= 0
6934 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6935 && (TARGET_AAPCS_BASED
6936 ? crtl->args.info.aapcs_ncrn < 4
6937 : crtl->args.info.nregs < 4))
6938 return LAST_ARG_REGNUM;
6940 /* Otherwise look for a call-saved register that is going to be pushed. */
6941 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6942 if (pushed_regs_mask & (1 << reg))
6943 return reg;
6945 if (TARGET_THUMB2)
6947 /* Thumb-2 can use high regs. */
6948 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6949 if (pushed_regs_mask & (1 << reg))
6950 return reg;
6952 /* Something went wrong - thumb_compute_save_reg_mask()
6953 should have arranged for a suitable register to be pushed. */
6954 gcc_unreachable ();
6957 static GTY(()) int pic_labelno;
6959 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6960 low register. */
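/* Illustrative sketch (assumption) of the classic non-VxWorks ARM sequence:
       ldr     rPIC, .LCn        @ _GLOBAL_OFFSET_TABLE_ - (.LPICm + 8)
     .LPICm:
       add     rPIC, pc, rPIC
   which leaves the GOT base in the PIC register.  */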
6962 void
6963 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6965 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6967 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6968 return;
6970 gcc_assert (flag_pic);
6972 pic_reg = cfun->machine->pic_reg;
6973 if (TARGET_VXWORKS_RTP)
6975 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6976 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6977 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6979 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6981 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6982 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6984 else
6986 /* We use an UNSPEC rather than a LABEL_REF because this label
6987 never appears in the code stream. */
6989 labelno = GEN_INT (pic_labelno++);
6990 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6991 l1 = gen_rtx_CONST (VOIDmode, l1);
6993 /* On the ARM the PC register contains 'dot + 8' at the time of the
6994 addition, on the Thumb it is 'dot + 4'. */
6995 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6996 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6997 UNSPEC_GOTSYM_OFF);
6998 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7000 if (TARGET_32BIT)
7002 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7004 else /* TARGET_THUMB1 */
7006 if (arm_pic_register != INVALID_REGNUM
7007 && REGNO (pic_reg) > LAST_LO_REGNUM)
7009 /* We will have pushed the pic register, so we should always be
7010 able to find a work register. */
7011 pic_tmp = gen_rtx_REG (SImode,
7012 thumb_find_work_register (saved_regs));
7013 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7014 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7015 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7017 else if (arm_pic_register != INVALID_REGNUM
7018 && arm_pic_register > LAST_LO_REGNUM
7019 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7021 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7022 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7023 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7025 else
7026 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7030 /* Need to emit this whether or not we obey regdecls,
7031 since setjmp/longjmp can cause life info to screw up. */
7032 emit_use (pic_reg);
7035 /* Generate code to load the address of a static var when flag_pic is set. */
7036 static rtx
7037 arm_pic_static_addr (rtx orig, rtx reg)
7039 rtx l1, labelno, offset_rtx, insn;
7041 gcc_assert (flag_pic);
7043 /* We use an UNSPEC rather than a LABEL_REF because this label
7044 never appears in the code stream. */
7045 labelno = GEN_INT (pic_labelno++);
7046 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7047 l1 = gen_rtx_CONST (VOIDmode, l1);
7049 /* On the ARM the PC register contains 'dot + 8' at the time of the
7050 addition, on the Thumb it is 'dot + 4'. */
7051 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7052 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7053 UNSPEC_SYMBOL_OFFSET);
7054 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7056 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7057 return insn;
7060 /* Return nonzero if X is valid as an ARM state addressing register. */
7061 static int
7062 arm_address_register_rtx_p (rtx x, int strict_p)
7064 int regno;
7066 if (!REG_P (x))
7067 return 0;
7069 regno = REGNO (x);
7071 if (strict_p)
7072 return ARM_REGNO_OK_FOR_BASE_P (regno);
7074 return (regno <= LAST_ARM_REGNUM
7075 || regno >= FIRST_PSEUDO_REGISTER
7076 || regno == FRAME_POINTER_REGNUM
7077 || regno == ARG_POINTER_REGNUM);
7080 /* Return TRUE if this rtx is the difference of a symbol and a label,
7081 and will reduce to a PC-relative relocation in the object file.
7082 Expressions like this can be left alone when generating PIC, rather
7083 than forced through the GOT. */
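/* Illustrative example (assumption): (minus (symbol_ref "foo") (label_ref L))
   assembles to a PC-relative difference and therefore need not be forced
   through the GOT.  */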
7084 static int
7085 pcrel_constant_p (rtx x)
7087 if (GET_CODE (x) == MINUS)
7088 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7090 return FALSE;
7093 /* Return true if X will surely end up in an index register after next
7094 splitting pass. */
7095 static bool
7096 will_be_in_index_register (const_rtx x)
7098 /* arm.md: calculate_pic_address will split this into a register. */
7099 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7102 /* Return nonzero if X is a valid ARM state address operand. */
 7103 int
 7104 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7105 int strict_p)
7107 bool use_ldrd;
7108 enum rtx_code code = GET_CODE (x);
7110 if (arm_address_register_rtx_p (x, strict_p))
7111 return 1;
7113 use_ldrd = (TARGET_LDRD
7114 && (mode == DImode
7115 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7117 if (code == POST_INC || code == PRE_DEC
7118 || ((code == PRE_INC || code == POST_DEC)
7119 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7120 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7122 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7123 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7124 && GET_CODE (XEXP (x, 1)) == PLUS
7125 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7127 rtx addend = XEXP (XEXP (x, 1), 1);
7129 /* Don't allow ldrd post increment by register because it's hard
7130 to fixup invalid register choices. */
7131 if (use_ldrd
7132 && GET_CODE (x) == POST_MODIFY
7133 && REG_P (addend))
7134 return 0;
7136 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7137 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7140 /* After reload constants split into minipools will have addresses
7141 from a LABEL_REF. */
7142 else if (reload_completed
7143 && (code == LABEL_REF
7144 || (code == CONST
7145 && GET_CODE (XEXP (x, 0)) == PLUS
7146 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7147 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7148 return 1;
7150 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7151 return 0;
7153 else if (code == PLUS)
7155 rtx xop0 = XEXP (x, 0);
7156 rtx xop1 = XEXP (x, 1);
7158 return ((arm_address_register_rtx_p (xop0, strict_p)
7159 && ((CONST_INT_P (xop1)
7160 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7161 || (!strict_p && will_be_in_index_register (xop1))))
7162 || (arm_address_register_rtx_p (xop1, strict_p)
7163 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7166 #if 0
7167 /* Reload currently can't handle MINUS, so disable this for now */
7168 else if (GET_CODE (x) == MINUS)
7170 rtx xop0 = XEXP (x, 0);
7171 rtx xop1 = XEXP (x, 1);
7173 return (arm_address_register_rtx_p (xop0, strict_p)
7174 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7176 #endif
7178 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7179 && code == SYMBOL_REF
7180 && CONSTANT_POOL_ADDRESS_P (x)
7181 && ! (flag_pic
7182 && symbol_mentioned_p (get_pool_constant (x))
7183 && ! pcrel_constant_p (get_pool_constant (x))))
7184 return 1;
7186 return 0;
7189 /* Return nonzero if X is a valid Thumb-2 address operand. */
7190 static int
7191 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7193 bool use_ldrd;
7194 enum rtx_code code = GET_CODE (x);
7196 if (arm_address_register_rtx_p (x, strict_p))
7197 return 1;
7199 use_ldrd = (TARGET_LDRD
7200 && (mode == DImode
7201 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7203 if (code == POST_INC || code == PRE_DEC
7204 || ((code == PRE_INC || code == POST_DEC)
7205 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7206 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7208 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7209 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7210 && GET_CODE (XEXP (x, 1)) == PLUS
7211 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7213 /* Thumb-2 only has autoincrement by constant. */
7214 rtx addend = XEXP (XEXP (x, 1), 1);
7215 HOST_WIDE_INT offset;
7217 if (!CONST_INT_P (addend))
7218 return 0;
7220 offset = INTVAL(addend);
7221 if (GET_MODE_SIZE (mode) <= 4)
7222 return (offset > -256 && offset < 256);
7224 return (use_ldrd && offset > -1024 && offset < 1024
7225 && (offset & 3) == 0);
7228 /* After reload constants split into minipools will have addresses
7229 from a LABEL_REF. */
7230 else if (reload_completed
7231 && (code == LABEL_REF
7232 || (code == CONST
7233 && GET_CODE (XEXP (x, 0)) == PLUS
7234 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7235 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7236 return 1;
7238 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7239 return 0;
7241 else if (code == PLUS)
7243 rtx xop0 = XEXP (x, 0);
7244 rtx xop1 = XEXP (x, 1);
7246 return ((arm_address_register_rtx_p (xop0, strict_p)
7247 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7248 || (!strict_p && will_be_in_index_register (xop1))))
7249 || (arm_address_register_rtx_p (xop1, strict_p)
7250 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
 7253   /* Normally we can assign constant values to target registers without
 7254      the help of the constant pool.  But there are cases where we have to
 7255      use the constant pool, such as:
 7256      1) assigning a label to a register.
 7257      2) sign-extending an 8-bit value to 32 bits and assigning it to a register.
7259 Constant pool access in format:
7260 (set (reg r0) (mem (symbol_ref (".LC0"))))
 7261      will cause the use of a literal pool (later, in function arm_reorg).
 7262      So here we mark such a format as invalid; the compiler will then
 7263      adjust it into:
7264 (set (reg r0) (symbol_ref (".LC0")))
7265 (set (reg r0) (mem (reg r0))).
7266 No extra register is required, and (mem (reg r0)) won't cause the use
7267 of literal pools. */
7268 else if (arm_disable_literal_pool && code == SYMBOL_REF
7269 && CONSTANT_POOL_ADDRESS_P (x))
7270 return 0;
7272 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7273 && code == SYMBOL_REF
7274 && CONSTANT_POOL_ADDRESS_P (x)
7275 && ! (flag_pic
7276 && symbol_mentioned_p (get_pool_constant (x))
7277 && ! pcrel_constant_p (get_pool_constant (x))))
7278 return 1;
7280 return 0;
7283 /* Return nonzero if INDEX is valid for an address index operand in
7284 ARM state. */
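/* Illustrative examples (assumption): for SImode, a CONST_INT offset such as
   4080 is accepted (ldr r0, [r1, #4080]), and a (mult reg 4) index matches
   the scaled form  ldr r0, [r1, r2, lsl #2]; DImode with LDRD only accepts
   offsets strictly between -256 and 256.  */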
7285 static int
7286 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7287 int strict_p)
7289 HOST_WIDE_INT range;
7290 enum rtx_code code = GET_CODE (index);
7292 /* Standard coprocessor addressing modes. */
7293 if (TARGET_HARD_FLOAT
7294 && TARGET_VFP
7295 && (mode == SFmode || mode == DFmode))
7296 return (code == CONST_INT && INTVAL (index) < 1024
7297 && INTVAL (index) > -1024
7298 && (INTVAL (index) & 3) == 0);
7300 /* For quad modes, we restrict the constant offset to be slightly less
7301 than what the instruction format permits. We do this because for
7302 quad mode moves, we will actually decompose them into two separate
7303 double-mode reads or writes. INDEX must therefore be a valid
7304 (double-mode) offset and so should INDEX+8. */
7305 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7306 return (code == CONST_INT
7307 && INTVAL (index) < 1016
7308 && INTVAL (index) > -1024
7309 && (INTVAL (index) & 3) == 0);
7311 /* We have no such constraint on double mode offsets, so we permit the
7312 full range of the instruction format. */
7313 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7314 return (code == CONST_INT
7315 && INTVAL (index) < 1024
7316 && INTVAL (index) > -1024
7317 && (INTVAL (index) & 3) == 0);
7319 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7320 return (code == CONST_INT
7321 && INTVAL (index) < 1024
7322 && INTVAL (index) > -1024
7323 && (INTVAL (index) & 3) == 0);
7325 if (arm_address_register_rtx_p (index, strict_p)
7326 && (GET_MODE_SIZE (mode) <= 4))
7327 return 1;
7329 if (mode == DImode || mode == DFmode)
7331 if (code == CONST_INT)
7333 HOST_WIDE_INT val = INTVAL (index);
7335 if (TARGET_LDRD)
7336 return val > -256 && val < 256;
7337 else
7338 return val > -4096 && val < 4092;
7341 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7344 if (GET_MODE_SIZE (mode) <= 4
7345 && ! (arm_arch4
7346 && (mode == HImode
7347 || mode == HFmode
7348 || (mode == QImode && outer == SIGN_EXTEND))))
7350 if (code == MULT)
7352 rtx xiop0 = XEXP (index, 0);
7353 rtx xiop1 = XEXP (index, 1);
7355 return ((arm_address_register_rtx_p (xiop0, strict_p)
7356 && power_of_two_operand (xiop1, SImode))
7357 || (arm_address_register_rtx_p (xiop1, strict_p)
7358 && power_of_two_operand (xiop0, SImode)));
7360 else if (code == LSHIFTRT || code == ASHIFTRT
7361 || code == ASHIFT || code == ROTATERT)
7363 rtx op = XEXP (index, 1);
7365 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7366 && CONST_INT_P (op)
7367 && INTVAL (op) > 0
7368 && INTVAL (op) <= 31);
7372 /* For ARM v4 we may be doing a sign-extend operation during the
7373 load. */
7374 if (arm_arch4)
7376 if (mode == HImode
7377 || mode == HFmode
7378 || (outer == SIGN_EXTEND && mode == QImode))
7379 range = 256;
7380 else
7381 range = 4096;
7383 else
7384 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7386 return (code == CONST_INT
7387 && INTVAL (index) < range
7388 && INTVAL (index) > -range);
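/* The ranges chosen above mirror the ARM immediate offset fields: plain
   LDR/STR encode a 12-bit offset (roughly +/-4095), while the ARMv4
   halfword/signed-byte forms (LDRH/LDRSH/LDRSB) only encode an 8-bit offset
   (+/-255), hence range = 256 for those cases.  */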
7391 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7392 index operand. i.e. 1, 2, 4 or 8. */
7393 static bool
7394 thumb2_index_mul_operand (rtx op)
7396 HOST_WIDE_INT val;
7398 if (!CONST_INT_P (op))
7399 return false;
7401 val = INTVAL(op);
7402 return (val == 1 || val == 2 || val == 4 || val == 8);
7405 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7406 static int
7407 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7409 enum rtx_code code = GET_CODE (index);
7411 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7412 /* Standard coprocessor addressing modes. */
7413 if (TARGET_HARD_FLOAT
7414 && TARGET_VFP
7415 && (mode == SFmode || mode == DFmode))
7416 return (code == CONST_INT && INTVAL (index) < 1024
7417 /* Thumb-2 allows only a > -256 index range for its core register
7418 load/stores. Since we allow SF/DF in core registers, we have
7419 to use the intersection between -256~4096 (core) and -1024~1024
7420 (coprocessor). */
7421 && INTVAL (index) > -256
7422 && (INTVAL (index) & 3) == 0);
7424 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7426 /* For DImode assume values will usually live in core regs
7427 and only allow LDRD addressing modes. */
7428 if (!TARGET_LDRD || mode != DImode)
7429 return (code == CONST_INT
7430 && INTVAL (index) < 1024
7431 && INTVAL (index) > -1024
7432 && (INTVAL (index) & 3) == 0);
7435 /* For quad modes, we restrict the constant offset to be slightly less
7436 than what the instruction format permits. We do this because for
7437 quad mode moves, we will actually decompose them into two separate
7438 double-mode reads or writes. INDEX must therefore be a valid
7439 (double-mode) offset and so should INDEX+8. */
7440 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7441 return (code == CONST_INT
7442 && INTVAL (index) < 1016
7443 && INTVAL (index) > -1024
7444 && (INTVAL (index) & 3) == 0);
7446 /* We have no such constraint on double mode offsets, so we permit the
7447 full range of the instruction format. */
7448 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7449 return (code == CONST_INT
7450 && INTVAL (index) < 1024
7451 && INTVAL (index) > -1024
7452 && (INTVAL (index) & 3) == 0);
7454 if (arm_address_register_rtx_p (index, strict_p)
7455 && (GET_MODE_SIZE (mode) <= 4))
7456 return 1;
7458 if (mode == DImode || mode == DFmode)
7460 if (code == CONST_INT)
7462 HOST_WIDE_INT val = INTVAL (index);
7463 /* ??? Can we assume ldrd for thumb2? */
7464 /* Thumb-2 ldrd only has reg+const addressing modes. */
7465 /* ldrd supports offsets of +-1020.
7466 However the ldr fallback does not. */
7467 return val > -256 && val < 256 && (val & 3) == 0;
7469 else
7470 return 0;
7473 if (code == MULT)
7475 rtx xiop0 = XEXP (index, 0);
7476 rtx xiop1 = XEXP (index, 1);
7478 return ((arm_address_register_rtx_p (xiop0, strict_p)
7479 && thumb2_index_mul_operand (xiop1))
7480 || (arm_address_register_rtx_p (xiop1, strict_p)
7481 && thumb2_index_mul_operand (xiop0)));
7483 else if (code == ASHIFT)
7485 rtx op = XEXP (index, 1);
7487 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7488 && CONST_INT_P (op)
7489 && INTVAL (op) > 0
7490 && INTVAL (op) <= 3);
7493 return (code == CONST_INT
7494 && INTVAL (index) < 4096
7495 && INTVAL (index) > -256);
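/* The asymmetric range here reflects the 32-bit Thumb-2 encodings, which
   provide a 12-bit positive immediate offset (0..4095) but only an 8-bit
   immediate for negative offsets (down to -255).  */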
7498 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7499 static int
7500 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7502 int regno;
7504 if (!REG_P (x))
7505 return 0;
7507 regno = REGNO (x);
7509 if (strict_p)
7510 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7512 return (regno <= LAST_LO_REGNUM
7513 || regno > LAST_VIRTUAL_REGISTER
7514 || regno == FRAME_POINTER_REGNUM
7515 || (GET_MODE_SIZE (mode) >= 4
7516 && (regno == STACK_POINTER_REGNUM
7517 || regno >= FIRST_PSEUDO_REGISTER
7518 || x == hard_frame_pointer_rtx
7519 || x == arg_pointer_rtx)));
7522 /* Return nonzero if x is a legitimate index register. This is the case
7523 for any base register that can access a QImode object. */
7524 inline static int
7525 thumb1_index_register_rtx_p (rtx x, int strict_p)
7527 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7530 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7532 The AP may be eliminated to either the SP or the FP, so we use the
7533 least common denominator, e.g. SImode, and offsets from 0 to 64.
7535 ??? Verify whether the above is the right approach.
7537 ??? Also, the FP may be eliminated to the SP, so perhaps that
7538 needs special handling also.
7540 ??? Look at how the mips16 port solves this problem. It probably uses
7541 better ways to solve some of these problems.
7543 Although it is not incorrect, we don't accept QImode and HImode
7544 addresses based on the frame pointer or arg pointer until the
7545 reload pass starts. This is so that eliminating such addresses
7546 into stack based ones won't produce impossible code. */
7547 int
7548 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7550 /* ??? Not clear if this is right. Experiment. */
7551 if (GET_MODE_SIZE (mode) < 4
7552 && !(reload_in_progress || reload_completed)
7553 && (reg_mentioned_p (frame_pointer_rtx, x)
7554 || reg_mentioned_p (arg_pointer_rtx, x)
7555 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7556 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7557 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7558 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7559 return 0;
7561 /* Accept any base register. SP only in SImode or larger. */
7562 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7563 return 1;
7565 /* This is PC relative data before arm_reorg runs. */
7566 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7567 && GET_CODE (x) == SYMBOL_REF
7568 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7569 return 1;
7571 /* This is PC relative data after arm_reorg runs. */
7572 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7573 && reload_completed
7574 && (GET_CODE (x) == LABEL_REF
7575 || (GET_CODE (x) == CONST
7576 && GET_CODE (XEXP (x, 0)) == PLUS
7577 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7578 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7579 return 1;
7581 /* Post-inc indexing only supported for SImode and larger. */
7582 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7583 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7584 return 1;
7586 else if (GET_CODE (x) == PLUS)
7588 /* REG+REG address can be any two index registers. */
7589 /* We disallow FRAME+REG addressing since we know that FRAME
7590 will be replaced with STACK, and SP relative addressing only
7591 permits SP+OFFSET. */
7592 if (GET_MODE_SIZE (mode) <= 4
7593 && XEXP (x, 0) != frame_pointer_rtx
7594 && XEXP (x, 1) != frame_pointer_rtx
7595 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7596 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7597 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7598 return 1;
7600 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
7601 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7602 || XEXP (x, 0) == arg_pointer_rtx)
7603 && CONST_INT_P (XEXP (x, 1))
7604 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7605 return 1;
7607 /* REG+const has a 10-bit offset for SP, but only SImode and
7608 larger are supported. */
7609 /* ??? Should probably check for DI/DFmode overflow here
7610 just like GO_IF_LEGITIMATE_OFFSET does. */
7611 else if (REG_P (XEXP (x, 0))
7612 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7613 && GET_MODE_SIZE (mode) >= 4
7614 && CONST_INT_P (XEXP (x, 1))
7615 && INTVAL (XEXP (x, 1)) >= 0
7616 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7617 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7618 return 1;
7620 else if (REG_P (XEXP (x, 0))
7621 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7622 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7623 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7624 && REGNO (XEXP (x, 0))
7625 <= LAST_VIRTUAL_POINTER_REGISTER))
7626 && GET_MODE_SIZE (mode) >= 4
7627 && CONST_INT_P (XEXP (x, 1))
7628 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7629 return 1;
7632 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7633 && GET_MODE_SIZE (mode) == 4
7634 && GET_CODE (x) == SYMBOL_REF
7635 && CONSTANT_POOL_ADDRESS_P (x)
7636 && ! (flag_pic
7637 && symbol_mentioned_p (get_pool_constant (x))
7638 && ! pcrel_constant_p (get_pool_constant (x))))
7639 return 1;
7641 return 0;
7644 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7645 instruction of mode MODE. */
7646 int
7647 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7649 switch (GET_MODE_SIZE (mode))
7651 case 1:
7652 return val >= 0 && val < 32;
7654 case 2:
7655 return val >= 0 && val < 64 && (val & 1) == 0;
7657 default:
7658 return (val >= 0
7659 && (val + GET_MODE_SIZE (mode)) <= 128
7660 && (val & 3) == 0);
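/* In concrete terms: byte accesses allow offsets 0..31, halfword accesses
   0..62 (even), and word or larger accesses 0..(128 - size) in multiples of
   four (0..124 for SImode), matching the 5-bit offset field scaled by the
   access size in the 16-bit Thumb load/store encodings.  */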
7664 bool
7665 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7667 if (TARGET_ARM)
7668 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7669 else if (TARGET_THUMB2)
7670 return thumb2_legitimate_address_p (mode, x, strict_p);
7671 else /* if (TARGET_THUMB1) */
7672 return thumb1_legitimate_address_p (mode, x, strict_p);
7675 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7677 Given an rtx X being reloaded into a reg required to be
7678 in class CLASS, return the class of reg to actually use.
7679 In general this is just CLASS, but for the Thumb core registers and
7680 immediate constants we prefer a LO_REGS class or a subset. */
7682 static reg_class_t
7683 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7685 if (TARGET_32BIT)
7686 return rclass;
7687 else
7689 if (rclass == GENERAL_REGS)
7690 return LO_REGS;
7691 else
7692 return rclass;
7696 /* Build the SYMBOL_REF for __tls_get_addr. */
7698 static GTY(()) rtx tls_get_addr_libfunc;
7700 static rtx
7701 get_tls_get_addr (void)
7703 if (!tls_get_addr_libfunc)
7704 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7705 return tls_get_addr_libfunc;
7708 rtx
7709 arm_load_tp (rtx target)
7711 if (!target)
7712 target = gen_reg_rtx (SImode);
7714 if (TARGET_HARD_TP)
7716 /* Can return in any reg. */
7717 emit_insn (gen_load_tp_hard (target));
7719 else
7721 /* Always returned in r0. Immediately copy the result into a pseudo,
7722 otherwise other uses of r0 (e.g. setting up function arguments) may
7723 clobber the value. */
7725 rtx tmp;
7727 emit_insn (gen_load_tp_soft ());
7729 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7730 emit_move_insn (target, tmp);
7732 return target;
7735 static rtx
7736 load_tls_operand (rtx x, rtx reg)
7738 rtx tmp;
7740 if (reg == NULL_RTX)
7741 reg = gen_reg_rtx (SImode);
7743 tmp = gen_rtx_CONST (SImode, x);
7745 emit_move_insn (reg, tmp);
7747 return reg;
7750 static rtx
7751 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7753 rtx insns, label, labelno, sum;
7755 gcc_assert (reloc != TLS_DESCSEQ);
7756 start_sequence ();
7758 labelno = GEN_INT (pic_labelno++);
7759 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7760 label = gen_rtx_CONST (VOIDmode, label);
7762 sum = gen_rtx_UNSPEC (Pmode,
7763 gen_rtvec (4, x, GEN_INT (reloc), label,
7764 GEN_INT (TARGET_ARM ? 8 : 4)),
7765 UNSPEC_TLS);
7766 reg = load_tls_operand (sum, reg);
7768 if (TARGET_ARM)
7769 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7770 else
7771 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7773 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7774 LCT_PURE, /* LCT_CONST? */
7775 Pmode, 1, reg, Pmode);
7777 insns = get_insns ();
7778 end_sequence ();
7780 return insns;
7783 static rtx
7784 arm_tls_descseq_addr (rtx x, rtx reg)
7786 rtx labelno = GEN_INT (pic_labelno++);
7787 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7788 rtx sum = gen_rtx_UNSPEC (Pmode,
7789 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7790 gen_rtx_CONST (VOIDmode, label),
7791 GEN_INT (!TARGET_ARM)),
7792 UNSPEC_TLS);
7793 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7795 emit_insn (gen_tlscall (x, labelno));
7796 if (!reg)
7797 reg = gen_reg_rtx (SImode);
7798 else
7799 gcc_assert (REGNO (reg) != R0_REGNUM);
7801 emit_move_insn (reg, reg0);
7803 return reg;
7806 rtx
7807 legitimize_tls_address (rtx x, rtx reg)
7809 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7810 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7812 switch (model)
7814 case TLS_MODEL_GLOBAL_DYNAMIC:
7815 if (TARGET_GNU2_TLS)
7817 reg = arm_tls_descseq_addr (x, reg);
7819 tp = arm_load_tp (NULL_RTX);
7821 dest = gen_rtx_PLUS (Pmode, tp, reg);
7823 else
7825 /* Original scheme */
7826 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7827 dest = gen_reg_rtx (Pmode);
7828 emit_libcall_block (insns, dest, ret, x);
7830 return dest;
7832 case TLS_MODEL_LOCAL_DYNAMIC:
7833 if (TARGET_GNU2_TLS)
7835 reg = arm_tls_descseq_addr (x, reg);
7837 tp = arm_load_tp (NULL_RTX);
7839 dest = gen_rtx_PLUS (Pmode, tp, reg);
7841 else
7843 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7845 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7846 share the LDM result with other LD model accesses. */
7847 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7848 UNSPEC_TLS);
7849 dest = gen_reg_rtx (Pmode);
7850 emit_libcall_block (insns, dest, ret, eqv);
7852 /* Load the addend. */
7853 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7854 GEN_INT (TLS_LDO32)),
7855 UNSPEC_TLS);
7856 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7857 dest = gen_rtx_PLUS (Pmode, dest, addend);
7859 return dest;
7861 case TLS_MODEL_INITIAL_EXEC:
7862 labelno = GEN_INT (pic_labelno++);
7863 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7864 label = gen_rtx_CONST (VOIDmode, label);
7865 sum = gen_rtx_UNSPEC (Pmode,
7866 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7867 GEN_INT (TARGET_ARM ? 8 : 4)),
7868 UNSPEC_TLS);
7869 reg = load_tls_operand (sum, reg);
7871 if (TARGET_ARM)
7872 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7873 else if (TARGET_THUMB2)
7874 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7875 else
7877 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7878 emit_move_insn (reg, gen_const_mem (SImode, reg));
7881 tp = arm_load_tp (NULL_RTX);
7883 return gen_rtx_PLUS (Pmode, tp, reg);
7885 case TLS_MODEL_LOCAL_EXEC:
7886 tp = arm_load_tp (NULL_RTX);
7888 reg = gen_rtx_UNSPEC (Pmode,
7889 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7890 UNSPEC_TLS);
7891 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7893 return gen_rtx_PLUS (Pmode, tp, reg);
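/* Illustrative sketch (not the literal emitted RTL): for local-exec the
   address is simply the thread pointer plus a link-time constant, roughly
     tp   = <thread pointer>                    (arm_load_tp)
     off  = (unspec [x, TLS_LE32])              (resolved by the linker)
     addr = tp + off
   so, unlike the dynamic models, no call to __tls_get_addr is needed.  */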
7895 default:
7896 abort ();
7900 /* Try machine-dependent ways of modifying an illegitimate address
7901 to be legitimate. If we find one, return the new, valid address. */
7902 rtx
7903 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7905 if (arm_tls_referenced_p (x))
7907 rtx addend = NULL;
7909 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7911 addend = XEXP (XEXP (x, 0), 1);
7912 x = XEXP (XEXP (x, 0), 0);
7915 if (GET_CODE (x) != SYMBOL_REF)
7916 return x;
7918 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7920 x = legitimize_tls_address (x, NULL_RTX);
7922 if (addend)
7924 x = gen_rtx_PLUS (SImode, x, addend);
7925 orig_x = x;
7927 else
7928 return x;
7931 if (!TARGET_ARM)
7933 /* TODO: legitimize_address for Thumb2. */
7934 if (TARGET_THUMB2)
7935 return x;
7936 return thumb_legitimize_address (x, orig_x, mode);
7939 if (GET_CODE (x) == PLUS)
7941 rtx xop0 = XEXP (x, 0);
7942 rtx xop1 = XEXP (x, 1);
7944 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7945 xop0 = force_reg (SImode, xop0);
7947 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7948 && !symbol_mentioned_p (xop1))
7949 xop1 = force_reg (SImode, xop1);
7951 if (ARM_BASE_REGISTER_RTX_P (xop0)
7952 && CONST_INT_P (xop1))
7954 HOST_WIDE_INT n, low_n;
7955 rtx base_reg, val;
7956 n = INTVAL (xop1);
7958 /* VFP addressing modes actually allow greater offsets, but for
7959 now we just stick with the lowest common denominator. */
7960 if (mode == DImode
7961 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7963 low_n = n & 0x0f;
7964 n &= ~0x0f;
7965 if (low_n > 4)
7967 n += 16;
7968 low_n -= 16;
7971 else
7973 low_n = ((mode) == TImode ? 0
7974 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7975 n -= low_n;
7978 base_reg = gen_reg_rtx (SImode);
7979 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7980 emit_move_insn (base_reg, val);
7981 x = plus_constant (Pmode, base_reg, low_n);
7983 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7984 x = gen_rtx_PLUS (SImode, xop0, xop1);
7987 /* XXX We don't allow MINUS any more -- see comment in
7988 arm_legitimate_address_outer_p (). */
7989 else if (GET_CODE (x) == MINUS)
7991 rtx xop0 = XEXP (x, 0);
7992 rtx xop1 = XEXP (x, 1);
7994 if (CONSTANT_P (xop0))
7995 xop0 = force_reg (SImode, xop0);
7997 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7998 xop1 = force_reg (SImode, xop1);
8000 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8001 x = gen_rtx_MINUS (SImode, xop0, xop1);
8004 /* Make sure to take full advantage of the pre-indexed addressing mode
8005 with absolute addresses, which often allows the base register to
8006 be factored out across multiple adjacent memory references, and might
8007 even allow the minipool to be avoided entirely. */
8008 else if (CONST_INT_P (x) && optimize > 0)
8010 unsigned int bits;
8011 HOST_WIDE_INT mask, base, index;
8012 rtx base_reg;
8014 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8015 use an 8-bit index. So let's use a 12-bit index for SImode only and
8016 hope that arm_gen_constant will enable ldrb to use more bits. */
8017 bits = (mode == SImode) ? 12 : 8;
8018 mask = (1 << bits) - 1;
8019 base = INTVAL (x) & ~mask;
8020 index = INTVAL (x) & mask;
8021 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8023 /* It'll most probably be more efficient to generate the base
8024 with more bits set and use a negative index instead. */
8025 base |= mask;
8026 index -= mask;
8028 base_reg = force_reg (SImode, GEN_INT (base));
8029 x = plus_constant (Pmode, base_reg, index);
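/* Worked example of the split above: an SImode load from absolute address
   0x00012345 yields base = 0x00012000 (forced into a register) and
   index = 0x345, which fits the 12-bit ldr offset.  When the base would need
   many set bits to materialise, e.g. 0xFFFFF004, we instead use
   base = 0xFFFFFFFF (loadable with a single mvn) and the negative
   index -0xFFB.  */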
8032 if (flag_pic)
8034 /* We need to find and carefully transform any SYMBOL and LABEL
8035 references; so go back to the original address expression. */
8036 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8038 if (new_x != orig_x)
8039 x = new_x;
8042 return x;
8046 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8047 to be legitimate. If we find one, return the new, valid address. */
8048 rtx
8049 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8051 if (GET_CODE (x) == PLUS
8052 && CONST_INT_P (XEXP (x, 1))
8053 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8054 || INTVAL (XEXP (x, 1)) < 0))
8056 rtx xop0 = XEXP (x, 0);
8057 rtx xop1 = XEXP (x, 1);
8058 HOST_WIDE_INT offset = INTVAL (xop1);
8060 /* Try and fold the offset into a biasing of the base register and
8061 then offsetting that. Don't do this when optimizing for space
8062 since it can cause too many CSEs. */
8063 if (optimize_size && offset >= 0
8064 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8066 HOST_WIDE_INT delta;
8068 if (offset >= 256)
8069 delta = offset - (256 - GET_MODE_SIZE (mode));
8070 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8071 delta = 31 * GET_MODE_SIZE (mode);
8072 else
8073 delta = offset & (~31 * GET_MODE_SIZE (mode));
8075 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8076 NULL_RTX);
8077 x = plus_constant (Pmode, xop0, delta);
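/* Worked example, assuming -Os and an SImode access at offset 260:
   delta = 260 - (256 - 4) = 8, so the base is biased by 252 with a separate
   add and the final load uses the small immediate offset 8, which fits the
   scaled 5-bit offset field.  */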
8079 else if (offset < 0 && offset > -256)
8080 /* Small negative offsets are best done with a subtract before the
8081 dereference; forcing these into a register normally takes two
8082 instructions. */
8083 x = force_operand (x, NULL_RTX);
8084 else
8086 /* For the remaining cases, force the constant into a register. */
8087 xop1 = force_reg (SImode, xop1);
8088 x = gen_rtx_PLUS (SImode, xop0, xop1);
8091 else if (GET_CODE (x) == PLUS
8092 && s_register_operand (XEXP (x, 1), SImode)
8093 && !s_register_operand (XEXP (x, 0), SImode))
8095 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8097 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8100 if (flag_pic)
8102 /* We need to find and carefully transform any SYMBOL and LABEL
8103 references; so go back to the original address expression. */
8104 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8106 if (new_x != orig_x)
8107 x = new_x;
8110 return x;
8113 /* Return TRUE if X contains any TLS symbol references. */
8115 bool
8116 arm_tls_referenced_p (rtx x)
8118 if (! TARGET_HAVE_TLS)
8119 return false;
8121 subrtx_iterator::array_type array;
8122 FOR_EACH_SUBRTX (iter, array, x, ALL)
8124 const_rtx x = *iter;
8125 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8126 return true;
8128 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8129 TLS offsets, not real symbol references. */
8130 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8131 iter.skip_subrtxes ();
8133 return false;
8136 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8138 On the ARM, allow any integer (invalid ones are removed later by insn
8139 patterns), nice doubles and symbol_refs which refer to the function's
8140 constant pool XXX.
8142 When generating pic allow anything. */
8144 static bool
8145 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8147 return flag_pic || !label_mentioned_p (x);
8150 static bool
8151 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8153 return (CONST_INT_P (x)
8154 || CONST_DOUBLE_P (x)
8155 || CONSTANT_ADDRESS_P (x)
8156 || flag_pic);
8159 static bool
8160 arm_legitimate_constant_p (machine_mode mode, rtx x)
8162 return (!arm_cannot_force_const_mem (mode, x)
8163 && (TARGET_32BIT
8164 ? arm_legitimate_constant_p_1 (mode, x)
8165 : thumb_legitimate_constant_p (mode, x)));
8168 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8170 static bool
8171 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8173 rtx base, offset;
8175 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8177 split_const (x, &base, &offset);
8178 if (GET_CODE (base) == SYMBOL_REF
8179 && !offset_within_block_p (base, INTVAL (offset)))
8180 return true;
8182 return arm_tls_referenced_p (x);
8185 #define REG_OR_SUBREG_REG(X) \
8186 (REG_P (X) \
8187 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8189 #define REG_OR_SUBREG_RTX(X) \
8190 (REG_P (X) ? (X) : SUBREG_REG (X))
8192 static inline int
8193 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8195 machine_mode mode = GET_MODE (x);
8196 int total, words;
8198 switch (code)
8200 case ASHIFT:
8201 case ASHIFTRT:
8202 case LSHIFTRT:
8203 case ROTATERT:
8204 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8206 case PLUS:
8207 case MINUS:
8208 case COMPARE:
8209 case NEG:
8210 case NOT:
8211 return COSTS_N_INSNS (1);
8213 case MULT:
8214 if (CONST_INT_P (XEXP (x, 1)))
8216 int cycles = 0;
8217 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8219 while (i)
8221 i >>= 2;
8222 cycles++;
8224 return COSTS_N_INSNS (2) + cycles;
8226 return COSTS_N_INSNS (1) + 16;
8228 case SET:
8229 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8230 the mode. */
8231 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8232 return (COSTS_N_INSNS (words)
8233 + 4 * ((MEM_P (SET_SRC (x)))
8234 + MEM_P (SET_DEST (x))));
8236 case CONST_INT:
8237 if (outer == SET)
8239 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8240 return 0;
8241 if (thumb_shiftable_const (INTVAL (x)))
8242 return COSTS_N_INSNS (2);
8243 return COSTS_N_INSNS (3);
8245 else if ((outer == PLUS || outer == COMPARE)
8246 && INTVAL (x) < 256 && INTVAL (x) > -256)
8247 return 0;
8248 else if ((outer == IOR || outer == XOR || outer == AND)
8249 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8250 return COSTS_N_INSNS (1);
8251 else if (outer == AND)
8253 int i;
8254 /* This duplicates the tests in the andsi3 expander. */
8255 for (i = 9; i <= 31; i++)
8256 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8257 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8258 return COSTS_N_INSNS (2);
8260 else if (outer == ASHIFT || outer == ASHIFTRT
8261 || outer == LSHIFTRT)
8262 return 0;
8263 return COSTS_N_INSNS (2);
8265 case CONST:
8266 case CONST_DOUBLE:
8267 case LABEL_REF:
8268 case SYMBOL_REF:
8269 return COSTS_N_INSNS (3);
8271 case UDIV:
8272 case UMOD:
8273 case DIV:
8274 case MOD:
8275 return 100;
8277 case TRUNCATE:
8278 return 99;
8280 case AND:
8281 case XOR:
8282 case IOR:
8283 /* XXX guess. */
8284 return 8;
8286 case MEM:
8287 /* XXX another guess. */
8288 /* Memory costs quite a lot for the first word, but subsequent words
8289 load at the equivalent of a single insn each. */
8290 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8291 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8292 ? 4 : 0));
8294 case IF_THEN_ELSE:
8295 /* XXX a guess. */
8296 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8297 return 14;
8298 return 2;
8300 case SIGN_EXTEND:
8301 case ZERO_EXTEND:
8302 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8303 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8305 if (mode == SImode)
8306 return total;
8308 if (arm_arch6)
8309 return total + COSTS_N_INSNS (1);
8311 /* Assume a two-shift sequence. Increase the cost slightly so
8312 we prefer actual shifts over an extend operation. */
8313 return total + 1 + COSTS_N_INSNS (2);
8315 default:
8316 return 99;
8320 static inline bool
8321 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8323 machine_mode mode = GET_MODE (x);
8324 enum rtx_code subcode;
8325 rtx operand;
8326 enum rtx_code code = GET_CODE (x);
8327 *total = 0;
8329 switch (code)
8331 case MEM:
8332 /* Memory costs quite a lot for the first word, but subsequent words
8333 load at the equivalent of a single insn each. */
8334 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8335 return true;
8337 case DIV:
8338 case MOD:
8339 case UDIV:
8340 case UMOD:
8341 if (TARGET_HARD_FLOAT && mode == SFmode)
8342 *total = COSTS_N_INSNS (2);
8343 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8344 *total = COSTS_N_INSNS (4);
8345 else
8346 *total = COSTS_N_INSNS (20);
8347 return false;
8349 case ROTATE:
8350 if (REG_P (XEXP (x, 1)))
8351 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8352 else if (!CONST_INT_P (XEXP (x, 1)))
8353 *total = rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8355 /* Fall through */
8356 case ROTATERT:
8357 if (mode != SImode)
8359 *total += COSTS_N_INSNS (4);
8360 return true;
8363 /* Fall through */
8364 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8365 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8366 if (mode == DImode)
8368 *total += COSTS_N_INSNS (3);
8369 return true;
8372 *total += COSTS_N_INSNS (1);
8373 /* Increase the cost of complex shifts because they aren't any faster,
8374 and reduce dual issue opportunities. */
8375 if (arm_tune_cortex_a9
8376 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8377 ++*total;
8379 return true;
8381 case MINUS:
8382 if (mode == DImode)
8384 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8385 if (CONST_INT_P (XEXP (x, 0))
8386 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8388 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8389 return true;
8392 if (CONST_INT_P (XEXP (x, 1))
8393 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8395 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8396 return true;
8399 return false;
8402 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8404 if (TARGET_HARD_FLOAT
8405 && (mode == SFmode
8406 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8408 *total = COSTS_N_INSNS (1);
8409 if (CONST_DOUBLE_P (XEXP (x, 0))
8410 && arm_const_double_rtx (XEXP (x, 0)))
8412 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8413 return true;
8416 if (CONST_DOUBLE_P (XEXP (x, 1))
8417 && arm_const_double_rtx (XEXP (x, 1)))
8419 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8420 return true;
8423 return false;
8425 *total = COSTS_N_INSNS (20);
8426 return false;
8429 *total = COSTS_N_INSNS (1);
8430 if (CONST_INT_P (XEXP (x, 0))
8431 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8433 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8434 return true;
8437 subcode = GET_CODE (XEXP (x, 1));
8438 if (subcode == ASHIFT || subcode == ASHIFTRT
8439 || subcode == LSHIFTRT
8440 || subcode == ROTATE || subcode == ROTATERT)
8442 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8443 *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
8444 return true;
8447 /* A shift as a part of RSB costs no more than RSB itself. */
8448 if (GET_CODE (XEXP (x, 0)) == MULT
8449 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8451 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, speed);
8452 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8453 return true;
8456 if (subcode == MULT
8457 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8459 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8460 *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
8461 return true;
8464 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8465 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8467 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
8468 0, speed);
8469 if (REG_P (XEXP (XEXP (x, 1), 0))
8470 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8471 *total += COSTS_N_INSNS (1);
8473 return true;
8476 /* Fall through */
8478 case PLUS:
8479 if (code == PLUS && arm_arch6 && mode == SImode
8480 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8481 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8483 *total = COSTS_N_INSNS (1);
8484 *total += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
8485 GET_CODE (XEXP (x, 0)), 0, speed);
8486 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8487 return true;
8490 /* MLA: All arguments must be registers. We filter out
8491 multiplication by a power of two, so that we fall down into
8492 the code below. */
8493 if (GET_CODE (XEXP (x, 0)) == MULT
8494 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8496 /* The cost comes from the cost of the multiply. */
8497 return false;
8500 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8502 if (TARGET_HARD_FLOAT
8503 && (mode == SFmode
8504 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8506 *total = COSTS_N_INSNS (1);
8507 if (CONST_DOUBLE_P (XEXP (x, 1))
8508 && arm_const_double_rtx (XEXP (x, 1)))
8510 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8511 return true;
8514 return false;
8517 *total = COSTS_N_INSNS (20);
8518 return false;
8521 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8522 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8524 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), mode, code,
8525 1, speed);
8526 if (REG_P (XEXP (XEXP (x, 0), 0))
8527 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8528 *total += COSTS_N_INSNS (1);
8529 return true;
8532 /* Fall through */
8534 case AND: case XOR: case IOR:
8536 /* Normally the frame registers will be split into reg+const during
8537 reload, so it is a bad idea to combine them with other instructions,
8538 since then they might not be moved outside of loops. As a compromise
8539 we allow integration with ops that have a constant as their second
8540 operand. */
8541 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8542 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8543 && !CONST_INT_P (XEXP (x, 1)))
8544 *total = COSTS_N_INSNS (1);
8546 if (mode == DImode)
8548 *total += COSTS_N_INSNS (2);
8549 if (CONST_INT_P (XEXP (x, 1))
8550 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8552 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8553 return true;
8556 return false;
8559 *total += COSTS_N_INSNS (1);
8560 if (CONST_INT_P (XEXP (x, 1))
8561 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8563 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8564 return true;
8566 subcode = GET_CODE (XEXP (x, 0));
8567 if (subcode == ASHIFT || subcode == ASHIFTRT
8568 || subcode == LSHIFTRT
8569 || subcode == ROTATE || subcode == ROTATERT)
8571 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8572 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8573 return true;
8576 if (subcode == MULT
8577 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8579 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8580 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8581 return true;
8584 if (subcode == UMIN || subcode == UMAX
8585 || subcode == SMIN || subcode == SMAX)
8587 *total = COSTS_N_INSNS (3);
8588 return true;
8591 return false;
8593 case MULT:
8594 /* This should have been handled by the CPU specific routines. */
8595 gcc_unreachable ();
8597 case TRUNCATE:
8598 if (arm_arch3m && mode == SImode
8599 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8600 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8601 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8602 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8603 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8604 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8606 *total = rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, LSHIFTRT,
8607 0, speed);
8608 return true;
8610 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8611 return false;
8613 case NEG:
8614 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8616 if (TARGET_HARD_FLOAT
8617 && (mode == SFmode
8618 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8620 *total = COSTS_N_INSNS (1);
8621 return false;
8623 *total = COSTS_N_INSNS (2);
8624 return false;
8627 /* Fall through */
8628 case NOT:
8629 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8630 if (mode == SImode && code == NOT)
8632 subcode = GET_CODE (XEXP (x, 0));
8633 if (subcode == ASHIFT || subcode == ASHIFTRT
8634 || subcode == LSHIFTRT
8635 || subcode == ROTATE || subcode == ROTATERT
8636 || (subcode == MULT
8637 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8639 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode,
8640 0, speed);
8641 /* Register shifts cost an extra cycle. */
8642 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8643 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8644 mode, subcode,
8645 1, speed);
8646 return true;
8650 return false;
8652 case IF_THEN_ELSE:
8653 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8655 *total = COSTS_N_INSNS (4);
8656 return true;
8659 operand = XEXP (x, 0);
8661 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8662 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8663 && REG_P (XEXP (operand, 0))
8664 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8665 *total += COSTS_N_INSNS (1);
8666 *total += rtx_cost (XEXP (x, 1), VOIDmode, code, 1, speed);
8667 *total += rtx_cost (XEXP (x, 2), VOIDmode, code, 2, speed);
8668 return true;
8670 case NE:
8671 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8673 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
8674 0, speed);
8675 return true;
8677 goto scc_insn;
8679 case GE:
8680 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8681 && mode == SImode && XEXP (x, 1) == const0_rtx)
8683 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
8684 0, speed);
8685 return true;
8687 goto scc_insn;
8689 case LT:
8690 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8691 && mode == SImode && XEXP (x, 1) == const0_rtx)
8693 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
8694 0, speed);
8695 return true;
8697 goto scc_insn;
8699 case EQ:
8700 case GT:
8701 case LE:
8702 case GEU:
8703 case LTU:
8704 case GTU:
8705 case LEU:
8706 case UNORDERED:
8707 case ORDERED:
8708 case UNEQ:
8709 case UNGE:
8710 case UNLT:
8711 case UNGT:
8712 case UNLE:
8713 scc_insn:
8714 /* SCC insns. In the case where the comparison has already been
8715 performed, then they cost 2 instructions. Otherwise they need
8716 an additional comparison before them. */
8717 *total = COSTS_N_INSNS (2);
8718 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8720 return true;
8723 /* Fall through */
8724 case COMPARE:
8725 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8727 *total = 0;
8728 return true;
8731 *total += COSTS_N_INSNS (1);
8732 if (CONST_INT_P (XEXP (x, 1))
8733 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8735 *total += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed);
8736 return true;
8739 subcode = GET_CODE (XEXP (x, 0));
8740 if (subcode == ASHIFT || subcode == ASHIFTRT
8741 || subcode == LSHIFTRT
8742 || subcode == ROTATE || subcode == ROTATERT)
8744 mode = GET_MODE (XEXP (x, 0));
8745 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8746 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8747 return true;
8750 if (subcode == MULT
8751 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8753 mode = GET_MODE (XEXP (x, 0));
8754 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8755 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8756 return true;
8759 return false;
8761 case UMIN:
8762 case UMAX:
8763 case SMIN:
8764 case SMAX:
8765 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8766 if (!CONST_INT_P (XEXP (x, 1))
8767 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8768 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8769 return true;
8771 case ABS:
8772 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8774 if (TARGET_HARD_FLOAT
8775 && (mode == SFmode
8776 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8778 *total = COSTS_N_INSNS (1);
8779 return false;
8781 *total = COSTS_N_INSNS (20);
8782 return false;
8784 *total = COSTS_N_INSNS (1);
8785 if (mode == DImode)
8786 *total += COSTS_N_INSNS (3);
8787 return false;
8789 case SIGN_EXTEND:
8790 case ZERO_EXTEND:
8791 *total = 0;
8792 if (GET_MODE_CLASS (mode) == MODE_INT)
8794 rtx op = XEXP (x, 0);
8795 machine_mode opmode = GET_MODE (op);
8797 if (mode == DImode)
8798 *total += COSTS_N_INSNS (1);
8800 if (opmode != SImode)
8802 if (MEM_P (op))
8804 /* If !arm_arch4, we use one of the extendhisi2_mem
8805 or movhi_bytes patterns for HImode. For a QImode
8806 sign extension, we first zero-extend from memory
8807 and then perform a shift sequence. */
8808 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8809 *total += COSTS_N_INSNS (2);
8811 else if (arm_arch6)
8812 *total += COSTS_N_INSNS (1);
8814 /* We don't have the necessary insn, so we need to perform some
8815 other operation. */
8816 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8817 /* An and with constant 255. */
8818 *total += COSTS_N_INSNS (1);
8819 else
8820 /* A shift sequence. Increase costs slightly to avoid
8821 combining two shifts into an extend operation. */
8822 *total += COSTS_N_INSNS (2) + 1;
8825 return false;
8828 switch (GET_MODE (XEXP (x, 0)))
8830 case V8QImode:
8831 case V4HImode:
8832 case V2SImode:
8833 case V4QImode:
8834 case V2HImode:
8835 *total = COSTS_N_INSNS (1);
8836 return false;
8838 default:
8839 gcc_unreachable ();
8841 gcc_unreachable ();
8843 case ZERO_EXTRACT:
8844 case SIGN_EXTRACT:
8845 mode = GET_MODE (XEXP (x, 0));
8846 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8847 return true;
8849 case CONST_INT:
8850 if (const_ok_for_arm (INTVAL (x))
8851 || const_ok_for_arm (~INTVAL (x)))
8852 *total = COSTS_N_INSNS (1);
8853 else
8854 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8855 INTVAL (x), NULL_RTX,
8856 NULL_RTX, 0, 0));
8857 return true;
8859 case CONST:
8860 case LABEL_REF:
8861 case SYMBOL_REF:
8862 *total = COSTS_N_INSNS (3);
8863 return true;
8865 case HIGH:
8866 *total = COSTS_N_INSNS (1);
8867 return true;
8869 case LO_SUM:
8870 *total = COSTS_N_INSNS (1);
8871 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8872 return true;
8874 case CONST_DOUBLE:
8875 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8876 && (mode == SFmode || !TARGET_VFP_SINGLE))
8877 *total = COSTS_N_INSNS (1);
8878 else
8879 *total = COSTS_N_INSNS (4);
8880 return true;
8882 case SET:
8883 /* The vec_extract patterns accept memory operands that require an
8884 address reload. Account for the cost of that reload to give the
8885 auto-inc-dec pass an incentive to try to replace them. */
8886 if (TARGET_NEON && MEM_P (SET_DEST (x))
8887 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8889 mode = GET_MODE (SET_DEST (x));
8890 *total = rtx_cost (SET_DEST (x), mode, code, 0, speed);
8891 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8892 *total += COSTS_N_INSNS (1);
8893 return true;
8895 /* Likewise for the vec_set patterns. */
8896 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8897 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8898 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8900 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8901 mode = GET_MODE (SET_DEST (x));
8902 *total = rtx_cost (mem, mode, code, 0, speed);
8903 if (!neon_vector_mem_operand (mem, 2, true))
8904 *total += COSTS_N_INSNS (1);
8905 return true;
8907 return false;
8909 case UNSPEC:
8910 /* We cost this as high as our memory costs to allow this to
8911 be hoisted from loops. */
8912 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8914 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8916 return true;
8918 case CONST_VECTOR:
8919 if (TARGET_NEON
8920 && TARGET_HARD_FLOAT
8921 && outer == SET
8922 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8923 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8924 *total = COSTS_N_INSNS (1);
8925 else
8926 *total = COSTS_N_INSNS (4);
8927 return true;
8929 default:
8930 *total = COSTS_N_INSNS (4);
8931 return false;
8935 /* Estimates the size cost of thumb1 instructions.
8936 For now most of the code is copied from thumb1_rtx_costs. We need more
8937 fine-grained tuning when we have more related test cases. */
8938 static inline int
8939 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8941 machine_mode mode = GET_MODE (x);
8942 int words;
8944 switch (code)
8946 case ASHIFT:
8947 case ASHIFTRT:
8948 case LSHIFTRT:
8949 case ROTATERT:
8950 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8952 case PLUS:
8953 case MINUS:
8954 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8955 defined by RTL expansion, especially for the expansion of
8956 multiplication. */
8957 if ((GET_CODE (XEXP (x, 0)) == MULT
8958 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8959 || (GET_CODE (XEXP (x, 1)) == MULT
8960 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8961 return COSTS_N_INSNS (2);
8962 /* Deliberately fall through for a normal RTX. */
8963 case COMPARE:
8964 case NEG:
8965 case NOT:
8966 return COSTS_N_INSNS (1);
8968 case MULT:
8969 if (CONST_INT_P (XEXP (x, 1)))
8971 /* The Thumb-1 mul instruction can't operate on a constant; we must load
8972 it into a register first. */
8973 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8974 /* For the targets which have a very small and high-latency multiply
8975 unit, we prefer to synthesize the mult with up to 5 instructions,
8976 giving a good balance between size and performance. */
8977 if (arm_arch6m && arm_m_profile_small_mul)
8978 return COSTS_N_INSNS (5);
8979 else
8980 return COSTS_N_INSNS (1) + const_size;
8982 return COSTS_N_INSNS (1);
8984 case SET:
8985 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8986 the mode. */
8987 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8988 return COSTS_N_INSNS (words)
8989 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8990 || satisfies_constraint_K (SET_SRC (x))
8991 /* thumb1_movdi_insn. */
8992 || ((words > 1) && MEM_P (SET_SRC (x))));
8994 case CONST_INT:
8995 if (outer == SET)
8997 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8998 return COSTS_N_INSNS (1);
8999 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9000 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9001 return COSTS_N_INSNS (2);
9002 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9003 if (thumb_shiftable_const (INTVAL (x)))
9004 return COSTS_N_INSNS (2);
9005 return COSTS_N_INSNS (3);
9007 else if ((outer == PLUS || outer == COMPARE)
9008 && INTVAL (x) < 256 && INTVAL (x) > -256)
9009 return 0;
9010 else if ((outer == IOR || outer == XOR || outer == AND)
9011 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9012 return COSTS_N_INSNS (1);
9013 else if (outer == AND)
9015 int i;
9016 /* This duplicates the tests in the andsi3 expander. */
9017 for (i = 9; i <= 31; i++)
9018 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
9019 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
9020 return COSTS_N_INSNS (2);
9022 else if (outer == ASHIFT || outer == ASHIFTRT
9023 || outer == LSHIFTRT)
9024 return 0;
9025 return COSTS_N_INSNS (2);
9027 case CONST:
9028 case CONST_DOUBLE:
9029 case LABEL_REF:
9030 case SYMBOL_REF:
9031 return COSTS_N_INSNS (3);
9033 case UDIV:
9034 case UMOD:
9035 case DIV:
9036 case MOD:
9037 return 100;
9039 case TRUNCATE:
9040 return 99;
9042 case AND:
9043 case XOR:
9044 case IOR:
9045 return COSTS_N_INSNS (1);
9047 case MEM:
9048 return (COSTS_N_INSNS (1)
9049 + COSTS_N_INSNS (1)
9050 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9051 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9052 ? COSTS_N_INSNS (1) : 0));
9054 case IF_THEN_ELSE:
9055 /* XXX a guess. */
9056 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9057 return 14;
9058 return 2;
9060 case ZERO_EXTEND:
9061 /* XXX still guessing. */
9062 switch (GET_MODE (XEXP (x, 0)))
9064 case QImode:
9065 return (1 + (mode == DImode ? 4 : 0)
9066 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9068 case HImode:
9069 return (4 + (mode == DImode ? 4 : 0)
9070 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9072 case SImode:
9073 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9075 default:
9076 return 99;
9079 default:
9080 return 99;
9084 /* RTX costs when optimizing for size. */
9085 static bool
9086 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9087 int *total)
9089 machine_mode mode = GET_MODE (x);
9090 if (TARGET_THUMB1)
9092 *total = thumb1_size_rtx_costs (x, code, outer_code);
9093 return true;
9096 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9097 switch (code)
9099 case MEM:
9100 /* A memory access costs 1 insn if the mode is small, or the address is
9101 a single register, otherwise it costs one insn per word. */
9102 if (REG_P (XEXP (x, 0)))
9103 *total = COSTS_N_INSNS (1);
9104 else if (flag_pic
9105 && GET_CODE (XEXP (x, 0)) == PLUS
9106 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9107 /* This will be split into two instructions.
9108 See arm.md:calculate_pic_address. */
9109 *total = COSTS_N_INSNS (2);
9110 else
9111 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9112 return true;
9114 case DIV:
9115 case MOD:
9116 case UDIV:
9117 case UMOD:
9118 /* Needs a libcall, so it costs about this. */
9119 *total = COSTS_N_INSNS (2);
9120 return false;
9122 case ROTATE:
9123 if (mode == SImode && REG_P (XEXP (x, 1)))
9125 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
9126 0, false);
9127 return true;
9129 /* Fall through */
9130 case ROTATERT:
9131 case ASHIFT:
9132 case LSHIFTRT:
9133 case ASHIFTRT:
9134 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9136 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), mode, code,
9137 0, false);
9138 return true;
9140 else if (mode == SImode)
9142 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
9143 0, false);
9144 /* Slightly disparage register shifts, but not by much. */
9145 if (!CONST_INT_P (XEXP (x, 1)))
9146 *total += 1 + rtx_cost (XEXP (x, 1), mode, code, 1, false);
9147 return true;
9150 /* Needs a libcall. */
9151 *total = COSTS_N_INSNS (2);
9152 return false;
9154 case MINUS:
9155 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9156 && (mode == SFmode || !TARGET_VFP_SINGLE))
9158 *total = COSTS_N_INSNS (1);
9159 return false;
9162 if (mode == SImode)
9164 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9165 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9167 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9168 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9169 || subcode1 == ROTATE || subcode1 == ROTATERT
9170 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9171 || subcode1 == ASHIFTRT)
9173 /* It's just the cost of the two operands. */
9174 *total = 0;
9175 return false;
9178 *total = COSTS_N_INSNS (1);
9179 return false;
9182 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9183 return false;
9185 case PLUS:
9186 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9187 && (mode == SFmode || !TARGET_VFP_SINGLE))
9189 *total = COSTS_N_INSNS (1);
9190 return false;
9193 /* A shift as a part of ADD costs nothing. */
9194 if (GET_CODE (XEXP (x, 0)) == MULT
9195 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9197 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9198 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, false);
9199 *total += rtx_cost (XEXP (x, 1), mode, code, 1, false);
9200 return true;
9203 /* Fall through */
9204 case AND: case XOR: case IOR:
9205 if (mode == SImode)
9207 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9209 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9210 || subcode == LSHIFTRT || subcode == ASHIFTRT
9211 || (code == AND && subcode == NOT))
9213 /* It's just the cost of the two operands. */
9214 *total = 0;
9215 return false;
9219 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9220 return false;
9222 case MULT:
9223 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9224 return false;
9226 case NEG:
9227 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9228 && (mode == SFmode || !TARGET_VFP_SINGLE))
9230 *total = COSTS_N_INSNS (1);
9231 return false;
9234 /* Fall through */
9235 case NOT:
9236 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9238 return false;
9240 case IF_THEN_ELSE:
9241 *total = 0;
9242 return false;
9244 case COMPARE:
9245 if (cc_register (XEXP (x, 0), VOIDmode))
9246 * total = 0;
9247 else
9248 *total = COSTS_N_INSNS (1);
9249 return false;
9251 case ABS:
9252 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9253 && (mode == SFmode || !TARGET_VFP_SINGLE))
9254 *total = COSTS_N_INSNS (1);
9255 else
9256 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9257 return false;
9259 case SIGN_EXTEND:
9260 case ZERO_EXTEND:
9261 return arm_rtx_costs_1 (x, outer_code, total, 0);
9263 case CONST_INT:
9264 if (const_ok_for_arm (INTVAL (x)))
9265 /* A multiplication by a constant requires another instruction
9266 to load the constant to a register. */
9267 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9268 ? 1 : 0);
9269 else if (const_ok_for_arm (~INTVAL (x)))
9270 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9271 else if (const_ok_for_arm (-INTVAL (x)))
9273 if (outer_code == COMPARE || outer_code == PLUS
9274 || outer_code == MINUS)
9275 *total = 0;
9276 else
9277 *total = COSTS_N_INSNS (1);
9279 else
9280 *total = COSTS_N_INSNS (2);
9281 return true;
9283 case CONST:
9284 case LABEL_REF:
9285 case SYMBOL_REF:
9286 *total = COSTS_N_INSNS (2);
9287 return true;
9289 case CONST_DOUBLE:
9290 *total = COSTS_N_INSNS (4);
9291 return true;
9293 case CONST_VECTOR:
9294 if (TARGET_NEON
9295 && TARGET_HARD_FLOAT
9296 && outer_code == SET
9297 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9298 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9299 *total = COSTS_N_INSNS (1);
9300 else
9301 *total = COSTS_N_INSNS (4);
9302 return true;
9304 case HIGH:
9305 case LO_SUM:
9306 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9307 cost of these slightly. */
9308 *total = COSTS_N_INSNS (1) + 1;
9309 return true;
9311 case SET:
9312 return false;
9314 default:
9315 if (mode != VOIDmode)
9316 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9317 else
9318 *total = COSTS_N_INSNS (4); /* Who knows? */
9319 return false;
9323 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9324 operand, then return the operand that is being shifted. If the shift
9325 is not by a constant, then set SHIFT_REG to point to the operand.
9326 Return NULL if OP is not a shifter operand. */
9327 static rtx
9328 shifter_op_p (rtx op, rtx *shift_reg)
9330 enum rtx_code code = GET_CODE (op);
9332 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9333 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9334 return XEXP (op, 0);
9335 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9336 return XEXP (op, 0);
9337 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9338 || code == ASHIFTRT)
9340 if (!CONST_INT_P (XEXP (op, 1)))
9341 *shift_reg = XEXP (op, 1);
9342 return XEXP (op, 0);
9345 return NULL;
9348 static bool
9349 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9351 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9352 rtx_code code = GET_CODE (x);
9353 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9355 switch (XINT (x, 1))
9357 case UNSPEC_UNALIGNED_LOAD:
9358 /* We can only do unaligned loads into the integer unit, and we can't
9359 use LDM or LDRD. */
9360 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9361 if (speed_p)
9362 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9363 + extra_cost->ldst.load_unaligned);
9365 #ifdef NOT_YET
9366 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9367 ADDR_SPACE_GENERIC, speed_p);
9368 #endif
9369 return true;
9371 case UNSPEC_UNALIGNED_STORE:
9372 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9373 if (speed_p)
9374 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9375 + extra_cost->ldst.store_unaligned);
9377 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9378 #ifdef NOT_YET
9379 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9380 ADDR_SPACE_GENERIC, speed_p);
9381 #endif
9382 return true;
9384 case UNSPEC_VRINTZ:
9385 case UNSPEC_VRINTP:
9386 case UNSPEC_VRINTM:
9387 case UNSPEC_VRINTR:
9388 case UNSPEC_VRINTX:
9389 case UNSPEC_VRINTA:
9390 if (speed_p)
9391 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9393 return true;
9394 default:
9395 *cost = COSTS_N_INSNS (2);
9396 break;
9398 return true;
9401 /* Cost of a libcall. We assume one insn per argument, an amount for the
9402 call (one insn for -Os) and then one for processing the result. */
9403 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
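/* For example, LIBCALL_COST (2) expands to COSTS_N_INSNS (20) when
   optimizing for speed and COSTS_N_INSNS (4) when optimizing for size:
   two argument insns plus the assumed call overhead.  */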
9405 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9406 do \
9408 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9409 if (shift_op != NULL \
9410 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9412 if (shift_reg) \
9414 if (speed_p) \
9415 *cost += extra_cost->alu.arith_shift_reg; \
9416 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9417 ASHIFT, 1, speed_p); \
9419 else if (speed_p) \
9420 *cost += extra_cost->alu.arith_shift; \
9422 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9423 ASHIFT, 0, speed_p) \
9424 + rtx_cost (XEXP (x, 1 - IDX), \
9425 GET_MODE (shift_op), \
9426 OP, 1, speed_p)); \
9427 return true; \
9430 while (0);
9432 /* RTX costs. Make an estimate of the cost of executing the operation
9433 X, which is contained within an operation with code OUTER_CODE.
9434 SPEED_P indicates whether the cost desired is the performance cost,
9435 or the size cost. The estimate is stored in COST and the return
9436 value is TRUE if the cost calculation is final, or FALSE if the
9437 caller should recurse through the operands of X to add additional
9438 costs.
9440 We currently make no attempt to model the size savings of Thumb-2
9441 16-bit instructions. At the normal points in compilation where
9442 this code is called we have no measure of whether the condition
9443 flags are live or not, and thus no realistic way to determine what
9444 the size will eventually be. */
9445 static bool
9446 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9447 const struct cpu_cost_table *extra_cost,
9448 int *cost, bool speed_p)
9450 machine_mode mode = GET_MODE (x);
9452 *cost = COSTS_N_INSNS (1);
9454 if (TARGET_THUMB1)
9456 if (speed_p)
9457 *cost = thumb1_rtx_costs (x, code, outer_code);
9458 else
9459 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9460 return true;
9463 switch (code)
9465 case SET:
9466 *cost = 0;
9467 /* SET RTXs don't have a mode so we get it from the destination. */
9468 mode = GET_MODE (SET_DEST (x));
9470 if (REG_P (SET_SRC (x))
9471 && REG_P (SET_DEST (x)))
9473 /* Assume that most copies can be done with a single insn,
9474 unless we don't have HW FP, in which case everything
9475 larger than word mode will require two insns. */
9476 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9477 && GET_MODE_SIZE (mode) > 4)
9478 || mode == DImode)
9479 ? 2 : 1);
9480 /* Conditional register moves can be encoded
9481 in 16 bits in Thumb mode. */
9482 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9483 *cost >>= 1;
9485 return true;
9488 if (CONST_INT_P (SET_SRC (x)))
9490 /* Handle CONST_INT here, since the value doesn't have a mode
9491 and we would otherwise be unable to work out the true cost. */
9492 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9493 0, speed_p);
9494 outer_code = SET;
9495 /* Slightly lower the cost of setting a core reg to a constant.
9496 This helps break up chains and allows for better scheduling. */
9497 if (REG_P (SET_DEST (x))
9498 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9499 *cost -= 1;
9500 x = SET_SRC (x);
9501 /* Immediate moves with an immediate in the range [0, 255] can be
9502 encoded in 16 bits in Thumb mode. */
9503 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9504 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9505 *cost >>= 1;
9506 goto const_int_cost;
9509 return false;
9511 case MEM:
9512 /* A memory access costs 1 insn if the mode is small or the address is
9513 a single register; otherwise it costs one insn per word. */
9514 if (REG_P (XEXP (x, 0)))
9515 *cost = COSTS_N_INSNS (1);
9516 else if (flag_pic
9517 && GET_CODE (XEXP (x, 0)) == PLUS
9518 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9519 /* This will be split into two instructions.
9520 See arm.md:calculate_pic_address. */
9521 *cost = COSTS_N_INSNS (2);
9522 else
9523 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9525 /* For speed optimizations, add the costs of the address and
9526 accessing memory. */
9527 if (speed_p)
9528 #ifdef NOT_YET
9529 *cost += (extra_cost->ldst.load
9530 + arm_address_cost (XEXP (x, 0), mode,
9531 ADDR_SPACE_GENERIC, speed_p));
9532 #else
9533 *cost += extra_cost->ldst.load;
9534 #endif
9535 return true;
9537 case PARALLEL:
9539 /* Calculations of LDM costs are complex. We assume an initial cost
9540 (ldm_1st) which will load the number of registers mentioned in
9541 ldm_regs_per_insn_1st registers; then each additional
9542 ldm_regs_per_insn_subsequent registers cost one more insn. The
9543 formula for N regs is thus:
9545 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9546 + ldm_regs_per_insn_subsequent - 1)
9547 / ldm_regs_per_insn_subsequent).
9549 Additional costs may also be added for addressing. A similar
9550 formula is used for STM. */
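/* Worked example with illustrative tuning values: if
   ldm_regs_per_insn_1st is 3 and ldm_regs_per_insn_subsequent is 2,
   an 8-register LDM adds 3 + COSTS_N_INSNS ((MAX (8 - 3, 0) + 2 - 1) / 2)
   = 3 + COSTS_N_INSNS (3) to the running cost on a speed run.  */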
9552 bool is_ldm = load_multiple_operation (x, SImode);
9553 bool is_stm = store_multiple_operation (x, SImode);
9555 if (is_ldm || is_stm)
9557 if (speed_p)
9559 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9560 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9561 ? extra_cost->ldst.ldm_regs_per_insn_1st
9562 : extra_cost->ldst.stm_regs_per_insn_1st;
9563 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9564 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9565 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9567 *cost += regs_per_insn_1st
9568 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9569 + regs_per_insn_sub - 1)
9570 / regs_per_insn_sub);
9571 return true;
9575 return false;
9577 case DIV:
9578 case UDIV:
9579 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9580 && (mode == SFmode || !TARGET_VFP_SINGLE))
9581 *cost += COSTS_N_INSNS (speed_p
9582 ? extra_cost->fp[mode != SFmode].div : 0);
9583 else if (mode == SImode && TARGET_IDIV)
9584 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9585 else
9586 *cost = LIBCALL_COST (2);
9587 return false; /* All arguments must be in registers. */
9589 case MOD:
9590 /* MOD by a power of 2 can be expanded as:
9591 rsbs r1, r0, #0
9592 and r0, r0, #(n - 1)
9593 and r1, r1, #(n - 1)
9594 rsbpl r0, r1, #0. */
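/* That expansion is four instructions in total, so the COSTS_N_INSNS (3)
   added below sits on top of the COSTS_N_INSNS (1) baseline set on entry
   to this function.  */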
9595 if (CONST_INT_P (XEXP (x, 1))
9596 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9597 && mode == SImode)
9599 *cost += COSTS_N_INSNS (3);
9601 if (speed_p)
9602 *cost += 2 * extra_cost->alu.logical
9603 + extra_cost->alu.arith;
9604 return true;
9607 /* Fall-through. */
9608 case UMOD:
9609 *cost = LIBCALL_COST (2);
9610 return false; /* All arguments must be in registers. */
9612 case ROTATE:
9613 if (mode == SImode && REG_P (XEXP (x, 1)))
9615 *cost += (COSTS_N_INSNS (1)
9616 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9617 if (speed_p)
9618 *cost += extra_cost->alu.shift_reg;
9619 return true;
9621 /* Fall through */
9622 case ROTATERT:
9623 case ASHIFT:
9624 case LSHIFTRT:
9625 case ASHIFTRT:
9626 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9628 *cost += (COSTS_N_INSNS (2)
9629 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9630 if (speed_p)
9631 *cost += 2 * extra_cost->alu.shift;
9632 return true;
9634 else if (mode == SImode)
9636 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9637 /* Slightly disparage register shifts at -Os, but not by much. */
9638 if (!CONST_INT_P (XEXP (x, 1)))
9639 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9640 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9641 return true;
9643 else if (GET_MODE_CLASS (mode) == MODE_INT
9644 && GET_MODE_SIZE (mode) < 4)
9646 if (code == ASHIFT)
9648 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9649 /* Slightly disparage register shifts at -Os, but not by
9650 much. */
9651 if (!CONST_INT_P (XEXP (x, 1)))
9652 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9653 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9655 else if (code == LSHIFTRT || code == ASHIFTRT)
9657 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9659 /* Can use SBFX/UBFX. */
9660 if (speed_p)
9661 *cost += extra_cost->alu.bfx;
9662 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9664 else
9666 *cost += COSTS_N_INSNS (1);
9667 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9668 if (speed_p)
9670 if (CONST_INT_P (XEXP (x, 1)))
9671 *cost += 2 * extra_cost->alu.shift;
9672 else
9673 *cost += (extra_cost->alu.shift
9674 + extra_cost->alu.shift_reg);
9676 else
9677 /* Slightly disparage register shifts. */
9678 *cost += !CONST_INT_P (XEXP (x, 1));
9681 else /* Rotates. */
9683 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9684 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9685 if (speed_p)
9687 if (CONST_INT_P (XEXP (x, 1)))
9688 *cost += (2 * extra_cost->alu.shift
9689 + extra_cost->alu.log_shift);
9690 else
9691 *cost += (extra_cost->alu.shift
9692 + extra_cost->alu.shift_reg
9693 + extra_cost->alu.log_shift_reg);
9696 return true;
9699 *cost = LIBCALL_COST (2);
9700 return false;
9702 case BSWAP:
9703 if (arm_arch6)
9705 if (mode == SImode)
9707 if (speed_p)
9708 *cost += extra_cost->alu.rev;
9710 return false;
9713 else
9715 /* No rev instruction available. Look at arm_legacy_rev
9716 and thumb_legacy_rev for the form of RTL used then. */
9717 if (TARGET_THUMB)
9719 *cost += COSTS_N_INSNS (9);
9721 if (speed_p)
9723 *cost += 6 * extra_cost->alu.shift;
9724 *cost += 3 * extra_cost->alu.logical;
9727 else
9729 *cost += COSTS_N_INSNS (4);
9731 if (speed_p)
9733 *cost += 2 * extra_cost->alu.shift;
9734 *cost += extra_cost->alu.arith_shift;
9735 *cost += 2 * extra_cost->alu.logical;
9738 return true;
9740 return false;
9742 case MINUS:
9743 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9744 && (mode == SFmode || !TARGET_VFP_SINGLE))
9746 if (GET_CODE (XEXP (x, 0)) == MULT
9747 || GET_CODE (XEXP (x, 1)) == MULT)
9749 rtx mul_op0, mul_op1, sub_op;
9751 if (speed_p)
9752 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9754 if (GET_CODE (XEXP (x, 0)) == MULT)
9756 mul_op0 = XEXP (XEXP (x, 0), 0);
9757 mul_op1 = XEXP (XEXP (x, 0), 1);
9758 sub_op = XEXP (x, 1);
9760 else
9762 mul_op0 = XEXP (XEXP (x, 1), 0);
9763 mul_op1 = XEXP (XEXP (x, 1), 1);
9764 sub_op = XEXP (x, 0);
9767 /* The first operand of the multiply may be optionally
9768 negated. */
9769 if (GET_CODE (mul_op0) == NEG)
9770 mul_op0 = XEXP (mul_op0, 0);
9772 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9773 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9774 + rtx_cost (sub_op, mode, code, 0, speed_p));
9776 return true;
9779 if (speed_p)
9780 *cost += extra_cost->fp[mode != SFmode].addsub;
9781 return false;
9784 if (mode == SImode)
9786 rtx shift_by_reg = NULL;
9787 rtx shift_op;
9788 rtx non_shift_op;
9790 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9791 if (shift_op == NULL)
9793 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9794 non_shift_op = XEXP (x, 0);
9796 else
9797 non_shift_op = XEXP (x, 1);
9799 if (shift_op != NULL)
9801 if (shift_by_reg != NULL)
9803 if (speed_p)
9804 *cost += extra_cost->alu.arith_shift_reg;
9805 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9807 else if (speed_p)
9808 *cost += extra_cost->alu.arith_shift;
9810 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9811 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9812 return true;
9815 if (arm_arch_thumb2
9816 && GET_CODE (XEXP (x, 1)) == MULT)
9818 /* MLS. */
9819 if (speed_p)
9820 *cost += extra_cost->mult[0].add;
9821 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9822 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9823 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9824 return true;
9827 if (CONST_INT_P (XEXP (x, 0)))
9829 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9830 INTVAL (XEXP (x, 0)), NULL_RTX,
9831 NULL_RTX, 1, 0);
9832 *cost = COSTS_N_INSNS (insns);
9833 if (speed_p)
9834 *cost += insns * extra_cost->alu.arith;
9835 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9836 return true;
9838 else if (speed_p)
9839 *cost += extra_cost->alu.arith;
9841 return false;
9844 if (GET_MODE_CLASS (mode) == MODE_INT
9845 && GET_MODE_SIZE (mode) < 4)
9847 rtx shift_op, shift_reg;
9848 shift_reg = NULL;
9850 /* We check both sides of the MINUS for shifter operands since,
9851 unlike PLUS, it's not commutative. */
9853 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9854 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9856 /* Slightly disparage, as we might need to widen the result. */
9857 *cost += 1;
9858 if (speed_p)
9859 *cost += extra_cost->alu.arith;
9861 if (CONST_INT_P (XEXP (x, 0)))
9863 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9864 return true;
9867 return false;
9870 if (mode == DImode)
9872 *cost += COSTS_N_INSNS (1);
9874 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9876 rtx op1 = XEXP (x, 1);
9878 if (speed_p)
9879 *cost += 2 * extra_cost->alu.arith;
9881 if (GET_CODE (op1) == ZERO_EXTEND)
9882 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9883 0, speed_p);
9884 else
9885 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9886 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9887 0, speed_p);
9888 return true;
9890 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9892 if (speed_p)
9893 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9894 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9895 0, speed_p)
9896 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9897 return true;
9899 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9900 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9902 if (speed_p)
9903 *cost += (extra_cost->alu.arith
9904 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9905 ? extra_cost->alu.arith
9906 : extra_cost->alu.arith_shift));
9907 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9908 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9909 GET_CODE (XEXP (x, 1)), 0, speed_p));
9910 return true;
9913 if (speed_p)
9914 *cost += 2 * extra_cost->alu.arith;
9915 return false;
9918 /* Vector mode? */
9920 *cost = LIBCALL_COST (2);
9921 return false;
9923 case PLUS:
9924 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9925 && (mode == SFmode || !TARGET_VFP_SINGLE))
9927 if (GET_CODE (XEXP (x, 0)) == MULT)
9929 rtx mul_op0, mul_op1, add_op;
9931 if (speed_p)
9932 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9934 mul_op0 = XEXP (XEXP (x, 0), 0);
9935 mul_op1 = XEXP (XEXP (x, 0), 1);
9936 add_op = XEXP (x, 1);
9938 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9939 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9940 + rtx_cost (add_op, mode, code, 0, speed_p));
9942 return true;
9945 if (speed_p)
9946 *cost += extra_cost->fp[mode != SFmode].addsub;
9947 return false;
9949 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9951 *cost = LIBCALL_COST (2);
9952 return false;
9955 /* Narrow modes can be synthesized in SImode, but the range
9956 of useful sub-operations is limited. Check for shift operations
9957 on one of the operands. Only left shifts can be used in the
9958 narrow modes. */
9959 if (GET_MODE_CLASS (mode) == MODE_INT
9960 && GET_MODE_SIZE (mode) < 4)
9962 rtx shift_op, shift_reg;
9963 shift_reg = NULL;
9965 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9967 if (CONST_INT_P (XEXP (x, 1)))
9969 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9970 INTVAL (XEXP (x, 1)), NULL_RTX,
9971 NULL_RTX, 1, 0);
9972 *cost = COSTS_N_INSNS (insns);
9973 if (speed_p)
9974 *cost += insns * extra_cost->alu.arith;
9975 /* Slightly penalize a narrow operation as the result may
9976 need widening. */
9977 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9978 return true;
9981 /* Slightly penalize a narrow operation as the result may
9982 need widening. */
9983 *cost += 1;
9984 if (speed_p)
9985 *cost += extra_cost->alu.arith;
9987 return false;
9990 if (mode == SImode)
9992 rtx shift_op, shift_reg;
9994 if (TARGET_INT_SIMD
9995 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9996 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9998 /* UXTA[BH] or SXTA[BH]. */
9999 if (speed_p)
10000 *cost += extra_cost->alu.extend_arith;
10001 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10002 0, speed_p)
10003 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10004 return true;
10007 shift_reg = NULL;
10008 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10009 if (shift_op != NULL)
10011 if (shift_reg)
10013 if (speed_p)
10014 *cost += extra_cost->alu.arith_shift_reg;
10015 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10017 else if (speed_p)
10018 *cost += extra_cost->alu.arith_shift;
10020 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10021 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10022 return true;
10024 if (GET_CODE (XEXP (x, 0)) == MULT)
10026 rtx mul_op = XEXP (x, 0);
10028 if (TARGET_DSP_MULTIPLY
10029 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10030 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10031 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10032 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10033 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10034 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10035 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10036 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10037 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10038 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10039 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10040 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10041 == 16))))))
10043 /* SMLA[BT][BT]. */
10044 if (speed_p)
10045 *cost += extra_cost->mult[0].extend_add;
10046 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10047 SIGN_EXTEND, 0, speed_p)
10048 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10049 SIGN_EXTEND, 0, speed_p)
10050 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10051 return true;
10054 if (speed_p)
10055 *cost += extra_cost->mult[0].add;
10056 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10057 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10058 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10059 return true;
10061 if (CONST_INT_P (XEXP (x, 1)))
10063 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10064 INTVAL (XEXP (x, 1)), NULL_RTX,
10065 NULL_RTX, 1, 0);
10066 *cost = COSTS_N_INSNS (insns);
10067 if (speed_p)
10068 *cost += insns * extra_cost->alu.arith;
10069 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10070 return true;
10072 else if (speed_p)
10073 *cost += extra_cost->alu.arith;
10075 return false;
10078 if (mode == DImode)
10080 if (arm_arch3m
10081 && GET_CODE (XEXP (x, 0)) == MULT
10082 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10083 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10084 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10085 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10087 if (speed_p)
10088 *cost += extra_cost->mult[1].extend_add;
10089 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10090 ZERO_EXTEND, 0, speed_p)
10091 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10092 ZERO_EXTEND, 0, speed_p)
10093 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10094 return true;
10097 *cost += COSTS_N_INSNS (1);
10099 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10100 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10102 if (speed_p)
10103 *cost += (extra_cost->alu.arith
10104 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10105 ? extra_cost->alu.arith
10106 : extra_cost->alu.arith_shift));
10108 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10109 0, speed_p)
10110 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10111 return true;
10114 if (speed_p)
10115 *cost += 2 * extra_cost->alu.arith;
10116 return false;
10119 /* Vector mode? */
10120 *cost = LIBCALL_COST (2);
10121 return false;
10122 case IOR:
10123 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10125 if (speed_p)
10126 *cost += extra_cost->alu.rev;
10128 return true;
10130 /* Fall through. */
10131 case AND: case XOR:
10132 if (mode == SImode)
10134 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10135 rtx op0 = XEXP (x, 0);
10136 rtx shift_op, shift_reg;
10138 if (subcode == NOT
10139 && (code == AND
10140 || (code == IOR && TARGET_THUMB2)))
10141 op0 = XEXP (op0, 0);
10143 shift_reg = NULL;
10144 shift_op = shifter_op_p (op0, &shift_reg);
10145 if (shift_op != NULL)
10147 if (shift_reg)
10149 if (speed_p)
10150 *cost += extra_cost->alu.log_shift_reg;
10151 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10153 else if (speed_p)
10154 *cost += extra_cost->alu.log_shift;
10156 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10157 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10158 return true;
10161 if (CONST_INT_P (XEXP (x, 1)))
10163 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10164 INTVAL (XEXP (x, 1)), NULL_RTX,
10165 NULL_RTX, 1, 0);
10167 *cost = COSTS_N_INSNS (insns);
10168 if (speed_p)
10169 *cost += insns * extra_cost->alu.logical;
10170 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10171 return true;
10174 if (speed_p)
10175 *cost += extra_cost->alu.logical;
10176 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10177 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10178 return true;
10181 if (mode == DImode)
10183 rtx op0 = XEXP (x, 0);
10184 enum rtx_code subcode = GET_CODE (op0);
10186 *cost += COSTS_N_INSNS (1);
10188 if (subcode == NOT
10189 && (code == AND
10190 || (code == IOR && TARGET_THUMB2)))
10191 op0 = XEXP (op0, 0);
10193 if (GET_CODE (op0) == ZERO_EXTEND)
10195 if (speed_p)
10196 *cost += 2 * extra_cost->alu.logical;
10198 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10199 0, speed_p)
10200 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10201 return true;
10203 else if (GET_CODE (op0) == SIGN_EXTEND)
10205 if (speed_p)
10206 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10208 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10209 0, speed_p)
10210 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10211 return true;
10214 if (speed_p)
10215 *cost += 2 * extra_cost->alu.logical;
10217 return true;
10219 /* Vector mode? */
10221 *cost = LIBCALL_COST (2);
10222 return false;
10224 case MULT:
10225 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10226 && (mode == SFmode || !TARGET_VFP_SINGLE))
10228 rtx op0 = XEXP (x, 0);
10230 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10231 op0 = XEXP (op0, 0);
10233 if (speed_p)
10234 *cost += extra_cost->fp[mode != SFmode].mult;
10236 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10237 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10238 return true;
10240 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10242 *cost = LIBCALL_COST (2);
10243 return false;
10246 if (mode == SImode)
10248 if (TARGET_DSP_MULTIPLY
10249 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10250 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10251 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10252 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10253 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10254 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10255 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10256 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10257 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10258 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10259 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10260 && (INTVAL (XEXP (XEXP (x, 1), 1))
10261 == 16))))))
10263 /* SMUL[TB][TB]. */
10264 if (speed_p)
10265 *cost += extra_cost->mult[0].extend;
10266 *cost += rtx_cost (XEXP (x, 0), mode, SIGN_EXTEND, 0, speed_p);
10267 *cost += rtx_cost (XEXP (x, 1), mode, SIGN_EXTEND, 1, speed_p);
10268 return true;
10270 if (speed_p)
10271 *cost += extra_cost->mult[0].simple;
10272 return false;
10275 if (mode == DImode)
10277 if (arm_arch3m
10278 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10279 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10280 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10281 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10283 if (speed_p)
10284 *cost += extra_cost->mult[1].extend;
10285 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10286 ZERO_EXTEND, 0, speed_p)
10287 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10288 ZERO_EXTEND, 0, speed_p));
10289 return true;
10292 *cost = LIBCALL_COST (2);
10293 return false;
10296 /* Vector mode? */
10297 *cost = LIBCALL_COST (2);
10298 return false;
10300 case NEG:
10301 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10302 && (mode == SFmode || !TARGET_VFP_SINGLE))
10304 if (GET_CODE (XEXP (x, 0)) == MULT)
10306 /* VNMUL. */
10307 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10308 return true;
10311 if (speed_p)
10312 *cost += extra_cost->fp[mode != SFmode].neg;
10314 return false;
10316 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10318 *cost = LIBCALL_COST (1);
10319 return false;
10322 if (mode == SImode)
10324 if (GET_CODE (XEXP (x, 0)) == ABS)
10326 *cost += COSTS_N_INSNS (1);
10327 /* Assume the non-flag-changing variant. */
10328 if (speed_p)
10329 *cost += (extra_cost->alu.log_shift
10330 + extra_cost->alu.arith_shift);
10331 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10332 return true;
10335 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10336 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10338 *cost += COSTS_N_INSNS (1);
10339 /* No extra cost for MOV imm and MVN imm. */
10340 /* If the comparison op is using the flags, there's no further
10341 cost, otherwise we need to add the cost of the comparison. */
10342 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10343 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10344 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10346 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10347 *cost += (COSTS_N_INSNS (1)
10348 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10349 0, speed_p)
10350 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10351 1, speed_p));
10352 if (speed_p)
10353 *cost += extra_cost->alu.arith;
10355 return true;
10358 if (speed_p)
10359 *cost += extra_cost->alu.arith;
10360 return false;
10363 if (GET_MODE_CLASS (mode) == MODE_INT
10364 && GET_MODE_SIZE (mode) < 4)
10366 /* Slightly disparage, as we might need an extend operation. */
10367 *cost += 1;
10368 if (speed_p)
10369 *cost += extra_cost->alu.arith;
10370 return false;
10373 if (mode == DImode)
10375 *cost += COSTS_N_INSNS (1);
10376 if (speed_p)
10377 *cost += 2 * extra_cost->alu.arith;
10378 return false;
10381 /* Vector mode? */
10382 *cost = LIBCALL_COST (1);
10383 return false;
10385 case NOT:
10386 if (mode == SImode)
10388 rtx shift_op;
10389 rtx shift_reg = NULL;
10391 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10393 if (shift_op)
10395 if (shift_reg != NULL)
10397 if (speed_p)
10398 *cost += extra_cost->alu.log_shift_reg;
10399 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10401 else if (speed_p)
10402 *cost += extra_cost->alu.log_shift;
10403 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10404 return true;
10407 if (speed_p)
10408 *cost += extra_cost->alu.logical;
10409 return false;
10411 if (mode == DImode)
10413 *cost += COSTS_N_INSNS (1);
10414 return false;
10417 /* Vector mode? */
10419 *cost += LIBCALL_COST (1);
10420 return false;
10422 case IF_THEN_ELSE:
10424 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10426 *cost += COSTS_N_INSNS (3);
10427 return true;
10429 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10430 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10432 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10433 /* Assume that if one arm of the if_then_else is a register,
10434 it will be tied with the result, eliminating the
10435 conditional insn. */
10436 if (REG_P (XEXP (x, 1)))
10437 *cost += op2cost;
10438 else if (REG_P (XEXP (x, 2)))
10439 *cost += op1cost;
10440 else
10442 if (speed_p)
10444 if (extra_cost->alu.non_exec_costs_exec)
10445 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10446 else
10447 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10449 else
10450 *cost += op1cost + op2cost;
10453 return true;
10455 case COMPARE:
10456 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10457 *cost = 0;
10458 else
10460 machine_mode op0mode;
10461 /* We'll mostly assume that the cost of a compare is the cost of the
10462 LHS. However, there are some notable exceptions. */
10464 /* Floating point compares are never done as side-effects. */
10465 op0mode = GET_MODE (XEXP (x, 0));
10466 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10467 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10469 if (speed_p)
10470 *cost += extra_cost->fp[op0mode != SFmode].compare;
10472 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10474 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10475 return true;
10478 return false;
10480 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10482 *cost = LIBCALL_COST (2);
10483 return false;
10486 /* DImode compares normally take two insns. */
10487 if (op0mode == DImode)
10489 *cost += COSTS_N_INSNS (1);
10490 if (speed_p)
10491 *cost += 2 * extra_cost->alu.arith;
10492 return false;
10495 if (op0mode == SImode)
10497 rtx shift_op;
10498 rtx shift_reg;
10500 if (XEXP (x, 1) == const0_rtx
10501 && !(REG_P (XEXP (x, 0))
10502 || (GET_CODE (XEXP (x, 0)) == SUBREG
10503 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10505 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10507 /* Multiply operations that set the flags are often
10508 significantly more expensive. */
10509 if (speed_p
10510 && GET_CODE (XEXP (x, 0)) == MULT
10511 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10512 *cost += extra_cost->mult[0].flag_setting;
10514 if (speed_p
10515 && GET_CODE (XEXP (x, 0)) == PLUS
10516 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10517 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10518 0), 1), mode))
10519 *cost += extra_cost->mult[0].flag_setting;
10520 return true;
10523 shift_reg = NULL;
10524 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10525 if (shift_op != NULL)
10527 if (shift_reg != NULL)
10529 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10530 1, speed_p);
10531 if (speed_p)
10532 *cost += extra_cost->alu.arith_shift_reg;
10534 else if (speed_p)
10535 *cost += extra_cost->alu.arith_shift;
10536 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10537 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10538 return true;
10541 if (speed_p)
10542 *cost += extra_cost->alu.arith;
10543 if (CONST_INT_P (XEXP (x, 1))
10544 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10546 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10547 return true;
10549 return false;
10552 /* Vector mode? */
10554 *cost = LIBCALL_COST (2);
10555 return false;
10557 return true;
10559 case EQ:
10560 case NE:
10561 case LT:
10562 case LE:
10563 case GT:
10564 case GE:
10565 case LTU:
10566 case LEU:
10567 case GEU:
10568 case GTU:
10569 case ORDERED:
10570 case UNORDERED:
10571 case UNEQ:
10572 case UNLE:
10573 case UNLT:
10574 case UNGE:
10575 case UNGT:
10576 case LTGT:
10577 if (outer_code == SET)
10579 /* Is it a store-flag operation? */
10580 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10581 && XEXP (x, 1) == const0_rtx)
10583 /* Thumb also needs an IT insn. */
10584 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10585 return true;
10587 if (XEXP (x, 1) == const0_rtx)
10589 switch (code)
10591 case LT:
10592 /* LSR Rd, Rn, #31. */
10593 if (speed_p)
10594 *cost += extra_cost->alu.shift;
10595 break;
10597 case EQ:
10598 /* RSBS T1, Rn, #0
10599 ADC Rd, Rn, T1. */
10601 case NE:
10602 /* SUBS T1, Rn, #1
10603 SBC Rd, Rn, T1. */
10604 *cost += COSTS_N_INSNS (1);
10605 break;
10607 case LE:
10608 /* RSBS T1, Rn, Rn, LSR #31
10609 ADC Rd, Rn, T1. */
10610 *cost += COSTS_N_INSNS (1);
10611 if (speed_p)
10612 *cost += extra_cost->alu.arith_shift;
10613 break;
10615 case GT:
10616 /* RSB Rd, Rn, Rn, ASR #1
10617 LSR Rd, Rd, #31. */
10618 *cost += COSTS_N_INSNS (1);
10619 if (speed_p)
10620 *cost += (extra_cost->alu.arith_shift
10621 + extra_cost->alu.shift);
10622 break;
10624 case GE:
10625 /* ASR Rd, Rn, #31
10626 ADD Rd, Rn, #1. */
10627 *cost += COSTS_N_INSNS (1);
10628 if (speed_p)
10629 *cost += extra_cost->alu.shift;
10630 break;
10632 default:
10633 /* Remaining cases are either meaningless or would take
10634 three insns anyway. */
10635 *cost = COSTS_N_INSNS (3);
10636 break;
10638 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10639 return true;
10641 else
10643 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10644 if (CONST_INT_P (XEXP (x, 1))
10645 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10647 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10648 return true;
10651 return false;
10654 /* Not directly inside a set. If it involves the condition code
10655 register it must be the condition for a branch, cond_exec or
10656 I_T_E operation. Since the comparison is performed elsewhere,
10657 this is just the control part, which has no additional
10658 cost. */
10659 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10660 && XEXP (x, 1) == const0_rtx)
10662 *cost = 0;
10663 return true;
10665 return false;
10667 case ABS:
10668 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10669 && (mode == SFmode || !TARGET_VFP_SINGLE))
10671 if (speed_p)
10672 *cost += extra_cost->fp[mode != SFmode].neg;
10674 return false;
10676 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10678 *cost = LIBCALL_COST (1);
10679 return false;
10682 if (mode == SImode)
10684 if (speed_p)
10685 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10686 return false;
10688 /* Vector mode? */
10689 *cost = LIBCALL_COST (1);
10690 return false;
10692 case SIGN_EXTEND:
10693 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10694 && MEM_P (XEXP (x, 0)))
10696 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10698 if (mode == DImode)
10699 *cost += COSTS_N_INSNS (1);
10701 if (!speed_p)
10702 return true;
10704 if (GET_MODE (XEXP (x, 0)) == SImode)
10705 *cost += extra_cost->ldst.load;
10706 else
10707 *cost += extra_cost->ldst.load_sign_extend;
10709 if (mode == DImode)
10710 *cost += extra_cost->alu.shift;
10712 return true;
10715 /* Widening from less than 32 bits requires an extend operation. */
10716 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10718 /* We have SXTB/SXTH. */
10719 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10720 if (speed_p)
10721 *cost += extra_cost->alu.extend;
10723 else if (GET_MODE (XEXP (x, 0)) != SImode)
10725 /* Needs two shifts. */
10726 *cost += COSTS_N_INSNS (1);
10727 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10728 if (speed_p)
10729 *cost += 2 * extra_cost->alu.shift;
10732 /* Widening beyond 32 bits requires one more insn. */
10733 if (mode == DImode)
10735 *cost += COSTS_N_INSNS (1);
10736 if (speed_p)
10737 *cost += extra_cost->alu.shift;
10740 return true;
10742 case ZERO_EXTEND:
10743 if ((arm_arch4
10744 || GET_MODE (XEXP (x, 0)) == SImode
10745 || GET_MODE (XEXP (x, 0)) == QImode)
10746 && MEM_P (XEXP (x, 0)))
10748 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10750 if (mode == DImode)
10751 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10753 return true;
10756 /* Widening from less than 32 bits requires an extend operation. */
10757 if (GET_MODE (XEXP (x, 0)) == QImode)
10759 /* UXTB can be a shorter instruction in Thumb2, but it might
10760 be slower than the AND Rd, Rn, #255 alternative. When
10761 optimizing for speed it should never be slower to use
10762 AND, and we don't really model 16-bit vs 32-bit insns
10763 here. */
10764 if (speed_p)
10765 *cost += extra_cost->alu.logical;
10767 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10769 /* We have UXTB/UXTH. */
10770 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10771 if (speed_p)
10772 *cost += extra_cost->alu.extend;
10774 else if (GET_MODE (XEXP (x, 0)) != SImode)
10776 /* Needs two shifts. It's marginally preferable to use
10777 shifts rather than two BIC instructions as the second
10778 shift may merge with a subsequent insn as a shifter
10779 op. */
10780 *cost = COSTS_N_INSNS (2);
10781 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10782 if (speed_p)
10783 *cost += 2 * extra_cost->alu.shift;
10786 /* Widening beyond 32 bits requires one more insn. */
10787 if (mode == DImode)
10789 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10792 return true;
10794 case CONST_INT:
10795 *cost = 0;
10796 /* CONST_INT has no mode, so we cannot tell for sure how many
10797 insns are really going to be needed. The best we can do is
10798 look at the value passed. If it fits in SImode, then assume
10799 that's the mode it will be used for. Otherwise assume it
10800 will be used in DImode. */
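/* For example, a value such as 0x1ffffffff is not preserved by
   trunc_int_for_mode (..., SImode), so it is costed below as a DImode
   constant built from its two SImode halves.  */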
10801 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10802 mode = SImode;
10803 else
10804 mode = DImode;
10806 /* Avoid blowing up in arm_gen_constant (). */
10807 if (!(outer_code == PLUS
10808 || outer_code == AND
10809 || outer_code == IOR
10810 || outer_code == XOR
10811 || outer_code == MINUS))
10812 outer_code = SET;
10814 const_int_cost:
10815 if (mode == SImode)
10817 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10818 INTVAL (x), NULL, NULL,
10819 0, 0));
10820 /* Extra costs? */
10822 else
10824 *cost += COSTS_N_INSNS (arm_gen_constant
10825 (outer_code, SImode, NULL,
10826 trunc_int_for_mode (INTVAL (x), SImode),
10827 NULL, NULL, 0, 0)
10828 + arm_gen_constant (outer_code, SImode, NULL,
10829 INTVAL (x) >> 32, NULL,
10830 NULL, 0, 0));
10831 /* Extra costs? */
10834 return true;
10836 case CONST:
10837 case LABEL_REF:
10838 case SYMBOL_REF:
10839 if (speed_p)
10841 if (arm_arch_thumb2 && !flag_pic)
10842 *cost += COSTS_N_INSNS (1);
10843 else
10844 *cost += extra_cost->ldst.load;
10846 else
10847 *cost += COSTS_N_INSNS (1);
10849 if (flag_pic)
10851 *cost += COSTS_N_INSNS (1);
10852 if (speed_p)
10853 *cost += extra_cost->alu.arith;
10856 return true;
10858 case CONST_FIXED:
10859 *cost = COSTS_N_INSNS (4);
10860 /* Fixme. */
10861 return true;
10863 case CONST_DOUBLE:
10864 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10865 && (mode == SFmode || !TARGET_VFP_SINGLE))
10867 if (vfp3_const_double_rtx (x))
10869 if (speed_p)
10870 *cost += extra_cost->fp[mode == DFmode].fpconst;
10871 return true;
10874 if (speed_p)
10876 if (mode == DFmode)
10877 *cost += extra_cost->ldst.loadd;
10878 else
10879 *cost += extra_cost->ldst.loadf;
10881 else
10882 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10884 return true;
10886 *cost = COSTS_N_INSNS (4);
10887 return true;
10889 case CONST_VECTOR:
10890 /* Fixme. */
10891 if (TARGET_NEON
10892 && TARGET_HARD_FLOAT
10893 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10894 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10895 *cost = COSTS_N_INSNS (1);
10896 else
10897 *cost = COSTS_N_INSNS (4);
10898 return true;
10900 case HIGH:
10901 case LO_SUM:
10902 /* When optimizing for size, we prefer constant pool entries to
10903 MOVW/MOVT pairs, so bump the cost of these slightly. */
10904 if (!speed_p)
10905 *cost += 1;
10906 return true;
10908 case CLZ:
10909 if (speed_p)
10910 *cost += extra_cost->alu.clz;
10911 return false;
10913 case SMIN:
10914 if (XEXP (x, 1) == const0_rtx)
10916 if (speed_p)
10917 *cost += extra_cost->alu.log_shift;
10918 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10919 return true;
10921 /* Fall through. */
10922 case SMAX:
10923 case UMIN:
10924 case UMAX:
10925 *cost += COSTS_N_INSNS (1);
10926 return false;
10928 case TRUNCATE:
10929 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10930 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10931 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10932 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10933 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10934 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10935 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10936 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10937 == ZERO_EXTEND))))
10939 if (speed_p)
10940 *cost += extra_cost->mult[1].extend;
10941 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10942 ZERO_EXTEND, 0, speed_p)
10943 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10944 ZERO_EXTEND, 0, speed_p));
10945 return true;
10947 *cost = LIBCALL_COST (1);
10948 return false;
10950 case UNSPEC_VOLATILE:
10951 case UNSPEC:
10952 return arm_unspec_cost (x, outer_code, speed_p, cost);
10954 case PC:
10955 /* Reading the PC is like reading any other register. Writing it
10956 is more expensive, but we take that into account elsewhere. */
10957 *cost = 0;
10958 return true;
10960 case ZERO_EXTRACT:
10961 /* TODO: Simple zero_extract of bottom bits using AND. */
10962 /* Fall through. */
10963 case SIGN_EXTRACT:
10964 if (arm_arch6
10965 && mode == SImode
10966 && CONST_INT_P (XEXP (x, 1))
10967 && CONST_INT_P (XEXP (x, 2)))
10969 if (speed_p)
10970 *cost += extra_cost->alu.bfx;
10971 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10972 return true;
10974 /* Without UBFX/SBFX, need to resort to shift operations. */
10975 *cost += COSTS_N_INSNS (1);
10976 if (speed_p)
10977 *cost += 2 * extra_cost->alu.shift;
10978 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10979 return true;
10981 case FLOAT_EXTEND:
10982 if (TARGET_HARD_FLOAT)
10984 if (speed_p)
10985 *cost += extra_cost->fp[mode == DFmode].widen;
10986 if (!TARGET_FPU_ARMV8
10987 && GET_MODE (XEXP (x, 0)) == HFmode)
10989 /* Pre v8, widening HF->DF is a two-step process, first
10990 widening to SFmode. */
10991 *cost += COSTS_N_INSNS (1);
10992 if (speed_p)
10993 *cost += extra_cost->fp[0].widen;
10995 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10996 return true;
10999 *cost = LIBCALL_COST (1);
11000 return false;
11002 case FLOAT_TRUNCATE:
11003 if (TARGET_HARD_FLOAT)
11005 if (speed_p)
11006 *cost += extra_cost->fp[mode == DFmode].narrow;
11007 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11008 return true;
11009 /* Vector modes? */
11011 *cost = LIBCALL_COST (1);
11012 return false;
11014 case FMA:
11015 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11017 rtx op0 = XEXP (x, 0);
11018 rtx op1 = XEXP (x, 1);
11019 rtx op2 = XEXP (x, 2);
11022 /* vfms or vfnma. */
11023 if (GET_CODE (op0) == NEG)
11024 op0 = XEXP (op0, 0);
11026 /* vfnms or vfnma. */
11027 if (GET_CODE (op2) == NEG)
11028 op2 = XEXP (op2, 0);
11030 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11031 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11032 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11034 if (speed_p)
11035 *cost += extra_cost->fp[mode == DFmode].fma;
11037 return true;
11040 *cost = LIBCALL_COST (3);
11041 return false;
11043 case FIX:
11044 case UNSIGNED_FIX:
11045 if (TARGET_HARD_FLOAT)
11047 if (GET_MODE_CLASS (mode) == MODE_INT)
11049 mode = GET_MODE (XEXP (x, 0));
11050 if (speed_p)
11051 *cost += extra_cost->fp[mode == DFmode].toint;
11052 /* Strip off the 'cost' of rounding towards zero. */
11053 if (GET_CODE (XEXP (x, 0)) == FIX)
11054 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11055 0, speed_p);
11056 else
11057 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11058 /* ??? Increase the cost to deal with transferring from
11059 FP -> CORE registers? */
11060 return true;
11062 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11063 && TARGET_FPU_ARMV8)
11065 if (speed_p)
11066 *cost += extra_cost->fp[mode == DFmode].roundint;
11067 return false;
11069 /* Vector costs? */
11071 *cost = LIBCALL_COST (1);
11072 return false;
11074 case FLOAT:
11075 case UNSIGNED_FLOAT:
11076 if (TARGET_HARD_FLOAT)
11078 /* ??? Increase the cost to deal with transferring from CORE
11079 -> FP registers? */
11080 if (speed_p)
11081 *cost += extra_cost->fp[mode == DFmode].fromint;
11082 return false;
11084 *cost = LIBCALL_COST (1);
11085 return false;
11087 case CALL:
11088 return true;
11090 case ASM_OPERANDS:
11092 /* Just a guess. Guess number of instructions in the asm
11093 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11094 though (see PR60663). */
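/* E.g. an asm template containing two instructions and taking three
   input operands is costed as COSTS_N_INSNS (5).  */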
11095 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11096 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11098 *cost = COSTS_N_INSNS (asm_length + num_operands);
11099 return true;
11101 default:
11102 if (mode != VOIDmode)
11103 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11104 else
11105 *cost = COSTS_N_INSNS (4); /* Who knows? */
11106 return false;
11110 #undef HANDLE_NARROW_SHIFT_ARITH
11112 /* RTX costs. Dispatch to the appropriate cost routines, for both speed and size. */
11113 static bool
11114 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11115 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11117 bool result;
11118 int code = GET_CODE (x);
11120 if (TARGET_OLD_RTX_COSTS
11121 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11123 /* Old way. (Deprecated.) */
11124 if (!speed)
11125 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11126 (enum rtx_code) outer_code, total);
11127 else
11128 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11129 (enum rtx_code) outer_code, total,
11130 speed);
11132 else
11134 /* New way. */
11135 if (current_tune->insn_extra_cost)
11136 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11137 (enum rtx_code) outer_code,
11138 current_tune->insn_extra_cost,
11139 total, speed);
11140 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11141 && current_tune->insn_extra_cost == NULL */
11142 else
11143 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11144 (enum rtx_code) outer_code,
11145 &generic_extra_costs, total, speed);
11148 if (dump_file && (dump_flags & TDF_DETAILS))
11150 print_rtl_single (dump_file, x);
11151 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11152 *total, result ? "final" : "partial");
11154 return result;
11157 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11158 supported on any "slowmul" cores, so it can be ignored. */
11160 static bool
11161 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11162 int *total, bool speed)
11164 machine_mode mode = GET_MODE (x);
11166 if (TARGET_THUMB)
11168 *total = thumb1_rtx_costs (x, code, outer_code);
11169 return true;
11172 switch (code)
11174 case MULT:
11175 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11176 || mode == DImode)
11178 *total = COSTS_N_INSNS (20);
11179 return false;
11182 if (CONST_INT_P (XEXP (x, 1)))
11184 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11185 & (unsigned HOST_WIDE_INT) 0xffffffff);
11186 int cost, const_ok = const_ok_for_arm (i);
11187 int j, booth_unit_size;
11189 /* Tune as appropriate. */
11190 cost = const_ok ? 4 : 8;
11191 booth_unit_size = 2;
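/* Each loop iteration stands for the multiplier retiring
   booth_unit_size bits of the constant; e.g. for i = 0xff (a valid
   immediate, so the base cost is 4) the loop runs four times, giving
   COSTS_N_INSNS (8).  */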
11192 for (j = 0; i && j < 32; j += booth_unit_size)
11194 i >>= booth_unit_size;
11195 cost++;
11198 *total = COSTS_N_INSNS (cost);
11199 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
11200 return true;
11203 *total = COSTS_N_INSNS (20);
11204 return false;
11206 default:
11207 return arm_rtx_costs_1 (x, outer_code, total, speed);
11212 /* RTX cost for cores with a fast multiply unit (M variants). */
11214 static bool
11215 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11216 int *total, bool speed)
11218 machine_mode mode = GET_MODE (x);
11220 if (TARGET_THUMB1)
11222 *total = thumb1_rtx_costs (x, code, outer_code);
11223 return true;
11226 /* ??? Should Thumb-2 use different costs? */
11227 switch (code)
11229 case MULT:
11230 /* There is no point basing this on the tuning, since it is always the
11231 fast variant if it exists at all. */
11232 if (mode == DImode
11233 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11234 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11235 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11237 *total = COSTS_N_INSNS(2);
11238 return false;
11242 if (mode == DImode)
11244 *total = COSTS_N_INSNS (5);
11245 return false;
11248 if (CONST_INT_P (XEXP (x, 1)))
11250 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11251 & (unsigned HOST_WIDE_INT) 0xffffffff);
11252 int cost, const_ok = const_ok_for_arm (i);
11253 int j, booth_unit_size;
11255 /* Tune as appropriate. */
11256 cost = const_ok ? 4 : 8;
11257 booth_unit_size = 8;
11258 for (j = 0; i && j < 32; j += booth_unit_size)
11260 i >>= booth_unit_size;
11261 cost++;
11264 *total = COSTS_N_INSNS(cost);
11265 return false;
11268 if (mode == SImode)
11270 *total = COSTS_N_INSNS (4);
11271 return false;
11274 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11276 if (TARGET_HARD_FLOAT
11277 && (mode == SFmode
11278 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11280 *total = COSTS_N_INSNS (1);
11281 return false;
11285 /* Requires a lib call */
11286 *total = COSTS_N_INSNS (20);
11287 return false;
11289 default:
11290 return arm_rtx_costs_1 (x, outer_code, total, speed);
11295 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11296 so it can be ignored. */
11298 static bool
11299 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11300 int *total, bool speed)
11302 machine_mode mode = GET_MODE (x);
11304 if (TARGET_THUMB)
11306 *total = thumb1_rtx_costs (x, code, outer_code);
11307 return true;
11310 switch (code)
11312 case COMPARE:
11313 if (GET_CODE (XEXP (x, 0)) != MULT)
11314 return arm_rtx_costs_1 (x, outer_code, total, speed);
11316 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11317 will stall until the multiplication is complete. */
11318 *total = COSTS_N_INSNS (3);
11319 return false;
11321 case MULT:
11322 /* There is no point basing this on the tuning, since it is always the
11323 fast variant if it exists at all. */
11324 if (mode == DImode
11325 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11326 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11327 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11329 *total = COSTS_N_INSNS (2);
11330 return false;
11334 if (mode == DImode)
11336 *total = COSTS_N_INSNS (5);
11337 return false;
11340 if (CONST_INT_P (XEXP (x, 1)))
11342 /* If operand 1 is a constant we can more accurately
11343 calculate the cost of the multiply. The multiplier can
11344 retire 15 bits on the first cycle and a further 12 on the
11345 second. We do, of course, have to load the constant into
11346 a register first. */
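/* E.g. for 0x12345678 both masks below are non-zero, so the cost works
   out to COSTS_N_INSNS (3).  */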
11347 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11348 /* There's a general overhead of one cycle. */
11349 int cost = 1;
11350 unsigned HOST_WIDE_INT masked_const;
11352 if (i & 0x80000000)
11353 i = ~i;
11355 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11357 masked_const = i & 0xffff8000;
11358 if (masked_const != 0)
11360 cost++;
11361 masked_const = i & 0xf8000000;
11362 if (masked_const != 0)
11363 cost++;
11365 *total = COSTS_N_INSNS (cost);
11366 return false;
11369 if (mode == SImode)
11371 *total = COSTS_N_INSNS (3);
11372 return false;
11375 /* Requires a lib call */
11376 *total = COSTS_N_INSNS (20);
11377 return false;
11379 default:
11380 return arm_rtx_costs_1 (x, outer_code, total, speed);
11385 /* RTX costs for 9e (and later) cores. */
11387 static bool
11388 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11389 int *total, bool speed)
11391 machine_mode mode = GET_MODE (x);
11393 if (TARGET_THUMB1)
11395 switch (code)
11397 case MULT:
11398 /* Small multiply: 32 cycles for an integer multiply inst. */
11399 if (arm_arch6m && arm_m_profile_small_mul)
11400 *total = COSTS_N_INSNS (32);
11401 else
11402 *total = COSTS_N_INSNS (3);
11403 return true;
11405 default:
11406 *total = thumb1_rtx_costs (x, code, outer_code);
11407 return true;
11411 switch (code)
11413 case MULT:
11414 /* There is no point basing this on the tuning, since it is always the
11415 fast variant if it exists at all. */
11416 if (mode == DImode
11417 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11418 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11419 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11421 *total = COSTS_N_INSNS (2);
11422 return false;
11426 if (mode == DImode)
11428 *total = COSTS_N_INSNS (5);
11429 return false;
11432 if (mode == SImode)
11434 *total = COSTS_N_INSNS (2);
11435 return false;
11438 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11440 if (TARGET_HARD_FLOAT
11441 && (mode == SFmode
11442 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11444 *total = COSTS_N_INSNS (1);
11445 return false;
11449 *total = COSTS_N_INSNS (20);
11450 return false;
11452 default:
11453 return arm_rtx_costs_1 (x, outer_code, total, speed);
11456 /* All address computations that can be done are free, but rtx cost returns
11457 the same for practically all of them. So we weight the different types
11458 of address here in the order (most pref first):
11459 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
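/* Illustrative weights from the code below: an auto-increment address
   scores 0, [reg, #imm] scores 2, [reg, reg, LSL #n] scores 3, any other
   reg-plus-reg sum 4, a bare register 6, and a MEM/LABEL_REF/SYMBOL_REF
   address 10.  */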
11460 static inline int
11461 arm_arm_address_cost (rtx x)
11463 enum rtx_code c = GET_CODE (x);
11465 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11466 return 0;
11467 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11468 return 10;
11470 if (c == PLUS)
11472 if (CONST_INT_P (XEXP (x, 1)))
11473 return 2;
11475 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11476 return 3;
11478 return 4;
11481 return 6;
11484 static inline int
11485 arm_thumb_address_cost (rtx x)
11487 enum rtx_code c = GET_CODE (x);
11489 if (c == REG)
11490 return 1;
11491 if (c == PLUS
11492 && REG_P (XEXP (x, 0))
11493 && CONST_INT_P (XEXP (x, 1)))
11494 return 1;
11496 return 2;
11499 static int
11500 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11501 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11503 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11506 /* Adjust cost hook for XScale. */
11507 static bool
11508 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11510 /* Some true dependencies can have a higher cost depending
11511 on precisely how certain input operands are used. */
11512 if (REG_NOTE_KIND(link) == 0
11513 && recog_memoized (insn) >= 0
11514 && recog_memoized (dep) >= 0)
11516 int shift_opnum = get_attr_shift (insn);
11517 enum attr_type attr_type = get_attr_type (dep);
11519 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11520 operand for INSN. If we have a shifted input operand and the
11521 instruction we depend on is another ALU instruction, then we may
11522 have to account for an additional stall. */
11523 if (shift_opnum != 0
11524 && (attr_type == TYPE_ALU_SHIFT_IMM
11525 || attr_type == TYPE_ALUS_SHIFT_IMM
11526 || attr_type == TYPE_LOGIC_SHIFT_IMM
11527 || attr_type == TYPE_LOGICS_SHIFT_IMM
11528 || attr_type == TYPE_ALU_SHIFT_REG
11529 || attr_type == TYPE_ALUS_SHIFT_REG
11530 || attr_type == TYPE_LOGIC_SHIFT_REG
11531 || attr_type == TYPE_LOGICS_SHIFT_REG
11532 || attr_type == TYPE_MOV_SHIFT
11533 || attr_type == TYPE_MVN_SHIFT
11534 || attr_type == TYPE_MOV_SHIFT_REG
11535 || attr_type == TYPE_MVN_SHIFT_REG))
11537 rtx shifted_operand;
11538 int opno;
11540 /* Get the shifted operand. */
11541 extract_insn (insn);
11542 shifted_operand = recog_data.operand[shift_opnum];
11544 /* Iterate over all the operands in DEP. If we write an operand
11545 that overlaps with SHIFTED_OPERAND, then we have to increase the
11546 cost of this dependency. */
11547 extract_insn (dep);
11548 preprocess_constraints (dep);
11549 for (opno = 0; opno < recog_data.n_operands; opno++)
11551 /* We can ignore strict inputs. */
11552 if (recog_data.operand_type[opno] == OP_IN)
11553 continue;
11555 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11556 shifted_operand))
11558 *cost = 2;
11559 return false;
11564 return true;
11567 /* Adjust cost hook for Cortex A9. */
11568 static bool
11569 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11571 switch (REG_NOTE_KIND (link))
11573 case REG_DEP_ANTI:
11574 *cost = 0;
11575 return false;
11577 case REG_DEP_TRUE:
11578 case REG_DEP_OUTPUT:
11579 if (recog_memoized (insn) >= 0
11580 && recog_memoized (dep) >= 0)
11582 if (GET_CODE (PATTERN (insn)) == SET)
11584 if (GET_MODE_CLASS
11585 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11586 || GET_MODE_CLASS
11587 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11589 enum attr_type attr_type_insn = get_attr_type (insn);
11590 enum attr_type attr_type_dep = get_attr_type (dep);
11592 /* By default all dependencies of the form
11593 s0 = s0 <op> s1
11594 s0 = s0 <op> s2
11595 have an extra latency of 1 cycle because
11596 of the input and output dependency in this
11597 case. However, this gets modeled as a true
11598 dependency, hence all these checks. */
11599 if (REG_P (SET_DEST (PATTERN (insn)))
11600 && REG_P (SET_DEST (PATTERN (dep)))
11601 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11602 SET_DEST (PATTERN (dep))))
11604 /* FMACS is a special case where the dependent
11605 instruction can be issued 3 cycles before
11606 the normal latency in case of an output
11607 dependency. */
11608 if ((attr_type_insn == TYPE_FMACS
11609 || attr_type_insn == TYPE_FMACD)
11610 && (attr_type_dep == TYPE_FMACS
11611 || attr_type_dep == TYPE_FMACD))
11613 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11614 *cost = insn_default_latency (dep) - 3;
11615 else
11616 *cost = insn_default_latency (dep);
11617 return false;
11619 else
11621 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11622 *cost = insn_default_latency (dep) + 1;
11623 else
11624 *cost = insn_default_latency (dep);
11626 return false;
11631 break;
11633 default:
11634 gcc_unreachable ();
11637 return true;
11640 /* Adjust cost hook for FA726TE. */
11641 static bool
11642 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11644 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11645 followed by a predicated one) has a penalty of 3. */
11646 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11647 && recog_memoized (insn) >= 0
11648 && recog_memoized (dep) >= 0
11649 && get_attr_conds (dep) == CONDS_SET)
11651 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11652 if (get_attr_conds (insn) == CONDS_USE
11653 && get_attr_type (insn) != TYPE_BRANCH)
11655 *cost = 3;
11656 return false;
11659 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11660 || get_attr_conds (insn) == CONDS_USE)
11662 *cost = 0;
11663 return false;
11667 return true;
11670 /* Implement TARGET_REGISTER_MOVE_COST.
11672 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11673 one that is typically more expensive than a single memory access. We set
11674 the cost to less than two memory accesses so that floating
11675 point to integer conversion does not go through memory. */
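/* As an illustrative sketch (not part of the original comment): with the
   values below, a VFP<->core-register move costs 15, while
   arm_memory_move_cost returns 10 for 32-bit targets, so the direct move is
   dearer than one memory access but cheaper than the store/load round trip
   (2 * 10 = 20) that going through memory would need.  */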
11678 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11679 reg_class_t from, reg_class_t to)
11681 if (TARGET_32BIT)
11683 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11684 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11685 return 15;
11686 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11687 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11688 return 4;
11689 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11690 return 20;
11691 else
11692 return 2;
11694 else
11696 if (from == HI_REGS || to == HI_REGS)
11697 return 4;
11698 else
11699 return 2;
11703 /* Implement TARGET_MEMORY_MOVE_COST. */
11706 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11707 bool in ATTRIBUTE_UNUSED)
11709 if (TARGET_32BIT)
11710 return 10;
11711 else
11713 if (GET_MODE_SIZE (mode) < 4)
11714 return 8;
11715 else
11716 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11720 /* Vectorizer cost model implementation. */
11722 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11723 static int
11724 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11725 tree vectype,
11726 int misalign ATTRIBUTE_UNUSED)
11728 unsigned elements;
11730 switch (type_of_cost)
11732 case scalar_stmt:
11733 return current_tune->vec_costs->scalar_stmt_cost;
11735 case scalar_load:
11736 return current_tune->vec_costs->scalar_load_cost;
11738 case scalar_store:
11739 return current_tune->vec_costs->scalar_store_cost;
11741 case vector_stmt:
11742 return current_tune->vec_costs->vec_stmt_cost;
11744 case vector_load:
11745 return current_tune->vec_costs->vec_align_load_cost;
11747 case vector_store:
11748 return current_tune->vec_costs->vec_store_cost;
11750 case vec_to_scalar:
11751 return current_tune->vec_costs->vec_to_scalar_cost;
11753 case scalar_to_vec:
11754 return current_tune->vec_costs->scalar_to_vec_cost;
11756 case unaligned_load:
11757 return current_tune->vec_costs->vec_unalign_load_cost;
11759 case unaligned_store:
11760 return current_tune->vec_costs->vec_unalign_store_cost;
11762 case cond_branch_taken:
11763 return current_tune->vec_costs->cond_taken_branch_cost;
11765 case cond_branch_not_taken:
11766 return current_tune->vec_costs->cond_not_taken_branch_cost;
11768 case vec_perm:
11769 case vec_promote_demote:
11770 return current_tune->vec_costs->vec_stmt_cost;
11772 case vec_construct:
11773 elements = TYPE_VECTOR_SUBPARTS (vectype);
11774 return elements / 2 + 1;
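      /* For example (illustrative only): constructing a 4-element vector
	 costs 4 / 2 + 1 = 3 under this model.  */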
11776 default:
11777 gcc_unreachable ();
11781 /* Implement targetm.vectorize.add_stmt_cost. */
11783 static unsigned
11784 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11785 struct _stmt_vec_info *stmt_info, int misalign,
11786 enum vect_cost_model_location where)
11788 unsigned *cost = (unsigned *) data;
11789 unsigned retval = 0;
11791 if (flag_vect_cost_model)
11793 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11794 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11796 /* Statements in an inner loop relative to the loop being
11797 vectorized are weighted more heavily. The value here is
11798 arbitrary and could potentially be improved with analysis. */
11799 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11800 count *= 50; /* FIXME. */
11802 retval = (unsigned) (count * stmt_cost);
11803 cost[where] += retval;
11806 return retval;
11809 /* Return true if and only if this insn can dual-issue only as older. */
11810 static bool
11811 cortexa7_older_only (rtx_insn *insn)
11813 if (recog_memoized (insn) < 0)
11814 return false;
11816 switch (get_attr_type (insn))
11818 case TYPE_ALU_DSP_REG:
11819 case TYPE_ALU_SREG:
11820 case TYPE_ALUS_SREG:
11821 case TYPE_LOGIC_REG:
11822 case TYPE_LOGICS_REG:
11823 case TYPE_ADC_REG:
11824 case TYPE_ADCS_REG:
11825 case TYPE_ADR:
11826 case TYPE_BFM:
11827 case TYPE_REV:
11828 case TYPE_MVN_REG:
11829 case TYPE_SHIFT_IMM:
11830 case TYPE_SHIFT_REG:
11831 case TYPE_LOAD_BYTE:
11832 case TYPE_LOAD1:
11833 case TYPE_STORE1:
11834 case TYPE_FFARITHS:
11835 case TYPE_FADDS:
11836 case TYPE_FFARITHD:
11837 case TYPE_FADDD:
11838 case TYPE_FMOV:
11839 case TYPE_F_CVT:
11840 case TYPE_FCMPS:
11841 case TYPE_FCMPD:
11842 case TYPE_FCONSTS:
11843 case TYPE_FCONSTD:
11844 case TYPE_FMULS:
11845 case TYPE_FMACS:
11846 case TYPE_FMULD:
11847 case TYPE_FMACD:
11848 case TYPE_FDIVS:
11849 case TYPE_FDIVD:
11850 case TYPE_F_MRC:
11851 case TYPE_F_MRRC:
11852 case TYPE_F_FLAG:
11853 case TYPE_F_LOADS:
11854 case TYPE_F_STORES:
11855 return true;
11856 default:
11857 return false;
11861 /* Return true if and only if this insn can dual-issue as younger. */
11862 static bool
11863 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11865 if (recog_memoized (insn) < 0)
11867 if (verbose > 5)
11868 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11869 return false;
11872 switch (get_attr_type (insn))
11874 case TYPE_ALU_IMM:
11875 case TYPE_ALUS_IMM:
11876 case TYPE_LOGIC_IMM:
11877 case TYPE_LOGICS_IMM:
11878 case TYPE_EXTEND:
11879 case TYPE_MVN_IMM:
11880 case TYPE_MOV_IMM:
11881 case TYPE_MOV_REG:
11882 case TYPE_MOV_SHIFT:
11883 case TYPE_MOV_SHIFT_REG:
11884 case TYPE_BRANCH:
11885 case TYPE_CALL:
11886 return true;
11887 default:
11888 return false;
11893 /* Look for an instruction that can dual issue only as an older
11894 instruction, and move it in front of any instructions that can
11895 dual-issue as younger, while preserving the relative order of all
11896 other instructions in the ready list. This is a heuristic to help
11897 dual-issue in later cycles, by postponing issue of more flexible
11898 instructions. This heuristic may affect dual issue opportunities
11899 in the current cycle. */
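/* Illustrative sketch (not part of the original comment): the ready list is
   stored with the head -- the insn that will issue first -- at index
   *n_readyp - 1.  If the head insn can issue as younger and an older-only
   insn appears further down the list, the older-only insn is moved to the
   head, so the more flexible younger insn is postponed and can pair up in a
   later cycle.  */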
11900 static void
11901 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11902 int *n_readyp, int clock)
11904 int i;
11905 int first_older_only = -1, first_younger = -1;
11907 if (verbose > 5)
11908 fprintf (file,
11909 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11910 clock,
11911 *n_readyp);
11913 /* Traverse the ready list from the head (the instruction to issue
11914 first), looking for the first instruction that can issue as
11915 younger and the first instruction that can dual-issue only as
11916 older. */
11917 for (i = *n_readyp - 1; i >= 0; i--)
11919 rtx_insn *insn = ready[i];
11920 if (cortexa7_older_only (insn))
11922 first_older_only = i;
11923 if (verbose > 5)
11924 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11925 break;
11927 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11928 first_younger = i;
11931 /* Nothing to reorder because either no younger insn found or insn
11932 that can dual-issue only as older appears before any insn that
11933 can dual-issue as younger. */
11934 if (first_younger == -1)
11936 if (verbose > 5)
11937 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11938 return;
11941 /* Nothing to reorder because no older-only insn in the ready list. */
11942 if (first_older_only == -1)
11944 if (verbose > 5)
11945 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11946 return;
11949 /* Move first_older_only insn before first_younger. */
11950 if (verbose > 5)
11951 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11952 INSN_UID(ready [first_older_only]),
11953 INSN_UID(ready [first_younger]));
11954 rtx_insn *first_older_only_insn = ready [first_older_only];
11955 for (i = first_older_only; i < first_younger; i++)
11957 ready[i] = ready[i+1];
11960 ready[i] = first_older_only_insn;
11961 return;
11964 /* Implement TARGET_SCHED_REORDER. */
11965 static int
11966 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11967 int clock)
11969 switch (arm_tune)
11971 case cortexa7:
11972 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11973 break;
11974 default:
11975 /* Do nothing for other cores. */
11976 break;
11979 return arm_issue_rate ();
11982 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11983 It corrects the value of COST based on the relationship between
11984 INSN and DEP through the dependence LINK. It returns the new
11985 value. There is a per-core adjust_cost hook to adjust scheduler costs
11986 and the per-core hook can choose to completely override the generic
11987 adjust_cost function. Only put bits of code into arm_adjust_cost that
11988 are common across all cores. */
11989 static int
11990 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11992 rtx i_pat, d_pat;
11994 /* When generating Thumb-1 code, we want to place flag-setting operations
11995 close to a conditional branch which depends on them, so that we can
11996 omit the comparison. */
11997 if (TARGET_THUMB1
11998 && REG_NOTE_KIND (link) == 0
11999 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12000 && recog_memoized (dep) >= 0
12001 && get_attr_conds (dep) == CONDS_SET)
12002 return 0;
12004 if (current_tune->sched_adjust_cost != NULL)
12006 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
12007 return cost;
12010 /* XXX Is this strictly true? */
12011 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
12012 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
12013 return 0;
12015 /* Call insns don't incur a stall, even if they follow a load. */
12016 if (REG_NOTE_KIND (link) == 0
12017 && CALL_P (insn))
12018 return 1;
12020 if ((i_pat = single_set (insn)) != NULL
12021 && MEM_P (SET_SRC (i_pat))
12022 && (d_pat = single_set (dep)) != NULL
12023 && MEM_P (SET_DEST (d_pat)))
12025 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12026 /* This is a load after a store; there is no conflict if the load reads
12027 from a cached area. Assume that loads from the stack and from the
12028 constant pool are cached, and that others will miss. This is a
12029 hack. */
12031 if ((GET_CODE (src_mem) == SYMBOL_REF
12032 && CONSTANT_POOL_ADDRESS_P (src_mem))
12033 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12034 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12035 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12036 return 1;
12039 return cost;
12043 arm_max_conditional_execute (void)
12045 return max_insns_skipped;
12048 static int
12049 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12051 if (TARGET_32BIT)
12052 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12053 else
12054 return (optimize > 0) ? 2 : 0;
12057 static int
12058 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12060 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12063 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12064 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12065 sequences of non-executed instructions in IT blocks probably take the same
12066 amount of time as executed instructions (and the IT instruction itself takes
12067 space in icache). This function was experimentally determined to give good
12068 results on a popular embedded benchmark. */
12070 static int
12071 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12073 return (TARGET_32BIT && speed_p) ? 1
12074 : arm_default_branch_cost (speed_p, predictable_p);
12077 static int
12078 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12080 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12083 static bool fp_consts_inited = false;
12085 static REAL_VALUE_TYPE value_fp0;
12087 static void
12088 init_fp_table (void)
12090 REAL_VALUE_TYPE r;
12092 r = REAL_VALUE_ATOF ("0", DFmode);
12093 value_fp0 = r;
12094 fp_consts_inited = true;
12097 /* Return TRUE if rtx X is a valid immediate FP constant. */
12099 arm_const_double_rtx (rtx x)
12101 REAL_VALUE_TYPE r;
12103 if (!fp_consts_inited)
12104 init_fp_table ();
12106 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12107 if (REAL_VALUE_MINUS_ZERO (r))
12108 return 0;
12110 if (REAL_VALUES_EQUAL (r, value_fp0))
12111 return 1;
12113 return 0;
12116 /* VFPv3 has a fairly wide range of representable immediates, formed from
12117 "quarter-precision" floating-point values. These can be evaluated using this
12118 formula (with ^ for exponentiation):
12120 -1^s * n * 2^-r
12122 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12123 16 <= n <= 31 and 0 <= r <= 7.
12125 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12127 - A (most-significant) is the sign bit.
12128 - BCD are the exponent (encoded as r XOR 3).
12129 - EFGH are the mantissa (encoded as n - 16).
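/* Worked example (an illustrative addition): 1.0 = 16 * 2^-4, so s = 0,
   n = 16, r = 4, giving ABCDEFGH = 0 111 0000 = 0x70; similarly
   0.5 = 16 * 2^-5 encodes as 0 110 0000 = 0x60.  */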
12132 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12133 fconst[sd] instruction, or -1 if X isn't suitable. */
12134 static int
12135 vfp3_const_double_index (rtx x)
12137 REAL_VALUE_TYPE r, m;
12138 int sign, exponent;
12139 unsigned HOST_WIDE_INT mantissa, mant_hi;
12140 unsigned HOST_WIDE_INT mask;
12141 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12142 bool fail;
12144 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12145 return -1;
12147 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12149 /* We can't represent these things, so detect them first. */
12150 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12151 return -1;
12153 /* Extract sign, exponent and mantissa. */
12154 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12155 r = real_value_abs (&r);
12156 exponent = REAL_EXP (&r);
12157 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12158 highest (sign) bit, with a fixed binary point at bit point_pos.
12159 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12160 bits for the mantissa, this may fail (low bits would be lost). */
12161 real_ldexp (&m, &r, point_pos - exponent);
12162 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12163 mantissa = w.elt (0);
12164 mant_hi = w.elt (1);
12166 /* If there are bits set in the low part of the mantissa, we can't
12167 represent this value. */
12168 if (mantissa != 0)
12169 return -1;
12171 /* Now make it so that mantissa contains the most-significant bits, and move
12172 the point_pos to indicate that the least-significant bits have been
12173 discarded. */
12174 point_pos -= HOST_BITS_PER_WIDE_INT;
12175 mantissa = mant_hi;
12177 /* We can permit four significant bits of mantissa only, plus a high bit
12178 which is always 1. */
12179 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12180 if ((mantissa & mask) != 0)
12181 return -1;
12183 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12184 mantissa >>= point_pos - 5;
12186 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12187 floating-point immediate zero with Neon using an integer-zero load, but
12188 that case is handled elsewhere.) */
12189 if (mantissa == 0)
12190 return -1;
12192 gcc_assert (mantissa >= 16 && mantissa <= 31);
12194 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12195 normalized significands are in the range [1, 2). (Our mantissa is shifted
12196 left 4 places at this point relative to normalized IEEE754 values). GCC
12197 internally uses [0.5, 1) (see real.c), so the exponent returned from
12198 REAL_EXP must be altered. */
12199 exponent = 5 - exponent;
12201 if (exponent < 0 || exponent > 7)
12202 return -1;
12204 /* Sign, mantissa and exponent are now in the correct form to plug into the
12205 formula described in the comment above. */
12206 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12209 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12211 vfp3_const_double_rtx (rtx x)
12213 if (!TARGET_VFP3)
12214 return 0;
12216 return vfp3_const_double_index (x) != -1;
12219 /* Recognize immediates which can be used in various Neon instructions. Legal
12220 immediates are described by the following table (for VMVN variants, the
12221 bitwise inverse of the constant shown is recognized. In either case, VMOV
12222 is output and the correct instruction to use for a given constant is chosen
12223 by the assembler). The constant shown is replicated across all elements of
12224 the destination vector.
12226 insn elems variant constant (binary)
12227 ---- ----- ------- -----------------
12228 vmov i32 0 00000000 00000000 00000000 abcdefgh
12229 vmov i32 1 00000000 00000000 abcdefgh 00000000
12230 vmov i32 2 00000000 abcdefgh 00000000 00000000
12231 vmov i32 3 abcdefgh 00000000 00000000 00000000
12232 vmov i16 4 00000000 abcdefgh
12233 vmov i16 5 abcdefgh 00000000
12234 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12235 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12236 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12237 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12238 vmvn i16 10 00000000 abcdefgh
12239 vmvn i16 11 abcdefgh 00000000
12240 vmov i32 12 00000000 00000000 abcdefgh 11111111
12241 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12242 vmov i32 14 00000000 abcdefgh 11111111 11111111
12243 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12244 vmov i8 16 abcdefgh
12245 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12246 eeeeeeee ffffffff gggggggg hhhhhhhh
12247 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12248 vmov f32 19 00000000 00000000 00000000 00000000
12250 For case 18, B = !b. Representable values are exactly those accepted by
12251 vfp3_const_double_index, but are output as floating-point numbers rather
12252 than indices.
12254 For case 19, we will change it to vmov.i32 when assembling.
12256 Variants 0-5 (inclusive) may also be used as immediates for the second
12257 operand of VORR/VBIC instructions.
12259 The INVERSE argument causes the bitwise inverse of the given operand to be
12260 recognized instead (used for recognizing legal immediates for the VAND/VORN
12261 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12262 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12263 output, rather than the real insns vbic/vorr).
12265 INVERSE makes no difference to the recognition of float vectors.
12267 The return value is the variant of immediate as shown in the above table, or
12268 -1 if the given value doesn't match any of the listed patterns.
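/* For instance (an illustrative sketch): a V4SI vector with every element
   equal to 0x0000ab00 matches variant 1 above, so the function returns 1
   with *ELEMENTWIDTH = 32 and *MODCONST = 0x0000ab00.  */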
12270 static int
12271 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12272 rtx *modconst, int *elementwidth)
12274 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12275 matches = 1; \
12276 for (i = 0; i < idx; i += (STRIDE)) \
12277 if (!(TEST)) \
12278 matches = 0; \
12279 if (matches) \
12281 immtype = (CLASS); \
12282 elsize = (ELSIZE); \
12283 break; \
12286 unsigned int i, elsize = 0, idx = 0, n_elts;
12287 unsigned int innersize;
12288 unsigned char bytes[16];
12289 int immtype = -1, matches;
12290 unsigned int invmask = inverse ? 0xff : 0;
12291 bool vector = GET_CODE (op) == CONST_VECTOR;
12293 if (vector)
12294 n_elts = CONST_VECTOR_NUNITS (op);
12295 else
12297 n_elts = 1;
12298 if (mode == VOIDmode)
12299 mode = DImode;
12302 innersize = GET_MODE_UNIT_SIZE (mode);
12304 /* Vectors of float constants. */
12305 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12307 rtx el0 = CONST_VECTOR_ELT (op, 0);
12308 REAL_VALUE_TYPE r0;
12310 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12311 return -1;
12313 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12315 for (i = 1; i < n_elts; i++)
12317 rtx elt = CONST_VECTOR_ELT (op, i);
12318 REAL_VALUE_TYPE re;
12320 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12322 if (!REAL_VALUES_EQUAL (r0, re))
12323 return -1;
12326 if (modconst)
12327 *modconst = CONST_VECTOR_ELT (op, 0);
12329 if (elementwidth)
12330 *elementwidth = 0;
12332 if (el0 == CONST0_RTX (GET_MODE (el0)))
12333 return 19;
12334 else
12335 return 18;
12338 /* Splat vector constant out into a byte vector. */
12339 for (i = 0; i < n_elts; i++)
12341 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12342 unsigned HOST_WIDE_INT elpart;
12343 unsigned int part, parts;
12345 if (CONST_INT_P (el))
12347 elpart = INTVAL (el);
12348 parts = 1;
12350 else if (CONST_DOUBLE_P (el))
12352 elpart = CONST_DOUBLE_LOW (el);
12353 parts = 2;
12355 else
12356 gcc_unreachable ();
12358 for (part = 0; part < parts; part++)
12360 unsigned int byte;
12361 for (byte = 0; byte < innersize; byte++)
12363 bytes[idx++] = (elpart & 0xff) ^ invmask;
12364 elpart >>= BITS_PER_UNIT;
12366 if (CONST_DOUBLE_P (el))
12367 elpart = CONST_DOUBLE_HIGH (el);
12371 /* Sanity check. */
12372 gcc_assert (idx == GET_MODE_SIZE (mode));
12376 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12377 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12379 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12380 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12382 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12383 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12385 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12386 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12388 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12390 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12392 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12393 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12395 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12396 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12398 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12399 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12401 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12402 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12404 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12406 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12408 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12409 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12411 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12412 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12414 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12415 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12417 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12418 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12420 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12422 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12423 && bytes[i] == bytes[(i + 8) % idx]);
12425 while (0);
12427 if (immtype == -1)
12428 return -1;
12430 if (elementwidth)
12431 *elementwidth = elsize;
12433 if (modconst)
12435 unsigned HOST_WIDE_INT imm = 0;
12437 /* Un-invert bytes of recognized vector, if necessary. */
12438 if (invmask != 0)
12439 for (i = 0; i < idx; i++)
12440 bytes[i] ^= invmask;
12442 if (immtype == 17)
12444 /* FIXME: Broken on 32-bit H_W_I hosts. */
12445 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12447 for (i = 0; i < 8; i++)
12448 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12449 << (i * BITS_PER_UNIT);
12451 *modconst = GEN_INT (imm);
12453 else
12455 unsigned HOST_WIDE_INT imm = 0;
12457 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12458 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12460 *modconst = GEN_INT (imm);
12464 return immtype;
12465 #undef CHECK
12468 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12469 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12470 float elements), and a modified constant (whatever should be output for a
12471 VMOV) in *MODCONST. */
12474 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12475 rtx *modconst, int *elementwidth)
12477 rtx tmpconst;
12478 int tmpwidth;
12479 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12481 if (retval == -1)
12482 return 0;
12484 if (modconst)
12485 *modconst = tmpconst;
12487 if (elementwidth)
12488 *elementwidth = tmpwidth;
12490 return 1;
12493 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12494 the immediate is valid, write a constant suitable for using as an operand
12495 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12496 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12499 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12500 rtx *modconst, int *elementwidth)
12502 rtx tmpconst;
12503 int tmpwidth;
12504 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12506 if (retval < 0 || retval > 5)
12507 return 0;
12509 if (modconst)
12510 *modconst = tmpconst;
12512 if (elementwidth)
12513 *elementwidth = tmpwidth;
12515 return 1;
12518 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12519 the immediate is valid, write a constant suitable for using as an operand
12520 to VSHR/VSHL to *MODCONST and the corresponding element width to
12521 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left and a right shift,
12522 because they have different limitations. */
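/* For example (illustrative): in V4SImode the element size is 32 bits, so a
   left-shift count may be 0..31 while a right-shift count may be 1..32; any
   other count, or a vector whose elements differ, is rejected.  */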
12525 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12526 rtx *modconst, int *elementwidth,
12527 bool isleftshift)
12529 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12530 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12531 unsigned HOST_WIDE_INT last_elt = 0;
12532 unsigned HOST_WIDE_INT maxshift;
12534 /* Check that the shift amount is identical across all vector elements. */
12535 for (i = 0; i < n_elts; i++)
12537 rtx el = CONST_VECTOR_ELT (op, i);
12538 unsigned HOST_WIDE_INT elpart;
12540 if (CONST_INT_P (el))
12541 elpart = INTVAL (el);
12542 else if (CONST_DOUBLE_P (el))
12543 return 0;
12544 else
12545 gcc_unreachable ();
12547 if (i != 0 && elpart != last_elt)
12548 return 0;
12550 last_elt = elpart;
12553 /* Shift less than element size. */
12554 maxshift = innersize * 8;
12556 if (isleftshift)
12558 /* Left shift immediate value can be from 0 to <size>-1. */
12559 if (last_elt >= maxshift)
12560 return 0;
12562 else
12564 /* Right shift immediate value can be from 1 to <size>. */
12565 if (last_elt == 0 || last_elt > maxshift)
12566 return 0;
12569 if (elementwidth)
12570 *elementwidth = innersize * 8;
12572 if (modconst)
12573 *modconst = CONST_VECTOR_ELT (op, 0);
12575 return 1;
12578 /* Return a string suitable for output of Neon immediate logic operation
12579 MNEM. */
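/* For instance (a sketch, not from the original comment): with MNEM "vbic",
   a 32-bit element width and QUAD nonzero, the template returned below is
   "vbic.i32\t%q0, %2".  */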
12581 char *
12582 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12583 int inverse, int quad)
12585 int width, is_valid;
12586 static char templ[40];
12588 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12590 gcc_assert (is_valid != 0);
12592 if (quad)
12593 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12594 else
12595 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12597 return templ;
12600 /* Return a string suitable for output of Neon immediate shift operation
12601 (VSHR or VSHL) MNEM. */
12603 char *
12604 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12605 machine_mode mode, int quad,
12606 bool isleftshift)
12608 int width, is_valid;
12609 static char templ[40];
12611 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12612 gcc_assert (is_valid != 0);
12614 if (quad)
12615 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12616 else
12617 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12619 return templ;
12622 /* Output a sequence of pairwise operations to implement a reduction.
12623 NOTE: We do "too much work" here, because pairwise operations work on two
12624 registers-worth of operands in one go. Unfortunately we don't think we can
12625 exploit those extra calculations to do the full operation in fewer steps.
12626 Although all vector elements of the result but the first are ignored, we
12627 actually calculate the same result in each of the elements. An alternative
12628 such as initially loading a vector with zero to use as each of the second
12629 operands would use up an additional register and take an extra instruction,
12630 for no particular gain. */
12632 void
12633 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12634 rtx (*reduc) (rtx, rtx, rtx))
12636 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12637 rtx tmpsum = op1;
12639 for (i = parts / 2; i >= 1; i /= 2)
12641 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12642 emit_insn (reduc (dest, tmpsum, tmpsum));
12643 tmpsum = dest;
12647 /* If VALS is a vector constant that can be loaded into a register
12648 using VDUP, generate instructions to do so and return an RTX to
12649 assign to the register. Otherwise return NULL_RTX. */
12651 static rtx
12652 neon_vdup_constant (rtx vals)
12654 machine_mode mode = GET_MODE (vals);
12655 machine_mode inner_mode = GET_MODE_INNER (mode);
12656 rtx x;
12658 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12659 return NULL_RTX;
12661 if (!const_vec_duplicate_p (vals, &x))
12662 /* The elements are not all the same. We could handle repeating
12663 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12664 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12665 vdup.i16). */
12666 return NULL_RTX;
12668 /* We can load this constant by using VDUP and a constant in a
12669 single ARM register. This will be cheaper than a vector
12670 load. */
12672 x = copy_to_mode_reg (inner_mode, x);
12673 return gen_rtx_VEC_DUPLICATE (mode, x);
12676 /* Generate code to load VALS, which is a PARALLEL containing only
12677 constants (for vec_init) or CONST_VECTOR, efficiently into a
12678 register. Returns an RTX to copy into the register, or NULL_RTX
12679 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12682 neon_make_constant (rtx vals)
12684 machine_mode mode = GET_MODE (vals);
12685 rtx target;
12686 rtx const_vec = NULL_RTX;
12687 int n_elts = GET_MODE_NUNITS (mode);
12688 int n_const = 0;
12689 int i;
12691 if (GET_CODE (vals) == CONST_VECTOR)
12692 const_vec = vals;
12693 else if (GET_CODE (vals) == PARALLEL)
12695 /* A CONST_VECTOR must contain only CONST_INTs and
12696 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12697 Only store valid constants in a CONST_VECTOR. */
12698 for (i = 0; i < n_elts; ++i)
12700 rtx x = XVECEXP (vals, 0, i);
12701 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12702 n_const++;
12704 if (n_const == n_elts)
12705 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12707 else
12708 gcc_unreachable ();
12710 if (const_vec != NULL
12711 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12712 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12713 return const_vec;
12714 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12715 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12716 pipeline cycle; creating the constant takes one or two ARM
12717 pipeline cycles. */
12718 return target;
12719 else if (const_vec != NULL_RTX)
12720 /* Load from constant pool. On Cortex-A8 this takes two cycles
12721 (for either double or quad vectors). We can not take advantage
12722 of single-cycle VLD1 because we need a PC-relative addressing
12723 mode. */
12724 return const_vec;
12725 else
12726 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12727 We can not construct an initializer. */
12728 return NULL_RTX;
12731 /* Initialize vector TARGET to VALS. */
12733 void
12734 neon_expand_vector_init (rtx target, rtx vals)
12736 machine_mode mode = GET_MODE (target);
12737 machine_mode inner_mode = GET_MODE_INNER (mode);
12738 int n_elts = GET_MODE_NUNITS (mode);
12739 int n_var = 0, one_var = -1;
12740 bool all_same = true;
12741 rtx x, mem;
12742 int i;
12744 for (i = 0; i < n_elts; ++i)
12746 x = XVECEXP (vals, 0, i);
12747 if (!CONSTANT_P (x))
12748 ++n_var, one_var = i;
12750 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12751 all_same = false;
12754 if (n_var == 0)
12756 rtx constant = neon_make_constant (vals);
12757 if (constant != NULL_RTX)
12759 emit_move_insn (target, constant);
12760 return;
12764 /* Splat a single non-constant element if we can. */
12765 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12767 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12768 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12769 return;
12772 /* One field is non-constant. Load constant then overwrite varying
12773 field. This is more efficient than using the stack. */
12774 if (n_var == 1)
12776 rtx copy = copy_rtx (vals);
12777 rtx index = GEN_INT (one_var);
12779 /* Load constant part of vector, substitute neighboring value for
12780 varying element. */
12781 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12782 neon_expand_vector_init (target, copy);
12784 /* Insert variable. */
12785 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12786 switch (mode)
12788 case V8QImode:
12789 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12790 break;
12791 case V16QImode:
12792 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12793 break;
12794 case V4HImode:
12795 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12796 break;
12797 case V8HImode:
12798 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12799 break;
12800 case V2SImode:
12801 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12802 break;
12803 case V4SImode:
12804 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12805 break;
12806 case V2SFmode:
12807 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12808 break;
12809 case V4SFmode:
12810 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12811 break;
12812 case V2DImode:
12813 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12814 break;
12815 default:
12816 gcc_unreachable ();
12818 return;
12821 /* Construct the vector in memory one field at a time
12822 and load the whole vector. */
12823 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12824 for (i = 0; i < n_elts; i++)
12825 emit_move_insn (adjust_address_nv (mem, inner_mode,
12826 i * GET_MODE_SIZE (inner_mode)),
12827 XVECEXP (vals, 0, i));
12828 emit_move_insn (target, mem);
12831 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12832 an error if it doesn't. EXP indicates the source location, which includes the
12833 inlining history for intrinsics. */
12835 static void
12836 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12837 const_tree exp, const char *desc)
12839 HOST_WIDE_INT lane;
12841 gcc_assert (CONST_INT_P (operand));
12843 lane = INTVAL (operand);
12845 if (lane < low || lane >= high)
12847 if (exp)
12848 error ("%K%s %wd out of range %wd - %wd",
12849 exp, desc, lane, low, high - 1);
12850 else
12851 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12855 /* Bounds-check lanes. */
12857 void
12858 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12859 const_tree exp)
12861 bounds_check (operand, low, high, exp, "lane");
12864 /* Bounds-check constants. */
12866 void
12867 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12869 bounds_check (operand, low, high, NULL_TREE, "constant");
12872 HOST_WIDE_INT
12873 neon_element_bits (machine_mode mode)
12875 return GET_MODE_UNIT_BITSIZE (mode);
12879 /* Predicates for `match_operand' and `match_operator'. */
12881 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12882 WB is true if full writeback address modes are allowed and is false
12883 if limited writeback address modes (POST_INC and PRE_DEC) are
12884 allowed. */
12887 arm_coproc_mem_operand (rtx op, bool wb)
12889 rtx ind;
12891 /* Reject eliminable registers. */
12892 if (! (reload_in_progress || reload_completed || lra_in_progress)
12893 && ( reg_mentioned_p (frame_pointer_rtx, op)
12894 || reg_mentioned_p (arg_pointer_rtx, op)
12895 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12896 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12897 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12898 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12899 return FALSE;
12901 /* Constants are converted into offsets from labels. */
12902 if (!MEM_P (op))
12903 return FALSE;
12905 ind = XEXP (op, 0);
12907 if (reload_completed
12908 && (GET_CODE (ind) == LABEL_REF
12909 || (GET_CODE (ind) == CONST
12910 && GET_CODE (XEXP (ind, 0)) == PLUS
12911 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12912 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12913 return TRUE;
12915 /* Match: (mem (reg)). */
12916 if (REG_P (ind))
12917 return arm_address_register_rtx_p (ind, 0);
12919 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12920 acceptable in any case (subject to verification by
12921 arm_address_register_rtx_p). We need WB to be true to accept
12922 PRE_INC and POST_DEC. */
12923 if (GET_CODE (ind) == POST_INC
12924 || GET_CODE (ind) == PRE_DEC
12925 || (wb
12926 && (GET_CODE (ind) == PRE_INC
12927 || GET_CODE (ind) == POST_DEC)))
12928 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12930 if (wb
12931 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12932 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12933 && GET_CODE (XEXP (ind, 1)) == PLUS
12934 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12935 ind = XEXP (ind, 1);
12937 /* Match:
12938 (plus (reg)
12939 (const)). */
12940 if (GET_CODE (ind) == PLUS
12941 && REG_P (XEXP (ind, 0))
12942 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12943 && CONST_INT_P (XEXP (ind, 1))
12944 && INTVAL (XEXP (ind, 1)) > -1024
12945 && INTVAL (XEXP (ind, 1)) < 1024
12946 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12947 return TRUE;
12949 return FALSE;
12952 /* Return TRUE if OP is a memory operand which we can load or store a vector
12953 to/from. TYPE is one of the following values:
12954 0 - Vector load/store (vldr)
12955 1 - Core registers (ldm)
12956 2 - Element/structure loads (vld1)
12959 neon_vector_mem_operand (rtx op, int type, bool strict)
12961 rtx ind;
12963 /* Reject eliminable registers. */
12964 if (! (reload_in_progress || reload_completed)
12965 && ( reg_mentioned_p (frame_pointer_rtx, op)
12966 || reg_mentioned_p (arg_pointer_rtx, op)
12967 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12968 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12969 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12970 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12971 return !strict;
12973 /* Constants are converted into offsets from labels. */
12974 if (!MEM_P (op))
12975 return FALSE;
12977 ind = XEXP (op, 0);
12979 if (reload_completed
12980 && (GET_CODE (ind) == LABEL_REF
12981 || (GET_CODE (ind) == CONST
12982 && GET_CODE (XEXP (ind, 0)) == PLUS
12983 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12984 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12985 return TRUE;
12987 /* Match: (mem (reg)). */
12988 if (REG_P (ind))
12989 return arm_address_register_rtx_p (ind, 0);
12991 /* Allow post-increment with Neon registers. */
12992 if ((type != 1 && GET_CODE (ind) == POST_INC)
12993 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12994 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12996 /* Allow post-increment by register for VLDn */
12997 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12998 && GET_CODE (XEXP (ind, 1)) == PLUS
12999 && REG_P (XEXP (XEXP (ind, 1), 1)))
13000 return true;
13002 /* Match:
13003 (plus (reg)
13004 (const)). */
13005 if (type == 0
13006 && GET_CODE (ind) == PLUS
13007 && REG_P (XEXP (ind, 0))
13008 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13009 && CONST_INT_P (XEXP (ind, 1))
13010 && INTVAL (XEXP (ind, 1)) > -1024
13011 /* For quad modes, we restrict the constant offset to be slightly less
13012 than what the instruction format permits. We have no such constraint
13013 on double mode offsets. (This must match arm_legitimate_index_p.) */
13014 && (INTVAL (XEXP (ind, 1))
13015 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13016 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13017 return TRUE;
13019 return FALSE;
13022 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13023 type. */
13025 neon_struct_mem_operand (rtx op)
13027 rtx ind;
13029 /* Reject eliminable registers. */
13030 if (! (reload_in_progress || reload_completed)
13031 && ( reg_mentioned_p (frame_pointer_rtx, op)
13032 || reg_mentioned_p (arg_pointer_rtx, op)
13033 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13034 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13035 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13036 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13037 return FALSE;
13039 /* Constants are converted into offsets from labels. */
13040 if (!MEM_P (op))
13041 return FALSE;
13043 ind = XEXP (op, 0);
13045 if (reload_completed
13046 && (GET_CODE (ind) == LABEL_REF
13047 || (GET_CODE (ind) == CONST
13048 && GET_CODE (XEXP (ind, 0)) == PLUS
13049 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13050 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13051 return TRUE;
13053 /* Match: (mem (reg)). */
13054 if (REG_P (ind))
13055 return arm_address_register_rtx_p (ind, 0);
13057 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13058 if (GET_CODE (ind) == POST_INC
13059 || GET_CODE (ind) == PRE_DEC)
13060 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13062 return FALSE;
13065 /* Return true if X is a register that will be eliminated later on. */
13067 arm_eliminable_register (rtx x)
13069 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13070 || REGNO (x) == ARG_POINTER_REGNUM
13071 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13072 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13075 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13076 coprocessor registers. Otherwise return NO_REGS. */
13078 enum reg_class
13079 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13081 if (mode == HFmode)
13083 if (!TARGET_NEON_FP16)
13084 return GENERAL_REGS;
13085 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13086 return NO_REGS;
13087 return GENERAL_REGS;
13090 /* The neon move patterns handle all legitimate vector and struct
13091 addresses. */
13092 if (TARGET_NEON
13093 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13094 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13095 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13096 || VALID_NEON_STRUCT_MODE (mode)))
13097 return NO_REGS;
13099 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13100 return NO_REGS;
13102 return GENERAL_REGS;
13105 /* Values which must be returned in the most-significant end of the return
13106 register. */
13108 static bool
13109 arm_return_in_msb (const_tree valtype)
13111 return (TARGET_AAPCS_BASED
13112 && BYTES_BIG_ENDIAN
13113 && (AGGREGATE_TYPE_P (valtype)
13114 || TREE_CODE (valtype) == COMPLEX_TYPE
13115 || FIXED_POINT_TYPE_P (valtype)));
13118 /* Return TRUE if X references a SYMBOL_REF. */
13120 symbol_mentioned_p (rtx x)
13122 const char * fmt;
13123 int i;
13125 if (GET_CODE (x) == SYMBOL_REF)
13126 return 1;
13128 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13129 are constant offsets, not symbols. */
13130 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13131 return 0;
13133 fmt = GET_RTX_FORMAT (GET_CODE (x));
13135 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13137 if (fmt[i] == 'E')
13139 int j;
13141 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13142 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13143 return 1;
13145 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13146 return 1;
13149 return 0;
13152 /* Return TRUE if X references a LABEL_REF. */
13154 label_mentioned_p (rtx x)
13156 const char * fmt;
13157 int i;
13159 if (GET_CODE (x) == LABEL_REF)
13160 return 1;
13162 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13163 instruction, but they are constant offsets, not symbols. */
13164 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13165 return 0;
13167 fmt = GET_RTX_FORMAT (GET_CODE (x));
13168 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13170 if (fmt[i] == 'E')
13172 int j;
13174 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13175 if (label_mentioned_p (XVECEXP (x, i, j)))
13176 return 1;
13178 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13179 return 1;
13182 return 0;
13186 tls_mentioned_p (rtx x)
13188 switch (GET_CODE (x))
13190 case CONST:
13191 return tls_mentioned_p (XEXP (x, 0));
13193 case UNSPEC:
13194 if (XINT (x, 1) == UNSPEC_TLS)
13195 return 1;
13197 default:
13198 return 0;
13202 /* Must not copy any rtx that uses a pc-relative address. */
13204 static bool
13205 arm_cannot_copy_insn_p (rtx_insn *insn)
13207 /* The tls call insn cannot be copied, as it is paired with a data
13208 word. */
13209 if (recog_memoized (insn) == CODE_FOR_tlscall)
13210 return true;
13212 subrtx_iterator::array_type array;
13213 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13215 const_rtx x = *iter;
13216 if (GET_CODE (x) == UNSPEC
13217 && (XINT (x, 1) == UNSPEC_PIC_BASE
13218 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13219 return true;
13221 return false;
13224 enum rtx_code
13225 minmax_code (rtx x)
13227 enum rtx_code code = GET_CODE (x);
13229 switch (code)
13231 case SMAX:
13232 return GE;
13233 case SMIN:
13234 return LE;
13235 case UMIN:
13236 return LEU;
13237 case UMAX:
13238 return GEU;
13239 default:
13240 gcc_unreachable ();
13244 /* Match pair of min/max operators that can be implemented via usat/ssat. */
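/* Worked example (illustrative): bounds [0, 255] yield *MASK = 8 and
   *SIGNED_SAT = false (a usat #8 range), while bounds [-128, 127] yield
   *MASK = 8 and *SIGNED_SAT = true (an ssat #8 range).  */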
13246 bool
13247 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13248 int *mask, bool *signed_sat)
13250 /* The high bound must be a power of two minus one. */
13251 int log = exact_log2 (INTVAL (hi_bound) + 1);
13252 if (log == -1)
13253 return false;
13255 /* The low bound is either zero (for usat) or one less than the
13256 negation of the high bound (for ssat). */
13257 if (INTVAL (lo_bound) == 0)
13259 if (mask)
13260 *mask = log;
13261 if (signed_sat)
13262 *signed_sat = false;
13264 return true;
13267 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13269 if (mask)
13270 *mask = log + 1;
13271 if (signed_sat)
13272 *signed_sat = true;
13274 return true;
13277 return false;
13280 /* Return 1 if memory locations are adjacent. */
13282 adjacent_mem_locations (rtx a, rtx b)
13284 /* We don't guarantee to preserve the order of these memory refs. */
13285 if (volatile_refs_p (a) || volatile_refs_p (b))
13286 return 0;
13288 if ((REG_P (XEXP (a, 0))
13289 || (GET_CODE (XEXP (a, 0)) == PLUS
13290 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13291 && (REG_P (XEXP (b, 0))
13292 || (GET_CODE (XEXP (b, 0)) == PLUS
13293 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13295 HOST_WIDE_INT val0 = 0, val1 = 0;
13296 rtx reg0, reg1;
13297 int val_diff;
13299 if (GET_CODE (XEXP (a, 0)) == PLUS)
13301 reg0 = XEXP (XEXP (a, 0), 0);
13302 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13304 else
13305 reg0 = XEXP (a, 0);
13307 if (GET_CODE (XEXP (b, 0)) == PLUS)
13309 reg1 = XEXP (XEXP (b, 0), 0);
13310 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13312 else
13313 reg1 = XEXP (b, 0);
13315 /* Don't accept any offset that will require multiple
13316 instructions to handle, since this would cause the
13317 arith_adjacentmem pattern to output an overlong sequence. */
13318 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13319 return 0;
13321 /* Don't allow an eliminable register: register elimination can make
13322 the offset too large. */
13323 if (arm_eliminable_register (reg0))
13324 return 0;
13326 val_diff = val1 - val0;
13328 if (arm_ld_sched)
13330 /* If the target has load delay slots, then there's no benefit
13331 to using an ldm instruction unless the offset is zero and
13332 we are optimizing for size. */
13333 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13334 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13335 && (val_diff == 4 || val_diff == -4));
13338 return ((REGNO (reg0) == REGNO (reg1))
13339 && (val_diff == 4 || val_diff == -4));
13342 return 0;
13345 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13346 for load operations, false for store operations. CONSECUTIVE is true
13347 if the register numbers in the operation must be consecutive in the register
13348 bank. RETURN_PC is true if the value is to be loaded into the PC.
13349 The pattern we are trying to match for load is:
13350 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13351 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13354 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13356 where
13357 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13358 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13359 3. If consecutive is TRUE, then for kth register being loaded,
13360 REGNO (R_dk) = REGNO (R_d0) + k.
13361 The pattern for store is similar. */
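/* A minimal sketch of a matching load pattern (illustrative only, offset 0,
   two SImode registers):

     (parallel [(set (reg:SI r4) (mem:SI (reg:SI r0)))
		(set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))])

   which corresponds to an ldm of {r4, r5} from the address in r0.  */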
13362 bool
13363 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13364 bool consecutive, bool return_pc)
13366 HOST_WIDE_INT count = XVECLEN (op, 0);
13367 rtx reg, mem, addr;
13368 unsigned regno;
13369 unsigned first_regno;
13370 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13371 rtx elt;
13372 bool addr_reg_in_reglist = false;
13373 bool update = false;
13374 int reg_increment;
13375 int offset_adj;
13376 int regs_per_val;
13378 /* If not in SImode, then registers must be consecutive
13379 (e.g., VLDM instructions for DFmode). */
13380 gcc_assert ((mode == SImode) || consecutive);
13381 /* Setting return_pc for stores is illegal. */
13382 gcc_assert (!return_pc || load);
13384 /* Set up the increments and the regs per val based on the mode. */
13385 reg_increment = GET_MODE_SIZE (mode);
13386 regs_per_val = reg_increment / 4;
13387 offset_adj = return_pc ? 1 : 0;
13389 if (count <= 1
13390 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13391 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13392 return false;
13394 /* Check if this is a write-back. */
13395 elt = XVECEXP (op, 0, offset_adj);
13396 if (GET_CODE (SET_SRC (elt)) == PLUS)
13398 i++;
13399 base = 1;
13400 update = true;
13402 /* The offset adjustment must be the number of registers being
13403 popped times the size of a single register. */
13404 if (!REG_P (SET_DEST (elt))
13405 || !REG_P (XEXP (SET_SRC (elt), 0))
13406 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13407 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13408 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13409 ((count - 1 - offset_adj) * reg_increment))
13410 return false;
13413 i = i + offset_adj;
13414 base = base + offset_adj;
13415 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13416 success depends on the type: VLDM can do just one reg,
13417 LDM must do at least two. */
13418 if ((count <= i) && (mode == SImode))
13419 return false;
13421 elt = XVECEXP (op, 0, i - 1);
13422 if (GET_CODE (elt) != SET)
13423 return false;
13425 if (load)
13427 reg = SET_DEST (elt);
13428 mem = SET_SRC (elt);
13430 else
13432 reg = SET_SRC (elt);
13433 mem = SET_DEST (elt);
13436 if (!REG_P (reg) || !MEM_P (mem))
13437 return false;
13439 regno = REGNO (reg);
13440 first_regno = regno;
13441 addr = XEXP (mem, 0);
13442 if (GET_CODE (addr) == PLUS)
13444 if (!CONST_INT_P (XEXP (addr, 1)))
13445 return false;
13447 offset = INTVAL (XEXP (addr, 1));
13448 addr = XEXP (addr, 0);
13451 if (!REG_P (addr))
13452 return false;
13454 /* Don't allow SP to be loaded unless it is also the base register. It
13455 guarantees that SP is reset correctly when an LDM instruction
13456 is interrupted. Otherwise, we might end up with a corrupt stack. */
13457 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13458 return false;
13460 for (; i < count; i++)
13462 elt = XVECEXP (op, 0, i);
13463 if (GET_CODE (elt) != SET)
13464 return false;
13466 if (load)
13468 reg = SET_DEST (elt);
13469 mem = SET_SRC (elt);
13471 else
13473 reg = SET_SRC (elt);
13474 mem = SET_DEST (elt);
13477 if (!REG_P (reg)
13478 || GET_MODE (reg) != mode
13479 || REGNO (reg) <= regno
13480 || (consecutive
13481 && (REGNO (reg) !=
13482 (unsigned int) (first_regno + regs_per_val * (i - base))))
13483 /* Don't allow SP to be loaded unless it is also the base register. It
13484 guarantees that SP is reset correctly when an LDM instruction
13485 is interrupted. Otherwise, we might end up with a corrupt stack. */
13486 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13487 || !MEM_P (mem)
13488 || GET_MODE (mem) != mode
13489 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13490 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13491 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13492 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13493 offset + (i - base) * reg_increment))
13494 && (!REG_P (XEXP (mem, 0))
13495 || offset + (i - base) * reg_increment != 0)))
13496 return false;
13498 regno = REGNO (reg);
13499 if (regno == REGNO (addr))
13500 addr_reg_in_reglist = true;
13503 if (load)
13505 if (update && addr_reg_in_reglist)
13506 return false;
13508 /* For Thumb-1, the address register is always modified - either by write-back
13509 or by explicit load. If the pattern does not describe an update,
13510 then the address register must be in the list of loaded registers. */
13511 if (TARGET_THUMB1)
13512 return update || addr_reg_in_reglist;
13515 return true;
13518 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13519 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13520 instruction. ADD_OFFSET is nonzero if the base address register needs
13521 to be modified with an add instruction before we can use it. */
13523 static bool
13524 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13525 int nops, HOST_WIDE_INT add_offset)
13527 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13528 if the offset isn't small enough. The reason 2 ldrs are faster
13529 is because these ARMs are able to do more than one cache access
13530 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13531 whilst the ARM8 has a double bandwidth cache. This means that
13532 these cores can do both an instruction fetch and a data fetch in
13533 a single cycle, so the trick of calculating the address into a
13534 scratch register (one of the result regs) and then doing a load
13535 multiple actually becomes slower (and no smaller in code size).
13536 That is the transformation
13538 ldr rd1, [rbase + offset]
13539 ldr rd2, [rbase + offset + 4]
13543 add rd1, rbase, offset
13544 ldmia rd1, {rd1, rd2}
13546 produces worse code -- '3 cycles + any stalls on rd2' instead of
13547 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13548 access per cycle, the first sequence could never complete in less
13549 than 6 cycles, whereas the ldm sequence would only take 5 and
13550 would make better use of sequential accesses if not hitting the
13551 cache.
13553 We cheat here and test 'arm_ld_sched' which we currently know to
13554 only be true for the ARM8, ARM9 and StrongARM. If this ever
13555 changes, then the test below needs to be reworked. */
13556 if (nops == 2 && arm_ld_sched && add_offset != 0)
13557 return false;
13559 /* XScale has load-store double instructions, but they have stricter
13560 alignment requirements than load-store multiple, so we cannot
13561 use them.
13563 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13564 the pipeline until completion.
13566 NREGS CYCLES
 	  1    3
 	  2    4
 	  3    5
 	  4    6
13572 An ldr instruction takes 1-3 cycles, but does not block the
13573 pipeline.
13575 NREGS CYCLES
13576 1 1-3
13577 2 2-6
13578 3 3-9
13579 4 4-12
13581 Best case ldr will always win. However, the more ldr instructions
13582 we issue, the less likely we are to be able to schedule them well.
13583 Using ldr instructions also increases code size.
13585 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13586 for counts of 3 or 4 regs. */
13587 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13588 return false;
13589 return true;
13592 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13593 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13594 an array ORDER which describes the sequence to use when accessing the
13595 offsets that produces an ascending order. In this sequence, each
13596 offset must be larger by exactly 4 than the previous one. ORDER[0]
13597 must have been filled in with the lowest offset by the caller.
13598 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13599 we use to verify that ORDER produces an ascending order of registers.
13600 Return true if it was possible to construct such an order, false if
13601 not. */
13603 static bool
13604 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13605 int *unsorted_regs)
13607 int i;
13608 for (i = 1; i < nops; i++)
13610 int j;
13612 order[i] = order[i - 1];
13613 for (j = 0; j < nops; j++)
13614 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13616 /* We must find exactly one offset that is higher than the
13617 previous one by 4. */
13618 if (order[i] != order[i - 1])
13619 return false;
13620 order[i] = j;
13622 if (order[i] == order[i - 1])
13623 return false;
13624 /* The register numbers must be ascending. */
13625 if (unsorted_regs != NULL
13626 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13627 return false;
13629 return true;
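/* Editorial sketch (not part of the original source): the ordering check
   above, restated as a self-contained function over plain longs so the
   algorithm can be read in isolation.  ORDER[0] must be preset by the
   caller to the index of the lowest offset.  Given offsets {8, 0, 4} and
   order[0] == 1, it fills order = {1, 2, 0} and returns 1; for
   {0, 4, 12} it returns 0 because no offset follows 4 by exactly 4.  */
static int
sketch_offset_order (int nops, const long *offsets, int *order)
{
  for (int i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (offsets[j] == offsets[order[i - 1]] + 4)
	  {
	    /* There must be exactly one offset 4 above the previous one.  */
	    if (order[i] != order[i - 1])
	      return 0;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return 0;
    }
  return 1;
}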
13632 /* Used to determine in a peephole whether a sequence of load
13633 instructions can be changed into a load-multiple instruction.
13634 NOPS is the number of separate load instructions we are examining. The
13635 first NOPS entries in OPERANDS are the destination registers, the
13636 next NOPS entries are memory operands. If this function is
13637 successful, *BASE is set to the common base register of the memory
13638 accesses; *LOAD_OFFSET is set to the first memory location's offset
13639 from that base register.
13640 REGS is an array filled in with the destination register numbers.
13641 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13642 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13643 the sequence of registers in REGS matches the loads from ascending memory
13644 locations, and the function verifies that the register numbers are
13645 themselves ascending. If CHECK_REGS is false, the register numbers
13646 are stored in the order they are found in the operands. */
13647 static int
13648 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13649 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13651 int unsorted_regs[MAX_LDM_STM_OPS];
13652 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13653 int order[MAX_LDM_STM_OPS];
13654 rtx base_reg_rtx = NULL;
13655 int base_reg = -1;
13656 int i, ldm_case;
13658 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13659 easily extended if required. */
13660 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13662 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13664 /* Loop over the operands and check that the memory references are
13665 suitable (i.e. immediate offsets from the same base register). At
13666 the same time, extract the target register, and the memory
13667 offsets. */
13668 for (i = 0; i < nops; i++)
13670 rtx reg;
13671 rtx offset;
13673 /* Convert a subreg of a mem into the mem itself. */
13674 if (GET_CODE (operands[nops + i]) == SUBREG)
13675 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13677 gcc_assert (MEM_P (operands[nops + i]));
13679 /* Don't reorder volatile memory references; it doesn't seem worth
13680 looking for the case where the order is ok anyway. */
13681 if (MEM_VOLATILE_P (operands[nops + i]))
13682 return 0;
13684 offset = const0_rtx;
13686 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13687 || (GET_CODE (reg) == SUBREG
13688 && REG_P (reg = SUBREG_REG (reg))))
13689 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13690 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13691 || (GET_CODE (reg) == SUBREG
13692 && REG_P (reg = SUBREG_REG (reg))))
13693 && (CONST_INT_P (offset
13694 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13696 if (i == 0)
13698 base_reg = REGNO (reg);
13699 base_reg_rtx = reg;
13700 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13701 return 0;
13703 else if (base_reg != (int) REGNO (reg))
13704 /* Not addressed from the same base register. */
13705 return 0;
13707 unsorted_regs[i] = (REG_P (operands[i])
13708 ? REGNO (operands[i])
13709 : REGNO (SUBREG_REG (operands[i])));
13711 /* If it isn't an integer register, or if it overwrites the
13712 base register but isn't the last insn in the list, then
13713 we can't do this. */
13714 if (unsorted_regs[i] < 0
13715 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13716 || unsorted_regs[i] > 14
13717 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13718 return 0;
13720 /* Don't allow SP to be loaded unless it is also the base
13721 register. It guarantees that SP is reset correctly when
13722 an LDM instruction is interrupted. Otherwise, we might
13723 end up with a corrupt stack. */
13724 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13725 return 0;
13727 unsorted_offsets[i] = INTVAL (offset);
13728 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13729 order[0] = i;
13731 else
13732 /* Not a suitable memory address. */
13733 return 0;
13736 /* All the useful information has now been extracted from the
13737 operands into unsorted_regs and unsorted_offsets; additionally,
13738 order[0] has been set to the lowest offset in the list. Sort
13739 the offsets into order, verifying that they are adjacent, and
13740 check that the register numbers are ascending. */
13741 if (!compute_offset_order (nops, unsorted_offsets, order,
13742 check_regs ? unsorted_regs : NULL))
13743 return 0;
13745 if (saved_order)
13746 memcpy (saved_order, order, sizeof order);
13748 if (base)
13750 *base = base_reg;
13752 for (i = 0; i < nops; i++)
13753 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13755 *load_offset = unsorted_offsets[order[0]];
13758 if (TARGET_THUMB1
13759 && !peep2_reg_dead_p (nops, base_reg_rtx))
13760 return 0;
13762 if (unsorted_offsets[order[0]] == 0)
13763 ldm_case = 1; /* ldmia */
13764 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13765 ldm_case = 2; /* ldmib */
13766 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13767 ldm_case = 3; /* ldmda */
13768 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13769 ldm_case = 4; /* ldmdb */
13770 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13771 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13772 ldm_case = 5;
13773 else
13774 return 0;
13776 if (!multiple_operation_profitable_p (false, nops,
13777 ldm_case == 5
13778 ? unsorted_offsets[order[0]] : 0))
13779 return 0;
13781 return ldm_case;
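/* Editorial sketch (not part of the original source): the addressing-mode
   classification performed at the end of load_multiple_sequence, reduced
   to the two offsets it actually inspects.  The arm_state, is_32bit and
   offset_reachable parameters are stand-ins for TARGET_ARM, TARGET_32BIT
   and the const_ok_for_arm tests.  For example, sorted offsets {0, 4, 8}
   give case 1 (ldmia) and {4, 8, 12} give case 2 (ldmib, ARM state
   only).  */
static int
sketch_ldm_case (long first_offset, long last_offset,
		 int arm_state, int is_32bit, int offset_reachable)
{
  if (first_offset == 0)
    return 1;			/* ldmia */
  if (arm_state && first_offset == 4)
    return 2;			/* ldmib */
  if (arm_state && last_offset == 0)
    return 3;			/* ldmda */
  if (is_32bit && last_offset == -4)
    return 4;			/* ldmdb */
  if (offset_reachable)
    return 5;			/* add the offset to the base, then ldmia */
  return 0;
}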
13784 /* Used to determine in a peephole whether a sequence of store instructions can
13785 be changed into a store-multiple instruction.
13786 NOPS is the number of separate store instructions we are examining.
13787 NOPS_TOTAL is the total number of instructions recognized by the peephole
13788 pattern.
13789 The first NOPS entries in OPERANDS are the source registers, the next
13790 NOPS entries are memory operands. If this function is successful, *BASE is
13791 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13792 to the first memory location's offset from that base register. REGS is an
13793 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13794 likewise filled with the corresponding rtx's.
13795 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13796 numbers to an ascending order of stores.
13797 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13798 from ascending memory locations, and the function verifies that the register
13799 numbers are themselves ascending. If CHECK_REGS is false, the register
13800 numbers are stored in the order they are found in the operands. */
13801 static int
13802 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13803 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13804 HOST_WIDE_INT *load_offset, bool check_regs)
13806 int unsorted_regs[MAX_LDM_STM_OPS];
13807 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13808 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13809 int order[MAX_LDM_STM_OPS];
13810 int base_reg = -1;
13811 rtx base_reg_rtx = NULL;
13812 int i, stm_case;
13814 /* Write back of base register is currently only supported for Thumb 1. */
13815 int base_writeback = TARGET_THUMB1;
13817 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13818 easily extended if required. */
13819 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13821 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13823 /* Loop over the operands and check that the memory references are
13824 suitable (i.e. immediate offsets from the same base register). At
13825 the same time, extract the target register, and the memory
13826 offsets. */
13827 for (i = 0; i < nops; i++)
13829 rtx reg;
13830 rtx offset;
13832 /* Convert a subreg of a mem into the mem itself. */
13833 if (GET_CODE (operands[nops + i]) == SUBREG)
13834 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13836 gcc_assert (MEM_P (operands[nops + i]));
13838 /* Don't reorder volatile memory references; it doesn't seem worth
13839 looking for the case where the order is ok anyway. */
13840 if (MEM_VOLATILE_P (operands[nops + i]))
13841 return 0;
13843 offset = const0_rtx;
13845 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13846 || (GET_CODE (reg) == SUBREG
13847 && REG_P (reg = SUBREG_REG (reg))))
13848 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13849 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13850 || (GET_CODE (reg) == SUBREG
13851 && REG_P (reg = SUBREG_REG (reg))))
13852 && (CONST_INT_P (offset
13853 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13855 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13856 ? operands[i] : SUBREG_REG (operands[i]));
13857 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13859 if (i == 0)
13861 base_reg = REGNO (reg);
13862 base_reg_rtx = reg;
13863 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13864 return 0;
13866 else if (base_reg != (int) REGNO (reg))
13867 /* Not addressed from the same base register. */
13868 return 0;
13870 /* If it isn't an integer register, then we can't do this. */
13871 if (unsorted_regs[i] < 0
13872 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13873 /* The effects are unpredictable if the base register is
13874 both updated and stored. */
13875 || (base_writeback && unsorted_regs[i] == base_reg)
13876 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13877 || unsorted_regs[i] > 14)
13878 return 0;
13880 unsorted_offsets[i] = INTVAL (offset);
13881 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13882 order[0] = i;
13884 else
13885 /* Not a suitable memory address. */
13886 return 0;
13889 /* All the useful information has now been extracted from the
13890 operands into unsorted_regs and unsorted_offsets; additionally,
13891 order[0] has been set to the lowest offset in the list. Sort
13892 the offsets into order, verifying that they are adjacent, and
13893 check that the register numbers are ascending. */
13894 if (!compute_offset_order (nops, unsorted_offsets, order,
13895 check_regs ? unsorted_regs : NULL))
13896 return 0;
13898 if (saved_order)
13899 memcpy (saved_order, order, sizeof order);
13901 if (base)
13903 *base = base_reg;
13905 for (i = 0; i < nops; i++)
13907 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13908 if (reg_rtxs)
13909 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13912 *load_offset = unsorted_offsets[order[0]];
13915 if (TARGET_THUMB1
13916 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13917 return 0;
13919 if (unsorted_offsets[order[0]] == 0)
13920 stm_case = 1; /* stmia */
13921 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13922 stm_case = 2; /* stmib */
13923 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13924 stm_case = 3; /* stmda */
13925 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13926 stm_case = 4; /* stmdb */
13927 else
13928 return 0;
13930 if (!multiple_operation_profitable_p (false, nops, 0))
13931 return 0;
13933 return stm_case;
13936 /* Routines for use in generating RTL. */
13938 /* Generate a load-multiple instruction. COUNT is the number of loads in
13939 the instruction; REGS and MEMS are arrays containing the operands.
13940 BASEREG is the base register to be used in addressing the memory operands.
13941 WBACK_OFFSET is nonzero if the instruction should update the base
13942 register. */
13944 static rtx
13945 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13946 HOST_WIDE_INT wback_offset)
13948 int i = 0, j;
13949 rtx result;
13951 if (!multiple_operation_profitable_p (false, count, 0))
13953 rtx seq;
13955 start_sequence ();
13957 for (i = 0; i < count; i++)
13958 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13960 if (wback_offset != 0)
13961 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13963 seq = get_insns ();
13964 end_sequence ();
13966 return seq;
13969 result = gen_rtx_PARALLEL (VOIDmode,
13970 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13971 if (wback_offset != 0)
13973 XVECEXP (result, 0, 0)
13974 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13975 i = 1;
13976 count++;
13979 for (j = 0; i < count; i++, j++)
13980 XVECEXP (result, 0, i)
13981 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13983 return result;
13986 /* Generate a store-multiple instruction. COUNT is the number of stores in
13987 the instruction; REGS and MEMS are arrays containing the operands.
13988 BASEREG is the base register to be used in addressing the memory operands.
13989 WBACK_OFFSET is nonzero if the instruction should update the base
13990 register. */
13992 static rtx
13993 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13994 HOST_WIDE_INT wback_offset)
13996 int i = 0, j;
13997 rtx result;
13999 if (GET_CODE (basereg) == PLUS)
14000 basereg = XEXP (basereg, 0);
14002 if (!multiple_operation_profitable_p (false, count, 0))
14004 rtx seq;
14006 start_sequence ();
14008 for (i = 0; i < count; i++)
14009 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14011 if (wback_offset != 0)
14012 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14014 seq = get_insns ();
14015 end_sequence ();
14017 return seq;
14020 result = gen_rtx_PARALLEL (VOIDmode,
14021 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14022 if (wback_offset != 0)
14024 XVECEXP (result, 0, 0)
14025 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14026 i = 1;
14027 count++;
14030 for (j = 0; i < count; i++, j++)
14031 XVECEXP (result, 0, i)
14032 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14034 return result;
14037 /* Generate either a load-multiple or a store-multiple instruction. This
14038 function can be used in situations where we can start with a single MEM
14039 rtx and adjust its address upwards.
14040 COUNT is the number of operations in the instruction, not counting a
14041 possible update of the base register. REGS is an array containing the
14042 register operands.
14043 BASEREG is the base register to be used in addressing the memory operands,
14044 which are constructed from BASEMEM.
14045 WRITE_BACK specifies whether the generated instruction should include an
14046 update of the base register.
14047 OFFSETP is used to pass an offset to and from this function; this offset
14048 is not used when constructing the address (instead BASEMEM should have an
14049 appropriate offset in its address); it is used only for setting
14050 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14052 static rtx
14053 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14054 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14056 rtx mems[MAX_LDM_STM_OPS];
14057 HOST_WIDE_INT offset = *offsetp;
14058 int i;
14060 gcc_assert (count <= MAX_LDM_STM_OPS);
14062 if (GET_CODE (basereg) == PLUS)
14063 basereg = XEXP (basereg, 0);
14065 for (i = 0; i < count; i++)
14067 rtx addr = plus_constant (Pmode, basereg, i * 4);
14068 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14069 offset += 4;
14072 if (write_back)
14073 *offsetp = offset;
14075 if (is_load)
14076 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14077 write_back ? 4 * count : 0);
14078 else
14079 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14080 write_back ? 4 * count : 0);
14084 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14085 rtx basemem, HOST_WIDE_INT *offsetp)
14087 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14088 offsetp);
14092 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14093 rtx basemem, HOST_WIDE_INT *offsetp)
14095 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14096 offsetp);
14099 /* Called from a peephole2 expander to turn a sequence of loads into an
14100 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14101 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14102 is true if we can reorder the registers because they are used commutatively
14103 subsequently.
14104 Returns true iff we could generate a new instruction. */
14106 bool
14107 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14109 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14110 rtx mems[MAX_LDM_STM_OPS];
14111 int i, j, base_reg;
14112 rtx base_reg_rtx;
14113 HOST_WIDE_INT offset;
14114 int write_back = FALSE;
14115 int ldm_case;
14116 rtx addr;
14118 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14119 &base_reg, &offset, !sort_regs);
14121 if (ldm_case == 0)
14122 return false;
14124 if (sort_regs)
14125 for (i = 0; i < nops - 1; i++)
14126 for (j = i + 1; j < nops; j++)
14127 if (regs[i] > regs[j])
14129 int t = regs[i];
14130 regs[i] = regs[j];
14131 regs[j] = t;
14133 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14135 if (TARGET_THUMB1)
14137 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14138 gcc_assert (ldm_case == 1 || ldm_case == 5);
14139 write_back = TRUE;
14142 if (ldm_case == 5)
14144 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14145 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14146 offset = 0;
14147 if (!TARGET_THUMB1)
14149 base_reg = regs[0];
14150 base_reg_rtx = newbase;
14154 for (i = 0; i < nops; i++)
14156 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14157 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14158 SImode, addr, 0);
14160 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14161 write_back ? offset + i * 4 : 0));
14162 return true;
14165 /* Called from a peephole2 expander to turn a sequence of stores into an
14166 STM instruction. OPERANDS are the operands found by the peephole matcher;
14167 NOPS indicates how many separate stores we are trying to combine.
14168 Returns true iff we could generate a new instruction. */
14170 bool
14171 gen_stm_seq (rtx *operands, int nops)
14173 int i;
14174 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14175 rtx mems[MAX_LDM_STM_OPS];
14176 int base_reg;
14177 rtx base_reg_rtx;
14178 HOST_WIDE_INT offset;
14179 int write_back = FALSE;
14180 int stm_case;
14181 rtx addr;
14182 bool base_reg_dies;
14184 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14185 mem_order, &base_reg, &offset, true);
14187 if (stm_case == 0)
14188 return false;
14190 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14192 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14193 if (TARGET_THUMB1)
14195 gcc_assert (base_reg_dies);
14196 write_back = TRUE;
14199 if (stm_case == 5)
14201 gcc_assert (base_reg_dies);
14202 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14203 offset = 0;
14206 addr = plus_constant (Pmode, base_reg_rtx, offset);
14208 for (i = 0; i < nops; i++)
14210 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14211 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14212 SImode, addr, 0);
14214 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14215 write_back ? offset + i * 4 : 0));
14216 return true;
14219 /* Called from a peephole2 expander to turn a sequence of stores that are
14220 preceded by constant loads into an STM instruction. OPERANDS are the
14221 operands found by the peephole matcher; NOPS indicates how many
14222 separate stores we are trying to combine; there are 2 * NOPS
14223 instructions in the peephole.
14224 Returns true iff we could generate a new instruction. */
14226 bool
14227 gen_const_stm_seq (rtx *operands, int nops)
14229 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14230 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14231 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14232 rtx mems[MAX_LDM_STM_OPS];
14233 int base_reg;
14234 rtx base_reg_rtx;
14235 HOST_WIDE_INT offset;
14236 int write_back = FALSE;
14237 int stm_case;
14238 rtx addr;
14239 bool base_reg_dies;
14240 int i, j;
14241 HARD_REG_SET allocated;
14243 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14244 mem_order, &base_reg, &offset, false);
14246 if (stm_case == 0)
14247 return false;
14249 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14251 /* If the same register is used more than once, try to find a free
14252 register. */
14253 CLEAR_HARD_REG_SET (allocated);
14254 for (i = 0; i < nops; i++)
14256 for (j = i + 1; j < nops; j++)
14257 if (regs[i] == regs[j])
14259 rtx t = peep2_find_free_register (0, nops * 2,
14260 TARGET_THUMB1 ? "l" : "r",
14261 SImode, &allocated);
14262 if (t == NULL_RTX)
14263 return false;
14264 reg_rtxs[i] = t;
14265 regs[i] = REGNO (t);
14269 /* Compute an ordering that maps the register numbers to an ascending
14270 sequence. */
14271 reg_order[0] = 0;
14272 for (i = 0; i < nops; i++)
14273 if (regs[i] < regs[reg_order[0]])
14274 reg_order[0] = i;
14276 for (i = 1; i < nops; i++)
14278 int this_order = reg_order[i - 1];
14279 for (j = 0; j < nops; j++)
14280 if (regs[j] > regs[reg_order[i - 1]]
14281 && (this_order == reg_order[i - 1]
14282 || regs[j] < regs[this_order]))
14283 this_order = j;
14284 reg_order[i] = this_order;
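  /* Editorial note (not part of the original source): the two loops above
     amount to a selection sort over register numbers.  With regs = {5, 3, 7}
     they pick reg_order[0] = 1 (r3 is lowest), then reg_order[1] = 0 (r5 is
     the smallest number above 3), then reg_order[2] = 2 (r7), yielding the
     ascending register sequence that STM requires.  */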
14287 /* Ensure that registers that must be live after the instruction end
14288 up with the correct value. */
14289 for (i = 0; i < nops; i++)
14291 int this_order = reg_order[i];
14292 if ((this_order != mem_order[i]
14293 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14294 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14295 return false;
14298 /* Load the constants. */
14299 for (i = 0; i < nops; i++)
14301 rtx op = operands[2 * nops + mem_order[i]];
14302 sorted_regs[i] = regs[reg_order[i]];
14303 emit_move_insn (reg_rtxs[reg_order[i]], op);
14306 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14308 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14309 if (TARGET_THUMB1)
14311 gcc_assert (base_reg_dies);
14312 write_back = TRUE;
14315 if (stm_case == 5)
14317 gcc_assert (base_reg_dies);
14318 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14319 offset = 0;
14322 addr = plus_constant (Pmode, base_reg_rtx, offset);
14324 for (i = 0; i < nops; i++)
14326 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14327 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14328 SImode, addr, 0);
14330 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14331 write_back ? offset + i * 4 : 0));
14332 return true;
14335 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14336 unaligned copies on processors which support unaligned semantics for those
14337 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14338 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14339 An interleave factor of 1 (the minimum) will perform no interleaving.
14340 Load/store multiple are used for aligned addresses where possible. */
14342 static void
14343 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14344 HOST_WIDE_INT length,
14345 unsigned int interleave_factor)
14347 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14348 int *regnos = XALLOCAVEC (int, interleave_factor);
14349 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14350 HOST_WIDE_INT i, j;
14351 HOST_WIDE_INT remaining = length, words;
14352 rtx halfword_tmp = NULL, byte_tmp = NULL;
14353 rtx dst, src;
14354 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14355 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14356 HOST_WIDE_INT srcoffset, dstoffset;
14357 HOST_WIDE_INT src_autoinc, dst_autoinc;
14358 rtx mem, addr;
14360 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14362 /* Use hard registers if we have aligned source or destination so we can use
14363 load/store multiple with contiguous registers. */
14364 if (dst_aligned || src_aligned)
14365 for (i = 0; i < interleave_factor; i++)
14366 regs[i] = gen_rtx_REG (SImode, i);
14367 else
14368 for (i = 0; i < interleave_factor; i++)
14369 regs[i] = gen_reg_rtx (SImode);
14371 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14372 src = copy_addr_to_reg (XEXP (srcbase, 0));
14374 srcoffset = dstoffset = 0;
14376 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14377 For copying the last bytes we want to subtract this offset again. */
14378 src_autoinc = dst_autoinc = 0;
14380 for (i = 0; i < interleave_factor; i++)
14381 regnos[i] = i;
14383 /* Copy BLOCK_SIZE_BYTES chunks. */
14385 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14387 /* Load words. */
14388 if (src_aligned && interleave_factor > 1)
14390 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14391 TRUE, srcbase, &srcoffset));
14392 src_autoinc += UNITS_PER_WORD * interleave_factor;
14394 else
14396 for (j = 0; j < interleave_factor; j++)
14398 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14399 - src_autoinc));
14400 mem = adjust_automodify_address (srcbase, SImode, addr,
14401 srcoffset + j * UNITS_PER_WORD);
14402 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14404 srcoffset += block_size_bytes;
14407 /* Store words. */
14408 if (dst_aligned && interleave_factor > 1)
14410 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14411 TRUE, dstbase, &dstoffset));
14412 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14414 else
14416 for (j = 0; j < interleave_factor; j++)
14418 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14419 - dst_autoinc));
14420 mem = adjust_automodify_address (dstbase, SImode, addr,
14421 dstoffset + j * UNITS_PER_WORD);
14422 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14424 dstoffset += block_size_bytes;
14427 remaining -= block_size_bytes;
14430 /* Copy any whole words left (note these aren't interleaved with any
14431 subsequent halfword/byte load/stores in the interests of simplicity). */
14433 words = remaining / UNITS_PER_WORD;
14435 gcc_assert (words < interleave_factor);
14437 if (src_aligned && words > 1)
14439 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14440 &srcoffset));
14441 src_autoinc += UNITS_PER_WORD * words;
14443 else
14445 for (j = 0; j < words; j++)
14447 addr = plus_constant (Pmode, src,
14448 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14449 mem = adjust_automodify_address (srcbase, SImode, addr,
14450 srcoffset + j * UNITS_PER_WORD);
14451 if (src_aligned)
14452 emit_move_insn (regs[j], mem);
14453 else
14454 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14456 srcoffset += words * UNITS_PER_WORD;
14459 if (dst_aligned && words > 1)
14461 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14462 &dstoffset));
14463 dst_autoinc += words * UNITS_PER_WORD;
14465 else
14467 for (j = 0; j < words; j++)
14469 addr = plus_constant (Pmode, dst,
14470 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14471 mem = adjust_automodify_address (dstbase, SImode, addr,
14472 dstoffset + j * UNITS_PER_WORD);
14473 if (dst_aligned)
14474 emit_move_insn (mem, regs[j]);
14475 else
14476 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14478 dstoffset += words * UNITS_PER_WORD;
14481 remaining -= words * UNITS_PER_WORD;
14483 gcc_assert (remaining < 4);
14485 /* Copy a halfword if necessary. */
14487 if (remaining >= 2)
14489 halfword_tmp = gen_reg_rtx (SImode);
14491 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14492 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14493 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14495 /* Either write out immediately, or delay until we've loaded the last
14496 byte, depending on interleave factor. */
14497 if (interleave_factor == 1)
14499 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14500 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14501 emit_insn (gen_unaligned_storehi (mem,
14502 gen_lowpart (HImode, halfword_tmp)));
14503 halfword_tmp = NULL;
14504 dstoffset += 2;
14507 remaining -= 2;
14508 srcoffset += 2;
14511 gcc_assert (remaining < 2);
14513 /* Copy last byte. */
14515 if ((remaining & 1) != 0)
14517 byte_tmp = gen_reg_rtx (SImode);
14519 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14520 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14521 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14523 if (interleave_factor == 1)
14525 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14526 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14527 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14528 byte_tmp = NULL;
14529 dstoffset++;
14532 remaining--;
14533 srcoffset++;
14536 /* Store last halfword if we haven't done so already. */
14538 if (halfword_tmp)
14540 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14541 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14542 emit_insn (gen_unaligned_storehi (mem,
14543 gen_lowpart (HImode, halfword_tmp)));
14544 dstoffset += 2;
14547 /* Likewise for last byte. */
14549 if (byte_tmp)
14551 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14552 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14553 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14554 dstoffset++;
14557 gcc_assert (remaining == 0 && srcoffset == dstoffset);
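/* Editorial worked example (not part of the original source): a 23-byte
   copy with interleave_factor == 2, a word-aligned source and an unaligned
   destination is emitted by the code above as two 8-byte blocks (each a
   two-register load-multiple of r0/r1, or separate loads where the
   profitability check rejects ldm, followed by two unaligned word stores),
   one leftover whole word, then an unaligned halfword load and a byte
   load; because the interleave factor is not 1, the halfword and byte
   stores are deferred and emitted last.  */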
14560 /* From mips_adjust_block_mem:
14562 Helper function for doing a loop-based block operation on memory
14563 reference MEM. Each iteration of the loop will operate on LENGTH
14564 bytes of MEM.
14566 Create a new base register for use within the loop and point it to
14567 the start of MEM. Create a new memory reference that uses this
14568 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14570 static void
14571 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14572 rtx *loop_mem)
14574 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14576 /* Although the new mem does not refer to a known location,
14577 it does keep up to LENGTH bytes of alignment. */
14578 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14579 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14582 /* From mips_block_move_loop:
14584 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14585 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14586 the memory regions do not overlap. */
14588 static void
14589 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14590 unsigned int interleave_factor,
14591 HOST_WIDE_INT bytes_per_iter)
14593 rtx src_reg, dest_reg, final_src, test;
14594 HOST_WIDE_INT leftover;
14596 leftover = length % bytes_per_iter;
14597 length -= leftover;
14599 /* Create registers and memory references for use within the loop. */
14600 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14601 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14603 /* Calculate the value that SRC_REG should have after the last iteration of
14604 the loop. */
14605 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14606 0, 0, OPTAB_WIDEN);
14608 /* Emit the start of the loop. */
14609 rtx_code_label *label = gen_label_rtx ();
14610 emit_label (label);
14612 /* Emit the loop body. */
14613 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14614 interleave_factor);
14616 /* Move on to the next block. */
14617 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14618 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14620 /* Emit the loop condition. */
14621 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14622 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14624 /* Mop up any left-over bytes. */
14625 if (leftover)
14626 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
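/* Editorial worked example (not part of the original source): with
   length == 100 and bytes_per_iter == 16, leftover = 100 % 16 = 4 and
   length is trimmed to 96, so the loop body above runs 96 / 16 = 6 times
   and the remaining 4 bytes are handled by the final straight-line
   call.  */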
14629 /* Emit a block move when either the source or destination is unaligned (not
14630 aligned to a four-byte boundary). This may need further tuning depending on
14631 core type, optimize_size setting, etc. */
14633 static int
14634 arm_movmemqi_unaligned (rtx *operands)
14636 HOST_WIDE_INT length = INTVAL (operands[2]);
14638 if (optimize_size)
14640 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14641 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14642 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14643 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14644 or dst_aligned though: allow more interleaving in those cases since the
14645 resulting code can be smaller. */
14646 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14647 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14649 if (length > 12)
14650 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14651 interleave_factor, bytes_per_iter);
14652 else
14653 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14654 interleave_factor);
14656 else
14658 /* Note that the loop created by arm_block_move_unaligned_loop may be
14659 subject to loop unrolling, which makes tuning this condition a little
14660 redundant. */
14661 if (length > 32)
14662 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14663 else
14664 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14667 return 1;
14671 arm_gen_movmemqi (rtx *operands)
14673 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14674 HOST_WIDE_INT srcoffset, dstoffset;
14675 int i;
14676 rtx src, dst, srcbase, dstbase;
14677 rtx part_bytes_reg = NULL;
14678 rtx mem;
14680 if (!CONST_INT_P (operands[2])
14681 || !CONST_INT_P (operands[3])
14682 || INTVAL (operands[2]) > 64)
14683 return 0;
14685 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14686 return arm_movmemqi_unaligned (operands);
14688 if (INTVAL (operands[3]) & 3)
14689 return 0;
14691 dstbase = operands[0];
14692 srcbase = operands[1];
14694 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14695 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14697 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14698 out_words_to_go = INTVAL (operands[2]) / 4;
14699 last_bytes = INTVAL (operands[2]) & 3;
14700 dstoffset = srcoffset = 0;
14702 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14703 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14705 for (i = 0; in_words_to_go >= 2; i+=4)
14707 if (in_words_to_go > 4)
14708 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14709 TRUE, srcbase, &srcoffset));
14710 else
14711 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14712 src, FALSE, srcbase,
14713 &srcoffset));
14715 if (out_words_to_go)
14717 if (out_words_to_go > 4)
14718 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14719 TRUE, dstbase, &dstoffset));
14720 else if (out_words_to_go != 1)
14721 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14722 out_words_to_go, dst,
14723 (last_bytes == 0
14724 ? FALSE : TRUE),
14725 dstbase, &dstoffset));
14726 else
14728 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14729 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14730 if (last_bytes != 0)
14732 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14733 dstoffset += 4;
14738 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14739 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14742 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14743 if (out_words_to_go)
14745 rtx sreg;
14747 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14748 sreg = copy_to_reg (mem);
14750 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14751 emit_move_insn (mem, sreg);
14752 in_words_to_go--;
14754 gcc_assert (!in_words_to_go); /* Sanity check */
14757 if (in_words_to_go)
14759 gcc_assert (in_words_to_go > 0);
14761 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14762 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14765 gcc_assert (!last_bytes || part_bytes_reg);
14767 if (BYTES_BIG_ENDIAN && last_bytes)
14769 rtx tmp = gen_reg_rtx (SImode);
14771 /* The bytes we want are in the top end of the word. */
14772 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14773 GEN_INT (8 * (4 - last_bytes))));
14774 part_bytes_reg = tmp;
14776 while (last_bytes)
14778 mem = adjust_automodify_address (dstbase, QImode,
14779 plus_constant (Pmode, dst,
14780 last_bytes - 1),
14781 dstoffset + last_bytes - 1);
14782 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14784 if (--last_bytes)
14786 tmp = gen_reg_rtx (SImode);
14787 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14788 part_bytes_reg = tmp;
14793 else
14795 if (last_bytes > 1)
14797 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14798 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14799 last_bytes -= 2;
14800 if (last_bytes)
14802 rtx tmp = gen_reg_rtx (SImode);
14803 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14804 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14805 part_bytes_reg = tmp;
14806 dstoffset += 2;
14810 if (last_bytes)
14812 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14813 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14817 return 1;
14820 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14821 by mode size. */
14822 inline static rtx
14823 next_consecutive_mem (rtx mem)
14825 machine_mode mode = GET_MODE (mem);
14826 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14827 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14829 return adjust_automodify_address (mem, mode, addr, offset);
14832 /* Copy using LDRD/STRD instructions whenever possible.
14833 Returns true upon success. */
14834 bool
14835 gen_movmem_ldrd_strd (rtx *operands)
14837 unsigned HOST_WIDE_INT len;
14838 HOST_WIDE_INT align;
14839 rtx src, dst, base;
14840 rtx reg0;
14841 bool src_aligned, dst_aligned;
14842 bool src_volatile, dst_volatile;
14844 gcc_assert (CONST_INT_P (operands[2]));
14845 gcc_assert (CONST_INT_P (operands[3]));
14847 len = UINTVAL (operands[2]);
14848 if (len > 64)
14849 return false;
14851 /* Maximum alignment we can assume for both src and dst buffers. */
14852 align = INTVAL (operands[3]);
14854 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14855 return false;
14857 /* Place src and dst addresses in registers
14858 and update the corresponding mem rtx. */
14859 dst = operands[0];
14860 dst_volatile = MEM_VOLATILE_P (dst);
14861 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14862 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14863 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14865 src = operands[1];
14866 src_volatile = MEM_VOLATILE_P (src);
14867 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14868 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14869 src = adjust_automodify_address (src, VOIDmode, base, 0);
14871 if (!unaligned_access && !(src_aligned && dst_aligned))
14872 return false;
14874 if (src_volatile || dst_volatile)
14875 return false;
14877 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14878 if (!(dst_aligned || src_aligned))
14879 return arm_gen_movmemqi (operands);
14881 src = adjust_address (src, DImode, 0);
14882 dst = adjust_address (dst, DImode, 0);
14883 while (len >= 8)
14885 len -= 8;
14886 reg0 = gen_reg_rtx (DImode);
14887 if (src_aligned)
14888 emit_move_insn (reg0, src);
14889 else
14890 emit_insn (gen_unaligned_loaddi (reg0, src));
14892 if (dst_aligned)
14893 emit_move_insn (dst, reg0);
14894 else
14895 emit_insn (gen_unaligned_storedi (dst, reg0));
14897 src = next_consecutive_mem (src);
14898 dst = next_consecutive_mem (dst);
14901 gcc_assert (len < 8);
14902 if (len >= 4)
14904 /* More than a word but less than a double-word to copy. Copy a word. */
14905 reg0 = gen_reg_rtx (SImode);
14906 src = adjust_address (src, SImode, 0);
14907 dst = adjust_address (dst, SImode, 0);
14908 if (src_aligned)
14909 emit_move_insn (reg0, src);
14910 else
14911 emit_insn (gen_unaligned_loadsi (reg0, src));
14913 if (dst_aligned)
14914 emit_move_insn (dst, reg0);
14915 else
14916 emit_insn (gen_unaligned_storesi (dst, reg0));
14918 src = next_consecutive_mem (src);
14919 dst = next_consecutive_mem (dst);
14920 len -= 4;
14923 if (len == 0)
14924 return true;
14926 /* Copy the remaining bytes. */
14927 if (len >= 2)
14929 dst = adjust_address (dst, HImode, 0);
14930 src = adjust_address (src, HImode, 0);
14931 reg0 = gen_reg_rtx (SImode);
14932 if (src_aligned)
14933 emit_insn (gen_zero_extendhisi2 (reg0, src));
14934 else
14935 emit_insn (gen_unaligned_loadhiu (reg0, src));
14937 if (dst_aligned)
14938 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
14939 else
14940 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14942 src = next_consecutive_mem (src);
14943 dst = next_consecutive_mem (dst);
14944 if (len == 2)
14945 return true;
14948 dst = adjust_address (dst, QImode, 0);
14949 src = adjust_address (src, QImode, 0);
14950 reg0 = gen_reg_rtx (QImode);
14951 emit_move_insn (reg0, src);
14952 emit_move_insn (dst, reg0);
14953 return true;
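/* Editorial worked example (not part of the original source): a 15-byte
   copy with both buffers word-aligned is expanded by gen_movmem_ldrd_strd
   as one DImode move (typically LDRD/STRD, leaving 7 bytes), one SImode
   move (leaving 3), one zero-extended HImode move (leaving 1) and a final
   QImode move.  */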
14956 /* Select a dominance comparison mode if possible for a test of the general
14957 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14958 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14959 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14960 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14961 In all cases OP will be either EQ or NE, but we don't need to know which
14962 here. If we are unable to support a dominance comparison we return
14963 CC mode. This will then fail to match for the RTL expressions that
14964 generate this call. */
14965 machine_mode
14966 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14968 enum rtx_code cond1, cond2;
14969 int swapped = 0;
14971 /* Currently we will probably get the wrong result if the individual
14972 comparisons are not simple. This also ensures that it is safe to
14973 reverse a comparison if necessary. */
14974 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14975 != CCmode)
14976 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14977 != CCmode))
14978 return CCmode;
14980 /* The if_then_else variant of this tests the second condition if the
14981 first passes, but is true if the first fails. Reverse the first
14982 condition to get a true "inclusive-or" expression. */
14983 if (cond_or == DOM_CC_NX_OR_Y)
14984 cond1 = reverse_condition (cond1);
14986 /* If the comparisons are not equal, and one doesn't dominate the other,
14987 then we can't do this. */
14988 if (cond1 != cond2
14989 && !comparison_dominates_p (cond1, cond2)
14990 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14991 return CCmode;
14993 if (swapped)
14994 std::swap (cond1, cond2);
14996 switch (cond1)
14998 case EQ:
14999 if (cond_or == DOM_CC_X_AND_Y)
15000 return CC_DEQmode;
15002 switch (cond2)
15004 case EQ: return CC_DEQmode;
15005 case LE: return CC_DLEmode;
15006 case LEU: return CC_DLEUmode;
15007 case GE: return CC_DGEmode;
15008 case GEU: return CC_DGEUmode;
15009 default: gcc_unreachable ();
15012 case LT:
15013 if (cond_or == DOM_CC_X_AND_Y)
15014 return CC_DLTmode;
15016 switch (cond2)
15018 case LT:
15019 return CC_DLTmode;
15020 case LE:
15021 return CC_DLEmode;
15022 case NE:
15023 return CC_DNEmode;
15024 default:
15025 gcc_unreachable ();
15028 case GT:
15029 if (cond_or == DOM_CC_X_AND_Y)
15030 return CC_DGTmode;
15032 switch (cond2)
15034 case GT:
15035 return CC_DGTmode;
15036 case GE:
15037 return CC_DGEmode;
15038 case NE:
15039 return CC_DNEmode;
15040 default:
15041 gcc_unreachable ();
15044 case LTU:
15045 if (cond_or == DOM_CC_X_AND_Y)
15046 return CC_DLTUmode;
15048 switch (cond2)
15050 case LTU:
15051 return CC_DLTUmode;
15052 case LEU:
15053 return CC_DLEUmode;
15054 case NE:
15055 return CC_DNEmode;
15056 default:
15057 gcc_unreachable ();
15060 case GTU:
15061 if (cond_or == DOM_CC_X_AND_Y)
15062 return CC_DGTUmode;
15064 switch (cond2)
15066 case GTU:
15067 return CC_DGTUmode;
15068 case GEU:
15069 return CC_DGEUmode;
15070 case NE:
15071 return CC_DNEmode;
15072 default:
15073 gcc_unreachable ();
15076 /* The remaining cases only occur when both comparisons are the
15077 same. */
15078 case NE:
15079 gcc_assert (cond1 == cond2);
15080 return CC_DNEmode;
15082 case LE:
15083 gcc_assert (cond1 == cond2);
15084 return CC_DLEmode;
15086 case GE:
15087 gcc_assert (cond1 == cond2);
15088 return CC_DGEmode;
15090 case LEU:
15091 gcc_assert (cond1 == cond2);
15092 return CC_DLEUmode;
15094 case GEU:
15095 gcc_assert (cond1 == cond2);
15096 return CC_DGEUmode;
15098 default:
15099 gcc_unreachable ();
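/* Editorial worked example (not part of the original source): for
   X = (eq r0 r1) and Y = (le r2 r3) combined with DOM_CC_X_OR_Y, each
   operand individually selects plain CCmode, EQ dominates LE (flags that
   satisfy EQ also satisfy LE), no swap is needed, and the switch above
   returns CC_DLEmode.  */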
15103 machine_mode
15104 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15106 /* All floating point compares return CCFP if it is an equality
15107 comparison, and CCFPE otherwise. */
15108 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15110 switch (op)
15112 case EQ:
15113 case NE:
15114 case UNORDERED:
15115 case ORDERED:
15116 case UNLT:
15117 case UNLE:
15118 case UNGT:
15119 case UNGE:
15120 case UNEQ:
15121 case LTGT:
15122 return CCFPmode;
15124 case LT:
15125 case LE:
15126 case GT:
15127 case GE:
15128 return CCFPEmode;
15130 default:
15131 gcc_unreachable ();
15135 /* A compare with a shifted operand. Because of canonicalization, the
15136 comparison will have to be swapped when we emit the assembler. */
15137 if (GET_MODE (y) == SImode
15138 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15139 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15140 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15141 || GET_CODE (x) == ROTATERT))
15142 return CC_SWPmode;
15144 /* This operation is performed swapped, but since we only rely on the Z
15145 flag we don't need an additional mode. */
15146 if (GET_MODE (y) == SImode
15147 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15148 && GET_CODE (x) == NEG
15149 && (op == EQ || op == NE))
15150 return CC_Zmode;
15152 /* This is a special case that is used by combine to allow a
15153 comparison of a shifted byte load to be split into a zero-extend
15154 followed by a comparison of the shifted integer (only valid for
15155 equalities and unsigned inequalities). */
15156 if (GET_MODE (x) == SImode
15157 && GET_CODE (x) == ASHIFT
15158 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15159 && GET_CODE (XEXP (x, 0)) == SUBREG
15160 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15161 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15162 && (op == EQ || op == NE
15163 || op == GEU || op == GTU || op == LTU || op == LEU)
15164 && CONST_INT_P (y))
15165 return CC_Zmode;
15167 /* A construct for a conditional compare: if the false arm contains
15168 0, then both conditions must be true; otherwise either condition
15169 must be true. Not all conditions are possible, so CCmode is
15170 returned if it can't be done. */
15171 if (GET_CODE (x) == IF_THEN_ELSE
15172 && (XEXP (x, 2) == const0_rtx
15173 || XEXP (x, 2) == const1_rtx)
15174 && COMPARISON_P (XEXP (x, 0))
15175 && COMPARISON_P (XEXP (x, 1)))
15176 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15177 INTVAL (XEXP (x, 2)));
15179 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15180 if (GET_CODE (x) == AND
15181 && (op == EQ || op == NE)
15182 && COMPARISON_P (XEXP (x, 0))
15183 && COMPARISON_P (XEXP (x, 1)))
15184 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15185 DOM_CC_X_AND_Y);
15187 if (GET_CODE (x) == IOR
15188 && (op == EQ || op == NE)
15189 && COMPARISON_P (XEXP (x, 0))
15190 && COMPARISON_P (XEXP (x, 1)))
15191 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15192 DOM_CC_X_OR_Y);
15194 /* An operation (on Thumb) where we want to test for a single bit.
15195 This is done by shifting that bit up into the top bit of a
15196 scratch register; we can then branch on the sign bit. */
15197 if (TARGET_THUMB1
15198 && GET_MODE (x) == SImode
15199 && (op == EQ || op == NE)
15200 && GET_CODE (x) == ZERO_EXTRACT
15201 && XEXP (x, 1) == const1_rtx)
15202 return CC_Nmode;
15204 /* An operation that sets the condition codes as a side-effect; the
15205 V flag is not set correctly, so we can only use comparisons where
15206 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15207 instead.) */
15208 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15209 if (GET_MODE (x) == SImode
15210 && y == const0_rtx
15211 && (op == EQ || op == NE || op == LT || op == GE)
15212 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15213 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15214 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15215 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15216 || GET_CODE (x) == LSHIFTRT
15217 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15218 || GET_CODE (x) == ROTATERT
15219 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15220 return CC_NOOVmode;
15222 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15223 return CC_Zmode;
15225 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15226 && GET_CODE (x) == PLUS
15227 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15228 return CC_Cmode;
15230 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15232 switch (op)
15234 case EQ:
15235 case NE:
15236 /* A DImode comparison against zero can be implemented by
15237 or'ing the two halves together. */
15238 if (y == const0_rtx)
15239 return CC_Zmode;
15241 /* We can do an equality test in three Thumb instructions. */
15242 if (!TARGET_32BIT)
15243 return CC_Zmode;
15245 /* FALLTHROUGH */
15247 case LTU:
15248 case LEU:
15249 case GTU:
15250 case GEU:
15251 /* DImode unsigned comparisons can be implemented by cmp +
15252 cmpeq without a scratch register. Not worth doing in
15253 Thumb-2. */
15254 if (TARGET_32BIT)
15255 return CC_CZmode;
15257 /* FALLTHROUGH */
15259 case LT:
15260 case LE:
15261 case GT:
15262 case GE:
15263 /* DImode signed and unsigned comparisons can be implemented
15264 by cmp + sbcs with a scratch register, but that does not
15265 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15266 gcc_assert (op != EQ && op != NE);
15267 return CC_NCVmode;
15269 default:
15270 gcc_unreachable ();
15274 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15275 return GET_MODE (x);
15277 return CCmode;
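/* Editorial worked example (not part of the original source): the
   CC_Cmode case above matches the canonical unsigned overflow test
       if (a + b < a) ...
   which reaches this function as an LTU comparison of (plus a b)
   against a; only the carry flag of the ADDS is needed, so no separate
   compare instruction has to be generated.  */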
15280 /* X and Y are two things to compare using CODE. Emit the compare insn and
15281 return the rtx for register 0 in the proper mode. FP means this is a
15282 floating point compare: I don't think that it is needed on the arm. */
15284 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15286 machine_mode mode;
15287 rtx cc_reg;
15288 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15290 /* We might have X as a constant, Y as a register because of the predicates
15291 used for cmpdi. If so, force X to a register here. */
15292 if (dimode_comparison && !REG_P (x))
15293 x = force_reg (DImode, x);
15295 mode = SELECT_CC_MODE (code, x, y);
15296 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15298 if (dimode_comparison
15299 && mode != CC_CZmode)
15301 rtx clobber, set;
15303 /* To compare two non-zero values for equality, XOR them and
15304 then compare against zero. Not used for ARM mode; there
15305 CC_CZmode is cheaper. */
15306 if (mode == CC_Zmode && y != const0_rtx)
15308 gcc_assert (!reload_completed);
15309 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15310 y = const0_rtx;
15313 /* A scratch register is required. */
15314 if (reload_completed)
15315 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15316 else
15317 scratch = gen_rtx_SCRATCH (SImode);
15319 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15320 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15321 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15323 else
15324 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15326 return cc_reg;
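/* Editorial worked example (not part of the original source): comparing
   two DImode registers for equality on Thumb selects CC_Zmode, so the
   code above first XORs the operands (expanded as two 32-bit XORs) and
   then compares the combined result against zero, which the CC_Zmode
   pattern can implement by ORing the two halves; the cmp/cmpeq pairing
   is reserved for CC_CZmode on 32-bit targets.  */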
15329 /* Generate a sequence of insns that will generate the correct return
15330 address mask depending on the physical architecture that the program
15331 is running on. */
15333 arm_gen_return_addr_mask (void)
15335 rtx reg = gen_reg_rtx (Pmode);
15337 emit_insn (gen_return_addr_mask (reg));
15338 return reg;
15341 void
15342 arm_reload_in_hi (rtx *operands)
15344 rtx ref = operands[1];
15345 rtx base, scratch;
15346 HOST_WIDE_INT offset = 0;
15348 if (GET_CODE (ref) == SUBREG)
15350 offset = SUBREG_BYTE (ref);
15351 ref = SUBREG_REG (ref);
15354 if (REG_P (ref))
15356 /* We have a pseudo which has been spilt onto the stack; there
15357 are two cases here: the first where there is a simple
15358 stack-slot replacement and a second where the stack-slot is
15359 out of range, or is used as a subreg. */
15360 if (reg_equiv_mem (REGNO (ref)))
15362 ref = reg_equiv_mem (REGNO (ref));
15363 base = find_replacement (&XEXP (ref, 0));
15365 else
15366 /* The slot is out of range, or was dressed up in a SUBREG. */
15367 base = reg_equiv_address (REGNO (ref));
15369 else
15370 base = find_replacement (&XEXP (ref, 0));
15372 /* Handle the case where the address is too complex to be offset by 1. */
15373 if (GET_CODE (base) == MINUS
15374 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15376 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15378 emit_set_insn (base_plus, base);
15379 base = base_plus;
15381 else if (GET_CODE (base) == PLUS)
15383 /* The addend must be CONST_INT, or we would have dealt with it above. */
15384 HOST_WIDE_INT hi, lo;
15386 offset += INTVAL (XEXP (base, 1));
15387 base = XEXP (base, 0);
15389 /* Rework the address into a legal sequence of insns. */
15390 /* Valid range for lo is -4095 -> 4095 */
15391 lo = (offset >= 0
15392 ? (offset & 0xfff)
15393 : -((-offset) & 0xfff));
15395 /* Corner case, if lo is the max offset then we would be out of range
15396 once we have added the additional 1 below, so bump the msb into the
15397 pre-loading insn(s). */
15398 if (lo == 4095)
15399 lo &= 0x7ff;
15401 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15402 ^ (HOST_WIDE_INT) 0x80000000)
15403 - (HOST_WIDE_INT) 0x80000000);
15405 gcc_assert (hi + lo == offset);
15407 if (hi != 0)
15409 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15411 /* Get the base address; addsi3 knows how to handle constants
15412 that require more than one insn. */
15413 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15414 base = base_plus;
15415 offset = lo;
15419 /* Operands[2] may overlap operands[0] (though it won't overlap
15420 operands[1]), that's why we asked for a DImode reg -- so we can
15421 use the bit that does not overlap. */
15422 if (REGNO (operands[2]) == REGNO (operands[0]))
15423 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15424 else
15425 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15427 emit_insn (gen_zero_extendqisi2 (scratch,
15428 gen_rtx_MEM (QImode,
15429 plus_constant (Pmode, base,
15430 offset))));
15431 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15432 gen_rtx_MEM (QImode,
15433 plus_constant (Pmode, base,
15434 offset + 1))));
15435 if (!BYTES_BIG_ENDIAN)
15436 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15437 gen_rtx_IOR (SImode,
15438 gen_rtx_ASHIFT
15439 (SImode,
15440 gen_rtx_SUBREG (SImode, operands[0], 0),
15441 GEN_INT (8)),
15442 scratch));
15443 else
15444 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15445 gen_rtx_IOR (SImode,
15446 gen_rtx_ASHIFT (SImode, scratch,
15447 GEN_INT (8)),
15448 gen_rtx_SUBREG (SImode, operands[0], 0)));
15451 /* Handle storing a half-word to memory during reload by synthesizing as two
15452 byte stores. Take care not to clobber the input values until after we
15453 have moved them somewhere safe. This code assumes that if the DImode
15454 scratch in operands[2] overlaps either the input value or output address
15455 in some way, then that value must die in this insn (we absolutely need
15456 two scratch registers for some corner cases). */
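/* For instance (an illustrative sketch only, not taken from the sources),
   a little-endian half-word store of OUTVAL to [BASE, #6] is synthesized
   roughly as

   strb outval, [base, #6]
   lsr scratch, outval, #8
   strb scratch, [base, #7]

   with the big-endian case storing the two bytes in the opposite order. */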
15457 void
15458 arm_reload_out_hi (rtx *operands)
15460 rtx ref = operands[0];
15461 rtx outval = operands[1];
15462 rtx base, scratch;
15463 HOST_WIDE_INT offset = 0;
15465 if (GET_CODE (ref) == SUBREG)
15467 offset = SUBREG_BYTE (ref);
15468 ref = SUBREG_REG (ref);
15471 if (REG_P (ref))
15473 /* We have a pseudo which has been spilt onto the stack; there
15474 are two cases here: the first where there is a simple
15475 stack-slot replacement and a second where the stack-slot is
15476 out of range, or is used as a subreg. */
15477 if (reg_equiv_mem (REGNO (ref)))
15479 ref = reg_equiv_mem (REGNO (ref));
15480 base = find_replacement (&XEXP (ref, 0));
15482 else
15483 /* The slot is out of range, or was dressed up in a SUBREG. */
15484 base = reg_equiv_address (REGNO (ref));
15486 else
15487 base = find_replacement (&XEXP (ref, 0));
15489 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15491 /* Handle the case where the address is too complex to be offset by 1. */
15492 if (GET_CODE (base) == MINUS
15493 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15495 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15497 /* Be careful not to destroy OUTVAL. */
15498 if (reg_overlap_mentioned_p (base_plus, outval))
15500 /* Updating base_plus might destroy outval, see if we can
15501 swap the scratch and base_plus. */
15502 if (!reg_overlap_mentioned_p (scratch, outval))
15503 std::swap (scratch, base_plus);
15504 else
15506 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15508 /* Be conservative and copy OUTVAL into the scratch now,
15509 this should only be necessary if outval is a subreg
15510 of something larger than a word. */
15511 /* XXX Might this clobber base? I can't see how it can,
15512 since scratch is known to overlap with OUTVAL, and
15513 must be wider than a word. */
15514 emit_insn (gen_movhi (scratch_hi, outval));
15515 outval = scratch_hi;
15519 emit_set_insn (base_plus, base);
15520 base = base_plus;
15522 else if (GET_CODE (base) == PLUS)
15524 /* The addend must be CONST_INT, or we would have dealt with it above. */
15525 HOST_WIDE_INT hi, lo;
15527 offset += INTVAL (XEXP (base, 1));
15528 base = XEXP (base, 0);
15530 /* Rework the address into a legal sequence of insns. */
15531 /* Valid range for lo is -4095 -> 4095 */
15532 lo = (offset >= 0
15533 ? (offset & 0xfff)
15534 : -((-offset) & 0xfff));
15536 /* Corner case, if lo is the max offset then we would be out of range
15537 once we have added the additional 1 below, so bump the msb into the
15538 pre-loading insn(s). */
15539 if (lo == 4095)
15540 lo &= 0x7ff;
15542 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15543 ^ (HOST_WIDE_INT) 0x80000000)
15544 - (HOST_WIDE_INT) 0x80000000);
15546 gcc_assert (hi + lo == offset);
15548 if (hi != 0)
15550 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15552 /* Be careful not to destroy OUTVAL. */
15553 if (reg_overlap_mentioned_p (base_plus, outval))
15555 /* Updating base_plus might destroy outval, see if we
15556 can swap the scratch and base_plus. */
15557 if (!reg_overlap_mentioned_p (scratch, outval))
15558 std::swap (scratch, base_plus);
15559 else
15561 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15563 /* Be conservative and copy outval into scratch now,
15564 this should only be necessary if outval is a
15565 subreg of something larger than a word. */
15566 /* XXX Might this clobber base? I can't see how it
15567 can, since scratch is known to overlap with
15568 outval. */
15569 emit_insn (gen_movhi (scratch_hi, outval));
15570 outval = scratch_hi;
15574 /* Get the base address; addsi3 knows how to handle constants
15575 that require more than one insn. */
15576 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15577 base = base_plus;
15578 offset = lo;
15582 if (BYTES_BIG_ENDIAN)
15584 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15585 plus_constant (Pmode, base,
15586 offset + 1)),
15587 gen_lowpart (QImode, outval)));
15588 emit_insn (gen_lshrsi3 (scratch,
15589 gen_rtx_SUBREG (SImode, outval, 0),
15590 GEN_INT (8)));
15591 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15592 offset)),
15593 gen_lowpart (QImode, scratch)));
15595 else
15597 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15598 offset)),
15599 gen_lowpart (QImode, outval)));
15600 emit_insn (gen_lshrsi3 (scratch,
15601 gen_rtx_SUBREG (SImode, outval, 0),
15602 GEN_INT (8)));
15603 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15604 plus_constant (Pmode, base,
15605 offset + 1)),
15606 gen_lowpart (QImode, scratch)));
15610 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15611 (padded to the size of a word) should be passed in a register. */
15613 static bool
15614 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15616 if (TARGET_AAPCS_BASED)
15617 return must_pass_in_stack_var_size (mode, type);
15618 else
15619 return must_pass_in_stack_var_size_or_pad (mode, type);
15623 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15624 Return true if an argument passed on the stack should be padded upwards,
15625 i.e. if the least-significant byte has useful data.
15626 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15627 aggregate types are placed in the lowest memory address. */
15629 bool
15630 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15632 if (!TARGET_AAPCS_BASED)
15633 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15635 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15636 return false;
15638 return true;
15642 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15643 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15644 register has useful data, and return the opposite if the most
15645 significant byte does. */
15647 bool
15648 arm_pad_reg_upward (machine_mode mode,
15649 tree type, int first ATTRIBUTE_UNUSED)
15651 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15653 /* For AAPCS, small aggregates, small fixed-point types,
15654 and small complex types are always padded upwards. */
15655 if (type)
15657 if ((AGGREGATE_TYPE_P (type)
15658 || TREE_CODE (type) == COMPLEX_TYPE
15659 || FIXED_POINT_TYPE_P (type))
15660 && int_size_in_bytes (type) <= 4)
15661 return true;
15663 else
15665 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15666 && GET_MODE_SIZE (mode) <= 4)
15667 return true;
15671 /* Otherwise, use default padding. */
15672 return !BYTES_BIG_ENDIAN;
15675 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15676 assuming that the address in the base register is word aligned. */
15677 bool
15678 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15680 HOST_WIDE_INT max_offset;
15682 /* Offset must be a multiple of 4 in Thumb mode. */
15683 if (TARGET_THUMB2 && ((offset & 3) != 0))
15684 return false;
15686 if (TARGET_THUMB2)
15687 max_offset = 1020;
15688 else if (TARGET_ARM)
15689 max_offset = 255;
15690 else
15691 return false;
15693 return ((offset <= max_offset) && (offset >= -max_offset));
15696 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15697 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15698 Assumes that the address in the base register RN is word aligned. Pattern
15699 guarantees that both memory accesses use the same base register,
15700 the offsets are constants within the range, and the gap between the offsets is 4.
15701 If reload is complete, check that the registers are legal. WBACK indicates whether
15702 address is updated. LOAD indicates whether memory access is load or store. */
15703 bool
15704 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15705 bool wback, bool load)
15707 unsigned int t, t2, n;
15709 if (!reload_completed)
15710 return true;
15712 if (!offset_ok_for_ldrd_strd (offset))
15713 return false;
15715 t = REGNO (rt);
15716 t2 = REGNO (rt2);
15717 n = REGNO (rn);
15719 if ((TARGET_THUMB2)
15720 && ((wback && (n == t || n == t2))
15721 || (t == SP_REGNUM)
15722 || (t == PC_REGNUM)
15723 || (t2 == SP_REGNUM)
15724 || (t2 == PC_REGNUM)
15725 || (!load && (n == PC_REGNUM))
15726 || (load && (t == t2))
15727 /* Triggers Cortex-M3 LDRD errata. */
15728 || (!wback && load && fix_cm3_ldrd && (n == t))))
15729 return false;
15731 if ((TARGET_ARM)
15732 && ((wback && (n == t || n == t2))
15733 || (t2 == PC_REGNUM)
15734 || (t % 2 != 0) /* First destination register is not even. */
15735 || (t2 != t + 1)
15736 /* PC can be used as base register (for offset addressing only),
15737 but it is deprecated. */
15738 || (n == PC_REGNUM)))
15739 return false;
15741 return true;
15744 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15745 operand MEM's address contains an immediate offset from the base
15746 register and has no side effects, in which case it sets BASE and
15747 OFFSET accordingly. */
15748 static bool
15749 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15751 rtx addr;
15753 gcc_assert (base != NULL && offset != NULL);
15755 /* TODO: Handle more general memory operand patterns, such as
15756 PRE_DEC and PRE_INC. */
15758 if (side_effects_p (mem))
15759 return false;
15761 /* Can't deal with subregs. */
15762 if (GET_CODE (mem) == SUBREG)
15763 return false;
15765 gcc_assert (MEM_P (mem));
15767 *offset = const0_rtx;
15769 addr = XEXP (mem, 0);
15771 /* If addr isn't valid for DImode, then we can't handle it. */
15772 if (!arm_legitimate_address_p (DImode, addr,
15773 reload_in_progress || reload_completed))
15774 return false;
15776 if (REG_P (addr))
15778 *base = addr;
15779 return true;
15781 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15783 *base = XEXP (addr, 0);
15784 *offset = XEXP (addr, 1);
15785 return (REG_P (*base) && CONST_INT_P (*offset));
15788 return false;
15791 /* Called from a peephole2 to replace two word-size accesses with a
15792 single LDRD/STRD instruction. Returns true iff we can generate a
15793 new instruction sequence. That is, both accesses use the same base
15794 register and the gap between constant offsets is 4. This function
15795 may reorder its operands to match ldrd/strd RTL templates.
15796 OPERANDS are the operands found by the peephole matcher;
15797 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15798 corresponding memory operands. LOAD indicates whether the access
15799 is a load or a store. CONST_STORE indicates a store of constant
15800 integer values held in OPERANDS[4,5] and assumes that the pattern
15801 is four insns long, for the purpose of checking dead registers.
15802 COMMUTE indicates that register operands may be reordered. */
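/* As an illustration (hypothetical code, not from the sources), the
   peephole can replace

   ldr r0, [r3]
   ldr r1, [r3, #4]

   with the single instruction

   ldrd r0, r1, [r3]

   provided the register and offset checks below succeed. */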
15803 bool
15804 gen_operands_ldrd_strd (rtx *operands, bool load,
15805 bool const_store, bool commute)
15807 int nops = 2;
15808 HOST_WIDE_INT offsets[2], offset;
15809 rtx base = NULL_RTX;
15810 rtx cur_base, cur_offset, tmp;
15811 int i, gap;
15812 HARD_REG_SET regset;
15814 gcc_assert (!const_store || !load);
15815 /* Check that the memory references are immediate offsets from the
15816 same base register. Extract the base register, the destination
15817 registers, and the corresponding memory offsets. */
15818 for (i = 0; i < nops; i++)
15820 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15821 return false;
15823 if (i == 0)
15824 base = cur_base;
15825 else if (REGNO (base) != REGNO (cur_base))
15826 return false;
15828 offsets[i] = INTVAL (cur_offset);
15829 if (GET_CODE (operands[i]) == SUBREG)
15831 tmp = SUBREG_REG (operands[i]);
15832 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15833 operands[i] = tmp;
15837 /* Make sure there is no dependency between the individual loads. */
15838 if (load && REGNO (operands[0]) == REGNO (base))
15839 return false; /* RAW */
15841 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15842 return false; /* WAW */
15844 /* If the same input register is used in both stores
15845 when storing different constants, try to find a free register.
15846 For example, the code
15847 mov r0, 0
15848 str r0, [r2]
15849 mov r0, 1
15850 str r0, [r2, #4]
15851 can be transformed into
15852 mov r1, 0
15853 strd r1, r0, [r2]
15854 in Thumb mode assuming that r1 is free. */
15855 if (const_store
15856 && REGNO (operands[0]) == REGNO (operands[1])
15857 && INTVAL (operands[4]) != INTVAL (operands[5]))
15859 if (TARGET_THUMB2)
15861 CLEAR_HARD_REG_SET (regset);
15862 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15863 if (tmp == NULL_RTX)
15864 return false;
15866 /* Use the new register in the first load to ensure that
15867 if the original input register is not dead after peephole,
15868 then it will have the correct constant value. */
15869 operands[0] = tmp;
15871 else if (TARGET_ARM)
15873 return false;
15874 int regno = REGNO (operands[0]);
15875 if (!peep2_reg_dead_p (4, operands[0]))
15877 /* When the input register is even and is not dead after the
15878 pattern, it has to hold the second constant but we cannot
15879 form a legal STRD in ARM mode with this register as the second
15880 register. */
15881 if (regno % 2 == 0)
15882 return false;
15884 /* Is regno-1 free? */
15885 SET_HARD_REG_SET (regset);
15886 CLEAR_HARD_REG_BIT(regset, regno - 1);
15887 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15888 if (tmp == NULL_RTX)
15889 return false;
15891 operands[0] = tmp;
15893 else
15895 /* Find a DImode register. */
15896 CLEAR_HARD_REG_SET (regset);
15897 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15898 if (tmp != NULL_RTX)
15900 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15901 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15903 else
15905 /* Can we use the input register to form a DI register? */
15906 SET_HARD_REG_SET (regset);
15907 CLEAR_HARD_REG_BIT(regset,
15908 regno % 2 == 0 ? regno + 1 : regno - 1);
15909 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15910 if (tmp == NULL_RTX)
15911 return false;
15912 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15916 gcc_assert (operands[0] != NULL_RTX);
15917 gcc_assert (operands[1] != NULL_RTX);
15918 gcc_assert (REGNO (operands[0]) % 2 == 0);
15919 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15923 /* Make sure the instructions are ordered with lower memory access first. */
15924 if (offsets[0] > offsets[1])
15926 gap = offsets[0] - offsets[1];
15927 offset = offsets[1];
15929 /* Swap the instructions such that lower memory is accessed first. */
15930 std::swap (operands[0], operands[1]);
15931 std::swap (operands[2], operands[3]);
15932 if (const_store)
15933 std::swap (operands[4], operands[5]);
15935 else
15937 gap = offsets[1] - offsets[0];
15938 offset = offsets[0];
15941 /* Make sure accesses are to consecutive memory locations. */
15942 if (gap != 4)
15943 return false;
15945 /* Make sure we generate legal instructions. */
15946 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15947 false, load))
15948 return true;
15950 /* In Thumb state, where registers are almost unconstrained, there
15951 is little hope to fix it. */
15952 if (TARGET_THUMB2)
15953 return false;
15955 if (load && commute)
15957 /* Try reordering registers. */
15958 std::swap (operands[0], operands[1]);
15959 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15960 false, load))
15961 return true;
15964 if (const_store)
15966 /* If input registers are dead after this pattern, they can be
15967 reordered or replaced by other registers that are free in the
15968 current pattern. */
15969 if (!peep2_reg_dead_p (4, operands[0])
15970 || !peep2_reg_dead_p (4, operands[1]))
15971 return false;
15973 /* Try to reorder the input registers. */
15974 /* For example, the code
15975 mov r0, 0
15976 mov r1, 1
15977 str r1, [r2]
15978 str r0, [r2, #4]
15979 can be transformed into
15980 mov r1, 0
15981 mov r0, 1
15982 strd r0, [r2]
15984 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15985 false, false))
15987 std::swap (operands[0], operands[1]);
15988 return true;
15991 /* Try to find a free DI register. */
15992 CLEAR_HARD_REG_SET (regset);
15993 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15994 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15995 while (true)
15997 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15998 if (tmp == NULL_RTX)
15999 return false;
16001 /* DREG must be an even-numbered register in DImode.
16002 Split it into SI registers. */
16003 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16004 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16005 gcc_assert (operands[0] != NULL_RTX);
16006 gcc_assert (operands[1] != NULL_RTX);
16007 gcc_assert (REGNO (operands[0]) % 2 == 0);
16008 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16010 return (operands_ok_ldrd_strd (operands[0], operands[1],
16011 base, offset,
16012 false, load));
16016 return false;
16022 /* Print a symbolic form of X to the debug file, F. */
16023 static void
16024 arm_print_value (FILE *f, rtx x)
16026 switch (GET_CODE (x))
16028 case CONST_INT:
16029 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16030 return;
16032 case CONST_DOUBLE:
16033 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16034 return;
16036 case CONST_VECTOR:
16038 int i;
16040 fprintf (f, "<");
16041 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16043 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16044 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16045 fputc (',', f);
16047 fprintf (f, ">");
16049 return;
16051 case CONST_STRING:
16052 fprintf (f, "\"%s\"", XSTR (x, 0));
16053 return;
16055 case SYMBOL_REF:
16056 fprintf (f, "`%s'", XSTR (x, 0));
16057 return;
16059 case LABEL_REF:
16060 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16061 return;
16063 case CONST:
16064 arm_print_value (f, XEXP (x, 0));
16065 return;
16067 case PLUS:
16068 arm_print_value (f, XEXP (x, 0));
16069 fprintf (f, "+");
16070 arm_print_value (f, XEXP (x, 1));
16071 return;
16073 case PC:
16074 fprintf (f, "pc");
16075 return;
16077 default:
16078 fprintf (f, "????");
16079 return;
16083 /* Routines for manipulation of the constant pool. */
16085 /* Arm instructions cannot load a large constant directly into a
16086 register; they have to come from a pc relative load. The constant
16087 must therefore be placed in the addressable range of the pc
16088 relative load. Depending on the precise pc relative load
16089 instruction the range is somewhere between 256 bytes and 4k. This
16090 means that we often have to dump a constant inside a function, and
16091 generate code to branch around it.
16093 It is important to minimize this, since the branches will slow
16094 things down and make the code larger.
16096 Normally we can hide the table after an existing unconditional
16097 branch so that there is no interruption of the flow, but in the
16098 worst case the code looks like this:
16100 ldr rn, L1
16102 b L2
16103 align
16104 L1: .long value
16108 ldr rn, L3
16110 b L4
16111 align
16112 L3: .long value
16116 We fix this by performing a scan after scheduling, which notices
16117 which instructions need to have their operands fetched from the
16118 constant table and builds the table.
16120 The algorithm starts by building a table of all the constants that
16121 need fixing up and all the natural barriers in the function (places
16122 where a constant table can be dropped without breaking the flow).
16123 For each fixup we note how far the pc-relative replacement will be
16124 able to reach and the offset of the instruction into the function.
16126 Having built the table we then group the fixes together to form
16127 tables that are as large as possible (subject to addressing
16128 constraints) and emit each table of constants after the last
16129 barrier that is within range of all the instructions in the group.
16130 If a group does not contain a barrier, then we forcibly create one
16131 by inserting a jump instruction into the flow. Once the table has
16132 been inserted, the insns are then modified to reference the
16133 relevant entry in the pool.
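   As a purely illustrative sketch (not part of the original description),
   the two pools in the worst case above can often be merged into a single
   table placed after one unconditional branch that is still in range of
   both loads:

   ldr rn, L1
   ...
   ldr rm, L3
   ...
   b L2
   align
   L1: .long value
   L3: .long value
   L2: ...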
16135 Possible enhancements to the algorithm (not implemented) are:
16137 1) For some processors and object formats, there may be benefit in
16138 aligning the pools to the start of cache lines; this alignment
16139 would need to be taken into account when calculating addressability
16140 of a pool. */
16142 /* These typedefs are located at the start of this file, so that
16143 they can be used in the prototypes there. This comment is to
16144 remind readers of that fact so that the following structures
16145 can be understood more easily.
16147 typedef struct minipool_node Mnode;
16148 typedef struct minipool_fixup Mfix; */
16150 struct minipool_node
16152 /* Doubly linked chain of entries. */
16153 Mnode * next;
16154 Mnode * prev;
16155 /* The maximum offset into the code at which this entry can be placed. While
16156 pushing fixes for forward references, all entries are sorted in order
16157 of increasing max_address. */
16158 HOST_WIDE_INT max_address;
16159 /* Similarly for an entry inserted for a backwards ref. */
16160 HOST_WIDE_INT min_address;
16161 /* The number of fixes referencing this entry. This can become zero
16162 if we "unpush" an entry. In this case we ignore the entry when we
16163 come to emit the code. */
16164 int refcount;
16165 /* The offset from the start of the minipool. */
16166 HOST_WIDE_INT offset;
16167 /* The value in the table. */
16168 rtx value;
16169 /* The mode of value. */
16170 machine_mode mode;
16171 /* The size of the value. With iWMMXt enabled
16172 sizes > 4 also imply an alignment of 8 bytes. */
16173 int fix_size;
16176 struct minipool_fixup
16178 Mfix * next;
16179 rtx_insn * insn;
16180 HOST_WIDE_INT address;
16181 rtx * loc;
16182 machine_mode mode;
16183 int fix_size;
16184 rtx value;
16185 Mnode * minipool;
16186 HOST_WIDE_INT forwards;
16187 HOST_WIDE_INT backwards;
16190 /* Fixes less than a word need padding out to a word boundary. */
16191 #define MINIPOOL_FIX_SIZE(mode) \
16192 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16194 static Mnode * minipool_vector_head;
16195 static Mnode * minipool_vector_tail;
16196 static rtx_code_label *minipool_vector_label;
16197 static int minipool_pad;
16199 /* The linked list of all minipool fixes required for this function. */
16200 Mfix * minipool_fix_head;
16201 Mfix * minipool_fix_tail;
16202 /* The fix entry for the current minipool, once it has been placed. */
16203 Mfix * minipool_barrier;
16205 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16206 #define JUMP_TABLES_IN_TEXT_SECTION 0
16207 #endif
16209 static HOST_WIDE_INT
16210 get_jump_table_size (rtx_jump_table_data *insn)
16212 /* ADDR_VECs only take room if read-only data goes into the text
16213 section. */
16214 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16216 rtx body = PATTERN (insn);
16217 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16218 HOST_WIDE_INT size;
16219 HOST_WIDE_INT modesize;
16221 modesize = GET_MODE_SIZE (GET_MODE (body));
16222 size = modesize * XVECLEN (body, elt);
16223 switch (modesize)
16225 case 1:
16226 /* Round up size of TBB table to a halfword boundary. */
16227 size = (size + 1) & ~(HOST_WIDE_INT)1;
16228 break;
16229 case 2:
16230 /* No padding necessary for TBH. */
16231 break;
16232 case 4:
16233 /* Add two bytes for alignment on Thumb. */
16234 if (TARGET_THUMB)
16235 size += 2;
16236 break;
16237 default:
16238 gcc_unreachable ();
16240 return size;
16243 return 0;
16246 /* Return the maximum amount of padding that will be inserted before
16247 label LABEL. */
16249 static HOST_WIDE_INT
16250 get_label_padding (rtx label)
16252 HOST_WIDE_INT align, min_insn_size;
16254 align = 1 << label_to_alignment (label);
16255 min_insn_size = TARGET_THUMB ? 2 : 4;
16256 return align > min_insn_size ? align - min_insn_size : 0;
16259 /* Move a minipool fix MP from its current location to before MAX_MP.
16260 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16261 constraints may need updating. */
16262 static Mnode *
16263 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16264 HOST_WIDE_INT max_address)
16266 /* The code below assumes these are different. */
16267 gcc_assert (mp != max_mp);
16269 if (max_mp == NULL)
16271 if (max_address < mp->max_address)
16272 mp->max_address = max_address;
16274 else
16276 if (max_address > max_mp->max_address - mp->fix_size)
16277 mp->max_address = max_mp->max_address - mp->fix_size;
16278 else
16279 mp->max_address = max_address;
16281 /* Unlink MP from its current position. Since max_mp is non-null,
16282 mp->prev must be non-null. */
16283 mp->prev->next = mp->next;
16284 if (mp->next != NULL)
16285 mp->next->prev = mp->prev;
16286 else
16287 minipool_vector_tail = mp->prev;
16289 /* Re-insert it before MAX_MP. */
16290 mp->next = max_mp;
16291 mp->prev = max_mp->prev;
16292 max_mp->prev = mp;
16294 if (mp->prev != NULL)
16295 mp->prev->next = mp;
16296 else
16297 minipool_vector_head = mp;
16300 /* Save the new entry. */
16301 max_mp = mp;
16303 /* Scan over the preceding entries and adjust their addresses as
16304 required. */
16305 while (mp->prev != NULL
16306 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16308 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16309 mp = mp->prev;
16312 return max_mp;
16315 /* Add a constant to the minipool for a forward reference. Returns the
16316 node added or NULL if the constant will not fit in this pool. */
16317 static Mnode *
16318 add_minipool_forward_ref (Mfix *fix)
16320 /* If set, max_mp is the first pool_entry that has a lower
16321 constraint than the one we are trying to add. */
16322 Mnode * max_mp = NULL;
16323 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16324 Mnode * mp;
16326 /* If the minipool starts before the end of FIX->INSN then this FIX
16327 can not be placed into the current pool. Furthermore, adding the
16328 new constant pool entry may cause the pool to start FIX_SIZE bytes
16329 earlier. */
16330 if (minipool_vector_head &&
16331 (fix->address + get_attr_length (fix->insn)
16332 >= minipool_vector_head->max_address - fix->fix_size))
16333 return NULL;
16335 /* Scan the pool to see if a constant with the same value has
16336 already been added. While we are doing this, also note the
16337 location where we must insert the constant if it doesn't already
16338 exist. */
16339 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16341 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16342 && fix->mode == mp->mode
16343 && (!LABEL_P (fix->value)
16344 || (CODE_LABEL_NUMBER (fix->value)
16345 == CODE_LABEL_NUMBER (mp->value)))
16346 && rtx_equal_p (fix->value, mp->value))
16348 /* More than one fix references this entry. */
16349 mp->refcount++;
16350 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16353 /* Note the insertion point if necessary. */
16354 if (max_mp == NULL
16355 && mp->max_address > max_address)
16356 max_mp = mp;
16358 /* If we are inserting an 8-byte aligned quantity and
16359 we have not already found an insertion point, then
16360 make sure that all such 8-byte aligned quantities are
16361 placed at the start of the pool. */
16362 if (ARM_DOUBLEWORD_ALIGN
16363 && max_mp == NULL
16364 && fix->fix_size >= 8
16365 && mp->fix_size < 8)
16367 max_mp = mp;
16368 max_address = mp->max_address;
16372 /* The value is not currently in the minipool, so we need to create
16373 a new entry for it. If MAX_MP is NULL, the entry will be put on
16374 the end of the list since the placement is less constrained than
16375 any existing entry. Otherwise, we insert the new fix before
16376 MAX_MP and, if necessary, adjust the constraints on the other
16377 entries. */
16378 mp = XNEW (Mnode);
16379 mp->fix_size = fix->fix_size;
16380 mp->mode = fix->mode;
16381 mp->value = fix->value;
16382 mp->refcount = 1;
16383 /* Not yet required for a backwards ref. */
16384 mp->min_address = -65536;
16386 if (max_mp == NULL)
16388 mp->max_address = max_address;
16389 mp->next = NULL;
16390 mp->prev = minipool_vector_tail;
16392 if (mp->prev == NULL)
16394 minipool_vector_head = mp;
16395 minipool_vector_label = gen_label_rtx ();
16397 else
16398 mp->prev->next = mp;
16400 minipool_vector_tail = mp;
16402 else
16404 if (max_address > max_mp->max_address - mp->fix_size)
16405 mp->max_address = max_mp->max_address - mp->fix_size;
16406 else
16407 mp->max_address = max_address;
16409 mp->next = max_mp;
16410 mp->prev = max_mp->prev;
16411 max_mp->prev = mp;
16412 if (mp->prev != NULL)
16413 mp->prev->next = mp;
16414 else
16415 minipool_vector_head = mp;
16418 /* Save the new entry. */
16419 max_mp = mp;
16421 /* Scan over the preceding entries and adjust their addresses as
16422 required. */
16423 while (mp->prev != NULL
16424 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16426 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16427 mp = mp->prev;
16430 return max_mp;
16433 static Mnode *
16434 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16435 HOST_WIDE_INT min_address)
16437 HOST_WIDE_INT offset;
16439 /* The code below assumes these are different. */
16440 gcc_assert (mp != min_mp);
16442 if (min_mp == NULL)
16444 if (min_address > mp->min_address)
16445 mp->min_address = min_address;
16447 else
16449 /* We will adjust this below if it is too loose. */
16450 mp->min_address = min_address;
16452 /* Unlink MP from its current position. Since min_mp is non-null,
16453 mp->next must be non-null. */
16454 mp->next->prev = mp->prev;
16455 if (mp->prev != NULL)
16456 mp->prev->next = mp->next;
16457 else
16458 minipool_vector_head = mp->next;
16460 /* Reinsert it after MIN_MP. */
16461 mp->prev = min_mp;
16462 mp->next = min_mp->next;
16463 min_mp->next = mp;
16464 if (mp->next != NULL)
16465 mp->next->prev = mp;
16466 else
16467 minipool_vector_tail = mp;
16470 min_mp = mp;
16472 offset = 0;
16473 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16475 mp->offset = offset;
16476 if (mp->refcount > 0)
16477 offset += mp->fix_size;
16479 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16480 mp->next->min_address = mp->min_address + mp->fix_size;
16483 return min_mp;
16486 /* Add a constant to the minipool for a backward reference. Returns the
16487 node added or NULL if the constant will not fit in this pool.
16489 Note that the code for insertion for a backwards reference can be
16490 somewhat confusing because the calculated offsets for each fix do
16491 not take into account the size of the pool (which is still under
16492 construction). */
16493 static Mnode *
16494 add_minipool_backward_ref (Mfix *fix)
16496 /* If set, min_mp is the last pool_entry that has a lower constraint
16497 than the one we are trying to add. */
16498 Mnode *min_mp = NULL;
16499 /* This can be negative, since it is only a constraint. */
16500 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16501 Mnode *mp;
16503 /* If we can't reach the current pool from this insn, or if we can't
16504 insert this entry at the end of the pool without pushing other
16505 fixes out of range, then we don't try. This ensures that we
16506 can't fail later on. */
16507 if (min_address >= minipool_barrier->address
16508 || (minipool_vector_tail->min_address + fix->fix_size
16509 >= minipool_barrier->address))
16510 return NULL;
16512 /* Scan the pool to see if a constant with the same value has
16513 already been added. While we are doing this, also note the
16514 location where we must insert the constant if it doesn't already
16515 exist. */
16516 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16518 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16519 && fix->mode == mp->mode
16520 && (!LABEL_P (fix->value)
16521 || (CODE_LABEL_NUMBER (fix->value)
16522 == CODE_LABEL_NUMBER (mp->value)))
16523 && rtx_equal_p (fix->value, mp->value)
16524 /* Check that there is enough slack to move this entry to the
16525 end of the table (this is conservative). */
16526 && (mp->max_address
16527 > (minipool_barrier->address
16528 + minipool_vector_tail->offset
16529 + minipool_vector_tail->fix_size)))
16531 mp->refcount++;
16532 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16535 if (min_mp != NULL)
16536 mp->min_address += fix->fix_size;
16537 else
16539 /* Note the insertion point if necessary. */
16540 if (mp->min_address < min_address)
16542 /* For now, we do not allow the insertion of 8-byte alignment
16543 requiring nodes anywhere but at the start of the pool. */
16544 if (ARM_DOUBLEWORD_ALIGN
16545 && fix->fix_size >= 8 && mp->fix_size < 8)
16546 return NULL;
16547 else
16548 min_mp = mp;
16550 else if (mp->max_address
16551 < minipool_barrier->address + mp->offset + fix->fix_size)
16553 /* Inserting before this entry would push the fix beyond
16554 its maximum address (which can happen if we have
16555 re-located a forwards fix); force the new fix to come
16556 after it. */
16557 if (ARM_DOUBLEWORD_ALIGN
16558 && fix->fix_size >= 8 && mp->fix_size < 8)
16559 return NULL;
16560 else
16562 min_mp = mp;
16563 min_address = mp->min_address + fix->fix_size;
16566 /* Do not insert a non-8-byte aligned quantity before 8-byte
16567 aligned quantities. */
16568 else if (ARM_DOUBLEWORD_ALIGN
16569 && fix->fix_size < 8
16570 && mp->fix_size >= 8)
16572 min_mp = mp;
16573 min_address = mp->min_address + fix->fix_size;
16578 /* We need to create a new entry. */
16579 mp = XNEW (Mnode);
16580 mp->fix_size = fix->fix_size;
16581 mp->mode = fix->mode;
16582 mp->value = fix->value;
16583 mp->refcount = 1;
16584 mp->max_address = minipool_barrier->address + 65536;
16586 mp->min_address = min_address;
16588 if (min_mp == NULL)
16590 mp->prev = NULL;
16591 mp->next = minipool_vector_head;
16593 if (mp->next == NULL)
16595 minipool_vector_tail = mp;
16596 minipool_vector_label = gen_label_rtx ();
16598 else
16599 mp->next->prev = mp;
16601 minipool_vector_head = mp;
16603 else
16605 mp->next = min_mp->next;
16606 mp->prev = min_mp;
16607 min_mp->next = mp;
16609 if (mp->next != NULL)
16610 mp->next->prev = mp;
16611 else
16612 minipool_vector_tail = mp;
16615 /* Save the new entry. */
16616 min_mp = mp;
16618 if (mp->prev)
16619 mp = mp->prev;
16620 else
16621 mp->offset = 0;
16623 /* Scan over the following entries and adjust their offsets. */
16624 while (mp->next != NULL)
16626 if (mp->next->min_address < mp->min_address + mp->fix_size)
16627 mp->next->min_address = mp->min_address + mp->fix_size;
16629 if (mp->refcount)
16630 mp->next->offset = mp->offset + mp->fix_size;
16631 else
16632 mp->next->offset = mp->offset;
16634 mp = mp->next;
16637 return min_mp;
16640 static void
16641 assign_minipool_offsets (Mfix *barrier)
16643 HOST_WIDE_INT offset = 0;
16644 Mnode *mp;
16646 minipool_barrier = barrier;
16648 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16650 mp->offset = offset;
16652 if (mp->refcount > 0)
16653 offset += mp->fix_size;
16657 /* Output the literal table */
16658 static void
16659 dump_minipool (rtx_insn *scan)
16661 Mnode * mp;
16662 Mnode * nmp;
16663 int align64 = 0;
16665 if (ARM_DOUBLEWORD_ALIGN)
16666 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16667 if (mp->refcount > 0 && mp->fix_size >= 8)
16669 align64 = 1;
16670 break;
16673 if (dump_file)
16674 fprintf (dump_file,
16675 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16676 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16678 scan = emit_label_after (gen_label_rtx (), scan);
16679 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16680 scan = emit_label_after (minipool_vector_label, scan);
16682 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16684 if (mp->refcount > 0)
16686 if (dump_file)
16688 fprintf (dump_file,
16689 ";; Offset %u, min %ld, max %ld ",
16690 (unsigned) mp->offset, (unsigned long) mp->min_address,
16691 (unsigned long) mp->max_address);
16692 arm_print_value (dump_file, mp->value);
16693 fputc ('\n', dump_file);
16696 switch (GET_MODE_SIZE (mp->mode))
16698 #ifdef HAVE_consttable_1
16699 case 1:
16700 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16701 break;
16703 #endif
16704 #ifdef HAVE_consttable_2
16705 case 2:
16706 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16707 break;
16709 #endif
16710 #ifdef HAVE_consttable_4
16711 case 4:
16712 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16713 break;
16715 #endif
16716 #ifdef HAVE_consttable_8
16717 case 8:
16718 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16719 break;
16721 #endif
16722 #ifdef HAVE_consttable_16
16723 case 16:
16724 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16725 break;
16727 #endif
16728 default:
16729 gcc_unreachable ();
16733 nmp = mp->next;
16734 free (mp);
16737 minipool_vector_head = minipool_vector_tail = NULL;
16738 scan = emit_insn_after (gen_consttable_end (), scan);
16739 scan = emit_barrier_after (scan);
16742 /* Return the cost of forcibly inserting a barrier after INSN. */
16743 static int
16744 arm_barrier_cost (rtx_insn *insn)
16746 /* Basing the location of the pool on the loop depth is preferable,
16747 but at the moment, the basic block information seems to be
16748 corrupt by this stage of the compilation. */
16749 int base_cost = 50;
16750 rtx_insn *next = next_nonnote_insn (insn);
16752 if (next != NULL && LABEL_P (next))
16753 base_cost -= 20;
16755 switch (GET_CODE (insn))
16757 case CODE_LABEL:
16758 /* It will always be better to place the table before the label, rather
16759 than after it. */
16760 return 50;
16762 case INSN:
16763 case CALL_INSN:
16764 return base_cost;
16766 case JUMP_INSN:
16767 return base_cost - 10;
16769 default:
16770 return base_cost + 10;
16774 /* Find the best place in the insn stream in the range
16775 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16776 Create the barrier by inserting a jump and add a new fix entry for
16777 it. */
16778 static Mfix *
16779 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16781 HOST_WIDE_INT count = 0;
16782 rtx_barrier *barrier;
16783 rtx_insn *from = fix->insn;
16784 /* The instruction after which we will insert the jump. */
16785 rtx_insn *selected = NULL;
16786 int selected_cost;
16787 /* The address at which the jump instruction will be placed. */
16788 HOST_WIDE_INT selected_address;
16789 Mfix * new_fix;
16790 HOST_WIDE_INT max_count = max_address - fix->address;
16791 rtx_code_label *label = gen_label_rtx ();
16793 selected_cost = arm_barrier_cost (from);
16794 selected_address = fix->address;
16796 while (from && count < max_count)
16798 rtx_jump_table_data *tmp;
16799 int new_cost;
16801 /* This code shouldn't have been called if there was a natural barrier
16802 within range. */
16803 gcc_assert (!BARRIER_P (from));
16805 /* Count the length of this insn. This must stay in sync with the
16806 code that pushes minipool fixes. */
16807 if (LABEL_P (from))
16808 count += get_label_padding (from);
16809 else
16810 count += get_attr_length (from);
16812 /* If there is a jump table, add its length. */
16813 if (tablejump_p (from, NULL, &tmp))
16815 count += get_jump_table_size (tmp);
16817 /* Jump tables aren't in a basic block, so base the cost on
16818 the dispatch insn. If we select this location, we will
16819 still put the pool after the table. */
16820 new_cost = arm_barrier_cost (from);
16822 if (count < max_count
16823 && (!selected || new_cost <= selected_cost))
16825 selected = tmp;
16826 selected_cost = new_cost;
16827 selected_address = fix->address + count;
16830 /* Continue after the dispatch table. */
16831 from = NEXT_INSN (tmp);
16832 continue;
16835 new_cost = arm_barrier_cost (from);
16837 if (count < max_count
16838 && (!selected || new_cost <= selected_cost))
16840 selected = from;
16841 selected_cost = new_cost;
16842 selected_address = fix->address + count;
16845 from = NEXT_INSN (from);
16848 /* Make sure that we found a place to insert the jump. */
16849 gcc_assert (selected);
16851 /* Make sure we do not split a call and its corresponding
16852 CALL_ARG_LOCATION note. */
16853 if (CALL_P (selected))
16855 rtx_insn *next = NEXT_INSN (selected);
16856 if (next && NOTE_P (next)
16857 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16858 selected = next;
16861 /* Create a new JUMP_INSN that branches around a barrier. */
16862 from = emit_jump_insn_after (gen_jump (label), selected);
16863 JUMP_LABEL (from) = label;
16864 barrier = emit_barrier_after (from);
16865 emit_label_after (label, barrier);
16867 /* Create a minipool barrier entry for the new barrier. */
16868 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16869 new_fix->insn = barrier;
16870 new_fix->address = selected_address;
16871 new_fix->next = fix->next;
16872 fix->next = new_fix;
16874 return new_fix;
16877 /* Record that there is a natural barrier in the insn stream at
16878 ADDRESS. */
16879 static void
16880 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16882 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16884 fix->insn = insn;
16885 fix->address = address;
16887 fix->next = NULL;
16888 if (minipool_fix_head != NULL)
16889 minipool_fix_tail->next = fix;
16890 else
16891 minipool_fix_head = fix;
16893 minipool_fix_tail = fix;
16896 /* Record INSN, which will need fixing up to load a value from the
16897 minipool. ADDRESS is the offset of the insn since the start of the
16898 function; LOC is a pointer to the part of the insn which requires
16899 fixing; VALUE is the constant that must be loaded, which is of type
16900 MODE. */
16901 static void
16902 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16903 machine_mode mode, rtx value)
16905 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16907 fix->insn = insn;
16908 fix->address = address;
16909 fix->loc = loc;
16910 fix->mode = mode;
16911 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16912 fix->value = value;
16913 fix->forwards = get_attr_pool_range (insn);
16914 fix->backwards = get_attr_neg_pool_range (insn);
16915 fix->minipool = NULL;
16917 /* If an insn doesn't have a range defined for it, then it isn't
16918 expecting to be reworked by this code. Better to stop now than
16919 to generate duff assembly code. */
16920 gcc_assert (fix->forwards || fix->backwards);
16922 /* If an entry requires 8-byte alignment then assume all constant pools
16923 require 4 bytes of padding. Trying to do this later on a per-pool
16924 basis is awkward because existing pool entries have to be modified. */
16925 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16926 minipool_pad = 4;
16928 if (dump_file)
16930 fprintf (dump_file,
16931 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16932 GET_MODE_NAME (mode),
16933 INSN_UID (insn), (unsigned long) address,
16934 -1 * (long)fix->backwards, (long)fix->forwards);
16935 arm_print_value (dump_file, fix->value);
16936 fprintf (dump_file, "\n");
16939 /* Add it to the chain of fixes. */
16940 fix->next = NULL;
16942 if (minipool_fix_head != NULL)
16943 minipool_fix_tail->next = fix;
16944 else
16945 minipool_fix_head = fix;
16947 minipool_fix_tail = fix;
16950 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16951 Returns the number of insns needed, or 99 if we always want to synthesize
16952 the value. */
16954 arm_max_const_double_inline_cost ()
16956 /* Let the value get synthesized to avoid the use of literal pools. */
16957 if (arm_disable_literal_pool)
16958 return 99;
16960 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16963 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16964 Returns the number of insns needed, or 99 if we don't know how to
16965 do it. */
16967 arm_const_double_inline_cost (rtx val)
16969 rtx lowpart, highpart;
16970 machine_mode mode;
16972 mode = GET_MODE (val);
16974 if (mode == VOIDmode)
16975 mode = DImode;
16977 gcc_assert (GET_MODE_SIZE (mode) == 8);
16979 lowpart = gen_lowpart (SImode, val);
16980 highpart = gen_highpart_mode (SImode, mode, val);
16982 gcc_assert (CONST_INT_P (lowpart));
16983 gcc_assert (CONST_INT_P (highpart));
16985 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16986 NULL_RTX, NULL_RTX, 0, 0)
16987 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16988 NULL_RTX, NULL_RTX, 0, 0));
16991 /* Cost of loading a SImode constant. */
16992 static inline int
16993 arm_const_inline_cost (enum rtx_code code, rtx val)
16995 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16996 NULL_RTX, NULL_RTX, 1, 0);
16999 /* Return true if it is worthwhile to split a 64-bit constant into two
17000 32-bit operations. This is the case if optimizing for size, or
17001 if we have load delay slots, or if one 32-bit part can be done with
17002 a single data operation. */
17003 bool
17004 arm_const_double_by_parts (rtx val)
17006 machine_mode mode = GET_MODE (val);
17007 rtx part;
17009 if (optimize_size || arm_ld_sched)
17010 return true;
17012 if (mode == VOIDmode)
17013 mode = DImode;
17015 part = gen_highpart_mode (SImode, mode, val);
17017 gcc_assert (CONST_INT_P (part));
17019 if (const_ok_for_arm (INTVAL (part))
17020 || const_ok_for_arm (~INTVAL (part)))
17021 return true;
17023 part = gen_lowpart (SImode, val);
17025 gcc_assert (CONST_INT_P (part));
17027 if (const_ok_for_arm (INTVAL (part))
17028 || const_ok_for_arm (~INTVAL (part)))
17029 return true;
17031 return false;
17034 /* Return true if it is possible to inline both the high and low parts
17035 of a 64-bit constant into 32-bit data processing instructions. */
17036 bool
17037 arm_const_double_by_immediates (rtx val)
17039 machine_mode mode = GET_MODE (val);
17040 rtx part;
17042 if (mode == VOIDmode)
17043 mode = DImode;
17045 part = gen_highpart_mode (SImode, mode, val);
17047 gcc_assert (CONST_INT_P (part));
17049 if (!const_ok_for_arm (INTVAL (part)))
17050 return false;
17052 part = gen_lowpart (SImode, val);
17054 gcc_assert (CONST_INT_P (part));
17056 if (!const_ok_for_arm (INTVAL (part)))
17057 return false;
17059 return true;
17062 /* Scan INSN and note any of its operands that need fixing.
17063 If DO_PUSHES is false we do not actually push any of the fixups
17064 needed. */
17065 static void
17066 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17068 int opno;
17070 extract_constrain_insn (insn);
17072 if (recog_data.n_alternatives == 0)
17073 return;
17075 /* Fill in recog_op_alt with information about the constraints of
17076 this insn. */
17077 preprocess_constraints (insn);
17079 const operand_alternative *op_alt = which_op_alt ();
17080 for (opno = 0; opno < recog_data.n_operands; opno++)
17082 /* Things we need to fix can only occur in inputs. */
17083 if (recog_data.operand_type[opno] != OP_IN)
17084 continue;
17086 /* If this alternative is a memory reference, then any mention
17087 of constants in this alternative is really to fool reload
17088 into allowing us to accept one there. We need to fix them up
17089 now so that we output the right code. */
17090 if (op_alt[opno].memory_ok)
17092 rtx op = recog_data.operand[opno];
17094 if (CONSTANT_P (op))
17096 if (do_pushes)
17097 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17098 recog_data.operand_mode[opno], op);
17100 else if (MEM_P (op)
17101 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17102 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17104 if (do_pushes)
17106 rtx cop = avoid_constant_pool_reference (op);
17108 /* Casting the address of something to a mode narrower
17109 than a word can cause avoid_constant_pool_reference()
17110 to return the pool reference itself. That's no good to
17111 us here. Let's just hope that we can use the
17112 constant pool value directly. */
17113 if (op == cop)
17114 cop = get_pool_constant (XEXP (op, 0));
17116 push_minipool_fix (insn, address,
17117 recog_data.operand_loc[opno],
17118 recog_data.operand_mode[opno], cop);
17125 return;
17128 /* Rewrite move insn into subtract of 0 if the condition codes will
17129 be useful in the next conditional jump insn. */
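/* For instance (an illustrative sketch, not taken from the sources), a
   sequence along the lines of

   mov r3, r0
   cmp r3, #0
   beq .L1

   can be rewritten as

   subs r3, r0, #0
   beq .L1

   since the subtract of zero sets the condition codes that the following
   branch needs, allowing the separate compare to be dropped. */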
17131 static void
17132 thumb1_reorg (void)
17134 basic_block bb;
17136 FOR_EACH_BB_FN (bb, cfun)
17138 rtx dest, src;
17139 rtx pat, op0, set = NULL;
17140 rtx_insn *prev, *insn = BB_END (bb);
17141 bool insn_clobbered = false;
17143 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17144 insn = PREV_INSN (insn);
17146 /* Find the last cbranchsi4_insn in basic block BB. */
17147 if (insn == BB_HEAD (bb)
17148 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17149 continue;
17151 /* Get the register with which we are comparing. */
17152 pat = PATTERN (insn);
17153 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17155 /* Find the first flag setting insn before INSN in basic block BB. */
17156 gcc_assert (insn != BB_HEAD (bb));
17157 for (prev = PREV_INSN (insn);
17158 (!insn_clobbered
17159 && prev != BB_HEAD (bb)
17160 && (NOTE_P (prev)
17161 || DEBUG_INSN_P (prev)
17162 || ((set = single_set (prev)) != NULL
17163 && get_attr_conds (prev) == CONDS_NOCOND)));
17164 prev = PREV_INSN (prev))
17166 if (reg_set_p (op0, prev))
17167 insn_clobbered = true;
17170 /* Skip if op0 is clobbered by an insn other than prev. */
17171 if (insn_clobbered)
17172 continue;
17174 if (!set)
17175 continue;
17177 dest = SET_DEST (set);
17178 src = SET_SRC (set);
17179 if (!low_register_operand (dest, SImode)
17180 || !low_register_operand (src, SImode))
17181 continue;
17183 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17184 in INSN. Both src and dest of the move insn are checked. */
17185 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17187 dest = copy_rtx (dest);
17188 src = copy_rtx (src);
17189 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17190 PATTERN (prev) = gen_rtx_SET (dest, src);
17191 INSN_CODE (prev) = -1;
17192 /* Set test register in INSN to dest. */
17193 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17194 INSN_CODE (insn) = -1;
17199 /* Convert instructions to their cc-clobbering variant if possible, since
17200 that allows us to use smaller encodings. */
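/* As an example (illustrative only), when the condition flags are dead at
   that point, a 32-bit Thumb-2 "add r2, r0, r1" can be replaced by the
   flag-setting "adds r2, r0, r1", which has a 16-bit encoding. */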
17202 static void
17203 thumb2_reorg (void)
17205 basic_block bb;
17206 regset_head live;
17208 INIT_REG_SET (&live);
17210 /* We are freeing block_for_insn in the toplev to keep compatibility
17211 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17212 compute_bb_for_insn ();
17213 df_analyze ();
17215 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17217 FOR_EACH_BB_FN (bb, cfun)
17219 if ((current_tune->disparage_flag_setting_t16_encodings
17220 == tune_params::DISPARAGE_FLAGS_ALL)
17221 && optimize_bb_for_speed_p (bb))
17222 continue;
17224 rtx_insn *insn;
17225 Convert_Action action = SKIP;
17226 Convert_Action action_for_partial_flag_setting
17227 = ((current_tune->disparage_flag_setting_t16_encodings
17228 != tune_params::DISPARAGE_FLAGS_NEITHER)
17229 && optimize_bb_for_speed_p (bb))
17230 ? SKIP : CONV;
17232 COPY_REG_SET (&live, DF_LR_OUT (bb));
17233 df_simulate_initialize_backwards (bb, &live);
17234 FOR_BB_INSNS_REVERSE (bb, insn)
17236 if (NONJUMP_INSN_P (insn)
17237 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17238 && GET_CODE (PATTERN (insn)) == SET)
17240 action = SKIP;
17241 rtx pat = PATTERN (insn);
17242 rtx dst = XEXP (pat, 0);
17243 rtx src = XEXP (pat, 1);
17244 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17246 if (UNARY_P (src) || BINARY_P (src))
17247 op0 = XEXP (src, 0);
17249 if (BINARY_P (src))
17250 op1 = XEXP (src, 1);
17252 if (low_register_operand (dst, SImode))
17254 switch (GET_CODE (src))
17256 case PLUS:
17257 /* Adding two registers and storing the result
17258 in the first source is already a 16-bit
17259 operation. */
17260 if (rtx_equal_p (dst, op0)
17261 && register_operand (op1, SImode))
17262 break;
17264 if (low_register_operand (op0, SImode))
17266 /* ADDS <Rd>,<Rn>,<Rm> */
17267 if (low_register_operand (op1, SImode))
17268 action = CONV;
17269 /* ADDS <Rdn>,#<imm8> */
17270 /* SUBS <Rdn>,#<imm8> */
17271 else if (rtx_equal_p (dst, op0)
17272 && CONST_INT_P (op1)
17273 && IN_RANGE (INTVAL (op1), -255, 255))
17274 action = CONV;
17275 /* ADDS <Rd>,<Rn>,#<imm3> */
17276 /* SUBS <Rd>,<Rn>,#<imm3> */
17277 else if (CONST_INT_P (op1)
17278 && IN_RANGE (INTVAL (op1), -7, 7))
17279 action = CONV;
17281 /* ADCS <Rd>, <Rn> */
17282 else if (GET_CODE (XEXP (src, 0)) == PLUS
17283 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17284 && low_register_operand (XEXP (XEXP (src, 0), 1),
17285 SImode)
17286 && COMPARISON_P (op1)
17287 && cc_register (XEXP (op1, 0), VOIDmode)
17288 && maybe_get_arm_condition_code (op1) == ARM_CS
17289 && XEXP (op1, 1) == const0_rtx)
17290 action = CONV;
17291 break;
17293 case MINUS:
17294 /* RSBS <Rd>,<Rn>,#0
17295 Not handled here: see NEG below. */
17296 /* SUBS <Rd>,<Rn>,#<imm3>
17297 SUBS <Rdn>,#<imm8>
17298 Not handled here: see PLUS above. */
17299 /* SUBS <Rd>,<Rn>,<Rm> */
17300 if (low_register_operand (op0, SImode)
17301 && low_register_operand (op1, SImode))
17302 action = CONV;
17303 break;
17305 case MULT:
17306 /* MULS <Rdm>,<Rn>,<Rdm>
17307 As an exception to the rule, this is only used
17308 when optimizing for size since MULS is slow on all
17309 known implementations. We do not even want to use
17310 MULS in cold code, if optimizing for speed, so we
17311 test the global flag here. */
17312 if (!optimize_size)
17313 break;
17314 /* else fall through. */
17315 case AND:
17316 case IOR:
17317 case XOR:
17318 /* ANDS <Rdn>,<Rm> */
17319 if (rtx_equal_p (dst, op0)
17320 && low_register_operand (op1, SImode))
17321 action = action_for_partial_flag_setting;
17322 else if (rtx_equal_p (dst, op1)
17323 && low_register_operand (op0, SImode))
17324 action = action_for_partial_flag_setting == SKIP
17325 ? SKIP : SWAP_CONV;
17326 break;
17328 case ASHIFTRT:
17329 case ASHIFT:
17330 case LSHIFTRT:
17331 /* ASRS <Rdn>,<Rm> */
17332 /* LSRS <Rdn>,<Rm> */
17333 /* LSLS <Rdn>,<Rm> */
17334 if (rtx_equal_p (dst, op0)
17335 && low_register_operand (op1, SImode))
17336 action = action_for_partial_flag_setting;
17337 /* ASRS <Rd>,<Rm>,#<imm5> */
17338 /* LSRS <Rd>,<Rm>,#<imm5> */
17339 /* LSLS <Rd>,<Rm>,#<imm5> */
17340 else if (low_register_operand (op0, SImode)
17341 && CONST_INT_P (op1)
17342 && IN_RANGE (INTVAL (op1), 0, 31))
17343 action = action_for_partial_flag_setting;
17344 break;
17346 case ROTATERT:
17347 /* RORS <Rdn>,<Rm> */
17348 if (rtx_equal_p (dst, op0)
17349 && low_register_operand (op1, SImode))
17350 action = action_for_partial_flag_setting;
17351 break;
17353 case NOT:
17354 /* MVNS <Rd>,<Rm> */
17355 if (low_register_operand (op0, SImode))
17356 action = action_for_partial_flag_setting;
17357 break;
17359 case NEG:
17360 /* NEGS <Rd>,<Rm> (a.k.a. RSBS) */
17361 if (low_register_operand (op0, SImode))
17362 action = CONV;
17363 break;
17365 case CONST_INT:
17366 /* MOVS <Rd>,#<imm8> */
17367 if (CONST_INT_P (src)
17368 && IN_RANGE (INTVAL (src), 0, 255))
17369 action = action_for_partial_flag_setting;
17370 break;
17372 case REG:
17373 /* MOVS and MOV<c> with registers have different
17374 encodings, so are not relevant here. */
17375 break;
17377 default:
17378 break;
17382 if (action != SKIP)
17384 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17385 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17386 rtvec vec;
17388 if (action == SWAP_CONV)
17390 src = copy_rtx (src);
17391 XEXP (src, 0) = op1;
17392 XEXP (src, 1) = op0;
17393 pat = gen_rtx_SET (dst, src);
17394 vec = gen_rtvec (2, pat, clobber);
17396 else /* action == CONV */
17397 vec = gen_rtvec (2, pat, clobber);
17399 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17400 INSN_CODE (insn) = -1;
17404 if (NONDEBUG_INSN_P (insn))
17405 df_simulate_one_insn_backwards (bb, insn, &live);
17409 CLEAR_REG_SET (&live);
17412 /* GCC puts the pool in the wrong place for ARM, since we can only
17413 load addresses a limited distance around the pc. We do some
17414 special munging to move the constant pool values to the correct
17415 point in the code. */
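/* For illustration only (register names and labels are made up): a
   constant load that cannot be encoded as an immediate, schematically

        ldr     r0, =0x12345678

   is recorded as a "fix" and later rewritten as a PC-relative load
   from a minipool dumped after a nearby barrier, roughly

        ldr     r0, .LPOOL
        ...
   .LPOOL:
        .word   0x12345678  */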
17416 static void
17417 arm_reorg (void)
17419 rtx_insn *insn;
17420 HOST_WIDE_INT address = 0;
17421 Mfix * fix;
17423 if (TARGET_THUMB1)
17424 thumb1_reorg ();
17425 else if (TARGET_THUMB2)
17426 thumb2_reorg ();
17428 /* Ensure all insns that must be split have been split at this point.
17429 Otherwise, the pool placement code below may compute incorrect
17430 insn lengths. Note that when optimizing, all insns have already
17431 been split at this point. */
17432 if (!optimize)
17433 split_all_insns_noflow ();
17435 minipool_fix_head = minipool_fix_tail = NULL;
17437 /* The first insn must always be a note, or the code below won't
17438 scan it properly. */
17439 insn = get_insns ();
17440 gcc_assert (NOTE_P (insn));
17441 minipool_pad = 0;
17443 /* Scan all the insns and record the operands that will need fixing. */
17444 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17446 if (BARRIER_P (insn))
17447 push_minipool_barrier (insn, address);
17448 else if (INSN_P (insn))
17450 rtx_jump_table_data *table;
17452 note_invalid_constants (insn, address, true);
17453 address += get_attr_length (insn);
17455 /* If the insn is a vector jump, add the size of the table
17456 and skip the table. */
17457 if (tablejump_p (insn, NULL, &table))
17459 address += get_jump_table_size (table);
17460 insn = table;
17463 else if (LABEL_P (insn))
17464 /* Add the worst-case padding due to alignment. We don't add
17465 the _current_ padding because the minipool insertions
17466 themselves might change it. */
17467 address += get_label_padding (insn);
17470 fix = minipool_fix_head;
17472 /* Now scan the fixups and perform the required changes. */
17473 while (fix)
17475 Mfix * ftmp;
17476 Mfix * fdel;
17477 Mfix * last_added_fix;
17478 Mfix * last_barrier = NULL;
17479 Mfix * this_fix;
17481 /* Skip any further barriers before the next fix. */
17482 while (fix && BARRIER_P (fix->insn))
17483 fix = fix->next;
17485 /* No more fixes. */
17486 if (fix == NULL)
17487 break;
17489 last_added_fix = NULL;
17491 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17493 if (BARRIER_P (ftmp->insn))
17495 if (ftmp->address >= minipool_vector_head->max_address)
17496 break;
17498 last_barrier = ftmp;
17500 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17501 break;
17503 last_added_fix = ftmp; /* Keep track of the last fix added. */
17506 /* If we found a barrier, drop back to that; any fixes that we
17507 could have reached but come after the barrier will now go in
17508 the next mini-pool. */
17509 if (last_barrier != NULL)
17511 /* Reduce the refcount for those fixes that won't go into this
17512 pool after all. */
17513 for (fdel = last_barrier->next;
17514 fdel && fdel != ftmp;
17515 fdel = fdel->next)
17517 fdel->minipool->refcount--;
17518 fdel->minipool = NULL;
17521 ftmp = last_barrier;
17523 else
17525 /* ftmp is the first fix that we can't fit into this pool and
17526 there are no natural barriers that we could use. Insert a
17527 new barrier in the code somewhere between the previous
17528 fix and this one, and arrange to jump around it. */
17529 HOST_WIDE_INT max_address;
17531 /* The last item on the list of fixes must be a barrier, so
17532 we can never run off the end of the list of fixes without
17533 last_barrier being set. */
17534 gcc_assert (ftmp);
17536 max_address = minipool_vector_head->max_address;
17537 /* Check that there isn't another fix that is in range that
17538 we couldn't fit into this pool because the pool was
17539 already too large: we need to put the pool before such an
17540 instruction. The pool itself may come just after the
17541 fix because create_fix_barrier also allows space for a
17542 jump instruction. */
17543 if (ftmp->address < max_address)
17544 max_address = ftmp->address + 1;
17546 last_barrier = create_fix_barrier (last_added_fix, max_address);
17549 assign_minipool_offsets (last_barrier);
17551 while (ftmp)
17553 if (!BARRIER_P (ftmp->insn)
17554 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17555 == NULL))
17556 break;
17558 ftmp = ftmp->next;
17561 /* Scan over the fixes we have identified for this pool, fixing them
17562 up and adding the constants to the pool itself. */
17563 for (this_fix = fix; this_fix && ftmp != this_fix;
17564 this_fix = this_fix->next)
17565 if (!BARRIER_P (this_fix->insn))
17567 rtx addr
17568 = plus_constant (Pmode,
17569 gen_rtx_LABEL_REF (VOIDmode,
17570 minipool_vector_label),
17571 this_fix->minipool->offset);
17572 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17575 dump_minipool (last_barrier->insn);
17576 fix = ftmp;
17579 /* From now on we must synthesize any constants that we can't handle
17580 directly. This can happen if the RTL gets split during final
17581 instruction generation. */
17582 cfun->machine->after_arm_reorg = 1;
17584 /* Free the minipool memory. */
17585 obstack_free (&minipool_obstack, minipool_startobj);
17588 /* Routines to output assembly language. */
17590 /* Return string representation of passed in real value. */
17591 static const char *
17592 fp_const_from_val (REAL_VALUE_TYPE *r)
17594 if (!fp_consts_inited)
17595 init_fp_table ();
17597 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17598 return "0";
17601 /* OPERANDS[0] is the entire list of insns that constitute pop,
17602 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17603 is in the list, UPDATE is true iff the list contains explicit
17604 update of base register. */
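/* For illustration only (register names are made up): typical results
   are "pop {r4, r5, pc}" for a Thumb pop with an SP base,
   "ldmfd sp!, {r4, r5, pc}" otherwise, or "ldmia r3, {r4, r5}" for a
   non-SP base; a trailing "^" is added when an interrupt function
   returns by popping the PC.  */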
17605 void
17606 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17607 bool update)
17609 int i;
17610 char pattern[100];
17611 int offset;
17612 const char *conditional;
17613 int num_saves = XVECLEN (operands[0], 0);
17614 unsigned int regno;
17615 unsigned int regno_base = REGNO (operands[1]);
17617 offset = 0;
17618 offset += update ? 1 : 0;
17619 offset += return_pc ? 1 : 0;
17621 /* Is the base register in the list? */
17622 for (i = offset; i < num_saves; i++)
17624 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17625 /* If SP is in the list, then the base register must be SP. */
17626 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17627 /* If base register is in the list, there must be no explicit update. */
17628 if (regno == regno_base)
17629 gcc_assert (!update);
17632 conditional = reverse ? "%?%D0" : "%?%d0";
17633 if ((regno_base == SP_REGNUM) && TARGET_THUMB)
17635 /* Output pop (not ldmfd) because it has a shorter encoding. */
17636 gcc_assert (update);
17637 sprintf (pattern, "pop%s\t{", conditional);
17639 else
17641 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17642 It's just a convention; their semantics are identical. */
17643 if (regno_base == SP_REGNUM)
17644 sprintf (pattern, "ldm%sfd\t", conditional);
17645 else if (TARGET_UNIFIED_ASM)
17646 sprintf (pattern, "ldmia%s\t", conditional);
17647 else
17648 sprintf (pattern, "ldm%sia\t", conditional);
17650 strcat (pattern, reg_names[regno_base]);
17651 if (update)
17652 strcat (pattern, "!, {");
17653 else
17654 strcat (pattern, ", {");
17657 /* Output the first destination register. */
17658 strcat (pattern,
17659 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17661 /* Output the rest of the destination registers. */
17662 for (i = offset + 1; i < num_saves; i++)
17664 strcat (pattern, ", ");
17665 strcat (pattern,
17666 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17669 strcat (pattern, "}");
17671 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17672 strcat (pattern, "^");
17674 output_asm_insn (pattern, &cond);
17678 /* Output the assembly for a store multiple. */
17680 const char *
17681 vfp_output_vstmd (rtx * operands)
17683 char pattern[100];
17684 int p;
17685 int base;
17686 int i;
17687 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17688 ? XEXP (operands[0], 0)
17689 : XEXP (XEXP (operands[0], 0), 0);
17690 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17692 if (push_p)
17693 strcpy (pattern, "vpush%?.64\t{%P1");
17694 else
17695 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17697 p = strlen (pattern);
17699 gcc_assert (REG_P (operands[1]));
17701 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17702 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17704 p += sprintf (&pattern[p], ", d%d", base + i);
17706 strcpy (&pattern[p], "}");
17708 output_asm_insn (pattern, operands);
17709 return "";
17713 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17714 number of bytes pushed. */
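/* For example, a request to save three D registers (COUNT == 3) emits
   one store-multiple and returns 3 * 8 = 24; a request for exactly two
   is bumped to three on pre-ARMv6 cores to avoid the ARM10 VFPr1
   erratum handled below.  */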
17716 static int
17717 vfp_emit_fstmd (int base_reg, int count)
17719 rtx par;
17720 rtx dwarf;
17721 rtx tmp, reg;
17722 int i;
17724 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17725 register pairs are stored by a store multiple insn. We avoid this
17726 by pushing an extra pair. */
17727 if (count == 2 && !arm_arch6)
17729 if (base_reg == LAST_VFP_REGNUM - 3)
17730 base_reg -= 2;
17731 count++;
17734 /* FSTMD may not store more than 16 doubleword registers at once. Split
17735 larger stores into multiple parts (up to a maximum of two, in
17736 practice). */
17737 if (count > 16)
17739 int saved;
17740 /* NOTE: base_reg is an internal register number, so each D register
17741 counts as 2. */
17742 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17743 saved += vfp_emit_fstmd (base_reg, 16);
17744 return saved;
17747 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17748 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17750 reg = gen_rtx_REG (DFmode, base_reg);
17751 base_reg += 2;
17753 XVECEXP (par, 0, 0)
17754 = gen_rtx_SET (gen_frame_mem
17755 (BLKmode,
17756 gen_rtx_PRE_MODIFY (Pmode,
17757 stack_pointer_rtx,
17758 plus_constant
17759 (Pmode, stack_pointer_rtx,
17760 - (count * 8)))
17762 gen_rtx_UNSPEC (BLKmode,
17763 gen_rtvec (1, reg),
17764 UNSPEC_PUSH_MULT));
17766 tmp = gen_rtx_SET (stack_pointer_rtx,
17767 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17768 RTX_FRAME_RELATED_P (tmp) = 1;
17769 XVECEXP (dwarf, 0, 0) = tmp;
17771 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17772 RTX_FRAME_RELATED_P (tmp) = 1;
17773 XVECEXP (dwarf, 0, 1) = tmp;
17775 for (i = 1; i < count; i++)
17777 reg = gen_rtx_REG (DFmode, base_reg);
17778 base_reg += 2;
17779 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17781 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17782 plus_constant (Pmode,
17783 stack_pointer_rtx,
17784 i * 8)),
17785 reg);
17786 RTX_FRAME_RELATED_P (tmp) = 1;
17787 XVECEXP (dwarf, 0, i + 1) = tmp;
17790 par = emit_insn (par);
17791 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17792 RTX_FRAME_RELATED_P (par) = 1;
17794 return count * 8;
17797 /* Emit a call instruction with pattern PAT. ADDR is the address of
17798 the call target. */
17800 void
17801 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17803 rtx insn;
17805 insn = emit_call_insn (pat);
17807 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17808 If the call might use such an entry, add a use of the PIC register
17809 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17810 if (TARGET_VXWORKS_RTP
17811 && flag_pic
17812 && !sibcall
17813 && GET_CODE (addr) == SYMBOL_REF
17814 && (SYMBOL_REF_DECL (addr)
17815 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17816 : !SYMBOL_REF_LOCAL_P (addr)))
17818 require_pic_register ();
17819 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17822 if (TARGET_AAPCS_BASED)
17824 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17825 linker. We need to add an IP clobber to allow setting
17826 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17827 is not needed since it's a fixed register. */
17828 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17829 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17833 /* Output a 'call' insn. */
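/* For illustration only (register names are made up): on these
   pre-ARMv5 targets the call comes out roughly as

        mov     lr, pc
        bx      r0              @ or "mov pc, r0" when bx is not used

   relying on the fact that in ARM state the PC reads as the current
   instruction plus 8, so LR ends up holding the address of the
   instruction after the branch.  */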
17834 const char *
17835 output_call (rtx *operands)
17837 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17839 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17840 if (REGNO (operands[0]) == LR_REGNUM)
17842 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17843 output_asm_insn ("mov%?\t%0, %|lr", operands);
17846 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17848 if (TARGET_INTERWORK || arm_arch4t)
17849 output_asm_insn ("bx%?\t%0", operands);
17850 else
17851 output_asm_insn ("mov%?\t%|pc, %0", operands);
17853 return "";
17856 /* Output a 'call' insn that is a reference in memory. This is
17857 disabled for ARMv5, where we prefer a blx instead, because otherwise
17858 there's a significant performance overhead. */
17859 const char *
17860 output_call_mem (rtx *operands)
17862 gcc_assert (!arm_arch5);
17863 if (TARGET_INTERWORK)
17865 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17866 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17867 output_asm_insn ("bx%?\t%|ip", operands);
17869 else if (regno_use_in (LR_REGNUM, operands[0]))
17871 /* LR is used in the memory address. We load the address in the
17872 first instruction. It's safe to use IP as the target of the
17873 load since the call will kill it anyway. */
17874 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17875 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17876 if (arm_arch4t)
17877 output_asm_insn ("bx%?\t%|ip", operands);
17878 else
17879 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17881 else
17883 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17884 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17887 return "";
17891 /* Output a move from arm registers to arm registers of a long double
17892 OPERANDS[0] is the destination.
17893 OPERANDS[1] is the source. */
17894 const char *
17895 output_mov_long_double_arm_from_arm (rtx *operands)
17897 /* We have to be careful here because the two might overlap. */
17898 int dest_start = REGNO (operands[0]);
17899 int src_start = REGNO (operands[1]);
17900 rtx ops[2];
17901 int i;
17903 if (dest_start < src_start)
17905 for (i = 0; i < 3; i++)
17907 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17908 ops[1] = gen_rtx_REG (SImode, src_start + i);
17909 output_asm_insn ("mov%?\t%0, %1", ops);
17912 else
17914 for (i = 2; i >= 0; i--)
17916 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17917 ops[1] = gen_rtx_REG (SImode, src_start + i);
17918 output_asm_insn ("mov%?\t%0, %1", ops);
17922 return "";
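/* Move SRC into DEST as a pair of 16-bit operations.  A brief summary
   of the code below: constants are loaded by setting the low half and
   then inserting the high half (movw/movt style); symbolic values use
   HIGH and LO_SUM, with a REG_EQUAL note recording the full value.  */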
17925 void
17926 arm_emit_movpair (rtx dest, rtx src)
17928 rtx insn;
17930 /* If the src is an immediate, simplify it. */
17931 if (CONST_INT_P (src))
17933 HOST_WIDE_INT val = INTVAL (src);
17934 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17935 if ((val >> 16) & 0x0000ffff)
17937 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17938 GEN_INT (16)),
17939 GEN_INT ((val >> 16) & 0x0000ffff));
17940 insn = get_last_insn ();
17941 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17943 return;
17945 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17946 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17947 insn = get_last_insn ();
17948 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17951 /* Output a move between double words. It must be REG<-MEM
17952 or MEM<-REG. */
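/* Broadly (see the cases below): a simple register-indirect load or
   store becomes a single LDRD/STRD when TARGET_LDRD allows it, or an
   LDM/STM of the register pair otherwise; the remaining cases handle
   auto-increment addressing, overlapping registers and offsets that
   LDRD cannot encode.  */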
17953 const char *
17954 output_move_double (rtx *operands, bool emit, int *count)
17956 enum rtx_code code0 = GET_CODE (operands[0]);
17957 enum rtx_code code1 = GET_CODE (operands[1]);
17958 rtx otherops[3];
17959 if (count)
17960 *count = 1;
17962 /* The only case when this might happen is when
17963 you are looking at the length of a DImode instruction
17964 that has an invalid constant in it. */
17965 if (code0 == REG && code1 != MEM)
17967 gcc_assert (!emit);
17968 *count = 2;
17969 return "";
17972 if (code0 == REG)
17974 unsigned int reg0 = REGNO (operands[0]);
17976 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17978 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17980 switch (GET_CODE (XEXP (operands[1], 0)))
17982 case REG:
17984 if (emit)
17986 if (TARGET_LDRD
17987 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
17988 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17989 else
17990 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17992 break;
17994 case PRE_INC:
17995 gcc_assert (TARGET_LDRD);
17996 if (emit)
17997 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17998 break;
18000 case PRE_DEC:
18001 if (emit)
18003 if (TARGET_LDRD)
18004 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
18005 else
18006 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
18008 break;
18010 case POST_INC:
18011 if (emit)
18013 if (TARGET_LDRD)
18014 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
18015 else
18016 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
18018 break;
18020 case POST_DEC:
18021 gcc_assert (TARGET_LDRD);
18022 if (emit)
18023 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
18024 break;
18026 case PRE_MODIFY:
18027 case POST_MODIFY:
18028 /* Autoincrement addressing modes should never have overlapping
18029 base and destination registers, and overlapping index registers
18030 are already prohibited, so this doesn't need to worry about
18031 fix_cm3_ldrd. */
18032 otherops[0] = operands[0];
18033 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18034 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18036 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18038 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18040 /* Registers overlap so split out the increment. */
18041 if (emit)
18043 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18044 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
18046 if (count)
18047 *count = 2;
18049 else
18051 /* Use a single insn if we can.
18052 FIXME: IWMMXT allows offsets larger than ldrd can
18053 handle, fix these up with a pair of ldr. */
18054 if (TARGET_THUMB2
18055 || !CONST_INT_P (otherops[2])
18056 || (INTVAL (otherops[2]) > -256
18057 && INTVAL (otherops[2]) < 256))
18059 if (emit)
18060 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18062 else
18064 if (emit)
18066 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18067 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18069 if (count)
18070 *count = 2;
18075 else
18077 /* Use a single insn if we can.
18078 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18079 fix these up with a pair of ldr. */
18080 if (TARGET_THUMB2
18081 || !CONST_INT_P (otherops[2])
18082 || (INTVAL (otherops[2]) > -256
18083 && INTVAL (otherops[2]) < 256))
18085 if (emit)
18086 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18088 else
18090 if (emit)
18092 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18093 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18095 if (count)
18096 *count = 2;
18099 break;
18101 case LABEL_REF:
18102 case CONST:
18103 /* We might be able to use ldrd %0, %1 here. However, the range is
18104 different from that of ldr/adr, and it is broken on some ARMv7-M
18105 implementations. */
18106 /* Use the second register of the pair to avoid problematic
18107 overlap. */
18108 otherops[1] = operands[1];
18109 if (emit)
18110 output_asm_insn ("adr%?\t%0, %1", otherops);
18111 operands[1] = otherops[0];
18112 if (emit)
18114 if (TARGET_LDRD)
18115 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18116 else
18117 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18120 if (count)
18121 *count = 2;
18122 break;
18124 /* ??? This needs checking for thumb2. */
18125 default:
18126 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18127 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18129 otherops[0] = operands[0];
18130 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18131 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18133 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18135 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18137 switch ((int) INTVAL (otherops[2]))
18139 case -8:
18140 if (emit)
18141 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18142 return "";
18143 case -4:
18144 if (TARGET_THUMB2)
18145 break;
18146 if (emit)
18147 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18148 return "";
18149 case 4:
18150 if (TARGET_THUMB2)
18151 break;
18152 if (emit)
18153 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18154 return "";
18157 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
18158 operands[1] = otherops[0];
18159 if (TARGET_LDRD
18160 && (REG_P (otherops[2])
18161 || TARGET_THUMB2
18162 || (CONST_INT_P (otherops[2])
18163 && INTVAL (otherops[2]) > -256
18164 && INTVAL (otherops[2]) < 256)))
18166 if (reg_overlap_mentioned_p (operands[0],
18167 otherops[2]))
18169 /* Swap base and index registers over to
18170 avoid a conflict. */
18171 std::swap (otherops[1], otherops[2]);
18173 /* If both registers conflict, it will usually
18174 have been fixed by a splitter. */
18175 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18176 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18178 if (emit)
18180 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18181 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18183 if (count)
18184 *count = 2;
18186 else
18188 otherops[0] = operands[0];
18189 if (emit)
18190 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18192 return "";
18195 if (CONST_INT_P (otherops[2]))
18197 if (emit)
18199 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18200 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18201 else
18202 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18205 else
18207 if (emit)
18208 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18211 else
18213 if (emit)
18214 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18217 if (count)
18218 *count = 2;
18220 if (TARGET_LDRD)
18221 return "ldr%(d%)\t%0, [%1]";
18223 return "ldm%(ia%)\t%1, %M0";
18225 else
18227 otherops[1] = adjust_address (operands[1], SImode, 4);
18228 /* Take care of overlapping base/data reg. */
18229 if (reg_mentioned_p (operands[0], operands[1]))
18231 if (emit)
18233 output_asm_insn ("ldr%?\t%0, %1", otherops);
18234 output_asm_insn ("ldr%?\t%0, %1", operands);
18236 if (count)
18237 *count = 2;
18240 else
18242 if (emit)
18244 output_asm_insn ("ldr%?\t%0, %1", operands);
18245 output_asm_insn ("ldr%?\t%0, %1", otherops);
18247 if (count)
18248 *count = 2;
18253 else
18255 /* Constraints should ensure this. */
18256 gcc_assert (code0 == MEM && code1 == REG);
18257 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18258 || (TARGET_ARM && TARGET_LDRD));
18260 switch (GET_CODE (XEXP (operands[0], 0)))
18262 case REG:
18263 if (emit)
18265 if (TARGET_LDRD)
18266 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18267 else
18268 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18270 break;
18272 case PRE_INC:
18273 gcc_assert (TARGET_LDRD);
18274 if (emit)
18275 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18276 break;
18278 case PRE_DEC:
18279 if (emit)
18281 if (TARGET_LDRD)
18282 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18283 else
18284 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18286 break;
18288 case POST_INC:
18289 if (emit)
18291 if (TARGET_LDRD)
18292 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18293 else
18294 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18296 break;
18298 case POST_DEC:
18299 gcc_assert (TARGET_LDRD);
18300 if (emit)
18301 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18302 break;
18304 case PRE_MODIFY:
18305 case POST_MODIFY:
18306 otherops[0] = operands[1];
18307 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18308 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18310 /* IWMMXT allows offsets larger than ldrd can handle,
18311 fix these up with a pair of ldr. */
18312 if (!TARGET_THUMB2
18313 && CONST_INT_P (otherops[2])
18314 && (INTVAL (otherops[2]) <= -256
18315 || INTVAL (otherops[2]) >= 256))
18317 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18319 if (emit)
18321 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18322 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18324 if (count)
18325 *count = 2;
18327 else
18329 if (emit)
18331 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18332 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18334 if (count)
18335 *count = 2;
18338 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18340 if (emit)
18341 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18343 else
18345 if (emit)
18346 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18348 break;
18350 case PLUS:
18351 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18352 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18354 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18356 case -8:
18357 if (emit)
18358 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18359 return "";
18361 case -4:
18362 if (TARGET_THUMB2)
18363 break;
18364 if (emit)
18365 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18366 return "";
18368 case 4:
18369 if (TARGET_THUMB2)
18370 break;
18371 if (emit)
18372 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18373 return "";
18376 if (TARGET_LDRD
18377 && (REG_P (otherops[2])
18378 || TARGET_THUMB2
18379 || (CONST_INT_P (otherops[2])
18380 && INTVAL (otherops[2]) > -256
18381 && INTVAL (otherops[2]) < 256)))
18383 otherops[0] = operands[1];
18384 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18385 if (emit)
18386 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18387 return "";
18389 /* Fall through */
18391 default:
18392 otherops[0] = adjust_address (operands[0], SImode, 4);
18393 otherops[1] = operands[1];
18394 if (emit)
18396 output_asm_insn ("str%?\t%1, %0", operands);
18397 output_asm_insn ("str%?\t%H1, %0", otherops);
18399 if (count)
18400 *count = 2;
18404 return "";
18407 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18408 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
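/* Broadly: a register-to-register quad move is emitted as four 32-bit
   MOVs, ordered to cope with overlap, while loads and stores use a
   single LDMIA/STMIA (or ADR plus LDMIA for label addresses).  */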
18410 const char *
18411 output_move_quad (rtx *operands)
18413 if (REG_P (operands[0]))
18415 /* Load, or reg->reg move. */
18417 if (MEM_P (operands[1]))
18419 switch (GET_CODE (XEXP (operands[1], 0)))
18421 case REG:
18422 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18423 break;
18425 case LABEL_REF:
18426 case CONST:
18427 output_asm_insn ("adr%?\t%0, %1", operands);
18428 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18429 break;
18431 default:
18432 gcc_unreachable ();
18435 else
18437 rtx ops[2];
18438 int dest, src, i;
18440 gcc_assert (REG_P (operands[1]));
18442 dest = REGNO (operands[0]);
18443 src = REGNO (operands[1]);
18445 /* This seems pretty dumb, but hopefully GCC won't try to do it
18446 very often. */
18447 if (dest < src)
18448 for (i = 0; i < 4; i++)
18450 ops[0] = gen_rtx_REG (SImode, dest + i);
18451 ops[1] = gen_rtx_REG (SImode, src + i);
18452 output_asm_insn ("mov%?\t%0, %1", ops);
18454 else
18455 for (i = 3; i >= 0; i--)
18457 ops[0] = gen_rtx_REG (SImode, dest + i);
18458 ops[1] = gen_rtx_REG (SImode, src + i);
18459 output_asm_insn ("mov%?\t%0, %1", ops);
18463 else
18465 gcc_assert (MEM_P (operands[0]));
18466 gcc_assert (REG_P (operands[1]));
18467 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18469 switch (GET_CODE (XEXP (operands[0], 0)))
18471 case REG:
18472 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18473 break;
18475 default:
18476 gcc_unreachable ();
18480 return "";
18483 /* Output a VFP load or store instruction. */
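/* For illustration only (register names and offsets are made up): a
   double-precision load with a plain address comes out roughly as
   "vldr.64 d8, [r0, #8]"; pre-decrement and post-increment addresses
   use the VLDM/VSTM-style forms built below.  */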
18485 const char *
18486 output_move_vfp (rtx *operands)
18488 rtx reg, mem, addr, ops[2];
18489 int load = REG_P (operands[0]);
18490 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18491 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18492 const char *templ;
18493 char buff[50];
18494 machine_mode mode;
18496 reg = operands[!load];
18497 mem = operands[load];
18499 mode = GET_MODE (reg);
18501 gcc_assert (REG_P (reg));
18502 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18503 gcc_assert (mode == SFmode
18504 || mode == DFmode
18505 || mode == SImode
18506 || mode == DImode
18507 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18508 gcc_assert (MEM_P (mem));
18510 addr = XEXP (mem, 0);
18512 switch (GET_CODE (addr))
18514 case PRE_DEC:
18515 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18516 ops[0] = XEXP (addr, 0);
18517 ops[1] = reg;
18518 break;
18520 case POST_INC:
18521 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18522 ops[0] = XEXP (addr, 0);
18523 ops[1] = reg;
18524 break;
18526 default:
18527 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18528 ops[0] = reg;
18529 ops[1] = mem;
18530 break;
18533 sprintf (buff, templ,
18534 load ? "ld" : "st",
18535 dp ? "64" : "32",
18536 dp ? "P" : "",
18537 integer_p ? "\t%@ int" : "");
18538 output_asm_insn (buff, ops);
18540 return "";
18543 /* Output a Neon double-word or quad-word load or store, or a load
18544 or store for larger structure modes.
18546 WARNING: The ordering of elements is weird in big-endian mode,
18547 because the EABI requires that vectors stored in memory appear
18548 as though they were stored by a VSTM instruction.
18549 GCC RTL defines element ordering based on in-memory order.
18550 This can be different from the architectural ordering of elements
18551 within a NEON register. The intrinsics defined in arm_neon.h use the
18552 NEON register element ordering, not the GCC RTL element ordering.
18554 For example, the in-memory ordering of a big-endian quadword
18555 vector with 16-bit elements when stored from register pair {d0,d1}
18556 will be (lowest address first, d0[N] is NEON register element N):
18558 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18560 When necessary, quadword registers (dN, dN+1) are moved to ARM
18561 registers from rN in the order:
18563 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18565 So that STM/LDM can be used on vectors in ARM registers, and the
18566 same memory layout will result as if VSTM/VLDM were used.
18568 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18569 possible, which allows use of appropriate alignment tags.
18570 Note that the choice of "64" is independent of the actual vector
18571 element size; this size simply ensures that the behavior is
18572 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18574 Due to limitations of those instructions, use of VST1.64/VLD1.64
18575 is not possible if:
18576 - the address contains PRE_DEC, or
18577 - the mode refers to more than 4 double-word registers
18579 In those cases, it would be possible to replace VSTM/VLDM by a
18580 sequence of instructions; this is not currently implemented since
18581 this is not certain to actually improve performance. */
18583 const char *
18584 output_move_neon (rtx *operands)
18586 rtx reg, mem, addr, ops[2];
18587 int regno, nregs, load = REG_P (operands[0]);
18588 const char *templ;
18589 char buff[50];
18590 machine_mode mode;
18592 reg = operands[!load];
18593 mem = operands[load];
18595 mode = GET_MODE (reg);
18597 gcc_assert (REG_P (reg));
18598 regno = REGNO (reg);
18599 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18600 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18601 || NEON_REGNO_OK_FOR_QUAD (regno));
18602 gcc_assert (VALID_NEON_DREG_MODE (mode)
18603 || VALID_NEON_QREG_MODE (mode)
18604 || VALID_NEON_STRUCT_MODE (mode));
18605 gcc_assert (MEM_P (mem));
18607 addr = XEXP (mem, 0);
18609 /* Strip off const from addresses like (const (plus (...))). */
18610 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18611 addr = XEXP (addr, 0);
18613 switch (GET_CODE (addr))
18615 case POST_INC:
18616 /* We have to use vldm / vstm for too-large modes. */
18617 if (nregs > 4)
18619 templ = "v%smia%%?\t%%0!, %%h1";
18620 ops[0] = XEXP (addr, 0);
18622 else
18624 templ = "v%s1.64\t%%h1, %%A0";
18625 ops[0] = mem;
18627 ops[1] = reg;
18628 break;
18630 case PRE_DEC:
18631 /* We have to use vldm / vstm in this case, since there is no
18632 pre-decrement form of the vld1 / vst1 instructions. */
18633 templ = "v%smdb%%?\t%%0!, %%h1";
18634 ops[0] = XEXP (addr, 0);
18635 ops[1] = reg;
18636 break;
18638 case POST_MODIFY:
18639 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18640 gcc_unreachable ();
18642 case REG:
18643 /* We have to use vldm / vstm for too-large modes. */
18644 if (nregs > 1)
18646 if (nregs > 4)
18647 templ = "v%smia%%?\t%%m0, %%h1";
18648 else
18649 templ = "v%s1.64\t%%h1, %%A0";
18651 ops[0] = mem;
18652 ops[1] = reg;
18653 break;
18655 /* Fall through. */
18656 case LABEL_REF:
18657 case PLUS:
18659 int i;
18660 int overlap = -1;
18661 for (i = 0; i < nregs; i++)
18663 /* We're only using DImode here because it's a convenient size. */
18664 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18665 ops[1] = adjust_address (mem, DImode, 8 * i);
18666 if (reg_overlap_mentioned_p (ops[0], mem))
18668 gcc_assert (overlap == -1);
18669 overlap = i;
18671 else
18673 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18674 output_asm_insn (buff, ops);
18677 if (overlap != -1)
18679 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18680 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18681 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18682 output_asm_insn (buff, ops);
18685 return "";
18688 default:
18689 gcc_unreachable ();
18692 sprintf (buff, templ, load ? "ld" : "st");
18693 output_asm_insn (buff, ops);
18695 return "";
18698 /* Compute and return the length of neon_mov<mode>, where <mode> is
18699 one of VSTRUCT modes: EI, OI, CI or XI. */
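/* For example, a register-to-register OImode move is costed at two
   4-byte instructions (8), CImode at three (12) and XImode at four
   (16); moves involving memory with a label or reg+offset address are
   costed at one 4-byte instruction per D register moved.  */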
18701 arm_attr_length_move_neon (rtx_insn *insn)
18703 rtx reg, mem, addr;
18704 int load;
18705 machine_mode mode;
18707 extract_insn_cached (insn);
18709 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18711 mode = GET_MODE (recog_data.operand[0]);
18712 switch (mode)
18714 case EImode:
18715 case OImode:
18716 return 8;
18717 case CImode:
18718 return 12;
18719 case XImode:
18720 return 16;
18721 default:
18722 gcc_unreachable ();
18726 load = REG_P (recog_data.operand[0]);
18727 reg = recog_data.operand[!load];
18728 mem = recog_data.operand[load];
18730 gcc_assert (MEM_P (mem));
18732 mode = GET_MODE (reg);
18733 addr = XEXP (mem, 0);
18735 /* Strip off const from addresses like (const (plus (...))). */
18736 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18737 addr = XEXP (addr, 0);
18739 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18741 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18742 return insns * 4;
18744 else
18745 return 4;
18748 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18749 return zero. */
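/* For example, store addresses of the form [r3] or [r3, #12] satisfy
   this test, while a register offset such as [r3, r4] does not.  */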
18752 arm_address_offset_is_imm (rtx_insn *insn)
18754 rtx mem, addr;
18756 extract_insn_cached (insn);
18758 if (REG_P (recog_data.operand[0]))
18759 return 0;
18761 mem = recog_data.operand[0];
18763 gcc_assert (MEM_P (mem));
18765 addr = XEXP (mem, 0);
18767 if (REG_P (addr)
18768 || (GET_CODE (addr) == PLUS
18769 && REG_P (XEXP (addr, 0))
18770 && CONST_INT_P (XEXP (addr, 1))))
18771 return 1;
18772 else
18773 return 0;
18776 /* Output an ADD r, s, #n where n may be too big for one instruction.
18777 If adding zero to one register, output nothing. */
18778 const char *
18779 output_add_immediate (rtx *operands)
18781 HOST_WIDE_INT n = INTVAL (operands[2]);
18783 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18785 if (n < 0)
18786 output_multi_immediate (operands,
18787 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18788 -n);
18789 else
18790 output_multi_immediate (operands,
18791 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18795 return "";
18798 /* Output a multiple immediate operation.
18799 OPERANDS is the vector of operands referred to in the output patterns.
18800 INSTR1 is the output pattern to use for the first constant.
18801 INSTR2 is the output pattern to use for subsequent constants.
18802 IMMED_OP is the index of the constant slot in OPERANDS.
18803 N is the constant value. */
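/* For illustration only (register names are made up): with N == 0x10001
   this emits INSTR1 with #1 and then INSTR2 with #65536, e.g.

        add     r0, r1, #1
        add     r0, r0, #65536

   each chunk being an 8-bit value at an even bit position, which is
   what an ARM data-processing immediate can encode.  */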
18804 static const char *
18805 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18806 int immed_op, HOST_WIDE_INT n)
18808 #if HOST_BITS_PER_WIDE_INT > 32
18809 n &= 0xffffffff;
18810 #endif
18812 if (n == 0)
18814 /* Quick and easy output. */
18815 operands[immed_op] = const0_rtx;
18816 output_asm_insn (instr1, operands);
18818 else
18820 int i;
18821 const char * instr = instr1;
18823 /* Note that n is never zero here (which would give no output). */
18824 for (i = 0; i < 32; i += 2)
18826 if (n & (3 << i))
18828 operands[immed_op] = GEN_INT (n & (255 << i));
18829 output_asm_insn (instr, operands);
18830 instr = instr2;
18831 i += 6;
18836 return "";
18839 /* Return the name of a shifter operation. */
18840 static const char *
18841 arm_shift_nmem (enum rtx_code code)
18843 switch (code)
18845 case ASHIFT:
18846 return ARM_LSL_NAME;
18848 case ASHIFTRT:
18849 return "asr";
18851 case LSHIFTRT:
18852 return "lsr";
18854 case ROTATERT:
18855 return "ror";
18857 default:
18858 abort();
18862 /* Return the appropriate ARM instruction for the operation code.
18863 The returned result should not be overwritten. OP is the rtx of the
18864 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18865 was shifted. */
18866 const char *
18867 arithmetic_instr (rtx op, int shift_first_arg)
18869 switch (GET_CODE (op))
18871 case PLUS:
18872 return "add";
18874 case MINUS:
18875 return shift_first_arg ? "rsb" : "sub";
18877 case IOR:
18878 return "orr";
18880 case XOR:
18881 return "eor";
18883 case AND:
18884 return "and";
18886 case ASHIFT:
18887 case ASHIFTRT:
18888 case LSHIFTRT:
18889 case ROTATERT:
18890 return arm_shift_nmem (GET_CODE (op));
18892 default:
18893 gcc_unreachable ();
18897 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18898 for the operation code. The returned result should not be overwritten.
18899 OP is the rtx code of the shift.
18900 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18901 constant shift amount otherwise. */
18902 static const char *
18903 shift_op (rtx op, HOST_WIDE_INT *amountp)
18905 const char * mnem;
18906 enum rtx_code code = GET_CODE (op);
18908 switch (code)
18910 case ROTATE:
18911 if (!CONST_INT_P (XEXP (op, 1)))
18913 output_operand_lossage ("invalid shift operand");
18914 return NULL;
18917 code = ROTATERT;
18918 *amountp = 32 - INTVAL (XEXP (op, 1));
18919 mnem = "ror";
18920 break;
18922 case ASHIFT:
18923 case ASHIFTRT:
18924 case LSHIFTRT:
18925 case ROTATERT:
18926 mnem = arm_shift_nmem (code);
18927 if (CONST_INT_P (XEXP (op, 1)))
18929 *amountp = INTVAL (XEXP (op, 1));
18931 else if (REG_P (XEXP (op, 1)))
18933 *amountp = -1;
18934 return mnem;
18936 else
18938 output_operand_lossage ("invalid shift operand");
18939 return NULL;
18941 break;
18943 case MULT:
18944 /* We never have to worry about the amount being other than a
18945 power of 2, since this case can never be reloaded from a reg. */
18946 if (!CONST_INT_P (XEXP (op, 1)))
18948 output_operand_lossage ("invalid shift operand");
18949 return NULL;
18952 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18954 /* Amount must be a power of two. */
18955 if (*amountp & (*amountp - 1))
18957 output_operand_lossage ("invalid shift operand");
18958 return NULL;
18961 *amountp = int_log2 (*amountp);
18962 return ARM_LSL_NAME;
18964 default:
18965 output_operand_lossage ("invalid shift operand");
18966 return NULL;
18969 /* This is not 100% correct, but follows from the desire to merge
18970 multiplication by a power of 2 with the recognizer for a
18971 shift. >=32 is not a valid shift for "lsl", so we must try to
18972 output a shift that produces the correct arithmetical result.
18973 Using lsr #32 is identical except for the fact that the carry bit
18974 is not set correctly if we set the flags; but we never use the
18975 carry bit from such an operation, so we can ignore that. */
18976 if (code == ROTATERT)
18977 /* Rotate is just modulo 32. */
18978 *amountp &= 31;
18979 else if (*amountp != (*amountp & 31))
18981 if (code == ASHIFT)
18982 mnem = "lsr";
18983 *amountp = 32;
18986 /* Shifts of 0 are no-ops. */
18987 if (*amountp == 0)
18988 return NULL;
18990 return mnem;
18993 /* Obtain the shift from the POWER of two. */
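/* For example, int_log2 (8) is 3.  Callers are expected to pass a
   power of two; see the check in shift_op above.  */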
18995 static HOST_WIDE_INT
18996 int_log2 (HOST_WIDE_INT power)
18998 HOST_WIDE_INT shift = 0;
19000 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
19002 gcc_assert (shift <= 31);
19003 shift++;
19006 return shift;
19009 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19010 because /bin/as is horribly restrictive. The judgement about
19011 whether or not each character is 'printable' (and can be output as
19012 is) or not (and must be printed with an octal escape) must be made
19013 with reference to the *host* character set -- the situation is
19014 similar to that discussed in the comments above pp_c_char in
19015 c-pretty-print.c. */
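/* For example, the bytes 'h', 'i', '"', 0 are emitted as
   .ascii "hi\"\000", and a fresh .ascii directive is started once a
   line grows beyond MAX_ASCII_LEN characters.  */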
19017 #define MAX_ASCII_LEN 51
19019 void
19020 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19022 int i;
19023 int len_so_far = 0;
19025 fputs ("\t.ascii\t\"", stream);
19027 for (i = 0; i < len; i++)
19029 int c = p[i];
19031 if (len_so_far >= MAX_ASCII_LEN)
19033 fputs ("\"\n\t.ascii\t\"", stream);
19034 len_so_far = 0;
19037 if (ISPRINT (c))
19039 if (c == '\\' || c == '\"')
19041 putc ('\\', stream);
19042 len_so_far++;
19044 putc (c, stream);
19045 len_so_far++;
19047 else
19049 fprintf (stream, "\\%03o", c);
19050 len_so_far += 4;
19054 fputs ("\"\n", stream);
19057 /* Whether a register is callee saved or not. This is necessary because high
19058 registers are marked as caller saved when optimizing for size on Thumb-1
19059 targets, despite actually being callee saved, in order to avoid using them. */
19060 #define callee_saved_reg_p(reg) \
19061 (!call_used_regs[reg] \
19062 || (TARGET_THUMB1 && optimize_size \
19063 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19065 /* Compute the register save mask for registers 0 through 12
19066 inclusive. This code is used by arm_compute_save_reg_mask. */
19068 static unsigned long
19069 arm_compute_save_reg0_reg12_mask (void)
19071 unsigned long func_type = arm_current_func_type ();
19072 unsigned long save_reg_mask = 0;
19073 unsigned int reg;
19075 if (IS_INTERRUPT (func_type))
19077 unsigned int max_reg;
19078 /* Interrupt functions must not corrupt any registers,
19079 even call clobbered ones. If this is a leaf function
19080 we can just examine the registers used by the RTL, but
19081 otherwise we have to assume that whatever function is
19082 called might clobber anything, and so we have to save
19083 all the call-clobbered registers as well. */
19084 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19085 /* FIQ handlers have registers r8 - r12 banked, so
19086 we only need to check r0 - r7. Normal ISRs only
19087 bank r14 and r15, so we must check up to r12.
19088 r13 is the stack pointer which is always preserved,
19089 so we do not need to consider it here. */
19090 max_reg = 7;
19091 else
19092 max_reg = 12;
19094 for (reg = 0; reg <= max_reg; reg++)
19095 if (df_regs_ever_live_p (reg)
19096 || (! crtl->is_leaf && call_used_regs[reg]))
19097 save_reg_mask |= (1 << reg);
19099 /* Also save the pic base register if necessary. */
19100 if (flag_pic
19101 && !TARGET_SINGLE_PIC_BASE
19102 && arm_pic_register != INVALID_REGNUM
19103 && crtl->uses_pic_offset_table)
19104 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19106 else if (IS_VOLATILE(func_type))
19108 /* For noreturn functions we historically omitted register saves
19109 altogether. However this really messes up debugging. As a
19110 compromise save just the frame pointers. Combined with the link
19111 register saved elsewhere this should be sufficient to get
19112 a backtrace. */
19113 if (frame_pointer_needed)
19114 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19115 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19116 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19117 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19118 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19120 else
19122 /* In the normal case we only need to save those registers
19123 which are call saved and which are used by this function. */
19124 for (reg = 0; reg <= 11; reg++)
19125 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19126 save_reg_mask |= (1 << reg);
19128 /* Handle the frame pointer as a special case. */
19129 if (frame_pointer_needed)
19130 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19132 /* If we aren't loading the PIC register,
19133 don't stack it even though it may be live. */
19134 if (flag_pic
19135 && !TARGET_SINGLE_PIC_BASE
19136 && arm_pic_register != INVALID_REGNUM
19137 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19138 || crtl->uses_pic_offset_table))
19139 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19141 /* The prologue will copy SP into R0, so save it. */
19142 if (IS_STACKALIGN (func_type))
19143 save_reg_mask |= 1;
19146 /* Save registers so the exception handler can modify them. */
19147 if (crtl->calls_eh_return)
19149 unsigned int i;
19151 for (i = 0; ; i++)
19153 reg = EH_RETURN_DATA_REGNO (i);
19154 if (reg == INVALID_REGNUM)
19155 break;
19156 save_reg_mask |= 1 << reg;
19160 return save_reg_mask;
19163 /* Return true if r3 is live at the start of the function. */
19165 static bool
19166 arm_r3_live_at_start_p (void)
19168 /* Just look at cfg info, which is still close enough to correct at this
19169 point. This gives false positives for broken functions that might use
19170 uninitialized data that happens to be allocated in r3, but who cares? */
19171 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19174 /* Compute the number of bytes used to store the static chain register on the
19175 stack, above the stack frame. We need to know this accurately to get the
19176 alignment of the rest of the stack frame correct. */
19178 static int
19179 arm_compute_static_chain_stack_bytes (void)
19181 /* See the defining assertion in arm_expand_prologue. */
19182 if (IS_NESTED (arm_current_func_type ())
19183 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19184 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19185 && !df_regs_ever_live_p (LR_REGNUM)))
19186 && arm_r3_live_at_start_p ()
19187 && crtl->args.pretend_args_size == 0)
19188 return 4;
19190 return 0;
19193 /* Compute a bit mask of which registers need to be
19194 saved on the stack for the current function.
19195 This is used by arm_get_frame_offsets, which may add extra registers. */
19197 static unsigned long
19198 arm_compute_save_reg_mask (void)
19200 unsigned int save_reg_mask = 0;
19201 unsigned long func_type = arm_current_func_type ();
19202 unsigned int reg;
19204 if (IS_NAKED (func_type))
19205 /* This should never really happen. */
19206 return 0;
19208 /* If we are creating a stack frame, then we must save the frame pointer,
19209 IP (which will hold the old stack pointer), LR and the PC. */
19210 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19211 save_reg_mask |=
19212 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19213 | (1 << IP_REGNUM)
19214 | (1 << LR_REGNUM)
19215 | (1 << PC_REGNUM);
19217 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19219 /* Decide if we need to save the link register.
19220 Interrupt routines have their own banked link register,
19221 so they never need to save it.
19222 Otherwise if we do not use the link register we do not need to save
19223 it. If we are pushing other registers onto the stack, however, we
19224 can save an instruction in the epilogue by pushing the link register
19225 now and then popping it back into the PC. This incurs extra memory
19226 accesses though, so we only do it when optimizing for size, and only
19227 if we know that we will not need a fancy return sequence. */
19228 if (df_regs_ever_live_p (LR_REGNUM)
19229 || (save_reg_mask
19230 && optimize_size
19231 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19232 && !crtl->tail_call_emit
19233 && !crtl->calls_eh_return))
19234 save_reg_mask |= 1 << LR_REGNUM;
19236 if (cfun->machine->lr_save_eliminated)
19237 save_reg_mask &= ~ (1 << LR_REGNUM);
19239 if (TARGET_REALLY_IWMMXT
19240 && ((bit_count (save_reg_mask)
19241 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19242 arm_compute_static_chain_stack_bytes())
19243 ) % 2) != 0)
19245 /* The total number of registers that are going to be pushed
19246 onto the stack is odd. We need to ensure that the stack
19247 is 64-bit aligned before we start to save iWMMXt registers,
19248 and also before we start to create locals. (A local variable
19249 might be a double or long long which we will load/store using
19250 an iWMMXt instruction). Therefore we need to push another
19251 ARM register, so that the stack will be 64-bit aligned. We
19252 try to avoid using the arg registers (r0 - r3) as they might be
19253 used to pass values in a tail call. */
19254 for (reg = 4; reg <= 12; reg++)
19255 if ((save_reg_mask & (1 << reg)) == 0)
19256 break;
19258 if (reg <= 12)
19259 save_reg_mask |= (1 << reg);
19260 else
19262 cfun->machine->sibcall_blocked = 1;
19263 save_reg_mask |= (1 << 3);
19267 /* We may need to push an additional register for use initializing the
19268 PIC base register. */
19269 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19270 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19272 reg = thumb_find_work_register (1 << 4);
19273 if (!call_used_regs[reg])
19274 save_reg_mask |= (1 << reg);
19277 return save_reg_mask;
19280 /* Compute a bit mask of which registers need to be
19281 saved on the stack for the current function. */
19282 static unsigned long
19283 thumb1_compute_save_reg_mask (void)
19285 unsigned long mask;
19286 unsigned reg;
19288 mask = 0;
19289 for (reg = 0; reg < 12; reg ++)
19290 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19291 mask |= 1 << reg;
19293 if (flag_pic
19294 && !TARGET_SINGLE_PIC_BASE
19295 && arm_pic_register != INVALID_REGNUM
19296 && crtl->uses_pic_offset_table)
19297 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19299 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19300 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19301 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19303 /* LR will also be pushed if any lo regs are pushed. */
19304 if (mask & 0xff || thumb_force_lr_save ())
19305 mask |= (1 << LR_REGNUM);
19307 /* Make sure we have a low work register if we need one.
19308 We will need one if we are going to push a high register,
19309 but we are not currently intending to push a low register. */
19310 if ((mask & 0xff) == 0
19311 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19313 /* Use thumb_find_work_register to choose which register
19314 we will use. If the register is live then we will
19315 have to push it. Use LAST_LO_REGNUM as our fallback
19316 choice for the register to select. */
19317 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19318 /* Make sure the register returned by thumb_find_work_register is
19319 not part of the return value. */
19320 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19321 reg = LAST_LO_REGNUM;
19323 if (callee_saved_reg_p (reg))
19324 mask |= 1 << reg;
19327 /* The 504 below is 8 bytes less than 512 because there are two possible
19328 alignment words. We can't tell here if they will be present or not, so we
19329 have to play it safe and assume that they are. */
19330 if ((CALLER_INTERWORKING_SLOT_SIZE +
19331 ROUND_UP_WORD (get_frame_size ()) +
19332 crtl->outgoing_args_size) >= 504)
19334 /* This is the same as the code in thumb1_expand_prologue() which
19335 determines which register to use for stack decrement. */
19336 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19337 if (mask & (1 << reg))
19338 break;
19340 if (reg > LAST_LO_REGNUM)
19342 /* Make sure we have a register available for stack decrement. */
19343 mask |= 1 << LAST_LO_REGNUM;
19347 return mask;
19351 /* Return the number of bytes required to save VFP registers. */
19352 static int
19353 arm_get_vfp_saved_size (void)
19355 unsigned int regno;
19356 int count;
19357 int saved;
19359 saved = 0;
19360 /* Space for saved VFP registers. */
19361 if (TARGET_HARD_FLOAT && TARGET_VFP)
19363 count = 0;
19364 for (regno = FIRST_VFP_REGNUM;
19365 regno < LAST_VFP_REGNUM;
19366 regno += 2)
19368 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19369 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19371 if (count > 0)
19373 /* Workaround ARM10 VFPr1 bug. */
19374 if (count == 2 && !arm_arch6)
19375 count++;
19376 saved += count * 8;
19378 count = 0;
19380 else
19381 count++;
19383 if (count > 0)
19385 if (count == 2 && !arm_arch6)
19386 count++;
19387 saved += count * 8;
19390 return saved;
19394 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19395 everything bar the final return instruction. If simple_return is true,
19396 then do not output epilogue, because it has already been emitted in RTL. */
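/* For illustration only (register names are made up): typical results
   are "ldmfd sp!, {r4, r5, pc}" when saved registers must be restored,
   or a bare "bx lr" / "mov pc, lr" when nothing needs popping.  */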
19397 const char *
19398 output_return_instruction (rtx operand, bool really_return, bool reverse,
19399 bool simple_return)
19401 char conditional[10];
19402 char instr[100];
19403 unsigned reg;
19404 unsigned long live_regs_mask;
19405 unsigned long func_type;
19406 arm_stack_offsets *offsets;
19408 func_type = arm_current_func_type ();
19410 if (IS_NAKED (func_type))
19411 return "";
19413 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19415 /* If this function was declared non-returning, and we have
19416 found a tail call, then we have to trust that the called
19417 function won't return. */
19418 if (really_return)
19420 rtx ops[2];
19422 /* Otherwise, trap an attempted return by aborting. */
19423 ops[0] = operand;
19424 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19425 : "abort");
19426 assemble_external_libcall (ops[1]);
19427 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19430 return "";
19433 gcc_assert (!cfun->calls_alloca || really_return);
19435 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19437 cfun->machine->return_used_this_function = 1;
19439 offsets = arm_get_frame_offsets ();
19440 live_regs_mask = offsets->saved_regs_mask;
19442 if (!simple_return && live_regs_mask)
19444 const char * return_reg;
19446 /* If we do not have any special requirements for function exit
19447 (e.g. interworking) then we can load the return address
19448 directly into the PC. Otherwise we must load it into LR. */
19449 if (really_return
19450 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19451 return_reg = reg_names[PC_REGNUM];
19452 else
19453 return_reg = reg_names[LR_REGNUM];
19455 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19457 /* There are three possible reasons for the IP register
19458 being saved. 1) a stack frame was created, in which case
19459 IP contains the old stack pointer, or 2) an ISR routine
19460 corrupted it, or 3) it was saved to align the stack on
19461 iWMMXt. In case 1, restore IP into SP, otherwise just
19462 restore IP. */
19463 if (frame_pointer_needed)
19465 live_regs_mask &= ~ (1 << IP_REGNUM);
19466 live_regs_mask |= (1 << SP_REGNUM);
19468 else
19469 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19472 /* On some ARM architectures it is faster to use LDR rather than
19473 LDM to load a single register. On other architectures, the
19474 cost is the same. In 26 bit mode, or for exception handlers,
19475 we have to use LDM to load the PC so that the CPSR is also
19476 restored. */
19477 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19478 if (live_regs_mask == (1U << reg))
19479 break;
19481 if (reg <= LAST_ARM_REGNUM
19482 && (reg != LR_REGNUM
19483 || ! really_return
19484 || ! IS_INTERRUPT (func_type)))
19486 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19487 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19489 else
19491 char *p;
19492 int first = 1;
19494 /* Generate the load multiple instruction to restore the
19495 registers. Note we can get here, even if
19496 frame_pointer_needed is true, but only if sp already
19497 points to the base of the saved core registers. */
19498 if (live_regs_mask & (1 << SP_REGNUM))
19500 unsigned HOST_WIDE_INT stack_adjust;
19502 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19503 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19505 if (stack_adjust && arm_arch5 && TARGET_ARM)
19506 if (TARGET_UNIFIED_ASM)
19507 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19508 else
19509 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19510 else
19512 /* If we can't use ldmib (SA110 bug),
19513 then try to pop r3 instead. */
19514 if (stack_adjust)
19515 live_regs_mask |= 1 << 3;
19517 if (TARGET_UNIFIED_ASM)
19518 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19519 else
19520 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19523 else
19524 if (TARGET_UNIFIED_ASM)
19525 sprintf (instr, "pop%s\t{", conditional);
19526 else
19527 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19529 p = instr + strlen (instr);
19531 for (reg = 0; reg <= SP_REGNUM; reg++)
19532 if (live_regs_mask & (1 << reg))
19534 int l = strlen (reg_names[reg]);
19536 if (first)
19537 first = 0;
19538 else
19540 memcpy (p, ", ", 2);
19541 p += 2;
19544 memcpy (p, "%|", 2);
19545 memcpy (p + 2, reg_names[reg], l);
19546 p += l + 2;
19549 if (live_regs_mask & (1 << LR_REGNUM))
19551 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19552 /* If returning from an interrupt, restore the CPSR. */
19553 if (IS_INTERRUPT (func_type))
19554 strcat (p, "^");
19556 else
19557 strcpy (p, "}");
19560 output_asm_insn (instr, & operand);
19562 /* See if we need to generate an extra instruction to
19563 perform the actual function return. */
19564 if (really_return
19565 && func_type != ARM_FT_INTERWORKED
19566 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19568 /* The return has already been handled
19569 by loading the LR into the PC. */
19570 return "";
19574 if (really_return)
19576 switch ((int) ARM_FUNC_TYPE (func_type))
19578 case ARM_FT_ISR:
19579 case ARM_FT_FIQ:
19580 /* ??? This is wrong for unified assembly syntax. */
19581 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19582 break;
19584 case ARM_FT_INTERWORKED:
19585 sprintf (instr, "bx%s\t%%|lr", conditional);
19586 break;
19588 case ARM_FT_EXCEPTION:
19589 /* ??? This is wrong for unified assembly syntax. */
19590 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19591 break;
19593 default:
19594 /* Use bx if it's available. */
19595 if (arm_arch5 || arm_arch4t)
19596 sprintf (instr, "bx%s\t%%|lr", conditional);
19597 else
19598 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19599 break;
19602 output_asm_insn (instr, & operand);
19605 return "";
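/* Illustrative only: for an ordinary ARM-mode function that restores r4, r5
   and fp and loads the return address straight into PC, the code above
   typically prints something like "ldmfd sp!, {r4, r5, fp, pc}" (or
   "pop {...}" with unified syntax), while a function with no saved registers
   returns with "bx lr" on ARMv4T or later and "mov pc, lr" on older cores.  */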
19608 /* Write the function name into the code section, directly preceding
19609 the function prologue.
19611 Code will be output similar to this:
19612 t0
19613 .ascii "arm_poke_function_name", 0
19614 .align
19615 t1
19616 .word 0xff000000 + (t1 - t0)
19617 arm_poke_function_name
19618 mov ip, sp
19619 stmfd sp!, {fp, ip, lr, pc}
19620 sub fp, ip, #4
19622 When performing a stack backtrace, code can inspect the value
19623 of 'pc' stored at 'fp' + 0. If the trace function then looks
19624 at location pc - 12 and the top 8 bits are set, then we know
19625 that there is a function name embedded immediately preceding this
19626 location, whose length is given by the low 24 bits (pc[-3] & 0x00ffffff).
19628 We assume that pc is declared as a pointer to an unsigned long.
19630 It is of no benefit to output the function name if we are assembling
19631 a leaf function. These function types will not contain a stack
19632 backtrace structure, therefore it is not possible to determine the
19633 function name. */
19634 void
19635 arm_poke_function_name (FILE *stream, const char *name)
19637 unsigned long alignlength;
19638 unsigned long length;
19639 rtx x;
19641 length = strlen (name) + 1;
19642 alignlength = ROUND_UP_WORD (length);
19644 ASM_OUTPUT_ASCII (stream, name, length);
19645 ASM_OUTPUT_ALIGN (stream, 2);
19646 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19647 assemble_aligned_integer (UNITS_PER_WORD, x);
19650 /* Place some comments into the assembler stream
19651 describing the current function. */
19652 static void
19653 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19655 unsigned long func_type;
19657 /* ??? Do we want to print some of the below anyway? */
19658 if (TARGET_THUMB1)
19659 return;
19661 /* Sanity check. */
19662 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19664 func_type = arm_current_func_type ();
19666 switch ((int) ARM_FUNC_TYPE (func_type))
19668 default:
19669 case ARM_FT_NORMAL:
19670 break;
19671 case ARM_FT_INTERWORKED:
19672 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19673 break;
19674 case ARM_FT_ISR:
19675 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19676 break;
19677 case ARM_FT_FIQ:
19678 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19679 break;
19680 case ARM_FT_EXCEPTION:
19681 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19682 break;
19685 if (IS_NAKED (func_type))
19686 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19688 if (IS_VOLATILE (func_type))
19689 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19691 if (IS_NESTED (func_type))
19692 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19693 if (IS_STACKALIGN (func_type))
19694 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19696 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19697 crtl->args.size,
19698 crtl->args.pretend_args_size, frame_size);
19700 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19701 frame_pointer_needed,
19702 cfun->machine->uses_anonymous_args);
19704 if (cfun->machine->lr_save_eliminated)
19705 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19707 if (crtl->calls_eh_return)
19708 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19712 static void
19713 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19714 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19716 arm_stack_offsets *offsets;
19718 if (TARGET_THUMB1)
19720 int regno;
19722 /* Emit any call-via-reg trampolines that are needed for v4t support
19723 of call_reg and call_value_reg type insns. */
19724 for (regno = 0; regno < LR_REGNUM; regno++)
19726 rtx label = cfun->machine->call_via[regno];
19728 if (label != NULL)
19730 switch_to_section (function_section (current_function_decl));
19731 targetm.asm_out.internal_label (asm_out_file, "L",
19732 CODE_LABEL_NUMBER (label));
19733 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19737 /* ??? Probably not safe to set this here, since it assumes that a
19738 function will be emitted as assembly immediately after we generate
19739 RTL for it. This does not happen for inline functions. */
19740 cfun->machine->return_used_this_function = 0;
19742 else /* TARGET_32BIT */
19744 /* We need to take into account any stack-frame rounding. */
19745 offsets = arm_get_frame_offsets ();
19747 gcc_assert (!use_return_insn (FALSE, NULL)
19748 || (cfun->machine->return_used_this_function != 0)
19749 || offsets->saved_regs == offsets->outgoing_args
19750 || frame_pointer_needed);
19754 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19755 STR and STRD. If an even number of registers is being pushed, one
19756 STRD pattern is created for each register pair. If an odd number
19757 of registers is pushed, emit an initial STR followed by
19758 as many STRD instructions as are needed. This works best when the
19759 stack is initially 64-bit aligned (the normal case), since it
19760 ensures that each STRD is also 64-bit aligned. */
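/* A sketch of the kind of sequence this aims at, assuming r4, r5 and r6 are
   the registers to be saved: the odd count makes the code emit one store
   with writeback first, "str r4, [sp, #-12]!", followed by a dword-aligned
   pair store, "strd r5, r6, [sp, #4]".  */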
19761 static void
19762 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19764 int num_regs = 0;
19765 int i;
19766 int regno;
19767 rtx par = NULL_RTX;
19768 rtx dwarf = NULL_RTX;
19769 rtx tmp;
19770 bool first = true;
19772 num_regs = bit_count (saved_regs_mask);
19774 /* Must be at least one register to save, and can't save SP or PC. */
19775 gcc_assert (num_regs > 0 && num_regs <= 14);
19776 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19777 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19779 /* Create sequence for DWARF info. All the frame-related data for
19780 debugging is held in this wrapper. */
19781 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19783 /* Describe the stack adjustment. */
19784 tmp = gen_rtx_SET (stack_pointer_rtx,
19785 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19786 RTX_FRAME_RELATED_P (tmp) = 1;
19787 XVECEXP (dwarf, 0, 0) = tmp;
19789 /* Find the first register. */
19790 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19793 i = 0;
19795 /* If there's an odd number of registers to push, start off by
19796 pushing a single register. This ensures that subsequent strd
19797 operations are dword aligned (assuming that SP was originally
19798 64-bit aligned). */
19799 if ((num_regs & 1) != 0)
19801 rtx reg, mem, insn;
19803 reg = gen_rtx_REG (SImode, regno);
19804 if (num_regs == 1)
19805 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19806 stack_pointer_rtx));
19807 else
19808 mem = gen_frame_mem (Pmode,
19809 gen_rtx_PRE_MODIFY
19810 (Pmode, stack_pointer_rtx,
19811 plus_constant (Pmode, stack_pointer_rtx,
19812 -4 * num_regs)));
19814 tmp = gen_rtx_SET (mem, reg);
19815 RTX_FRAME_RELATED_P (tmp) = 1;
19816 insn = emit_insn (tmp);
19817 RTX_FRAME_RELATED_P (insn) = 1;
19818 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19819 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19820 RTX_FRAME_RELATED_P (tmp) = 1;
19821 i++;
19822 regno++;
19823 XVECEXP (dwarf, 0, i) = tmp;
19824 first = false;
19827 while (i < num_regs)
19828 if (saved_regs_mask & (1 << regno))
19830 rtx reg1, reg2, mem1, mem2;
19831 rtx tmp0, tmp1, tmp2;
19832 int regno2;
19834 /* Find the register to pair with this one. */
19835 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19836 regno2++)
19839 reg1 = gen_rtx_REG (SImode, regno);
19840 reg2 = gen_rtx_REG (SImode, regno2);
19842 if (first)
19844 rtx insn;
19846 first = false;
19847 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19848 stack_pointer_rtx,
19849 -4 * num_regs));
19850 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19851 stack_pointer_rtx,
19852 -4 * (num_regs - 1)));
19853 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19854 plus_constant (Pmode, stack_pointer_rtx,
19855 -4 * (num_regs)));
19856 tmp1 = gen_rtx_SET (mem1, reg1);
19857 tmp2 = gen_rtx_SET (mem2, reg2);
19858 RTX_FRAME_RELATED_P (tmp0) = 1;
19859 RTX_FRAME_RELATED_P (tmp1) = 1;
19860 RTX_FRAME_RELATED_P (tmp2) = 1;
19861 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19862 XVECEXP (par, 0, 0) = tmp0;
19863 XVECEXP (par, 0, 1) = tmp1;
19864 XVECEXP (par, 0, 2) = tmp2;
19865 insn = emit_insn (par);
19866 RTX_FRAME_RELATED_P (insn) = 1;
19867 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19869 else
19871 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19872 stack_pointer_rtx,
19873 4 * i));
19874 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19875 stack_pointer_rtx,
19876 4 * (i + 1)));
19877 tmp1 = gen_rtx_SET (mem1, reg1);
19878 tmp2 = gen_rtx_SET (mem2, reg2);
19879 RTX_FRAME_RELATED_P (tmp1) = 1;
19880 RTX_FRAME_RELATED_P (tmp2) = 1;
19881 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19882 XVECEXP (par, 0, 0) = tmp1;
19883 XVECEXP (par, 0, 1) = tmp2;
19884 emit_insn (par);
19887 /* Create unwind information. This is an approximation. */
19888 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19889 plus_constant (Pmode,
19890 stack_pointer_rtx,
19891 4 * i)),
19892 reg1);
19893 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19894 plus_constant (Pmode,
19895 stack_pointer_rtx,
19896 4 * (i + 1))),
19897 reg2);
19899 RTX_FRAME_RELATED_P (tmp1) = 1;
19900 RTX_FRAME_RELATED_P (tmp2) = 1;
19901 XVECEXP (dwarf, 0, i + 1) = tmp1;
19902 XVECEXP (dwarf, 0, i + 2) = tmp2;
19903 i += 2;
19904 regno = regno2 + 1;
19906 else
19907 regno++;
19909 return;
19912 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19913 whenever possible, otherwise it emits single-word stores. The first store
19914 also allocates stack space for all saved registers, using writeback with
19915 post-addressing mode. All other stores use offset addressing. If no STRD
19916 can be emitted, this function emits a sequence of single-word stores,
19917 and not an STM as before, because single-word stores give the scheduler
19918 more freedom and can be turned into an STM by peephole optimizations. */
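/* A sketch of the intended output, assuming r4, r5, r6 and r8 are to be
   saved: the first pair allocates the whole area with writeback,
   "strd r4, r5, [sp, #-16]!", and the remaining registers use plain offset
   stores, "str r6, [sp, #8]" and "str r8, [sp, #12]".  */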
19919 static void
19920 arm_emit_strd_push (unsigned long saved_regs_mask)
19922 int num_regs = 0;
19923 int i, j, dwarf_index = 0;
19924 int offset = 0;
19925 rtx dwarf = NULL_RTX;
19926 rtx insn = NULL_RTX;
19927 rtx tmp, mem;
19929 /* TODO: More efficient code could be emitted by changing the
19930 layout, e.g., first push all pairs that can use STRD to keep the
19931 stack aligned, and then push all other registers. */
19932 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19933 if (saved_regs_mask & (1 << i))
19934 num_regs++;
19936 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19937 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19938 gcc_assert (num_regs > 0);
19940 /* Create sequence for DWARF info. */
19941 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19943 /* For dwarf info, we generate explicit stack update. */
19944 tmp = gen_rtx_SET (stack_pointer_rtx,
19945 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19946 RTX_FRAME_RELATED_P (tmp) = 1;
19947 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19949 /* Save registers. */
19950 offset = - 4 * num_regs;
19951 j = 0;
19952 while (j <= LAST_ARM_REGNUM)
19953 if (saved_regs_mask & (1 << j))
19955 if ((j % 2 == 0)
19956 && (saved_regs_mask & (1 << (j + 1))))
19958 /* The current register and the next register form a register pair for
19959 which STRD can be generated. */
19960 if (offset < 0)
19962 /* Allocate stack space for all saved registers. */
19963 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19964 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19965 mem = gen_frame_mem (DImode, tmp);
19966 offset = 0;
19968 else if (offset > 0)
19969 mem = gen_frame_mem (DImode,
19970 plus_constant (Pmode,
19971 stack_pointer_rtx,
19972 offset));
19973 else
19974 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19976 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19977 RTX_FRAME_RELATED_P (tmp) = 1;
19978 tmp = emit_insn (tmp);
19980 /* Record the first store insn. */
19981 if (dwarf_index == 1)
19982 insn = tmp;
19984 /* Generate dwarf info. */
19985 mem = gen_frame_mem (SImode,
19986 plus_constant (Pmode,
19987 stack_pointer_rtx,
19988 offset));
19989 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19990 RTX_FRAME_RELATED_P (tmp) = 1;
19991 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19993 mem = gen_frame_mem (SImode,
19994 plus_constant (Pmode,
19995 stack_pointer_rtx,
19996 offset + 4));
19997 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
19998 RTX_FRAME_RELATED_P (tmp) = 1;
19999 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20001 offset += 8;
20002 j += 2;
20004 else
20006 /* Emit a single word store. */
20007 if (offset < 0)
20009 /* Allocate stack space for all saved registers. */
20010 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20011 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20012 mem = gen_frame_mem (SImode, tmp);
20013 offset = 0;
20015 else if (offset > 0)
20016 mem = gen_frame_mem (SImode,
20017 plus_constant (Pmode,
20018 stack_pointer_rtx,
20019 offset));
20020 else
20021 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20023 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20024 RTX_FRAME_RELATED_P (tmp) = 1;
20025 tmp = emit_insn (tmp);
20027 /* Record the first store insn. */
20028 if (dwarf_index == 1)
20029 insn = tmp;
20031 /* Generate dwarf info. */
20032 mem = gen_frame_mem (SImode,
20033 plus_constant(Pmode,
20034 stack_pointer_rtx,
20035 offset));
20036 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20037 RTX_FRAME_RELATED_P (tmp) = 1;
20038 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20040 offset += 4;
20041 j += 1;
20044 else
20045 j++;
20047 /* Attach dwarf info to the first insn we generate. */
20048 gcc_assert (insn != NULL_RTX);
20049 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20050 RTX_FRAME_RELATED_P (insn) = 1;
20053 /* Generate and emit an insn that we will recognize as a push_multi.
20054 Unfortunately, since this insn does not reflect very well the actual
20055 semantics of the operation, we need to annotate the insn for the benefit
20056 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20057 MASK for registers that should be annotated for DWARF2 frame unwind
20058 information. */
20059 static rtx
20060 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20062 int num_regs = 0;
20063 int num_dwarf_regs = 0;
20064 int i, j;
20065 rtx par;
20066 rtx dwarf;
20067 int dwarf_par_index;
20068 rtx tmp, reg;
20070 /* We don't record the PC in the dwarf frame information. */
20071 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20073 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20075 if (mask & (1 << i))
20076 num_regs++;
20077 if (dwarf_regs_mask & (1 << i))
20078 num_dwarf_regs++;
20081 gcc_assert (num_regs && num_regs <= 16);
20082 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20084 /* For the body of the insn we are going to generate an UNSPEC in
20085 parallel with several USEs. This allows the insn to be recognized
20086 by the push_multi pattern in the arm.md file.
20088 The body of the insn looks something like this:
20090 (parallel [
20091 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20092 (const_int:SI <num>)))
20093 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20094 (use (reg:SI XX))
20095 (use (reg:SI YY))
20099 For the frame note however, we try to be more explicit and actually
20100 show each register being stored into the stack frame, plus a (single)
20101 decrement of the stack pointer. We do it this way in order to be
20102 friendly to the stack unwinding code, which only wants to see a single
20103 stack decrement per instruction. The RTL we generate for the note looks
20104 something like this:
20106 (sequence [
20107 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20108 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20109 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20110 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20114 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20115 instead we'd have a parallel expression detailing all
20116 the stores to the various memory addresses so that debug
20117 information is more up-to-date. Remember however while writing
20118 this to take care of the constraints with the push instruction.
20120 Note also that this has to be taken care of for the VFP registers.
20122 For more see PR43399. */
20124 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20125 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20126 dwarf_par_index = 1;
20128 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20130 if (mask & (1 << i))
20132 reg = gen_rtx_REG (SImode, i);
20134 XVECEXP (par, 0, 0)
20135 = gen_rtx_SET (gen_frame_mem
20136 (BLKmode,
20137 gen_rtx_PRE_MODIFY (Pmode,
20138 stack_pointer_rtx,
20139 plus_constant
20140 (Pmode, stack_pointer_rtx,
20141 -4 * num_regs))
20143 gen_rtx_UNSPEC (BLKmode,
20144 gen_rtvec (1, reg),
20145 UNSPEC_PUSH_MULT));
20147 if (dwarf_regs_mask & (1 << i))
20149 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20150 reg);
20151 RTX_FRAME_RELATED_P (tmp) = 1;
20152 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20155 break;
20159 for (j = 1, i++; j < num_regs; i++)
20161 if (mask & (1 << i))
20163 reg = gen_rtx_REG (SImode, i);
20165 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20167 if (dwarf_regs_mask & (1 << i))
20170 = gen_rtx_SET (gen_frame_mem
20171 (SImode,
20172 plus_constant (Pmode, stack_pointer_rtx,
20173 4 * j)),
20174 reg);
20175 RTX_FRAME_RELATED_P (tmp) = 1;
20176 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20179 j++;
20183 par = emit_insn (par);
20185 tmp = gen_rtx_SET (stack_pointer_rtx,
20186 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20187 RTX_FRAME_RELATED_P (tmp) = 1;
20188 XVECEXP (dwarf, 0, 0) = tmp;
20190 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20192 return par;
20195 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20196 SIZE is the offset to be adjusted.
20197 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20198 static void
20199 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20201 rtx dwarf;
20203 RTX_FRAME_RELATED_P (insn) = 1;
20204 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20205 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20208 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20209 SAVED_REGS_MASK shows which registers need to be restored.
20211 Unfortunately, since this insn does not reflect very well the actual
20212 semantics of the operation, we need to annotate the insn for the benefit
20213 of DWARF2 frame unwind information. */
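/* For illustration: restoring r4, r5 and the return address through PC
   corresponds to a single "pop {r4, r5, pc}"; the PARALLEL built below holds
   the return, a 12-byte stack-pointer adjustment and one SET per register,
   while the attached REG_CFA_RESTORE notes list only r4 and r5, since PC is
   never described in the unwind info.  */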
20214 static void
20215 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20217 int num_regs = 0;
20218 int i, j;
20219 rtx par;
20220 rtx dwarf = NULL_RTX;
20221 rtx tmp, reg;
20222 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20223 int offset_adj;
20224 int emit_update;
20226 offset_adj = return_in_pc ? 1 : 0;
20227 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20228 if (saved_regs_mask & (1 << i))
20229 num_regs++;
20231 gcc_assert (num_regs && num_regs <= 16);
20233 /* If SP is in reglist, then we don't emit SP update insn. */
20234 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20236 /* The parallel needs to hold num_regs SETs
20237 and one SET for the stack update. */
20238 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20240 if (return_in_pc)
20241 XVECEXP (par, 0, 0) = ret_rtx;
20243 if (emit_update)
20245 /* Increment the stack pointer, based on there being
20246 num_regs 4-byte registers to restore. */
20247 tmp = gen_rtx_SET (stack_pointer_rtx,
20248 plus_constant (Pmode,
20249 stack_pointer_rtx,
20250 4 * num_regs));
20251 RTX_FRAME_RELATED_P (tmp) = 1;
20252 XVECEXP (par, 0, offset_adj) = tmp;
20255 /* Now restore every reg, which may include PC. */
20256 for (j = 0, i = 0; j < num_regs; i++)
20257 if (saved_regs_mask & (1 << i))
20259 reg = gen_rtx_REG (SImode, i);
20260 if ((num_regs == 1) && emit_update && !return_in_pc)
20262 /* Emit single load with writeback. */
20263 tmp = gen_frame_mem (SImode,
20264 gen_rtx_POST_INC (Pmode,
20265 stack_pointer_rtx));
20266 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20267 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20268 return;
20271 tmp = gen_rtx_SET (reg,
20272 gen_frame_mem
20273 (SImode,
20274 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20275 RTX_FRAME_RELATED_P (tmp) = 1;
20276 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20278 /* We need to maintain a sequence for DWARF info too. As dwarf info
20279 should not have PC, skip PC. */
20280 if (i != PC_REGNUM)
20281 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20283 j++;
20286 if (return_in_pc)
20287 par = emit_jump_insn (par);
20288 else
20289 par = emit_insn (par);
20291 REG_NOTES (par) = dwarf;
20292 if (!return_in_pc)
20293 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20294 stack_pointer_rtx, stack_pointer_rtx);
20297 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20298 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20300 Unfortunately, since this insn does not reflect very well the actual
20301 semantics of the operation, we need to annotate the insn for the benefit
20302 of DWARF2 frame unwind information. */
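/* For illustration: a request to restore 20 D registers is handled by the
   code below as two pop_multi patterns, one for the first 16 registers and a
   recursive call for the remaining 4; each pattern bundles the base-register
   increment (8 bytes per D register) with one SET per restored register.  */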
20303 static void
20304 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20306 int i, j;
20307 rtx par;
20308 rtx dwarf = NULL_RTX;
20309 rtx tmp, reg;
20311 gcc_assert (num_regs && num_regs <= 32);
20313 /* Workaround ARM10 VFPr1 bug. */
20314 if (num_regs == 2 && !arm_arch6)
20316 if (first_reg == 15)
20317 first_reg--;
20319 num_regs++;
20322 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20323 there could be up to 32 D-registers to restore.
20324 If there are more than 16 D-registers, make two recursive calls,
20325 each of which emits one pop_multi instruction. */
20326 if (num_regs > 16)
20328 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20329 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20330 return;
20333 /* The parallel needs to hold num_regs SETs
20334 and one SET for the stack update. */
20335 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20337 /* Increment the stack pointer, based on there being
20338 num_regs 8-byte registers to restore. */
20339 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20340 RTX_FRAME_RELATED_P (tmp) = 1;
20341 XVECEXP (par, 0, 0) = tmp;
20343 /* Now show every reg that will be restored, using a SET for each. */
20344 for (j = 0, i=first_reg; j < num_regs; i += 2)
20346 reg = gen_rtx_REG (DFmode, i);
20348 tmp = gen_rtx_SET (reg,
20349 gen_frame_mem
20350 (DFmode,
20351 plus_constant (Pmode, base_reg, 8 * j)));
20352 RTX_FRAME_RELATED_P (tmp) = 1;
20353 XVECEXP (par, 0, j + 1) = tmp;
20355 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20357 j++;
20360 par = emit_insn (par);
20361 REG_NOTES (par) = dwarf;
20363 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP. */
20364 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20366 RTX_FRAME_RELATED_P (par) = 1;
20367 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20369 else
20370 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20371 base_reg, base_reg);
20374 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20375 even number of registers is being popped, LDRD patterns are created for
20376 all register pairs. If an odd number of registers is popped, the last register is
20377 loaded using an LDR pattern. */
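/* A rough sketch of the intent, assuming the epilogue restores r4 and r5 and
   returns through PC: the pair is popped with "ldrd r4, r5, [sp]", the stack
   pointer is then advanced with "add sp, sp, #8", and a final
   "ldr pc, [sp], #4" restores the last word and returns.  */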
20378 static void
20379 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20381 int num_regs = 0;
20382 int i, j;
20383 rtx par = NULL_RTX;
20384 rtx dwarf = NULL_RTX;
20385 rtx tmp, reg, tmp1;
20386 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20388 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20389 if (saved_regs_mask & (1 << i))
20390 num_regs++;
20392 gcc_assert (num_regs && num_regs <= 16);
20394 /* We cannot generate LDRD for PC, so reduce the count if PC is to be
20395 popped. If num_regs was even it now becomes odd, and we can generate
20396 a pop with PC. If num_regs was odd it becomes even, and an ldr with
20397 return can be generated for PC. */
20398 if (return_in_pc)
20399 num_regs--;
20401 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20403 /* Var j iterates over all the registers in saved_regs_mask. Var i gives
20404 the index of each saved register in the stack frame. A PARALLEL RTX
20405 for a register pair is created here, so that the LDRD pattern can be
20406 matched. As PC is always the last register to be popped, and we have
20407 already decremented num_regs if PC is in the mask, we don't have to
20408 worry about PC in this loop. */
20409 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20410 if (saved_regs_mask & (1 << j))
20412 /* Create RTX for memory load. */
20413 reg = gen_rtx_REG (SImode, j);
20414 tmp = gen_rtx_SET (reg,
20415 gen_frame_mem (SImode,
20416 plus_constant (Pmode,
20417 stack_pointer_rtx, 4 * i)));
20418 RTX_FRAME_RELATED_P (tmp) = 1;
20420 if (i % 2 == 0)
20422 /* When saved-register index (i) is even, the RTX to be emitted is
20423 yet to be created. Hence create it first. The LDRD pattern we
20424 are generating is :
20425 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20426 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20427 where target registers need not be consecutive. */
20428 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20429 dwarf = NULL_RTX;
20432 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20433 added as 0th element and if i is odd, reg_i is added as 1st element
20434 of LDRD pattern shown above. */
20435 XVECEXP (par, 0, (i % 2)) = tmp;
20436 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20438 if ((i % 2) == 1)
20440 /* When saved-register index (i) is odd, RTXs for both the registers
20441 to be loaded have been generated in the LDRD pattern given above, and the
20442 pattern can be emitted now. */
20443 par = emit_insn (par);
20444 REG_NOTES (par) = dwarf;
20445 RTX_FRAME_RELATED_P (par) = 1;
20448 i++;
20451 /* If the number of registers pushed is odd and return_in_pc is false, or the
20452 number of registers is even and return_in_pc is true, the last register is
20453 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20454 then use LDR with post increment. */
20456 /* Increment the stack pointer, based on there being
20457 num_regs 4-byte registers to restore. */
20458 tmp = gen_rtx_SET (stack_pointer_rtx,
20459 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20460 RTX_FRAME_RELATED_P (tmp) = 1;
20461 tmp = emit_insn (tmp);
20462 if (!return_in_pc)
20464 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20465 stack_pointer_rtx, stack_pointer_rtx);
20468 dwarf = NULL_RTX;
20470 if (((num_regs % 2) == 1 && !return_in_pc)
20471 || ((num_regs % 2) == 0 && return_in_pc))
20473 /* Scan for the single register to be popped. Skip until the saved
20474 register is found. */
20475 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20477 /* Gen LDR with post increment here. */
20478 tmp1 = gen_rtx_MEM (SImode,
20479 gen_rtx_POST_INC (SImode,
20480 stack_pointer_rtx));
20481 set_mem_alias_set (tmp1, get_frame_alias_set ());
20483 reg = gen_rtx_REG (SImode, j);
20484 tmp = gen_rtx_SET (reg, tmp1);
20485 RTX_FRAME_RELATED_P (tmp) = 1;
20486 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20488 if (return_in_pc)
20490 /* If return_in_pc, j must be PC_REGNUM. */
20491 gcc_assert (j == PC_REGNUM);
20492 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20493 XVECEXP (par, 0, 0) = ret_rtx;
20494 XVECEXP (par, 0, 1) = tmp;
20495 par = emit_jump_insn (par);
20497 else
20499 par = emit_insn (tmp);
20500 REG_NOTES (par) = dwarf;
20501 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20502 stack_pointer_rtx, stack_pointer_rtx);
20506 else if ((num_regs % 2) == 1 && return_in_pc)
20508 /* There are 2 registers to be popped. So, generate the pattern
20509 pop_multiple_with_stack_update_and_return to pop in PC. */
20510 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20513 return;
20516 /* LDRD in ARM mode needs consecutive registers as operands. This function
20517 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20518 offset addressing and then generates one separate stack update. This provides
20519 more scheduling freedom, compared to writeback on every load. However,
20520 if the function returns using load into PC directly
20521 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20522 before the last load. TODO: Add a peephole optimization to recognize
20523 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20524 peephole optimization to merge the load at stack-offset zero
20525 with the stack update instruction using load with writeback
20526 in post-index addressing mode. */
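/* A minimal sketch of the expected sequence, assuming r4, r5 and r6 are
   restored and the function does not return through PC:
   "ldrd r4, r5, [sp]", "ldr r6, [sp, #8]", then a single
   "add sp, sp, #12" stack update at the end.  */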
20527 static void
20528 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20530 int j = 0;
20531 int offset = 0;
20532 rtx par = NULL_RTX;
20533 rtx dwarf = NULL_RTX;
20534 rtx tmp, mem;
20536 /* Restore saved registers. */
20537 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20538 j = 0;
20539 while (j <= LAST_ARM_REGNUM)
20540 if (saved_regs_mask & (1 << j))
20542 if ((j % 2) == 0
20543 && (saved_regs_mask & (1 << (j + 1)))
20544 && (j + 1) != PC_REGNUM)
20546 /* Current register and next register form register pair for which
20547 LDRD can be generated. PC is always the last register popped, and
20548 we handle it separately. */
20549 if (offset > 0)
20550 mem = gen_frame_mem (DImode,
20551 plus_constant (Pmode,
20552 stack_pointer_rtx,
20553 offset));
20554 else
20555 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20557 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20558 tmp = emit_insn (tmp);
20559 RTX_FRAME_RELATED_P (tmp) = 1;
20561 /* Generate dwarf info. */
20563 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20564 gen_rtx_REG (SImode, j),
20565 NULL_RTX);
20566 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20567 gen_rtx_REG (SImode, j + 1),
20568 dwarf);
20570 REG_NOTES (tmp) = dwarf;
20572 offset += 8;
20573 j += 2;
20575 else if (j != PC_REGNUM)
20577 /* Emit a single word load. */
20578 if (offset > 0)
20579 mem = gen_frame_mem (SImode,
20580 plus_constant (Pmode,
20581 stack_pointer_rtx,
20582 offset));
20583 else
20584 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20586 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20587 tmp = emit_insn (tmp);
20588 RTX_FRAME_RELATED_P (tmp) = 1;
20590 /* Generate dwarf info. */
20591 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20592 gen_rtx_REG (SImode, j),
20593 NULL_RTX);
20595 offset += 4;
20596 j += 1;
20598 else /* j == PC_REGNUM */
20599 j++;
20601 else
20602 j++;
20604 /* Update the stack. */
20605 if (offset > 0)
20607 tmp = gen_rtx_SET (stack_pointer_rtx,
20608 plus_constant (Pmode,
20609 stack_pointer_rtx,
20610 offset));
20611 tmp = emit_insn (tmp);
20612 arm_add_cfa_adjust_cfa_note (tmp, offset,
20613 stack_pointer_rtx, stack_pointer_rtx);
20614 offset = 0;
20617 if (saved_regs_mask & (1 << PC_REGNUM))
20619 /* Only PC is to be popped. */
20620 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20621 XVECEXP (par, 0, 0) = ret_rtx;
20622 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20623 gen_frame_mem (SImode,
20624 gen_rtx_POST_INC (SImode,
20625 stack_pointer_rtx)));
20626 RTX_FRAME_RELATED_P (tmp) = 1;
20627 XVECEXP (par, 0, 1) = tmp;
20628 par = emit_jump_insn (par);
20630 /* Generate dwarf info. */
20631 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20632 gen_rtx_REG (SImode, PC_REGNUM),
20633 NULL_RTX);
20634 REG_NOTES (par) = dwarf;
20635 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20636 stack_pointer_rtx, stack_pointer_rtx);
20640 /* Calculate the size of the return value that is passed in registers. */
20641 static unsigned
20642 arm_size_return_regs (void)
20644 machine_mode mode;
20646 if (crtl->return_rtx != 0)
20647 mode = GET_MODE (crtl->return_rtx);
20648 else
20649 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20651 return GET_MODE_SIZE (mode);
20654 /* Return true if the current function needs to save/restore LR. */
20655 static bool
20656 thumb_force_lr_save (void)
20658 return !cfun->machine->lr_save_eliminated
20659 && (!leaf_function_p ()
20660 || thumb_far_jump_used_p ()
20661 || df_regs_ever_live_p (LR_REGNUM));
20664 /* We do not know if r3 will be available because
20665 there is an indirect tailcall happening in this
20666 particular case. */
20667 static bool
20668 is_indirect_tailcall_p (rtx call)
20670 rtx pat = PATTERN (call);
20672 /* Indirect tail call. */
20673 pat = XVECEXP (pat, 0, 0);
20674 if (GET_CODE (pat) == SET)
20675 pat = SET_SRC (pat);
20677 pat = XEXP (XEXP (pat, 0), 0);
20678 return REG_P (pat);
20681 /* Return true if r3 is used by any of the tail call insns in the
20682 current function. */
20683 static bool
20684 any_sibcall_could_use_r3 (void)
20686 edge_iterator ei;
20687 edge e;
20689 if (!crtl->tail_call_emit)
20690 return false;
20691 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20692 if (e->flags & EDGE_SIBCALL)
20694 rtx call = BB_END (e->src);
20695 if (!CALL_P (call))
20696 call = prev_nonnote_nondebug_insn (call);
20697 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20698 if (find_regno_fusage (call, USE, 3)
20699 || is_indirect_tailcall_p (call))
20700 return true;
20702 return false;
20706 /* Compute the distance from register FROM to register TO.
20707 These can be the arg pointer (26), the soft frame pointer (25),
20708 the stack pointer (13) or the hard frame pointer (11).
20709 In thumb mode r7 is used as the soft frame pointer, if needed.
20710 Typical stack layout looks like this:
20712 old stack pointer -> | |
20713 ----
20714 | | \
20715 | | saved arguments for
20716 | | vararg functions
20717 | | /
20719 hard FP & arg pointer -> | | \
20720 | | stack
20721 | | frame
20722 | | /
20724 | | \
20725 | | call saved
20726 | | registers
20727 soft frame pointer -> | | /
20729 | | \
20730 | | local
20731 | | variables
20732 locals base pointer -> | | /
20734 | | \
20735 | | outgoing
20736 | | arguments
20737 current stack pointer -> | | /
20740 For a given function some or all of these stack components
20741 may not be needed, giving rise to the possibility of
20742 eliminating some of the registers.
20744 The values returned by this function must reflect the behavior
20745 of arm_expand_prologue() and arm_compute_save_reg_mask().
20747 The sign of the number returned reflects the direction of stack
20748 growth, so the values are positive for all eliminations except
20749 from the soft frame pointer to the hard frame pointer.
20751 SFP may point just inside the local variables block to ensure correct
20752 alignment. */
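/* Purely illustrative numbers (they depend on target options; assume no
   static chain, no interworking slot, no frame pointer and no alignment
   padding): a function that saves r4, r5, r6 and lr (16 bytes) with 8 bytes
   of locals and no outgoing arguments gets saved_args = 0, saved_regs = 16,
   soft_frame = 16, locals_base = 24 and outgoing_args = 24; the elimination
   offset from the arg pointer to the stack pointer is then 24 - (0 + 4) = 20,
   and from the soft frame pointer to the stack pointer 24 - 16 = 8.  */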
20755 /* Calculate stack offsets. These are used to calculate register elimination
20756 offsets and in prologue/epilogue code. Also calculates which registers
20757 should be saved. */
20759 static arm_stack_offsets *
20760 arm_get_frame_offsets (void)
20762 struct arm_stack_offsets *offsets;
20763 unsigned long func_type;
20764 int leaf;
20765 int saved;
20766 int core_saved;
20767 HOST_WIDE_INT frame_size;
20768 int i;
20770 offsets = &cfun->machine->stack_offsets;
20772 /* We need to know if we are a leaf function. Unfortunately, it
20773 is possible to be called after start_sequence has been called,
20774 which causes get_insns to return the insns for the sequence,
20775 not the function, which will cause leaf_function_p to return
20776 the incorrect result.
20778 However, we only need to know about leaf functions once reload has completed, and the
20779 frame size cannot be changed after that time, so we can safely
20780 use the cached value. */
20782 if (reload_completed)
20783 return offsets;
20785 /* Initially this is the size of the local variables. It will be translated
20786 into an offset once we have determined the size of preceding data. */
20787 frame_size = ROUND_UP_WORD (get_frame_size ());
20789 leaf = leaf_function_p ();
20791 /* Space for variadic functions. */
20792 offsets->saved_args = crtl->args.pretend_args_size;
20794 /* In Thumb mode this is incorrect, but never used. */
20795 offsets->frame
20796 = (offsets->saved_args
20797 + arm_compute_static_chain_stack_bytes ()
20798 + (frame_pointer_needed ? 4 : 0));
20800 if (TARGET_32BIT)
20802 unsigned int regno;
20804 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20805 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20806 saved = core_saved;
20808 /* We know that SP will be doubleword aligned on entry, and we must
20809 preserve that condition at any subroutine call. We also require the
20810 soft frame pointer to be doubleword aligned. */
20812 if (TARGET_REALLY_IWMMXT)
20814 /* Check for the call-saved iWMMXt registers. */
20815 for (regno = FIRST_IWMMXT_REGNUM;
20816 regno <= LAST_IWMMXT_REGNUM;
20817 regno++)
20818 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20819 saved += 8;
20822 func_type = arm_current_func_type ();
20823 /* Space for saved VFP registers. */
20824 if (! IS_VOLATILE (func_type)
20825 && TARGET_HARD_FLOAT && TARGET_VFP)
20826 saved += arm_get_vfp_saved_size ();
20828 else /* TARGET_THUMB1 */
20830 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20831 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20832 saved = core_saved;
20833 if (TARGET_BACKTRACE)
20834 saved += 16;
20837 /* Saved registers include the stack frame. */
20838 offsets->saved_regs
20839 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20840 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20842 /* A leaf function does not need any stack alignment if it has nothing
20843 on the stack. */
20844 if (leaf && frame_size == 0
20845 /* However if it calls alloca(), we have a dynamically allocated
20846 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20847 && ! cfun->calls_alloca)
20849 offsets->outgoing_args = offsets->soft_frame;
20850 offsets->locals_base = offsets->soft_frame;
20851 return offsets;
20854 /* Ensure SFP has the correct alignment. */
20855 if (ARM_DOUBLEWORD_ALIGN
20856 && (offsets->soft_frame & 7))
20858 offsets->soft_frame += 4;
20859 /* Try to align stack by pushing an extra reg. Don't bother doing this
20860 when there is a stack frame as the alignment will be rolled into
20861 the normal stack adjustment. */
20862 if (frame_size + crtl->outgoing_args_size == 0)
20864 int reg = -1;
20866 /* Register r3 is caller-saved. Normally it does not need to be
20867 saved on entry by the prologue. However if we choose to save
20868 it for padding then we may confuse the compiler into thinking
20869 a prologue sequence is required when in fact it is not. This
20870 will occur when shrink-wrapping if r3 is used as a scratch
20871 register and there are no other callee-saved writes.
20873 This situation can be avoided when other callee-saved registers
20874 are available and r3 is not mandatory if we choose a callee-saved
20875 register for padding. */
20876 bool prefer_callee_reg_p = false;
20878 /* If it is safe to use r3, then do so. This sometimes
20879 generates better code on Thumb-2 by avoiding the need to
20880 use 32-bit push/pop instructions. */
20881 if (! any_sibcall_could_use_r3 ()
20882 && arm_size_return_regs () <= 12
20883 && (offsets->saved_regs_mask & (1 << 3)) == 0
20884 && (TARGET_THUMB2
20885 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20887 reg = 3;
20888 if (!TARGET_THUMB2)
20889 prefer_callee_reg_p = true;
20891 if (reg == -1
20892 || prefer_callee_reg_p)
20894 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20896 /* Avoid fixed registers; they may be changed at
20897 arbitrary times so it's unsafe to restore them
20898 during the epilogue. */
20899 if (!fixed_regs[i]
20900 && (offsets->saved_regs_mask & (1 << i)) == 0)
20902 reg = i;
20903 break;
20908 if (reg != -1)
20910 offsets->saved_regs += 4;
20911 offsets->saved_regs_mask |= (1 << reg);
20916 offsets->locals_base = offsets->soft_frame + frame_size;
20917 offsets->outgoing_args = (offsets->locals_base
20918 + crtl->outgoing_args_size);
20920 if (ARM_DOUBLEWORD_ALIGN)
20922 /* Ensure SP remains doubleword aligned. */
20923 if (offsets->outgoing_args & 7)
20924 offsets->outgoing_args += 4;
20925 gcc_assert (!(offsets->outgoing_args & 7));
20928 return offsets;
20932 /* Calculate the relative offsets for the different stack pointers. Positive
20933 offsets are in the direction of stack growth. */
20935 HOST_WIDE_INT
20936 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20938 arm_stack_offsets *offsets;
20940 offsets = arm_get_frame_offsets ();
20942 /* OK, now we have enough information to compute the distances.
20943 There must be an entry in these switch tables for each pair
20944 of registers in ELIMINABLE_REGS, even if some of the entries
20945 seem to be redundant or useless. */
20946 switch (from)
20948 case ARG_POINTER_REGNUM:
20949 switch (to)
20951 case THUMB_HARD_FRAME_POINTER_REGNUM:
20952 return 0;
20954 case FRAME_POINTER_REGNUM:
20955 /* This is the reverse of the soft frame pointer
20956 to hard frame pointer elimination below. */
20957 return offsets->soft_frame - offsets->saved_args;
20959 case ARM_HARD_FRAME_POINTER_REGNUM:
20960 /* This is only non-zero in the case where the static chain register
20961 is stored above the frame. */
20962 return offsets->frame - offsets->saved_args - 4;
20964 case STACK_POINTER_REGNUM:
20965 /* If nothing has been pushed on the stack at all
20966 then this will return -4. This *is* correct! */
20967 return offsets->outgoing_args - (offsets->saved_args + 4);
20969 default:
20970 gcc_unreachable ();
20972 gcc_unreachable ();
20974 case FRAME_POINTER_REGNUM:
20975 switch (to)
20977 case THUMB_HARD_FRAME_POINTER_REGNUM:
20978 return 0;
20980 case ARM_HARD_FRAME_POINTER_REGNUM:
20981 /* The hard frame pointer points to the top entry in the
20982 stack frame. The soft frame pointer to the bottom entry
20983 in the stack frame. If there is no stack frame at all,
20984 then they are identical. */
20986 return offsets->frame - offsets->soft_frame;
20988 case STACK_POINTER_REGNUM:
20989 return offsets->outgoing_args - offsets->soft_frame;
20991 default:
20992 gcc_unreachable ();
20994 gcc_unreachable ();
20996 default:
20997 /* You cannot eliminate from the stack pointer.
20998 In theory you could eliminate from the hard frame
20999 pointer to the stack pointer, but this will never
21000 happen, since if a stack frame is not needed the
21001 hard frame pointer will never be used. */
21002 gcc_unreachable ();
21006 /* Given FROM and TO register numbers, say whether this elimination is
21007 allowed. Frame pointer elimination is automatically handled.
21009 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21010 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21011 pointer, we must eliminate FRAME_POINTER_REGNUM into
21012 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21013 ARG_POINTER_REGNUM. */
21015 bool
21016 arm_can_eliminate (const int from, const int to)
21018 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21019 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21020 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21021 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21022 true);
21025 /* Emit RTL to save coprocessor registers on function entry. Returns the
21026 number of bytes pushed. */
21028 static int
21029 arm_save_coproc_regs(void)
21031 int saved_size = 0;
21032 unsigned reg;
21033 unsigned start_reg;
21034 rtx insn;
21036 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21037 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21039 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21040 insn = gen_rtx_MEM (V2SImode, insn);
21041 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21042 RTX_FRAME_RELATED_P (insn) = 1;
21043 saved_size += 8;
21046 if (TARGET_HARD_FLOAT && TARGET_VFP)
21048 start_reg = FIRST_VFP_REGNUM;
21050 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21052 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21053 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21055 if (start_reg != reg)
21056 saved_size += vfp_emit_fstmd (start_reg,
21057 (reg - start_reg) / 2);
21058 start_reg = reg + 2;
21061 if (start_reg != reg)
21062 saved_size += vfp_emit_fstmd (start_reg,
21063 (reg - start_reg) / 2);
21065 return saved_size;
21069 /* Set the Thumb frame pointer from the stack pointer. */
21071 static void
21072 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21074 HOST_WIDE_INT amount;
21075 rtx insn, dwarf;
21077 amount = offsets->outgoing_args - offsets->locals_base;
21078 if (amount < 1024)
21079 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21080 stack_pointer_rtx, GEN_INT (amount)));
21081 else
21083 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21084 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21085 expects the first two operands to be the same. */
21086 if (TARGET_THUMB2)
21088 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21089 stack_pointer_rtx,
21090 hard_frame_pointer_rtx));
21092 else
21094 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21095 hard_frame_pointer_rtx,
21096 stack_pointer_rtx));
21098 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21099 plus_constant (Pmode, stack_pointer_rtx, amount));
21100 RTX_FRAME_RELATED_P (dwarf) = 1;
21101 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21104 RTX_FRAME_RELATED_P (insn) = 1;
21107 struct scratch_reg {
21108 rtx reg;
21109 bool saved;
21112 /* Return a short-lived scratch register for use as a 2nd scratch register on
21113 function entry after the registers are saved in the prologue. This register
21114 must be released by means of release_scratch_register_on_entry. IP is not
21115 considered since it is always used as the 1st scratch register if available.
21117 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21118 mask of live registers. */
21120 static void
21121 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21122 unsigned long live_regs)
21124 int regno = -1;
21126 sr->saved = false;
21128 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21129 regno = LR_REGNUM;
21130 else
21132 unsigned int i;
21134 for (i = 4; i < 11; i++)
21135 if (regno1 != i && (live_regs & (1 << i)) != 0)
21137 regno = i;
21138 break;
21141 if (regno < 0)
21143 /* If IP is used as the 1st scratch register for a nested function,
21144 then either r3 wasn't available or is used to preserve IP. */
21145 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21146 regno1 = 3;
21147 regno = (regno1 == 3 ? 2 : 3);
21148 sr->saved
21149 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21150 regno);
21154 sr->reg = gen_rtx_REG (SImode, regno);
21155 if (sr->saved)
21157 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21158 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21159 rtx x = gen_rtx_SET (stack_pointer_rtx,
21160 plus_constant (Pmode, stack_pointer_rtx, -4));
21161 RTX_FRAME_RELATED_P (insn) = 1;
21162 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21166 /* Release a scratch register obtained from the preceding function. */
21168 static void
21169 release_scratch_register_on_entry (struct scratch_reg *sr)
21171 if (sr->saved)
21173 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21174 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21175 rtx x = gen_rtx_SET (stack_pointer_rtx,
21176 plus_constant (Pmode, stack_pointer_rtx, 4));
21177 RTX_FRAME_RELATED_P (insn) = 1;
21178 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21182 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21184 #if PROBE_INTERVAL > 4096
21185 #error Cannot use indexed addressing mode for stack probing
21186 #endif
21188 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21189 inclusive. These are offsets from the current stack pointer. REGNO1
21190 is the index number of the 1st scratch register and LIVE_REGS is the
21191 mask of live registers. */
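/* A worked example, assuming PROBE_INTERVAL is 4096: with FIRST = 0 and
   SIZE = 12288 the unrolled case below emits probes at sp - 4096, sp - 8192
   and sp - 12288; anything larger than 5 * PROBE_INTERVAL falls through to
   the loop built around gen_probe_stack_range.  */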
21193 static void
21194 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21195 unsigned int regno1, unsigned long live_regs)
21197 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21199 /* See if we have a constant small number of probes to generate. If so,
21200 that's the easy case. */
21201 if (size <= PROBE_INTERVAL)
21203 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21204 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21205 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21208 /* The run-time loop is made up of 10 insns in the generic case while the
21209 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21210 else if (size <= 5 * PROBE_INTERVAL)
21212 HOST_WIDE_INT i, rem;
21214 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21215 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21216 emit_stack_probe (reg1);
21218 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21219 it exceeds SIZE. If only two probes are needed, this will not
21220 generate any code. Then probe at FIRST + SIZE. */
21221 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21223 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21224 emit_stack_probe (reg1);
21227 rem = size - (i - PROBE_INTERVAL);
21228 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21230 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21231 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21233 else
21234 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21237 /* Otherwise, do the same as above, but in a loop. Note that we must be
21238 extra careful with variables wrapping around because we might be at
21239 the very top (or the very bottom) of the address space and we have
21240 to be able to handle this case properly; in particular, we use an
21241 equality test for the loop condition. */
21242 else
21244 HOST_WIDE_INT rounded_size;
21245 struct scratch_reg sr;
21247 get_scratch_register_on_entry (&sr, regno1, live_regs);
21249 emit_move_insn (reg1, GEN_INT (first));
21252 /* Step 1: round SIZE to the previous multiple of the interval. */
21254 rounded_size = size & -PROBE_INTERVAL;
21255 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21258 /* Step 2: compute initial and final value of the loop counter. */
21260 /* TEST_ADDR = SP + FIRST. */
21261 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21263 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21264 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21267 /* Step 3: the loop
21269 while (TEST_ADDR != LAST_ADDR)
21271 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21272 probe at TEST_ADDR
21275 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21276 until it is equal to ROUNDED_SIZE. */
21278 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21281 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21282 that SIZE is equal to ROUNDED_SIZE. */
21284 if (size != rounded_size)
21286 HOST_WIDE_INT rem = size - rounded_size;
21288 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21290 emit_set_insn (sr.reg,
21291 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21292 emit_stack_probe (plus_constant (Pmode, sr.reg,
21293 PROBE_INTERVAL - rem));
21295 else
21296 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21299 release_scratch_register_on_entry (&sr);
21302 /* Make sure nothing is scheduled before we are done. */
21303 emit_insn (gen_blockage ());
21306 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21307 absolute addresses. */
21309 const char *
21310 output_probe_stack_range (rtx reg1, rtx reg2)
21312 static int labelno = 0;
21313 char loop_lab[32];
21314 rtx xops[2];
21316 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21318 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21320 /* Test if TEST_ADDR == LAST_ADDR. */
21321 xops[0] = reg1;
21322 xops[1] = reg2;
21323 output_asm_insn ("cmp\t%0, %1", xops);
21325 if (TARGET_THUMB2)
21326 fputs ("\tittt\tne\n", asm_out_file);
21328 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21329 xops[1] = GEN_INT (PROBE_INTERVAL);
21330 output_asm_insn ("subne\t%0, %0, %1", xops);
21332 /* Probe at TEST_ADDR and branch. */
21333 output_asm_insn ("strne\tr0, [%0, #0]", xops);
21334 fputs ("\tbne\t", asm_out_file);
21335 assemble_name_raw (asm_out_file, loop_lab);
21336 fputc ('\n', asm_out_file);
21338 return "";
21341 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21342 function. */
21343 void
21344 arm_expand_prologue (void)
21346 rtx amount;
21347 rtx insn;
21348 rtx ip_rtx;
21349 unsigned long live_regs_mask;
21350 unsigned long func_type;
21351 int fp_offset = 0;
21352 int saved_pretend_args = 0;
21353 int saved_regs = 0;
21354 unsigned HOST_WIDE_INT args_to_push;
21355 HOST_WIDE_INT size;
21356 arm_stack_offsets *offsets;
21357 bool clobber_ip;
21359 func_type = arm_current_func_type ();
21361 /* Naked functions don't have prologues. */
21362 if (IS_NAKED (func_type))
21363 return;
21365 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21366 args_to_push = crtl->args.pretend_args_size;
21368 /* Compute which registers we will have to save onto the stack. */
21369 offsets = arm_get_frame_offsets ();
21370 live_regs_mask = offsets->saved_regs_mask;
21372 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21374 if (IS_STACKALIGN (func_type))
21376 rtx r0, r1;
21378 /* Handle a word-aligned stack pointer. We generate the following:
21380 mov r0, sp
21381 bic r1, r0, #7
21382 mov sp, r1
21383 <save and restore r0 in normal prologue/epilogue>
21384 mov sp, r0
21385 bx lr
21387 The unwinder doesn't need to know about the stack realignment.
21388 Just tell it we saved SP in r0. */
21389 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21391 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21392 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21394 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21395 RTX_FRAME_RELATED_P (insn) = 1;
21396 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21398 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21400 /* ??? The CFA changes here, which may cause GDB to conclude that it
21401 has entered a different function. That said, the unwind info is
21402 correct, individually, before and after this instruction because
21403 we've described the save of SP, which will override the default
21404 handling of SP as restoring from the CFA. */
21405 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21408 /* The static chain register is the same as the IP register. If it is
21409 clobbered when creating the frame, we need to save and restore it. */
21410 clobber_ip = IS_NESTED (func_type)
21411 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21412 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21413 && !df_regs_ever_live_p (LR_REGNUM)
21414 && arm_r3_live_at_start_p ()));
21416 /* Find somewhere to store IP whilst the frame is being created.
21417 We try the following places in order:
21419 1. The last argument register r3 if it is available.
21420 2. A slot on the stack above the frame if there are no
21421 arguments to push onto the stack.
21422 3. Register r3 again, after pushing the argument registers
21423 onto the stack, if this is a varargs function.
21424 4. The last slot on the stack created for the arguments to
21425 push, if this isn't a varargs function.
21427 Note - we only need to tell the dwarf2 backend about the SP
21428 adjustment in the second variant; the static chain register
21429 doesn't need to be unwound, as it doesn't contain a value
21430 inherited from the caller. */
21431 if (clobber_ip)
21433 if (!arm_r3_live_at_start_p ())
21434 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21435 else if (args_to_push == 0)
21437 rtx addr, dwarf;
21439 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21440 saved_regs += 4;
21442 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21443 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21444 fp_offset = 4;
21446 /* Just tell the dwarf backend that we adjusted SP. */
21447 dwarf = gen_rtx_SET (stack_pointer_rtx,
21448 plus_constant (Pmode, stack_pointer_rtx,
21449 -fp_offset));
21450 RTX_FRAME_RELATED_P (insn) = 1;
21451 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21453 else
21455 /* Store the args on the stack. */
21456 if (cfun->machine->uses_anonymous_args)
21458 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21459 (0xf0 >> (args_to_push / 4)) & 0xf);
21460 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21461 saved_pretend_args = 1;
21463 else
21465 rtx addr, dwarf;
21467 if (args_to_push == 4)
21468 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21469 else
21470 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21471 plus_constant (Pmode,
21472 stack_pointer_rtx,
21473 -args_to_push));
21475 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21477 /* Just tell the dwarf backend that we adjusted SP. */
21478 dwarf = gen_rtx_SET (stack_pointer_rtx,
21479 plus_constant (Pmode, stack_pointer_rtx,
21480 -args_to_push));
21481 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21484 RTX_FRAME_RELATED_P (insn) = 1;
21485 fp_offset = args_to_push;
21486 args_to_push = 0;
21490 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21492 if (IS_INTERRUPT (func_type))
21494 /* Interrupt functions must not corrupt any registers.
21495 Creating a frame pointer, however, corrupts the IP
21496 register, so we must push it first. */
21497 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21499 /* Do not set RTX_FRAME_RELATED_P on this insn.
21500 The dwarf stack unwinding code only wants to see one
21501 stack decrement per function, and this is not it. If
21502 this instruction is labeled as being part of the frame
21503 creation sequence then dwarf2out_frame_debug_expr will
21504 die when it encounters the assignment of IP to FP
21505 later on, since the use of SP here establishes SP as
21506 the CFA register and not IP.
21508 Anyway, this instruction is not really part of the stack
21509 frame creation, although it is part of the prologue. */
21512 insn = emit_set_insn (ip_rtx,
21513 plus_constant (Pmode, stack_pointer_rtx,
21514 fp_offset));
21515 RTX_FRAME_RELATED_P (insn) = 1;
21518 if (args_to_push)
21520 /* Push the argument registers, or reserve space for them. */
21521 if (cfun->machine->uses_anonymous_args)
21522 insn = emit_multi_reg_push
21523 ((0xf0 >> (args_to_push / 4)) & 0xf,
21524 (0xf0 >> (args_to_push / 4)) & 0xf);
21525 else
21526 insn = emit_insn
21527 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21528 GEN_INT (- args_to_push)));
21529 RTX_FRAME_RELATED_P (insn) = 1;
21532 /* If this is an interrupt service routine, and the link register
21533 is going to be pushed, and we're not generating the extra
21534 push of IP (needed when a frame pointer is needed and the APCS frame layout is used),
21535 subtracting four from LR now will mean that the function return
21536 can be done with a single instruction. */
21537 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21538 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21539 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21540 && TARGET_ARM)
21542 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21544 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21547 if (live_regs_mask)
21549 unsigned long dwarf_regs_mask = live_regs_mask;
21551 saved_regs += bit_count (live_regs_mask) * 4;
21552 if (optimize_size && !frame_pointer_needed
21553 && saved_regs == offsets->saved_regs - offsets->saved_args)
21555 /* If no coprocessor registers are being pushed and we don't have
21556 to worry about a frame pointer then push extra registers to
21557 create the stack frame. This is done in a way that does not
21558 alter the frame layout, so it is independent of the epilogue. */
21559 int n;
21560 int frame;
21561 n = 0;
21562 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21563 n++;
21564 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21565 if (frame && n * 4 >= frame)
21567 n = frame / 4;
21568 live_regs_mask |= (1 << n) - 1;
21569 saved_regs += frame;
21573 if (TARGET_LDRD
21574 && current_tune->prefer_ldrd_strd
21575 && !optimize_function_for_size_p (cfun))
21577 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21578 if (TARGET_THUMB2)
21579 thumb2_emit_strd_push (live_regs_mask);
21580 else if (TARGET_ARM
21581 && !TARGET_APCS_FRAME
21582 && !IS_INTERRUPT (func_type))
21583 arm_emit_strd_push (live_regs_mask);
21584 else
21586 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21587 RTX_FRAME_RELATED_P (insn) = 1;
21590 else
21592 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21593 RTX_FRAME_RELATED_P (insn) = 1;
21597 if (! IS_VOLATILE (func_type))
21598 saved_regs += arm_save_coproc_regs ();
21600 if (frame_pointer_needed && TARGET_ARM)
21602 /* Create the new frame pointer. */
21603 if (TARGET_APCS_FRAME)
21605 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21606 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21607 RTX_FRAME_RELATED_P (insn) = 1;
21609 else
21611 insn = GEN_INT (saved_regs - (4 + fp_offset));
21612 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21613 stack_pointer_rtx, insn));
21614 RTX_FRAME_RELATED_P (insn) = 1;
21618 size = offsets->outgoing_args - offsets->saved_args;
21619 if (flag_stack_usage_info)
21620 current_function_static_stack_size = size;
21622 /* If this isn't an interrupt service routine and we have a frame, then do
21623 stack checking. We use IP as the first scratch register, except for the
21624 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21625 if (!IS_INTERRUPT (func_type)
21626 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21628 unsigned int regno;
21630 if (!IS_NESTED (func_type) || clobber_ip)
21631 regno = IP_REGNUM;
21632 else if (df_regs_ever_live_p (LR_REGNUM))
21633 regno = LR_REGNUM;
21634 else
21635 regno = 3;
21637 if (crtl->is_leaf && !cfun->calls_alloca)
21639 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21640 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21641 size - STACK_CHECK_PROTECT,
21642 regno, live_regs_mask);
21644 else if (size > 0)
21645 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21646 regno, live_regs_mask);
21649 /* Recover the static chain register. */
21650 if (clobber_ip)
21652 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21653 insn = gen_rtx_REG (SImode, 3);
21654 else
21656 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21657 insn = gen_frame_mem (SImode, insn);
21659 emit_set_insn (ip_rtx, insn);
21660 emit_insn (gen_force_register_use (ip_rtx));
21663 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21665 /* This add can produce multiple insns for a large constant, so we
21666 need to get tricky. */
21667 rtx_insn *last = get_last_insn ();
21669 amount = GEN_INT (offsets->saved_args + saved_regs
21670 - offsets->outgoing_args);
21672 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21673 amount));
21676 last = last ? NEXT_INSN (last) : get_insns ();
21677 RTX_FRAME_RELATED_P (last) = 1;
21679 while (last != insn);
21681 /* If the frame pointer is needed, emit a special barrier that
21682 will prevent the scheduler from moving stores to the frame
21683 before the stack adjustment. */
21684 if (frame_pointer_needed)
21685 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21686 hard_frame_pointer_rtx));
21690 if (frame_pointer_needed && TARGET_THUMB2)
21691 thumb_set_frame_pointer (offsets);
21693 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21695 unsigned long mask;
21697 mask = live_regs_mask;
21698 mask &= THUMB2_WORK_REGS;
21699 if (!IS_NESTED (func_type))
21700 mask |= (1 << IP_REGNUM);
21701 arm_load_pic_register (mask);
21704 /* If we are profiling, make sure no instructions are scheduled before
21705 the call to mcount. Similarly if the user has requested no
21706 scheduling in the prolog. Similarly if we want non-call exceptions
21707 using the EABI unwinder, to prevent faulting instructions from being
21708 swapped with a stack adjustment. */
21709 if (crtl->profile || !TARGET_SCHED_PROLOG
21710 || (arm_except_unwind_info (&global_options) == UI_TARGET
21711 && cfun->can_throw_non_call_exceptions))
21712 emit_insn (gen_blockage ());
21714 /* If the link register is being kept alive, with the return address in it,
21715 then make sure that it does not get reused by the ce2 pass. */
21716 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21717 cfun->machine->lr_save_eliminated = 1;
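/* Editorial sketch, not part of the original source: for a small function
   needing an APCS frame, the code above typically produces a prologue along
   the lines of (register choice illustrative):

	mov	ip, sp
	push	{r4, fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #16		@ locals / outgoing args
*/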
21720 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21721 static void
21722 arm_print_condition (FILE *stream)
21724 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21726 /* Branch conversion is not implemented for Thumb-2. */
21727 if (TARGET_THUMB)
21729 output_operand_lossage ("predicated Thumb instruction");
21730 return;
21732 if (current_insn_predicate != NULL)
21734 output_operand_lossage
21735 ("predicated instruction in conditional sequence");
21736 return;
21739 fputs (arm_condition_codes[arm_current_cc], stream);
21741 else if (current_insn_predicate)
21743 enum arm_cond_code code;
21745 if (TARGET_THUMB1)
21747 output_operand_lossage ("predicated Thumb instruction");
21748 return;
21751 code = get_arm_condition_code (current_insn_predicate);
21752 fputs (arm_condition_codes[code], stream);
21757 /* Globally reserved letters: acln
21758 Punctuation letters currently used: @_|?().!#
21759 Lower case letters currently used: bcdefhimpqtvwxyz
21760 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21761 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21763 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21765 If CODE is 'd', then X is a condition operand and the instruction
21766 should only be executed if the condition is true.
21767 If CODE is 'D', then X is a condition operand and the instruction
21768 should only be executed if the condition is false: however, if the mode
21769 of the comparison is CCFPEmode, then always execute the instruction -- we
21770 do this because in these circumstances !GE does not necessarily imply LT;
21771 in these cases the instruction pattern will take care to make sure that
21772 an instruction containing %d will follow, thereby undoing the effects of
21773 doing this instruction unconditionally.
21774 If CODE is 'N' then X is a floating point operand that must be negated
21775 before output.
21776 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21777 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
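/* Editorial examples, not part of the original documentation: with X being
   the DImode register pair r2/r3 on a little-endian target, "%M" prints
   "{r2-r3}", "%Q" prints "r2" and "%R" prints "r3"; with X being
   (const_int 5), "%B" prints -6; with X being (const_int 8), "%b" prints
   #3.  */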
21778 static void
21779 arm_print_operand (FILE *stream, rtx x, int code)
21781 switch (code)
21783 case '@':
21784 fputs (ASM_COMMENT_START, stream);
21785 return;
21787 case '_':
21788 fputs (user_label_prefix, stream);
21789 return;
21791 case '|':
21792 fputs (REGISTER_PREFIX, stream);
21793 return;
21795 case '?':
21796 arm_print_condition (stream);
21797 return;
21799 case '(':
21800 /* Nothing in unified syntax, otherwise the current condition code. */
21801 if (!TARGET_UNIFIED_ASM)
21802 arm_print_condition (stream);
21803 break;
21805 case ')':
21806 /* The current condition code in unified syntax, otherwise nothing. */
21807 if (TARGET_UNIFIED_ASM)
21808 arm_print_condition (stream);
21809 break;
21811 case '.':
21812 /* The current condition code for a condition code setting instruction.
21813 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21814 if (TARGET_UNIFIED_ASM)
21816 fputc('s', stream);
21817 arm_print_condition (stream);
21819 else
21821 arm_print_condition (stream);
21822 fputc('s', stream);
21824 return;
21826 case '!':
21827 /* If the instruction is conditionally executed then print
21828 the current condition code, otherwise print 's'. */
21829 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21830 if (current_insn_predicate)
21831 arm_print_condition (stream);
21832 else
21833 fputc('s', stream);
21834 break;
21836 /* %# is a "break" sequence. It doesn't output anything, but is used to
21837 separate e.g. operand numbers from following text, if that text consists
21838 of further digits which we don't want to be part of the operand
21839 number. */
21840 case '#':
21841 return;
21843 case 'N':
21845 REAL_VALUE_TYPE r;
21846 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21847 r = real_value_negate (&r);
21848 fprintf (stream, "%s", fp_const_from_val (&r));
21850 return;
21852 /* An integer or symbol address without a preceding # sign. */
21853 case 'c':
21854 switch (GET_CODE (x))
21856 case CONST_INT:
21857 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21858 break;
21860 case SYMBOL_REF:
21861 output_addr_const (stream, x);
21862 break;
21864 case CONST:
21865 if (GET_CODE (XEXP (x, 0)) == PLUS
21866 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21868 output_addr_const (stream, x);
21869 break;
21871 /* Fall through. */
21873 default:
21874 output_operand_lossage ("Unsupported operand for code '%c'", code);
21876 return;
21878 /* An integer that we want to print in HEX. */
21879 case 'x':
21880 switch (GET_CODE (x))
21882 case CONST_INT:
21883 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21884 break;
21886 default:
21887 output_operand_lossage ("Unsupported operand for code '%c'", code);
21889 return;
21891 case 'B':
21892 if (CONST_INT_P (x))
21894 HOST_WIDE_INT val;
21895 val = ARM_SIGN_EXTEND (~INTVAL (x));
21896 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21898 else
21900 putc ('~', stream);
21901 output_addr_const (stream, x);
21903 return;
21905 case 'b':
21906 /* Print the log2 of a CONST_INT. */
21908 HOST_WIDE_INT val;
21910 if (!CONST_INT_P (x)
21911 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21912 output_operand_lossage ("Unsupported operand for code '%c'", code);
21913 else
21914 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21916 return;
21918 case 'L':
21919 /* The low 16 bits of an immediate constant. */
21920 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21921 return;
21923 case 'i':
21924 fprintf (stream, "%s", arithmetic_instr (x, 1));
21925 return;
21927 case 'I':
21928 fprintf (stream, "%s", arithmetic_instr (x, 0));
21929 return;
21931 case 'S':
21933 HOST_WIDE_INT val;
21934 const char *shift;
21936 shift = shift_op (x, &val);
21938 if (shift)
21940 fprintf (stream, ", %s ", shift);
21941 if (val == -1)
21942 arm_print_operand (stream, XEXP (x, 1), 0);
21943 else
21944 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21947 return;
21949 /* An explanation of the 'Q', 'R' and 'H' register operands:
21951 In a pair of registers containing a DI or DF value the 'Q'
21952 operand returns the register number of the register containing
21953 the least significant part of the value. The 'R' operand returns
21954 the register number of the register containing the most
21955 significant part of the value.
21957 The 'H' operand returns the higher of the two register numbers.
21958 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21959 same as the 'Q' operand, since the most significant part of the
21960 value is held in the lower number register. The reverse is true
21961 on systems where WORDS_BIG_ENDIAN is false.
21963 The purpose of these operands is to distinguish between cases
21964 where the endian-ness of the values is important (for example
21965 when they are added together), and cases where the endian-ness
21966 is irrelevant, but the order of register operations is important.
21967 For example when loading a value from memory into a register
21968 pair, the endian-ness does not matter. Provided that the value
21969 from the lower memory address is put into the lower numbered
21970 register, and the value from the higher address is put into the
21971 higher numbered register, the load will work regardless of whether
21972 the value being loaded is big-wordian or little-wordian. The
21973 order of the two register loads can matter however, if the address
21974 of the memory location is actually held in one of the registers
21975 being overwritten by the load.
21977 The 'Q' and 'R' constraints are also available for 64-bit
21978 constants. */
21979 case 'Q':
21980 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21982 rtx part = gen_lowpart (SImode, x);
21983 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21984 return;
21987 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21989 output_operand_lossage ("invalid operand for code '%c'", code);
21990 return;
21993 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21994 return;
21996 case 'R':
21997 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21999 machine_mode mode = GET_MODE (x);
22000 rtx part;
22002 if (mode == VOIDmode)
22003 mode = DImode;
22004 part = gen_highpart_mode (SImode, mode, x);
22005 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22006 return;
22009 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22011 output_operand_lossage ("invalid operand for code '%c'", code);
22012 return;
22015 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22016 return;
22018 case 'H':
22019 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22021 output_operand_lossage ("invalid operand for code '%c'", code);
22022 return;
22025 asm_fprintf (stream, "%r", REGNO (x) + 1);
22026 return;
22028 case 'J':
22029 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22031 output_operand_lossage ("invalid operand for code '%c'", code);
22032 return;
22035 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22036 return;
22038 case 'K':
22039 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22041 output_operand_lossage ("invalid operand for code '%c'", code);
22042 return;
22045 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22046 return;
22048 case 'm':
22049 asm_fprintf (stream, "%r",
22050 REG_P (XEXP (x, 0))
22051 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22052 return;
22054 case 'M':
22055 asm_fprintf (stream, "{%r-%r}",
22056 REGNO (x),
22057 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22058 return;
22060 /* Like 'M', but writing doubleword vector registers, for use by Neon
22061 insns. */
22062 case 'h':
22064 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22065 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22066 if (numregs == 1)
22067 asm_fprintf (stream, "{d%d}", regno);
22068 else
22069 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22071 return;
22073 case 'd':
22074 /* CONST_TRUE_RTX means always -- that's the default. */
22075 if (x == const_true_rtx)
22076 return;
22078 if (!COMPARISON_P (x))
22080 output_operand_lossage ("invalid operand for code '%c'", code);
22081 return;
22084 fputs (arm_condition_codes[get_arm_condition_code (x)],
22085 stream);
22086 return;
22088 case 'D':
22089 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22090 want to do that. */
22091 if (x == const_true_rtx)
22093 output_operand_lossage ("instruction never executed");
22094 return;
22096 if (!COMPARISON_P (x))
22098 output_operand_lossage ("invalid operand for code '%c'", code);
22099 return;
22102 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22103 (get_arm_condition_code (x))],
22104 stream);
22105 return;
22107 case 's':
22108 case 'V':
22109 case 'W':
22110 case 'X':
22111 case 'Y':
22112 case 'Z':
22113 /* Former Maverick support, removed after GCC-4.7. */
22114 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22115 return;
22117 case 'U':
22118 if (!REG_P (x)
22119 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22120 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22121 /* Bad value for wCG register number. */
22123 output_operand_lossage ("invalid operand for code '%c'", code);
22124 return;
22127 else
22128 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22129 return;
22131 /* Print an iWMMXt control register name. */
22132 case 'w':
22133 if (!CONST_INT_P (x)
22134 || INTVAL (x) < 0
22135 || INTVAL (x) >= 16)
22136 /* Bad value for wC register number. */
22138 output_operand_lossage ("invalid operand for code '%c'", code);
22139 return;
22142 else
22144 static const char * wc_reg_names [16] =
22146 "wCID", "wCon", "wCSSF", "wCASF",
22147 "wC4", "wC5", "wC6", "wC7",
22148 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22149 "wC12", "wC13", "wC14", "wC15"
22152 fputs (wc_reg_names [INTVAL (x)], stream);
22154 return;
22156 /* Print the high single-precision register of a VFP double-precision
22157 register. */
22158 case 'p':
22160 machine_mode mode = GET_MODE (x);
22161 int regno;
22163 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22165 output_operand_lossage ("invalid operand for code '%c'", code);
22166 return;
22169 regno = REGNO (x);
22170 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22172 output_operand_lossage ("invalid operand for code '%c'", code);
22173 return;
22176 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22178 return;
22180 /* Print a VFP/Neon double precision or quad precision register name. */
22181 case 'P':
22182 case 'q':
22184 machine_mode mode = GET_MODE (x);
22185 int is_quad = (code == 'q');
22186 int regno;
22188 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22190 output_operand_lossage ("invalid operand for code '%c'", code);
22191 return;
22194 if (!REG_P (x)
22195 || !IS_VFP_REGNUM (REGNO (x)))
22197 output_operand_lossage ("invalid operand for code '%c'", code);
22198 return;
22201 regno = REGNO (x);
22202 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22203 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22205 output_operand_lossage ("invalid operand for code '%c'", code);
22206 return;
22209 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22210 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22212 return;
22214 /* These two codes print the low/high doubleword register of a Neon quad
22215 register, respectively. For pair-structure types, they can also print
22216 low/high quadword registers. */
22217 case 'e':
22218 case 'f':
22220 machine_mode mode = GET_MODE (x);
22221 int regno;
22223 if ((GET_MODE_SIZE (mode) != 16
22224 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22226 output_operand_lossage ("invalid operand for code '%c'", code);
22227 return;
22230 regno = REGNO (x);
22231 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22233 output_operand_lossage ("invalid operand for code '%c'", code);
22234 return;
22237 if (GET_MODE_SIZE (mode) == 16)
22238 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22239 + (code == 'f' ? 1 : 0));
22240 else
22241 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22242 + (code == 'f' ? 1 : 0));
22244 return;
22246 /* Print a VFPv3 floating-point constant, represented as an integer
22247 index. */
22248 case 'G':
22250 int index = vfp3_const_double_index (x);
22251 gcc_assert (index != -1);
22252 fprintf (stream, "%d", index);
22254 return;
22256 /* Print bits representing opcode features for Neon.
22258 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22259 and polynomials as unsigned.
22261 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22263 Bit 2 is 1 for rounding functions, 0 otherwise. */
22265 /* Identify the type as 's', 'u', 'p' or 'f'. */
22266 case 'T':
22268 HOST_WIDE_INT bits = INTVAL (x);
22269 fputc ("uspf"[bits & 3], stream);
22271 return;
22273 /* Likewise, but signed and unsigned integers are both 'i'. */
22274 case 'F':
22276 HOST_WIDE_INT bits = INTVAL (x);
22277 fputc ("iipf"[bits & 3], stream);
22279 return;
22281 /* As for 'T', but emit 'u' instead of 'p'. */
22282 case 't':
22284 HOST_WIDE_INT bits = INTVAL (x);
22285 fputc ("usuf"[bits & 3], stream);
22287 return;
22289 /* Bit 2: rounding (vs none). */
22290 case 'O':
22292 HOST_WIDE_INT bits = INTVAL (x);
22293 fputs ((bits & 4) != 0 ? "r" : "", stream);
22295 return;
22297 /* Memory operand for vld1/vst1 instruction. */
22298 case 'A':
22300 rtx addr;
22301 bool postinc = FALSE;
22302 rtx postinc_reg = NULL;
22303 unsigned align, memsize, align_bits;
22305 gcc_assert (MEM_P (x));
22306 addr = XEXP (x, 0);
22307 if (GET_CODE (addr) == POST_INC)
22309 postinc = 1;
22310 addr = XEXP (addr, 0);
22312 if (GET_CODE (addr) == POST_MODIFY)
22314 postinc_reg = XEXP (XEXP (addr, 1), 1);
22315 addr = XEXP (addr, 0);
22317 asm_fprintf (stream, "[%r", REGNO (addr));
22319 /* We know the alignment of this access, so we can emit a hint in the
22320 instruction (for some alignments) as an aid to the memory subsystem
22321 of the target. */
22322 align = MEM_ALIGN (x) >> 3;
22323 memsize = MEM_SIZE (x);
22325 /* Only certain alignment specifiers are supported by the hardware. */
22326 if (memsize == 32 && (align % 32) == 0)
22327 align_bits = 256;
22328 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22329 align_bits = 128;
22330 else if (memsize >= 8 && (align % 8) == 0)
22331 align_bits = 64;
22332 else
22333 align_bits = 0;
22335 if (align_bits != 0)
22336 asm_fprintf (stream, ":%d", align_bits);
22338 asm_fprintf (stream, "]");
22340 if (postinc)
22341 fputs("!", stream);
22342 if (postinc_reg)
22343 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22345 return;
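/* Editorial example, not part of the original source: for a post-increment
   access to a 16-byte vector whose MEM_ALIGN is 128 bits, the 'A' code
   above prints something like "[r0:128]!" (base register illustrative).  */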
22347 case 'C':
22349 rtx addr;
22351 gcc_assert (MEM_P (x));
22352 addr = XEXP (x, 0);
22353 gcc_assert (REG_P (addr));
22354 asm_fprintf (stream, "[%r]", REGNO (addr));
22356 return;
22358 /* Translate an S register number into a D register number and element index. */
22359 case 'y':
22361 machine_mode mode = GET_MODE (x);
22362 int regno;
22364 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22366 output_operand_lossage ("invalid operand for code '%c'", code);
22367 return;
22370 regno = REGNO (x);
22371 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22373 output_operand_lossage ("invalid operand for code '%c'", code);
22374 return;
22377 regno = regno - FIRST_VFP_REGNUM;
22378 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22380 return;
22382 case 'v':
22383 gcc_assert (CONST_DOUBLE_P (x));
22384 int result;
22385 result = vfp3_const_double_for_fract_bits (x);
22386 if (result == 0)
22387 result = vfp3_const_double_for_bits (x);
22388 fprintf (stream, "#%d", result);
22389 return;
22391 /* Register specifier for vld1.16/vst1.16. Translate the S register
22392 number into a D register number and element index. */
22393 case 'z':
22395 machine_mode mode = GET_MODE (x);
22396 int regno;
22398 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22400 output_operand_lossage ("invalid operand for code '%c'", code);
22401 return;
22404 regno = REGNO (x);
22405 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22407 output_operand_lossage ("invalid operand for code '%c'", code);
22408 return;
22411 regno = regno - FIRST_VFP_REGNUM;
22412 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22414 return;
22416 default:
22417 if (x == 0)
22419 output_operand_lossage ("missing operand");
22420 return;
22423 switch (GET_CODE (x))
22425 case REG:
22426 asm_fprintf (stream, "%r", REGNO (x));
22427 break;
22429 case MEM:
22430 output_memory_reference_mode = GET_MODE (x);
22431 output_address (XEXP (x, 0));
22432 break;
22434 case CONST_DOUBLE:
22436 char fpstr[20];
22437 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22438 sizeof (fpstr), 0, 1);
22439 fprintf (stream, "#%s", fpstr);
22441 break;
22443 default:
22444 gcc_assert (GET_CODE (x) != NEG);
22445 fputc ('#', stream);
22446 if (GET_CODE (x) == HIGH)
22448 fputs (":lower16:", stream);
22449 x = XEXP (x, 0);
22452 output_addr_const (stream, x);
22453 break;
22458 /* Target hook for printing a memory address. */
22459 static void
22460 arm_print_operand_address (FILE *stream, rtx x)
22462 if (TARGET_32BIT)
22464 int is_minus = GET_CODE (x) == MINUS;
22466 if (REG_P (x))
22467 asm_fprintf (stream, "[%r]", REGNO (x));
22468 else if (GET_CODE (x) == PLUS || is_minus)
22470 rtx base = XEXP (x, 0);
22471 rtx index = XEXP (x, 1);
22472 HOST_WIDE_INT offset = 0;
22473 if (!REG_P (base)
22474 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22476 /* Ensure that BASE is a register. */
22477 /* (one of them must be). */
22478 /* Also ensure the SP is not used as an index register. */
22479 std::swap (base, index);
22481 switch (GET_CODE (index))
22483 case CONST_INT:
22484 offset = INTVAL (index);
22485 if (is_minus)
22486 offset = -offset;
22487 asm_fprintf (stream, "[%r, #%wd]",
22488 REGNO (base), offset);
22489 break;
22491 case REG:
22492 asm_fprintf (stream, "[%r, %s%r]",
22493 REGNO (base), is_minus ? "-" : "",
22494 REGNO (index));
22495 break;
22497 case MULT:
22498 case ASHIFTRT:
22499 case LSHIFTRT:
22500 case ASHIFT:
22501 case ROTATERT:
22503 asm_fprintf (stream, "[%r, %s%r",
22504 REGNO (base), is_minus ? "-" : "",
22505 REGNO (XEXP (index, 0)));
22506 arm_print_operand (stream, index, 'S');
22507 fputs ("]", stream);
22508 break;
22511 default:
22512 gcc_unreachable ();
22515 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22516 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22518 extern machine_mode output_memory_reference_mode;
22520 gcc_assert (REG_P (XEXP (x, 0)));
22522 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22523 asm_fprintf (stream, "[%r, #%s%d]!",
22524 REGNO (XEXP (x, 0)),
22525 GET_CODE (x) == PRE_DEC ? "-" : "",
22526 GET_MODE_SIZE (output_memory_reference_mode));
22527 else
22528 asm_fprintf (stream, "[%r], #%s%d",
22529 REGNO (XEXP (x, 0)),
22530 GET_CODE (x) == POST_DEC ? "-" : "",
22531 GET_MODE_SIZE (output_memory_reference_mode));
22533 else if (GET_CODE (x) == PRE_MODIFY)
22535 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22536 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22537 asm_fprintf (stream, "#%wd]!",
22538 INTVAL (XEXP (XEXP (x, 1), 1)));
22539 else
22540 asm_fprintf (stream, "%r]!",
22541 REGNO (XEXP (XEXP (x, 1), 1)));
22543 else if (GET_CODE (x) == POST_MODIFY)
22545 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22546 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22547 asm_fprintf (stream, "#%wd",
22548 INTVAL (XEXP (XEXP (x, 1), 1)));
22549 else
22550 asm_fprintf (stream, "%r",
22551 REGNO (XEXP (XEXP (x, 1), 1)));
22553 else output_addr_const (stream, x);
22555 else
22557 if (REG_P (x))
22558 asm_fprintf (stream, "[%r]", REGNO (x));
22559 else if (GET_CODE (x) == POST_INC)
22560 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22561 else if (GET_CODE (x) == PLUS)
22563 gcc_assert (REG_P (XEXP (x, 0)));
22564 if (CONST_INT_P (XEXP (x, 1)))
22565 asm_fprintf (stream, "[%r, #%wd]",
22566 REGNO (XEXP (x, 0)),
22567 INTVAL (XEXP (x, 1)));
22568 else
22569 asm_fprintf (stream, "[%r, %r]",
22570 REGNO (XEXP (x, 0)),
22571 REGNO (XEXP (x, 1)));
22573 else
22574 output_addr_const (stream, x);
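/* Editorial examples, not part of the original source, of the 32-bit output
   above: (reg r0) prints "[r0]"; (plus (reg r0) (const_int 4)) prints
   "[r0, #4]"; (post_inc (reg r1)) prints "[r1], #4" for an SImode access;
   (pre_modify (reg r2) (plus (reg r2) (const_int -8))) prints
   "[r2, #-8]!".  */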
22578 /* Target hook for indicating whether a punctuation character for
22579 TARGET_PRINT_OPERAND is valid. */
22580 static bool
22581 arm_print_operand_punct_valid_p (unsigned char code)
22583 return (code == '@' || code == '|' || code == '.'
22584 || code == '(' || code == ')' || code == '#'
22585 || (TARGET_32BIT && (code == '?'))
22586 || (TARGET_THUMB2 && (code == '!'))
22587 || (TARGET_THUMB && (code == '_')));
22590 /* Target hook for assembling integer objects. The ARM version needs to
22591 handle word-sized values specially. */
22592 static bool
22593 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22595 machine_mode mode;
22597 if (size == UNITS_PER_WORD && aligned_p)
22599 fputs ("\t.word\t", asm_out_file);
22600 output_addr_const (asm_out_file, x);
22602 /* Mark symbols as position independent. We only do this in the
22603 .text segment, not in the .data segment. */
22604 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22605 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22607 /* See legitimize_pic_address for an explanation of the
22608 TARGET_VXWORKS_RTP check. */
22609 if (!arm_pic_data_is_text_relative
22610 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22611 fputs ("(GOT)", asm_out_file);
22612 else
22613 fputs ("(GOTOFF)", asm_out_file);
22615 fputc ('\n', asm_out_file);
22616 return true;
22619 mode = GET_MODE (x);
22621 if (arm_vector_mode_supported_p (mode))
22623 int i, units;
22625 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22627 units = CONST_VECTOR_NUNITS (x);
22628 size = GET_MODE_UNIT_SIZE (mode);
22630 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22631 for (i = 0; i < units; i++)
22633 rtx elt = CONST_VECTOR_ELT (x, i);
22634 assemble_integer
22635 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22637 else
22638 for (i = 0; i < units; i++)
22640 rtx elt = CONST_VECTOR_ELT (x, i);
22641 REAL_VALUE_TYPE rval;
22643 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22645 assemble_real
22646 (rval, GET_MODE_INNER (mode),
22647 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22650 return true;
22653 return default_assemble_integer (x, size, aligned_p);
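/* Editorial example, not part of the original source: for a word-sized,
   aligned SYMBOL_REF emitted while building a PIC constant table, the code
   above produces e.g. "\t.word\tfoo(GOTOFF)" for a text-relative local
   symbol and "\t.word\tfoo(GOT)" otherwise ("foo" is illustrative).  */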
22656 static void
22657 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22659 section *s;
22661 if (!TARGET_AAPCS_BASED)
22663 (is_ctor ?
22664 default_named_section_asm_out_constructor
22665 : default_named_section_asm_out_destructor) (symbol, priority);
22666 return;
22669 /* Put these in the .init_array section, using a special relocation. */
22670 if (priority != DEFAULT_INIT_PRIORITY)
22672 char buf[18];
22673 sprintf (buf, "%s.%.5u",
22674 is_ctor ? ".init_array" : ".fini_array",
22675 priority);
22676 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22678 else if (is_ctor)
22679 s = ctors_section;
22680 else
22681 s = dtors_section;
22683 switch_to_section (s);
22684 assemble_align (POINTER_SIZE);
22685 fputs ("\t.word\t", asm_out_file);
22686 output_addr_const (asm_out_file, symbol);
22687 fputs ("(target1)\n", asm_out_file);
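/* Editorial example, not part of the original source: on an AAPCS target a
   constructor registered with priority 101 is placed in a section named
   ".init_array.00101" and emitted as "\t.word\tsym(target1)", while one
   with the default priority goes to ctors_section instead.  */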
22690 /* Add a function to the list of static constructors. */
22692 static void
22693 arm_elf_asm_constructor (rtx symbol, int priority)
22695 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22698 /* Add a function to the list of static destructors. */
22700 static void
22701 arm_elf_asm_destructor (rtx symbol, int priority)
22703 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22706 /* A finite state machine takes care of noticing whether or not instructions
22707 can be conditionally executed, and thus decreases execution time and code
22708 size by deleting branch instructions. The fsm is controlled by
22709 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22711 /* The states of the fsm controlling condition codes are:
22712 0: normal, do nothing special
22713 1: make ASM_OUTPUT_OPCODE not output this instruction
22714 2: make ASM_OUTPUT_OPCODE not output this instruction
22715 3: make instructions conditional
22716 4: make instructions conditional
22718 State transitions (state->state by whom under condition):
22719 0 -> 1 final_prescan_insn if the `target' is a label
22720 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22721 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22722 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22723 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22724 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22725 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22726 (the target insn is arm_target_insn).
22728 If the jump clobbers the conditions then we use states 2 and 4.
22730 A similar thing can be done with conditional return insns.
22732 XXX In case the `target' is an unconditional branch, this conditionalising
22733 of the instructions always reduces code size, but not always execution
22734 time. But then, I want to reduce the code size to somewhere near what
22735 /bin/cc produces. */
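/* Editorial illustration, not part of the original source: the effect of
   the fsm is to turn a short forward branch such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   into the branchless sequence

	cmp	r0, #0
	addne	r1, r1, #1
*/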
22737 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22738 instructions. When a COND_EXEC instruction is seen the subsequent
22739 instructions are scanned so that multiple conditional instructions can be
22740 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22741 specify the length and true/false mask for the IT block. These will be
22742 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22744 /* Returns the index of the ARM condition code string in
22745 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22746 COMPARISON should be an rtx like `(eq (...) (...))'. */
22748 enum arm_cond_code
22749 maybe_get_arm_condition_code (rtx comparison)
22751 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22752 enum arm_cond_code code;
22753 enum rtx_code comp_code = GET_CODE (comparison);
22755 if (GET_MODE_CLASS (mode) != MODE_CC)
22756 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22757 XEXP (comparison, 1));
22759 switch (mode)
22761 case CC_DNEmode: code = ARM_NE; goto dominance;
22762 case CC_DEQmode: code = ARM_EQ; goto dominance;
22763 case CC_DGEmode: code = ARM_GE; goto dominance;
22764 case CC_DGTmode: code = ARM_GT; goto dominance;
22765 case CC_DLEmode: code = ARM_LE; goto dominance;
22766 case CC_DLTmode: code = ARM_LT; goto dominance;
22767 case CC_DGEUmode: code = ARM_CS; goto dominance;
22768 case CC_DGTUmode: code = ARM_HI; goto dominance;
22769 case CC_DLEUmode: code = ARM_LS; goto dominance;
22770 case CC_DLTUmode: code = ARM_CC;
22772 dominance:
22773 if (comp_code == EQ)
22774 return ARM_INVERSE_CONDITION_CODE (code);
22775 if (comp_code == NE)
22776 return code;
22777 return ARM_NV;
22779 case CC_NOOVmode:
22780 switch (comp_code)
22782 case NE: return ARM_NE;
22783 case EQ: return ARM_EQ;
22784 case GE: return ARM_PL;
22785 case LT: return ARM_MI;
22786 default: return ARM_NV;
22789 case CC_Zmode:
22790 switch (comp_code)
22792 case NE: return ARM_NE;
22793 case EQ: return ARM_EQ;
22794 default: return ARM_NV;
22797 case CC_Nmode:
22798 switch (comp_code)
22800 case NE: return ARM_MI;
22801 case EQ: return ARM_PL;
22802 default: return ARM_NV;
22805 case CCFPEmode:
22806 case CCFPmode:
22807 /* We can handle all cases except UNEQ and LTGT. */
22808 switch (comp_code)
22810 case GE: return ARM_GE;
22811 case GT: return ARM_GT;
22812 case LE: return ARM_LS;
22813 case LT: return ARM_MI;
22814 case NE: return ARM_NE;
22815 case EQ: return ARM_EQ;
22816 case ORDERED: return ARM_VC;
22817 case UNORDERED: return ARM_VS;
22818 case UNLT: return ARM_LT;
22819 case UNLE: return ARM_LE;
22820 case UNGT: return ARM_HI;
22821 case UNGE: return ARM_PL;
22822 /* UNEQ and LTGT do not have a representation. */
22823 case UNEQ: /* Fall through. */
22824 case LTGT: /* Fall through. */
22825 default: return ARM_NV;
22828 case CC_SWPmode:
22829 switch (comp_code)
22831 case NE: return ARM_NE;
22832 case EQ: return ARM_EQ;
22833 case GE: return ARM_LE;
22834 case GT: return ARM_LT;
22835 case LE: return ARM_GE;
22836 case LT: return ARM_GT;
22837 case GEU: return ARM_LS;
22838 case GTU: return ARM_CC;
22839 case LEU: return ARM_CS;
22840 case LTU: return ARM_HI;
22841 default: return ARM_NV;
22844 case CC_Cmode:
22845 switch (comp_code)
22847 case LTU: return ARM_CS;
22848 case GEU: return ARM_CC;
22849 default: return ARM_NV;
22852 case CC_CZmode:
22853 switch (comp_code)
22855 case NE: return ARM_NE;
22856 case EQ: return ARM_EQ;
22857 case GEU: return ARM_CS;
22858 case GTU: return ARM_HI;
22859 case LEU: return ARM_LS;
22860 case LTU: return ARM_CC;
22861 default: return ARM_NV;
22864 case CC_NCVmode:
22865 switch (comp_code)
22867 case GE: return ARM_GE;
22868 case LT: return ARM_LT;
22869 case GEU: return ARM_CS;
22870 case LTU: return ARM_CC;
22871 default: return ARM_NV;
22874 case CCmode:
22875 switch (comp_code)
22877 case NE: return ARM_NE;
22878 case EQ: return ARM_EQ;
22879 case GE: return ARM_GE;
22880 case GT: return ARM_GT;
22881 case LE: return ARM_LE;
22882 case LT: return ARM_LT;
22883 case GEU: return ARM_CS;
22884 case GTU: return ARM_HI;
22885 case LEU: return ARM_LS;
22886 case LTU: return ARM_CC;
22887 default: return ARM_NV;
22890 default: gcc_unreachable ();
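/* Editorial example, not part of the original source: a GT comparison
   against flags computed in CC_SWPmode maps to ARM_LT above, because the
   operands of the original comparison were swapped when the condition
   codes were set.  */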
22894 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22895 static enum arm_cond_code
22896 get_arm_condition_code (rtx comparison)
22898 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22899 gcc_assert (code != ARM_NV);
22900 return code;
22903 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22904 instructions. */
22905 void
22906 thumb2_final_prescan_insn (rtx_insn *insn)
22908 rtx_insn *first_insn = insn;
22909 rtx body = PATTERN (insn);
22910 rtx predicate;
22911 enum arm_cond_code code;
22912 int n;
22913 int mask;
22914 int max;
22916 /* max_insns_skipped in the tune was already taken into account in the
22917 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22918 just emit the IT blocks as we can. It does not make sense to split
22919 the IT blocks. */
22920 max = MAX_INSN_PER_IT_BLOCK;
22922 /* Remove the previous insn from the count of insns to be output. */
22923 if (arm_condexec_count)
22924 arm_condexec_count--;
22926 /* Nothing to do if we are already inside a conditional block. */
22927 if (arm_condexec_count)
22928 return;
22930 if (GET_CODE (body) != COND_EXEC)
22931 return;
22933 /* Conditional jumps are implemented directly. */
22934 if (JUMP_P (insn))
22935 return;
22937 predicate = COND_EXEC_TEST (body);
22938 arm_current_cc = get_arm_condition_code (predicate);
22940 n = get_attr_ce_count (insn);
22941 arm_condexec_count = 1;
22942 arm_condexec_mask = (1 << n) - 1;
22943 arm_condexec_masklen = n;
22944 /* See if subsequent instructions can be combined into the same block. */
22945 for (;;)
22947 insn = next_nonnote_insn (insn);
22949 /* Jumping into the middle of an IT block is illegal, so a label or
22950 barrier terminates the block. */
22951 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22952 break;
22954 body = PATTERN (insn);
22955 /* USE and CLOBBER aren't really insns, so just skip them. */
22956 if (GET_CODE (body) == USE
22957 || GET_CODE (body) == CLOBBER)
22958 continue;
22960 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22961 if (GET_CODE (body) != COND_EXEC)
22962 break;
22963 /* Maximum number of conditionally executed instructions in a block. */
22964 n = get_attr_ce_count (insn);
22965 if (arm_condexec_masklen + n > max)
22966 break;
22968 predicate = COND_EXEC_TEST (body);
22969 code = get_arm_condition_code (predicate);
22970 mask = (1 << n) - 1;
22971 if (arm_current_cc == code)
22972 arm_condexec_mask |= (mask << arm_condexec_masklen);
22973 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22974 break;
22976 arm_condexec_count++;
22977 arm_condexec_masklen += n;
22979 /* A jump must be the last instruction in a conditional block. */
22980 if (JUMP_P (insn))
22981 break;
22983 /* Restore recog_data (getting the attributes of other insns can
22984 destroy this array, but final.c assumes that it remains intact
22985 across this call). */
22986 extract_constrain_insn_cached (first_insn);
22989 void
22990 arm_final_prescan_insn (rtx_insn *insn)
22992 /* BODY will hold the body of INSN. */
22993 rtx body = PATTERN (insn);
22995 /* This will be 1 if trying to repeat the trick, and things need to be
22996 reversed if it appears to fail. */
22997 int reverse = 0;
22999 /* If we start with a return insn, we only succeed if we find another one. */
23000 int seeking_return = 0;
23001 enum rtx_code return_code = UNKNOWN;
23003 /* START_INSN will hold the insn from where we start looking. This is the
23004 first insn after the following code_label if REVERSE is true. */
23005 rtx_insn *start_insn = insn;
23007 /* If in state 4, check if the target branch is reached, in order to
23008 change back to state 0. */
23009 if (arm_ccfsm_state == 4)
23011 if (insn == arm_target_insn)
23013 arm_target_insn = NULL;
23014 arm_ccfsm_state = 0;
23016 return;
23019 /* If in state 3, it is possible to repeat the trick, if this insn is an
23020 unconditional branch to a label, and immediately following this branch
23021 is the previous target label which is only used once, and the label this
23022 branch jumps to is not too far off. */
23023 if (arm_ccfsm_state == 3)
23025 if (simplejump_p (insn))
23027 start_insn = next_nonnote_insn (start_insn);
23028 if (BARRIER_P (start_insn))
23030 /* XXX Isn't this always a barrier? */
23031 start_insn = next_nonnote_insn (start_insn);
23033 if (LABEL_P (start_insn)
23034 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23035 && LABEL_NUSES (start_insn) == 1)
23036 reverse = TRUE;
23037 else
23038 return;
23040 else if (ANY_RETURN_P (body))
23042 start_insn = next_nonnote_insn (start_insn);
23043 if (BARRIER_P (start_insn))
23044 start_insn = next_nonnote_insn (start_insn);
23045 if (LABEL_P (start_insn)
23046 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23047 && LABEL_NUSES (start_insn) == 1)
23049 reverse = TRUE;
23050 seeking_return = 1;
23051 return_code = GET_CODE (body);
23053 else
23054 return;
23056 else
23057 return;
23060 gcc_assert (!arm_ccfsm_state || reverse);
23061 if (!JUMP_P (insn))
23062 return;
23064 /* This jump might be paralleled with a clobber of the condition codes;
23065 the jump should always come first. */
23066 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23067 body = XVECEXP (body, 0, 0);
23069 if (reverse
23070 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23071 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23073 int insns_skipped;
23074 int fail = FALSE, succeed = FALSE;
23075 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23076 int then_not_else = TRUE;
23077 rtx_insn *this_insn = start_insn;
23078 rtx label = 0;
23080 /* Register the insn jumped to. */
23081 if (reverse)
23083 if (!seeking_return)
23084 label = XEXP (SET_SRC (body), 0);
23086 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23087 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23088 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23090 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23091 then_not_else = FALSE;
23093 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23095 seeking_return = 1;
23096 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23098 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23100 seeking_return = 1;
23101 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23102 then_not_else = FALSE;
23104 else
23105 gcc_unreachable ();
23107 /* See how many insns this branch skips, and what kind of insns. If all
23108 insns are okay, and the label or unconditional branch to the same
23109 label is not too far away, succeed. */
23110 for (insns_skipped = 0;
23111 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23113 rtx scanbody;
23115 this_insn = next_nonnote_insn (this_insn);
23116 if (!this_insn)
23117 break;
23119 switch (GET_CODE (this_insn))
23121 case CODE_LABEL:
23122 /* Succeed if it is the target label, otherwise fail since
23123 control falls in from somewhere else. */
23124 if (this_insn == label)
23126 arm_ccfsm_state = 1;
23127 succeed = TRUE;
23129 else
23130 fail = TRUE;
23131 break;
23133 case BARRIER:
23134 /* Succeed if the following insn is the target label.
23135 Otherwise fail.
23136 If return insns are used then the last insn in a function
23137 will be a barrier. */
23138 this_insn = next_nonnote_insn (this_insn);
23139 if (this_insn && this_insn == label)
23141 arm_ccfsm_state = 1;
23142 succeed = TRUE;
23144 else
23145 fail = TRUE;
23146 break;
23148 case CALL_INSN:
23149 /* The AAPCS says that conditional calls should not be
23150 used since they make interworking inefficient (the
23151 linker can't transform BL<cond> into BLX). That's
23152 only a problem if the machine has BLX. */
23153 if (arm_arch5)
23155 fail = TRUE;
23156 break;
23159 /* Succeed if the following insn is the target label, or
23160 if the following two insns are a barrier and the
23161 target label. */
23162 this_insn = next_nonnote_insn (this_insn);
23163 if (this_insn && BARRIER_P (this_insn))
23164 this_insn = next_nonnote_insn (this_insn);
23166 if (this_insn && this_insn == label
23167 && insns_skipped < max_insns_skipped)
23169 arm_ccfsm_state = 1;
23170 succeed = TRUE;
23172 else
23173 fail = TRUE;
23174 break;
23176 case JUMP_INSN:
23177 /* If this is an unconditional branch to the same label, succeed.
23178 If it is to another label, do nothing. If it is conditional,
23179 fail. */
23180 /* XXX Probably, the tests for SET and the PC are
23181 unnecessary. */
23183 scanbody = PATTERN (this_insn);
23184 if (GET_CODE (scanbody) == SET
23185 && GET_CODE (SET_DEST (scanbody)) == PC)
23187 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23188 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23190 arm_ccfsm_state = 2;
23191 succeed = TRUE;
23193 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23194 fail = TRUE;
23196 /* Fail if a conditional return is undesirable (e.g. on a
23197 StrongARM), but still allow this if optimizing for size. */
23198 else if (GET_CODE (scanbody) == return_code
23199 && !use_return_insn (TRUE, NULL)
23200 && !optimize_size)
23201 fail = TRUE;
23202 else if (GET_CODE (scanbody) == return_code)
23204 arm_ccfsm_state = 2;
23205 succeed = TRUE;
23207 else if (GET_CODE (scanbody) == PARALLEL)
23209 switch (get_attr_conds (this_insn))
23211 case CONDS_NOCOND:
23212 break;
23213 default:
23214 fail = TRUE;
23215 break;
23218 else
23219 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23221 break;
23223 case INSN:
23224 /* Instructions using or affecting the condition codes make it
23225 fail. */
23226 scanbody = PATTERN (this_insn);
23227 if (!(GET_CODE (scanbody) == SET
23228 || GET_CODE (scanbody) == PARALLEL)
23229 || get_attr_conds (this_insn) != CONDS_NOCOND)
23230 fail = TRUE;
23231 break;
23233 default:
23234 break;
23237 if (succeed)
23239 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23240 arm_target_label = CODE_LABEL_NUMBER (label);
23241 else
23243 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23245 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23247 this_insn = next_nonnote_insn (this_insn);
23248 gcc_assert (!this_insn
23249 || (!BARRIER_P (this_insn)
23250 && !LABEL_P (this_insn)));
23252 if (!this_insn)
23254 /* Oh, dear! We ran off the end... give up. */
23255 extract_constrain_insn_cached (insn);
23256 arm_ccfsm_state = 0;
23257 arm_target_insn = NULL;
23258 return;
23260 arm_target_insn = this_insn;
23263 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23264 what it was. */
23265 if (!reverse)
23266 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23268 if (reverse || then_not_else)
23269 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23272 /* Restore recog_data (getting the attributes of other insns can
23273 destroy this array, but final.c assumes that it remains intact
23274 across this call).
23275 extract_constrain_insn_cached (insn);
23279 /* Output IT instructions. */
23280 void
23281 thumb2_asm_output_opcode (FILE * stream)
23283 char buff[5];
23284 int n;
23286 if (arm_condexec_mask)
23288 for (n = 0; n < arm_condexec_masklen; n++)
23289 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23290 buff[n] = 0;
23291 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23292 arm_condition_codes[arm_current_cc]);
23293 arm_condexec_mask = 0;
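/* Editorial example, not part of the original source: with
   arm_current_cc == ARM_EQ, arm_condexec_masklen == 3 and
   arm_condexec_mask == 0b101, the loop above builds "tet" and the function
   emits "itet\teq", i.e. the first and third instructions of the block are
   executed when EQ holds and the second when it does not.  */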
23297 /* Returns true if REGNO is a valid register
23298 for holding a quantity of type MODE. */
23300 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23302 if (GET_MODE_CLASS (mode) == MODE_CC)
23303 return (regno == CC_REGNUM
23304 || (TARGET_HARD_FLOAT && TARGET_VFP
23305 && regno == VFPCC_REGNUM));
23307 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23308 return false;
23310 if (TARGET_THUMB1)
23311 /* For the Thumb we only allow values bigger than SImode in
23312 registers 0 - 6, so that there is always a second low
23313 register available to hold the upper part of the value.
23314 We probably ought to ensure that the register is the
23315 start of an even numbered register pair. */
23316 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23318 if (TARGET_HARD_FLOAT && TARGET_VFP
23319 && IS_VFP_REGNUM (regno))
23321 if (mode == SFmode || mode == SImode)
23322 return VFP_REGNO_OK_FOR_SINGLE (regno);
23324 if (mode == DFmode)
23325 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23327 /* VFP registers can hold HFmode values, but there is no point in
23328 putting them there unless we have hardware conversion insns. */
23329 if (mode == HFmode)
23330 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23332 if (TARGET_NEON)
23333 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23334 || (VALID_NEON_QREG_MODE (mode)
23335 && NEON_REGNO_OK_FOR_QUAD (regno))
23336 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23337 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23338 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23339 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23340 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23342 return FALSE;
23345 if (TARGET_REALLY_IWMMXT)
23347 if (IS_IWMMXT_GR_REGNUM (regno))
23348 return mode == SImode;
23350 if (IS_IWMMXT_REGNUM (regno))
23351 return VALID_IWMMXT_REG_MODE (mode);
23354 /* We allow almost any value to be stored in the general registers.
23355 Restrict doubleword quantities to even register pairs in ARM state
23356 so that we can use ldrd. Do not allow very large Neon structure
23357 opaque modes in general registers; they would use too many. */
23358 if (regno <= LAST_ARM_REGNUM)
23360 if (ARM_NUM_REGS (mode) > 4)
23361 return FALSE;
23363 if (TARGET_THUMB2)
23364 return TRUE;
23366 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23369 if (regno == FRAME_POINTER_REGNUM
23370 || regno == ARG_POINTER_REGNUM)
23371 /* We only allow integers in the fake hard registers. */
23372 return GET_MODE_CLASS (mode) == MODE_INT;
23374 return FALSE;
23377 /* Implement MODES_TIEABLE_P. */
23379 bool
23380 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23382 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23383 return true;
23385 /* We specifically want to allow elements of "structure" modes to
23386 be tieable to the structure. This more general condition allows
23387 other rarer situations too. */
23388 if (TARGET_NEON
23389 && (VALID_NEON_DREG_MODE (mode1)
23390 || VALID_NEON_QREG_MODE (mode1)
23391 || VALID_NEON_STRUCT_MODE (mode1))
23392 && (VALID_NEON_DREG_MODE (mode2)
23393 || VALID_NEON_QREG_MODE (mode2)
23394 || VALID_NEON_STRUCT_MODE (mode2)))
23395 return true;
23397 return false;
23400 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23401 not used in arm mode. */
23403 enum reg_class
23404 arm_regno_class (int regno)
23406 if (regno == PC_REGNUM)
23407 return NO_REGS;
23409 if (TARGET_THUMB1)
23411 if (regno == STACK_POINTER_REGNUM)
23412 return STACK_REG;
23413 if (regno == CC_REGNUM)
23414 return CC_REG;
23415 if (regno < 8)
23416 return LO_REGS;
23417 return HI_REGS;
23420 if (TARGET_THUMB2 && regno < 8)
23421 return LO_REGS;
23423 if ( regno <= LAST_ARM_REGNUM
23424 || regno == FRAME_POINTER_REGNUM
23425 || regno == ARG_POINTER_REGNUM)
23426 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23428 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23429 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23431 if (IS_VFP_REGNUM (regno))
23433 if (regno <= D7_VFP_REGNUM)
23434 return VFP_D0_D7_REGS;
23435 else if (regno <= LAST_LO_VFP_REGNUM)
23436 return VFP_LO_REGS;
23437 else
23438 return VFP_HI_REGS;
23441 if (IS_IWMMXT_REGNUM (regno))
23442 return IWMMXT_REGS;
23444 if (IS_IWMMXT_GR_REGNUM (regno))
23445 return IWMMXT_GR_REGS;
23447 return NO_REGS;
23450 /* Handle a special case when computing the offset
23451 of an argument from the frame pointer. */
23453 arm_debugger_arg_offset (int value, rtx addr)
23455 rtx_insn *insn;
23457 /* We are only interested if dbxout_parms() failed to compute the offset. */
23458 if (value != 0)
23459 return 0;
23461 /* We can only cope with the case where the address is held in a register. */
23462 if (!REG_P (addr))
23463 return 0;
23465 /* If we are using the frame pointer to point at the argument, then
23466 an offset of 0 is correct. */
23467 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23468 return 0;
23470 /* If we are using the stack pointer to point at the
23471 argument, then an offset of 0 is correct. */
23472 /* ??? Check this is consistent with thumb2 frame layout. */
23473 if ((TARGET_THUMB || !frame_pointer_needed)
23474 && REGNO (addr) == SP_REGNUM)
23475 return 0;
23477 /* Oh dear. The argument is pointed to by a register rather
23478 than being held in a register, or being stored at a known
23479 offset from the frame pointer. Since GDB only understands
23480 those two kinds of argument we must translate the address
23481 held in the register into an offset from the frame pointer.
23482 We do this by searching through the insns for the function
23483 looking to see where this register gets its value. If the
23484 register is initialized from the frame pointer plus an offset
23485 then we are in luck and we can continue, otherwise we give up.
23487 This code is exercised by producing debugging information
23488 for a function with arguments like this:
23490 double func (double a, double b, int c, double d) {return d;}
23492 Without this code the stab for parameter 'd' will be set to
23493 an offset of 0 from the frame pointer, rather than 8. */
23495 /* The if() statement says:
23497 If the insn is a normal instruction
23498 and if the insn is setting the value in a register
23499 and if the register being set is the register holding the address of the argument
23500 and if the address is computed by an addition
23501 that involves adding to a register
23502 which is the frame pointer
23503 a constant integer
23505 then... */
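 /* Illustrative note (not from the original source): the insn being
    searched for has an RTL shape along the lines of
      (set (reg Rn) (plus (reg fp) (const_int 8)))
    where "Rn" stands for whichever register currently holds the
    argument's address.  */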
23507 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23509 if ( NONJUMP_INSN_P (insn)
23510 && GET_CODE (PATTERN (insn)) == SET
23511 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23512 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23513 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23514 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23515 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23518 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23520 break;
23524 if (value == 0)
23526 debug_rtx (addr);
23527 warning (0, "unable to compute real location of stacked parameter");
23528 value = 8; /* XXX magic hack */
23531 return value;
23534 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23536 static const char *
23537 arm_invalid_parameter_type (const_tree t)
23539 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23540 return N_("function parameters cannot have __fp16 type");
23541 return NULL;
23544 /* Implement TARGET_INVALID_RETURN_TYPE. */
23546 static const char *
23547 arm_invalid_return_type (const_tree t)
23549 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23550 return N_("functions cannot return __fp16 type");
23551 return NULL;
23554 /* Implement TARGET_PROMOTED_TYPE. */
23556 static tree
23557 arm_promoted_type (const_tree t)
23559 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23560 return float_type_node;
23561 return NULL_TREE;
23564 /* Implement TARGET_CONVERT_TO_TYPE.
23565 Specifically, this hook implements the peculiarity of the ARM
23566 half-precision floating-point C semantics that requires conversions between
23567 __fp16 and double to go via an intermediate conversion to float. */
23569 static tree
23570 arm_convert_to_type (tree type, tree expr)
23572 tree fromtype = TREE_TYPE (expr);
23573 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23574 return NULL_TREE;
23575 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23576 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23577 return convert (type, convert (float_type_node, expr));
23578 return NULL_TREE;
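 /* A minimal illustration (not part of the original source) of the hook's
    effect: given __fp16 h and double d, the assignment d = h is handled as
    d = (double)(float)h, i.e. the value is widened to float first and only
    then to double, and narrowing from double to __fp16 likewise passes
    through float.  */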
23581 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23582 This simply adds HFmode as a supported mode; even though we don't
23583 implement arithmetic on this type directly, it's supported by
23584 optabs conversions, much the way the double-word arithmetic is
23585 special-cased in the default hook. */
23587 static bool
23588 arm_scalar_mode_supported_p (machine_mode mode)
23590 if (mode == HFmode)
23591 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23592 else if (ALL_FIXED_POINT_MODE_P (mode))
23593 return true;
23594 else
23595 return default_scalar_mode_supported_p (mode);
23598 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23599 void
23600 neon_reinterpret (rtx dest, rtx src)
23602 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23605 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23606 not to early-clobber SRC registers in the process.
23608 We assume that the operands described by SRC and DEST represent a
23609 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23610 number of components into which the copy has been decomposed. */
23611 void
23612 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23614 unsigned int i;
23616 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23617 || REGNO (operands[0]) < REGNO (operands[1]))
23619 for (i = 0; i < count; i++)
23621 operands[2 * i] = dest[i];
23622 operands[2 * i + 1] = src[i];
23625 else
23627 for (i = 0; i < count; i++)
23629 operands[2 * i] = dest[count - i - 1];
23630 operands[2 * i + 1] = src[count - i - 1];
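 /* Worked example (illustrative, not from the original source): splitting a
    two-register copy from {d1,d2} into {d2,d3}.  The operands overlap and
    REGNO (dest) > REGNO (src), so the else branch above orders the component
    moves last-to-first (d3 <- d2, then d2 <- d1); emitting them in forward
    order would overwrite d2 before its old value had been read.  */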
23635 /* Split operands into moves from op[1] + op[2] into op[0]. */
23637 void
23638 neon_split_vcombine (rtx operands[3])
23640 unsigned int dest = REGNO (operands[0]);
23641 unsigned int src1 = REGNO (operands[1]);
23642 unsigned int src2 = REGNO (operands[2]);
23643 machine_mode halfmode = GET_MODE (operands[1]);
23644 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23645 rtx destlo, desthi;
23647 if (src1 == dest && src2 == dest + halfregs)
23649 /* No-op move. Can't split to nothing; emit something. */
23650 emit_note (NOTE_INSN_DELETED);
23651 return;
23654 /* Preserve register attributes for variable tracking. */
23655 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23656 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23657 GET_MODE_SIZE (halfmode));
23659 /* Special case of reversed high/low parts. Use VSWP. */
23660 if (src2 == dest && src1 == dest + halfregs)
23662 rtx x = gen_rtx_SET (destlo, operands[1]);
23663 rtx y = gen_rtx_SET (desthi, operands[2]);
23664 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23665 return;
23668 if (!reg_overlap_mentioned_p (operands[2], destlo))
23670 /* Try to avoid unnecessary moves if part of the result
23671 is in the right place already. */
23672 if (src1 != dest)
23673 emit_move_insn (destlo, operands[1]);
23674 if (src2 != dest + halfregs)
23675 emit_move_insn (desthi, operands[2]);
23677 else
23679 if (src2 != dest + halfregs)
23680 emit_move_insn (desthi, operands[2]);
23681 if (src1 != dest)
23682 emit_move_insn (destlo, operands[1]);
23686 /* Return the number (counting from 0) of
23687 the least significant set bit in MASK. */
23689 inline static int
23690 number_of_first_bit_set (unsigned mask)
23692 return ctz_hwi (mask);
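 /* Illustrative sketch only, not part of the GCC sources: a stand-alone
    equivalent of number_of_first_bit_set built directly on the compiler
    builtin might look like the (hypothetical) function below; 0x28 maps
    to 3, for example.  */
 #if 0
 static inline int
 example_first_bit_set (unsigned int mask)
 {
   /* __builtin_ctz is undefined for a zero argument, so callers must
      guarantee mask != 0, as the callers in this file do.  */
   return __builtin_ctz (mask);
 }
 #endif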
23695 /* Like emit_multi_reg_push, but allowing for a different set of
23696 registers to be described as saved. MASK is the set of registers
23697 to be saved; REAL_REGS is the set of registers to be described as
23698 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23700 static rtx_insn *
23701 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23703 unsigned long regno;
23704 rtx par[10], tmp, reg;
23705 rtx_insn *insn;
23706 int i, j;
23708 /* Build the parallel of the registers actually being stored. */
23709 for (i = 0; mask; ++i, mask &= mask - 1)
23711 regno = ctz_hwi (mask);
23712 reg = gen_rtx_REG (SImode, regno);
23714 if (i == 0)
23715 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23716 else
23717 tmp = gen_rtx_USE (VOIDmode, reg);
23719 par[i] = tmp;
23722 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23723 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23724 tmp = gen_frame_mem (BLKmode, tmp);
23725 tmp = gen_rtx_SET (tmp, par[0]);
23726 par[0] = tmp;
23728 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23729 insn = emit_insn (tmp);
23731 /* Always build the stack adjustment note for unwind info. */
23732 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23733 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23734 par[0] = tmp;
23736 /* Build the parallel of the registers recorded as saved for unwind. */
23737 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23739 regno = ctz_hwi (real_regs);
23740 reg = gen_rtx_REG (SImode, regno);
23742 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23743 tmp = gen_frame_mem (SImode, tmp);
23744 tmp = gen_rtx_SET (tmp, reg);
23745 RTX_FRAME_RELATED_P (tmp) = 1;
23746 par[j + 1] = tmp;
23749 if (j == 0)
23750 tmp = par[0];
23751 else
23753 RTX_FRAME_RELATED_P (par[0]) = 1;
23754 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23757 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23759 return insn;
23762 /* Emit code to push or pop registers to or from the stack. F is the
23763 assembly file. MASK is the registers to pop. */
23764 static void
23765 thumb_pop (FILE *f, unsigned long mask)
23767 int regno;
23768 int lo_mask = mask & 0xFF;
23769 int pushed_words = 0;
23771 gcc_assert (mask);
23773 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23775 /* Special case. Do not generate a POP PC statement here; do it in
23776 thumb_exit(). */
23777 thumb_exit (f, -1);
23778 return;
23781 fprintf (f, "\tpop\t{");
23783 /* Look at the low registers first. */
23784 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23786 if (lo_mask & 1)
23788 asm_fprintf (f, "%r", regno);
23790 if ((lo_mask & ~1) != 0)
23791 fprintf (f, ", ");
23793 pushed_words++;
23797 if (mask & (1 << PC_REGNUM))
23799 /* Catch popping the PC. */
23800 if (TARGET_INTERWORK || TARGET_BACKTRACE
23801 || crtl->calls_eh_return)
23803 /* The PC is never popped directly; instead
23804 it is popped into r3 and then BX is used. */
23805 fprintf (f, "}\n");
23807 thumb_exit (f, -1);
23809 return;
23811 else
23813 if (mask & 0xFF)
23814 fprintf (f, ", ");
23816 asm_fprintf (f, "%r", PC_REGNUM);
23820 fprintf (f, "}\n");
23823 /* Generate code to return from a thumb function.
23824 If 'reg_containing_return_addr' is -1, then the return address is
23825 actually on the stack, at the stack pointer. */
23826 static void
23827 thumb_exit (FILE *f, int reg_containing_return_addr)
23829 unsigned regs_available_for_popping;
23830 unsigned regs_to_pop;
23831 int pops_needed;
23832 unsigned available;
23833 unsigned required;
23834 machine_mode mode;
23835 int size;
23836 int restore_a4 = FALSE;
23838 /* Compute the registers we need to pop. */
23839 regs_to_pop = 0;
23840 pops_needed = 0;
23842 if (reg_containing_return_addr == -1)
23844 regs_to_pop |= 1 << LR_REGNUM;
23845 ++pops_needed;
23848 if (TARGET_BACKTRACE)
23850 /* Restore the (ARM) frame pointer and stack pointer. */
23851 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23852 pops_needed += 2;
23855 /* If there is nothing to pop then just emit the BX instruction and
23856 return. */
23857 if (pops_needed == 0)
23859 if (crtl->calls_eh_return)
23860 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23862 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23863 return;
23865 /* Otherwise if we are not supporting interworking and we have not created
23866 a backtrace structure and the function was not entered in ARM mode then
23867 just pop the return address straight into the PC. */
23868 else if (!TARGET_INTERWORK
23869 && !TARGET_BACKTRACE
23870 && !is_called_in_ARM_mode (current_function_decl)
23871 && !crtl->calls_eh_return)
23873 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23874 return;
23877 /* Find out how many of the (return) argument registers we can corrupt. */
23878 regs_available_for_popping = 0;
23880 /* If returning via __builtin_eh_return, the bottom three registers
23881 all contain information needed for the return. */
23882 if (crtl->calls_eh_return)
23883 size = 12;
23884 else
23886 /* Deduce the registers used from the function's
23887 return value. This is more reliable than examining
23888 df_regs_ever_live_p () because that will be set if the register is
23889 ever used in the function, not just if the register is used
23890 to hold a return value. */
23892 if (crtl->return_rtx != 0)
23893 mode = GET_MODE (crtl->return_rtx);
23894 else
23895 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23897 size = GET_MODE_SIZE (mode);
23899 if (size == 0)
23901 /* In a void function we can use any argument register.
23902 In a function that returns a structure on the stack
23903 we can use the second and third argument registers. */
23904 if (mode == VOIDmode)
23905 regs_available_for_popping =
23906 (1 << ARG_REGISTER (1))
23907 | (1 << ARG_REGISTER (2))
23908 | (1 << ARG_REGISTER (3));
23909 else
23910 regs_available_for_popping =
23911 (1 << ARG_REGISTER (2))
23912 | (1 << ARG_REGISTER (3));
23914 else if (size <= 4)
23915 regs_available_for_popping =
23916 (1 << ARG_REGISTER (2))
23917 | (1 << ARG_REGISTER (3));
23918 else if (size <= 8)
23919 regs_available_for_popping =
23920 (1 << ARG_REGISTER (3));
23923 /* Match registers to be popped with registers into which we pop them. */
23924 for (available = regs_available_for_popping,
23925 required = regs_to_pop;
23926 required != 0 && available != 0;
23927 available &= ~(available & - available),
23928 required &= ~(required & - required))
23929 -- pops_needed;
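 /* Illustrative note (not from the original source): X & -X isolates the
    lowest set bit of X in two's complement (e.g. 0b0110 & -0b0110 ==
    0b0010), so each iteration of the loop above clears one bit from
    AVAILABLE, one from REQUIRED, and decrements POPS_NEEDED.  */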
23931 /* If we have any popping registers left over, remove them. */
23932 if (available > 0)
23933 regs_available_for_popping &= ~available;
23935 /* Otherwise if we need another popping register we can use
23936 the fourth argument register. */
23937 else if (pops_needed)
23939 /* If we have not found any free argument registers and
23940 reg a4 contains the return address, we must move it. */
23941 if (regs_available_for_popping == 0
23942 && reg_containing_return_addr == LAST_ARG_REGNUM)
23944 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23945 reg_containing_return_addr = LR_REGNUM;
23947 else if (size > 12)
23949 /* Register a4 is being used to hold part of the return value,
23950 but we have dire need of a free, low register. */
23951 restore_a4 = TRUE;
23953 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23956 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23958 /* The fourth argument register is available. */
23959 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23961 --pops_needed;
23965 /* Pop as many registers as we can. */
23966 thumb_pop (f, regs_available_for_popping);
23968 /* Process the registers we popped. */
23969 if (reg_containing_return_addr == -1)
23971 /* The return address was popped into the lowest numbered register. */
23972 regs_to_pop &= ~(1 << LR_REGNUM);
23974 reg_containing_return_addr =
23975 number_of_first_bit_set (regs_available_for_popping);
23977 /* Remove this register from the mask of available registers, so that
23978 the return address will not be corrupted by further pops. */
23979 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23982 /* If we popped other registers then handle them here. */
23983 if (regs_available_for_popping)
23985 int frame_pointer;
23987 /* Work out which register currently contains the frame pointer. */
23988 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23990 /* Move it into the correct place. */
23991 asm_fprintf (f, "\tmov\t%r, %r\n",
23992 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23994 /* (Temporarily) remove it from the mask of popped registers. */
23995 regs_available_for_popping &= ~(1 << frame_pointer);
23996 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23998 if (regs_available_for_popping)
24000 int stack_pointer;
24002 /* We popped the stack pointer as well,
24003 find the register that contains it. */
24004 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24006 /* Move it into the stack register. */
24007 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24009 /* At this point we have popped all necessary registers, so
24010 do not worry about restoring regs_available_for_popping
24011 to its correct value:
24013 assert (pops_needed == 0)
24014 assert (regs_available_for_popping == (1 << frame_pointer))
24015 assert (regs_to_pop == (1 << STACK_POINTER)) */
24017 else
24019 /* Since we have just moved the popped value into the frame
24020 pointer, the popping register is available for reuse, and
24021 we know that we still have the stack pointer left to pop. */
24022 regs_available_for_popping |= (1 << frame_pointer);
24026 /* If we still have registers left on the stack, but we no longer have
24027 any registers into which we can pop them, then we must move the return
24028 address into the link register and make available the register that
24029 contained it. */
24030 if (regs_available_for_popping == 0 && pops_needed > 0)
24032 regs_available_for_popping |= 1 << reg_containing_return_addr;
24034 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24035 reg_containing_return_addr);
24037 reg_containing_return_addr = LR_REGNUM;
24040 /* If we have registers left on the stack then pop some more.
24041 We know that at most we will want to pop FP and SP. */
24042 if (pops_needed > 0)
24044 int popped_into;
24045 int move_to;
24047 thumb_pop (f, regs_available_for_popping);
24049 /* We have popped either FP or SP.
24050 Move whichever one it is into the correct register. */
24051 popped_into = number_of_first_bit_set (regs_available_for_popping);
24052 move_to = number_of_first_bit_set (regs_to_pop);
24054 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24056 regs_to_pop &= ~(1 << move_to);
24058 --pops_needed;
24061 /* If we still have not popped everything then we must have only
24062 had one register available to us and we are now popping the SP. */
24063 if (pops_needed > 0)
24065 int popped_into;
24067 thumb_pop (f, regs_available_for_popping);
24069 popped_into = number_of_first_bit_set (regs_available_for_popping);
24071 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24073 /* assert (regs_to_pop == (1 << STACK_POINTER))
24074 assert (pops_needed == 1) */
24078 /* If necessary restore the a4 register. */
24079 if (restore_a4)
24081 if (reg_containing_return_addr != LR_REGNUM)
24083 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24084 reg_containing_return_addr = LR_REGNUM;
24087 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24090 if (crtl->calls_eh_return)
24091 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24093 /* Return to caller. */
24094 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24097 /* Scan INSN just before assembler is output for it.
24098 For Thumb-1, we track the status of the condition codes; this
24099 information is used in the cbranchsi4_insn pattern. */
24100 void
24101 thumb1_final_prescan_insn (rtx_insn *insn)
24103 if (flag_print_asm_name)
24104 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24105 INSN_ADDRESSES (INSN_UID (insn)));
24106 /* Don't overwrite the previous setter when we get to a cbranch. */
24107 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24109 enum attr_conds conds;
24111 if (cfun->machine->thumb1_cc_insn)
24113 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24114 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24115 CC_STATUS_INIT;
24117 conds = get_attr_conds (insn);
24118 if (conds == CONDS_SET)
24120 rtx set = single_set (insn);
24121 cfun->machine->thumb1_cc_insn = insn;
24122 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24123 cfun->machine->thumb1_cc_op1 = const0_rtx;
24124 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24125 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24127 rtx src1 = XEXP (SET_SRC (set), 1);
24128 if (src1 == const0_rtx)
24129 cfun->machine->thumb1_cc_mode = CCmode;
24131 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24133 /* Record the src register operand instead of dest because
24134 cprop_hardreg pass propagates src. */
24135 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24138 else if (conds != CONDS_NOCOND)
24139 cfun->machine->thumb1_cc_insn = NULL_RTX;
24142 /* Check if unexpected far jump is used. */
24143 if (cfun->machine->lr_save_eliminated
24144 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24145 internal_error("Unexpected thumb1 far jump");
24149 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24151 unsigned HOST_WIDE_INT mask = 0xff;
24152 int i;
24154 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24155 if (val == 0) /* XXX */
24156 return 0;
24158 for (i = 0; i < 25; i++)
24159 if ((val & (mask << i)) == val)
24160 return 1;
24162 return 0;
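 /* Worked example (illustrative, not from the original source):
    thumb_shiftable_const accepts a value whose low 32 bits are an 8-bit
    constant shifted left by 0..24 places.  0x1FE00 (0xFF << 9) is accepted;
    0x101 is rejected because bits 0 and 8 do not fit in any single 8-bit
    window.  */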
24165 /* Returns nonzero if the current function contains,
24166 or might contain a far jump. */
24167 static int
24168 thumb_far_jump_used_p (void)
24170 rtx_insn *insn;
24171 bool far_jump = false;
24172 unsigned int func_size = 0;
24174 /* This test is only important for leaf functions. */
24175 /* assert (!leaf_function_p ()); */
24177 /* If we have already decided that far jumps may be used,
24178 do not bother checking again, and always return true even if
24179 it turns out that they are not being used. Once we have made
24180 the decision that far jumps are present (and that hence the link
24181 register will be pushed onto the stack) we cannot go back on it. */
24182 if (cfun->machine->far_jump_used)
24183 return 1;
24185 /* If this function is not being called from the prologue/epilogue
24186 generation code then it must be being called from the
24187 INITIAL_ELIMINATION_OFFSET macro. */
24188 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24190 /* In this case we know that we are being asked about the elimination
24191 of the arg pointer register. If that register is not being used,
24192 then there are no arguments on the stack, and we do not have to
24193 worry that a far jump might force the prologue to push the link
24194 register, changing the stack offsets. In this case we can just
24195 return false, since the presence of far jumps in the function will
24196 not affect stack offsets.
24198 If the arg pointer is live (or if it was live, but has now been
24199 eliminated and so set to dead) then we do have to test to see if
24200 the function might contain a far jump. This test can lead to some
24201 false negatives, since before reload is completed, the length of
24202 branch instructions is not known, so gcc defaults to returning their
24203 longest length, which in turn sets the far jump attribute to true.
24205 A false negative will not result in bad code being generated, but it
24206 will result in a needless push and pop of the link register. We
24207 hope that this does not occur too often.
24209 If we need doubleword stack alignment this could affect the other
24210 elimination offsets so we can't risk getting it wrong. */
24211 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24212 cfun->machine->arg_pointer_live = 1;
24213 else if (!cfun->machine->arg_pointer_live)
24214 return 0;
24217 /* We should not change far_jump_used during or after reload, as there is
24218 no chance to change stack frame layout. */
24219 if (reload_in_progress || reload_completed)
24220 return 0;
24222 /* Check to see if the function contains a branch
24223 insn with the far jump attribute set. */
24224 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24226 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24228 far_jump = true;
24230 func_size += get_attr_length (insn);
24233 /* Attribute far_jump will always be true for thumb1 before the
24234 shorten_branch pass, so checking the far_jump attribute before
24235 shorten_branch isn't very useful.
24237 The following heuristic tries to estimate more accurately whether a far
24238 jump may finally be used. The heuristic is very conservative, as there is
24239 no chance to roll back the decision not to use a far jump.
24241 Thumb1 long branch offset is -2048 to 2046. The worst case is that each
24242 2-byte insn is associated with a 4-byte constant pool entry. Using
24243 function size 2048/3 as the threshold is conservative enough. */
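 /* Worked example (illustrative): with this threshold a function whose
    insns total 683 bytes or more is treated as possibly needing a far
    jump (683 * 3 == 2049 >= 2048), while 682 bytes is not.  */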
24244 if (far_jump)
24246 if ((func_size * 3) >= 2048)
24248 /* Record the fact that we have decided that
24249 the function does use far jumps. */
24250 cfun->machine->far_jump_used = 1;
24251 return 1;
24255 return 0;
24258 /* Return nonzero if FUNC must be entered in ARM mode. */
24259 static bool
24260 is_called_in_ARM_mode (tree func)
24262 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24264 /* Ignore the problem about functions whose address is taken. */
24265 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24266 return true;
24268 #ifdef ARM_PE
24269 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24270 #else
24271 return false;
24272 #endif
24275 /* Given the stack offsets and register mask in OFFSETS, decide how
24276 many additional registers to push instead of subtracting a constant
24277 from SP. For epilogues the principle is the same except we use pop.
24278 FOR_PROLOGUE indicates which we're generating. */
24279 static int
24280 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24282 HOST_WIDE_INT amount;
24283 unsigned long live_regs_mask = offsets->saved_regs_mask;
24284 /* Extract a mask of the ones we can give to the Thumb's push/pop
24285 instruction. */
24286 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24287 /* Then count how many other high registers will need to be pushed. */
24288 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24289 int n_free, reg_base, size;
24291 if (!for_prologue && frame_pointer_needed)
24292 amount = offsets->locals_base - offsets->saved_regs;
24293 else
24294 amount = offsets->outgoing_args - offsets->saved_regs;
24296 /* If the stack frame size is 512 exactly, we can save one load
24297 instruction, which should make this a win even when optimizing
24298 for speed. */
24299 if (!optimize_size && amount != 512)
24300 return 0;
24302 /* Can't do this if there are high registers to push. */
24303 if (high_regs_pushed != 0)
24304 return 0;
24306 /* Shouldn't do it in the prologue if no registers would normally
24307 be pushed at all. In the epilogue, also allow it if we'll have
24308 a pop insn for the PC. */
24309 if (l_mask == 0
24310 && (for_prologue
24311 || TARGET_BACKTRACE
24312 || (live_regs_mask & 1 << LR_REGNUM) == 0
24313 || TARGET_INTERWORK
24314 || crtl->args.pretend_args_size != 0))
24315 return 0;
24317 /* Don't do this if thumb_expand_prologue wants to emit instructions
24318 between the push and the stack frame allocation. */
24319 if (for_prologue
24320 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24321 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24322 return 0;
24324 reg_base = 0;
24325 n_free = 0;
24326 if (!for_prologue)
24328 size = arm_size_return_regs ();
24329 reg_base = ARM_NUM_INTS (size);
24330 live_regs_mask >>= reg_base;
24333 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24334 && (for_prologue || call_used_regs[reg_base + n_free]))
24336 live_regs_mask >>= 1;
24337 n_free++;
24340 if (n_free == 0)
24341 return 0;
24342 gcc_assert (amount / 4 * 4 == amount);
24344 if (amount >= 512 && (amount - n_free * 4) < 512)
24345 return (amount - 508) / 4;
24346 if (amount <= n_free * 4)
24347 return amount / 4;
24348 return 0;
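 /* Worked example (illustrative, not from the original source): with
    amount == 512 and at least one free low register the function returns
    (512 - 508) / 4 == 1, i.e. pushing one extra register drops the
    remaining stack adjustment to 508 bytes, which fits a single Thumb
    add/sub immediate.  */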
24351 /* The bits which aren't usefully expanded as rtl. */
24352 const char *
24353 thumb1_unexpanded_epilogue (void)
24355 arm_stack_offsets *offsets;
24356 int regno;
24357 unsigned long live_regs_mask = 0;
24358 int high_regs_pushed = 0;
24359 int extra_pop;
24360 int had_to_push_lr;
24361 int size;
24363 if (cfun->machine->return_used_this_function != 0)
24364 return "";
24366 if (IS_NAKED (arm_current_func_type ()))
24367 return "";
24369 offsets = arm_get_frame_offsets ();
24370 live_regs_mask = offsets->saved_regs_mask;
24371 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24373 /* Deduce the registers used from the function's return value.
24374 This is more reliable than examining df_regs_ever_live_p () because that
24375 will be set if the register is ever used in the function, not just if
24376 the register is used to hold a return value. */
24377 size = arm_size_return_regs ();
24379 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24380 if (extra_pop > 0)
24382 unsigned long extra_mask = (1 << extra_pop) - 1;
24383 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24386 /* The prolog may have pushed some high registers to use as
24387 work registers. e.g. the testsuite file:
24388 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24389 compiles to produce:
24390 push {r4, r5, r6, r7, lr}
24391 mov r7, r9
24392 mov r6, r8
24393 push {r6, r7}
24394 as part of the prolog. We have to undo that pushing here. */
24396 if (high_regs_pushed)
24398 unsigned long mask = live_regs_mask & 0xff;
24399 int next_hi_reg;
24401 /* The available low registers depend on the size of the value we are
24402 returning. */
24403 if (size <= 12)
24404 mask |= 1 << 3;
24405 if (size <= 8)
24406 mask |= 1 << 2;
24408 if (mask == 0)
24409 /* Oh dear! We have no low registers into which we can pop
24410 high registers! */
24411 internal_error
24412 ("no low registers available for popping high registers");
24414 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24415 if (live_regs_mask & (1 << next_hi_reg))
24416 break;
24418 while (high_regs_pushed)
24420 /* Find lo register(s) into which the high register(s) can
24421 be popped. */
24422 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24424 if (mask & (1 << regno))
24425 high_regs_pushed--;
24426 if (high_regs_pushed == 0)
24427 break;
24430 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24432 /* Pop the values into the low register(s). */
24433 thumb_pop (asm_out_file, mask);
24435 /* Move the value(s) into the high registers. */
24436 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24438 if (mask & (1 << regno))
24440 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24441 regno);
24443 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24444 if (live_regs_mask & (1 << next_hi_reg))
24445 break;
24449 live_regs_mask &= ~0x0f00;
24452 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24453 live_regs_mask &= 0xff;
24455 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24457 /* Pop the return address into the PC. */
24458 if (had_to_push_lr)
24459 live_regs_mask |= 1 << PC_REGNUM;
24461 /* Either no argument registers were pushed or a backtrace
24462 structure was created which includes an adjusted stack
24463 pointer, so just pop everything. */
24464 if (live_regs_mask)
24465 thumb_pop (asm_out_file, live_regs_mask);
24467 /* We have either just popped the return address into the
24468 PC or it was kept in LR for the entire function.
24469 Note that thumb_pop has already called thumb_exit if the
24470 PC was in the list. */
24471 if (!had_to_push_lr)
24472 thumb_exit (asm_out_file, LR_REGNUM);
24474 else
24476 /* Pop everything but the return address. */
24477 if (live_regs_mask)
24478 thumb_pop (asm_out_file, live_regs_mask);
24480 if (had_to_push_lr)
24482 if (size > 12)
24484 /* We have no free low regs, so save one. */
24485 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24486 LAST_ARG_REGNUM);
24489 /* Get the return address into a temporary register. */
24490 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24492 if (size > 12)
24494 /* Move the return address to lr. */
24495 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24496 LAST_ARG_REGNUM);
24497 /* Restore the low register. */
24498 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24499 IP_REGNUM);
24500 regno = LR_REGNUM;
24502 else
24503 regno = LAST_ARG_REGNUM;
24505 else
24506 regno = LR_REGNUM;
24508 /* Remove the argument registers that were pushed onto the stack. */
24509 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24510 SP_REGNUM, SP_REGNUM,
24511 crtl->args.pretend_args_size);
24513 thumb_exit (asm_out_file, regno);
24516 return "";
24519 /* Functions to save and restore machine-specific function data. */
24520 static struct machine_function *
24521 arm_init_machine_status (void)
24523 struct machine_function *machine;
24524 machine = ggc_cleared_alloc<machine_function> ();
24526 #if ARM_FT_UNKNOWN != 0
24527 machine->func_type = ARM_FT_UNKNOWN;
24528 #endif
24529 return machine;
24532 /* Return an RTX indicating where the return address to the
24533 calling function can be found. */
24535 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24537 if (count != 0)
24538 return NULL_RTX;
24540 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24543 /* Do anything needed before RTL is emitted for each function. */
24544 void
24545 arm_init_expanders (void)
24547 /* Arrange to initialize and mark the machine per-function status. */
24548 init_machine_status = arm_init_machine_status;
24550 /* This is to stop the combine pass optimizing away the alignment
24551 adjustment of va_arg. */
24552 /* ??? It is claimed that this should not be necessary. */
24553 if (cfun)
24554 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24557 /* Return TRUE if FUNC is to be compiled in a different (ARM/Thumb) mode from the current function. */
24559 bool
24560 arm_change_mode_p (tree func)
24562 if (TREE_CODE (func) != FUNCTION_DECL)
24563 return false;
24565 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24567 if (!callee_tree)
24568 callee_tree = target_option_default_node;
24570 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24571 int flags = callee_opts->x_target_flags;
24573 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24576 /* Like arm_compute_initial_elimination_offset. Simpler because there
24577 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24578 to point at the base of the local variables after static stack
24579 space for a function has been allocated. */
24581 HOST_WIDE_INT
24582 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24584 arm_stack_offsets *offsets;
24586 offsets = arm_get_frame_offsets ();
24588 switch (from)
24590 case ARG_POINTER_REGNUM:
24591 switch (to)
24593 case STACK_POINTER_REGNUM:
24594 return offsets->outgoing_args - offsets->saved_args;
24596 case FRAME_POINTER_REGNUM:
24597 return offsets->soft_frame - offsets->saved_args;
24599 case ARM_HARD_FRAME_POINTER_REGNUM:
24600 return offsets->saved_regs - offsets->saved_args;
24602 case THUMB_HARD_FRAME_POINTER_REGNUM:
24603 return offsets->locals_base - offsets->saved_args;
24605 default:
24606 gcc_unreachable ();
24608 break;
24610 case FRAME_POINTER_REGNUM:
24611 switch (to)
24613 case STACK_POINTER_REGNUM:
24614 return offsets->outgoing_args - offsets->soft_frame;
24616 case ARM_HARD_FRAME_POINTER_REGNUM:
24617 return offsets->saved_regs - offsets->soft_frame;
24619 case THUMB_HARD_FRAME_POINTER_REGNUM:
24620 return offsets->locals_base - offsets->soft_frame;
24622 default:
24623 gcc_unreachable ();
24625 break;
24627 default:
24628 gcc_unreachable ();
24632 /* Generate the function's prologue. */
24634 void
24635 thumb1_expand_prologue (void)
24637 rtx_insn *insn;
24639 HOST_WIDE_INT amount;
24640 HOST_WIDE_INT size;
24641 arm_stack_offsets *offsets;
24642 unsigned long func_type;
24643 int regno;
24644 unsigned long live_regs_mask;
24645 unsigned long l_mask;
24646 unsigned high_regs_pushed = 0;
24648 func_type = arm_current_func_type ();
24650 /* Naked functions don't have prologues. */
24651 if (IS_NAKED (func_type))
24652 return;
24654 if (IS_INTERRUPT (func_type))
24656 error ("interrupt Service Routines cannot be coded in Thumb mode");
24657 return;
24660 if (is_called_in_ARM_mode (current_function_decl))
24661 emit_insn (gen_prologue_thumb1_interwork ());
24663 offsets = arm_get_frame_offsets ();
24664 live_regs_mask = offsets->saved_regs_mask;
24666 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24667 l_mask = live_regs_mask & 0x40ff;
24668 /* Then count how many other high registers will need to be pushed. */
24669 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24671 if (crtl->args.pretend_args_size)
24673 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24675 if (cfun->machine->uses_anonymous_args)
24677 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24678 unsigned long mask;
24680 mask = 1ul << (LAST_ARG_REGNUM + 1);
24681 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24683 insn = thumb1_emit_multi_reg_push (mask, 0);
24685 else
24687 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24688 stack_pointer_rtx, x));
24690 RTX_FRAME_RELATED_P (insn) = 1;
24693 if (TARGET_BACKTRACE)
24695 HOST_WIDE_INT offset = 0;
24696 unsigned work_register;
24697 rtx work_reg, x, arm_hfp_rtx;
24699 /* We have been asked to create a stack backtrace structure.
24700 The code looks like this:
24702 0 .align 2
24703 0 func:
24704 0 sub SP, #16 Reserve space for 4 registers.
24705 2 push {R7} Push low registers.
24706 4 add R7, SP, #20 Get the stack pointer before the push.
24707 6 str R7, [SP, #8] Store the stack pointer
24708 (before reserving the space).
24709 8 mov R7, PC Get hold of the start of this code + 12.
24710 10 str R7, [SP, #16] Store it.
24711 12 mov R7, FP Get hold of the current frame pointer.
24712 14 str R7, [SP, #4] Store it.
24713 16 mov R7, LR Get hold of the current return address.
24714 18 str R7, [SP, #12] Store it.
24715 20 add R7, SP, #16 Point at the start of the
24716 backtrace structure.
24717 22 mov FP, R7 Put this value into the frame pointer. */
24719 work_register = thumb_find_work_register (live_regs_mask);
24720 work_reg = gen_rtx_REG (SImode, work_register);
24721 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24723 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24724 stack_pointer_rtx, GEN_INT (-16)));
24725 RTX_FRAME_RELATED_P (insn) = 1;
24727 if (l_mask)
24729 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24730 RTX_FRAME_RELATED_P (insn) = 1;
24732 offset = bit_count (l_mask) * UNITS_PER_WORD;
24735 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24736 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24738 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24739 x = gen_frame_mem (SImode, x);
24740 emit_move_insn (x, work_reg);
24742 /* Make sure that the instruction fetching the PC is in the right place
24743 to calculate "start of backtrace creation code + 12". */
24744 /* ??? The stores using the common WORK_REG ought to be enough to
24745 prevent the scheduler from doing anything weird. Failing that
24746 we could always move all of the following into an UNSPEC_VOLATILE. */
24747 if (l_mask)
24749 x = gen_rtx_REG (SImode, PC_REGNUM);
24750 emit_move_insn (work_reg, x);
24752 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24753 x = gen_frame_mem (SImode, x);
24754 emit_move_insn (x, work_reg);
24756 emit_move_insn (work_reg, arm_hfp_rtx);
24758 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24759 x = gen_frame_mem (SImode, x);
24760 emit_move_insn (x, work_reg);
24762 else
24764 emit_move_insn (work_reg, arm_hfp_rtx);
24766 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24767 x = gen_frame_mem (SImode, x);
24768 emit_move_insn (x, work_reg);
24770 x = gen_rtx_REG (SImode, PC_REGNUM);
24771 emit_move_insn (work_reg, x);
24773 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24774 x = gen_frame_mem (SImode, x);
24775 emit_move_insn (x, work_reg);
24778 x = gen_rtx_REG (SImode, LR_REGNUM);
24779 emit_move_insn (work_reg, x);
24781 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24782 x = gen_frame_mem (SImode, x);
24783 emit_move_insn (x, work_reg);
24785 x = GEN_INT (offset + 12);
24786 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24788 emit_move_insn (arm_hfp_rtx, work_reg);
24790 /* Optimization: If we are not pushing any low registers but we are going
24791 to push some high registers then delay our first push. This will just
24792 be a push of LR and we can combine it with the push of the first high
24793 register. */
24794 else if ((l_mask & 0xff) != 0
24795 || (high_regs_pushed == 0 && l_mask))
24797 unsigned long mask = l_mask;
24798 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24799 insn = thumb1_emit_multi_reg_push (mask, mask);
24800 RTX_FRAME_RELATED_P (insn) = 1;
24803 if (high_regs_pushed)
24805 unsigned pushable_regs;
24806 unsigned next_hi_reg;
24807 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24808 : crtl->args.info.nregs;
24809 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24811 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24812 if (live_regs_mask & (1 << next_hi_reg))
24813 break;
24815 /* Here we need to mask out registers used for passing arguments
24816 even if they can be pushed. This is to avoid using them to stash the high
24817 registers; such a stash may clobber the use of arguments.
24818 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24820 if (pushable_regs == 0)
24821 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24823 while (high_regs_pushed > 0)
24825 unsigned long real_regs_mask = 0;
24827 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24829 if (pushable_regs & (1 << regno))
24831 emit_move_insn (gen_rtx_REG (SImode, regno),
24832 gen_rtx_REG (SImode, next_hi_reg));
24834 high_regs_pushed --;
24835 real_regs_mask |= (1 << next_hi_reg);
24837 if (high_regs_pushed)
24839 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24840 next_hi_reg --)
24841 if (live_regs_mask & (1 << next_hi_reg))
24842 break;
24844 else
24846 pushable_regs &= ~((1 << regno) - 1);
24847 break;
24852 /* If we had to find a work register and we have not yet
24853 saved the LR then add it to the list of regs to push. */
24854 if (l_mask == (1 << LR_REGNUM))
24856 pushable_regs |= l_mask;
24857 real_regs_mask |= l_mask;
24858 l_mask = 0;
24861 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24862 RTX_FRAME_RELATED_P (insn) = 1;
24866 /* Load the pic register before setting the frame pointer,
24867 so we can use r7 as a temporary work register. */
24868 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24869 arm_load_pic_register (live_regs_mask);
24871 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24872 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24873 stack_pointer_rtx);
24875 size = offsets->outgoing_args - offsets->saved_args;
24876 if (flag_stack_usage_info)
24877 current_function_static_stack_size = size;
24879 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24880 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24881 sorry ("-fstack-check=specific for THUMB1");
24883 amount = offsets->outgoing_args - offsets->saved_regs;
24884 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24885 if (amount)
24887 if (amount < 512)
24889 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24890 GEN_INT (- amount)));
24891 RTX_FRAME_RELATED_P (insn) = 1;
24893 else
24895 rtx reg, dwarf;
24897 /* The stack decrement is too big for an immediate value in a single
24898 insn. In theory we could issue multiple subtracts, but after
24899 three of them it becomes more space efficient to place the full
24900 value in the constant pool and load into a register. (Also the
24901 ARM debugger really likes to see only one stack decrement per
24902 function). So instead we look for a scratch register into which
24903 we can load the decrement, and then we subtract this from the
24904 stack pointer. Unfortunately on the thumb the only available
24905 scratch registers are the argument registers, and we cannot use
24906 these as they may hold arguments to the function. Instead we
24907 attempt to locate a call preserved register which is used by this
24908 function. If we can find one, then we know that it will have
24909 been pushed at the start of the prologue and so we can corrupt
24910 it now. */
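 /* Illustrative only: for a 1024-byte frame the code below ends up
    emitting something like

      ldr  r4, .Lc	@ .Lc: .word -1024
      add  sp, sp, r4

    where r4 stands for whichever call-saved low register was found
    and the literal-pool label is hypothetical.  */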
24911 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24912 if (live_regs_mask & (1 << regno))
24913 break;
24915 gcc_assert(regno <= LAST_LO_REGNUM);
24917 reg = gen_rtx_REG (SImode, regno);
24919 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24921 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24922 stack_pointer_rtx, reg));
24924 dwarf = gen_rtx_SET (stack_pointer_rtx,
24925 plus_constant (Pmode, stack_pointer_rtx,
24926 -amount));
24927 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24928 RTX_FRAME_RELATED_P (insn) = 1;
24932 if (frame_pointer_needed)
24933 thumb_set_frame_pointer (offsets);
24935 /* If we are profiling, make sure no instructions are scheduled before
24936 the call to mcount. Similarly if the user has requested no
24937 scheduling in the prolog. Similarly if we want non-call exceptions
24938 using the EABI unwinder, to prevent faulting instructions from being
24939 swapped with a stack adjustment. */
24940 if (crtl->profile || !TARGET_SCHED_PROLOG
24941 || (arm_except_unwind_info (&global_options) == UI_TARGET
24942 && cfun->can_throw_non_call_exceptions))
24943 emit_insn (gen_blockage ());
24945 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24946 if (live_regs_mask & 0xff)
24947 cfun->machine->lr_save_eliminated = 0;
24950 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
24951 POP instruction can be generated. LR should be replaced by PC. All
24952 the checks required are already done by USE_RETURN_INSN (). Hence,
24953 all we really need to check here is whether a single register or
24954 multiple registers are to be popped. */
24955 void
24956 thumb2_expand_return (bool simple_return)
24958 int i, num_regs;
24959 unsigned long saved_regs_mask;
24960 arm_stack_offsets *offsets;
24962 offsets = arm_get_frame_offsets ();
24963 saved_regs_mask = offsets->saved_regs_mask;
24965 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24966 if (saved_regs_mask & (1 << i))
24967 num_regs++;
24969 if (!simple_return && saved_regs_mask)
24971 if (num_regs == 1)
24973 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24974 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24975 rtx addr = gen_rtx_MEM (SImode,
24976 gen_rtx_POST_INC (SImode,
24977 stack_pointer_rtx));
24978 set_mem_alias_set (addr, get_frame_alias_set ());
24979 XVECEXP (par, 0, 0) = ret_rtx;
24980 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
24981 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24982 emit_jump_insn (par);
24984 else
24986 saved_regs_mask &= ~ (1 << LR_REGNUM);
24987 saved_regs_mask |= (1 << PC_REGNUM);
24988 arm_emit_multi_reg_pop (saved_regs_mask);
24991 else
24993 emit_jump_insn (simple_return_rtx);
24997 void
24998 thumb1_expand_epilogue (void)
25000 HOST_WIDE_INT amount;
25001 arm_stack_offsets *offsets;
25002 int regno;
25004 /* Naked functions don't have epilogues. */
25005 if (IS_NAKED (arm_current_func_type ()))
25006 return;
25008 offsets = arm_get_frame_offsets ();
25009 amount = offsets->outgoing_args - offsets->saved_regs;
25011 if (frame_pointer_needed)
25013 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25014 amount = offsets->locals_base - offsets->saved_regs;
25016 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25018 gcc_assert (amount >= 0);
25019 if (amount)
25021 emit_insn (gen_blockage ());
25023 if (amount < 512)
25024 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25025 GEN_INT (amount)));
25026 else
25028 /* r3 is always free in the epilogue. */
25029 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25031 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25032 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25036 /* Emit a USE (stack_pointer_rtx), so that
25037 the stack adjustment will not be deleted. */
25038 emit_insn (gen_force_register_use (stack_pointer_rtx));
25040 if (crtl->profile || !TARGET_SCHED_PROLOG)
25041 emit_insn (gen_blockage ());
25043 /* Emit a clobber for each register that will be restored in the epilogue,
25044 so that flow2 will get register lifetimes correct. */
25045 for (regno = 0; regno < 13; regno++)
25046 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25047 emit_clobber (gen_rtx_REG (SImode, regno));
25049 if (! df_regs_ever_live_p (LR_REGNUM))
25050 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25053 /* Epilogue code for APCS frame. */
25054 static void
25055 arm_expand_epilogue_apcs_frame (bool really_return)
25057 unsigned long func_type;
25058 unsigned long saved_regs_mask;
25059 int num_regs = 0;
25060 int i;
25061 int floats_from_frame = 0;
25062 arm_stack_offsets *offsets;
25064 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25065 func_type = arm_current_func_type ();
25067 /* Get frame offsets for ARM. */
25068 offsets = arm_get_frame_offsets ();
25069 saved_regs_mask = offsets->saved_regs_mask;
25071 /* Find the offset of the floating-point save area in the frame. */
25072 floats_from_frame
25073 = (offsets->saved_args
25074 + arm_compute_static_chain_stack_bytes ()
25075 - offsets->frame);
25077 /* Compute how many core registers are saved and how far away the floats are. */
25078 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25079 if (saved_regs_mask & (1 << i))
25081 num_regs++;
25082 floats_from_frame += 4;
25085 if (TARGET_HARD_FLOAT && TARGET_VFP)
25087 int start_reg;
25088 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25090 /* The offset is from IP_REGNUM. */
25091 int saved_size = arm_get_vfp_saved_size ();
25092 if (saved_size > 0)
25094 rtx_insn *insn;
25095 floats_from_frame += saved_size;
25096 insn = emit_insn (gen_addsi3 (ip_rtx,
25097 hard_frame_pointer_rtx,
25098 GEN_INT (-floats_from_frame)));
25099 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25100 ip_rtx, hard_frame_pointer_rtx);
25103 /* Generate VFP register multi-pop. */
25104 start_reg = FIRST_VFP_REGNUM;
25106 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25107 /* Look for a case where a reg does not need restoring. */
25108 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25109 && (!df_regs_ever_live_p (i + 1)
25110 || call_used_regs[i + 1]))
25112 if (start_reg != i)
25113 arm_emit_vfp_multi_reg_pop (start_reg,
25114 (i - start_reg) / 2,
25115 gen_rtx_REG (SImode,
25116 IP_REGNUM));
25117 start_reg = i + 2;
25120 /* Restore the remaining regs that we have discovered (or possibly
25121 even all of them, if the conditional in the for loop never
25122 fired). */
25123 if (start_reg != i)
25124 arm_emit_vfp_multi_reg_pop (start_reg,
25125 (i - start_reg) / 2,
25126 gen_rtx_REG (SImode, IP_REGNUM));
25129 if (TARGET_IWMMXT)
25131 /* The frame pointer is guaranteed to be non-double-word aligned, as
25132 it is set to double-word-aligned old_stack_pointer - 4. */
25133 rtx_insn *insn;
25134 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25136 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25137 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25139 rtx addr = gen_frame_mem (V2SImode,
25140 plus_constant (Pmode, hard_frame_pointer_rtx,
25141 - lrm_count * 4));
25142 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25143 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25144 gen_rtx_REG (V2SImode, i),
25145 NULL_RTX);
25146 lrm_count += 2;
25150 /* saved_regs_mask should contain IP, which contains the old stack pointer
25151 at the time of activation-record creation. Since SP and IP are adjacent registers,
25152 we can restore the value directly into SP. */
25153 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25154 saved_regs_mask &= ~(1 << IP_REGNUM);
25155 saved_regs_mask |= (1 << SP_REGNUM);
25157 /* There are two registers left in saved_regs_mask - LR and PC. We
25158 only need to restore LR (the return address), but to
25159 save time we can load it directly into PC, unless we need a
25160 special function exit sequence, or we are not really returning. */
25161 if (really_return
25162 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25163 && !crtl->calls_eh_return)
25164 /* Delete LR from the register mask, so that LR on
25165 the stack is loaded into the PC in the register mask. */
25166 saved_regs_mask &= ~(1 << LR_REGNUM);
25167 else
25168 saved_regs_mask &= ~(1 << PC_REGNUM);
25170 num_regs = bit_count (saved_regs_mask);
25171 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25173 rtx_insn *insn;
25174 emit_insn (gen_blockage ());
25175 /* Unwind the stack to just below the saved registers. */
25176 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25177 hard_frame_pointer_rtx,
25178 GEN_INT (- 4 * num_regs)));
25180 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25181 stack_pointer_rtx, hard_frame_pointer_rtx);
25184 arm_emit_multi_reg_pop (saved_regs_mask);
25186 if (IS_INTERRUPT (func_type))
25188 /* Interrupt handlers will have pushed the
25189 IP onto the stack, so restore it now. */
25190 rtx_insn *insn;
25191 rtx addr = gen_rtx_MEM (SImode,
25192 gen_rtx_POST_INC (SImode,
25193 stack_pointer_rtx));
25194 set_mem_alias_set (addr, get_frame_alias_set ());
25195 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25196 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25197 gen_rtx_REG (SImode, IP_REGNUM),
25198 NULL_RTX);
25201 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25202 return;
25204 if (crtl->calls_eh_return)
25205 emit_insn (gen_addsi3 (stack_pointer_rtx,
25206 stack_pointer_rtx,
25207 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25209 if (IS_STACKALIGN (func_type))
25210 /* Restore the original stack pointer. Before prologue, the stack was
25211 realigned and the original stack pointer saved in r0. For details,
25212 see comment in arm_expand_prologue. */
25213 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25215 emit_jump_insn (simple_return_rtx);
25218 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25219 function is not a sibcall. */
25220 void
25221 arm_expand_epilogue (bool really_return)
25223 unsigned long func_type;
25224 unsigned long saved_regs_mask;
25225 int num_regs = 0;
25226 int i;
25227 int amount;
25228 arm_stack_offsets *offsets;
25230 func_type = arm_current_func_type ();
25232 /* Naked functions don't have an epilogue. Hence, just generate the return
25233 pattern and let output_return_instruction take care of any instruction emission. */
25234 if (IS_NAKED (func_type)
25235 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25237 if (really_return)
25238 emit_jump_insn (simple_return_rtx);
25239 return;
25242 /* If we are throwing an exception, then we really must be doing a
25243 return, so we can't tail-call. */
25244 gcc_assert (!crtl->calls_eh_return || really_return);
25246 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25248 arm_expand_epilogue_apcs_frame (really_return);
25249 return;
25252 /* Get frame offsets for ARM. */
25253 offsets = arm_get_frame_offsets ();
25254 saved_regs_mask = offsets->saved_regs_mask;
25255 num_regs = bit_count (saved_regs_mask);
25257 if (frame_pointer_needed)
25259 rtx_insn *insn;
25260 /* Restore stack pointer if necessary. */
25261 if (TARGET_ARM)
25263 /* In ARM mode, the frame pointer points to the first saved register.
25264 Restore the stack pointer to point at the last saved register. */
25265 amount = offsets->frame - offsets->saved_regs;
25267 /* Force out any pending memory operations that reference stacked data
25268 before stack de-allocation occurs. */
25269 emit_insn (gen_blockage ());
25270 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25271 hard_frame_pointer_rtx,
25272 GEN_INT (amount)));
25273 arm_add_cfa_adjust_cfa_note (insn, amount,
25274 stack_pointer_rtx,
25275 hard_frame_pointer_rtx);
25277 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25278 deleted. */
25279 emit_insn (gen_force_register_use (stack_pointer_rtx));
25281 else
25283 /* In Thumb-2 mode, the frame pointer points to the last saved
25284 register. */
25285 amount = offsets->locals_base - offsets->saved_regs;
25286 if (amount)
25288 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25289 hard_frame_pointer_rtx,
25290 GEN_INT (amount)));
25291 arm_add_cfa_adjust_cfa_note (insn, amount,
25292 hard_frame_pointer_rtx,
25293 hard_frame_pointer_rtx);
25296 /* Force out any pending memory operations that reference stacked data
25297 before stack de-allocation occurs. */
25298 emit_insn (gen_blockage ());
25299 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25300 hard_frame_pointer_rtx));
25301 arm_add_cfa_adjust_cfa_note (insn, 0,
25302 stack_pointer_rtx,
25303 hard_frame_pointer_rtx);
25304 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25305 deleted. */
25306 emit_insn (gen_force_register_use (stack_pointer_rtx));
25309 else
25311 /* Pop off outgoing args and local frame to adjust stack pointer to
25312 last saved register. */
25313 amount = offsets->outgoing_args - offsets->saved_regs;
25314 if (amount)
25316 rtx_insn *tmp;
25317 /* Force out any pending memory operations that reference stacked data
25318 before stack de-allocation occurs. */
25319 emit_insn (gen_blockage ());
25320 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25321 stack_pointer_rtx,
25322 GEN_INT (amount)));
25323 arm_add_cfa_adjust_cfa_note (tmp, amount,
25324 stack_pointer_rtx, stack_pointer_rtx);
25325 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25326 not deleted. */
25327 emit_insn (gen_force_register_use (stack_pointer_rtx));
25331 if (TARGET_HARD_FLOAT && TARGET_VFP)
25333 /* Generate VFP register multi-pop. */
25334 int end_reg = LAST_VFP_REGNUM + 1;
25336 /* Scan the registers in reverse order. We need to match
25337 any groupings made in the prologue and generate matching
25338 vldm operations. The need to match groups is because,
25339 unlike pop, vldm can only do consecutive regs. */
25340 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25341 /* Look for a case where a reg does not need restoring. */
25342 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25343 && (!df_regs_ever_live_p (i + 1)
25344 || call_used_regs[i + 1]))
25346 /* Restore the regs discovered so far (from reg+2 to
25347 end_reg). */
25348 if (end_reg > i + 2)
25349 arm_emit_vfp_multi_reg_pop (i + 2,
25350 (end_reg - (i + 2)) / 2,
25351 stack_pointer_rtx);
25352 end_reg = i;
25355 /* Restore the remaining regs that we have discovered (or possibly
25356 even all of them, if the conditional in the for loop never
25357 fired). */
25358 if (end_reg > i + 2)
25359 arm_emit_vfp_multi_reg_pop (i + 2,
25360 (end_reg - (i + 2)) / 2,
25361 stack_pointer_rtx);
25364 if (TARGET_IWMMXT)
25365 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25366 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25368 rtx_insn *insn;
25369 rtx addr = gen_rtx_MEM (V2SImode,
25370 gen_rtx_POST_INC (SImode,
25371 stack_pointer_rtx));
25372 set_mem_alias_set (addr, get_frame_alias_set ());
25373 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25374 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25375 gen_rtx_REG (V2SImode, i),
25376 NULL_RTX);
25377 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25378 stack_pointer_rtx, stack_pointer_rtx);
25381 if (saved_regs_mask)
25383 rtx insn;
25384 bool return_in_pc = false;
25386 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25387 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25388 && !IS_STACKALIGN (func_type)
25389 && really_return
25390 && crtl->args.pretend_args_size == 0
25391 && saved_regs_mask & (1 << LR_REGNUM)
25392 && !crtl->calls_eh_return)
25394 saved_regs_mask &= ~(1 << LR_REGNUM);
25395 saved_regs_mask |= (1 << PC_REGNUM);
25396 return_in_pc = true;
25399 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25401 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25402 if (saved_regs_mask & (1 << i))
25404 rtx addr = gen_rtx_MEM (SImode,
25405 gen_rtx_POST_INC (SImode,
25406 stack_pointer_rtx));
25407 set_mem_alias_set (addr, get_frame_alias_set ());
25409 if (i == PC_REGNUM)
25411 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25412 XVECEXP (insn, 0, 0) = ret_rtx;
25413 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25414 addr);
25415 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25416 insn = emit_jump_insn (insn);
25418 else
25420 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25421 addr));
25422 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25423 gen_rtx_REG (SImode, i),
25424 NULL_RTX);
25425 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25426 stack_pointer_rtx,
25427 stack_pointer_rtx);
25431 else
25433 if (TARGET_LDRD
25434 && current_tune->prefer_ldrd_strd
25435 && !optimize_function_for_size_p (cfun))
25437 if (TARGET_THUMB2)
25438 thumb2_emit_ldrd_pop (saved_regs_mask);
25439 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25440 arm_emit_ldrd_pop (saved_regs_mask);
25441 else
25442 arm_emit_multi_reg_pop (saved_regs_mask);
25444 else
25445 arm_emit_multi_reg_pop (saved_regs_mask);
25448 if (return_in_pc)
25449 return;
25452 amount
25453 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25454 if (amount)
25456 int i, j;
25457 rtx dwarf = NULL_RTX;
25458 rtx_insn *tmp =
25459 emit_insn (gen_addsi3 (stack_pointer_rtx,
25460 stack_pointer_rtx,
25461 GEN_INT (amount)));
25463 RTX_FRAME_RELATED_P (tmp) = 1;
25465 if (cfun->machine->uses_anonymous_args)
25467 /* Restore pretend args. Refer to arm_expand_prologue for how the
25468 pretend args are saved on the stack. */
25469 int num_regs = crtl->args.pretend_args_size / 4;
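/* (0xf0 >> num_regs) & 0xf selects the num_regs highest registers of
   r0-r3; e.g. 8 bytes of pretend args gives num_regs == 2 and a mask
   covering r2 and r3, matching the registers the prologue pushed. */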
25470 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25471 for (j = 0, i = 0; j < num_regs; i++)
25472 if (saved_regs_mask & (1 << i))
25474 rtx reg = gen_rtx_REG (SImode, i);
25475 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25476 j++;
25478 REG_NOTES (tmp) = dwarf;
25480 arm_add_cfa_adjust_cfa_note (tmp, amount,
25481 stack_pointer_rtx, stack_pointer_rtx);
25484 if (!really_return)
25485 return;
25487 if (crtl->calls_eh_return)
25488 emit_insn (gen_addsi3 (stack_pointer_rtx,
25489 stack_pointer_rtx,
25490 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25492 if (IS_STACKALIGN (func_type))
25493 /* Restore the original stack pointer. Before prologue, the stack was
25494 realigned and the original stack pointer saved in r0. For details,
25495 see comment in arm_expand_prologue. */
25496 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25498 emit_jump_insn (simple_return_rtx);
25501 /* Implementation of insn prologue_thumb1_interwork. This is the first
25502 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25504 const char *
25505 thumb1_output_interwork (void)
25507 const char * name;
25508 FILE *f = asm_out_file;
25510 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25511 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25512 == SYMBOL_REF);
25513 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25515 /* Generate code sequence to switch us into Thumb mode. */
25516 /* The .code 32 directive has already been emitted by
25517 ASM_DECLARE_FUNCTION_NAME. */
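/* Setting bit 0 of the branch target makes the following bx switch the
   core into Thumb state. */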
25518 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25519 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25521 /* Generate a label, so that the debugger will notice the
25522 change in instruction sets. This label is also used by
25523 the assembler to bypass the ARM code when this function
25524 is called from a Thumb encoded function elsewhere in the
25525 same file. Hence the definition of STUB_NAME here must
25526 agree with the definition in gas/config/tc-arm.c. */
25528 #define STUB_NAME ".real_start_of"
25530 fprintf (f, "\t.code\t16\n");
25531 #ifdef ARM_PE
25532 if (arm_dllexport_name_p (name))
25533 name = arm_strip_name_encoding (name);
25534 #endif
25535 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25536 fprintf (f, "\t.thumb_func\n");
25537 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25539 return "";
25542 /* Handle the case of a double word load into a low register from
25543 a computed memory address. The computed address may involve a
25544 register which is overwritten by the load. */
25545 const char *
25546 thumb_load_double_from_address (rtx *operands)
25548 rtx addr;
25549 rtx base;
25550 rtx offset;
25551 rtx arg1;
25552 rtx arg2;
25554 gcc_assert (REG_P (operands[0]));
25555 gcc_assert (MEM_P (operands[1]));
25557 /* Get the memory address. */
25558 addr = XEXP (operands[1], 0);
25560 /* Work out how the memory address is computed. */
25561 switch (GET_CODE (addr))
25563 case REG:
25564 operands[2] = adjust_address (operands[1], SImode, 4);
25566 if (REGNO (operands[0]) == REGNO (addr))
25568 output_asm_insn ("ldr\t%H0, %2", operands);
25569 output_asm_insn ("ldr\t%0, %1", operands);
25571 else
25573 output_asm_insn ("ldr\t%0, %1", operands);
25574 output_asm_insn ("ldr\t%H0, %2", operands);
25576 break;
25578 case CONST:
25579 /* Compute <address> + 4 for the high order load. */
25580 operands[2] = adjust_address (operands[1], SImode, 4);
25582 output_asm_insn ("ldr\t%0, %1", operands);
25583 output_asm_insn ("ldr\t%H0, %2", operands);
25584 break;
25586 case PLUS:
25587 arg1 = XEXP (addr, 0);
25588 arg2 = XEXP (addr, 1);
25590 if (CONSTANT_P (arg1))
25591 base = arg2, offset = arg1;
25592 else
25593 base = arg1, offset = arg2;
25595 gcc_assert (REG_P (base));
25597 /* Catch the case of <address> = <reg> + <reg> */
25598 if (REG_P (offset))
25600 int reg_offset = REGNO (offset);
25601 int reg_base = REGNO (base);
25602 int reg_dest = REGNO (operands[0]);
25604 /* Add the base and offset registers together into the
25605 higher destination register. */
25606 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25607 reg_dest + 1, reg_base, reg_offset);
25609 /* Load the lower destination register from the address in
25610 the higher destination register. */
25611 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25612 reg_dest, reg_dest + 1);
25614 /* Load the higher destination register from its own address
25615 plus 4. */
25616 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25617 reg_dest + 1, reg_dest + 1);
25619 else
25621 /* Compute <address> + 4 for the high order load. */
25622 operands[2] = adjust_address (operands[1], SImode, 4);
25624 /* If the computed address is held in the low order register
25625 then load the high order register first, otherwise always
25626 load the low order register first. */
25627 if (REGNO (operands[0]) == REGNO (base))
25629 output_asm_insn ("ldr\t%H0, %2", operands);
25630 output_asm_insn ("ldr\t%0, %1", operands);
25632 else
25634 output_asm_insn ("ldr\t%0, %1", operands);
25635 output_asm_insn ("ldr\t%H0, %2", operands);
25638 break;
25640 case LABEL_REF:
25641 /* With no registers to worry about we can just load the value
25642 directly. */
25643 operands[2] = adjust_address (operands[1], SImode, 4);
25645 output_asm_insn ("ldr\t%H0, %2", operands);
25646 output_asm_insn ("ldr\t%0, %1", operands);
25647 break;
25649 default:
25650 gcc_unreachable ();
25653 return "";
25656 const char *
25657 thumb_output_move_mem_multiple (int n, rtx *operands)
25659 switch (n)
25661 case 2:
25662 if (REGNO (operands[4]) > REGNO (operands[5]))
25663 std::swap (operands[4], operands[5]);
25665 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25666 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25667 break;
25669 case 3:
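/* Sort the three transfer registers into ascending order; ldmia/stmia
   require ascending register lists. */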
25670 if (REGNO (operands[4]) > REGNO (operands[5]))
25671 std::swap (operands[4], operands[5]);
25672 if (REGNO (operands[5]) > REGNO (operands[6]))
25673 std::swap (operands[5], operands[6]);
25674 if (REGNO (operands[4]) > REGNO (operands[5]))
25675 std::swap (operands[4], operands[5]);
25677 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25678 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25679 break;
25681 default:
25682 gcc_unreachable ();
25685 return "";
25688 /* Output a call-via instruction for thumb state. */
25689 const char *
25690 thumb_call_via_reg (rtx reg)
25692 int regno = REGNO (reg);
25693 rtx *labelp;
25695 gcc_assert (regno < LR_REGNUM);
25697 /* If we are in the normal text section we can use a single instance
25698 per compilation unit. If we are doing function sections, then we need
25699 an entry per section, since we can't rely on reachability. */
25700 if (in_section == text_section)
25702 thumb_call_reg_needed = 1;
25704 if (thumb_call_via_label[regno] == NULL)
25705 thumb_call_via_label[regno] = gen_label_rtx ();
25706 labelp = thumb_call_via_label + regno;
25708 else
25710 if (cfun->machine->call_via[regno] == NULL)
25711 cfun->machine->call_via[regno] = gen_label_rtx ();
25712 labelp = cfun->machine->call_via + regno;
25715 output_asm_insn ("bl\t%a0", labelp);
25716 return "";
25719 /* Routines for generating rtl. */
25720 void
25721 thumb_expand_movmemqi (rtx *operands)
25723 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25724 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25725 HOST_WIDE_INT len = INTVAL (operands[2]);
25726 HOST_WIDE_INT offset = 0;
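/* Copy the block in decreasing chunk sizes: 12-byte and 8-byte pieces via
   the ldm/stm based movmem patterns, then word, halfword and byte tails. */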
25728 while (len >= 12)
25730 emit_insn (gen_movmem12b (out, in, out, in));
25731 len -= 12;
25734 if (len >= 8)
25736 emit_insn (gen_movmem8b (out, in, out, in));
25737 len -= 8;
25740 if (len >= 4)
25742 rtx reg = gen_reg_rtx (SImode);
25743 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25744 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25745 len -= 4;
25746 offset += 4;
25749 if (len >= 2)
25751 rtx reg = gen_reg_rtx (HImode);
25752 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25753 plus_constant (Pmode, in,
25754 offset))));
25755 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25756 offset)),
25757 reg));
25758 len -= 2;
25759 offset += 2;
25762 if (len)
25764 rtx reg = gen_reg_rtx (QImode);
25765 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25766 plus_constant (Pmode, in,
25767 offset))));
25768 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25769 offset)),
25770 reg));
25774 void
25775 thumb_reload_out_hi (rtx *operands)
25777 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25780 /* Handle reading a half-word from memory during reload. */
25781 void
25782 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25784 gcc_unreachable ();
25787 /* Return the length of a function name prefix
25788 that starts with the character 'c'. */
25789 static int
25790 arm_get_strip_length (int c)
25792 switch (c)
25794 ARM_NAME_ENCODING_LENGTHS
25795 default: return 0;
25799 /* Return a pointer to a function's name with any
25800 and all prefix encodings stripped from it. */
25801 const char *
25802 arm_strip_name_encoding (const char *name)
25804 int skip;
25806 while ((skip = arm_get_strip_length (* name)))
25807 name += skip;
25809 return name;
25812 /* If there is a '*' anywhere in the name's prefix, then
25813 emit the stripped name verbatim, otherwise prepend an
25814 underscore if leading underscores are being used. */
25815 void
25816 arm_asm_output_labelref (FILE *stream, const char *name)
25818 int skip;
25819 int verbatim = 0;
25821 while ((skip = arm_get_strip_length (* name)))
25823 verbatim |= (*name == '*');
25824 name += skip;
25827 if (verbatim)
25828 fputs (name, stream);
25829 else
25830 asm_fprintf (stream, "%U%s", name);
25833 /* This function is used to emit an EABI tag and its associated value.
25834 We emit the numerical value of the tag in case the assembler does not
25835 support textual tags (e.g. gas prior to 2.20). If requested we include
25836 the tag name in a comment so that anyone reading the assembler output
25837 will know which tag is being set.
25839 This function is not static because arm-c.c needs it too. */
25841 void
25842 arm_emit_eabi_attribute (const char *name, int num, int val)
25844 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25845 if (flag_verbose_asm || flag_debug_asm)
25846 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25847 asm_fprintf (asm_out_file, "\n");
25850 /* This function is used to print CPU tuning information as comment
25851 in assembler file. Pointers are not printed for now. */
25853 void
25854 arm_print_tune_info (void)
25856 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25857 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25858 current_tune->constant_limit);
25859 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25860 current_tune->max_insns_skipped);
25861 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
25862 current_tune->prefetch.num_slots);
25863 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
25864 current_tune->prefetch.l1_cache_size);
25865 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25866 current_tune->prefetch.l1_cache_line_size);
25867 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25868 (int) current_tune->prefer_constant_pool);
25869 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25870 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25871 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25872 current_tune->branch_cost (false, false));
25873 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25874 current_tune->branch_cost (false, true));
25875 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25876 current_tune->branch_cost (true, false));
25877 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25878 current_tune->branch_cost (true, true));
25879 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25880 (int) current_tune->prefer_ldrd_strd);
25881 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25882 (int) current_tune->logical_op_non_short_circuit_thumb,
25883 (int) current_tune->logical_op_non_short_circuit_arm);
25884 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25885 (int) current_tune->prefer_neon_for_64bits);
25886 asm_fprintf (asm_out_file,
25887 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25888 (int) current_tune->disparage_flag_setting_t16_encodings);
25889 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25890 (int) current_tune->string_ops_prefer_neon);
25891 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25892 current_tune->max_insns_inline_memset);
25893 asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
25894 current_tune->fusible_ops);
25895 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25896 (int) current_tune->sched_autopref);
25899 static void
25900 arm_file_start (void)
25902 int val;
25904 if (TARGET_BPABI)
25906 const char *fpu_name;
25907 if (arm_selected_arch)
25909 /* armv7ve doesn't support any extensions. */
25910 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25912 /* Keep backward compatibility for assemblers
25913 which don't support armv7ve. */
25914 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25915 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25916 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25917 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25918 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25920 else
25922 const char* pos = strchr (arm_selected_arch->name, '+');
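/* The architecture name may carry a '+extension' suffix; if so, emit the
   base name with .arch and the extension separately with .arch_extension. */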
25923 if (pos)
25925 char buf[15];
25926 gcc_assert (strlen (arm_selected_arch->name)
25927 <= sizeof (buf) / sizeof (*pos));
25928 strncpy (buf, arm_selected_arch->name,
25929 (pos - arm_selected_arch->name) * sizeof (*pos));
25930 buf[pos - arm_selected_arch->name] = '\0';
25931 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25932 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25934 else
25935 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25938 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25939 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25940 else
25942 const char* truncated_name
25943 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25944 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25947 if (print_tune_info)
25948 arm_print_tune_info ();
25950 if (TARGET_SOFT_FLOAT)
25952 fpu_name = "softvfp";
25954 else
25956 fpu_name = arm_fpu_desc->name;
25957 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25959 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
25960 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25962 if (TARGET_HARD_FLOAT_ABI)
25963 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25966 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25968 /* Some of these attributes only apply when the corresponding features
25969 are used. However we don't have any easy way of figuring this out.
25970 Conservatively record the setting that would have been used. */
25972 if (flag_rounding_math)
25973 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25975 if (!flag_unsafe_math_optimizations)
25977 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25978 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25980 if (flag_signaling_nans)
25981 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25983 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25984 flag_finite_math_only ? 1 : 3);
25986 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25987 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25988 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25989 flag_short_enums ? 1 : 2);
25991 /* Tag_ABI_optimization_goals. */
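/* These values follow the EABI's Tag_ABI_optimization_goals encoding:
   1 and 2 prefer speed, 4 aggressively prefers size, and 6 prefers the
   best debugging experience (used when not optimizing). */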
25992 if (optimize_size)
25993 val = 4;
25994 else if (optimize >= 2)
25995 val = 2;
25996 else if (optimize)
25997 val = 1;
25998 else
25999 val = 6;
26000 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26002 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26003 unaligned_access);
26005 if (arm_fp16_format)
26006 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26007 (int) arm_fp16_format);
26009 if (arm_lang_output_object_attributes_hook)
26010 arm_lang_output_object_attributes_hook();
26013 default_file_start ();
26016 static void
26017 arm_file_end (void)
26019 int regno;
26021 if (NEED_INDICATE_EXEC_STACK)
26022 /* Add .note.GNU-stack. */
26023 file_end_indicate_exec_stack ();
26025 if (! thumb_call_reg_needed)
26026 return;
26028 switch_to_section (text_section);
26029 asm_fprintf (asm_out_file, "\t.code 16\n");
26030 ASM_OUTPUT_ALIGN (asm_out_file, 1);
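/* Emit one "bx rN" trampoline for each register that thumb_call_via_reg
   referenced from the normal text section. */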
26032 for (regno = 0; regno < LR_REGNUM; regno++)
26034 rtx label = thumb_call_via_label[regno];
26036 if (label != 0)
26038 targetm.asm_out.internal_label (asm_out_file, "L",
26039 CODE_LABEL_NUMBER (label));
26040 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26045 #ifndef ARM_PE
26046 /* Symbols in the text segment can be accessed without indirecting via the
26047 constant pool; it may take an extra binary operation, but this is still
26048 faster than indirecting via memory. Don't do this when not optimizing,
26049 since we won't be calculating all of the offsets necessary to do this
26050 simplification. */
26052 static void
26053 arm_encode_section_info (tree decl, rtx rtl, int first)
26055 if (optimize > 0 && TREE_CONSTANT (decl))
26056 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26058 default_encode_section_info (decl, rtl, first);
26060 #endif /* !ARM_PE */
26062 static void
26063 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26065 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26066 && !strcmp (prefix, "L"))
26068 arm_ccfsm_state = 0;
26069 arm_target_insn = NULL;
26071 default_internal_label (stream, prefix, labelno);
26074 /* Output code to add DELTA to the first argument, and then jump
26075 to FUNCTION. Used for C++ multiple inheritance. */
26076 static void
26077 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
26078 HOST_WIDE_INT delta,
26079 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
26080 tree function)
26082 static int thunk_label = 0;
26083 char label[256];
26084 char labelpc[256];
26085 int mi_delta = delta;
26086 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26087 int shift = 0;
26088 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26089 ? 1 : 0);
26090 if (mi_delta < 0)
26091 mi_delta = - mi_delta;
26093 final_start_function (emit_barrier (), file, 1);
26095 if (TARGET_THUMB1)
26097 int labelno = thunk_label++;
26098 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26099 /* Thunks are entered in ARM mode when available. */
26100 if (TARGET_THUMB1_ONLY)
26102 /* push r3 so we can use it as a temporary. */
26103 /* TODO: Omit this save if r3 is not used. */
26104 fputs ("\tpush {r3}\n", file);
26105 fputs ("\tldr\tr3, ", file);
26107 else
26109 fputs ("\tldr\tr12, ", file);
26111 assemble_name (file, label);
26112 fputc ('\n', file);
26113 if (flag_pic)
26115 /* If we are generating PIC, the ldr instruction below loads
26116 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26117 the address of the add + 8, so we have:
26119 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26120 = target + 1.
26122 Note that we have "+ 1" because some versions of GNU ld
26123 don't set the low bit of the result for R_ARM_REL32
26124 relocations against thumb function symbols.
26125 On ARMv6M this is +4, not +8. */
26126 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26127 assemble_name (file, labelpc);
26128 fputs (":\n", file);
26129 if (TARGET_THUMB1_ONLY)
26131 /* This is 2 insns after the start of the thunk, so we know it
26132 is 4-byte aligned. */
26133 fputs ("\tadd\tr3, pc, r3\n", file);
26134 fputs ("\tmov r12, r3\n", file);
26136 else
26137 fputs ("\tadd\tr12, pc, r12\n", file);
26139 else if (TARGET_THUMB1_ONLY)
26140 fputs ("\tmov r12, r3\n", file);
26142 if (TARGET_THUMB1_ONLY)
26144 if (mi_delta > 255)
26146 fputs ("\tldr\tr3, ", file);
26147 assemble_name (file, label);
26148 fputs ("+4\n", file);
26149 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26150 mi_op, this_regno, this_regno);
26152 else if (mi_delta != 0)
26154 /* Thumb1 unified syntax requires an 's' suffix in the instruction name
26155 when one of the operands is an immediate. */
26156 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26157 mi_op, this_regno, this_regno,
26158 mi_delta);
26161 else
26163 /* TODO: Use movw/movt for large constants when available. */
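/* Add the delta one 8-bit chunk at a time, each chunk aligned to an even
   bit position so that it fits an ARM rotated immediate; e.g. a delta of
   0x1234 is added as the two chunks 0x234 and 0x1000. */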
26164 while (mi_delta != 0)
26166 if ((mi_delta & (3 << shift)) == 0)
26167 shift += 2;
26168 else
26170 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26171 mi_op, this_regno, this_regno,
26172 mi_delta & (0xff << shift));
26173 mi_delta &= ~(0xff << shift);
26174 shift += 8;
26178 if (TARGET_THUMB1)
26180 if (TARGET_THUMB1_ONLY)
26181 fputs ("\tpop\t{r3}\n", file);
26183 fprintf (file, "\tbx\tr12\n");
26184 ASM_OUTPUT_ALIGN (file, 2);
26185 assemble_name (file, label);
26186 fputs (":\n", file);
26187 if (flag_pic)
26189 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26190 rtx tem = XEXP (DECL_RTL (function), 0);
26191 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26192 pipeline offset is four rather than eight. Adjust the offset
26193 accordingly. */
26194 tem = plus_constant (GET_MODE (tem), tem,
26195 TARGET_THUMB1_ONLY ? -3 : -7);
26196 tem = gen_rtx_MINUS (GET_MODE (tem),
26197 tem,
26198 gen_rtx_SYMBOL_REF (Pmode,
26199 ggc_strdup (labelpc)));
26200 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26202 else
26203 /* Output ".word .LTHUNKn". */
26204 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26206 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26207 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26209 else
26211 fputs ("\tb\t", file);
26212 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26213 if (NEED_PLT_RELOC)
26214 fputs ("(PLT)", file);
26215 fputc ('\n', file);
26218 final_end_function ();
26222 arm_emit_vector_const (FILE *file, rtx x)
26224 int i;
26225 const char * pattern;
26227 gcc_assert (GET_CODE (x) == CONST_VECTOR);
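/* Pick the per-element field width from the vector mode, then print the
   elements from the highest index downwards so the literal reads with the
   most significant element first. */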
26229 switch (GET_MODE (x))
26231 case V2SImode: pattern = "%08x"; break;
26232 case V4HImode: pattern = "%04x"; break;
26233 case V8QImode: pattern = "%02x"; break;
26234 default: gcc_unreachable ();
26237 fprintf (file, "0x");
26238 for (i = CONST_VECTOR_NUNITS (x); i--;)
26240 rtx element;
26242 element = CONST_VECTOR_ELT (x, i);
26243 fprintf (file, pattern, INTVAL (element));
26246 return 1;
26249 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26250 HFmode constant pool entries are actually loaded with ldr. */
26251 void
26252 arm_emit_fp16_const (rtx c)
26254 REAL_VALUE_TYPE r;
26255 long bits;
26257 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
26258 bits = real_to_target (NULL, &r, HFmode);
26259 if (WORDS_BIG_ENDIAN)
26260 assemble_zeros (2);
26261 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26262 if (!WORDS_BIG_ENDIAN)
26263 assemble_zeros (2);
26266 const char *
26267 arm_output_load_gr (rtx *operands)
26269 rtx reg;
26270 rtx offset;
26271 rtx wcgr;
26272 rtx sum;
26274 if (!MEM_P (operands [1])
26275 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26276 || !REG_P (reg = XEXP (sum, 0))
26277 || !CONST_INT_P (offset = XEXP (sum, 1))
26278 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26279 return "wldrw%?\t%0, %1";
26281 /* Fix up an out-of-range load of a GR register. */
26282 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26283 wcgr = operands[0];
26284 operands[0] = reg;
26285 output_asm_insn ("ldr%?\t%0, %1", operands);
26287 operands[0] = wcgr;
26288 operands[1] = reg;
26289 output_asm_insn ("tmcr%?\t%0, %1", operands);
26290 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26292 return "";
26295 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26297 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26298 named arg and all anonymous args onto the stack.
26299 XXX I know the prologue shouldn't be pushing registers, but it is faster
26300 that way. */
26302 static void
26303 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26304 machine_mode mode,
26305 tree type,
26306 int *pretend_size,
26307 int second_time ATTRIBUTE_UNUSED)
26309 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26310 int nregs;
26312 cfun->machine->uses_anonymous_args = 1;
26313 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26315 nregs = pcum->aapcs_ncrn;
26316 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26317 nregs++;
26319 else
26320 nregs = pcum->nregs;
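/* Everything from the first unused argument register up to r3 will be
   pushed; e.g. a single named SImode argument leaves nregs == 1 and a
   pretend size of 12 bytes covering r1-r3. */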
26322 if (nregs < NUM_ARG_REGS)
26323 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26326 /* We can't rely on the caller doing the proper promotion when
26327 using APCS or ATPCS. */
26329 static bool
26330 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26332 return !TARGET_AAPCS_BASED;
26335 static machine_mode
26336 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26337 machine_mode mode,
26338 int *punsignedp ATTRIBUTE_UNUSED,
26339 const_tree fntype ATTRIBUTE_UNUSED,
26340 int for_return ATTRIBUTE_UNUSED)
26342 if (GET_MODE_CLASS (mode) == MODE_INT
26343 && GET_MODE_SIZE (mode) < 4)
26344 return SImode;
26346 return mode;
26349 /* AAPCS based ABIs use short enums by default. */
26351 static bool
26352 arm_default_short_enums (void)
26354 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26358 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26360 static bool
26361 arm_align_anon_bitfield (void)
26363 return TARGET_AAPCS_BASED;
26367 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26369 static tree
26370 arm_cxx_guard_type (void)
26372 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26376 /* The EABI says test the least significant bit of a guard variable. */
26378 static bool
26379 arm_cxx_guard_mask_bit (void)
26381 return TARGET_AAPCS_BASED;
26385 /* The EABI specifies that all array cookies are 8 bytes long. */
26387 static tree
26388 arm_get_cookie_size (tree type)
26390 tree size;
26392 if (!TARGET_AAPCS_BASED)
26393 return default_cxx_get_cookie_size (type);
26395 size = build_int_cst (sizetype, 8);
26396 return size;
26400 /* The EABI says that array cookies should also contain the element size. */
26402 static bool
26403 arm_cookie_has_size (void)
26405 return TARGET_AAPCS_BASED;
26409 /* The EABI says constructors and destructors should return a pointer to
26410 the object constructed/destroyed. */
26412 static bool
26413 arm_cxx_cdtor_returns_this (void)
26415 return TARGET_AAPCS_BASED;
26418 /* The EABI says that an inline function may never be the key
26419 method. */
26421 static bool
26422 arm_cxx_key_method_may_be_inline (void)
26424 return !TARGET_AAPCS_BASED;
26427 static void
26428 arm_cxx_determine_class_data_visibility (tree decl)
26430 if (!TARGET_AAPCS_BASED
26431 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26432 return;
26434 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26435 is exported. However, on systems without dynamic vague linkage,
26436 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26437 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26438 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26439 else
26440 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26441 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26444 static bool
26445 arm_cxx_class_data_always_comdat (void)
26447 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26448 vague linkage if the class has no key function. */
26449 return !TARGET_AAPCS_BASED;
26453 /* The EABI says __aeabi_atexit should be used to register static
26454 destructors. */
26456 static bool
26457 arm_cxx_use_aeabi_atexit (void)
26459 return TARGET_AAPCS_BASED;
26463 void
26464 arm_set_return_address (rtx source, rtx scratch)
26466 arm_stack_offsets *offsets;
26467 HOST_WIDE_INT delta;
26468 rtx addr;
26469 unsigned long saved_regs;
26471 offsets = arm_get_frame_offsets ();
26472 saved_regs = offsets->saved_regs_mask;
26474 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26475 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26476 else
26478 if (frame_pointer_needed)
26479 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26480 else
26482 /* LR will be the first saved register. */
26483 delta = offsets->outgoing_args - (offsets->frame + 4);
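/* An offset of 4096 or more will not fit in the immediate field of the
   store, so add the 4kB-aligned part into SCRATCH and keep only the low
   12 bits as the displacement. */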
26486 if (delta >= 4096)
26488 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26489 GEN_INT (delta & ~4095)));
26490 addr = scratch;
26491 delta &= 4095;
26493 else
26494 addr = stack_pointer_rtx;
26496 addr = plus_constant (Pmode, addr, delta);
26498 /* The store needs to be marked as frame related in order to prevent
26499 DSE from deleting it as dead if it is based on fp. */
26500 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26501 RTX_FRAME_RELATED_P (insn) = 1;
26502 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26507 void
26508 thumb_set_return_address (rtx source, rtx scratch)
26510 arm_stack_offsets *offsets;
26511 HOST_WIDE_INT delta;
26512 HOST_WIDE_INT limit;
26513 int reg;
26514 rtx addr;
26515 unsigned long mask;
26517 emit_use (source);
26519 offsets = arm_get_frame_offsets ();
26520 mask = offsets->saved_regs_mask;
26521 if (mask & (1 << LR_REGNUM))
26523 limit = 1024;
26524 /* Find the saved regs. */
26525 if (frame_pointer_needed)
26527 delta = offsets->soft_frame - offsets->saved_args;
26528 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26529 if (TARGET_THUMB1)
26530 limit = 128;
26532 else
26534 delta = offsets->outgoing_args - offsets->saved_args;
26535 reg = SP_REGNUM;
26537 /* Allow for the stack frame. */
26538 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26539 delta -= 16;
26540 /* The link register is always the first saved register. */
26541 delta -= 4;
26543 /* Construct the address. */
26544 addr = gen_rtx_REG (SImode, reg);
26545 if (delta > limit)
26547 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26548 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26549 addr = scratch;
26551 else
26552 addr = plus_constant (Pmode, addr, delta);
26554 /* The store needs to be marked as frame related in order to prevent
26555 DSE from deleting it as dead if it is based on fp. */
26556 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26557 RTX_FRAME_RELATED_P (insn) = 1;
26558 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26560 else
26561 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26564 /* Implements target hook vector_mode_supported_p. */
26565 bool
26566 arm_vector_mode_supported_p (machine_mode mode)
26568 /* Neon also supports V2SImode, etc. listed in the clause below. */
26569 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26570 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26571 || mode == V2DImode || mode == V8HFmode))
26572 return true;
26574 if ((TARGET_NEON || TARGET_IWMMXT)
26575 && ((mode == V2SImode)
26576 || (mode == V4HImode)
26577 || (mode == V8QImode)))
26578 return true;
26580 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26581 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26582 || mode == V2HAmode))
26583 return true;
26585 return false;
26588 /* Implements target hook array_mode_supported_p. */
26590 static bool
26591 arm_array_mode_supported_p (machine_mode mode,
26592 unsigned HOST_WIDE_INT nelems)
26594 if (TARGET_NEON
26595 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26596 && (nelems >= 2 && nelems <= 4))
26597 return true;
26599 return false;
26602 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26603 registers when autovectorizing for Neon, at least until multiple vector
26604 widths are supported properly by the middle-end. */
26606 static machine_mode
26607 arm_preferred_simd_mode (machine_mode mode)
26609 if (TARGET_NEON)
26610 switch (mode)
26612 case SFmode:
26613 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26614 case SImode:
26615 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26616 case HImode:
26617 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26618 case QImode:
26619 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26620 case DImode:
26621 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26622 return V2DImode;
26623 break;
26625 default:;
26628 if (TARGET_REALLY_IWMMXT)
26629 switch (mode)
26631 case SImode:
26632 return V2SImode;
26633 case HImode:
26634 return V4HImode;
26635 case QImode:
26636 return V8QImode;
26638 default:;
26641 return word_mode;
26644 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26646 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26647 using r0-r4 for function arguments and r7 for the stack frame, leaving too
26648 few registers to do doubleword arithmetic. For Thumb-2 all the
26649 potentially problematic instructions accept high registers so this is not
26650 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26651 that require many low registers. */
26652 static bool
26653 arm_class_likely_spilled_p (reg_class_t rclass)
26655 if ((TARGET_THUMB1 && rclass == LO_REGS)
26656 || rclass == CC_REG)
26657 return true;
26659 return false;
26662 /* Implements target hook small_register_classes_for_mode_p. */
26663 bool
26664 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26666 return TARGET_THUMB1;
26669 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26670 ARM insns and therefore guarantee that the shift count is modulo 256.
26671 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26672 guarantee no particular behavior for out-of-range counts. */
26674 static unsigned HOST_WIDE_INT
26675 arm_shift_truncation_mask (machine_mode mode)
26677 return mode == SImode ? 255 : 0;
26681 /* Map internal gcc register numbers to DWARF2 register numbers. */
26683 unsigned int
26684 arm_dbx_register_number (unsigned int regno)
26686 if (regno < 16)
26687 return regno;
26689 if (IS_VFP_REGNUM (regno))
26691 /* See comment in arm_dwarf_register_span. */
26692 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26693 return 64 + regno - FIRST_VFP_REGNUM;
26694 else
26695 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26698 if (IS_IWMMXT_GR_REGNUM (regno))
26699 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26701 if (IS_IWMMXT_REGNUM (regno))
26702 return 112 + regno - FIRST_IWMMXT_REGNUM;
26704 return DWARF_FRAME_REGISTERS;
26707 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26708 GCC models them as 64 32-bit registers, so we need to describe this to
26709 the DWARF generation code. Other registers can use the default. */
26710 static rtx
26711 arm_dwarf_register_span (rtx rtl)
26713 machine_mode mode;
26714 unsigned regno;
26715 rtx parts[16];
26716 int nregs;
26717 int i;
26719 regno = REGNO (rtl);
26720 if (!IS_VFP_REGNUM (regno))
26721 return NULL_RTX;
26723 /* XXX FIXME: The EABI defines two VFP register ranges:
26724 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26725 256-287: D0-D31
26726 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26727 corresponding D register. Until GDB supports this, we shall use the
26728 legacy encodings. We also use these encodings for D0-D15 for
26729 compatibility with older debuggers. */
26730 mode = GET_MODE (rtl);
26731 if (GET_MODE_SIZE (mode) < 8)
26732 return NULL_RTX;
26734 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26736 nregs = GET_MODE_SIZE (mode) / 4;
26737 for (i = 0; i < nregs; i += 2)
26738 if (TARGET_BIG_END)
26740 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26741 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26743 else
26745 parts[i] = gen_rtx_REG (SImode, regno + i);
26746 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26749 else
26751 nregs = GET_MODE_SIZE (mode) / 8;
26752 for (i = 0; i < nregs; i++)
26753 parts[i] = gen_rtx_REG (DImode, regno + i);
26756 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26759 #if ARM_UNWIND_INFO
26760 /* Emit unwind directives for a store-multiple instruction or stack pointer
26761 push during alignment.
26762 These should only ever be generated by the function prologue code, so
26763 expect them to have a particular form.
26764 The store-multiple instruction sometimes pushes pc as the last register,
26765 although it should not be tracked in the unwind information; for -Os it
26766 sometimes pushes some dummy registers before the first register that needs
26767 to be tracked in the unwind information. Such dummy registers are there
26768 just to avoid a separate stack adjustment and will not be restored in the
26769 epilogue. */
26771 static void
26772 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26774 int i;
26775 HOST_WIDE_INT offset;
26776 HOST_WIDE_INT nregs;
26777 int reg_size;
26778 unsigned reg;
26779 unsigned lastreg;
26780 unsigned padfirst = 0, padlast = 0;
26781 rtx e;
26783 e = XVECEXP (p, 0, 0);
26784 gcc_assert (GET_CODE (e) == SET);
26786 /* First insn will adjust the stack pointer. */
26787 gcc_assert (GET_CODE (e) == SET
26788 && REG_P (SET_DEST (e))
26789 && REGNO (SET_DEST (e)) == SP_REGNUM
26790 && GET_CODE (SET_SRC (e)) == PLUS);
26792 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26793 nregs = XVECLEN (p, 0) - 1;
26794 gcc_assert (nregs);
26796 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26797 if (reg < 16)
26799 /* For -Os dummy registers can be pushed at the beginning to
26800 avoid separate stack pointer adjustment. */
26801 e = XVECEXP (p, 0, 1);
26802 e = XEXP (SET_DEST (e), 0);
26803 if (GET_CODE (e) == PLUS)
26804 padfirst = INTVAL (XEXP (e, 1));
26805 gcc_assert (padfirst == 0 || optimize_size);
26806 /* The function prologue may also push pc, but not annotate it as it is
26807 never restored. We turn this into a stack pointer adjustment. */
26808 e = XVECEXP (p, 0, nregs);
26809 e = XEXP (SET_DEST (e), 0);
26810 if (GET_CODE (e) == PLUS)
26811 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26812 else
26813 padlast = offset - 4;
26814 gcc_assert (padlast == 0 || padlast == 4);
26815 if (padlast == 4)
26816 fprintf (asm_out_file, "\t.pad #4\n");
26817 reg_size = 4;
26818 fprintf (asm_out_file, "\t.save {");
26820 else if (IS_VFP_REGNUM (reg))
26822 reg_size = 8;
26823 fprintf (asm_out_file, "\t.vsave {");
26825 else
26826 /* Unknown register type. */
26827 gcc_unreachable ();
26829 /* If the stack increment doesn't match the size of the saved registers,
26830 something has gone horribly wrong. */
26831 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26833 offset = padfirst;
26834 lastreg = 0;
26835 /* The remaining insns will describe the stores. */
26836 for (i = 1; i <= nregs; i++)
26838 /* Expect (set (mem <addr>) (reg)).
26839 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26840 e = XVECEXP (p, 0, i);
26841 gcc_assert (GET_CODE (e) == SET
26842 && MEM_P (SET_DEST (e))
26843 && REG_P (SET_SRC (e)));
26845 reg = REGNO (SET_SRC (e));
26846 gcc_assert (reg >= lastreg);
26848 if (i != 1)
26849 fprintf (asm_out_file, ", ");
26850 /* We can't use %r for vfp because we need to use the
26851 double precision register names. */
26852 if (IS_VFP_REGNUM (reg))
26853 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26854 else
26855 asm_fprintf (asm_out_file, "%r", reg);
26857 #ifdef ENABLE_CHECKING
26858 /* Check that the addresses are consecutive. */
26859 e = XEXP (SET_DEST (e), 0);
26860 if (GET_CODE (e) == PLUS)
26861 gcc_assert (REG_P (XEXP (e, 0))
26862 && REGNO (XEXP (e, 0)) == SP_REGNUM
26863 && CONST_INT_P (XEXP (e, 1))
26864 && offset == INTVAL (XEXP (e, 1)));
26865 else
26866 gcc_assert (i == 1
26867 && REG_P (e)
26868 && REGNO (e) == SP_REGNUM);
26869 offset += reg_size;
26870 #endif
26872 fprintf (asm_out_file, "}\n");
26873 if (padfirst)
26874 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26877 /* Emit unwind directives for a SET. */
26879 static void
26880 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26882 rtx e0;
26883 rtx e1;
26884 unsigned reg;
26886 e0 = XEXP (p, 0);
26887 e1 = XEXP (p, 1);
26888 switch (GET_CODE (e0))
26890 case MEM:
26891 /* Pushing a single register. */
26892 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26893 || !REG_P (XEXP (XEXP (e0, 0), 0))
26894 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26895 abort ();
26897 asm_fprintf (asm_out_file, "\t.save ");
26898 if (IS_VFP_REGNUM (REGNO (e1)))
26899 asm_fprintf(asm_out_file, "{d%d}\n",
26900 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26901 else
26902 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26903 break;
26905 case REG:
26906 if (REGNO (e0) == SP_REGNUM)
26908 /* A stack increment. */
26909 if (GET_CODE (e1) != PLUS
26910 || !REG_P (XEXP (e1, 0))
26911 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26912 || !CONST_INT_P (XEXP (e1, 1)))
26913 abort ();
26915 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26916 -INTVAL (XEXP (e1, 1)));
26918 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26920 HOST_WIDE_INT offset;
26922 if (GET_CODE (e1) == PLUS)
26924 if (!REG_P (XEXP (e1, 0))
26925 || !CONST_INT_P (XEXP (e1, 1)))
26926 abort ();
26927 reg = REGNO (XEXP (e1, 0));
26928 offset = INTVAL (XEXP (e1, 1));
26929 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26930 HARD_FRAME_POINTER_REGNUM, reg,
26931 offset);
26933 else if (REG_P (e1))
26935 reg = REGNO (e1);
26936 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26937 HARD_FRAME_POINTER_REGNUM, reg);
26939 else
26940 abort ();
26942 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26944 /* Move from sp to reg. */
26945 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26947 else if (GET_CODE (e1) == PLUS
26948 && REG_P (XEXP (e1, 0))
26949 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26950 && CONST_INT_P (XEXP (e1, 1)))
26952 /* Set reg to offset from sp. */
26953 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26954 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26956 else
26957 abort ();
26958 break;
26960 default:
26961 abort ();
26966 /* Emit unwind directives for the given insn. */
26968 static void
26969 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26971 rtx note, pat;
26972 bool handled_one = false;
26974 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26975 return;
26977 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26978 && (TREE_NOTHROW (current_function_decl)
26979 || crtl->all_throwers_are_sibcalls))
26980 return;
26982 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26983 return;
26985 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26987 switch (REG_NOTE_KIND (note))
26989 case REG_FRAME_RELATED_EXPR:
26990 pat = XEXP (note, 0);
26991 goto found;
26993 case REG_CFA_REGISTER:
26994 pat = XEXP (note, 0);
26995 if (pat == NULL)
26997 pat = PATTERN (insn);
26998 if (GET_CODE (pat) == PARALLEL)
26999 pat = XVECEXP (pat, 0, 0);
27002 /* Only emitted for IS_STACKALIGN re-alignment. */
27004 rtx dest, src;
27005 unsigned reg;
27007 src = SET_SRC (pat);
27008 dest = SET_DEST (pat);
27010 gcc_assert (src == stack_pointer_rtx);
27011 reg = REGNO (dest);
27012 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27013 reg + 0x90, reg);
27015 handled_one = true;
27016 break;
27018 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27019 only to get correct DWARF information for shrink-wrapping. We should not
27020 emit unwind information for it, because these notes are used either for
27021 pretend arguments or to adjust sp and restore registers from the
27022 stack. */
27023 case REG_CFA_DEF_CFA:
27024 case REG_CFA_ADJUST_CFA:
27025 case REG_CFA_RESTORE:
27026 return;
27028 case REG_CFA_EXPRESSION:
27029 case REG_CFA_OFFSET:
27030 /* ??? Only handling here what we actually emit. */
27031 gcc_unreachable ();
27033 default:
27034 break;
27037 if (handled_one)
27038 return;
27039 pat = PATTERN (insn);
27040 found:
27042 switch (GET_CODE (pat))
27044 case SET:
27045 arm_unwind_emit_set (asm_out_file, pat);
27046 break;
27048 case SEQUENCE:
27049 /* Store multiple. */
27050 arm_unwind_emit_sequence (asm_out_file, pat);
27051 break;
27053 default:
27054 abort();
27059 /* Output a reference from a function exception table to the type_info
27060 object X. The EABI specifies that the symbol should be relocated by
27061 an R_ARM_TARGET2 relocation. */
27063 static bool
27064 arm_output_ttype (rtx x)
27066 fputs ("\t.word\t", asm_out_file);
27067 output_addr_const (asm_out_file, x);
27068 /* Use special relocations for symbol references. */
27069 if (!CONST_INT_P (x))
27070 fputs ("(TARGET2)", asm_out_file);
27071 fputc ('\n', asm_out_file);
27073 return TRUE;
27076 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27078 static void
27079 arm_asm_emit_except_personality (rtx personality)
27081 fputs ("\t.personality\t", asm_out_file);
27082 output_addr_const (asm_out_file, personality);
27083 fputc ('\n', asm_out_file);
27086 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27088 static void
27089 arm_asm_init_sections (void)
27091 exception_section = get_unnamed_section (0, output_section_asm_op,
27092 "\t.handlerdata");
27094 #endif /* ARM_UNWIND_INFO */
27096 /* Output unwind directives for the start/end of a function. */
27098 void
27099 arm_output_fn_unwind (FILE * f, bool prologue)
27101 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27102 return;
27104 if (prologue)
27105 fputs ("\t.fnstart\n", f);
27106 else
27108 /* If this function will never be unwound, then mark it as such.
27109 The same condition is used in arm_unwind_emit to suppress
27110 the frame annotations. */
27111 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27112 && (TREE_NOTHROW (current_function_decl)
27113 || crtl->all_throwers_are_sibcalls))
27114 fputs("\t.cantunwind\n", f);
27116 fputs ("\t.fnend\n", f);
27120 static bool
27121 arm_emit_tls_decoration (FILE *fp, rtx x)
27123 enum tls_reloc reloc;
27124 rtx val;
27126 val = XVECEXP (x, 0, 0);
27127 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27129 output_addr_const (fp, val);
27131 switch (reloc)
27133 case TLS_GD32:
27134 fputs ("(tlsgd)", fp);
27135 break;
27136 case TLS_LDM32:
27137 fputs ("(tlsldm)", fp);
27138 break;
27139 case TLS_LDO32:
27140 fputs ("(tlsldo)", fp);
27141 break;
27142 case TLS_IE32:
27143 fputs ("(gottpoff)", fp);
27144 break;
27145 case TLS_LE32:
27146 fputs ("(tpoff)", fp);
27147 break;
27148 case TLS_DESCSEQ:
27149 fputs ("(tlsdesc)", fp);
27150 break;
27151 default:
27152 gcc_unreachable ();
27155 switch (reloc)
27157 case TLS_GD32:
27158 case TLS_LDM32:
27159 case TLS_IE32:
27160 case TLS_DESCSEQ:
27161 fputs (" + (. - ", fp);
27162 output_addr_const (fp, XVECEXP (x, 0, 2));
27163 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
27164 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27165 output_addr_const (fp, XVECEXP (x, 0, 3));
27166 fputc (')', fp);
27167 break;
27168 default:
27169 break;
27172 return TRUE;
27175 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27177 static void
27178 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27180 gcc_assert (size == 4);
27181 fputs ("\t.word\t", file);
27182 output_addr_const (file, x);
27183 fputs ("(tlsldo)", file);
27186 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27188 static bool
27189 arm_output_addr_const_extra (FILE *fp, rtx x)
27191 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27192 return arm_emit_tls_decoration (fp, x);
27193 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27195 char label[256];
27196 int labelno = INTVAL (XVECEXP (x, 0, 0));
27198 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27199 assemble_name_raw (fp, label);
27201 return TRUE;
27203 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27205 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27206 if (GOT_PCREL)
27207 fputs ("+.", fp);
27208 fputs ("-(", fp);
27209 output_addr_const (fp, XVECEXP (x, 0, 0));
27210 fputc (')', fp);
27211 return TRUE;
27213 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27215 output_addr_const (fp, XVECEXP (x, 0, 0));
27216 if (GOT_PCREL)
27217 fputs ("+.", fp);
27218 fputs ("-(", fp);
27219 output_addr_const (fp, XVECEXP (x, 0, 1));
27220 fputc (')', fp);
27221 return TRUE;
27223 else if (GET_CODE (x) == CONST_VECTOR)
27224 return arm_emit_vector_const (fp, x);
27226 return FALSE;
27229 /* Output assembly for a shift instruction.
27230 SET_FLAGS determines how the instruction modifies the condition codes.
27231 0 - Do not set condition codes.
27232 1 - Set condition codes.
27233 2 - Use smallest instruction. */
27234 const char *
27235 arm_output_shift(rtx * operands, int set_flags)
27237 char pattern[100];
27238 static const char flag_chars[3] = {'?', '.', '!'};
27239 const char *shift;
27240 HOST_WIDE_INT val;
27241 char c;
27243 c = flag_chars[set_flags];
27244 if (TARGET_UNIFIED_ASM)
27246 shift = shift_op(operands[3], &val);
27247 if (shift)
27249 if (val != -1)
27250 operands[2] = GEN_INT(val);
27251 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27253 else
27254 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27256 else
27257 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
27258 output_asm_insn (pattern, operands);
27259 return "";
27262 /* Output assembly for a WMMX immediate shift instruction. */
27263 const char *
27264 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27266 int shift = INTVAL (operands[2]);
27267 char templ[50];
27268 machine_mode opmode = GET_MODE (operands[0]);
27270 gcc_assert (shift >= 0);
27272 /* Handle shift values that exceed the maximum for the mode: > 63 (for the
27273 D qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27274 if (((opmode == V4HImode) && (shift > 15))
27275 || ((opmode == V2SImode) && (shift > 31))
27276 || ((opmode == DImode) && (shift > 63)))
27278 if (wror_or_wsra)
27280 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27281 output_asm_insn (templ, operands);
27282 if (opmode == DImode)
27284 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27285 output_asm_insn (templ, operands);
27288 else
27290 /* The destination register will contain all zeros. */
27291 sprintf (templ, "wzero\t%%0");
27292 output_asm_insn (templ, operands);
27294 return "";
27297 if ((opmode == DImode) && (shift > 32))
27299 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27300 output_asm_insn (templ, operands);
27301 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27302 output_asm_insn (templ, operands);
27304 else
27306 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27307 output_asm_insn (templ, operands);
27309 return "";
27312 /* Output assembly for a WMMX tinsr instruction. */
27313 const char *
27314 arm_output_iwmmxt_tinsr (rtx *operands)
27316 int mask = INTVAL (operands[3]);
27317 int i;
27318 char templ[50];
27319 int units = mode_nunits[GET_MODE (operands[0])];
27320 gcc_assert ((mask & (mask - 1)) == 0);
27321 for (i = 0; i < units; ++i)
27323 if ((mask & 0x01) == 1)
27325 break;
27327 mask >>= 1;
27329 gcc_assert (i < units);
27331 switch (GET_MODE (operands[0]))
27333 case V8QImode:
27334 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27335 break;
27336 case V4HImode:
27337 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27338 break;
27339 case V2SImode:
27340 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27341 break;
27342 default:
27343 gcc_unreachable ();
27344 break;
27346 output_asm_insn (templ, operands);
27348 return "";
27351 /* Output a Thumb-1 casesi dispatch sequence. */
27352 const char *
27353 thumb1_output_casesi (rtx *operands)
27355 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27357 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27359 switch (GET_MODE(diff_vec))
27361 case QImode:
27362 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27363 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27364 case HImode:
27365 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27366 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27367 case SImode:
27368 return "bl\t%___gnu_thumb1_case_si";
27369 default:
27370 gcc_unreachable ();
27374 /* Output a Thumb-2 casesi instruction. */
27375 const char *
27376 thumb2_output_casesi (rtx *operands)
27378 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27380 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27382 output_asm_insn ("cmp\t%0, %1", operands);
27383 output_asm_insn ("bhi\t%l3", operands);
27384 switch (GET_MODE(diff_vec))
27386 case QImode:
27387 return "tbb\t[%|pc, %0]";
27388 case HImode:
27389 return "tbh\t[%|pc, %0, lsl #1]";
27390 case SImode:
27391 if (flag_pic)
27393 output_asm_insn ("adr\t%4, %l2", operands);
27394 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27395 output_asm_insn ("add\t%4, %4, %5", operands);
27396 return "bx\t%4";
27398 else
27400 output_asm_insn ("adr\t%4, %l2", operands);
27401 return "ldr\t%|pc, [%4, %0, lsl #2]";
27403 default:
27404 gcc_unreachable ();
27408 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27409 per-core tuning structs. */
27410 static int
27411 arm_issue_rate (void)
27413 return current_tune->issue_rate;
27416 /* Return how many instructions the scheduler should look ahead to choose
27417 the best one. */
27418 static int
27419 arm_first_cycle_multipass_dfa_lookahead (void)
27421 int issue_rate = arm_issue_rate ();
27423 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27426 /* Enable modeling of L2 auto-prefetcher. */
27427 static int
27428 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27430 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27433 const char *
27434 arm_mangle_type (const_tree type)
27436 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27437 has to be mangled as if it is in the "std" namespace. */
27438 if (TARGET_AAPCS_BASED
27439 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27440 return "St9__va_list";
27442 /* Half-precision float. */
27443 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27444 return "Dh";
27446 /* Try mangling as a NEON type; TYPE_NAME is non-NULL if this is a
27447 built-in type. */
27448 if (TYPE_NAME (type) != NULL)
27449 return arm_mangle_builtin_type (type);
27451 /* Use the default mangling. */
27452 return NULL;
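/* For example, the AAPCS va_list type is mangled as "St9__va_list"
   (i.e. std::__va_list), and __fp16 (a REAL_TYPE with precision 16)
   is mangled as "Dh", as handled above.  */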
27455 /* Order of allocation of core registers for Thumb: this allocation is
27456 written over the corresponding initial entries of the array
27457 initialized with REG_ALLOC_ORDER. We allocate all low registers
27458 first. Saving and restoring a low register is usually cheaper than
27459 using a call-clobbered high register. */
27461 static const int thumb_core_reg_alloc_order[] =
27463 3, 2, 1, 0, 4, 5, 6, 7,
27464 14, 12, 8, 9, 10, 11
27467 /* Adjust register allocation order when compiling for Thumb. */
27469 void
27470 arm_order_regs_for_local_alloc (void)
27472 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27473 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27474 if (TARGET_THUMB)
27475 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27476 sizeof (thumb_core_reg_alloc_order));
27479 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27481 bool
27482 arm_frame_pointer_required (void)
27484 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27485 return true;
27487 /* If the function receives nonlocal gotos, it needs to save the frame
27488 pointer in the nonlocal_goto_save_area object. */
27489 if (cfun->has_nonlocal_label)
27490 return true;
27492 /* The frame pointer is required for non-leaf APCS frames. */
27493 if (TARGET_ARM && TARGET_APCS_FRAME && !leaf_function_p ())
27494 return true;
27496 /* If we are probing the stack in the prologue, we will have a faulting
27497 instruction prior to the stack adjustment and this requires a frame
27498 pointer if we want to catch the exception using the EABI unwinder. */
27499 if (!IS_INTERRUPT (arm_current_func_type ())
27500 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27501 && arm_except_unwind_info (&global_options) == UI_TARGET
27502 && cfun->can_throw_non_call_exceptions)
27504 HOST_WIDE_INT size = get_frame_size ();
27506 /* That's irrelevant if there is no stack adjustment. */
27507 if (size <= 0)
27508 return false;
27510 /* That's relevant only if there is a stack probe. */
27511 if (crtl->is_leaf && !cfun->calls_alloca)
27513 /* We don't have the final size of the frame so adjust. */
27514 size += 32 * UNITS_PER_WORD;
27515 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27516 return true;
27518 else
27519 return true;
27522 return false;
27525 /* Only Thumb-1 lacks conditional execution, so return true if
27526 the target is not Thumb-1. */
27527 static bool
27528 arm_have_conditional_execution (void)
27530 return !TARGET_THUMB1;
27533 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27534 static HOST_WIDE_INT
27535 arm_vector_alignment (const_tree type)
27537 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27539 if (TARGET_AAPCS_BASED)
27540 align = MIN (align, 64);
27542 return align;
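/* For example, a 128-bit NEON vector type has a TYPE_SIZE of 128 bits, but
   under AAPCS it is given only 64-bit alignment here rather than its natural
   128-bit alignment.  */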
27545 static unsigned int
27546 arm_autovectorize_vector_sizes (void)
27548 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27551 static bool
27552 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27554 /* Vectors which aren't in packed structures will not be less aligned than
27555 the natural alignment of their element type, so this is safe. */
27556 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27557 return !is_packed;
27559 return default_builtin_vector_alignment_reachable (type, is_packed);
27562 static bool
27563 arm_builtin_support_vector_misalignment (machine_mode mode,
27564 const_tree type, int misalignment,
27565 bool is_packed)
27567 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27569 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27571 if (is_packed)
27572 return align == 1;
27574 /* If the misalignment is unknown, we should be able to handle the access
27575 so long as it is not to a member of a packed data structure. */
27576 if (misalignment == -1)
27577 return true;
27579 /* Return true if the misalignment is a multiple of the natural alignment
27580 of the vector's element type. This is probably always going to be
27581 true in practice, since we've already established that this isn't a
27582 packed access. */
27583 return ((misalignment % align) == 0);
27586 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27587 is_packed);
27590 static void
27591 arm_conditional_register_usage (void)
27593 int regno;
27595 if (TARGET_THUMB1 && optimize_size)
27597 /* When optimizing for size on Thumb-1, it's better not
27598 to use the HI regs, because of the overhead of
27599 stacking them. */
27600 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27601 fixed_regs[regno] = call_used_regs[regno] = 1;
27604 /* The link register can be clobbered by any branch insn,
27605 but we have no way to track that at present, so mark
27606 it as unavailable. */
27607 if (TARGET_THUMB1)
27608 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27610 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27612 /* VFPv3 registers are disabled when earlier VFP
27613 versions are selected due to the definition of
27614 LAST_VFP_REGNUM. */
27615 for (regno = FIRST_VFP_REGNUM;
27616 regno <= LAST_VFP_REGNUM; ++ regno)
27618 fixed_regs[regno] = 0;
27619 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27620 || regno >= FIRST_VFP_REGNUM + 32;
27624 if (TARGET_REALLY_IWMMXT)
27626 regno = FIRST_IWMMXT_GR_REGNUM;
27627 /* The 2002/10/09 revision of the XScale ABI has wCG0
27628 and wCG1 as call-preserved registers. The 2002/11/21
27629 revision changed this so that all wCG registers are
27630 scratch registers. */
27631 for (regno = FIRST_IWMMXT_GR_REGNUM;
27632 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27633 fixed_regs[regno] = 0;
27634 /* The XScale ABI has wR0 - wR9 as scratch registers,
27635 the rest as call-preserved registers. */
27636 for (regno = FIRST_IWMMXT_REGNUM;
27637 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27639 fixed_regs[regno] = 0;
27640 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27644 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27646 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27647 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27649 else if (TARGET_APCS_STACK)
27651 fixed_regs[10] = 1;
27652 call_used_regs[10] = 1;
27654 /* -mcaller-super-interworking reserves r11 for calls to
27655 _interwork_r11_call_via_rN(). Making the register global
27656 is an easy way of ensuring that it remains valid for all
27657 calls. */
27658 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27659 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27661 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27662 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27663 if (TARGET_CALLER_INTERWORKING)
27664 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27666 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27669 static reg_class_t
27670 arm_preferred_rename_class (reg_class_t rclass)
27672 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27673 using GENERAL_REGS. During the register renaming pass, we prefer
27674 LO_REGS so that code size can be reduced. */
27675 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27676 return LO_REGS;
27677 else
27678 return NO_REGS;
27681 /* Compute the attribute "length" of insn "*push_multi".
27682 So this function MUST be kept in sync with that insn pattern. */
27683 int
27684 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27686 int i, regno, hi_reg;
27687 int num_saves = XVECLEN (parallel_op, 0);
27689 /* ARM mode. */
27690 if (TARGET_ARM)
27691 return 4;
27692 /* Thumb1 mode. */
27693 if (TARGET_THUMB1)
27694 return 2;
27696 /* Thumb2 mode. */
27697 regno = REGNO (first_op);
27698 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27699 for (i = 1; i < num_saves && !hi_reg; i++)
27701 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27702 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27705 if (!hi_reg)
27706 return 2;
27707 return 4;
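/* For instance, in Thumb-2 "push {r4-r7, lr}" only involves low registers
   and LR, so it gets the 16-bit encoding (length 2), while "push {r4, r8}"
   includes a high register and needs the 32-bit encoding (length 4).  */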
27710 /* Compute the number of instructions emitted by output_move_double. */
27711 int
27712 arm_count_output_move_double_insns (rtx *operands)
27714 int count;
27715 rtx ops[2];
27716 /* output_move_double may modify the operands array, so call it
27717 here on a copy of the array. */
27718 ops[0] = operands[0];
27719 ops[1] = operands[1];
27720 output_move_double (ops, false, &count);
27721 return count;
27724 int
27725 vfp3_const_double_for_fract_bits (rtx operand)
27727 REAL_VALUE_TYPE r0;
27729 if (!CONST_DOUBLE_P (operand))
27730 return 0;
27732 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27733 if (exact_real_inverse (DFmode, &r0)
27734 && !REAL_VALUE_NEGATIVE (r0))
27736 if (exact_real_truncate (DFmode, &r0))
27738 HOST_WIDE_INT value = real_to_integer (&r0);
27739 value = value & 0xffffffff;
27740 if ((value != 0) && ( (value & (value - 1)) == 0))
27741 return int_log2 (value);
27744 return 0;
27747 int
27748 vfp3_const_double_for_bits (rtx operand)
27750 REAL_VALUE_TYPE r0;
27752 if (!CONST_DOUBLE_P (operand))
27753 return 0;
27755 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27756 if (exact_real_truncate (DFmode, &r0))
27758 HOST_WIDE_INT value = real_to_integer (&r0);
27759 value = value & 0xffffffff;
27760 if ((value != 0) && ( (value & (value - 1)) == 0))
27761 return int_log2 (value);
27764 return 0;
27767 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27769 static void
27770 arm_pre_atomic_barrier (enum memmodel model)
27772 if (need_atomic_barrier_p (model, true))
27773 emit_insn (gen_memory_barrier ());
27776 static void
27777 arm_post_atomic_barrier (enum memmodel model)
27779 if (need_atomic_barrier_p (model, false))
27780 emit_insn (gen_memory_barrier ());
27783 /* Emit the load-exclusive and store-exclusive instructions.
27784 Use acquire and release versions if necessary. */
27786 static void
27787 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27789 rtx (*gen) (rtx, rtx);
27791 if (acq)
27793 switch (mode)
27795 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27796 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27797 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27798 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27799 default:
27800 gcc_unreachable ();
27803 else
27805 switch (mode)
27807 case QImode: gen = gen_arm_load_exclusiveqi; break;
27808 case HImode: gen = gen_arm_load_exclusivehi; break;
27809 case SImode: gen = gen_arm_load_exclusivesi; break;
27810 case DImode: gen = gen_arm_load_exclusivedi; break;
27811 default:
27812 gcc_unreachable ();
27816 emit_insn (gen (rval, mem));
27819 static void
27820 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27821 rtx mem, bool rel)
27823 rtx (*gen) (rtx, rtx, rtx);
27825 if (rel)
27827 switch (mode)
27829 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27830 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27831 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27832 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27833 default:
27834 gcc_unreachable ();
27837 else
27839 switch (mode)
27841 case QImode: gen = gen_arm_store_exclusiveqi; break;
27842 case HImode: gen = gen_arm_store_exclusivehi; break;
27843 case SImode: gen = gen_arm_store_exclusivesi; break;
27844 case DImode: gen = gen_arm_store_exclusivedi; break;
27845 default:
27846 gcc_unreachable ();
27850 emit_insn (gen (bval, rval, mem));
27853 /* Mark the previous jump instruction as unlikely. */
27855 static void
27856 emit_unlikely_jump (rtx insn)
27858 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27860 insn = emit_jump_insn (insn);
27861 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
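/* REG_BR_PROB_BASE is 10000, so the probability attached above works out to
   99, i.e. the jump is annotated as taken roughly 1% of the time.  */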
27864 /* Expand a compare and swap pattern. */
27866 void
27867 arm_expand_compare_and_swap (rtx operands[])
27869 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27870 machine_mode mode;
27871 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27873 bval = operands[0];
27874 rval = operands[1];
27875 mem = operands[2];
27876 oldval = operands[3];
27877 newval = operands[4];
27878 is_weak = operands[5];
27879 mod_s = operands[6];
27880 mod_f = operands[7];
27881 mode = GET_MODE (mem);
27883 /* Normally the succ memory model must be stronger than fail, but in the
27884 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27885 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27887 if (TARGET_HAVE_LDACQ
27888 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
27889 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
27890 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27892 switch (mode)
27894 case QImode:
27895 case HImode:
27896 /* For narrow modes, we're going to perform the comparison in SImode,
27897 so do the zero-extension now. */
27898 rval = gen_reg_rtx (SImode);
27899 oldval = convert_modes (SImode, mode, oldval, true);
27900 /* FALLTHRU */
27902 case SImode:
27903 /* Force the value into a register if needed. We waited until after
27904 the zero-extension above to do this properly. */
27905 if (!arm_add_operand (oldval, SImode))
27906 oldval = force_reg (SImode, oldval);
27907 break;
27909 case DImode:
27910 if (!cmpdi_operand (oldval, mode))
27911 oldval = force_reg (mode, oldval);
27912 break;
27914 default:
27915 gcc_unreachable ();
27918 switch (mode)
27920 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27921 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27922 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27923 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27924 default:
27925 gcc_unreachable ();
27928 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27930 if (mode == QImode || mode == HImode)
27931 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27933 /* In all cases, we arrange for success to be signaled by Z set.
27934 This arrangement allows for the boolean result to be used directly
27935 in a subsequent branch, post optimization. */
27936 x = gen_rtx_REG (CCmode, CC_REGNUM);
27937 x = gen_rtx_EQ (SImode, x, const0_rtx);
27938 emit_insn (gen_rtx_SET (bval, x));
27941 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27942 another memory store between the load-exclusive and store-exclusive can
27943 reset the monitor from Exclusive to Open state. This means we must wait
27944 until after reload to split the pattern, lest we get a register spill in
27945 the middle of the atomic sequence. */
27947 void
27948 arm_split_compare_and_swap (rtx operands[])
27950 rtx rval, mem, oldval, newval, scratch;
27951 machine_mode mode;
27952 enum memmodel mod_s, mod_f;
27953 bool is_weak;
27954 rtx_code_label *label1, *label2;
27955 rtx x, cond;
27957 rval = operands[0];
27958 mem = operands[1];
27959 oldval = operands[2];
27960 newval = operands[3];
27961 is_weak = (operands[4] != const0_rtx);
27962 mod_s = memmodel_from_int (INTVAL (operands[5]));
27963 mod_f = memmodel_from_int (INTVAL (operands[6]));
27964 scratch = operands[7];
27965 mode = GET_MODE (mem);
27967 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
27969 bool use_acquire = TARGET_HAVE_LDACQ
27970 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27971 || is_mm_release (mod_s));
27973 bool use_release = TARGET_HAVE_LDACQ
27974 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27975 || is_mm_acquire (mod_s));
27977 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
27978 a full barrier is emitted after the store-release. */
27979 if (is_armv8_sync)
27980 use_acquire = false;
27982 /* Checks whether a barrier is needed and emits one accordingly. */
27983 if (!(use_acquire || use_release))
27984 arm_pre_atomic_barrier (mod_s);
27986 label1 = NULL;
27987 if (!is_weak)
27989 label1 = gen_label_rtx ();
27990 emit_label (label1);
27992 label2 = gen_label_rtx ();
27994 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27996 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27997 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27998 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27999 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28000 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28002 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
28004 /* Weak or strong, we want EQ to be true for success, so that we
28005 match the flags that we got from the compare above. */
28006 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28007 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
28008 emit_insn (gen_rtx_SET (cond, x));
28010 if (!is_weak)
28012 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28013 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28014 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
28015 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28018 if (!is_mm_relaxed (mod_f))
28019 emit_label (label2);
28021 /* Checks whether a barrier is needed and emits one accordingly. */
28022 if (is_armv8_sync
28023 || !(use_acquire || use_release))
28024 arm_post_atomic_barrier (mod_s);
28026 if (is_mm_relaxed (mod_f))
28027 emit_label (label2);
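/* Illustrative sketch (not verbatim compiler output) of what this split
   produces for a strong SImode compare-and-swap when no acquire/release
   instructions or barriers are needed; register names are placeholders:

	1:	ldrex	Rval, [Rmem]
		cmp	Rval, Roldval
		bne	2f
		strex	Rtmp, Rnewval, [Rmem]
		cmp	Rtmp, #0
		bne	1b
	2:

   Success is then signalled through the Z flag, as arranged by
   arm_expand_compare_and_swap above.  */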
28030 void
28031 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28032 rtx value, rtx model_rtx, rtx cond)
28034 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28035 machine_mode mode = GET_MODE (mem);
28036 machine_mode wmode = (mode == DImode ? DImode : SImode);
28037 rtx_code_label *label;
28038 rtx x;
28040 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28042 bool use_acquire = TARGET_HAVE_LDACQ
28043 && !(is_mm_relaxed (model) || is_mm_consume (model)
28044 || is_mm_release (model));
28046 bool use_release = TARGET_HAVE_LDACQ
28047 && !(is_mm_relaxed (model) || is_mm_consume (model)
28048 || is_mm_acquire (model));
28050 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28051 a full barrier is emitted after the store-release. */
28052 if (is_armv8_sync)
28053 use_acquire = false;
28055 /* Checks whether a barrier is needed and emits one accordingly. */
28056 if (!(use_acquire || use_release))
28057 arm_pre_atomic_barrier (model);
28059 label = gen_label_rtx ();
28060 emit_label (label);
28062 if (new_out)
28063 new_out = gen_lowpart (wmode, new_out);
28064 if (old_out)
28065 old_out = gen_lowpart (wmode, old_out);
28066 else
28067 old_out = new_out;
28068 value = simplify_gen_subreg (wmode, value, mode, 0);
28070 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28072 switch (code)
28074 case SET:
28075 new_out = value;
28076 break;
28078 case NOT:
28079 x = gen_rtx_AND (wmode, old_out, value);
28080 emit_insn (gen_rtx_SET (new_out, x));
28081 x = gen_rtx_NOT (wmode, new_out);
28082 emit_insn (gen_rtx_SET (new_out, x));
28083 break;
28085 case MINUS:
28086 if (CONST_INT_P (value))
28088 value = GEN_INT (-INTVAL (value));
28089 code = PLUS;
28091 /* FALLTHRU */
28093 case PLUS:
28094 if (mode == DImode)
28096 /* DImode plus/minus need to clobber flags. */
28097 /* The adddi3 and subdi3 patterns are incorrectly written so that
28098 they require matching operands, even when we could easily support
28099 three operands. Thankfully, this can be fixed up post-splitting,
28100 as the individual add+adc patterns do accept three operands and
28101 post-reload cprop can make these moves go away. */
28102 emit_move_insn (new_out, old_out);
28103 if (code == PLUS)
28104 x = gen_adddi3 (new_out, new_out, value);
28105 else
28106 x = gen_subdi3 (new_out, new_out, value);
28107 emit_insn (x);
28108 break;
28110 /* FALLTHRU */
28112 default:
28113 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28114 emit_insn (gen_rtx_SET (new_out, x));
28115 break;
28118 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28119 use_release);
28121 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28122 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28124 /* Checks whether a barrier is needed and emits one accordingly. */
28125 if (is_armv8_sync
28126 || !(use_acquire || use_release))
28127 arm_post_atomic_barrier (model);
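/* Similarly, for a simple SImode atomic operation such as fetch-and-add with
   no barriers, the split above boils down to a sketch like this (placeholder
   register names):

	1:	ldrex	Rold, [Rmem]
		add	Rnew, Rold, Rvalue
		strex	Rcond, Rnew, [Rmem]
		cmp	Rcond, #0
		bne	1b
*/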
28130 #define MAX_VECT_LEN 16
28132 struct expand_vec_perm_d
28134 rtx target, op0, op1;
28135 unsigned char perm[MAX_VECT_LEN];
28136 machine_mode vmode;
28137 unsigned char nelt;
28138 bool one_vector_p;
28139 bool testing_p;
28142 /* Generate a variable permutation. */
28144 static void
28145 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28147 machine_mode vmode = GET_MODE (target);
28148 bool one_vector_p = rtx_equal_p (op0, op1);
28150 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28151 gcc_checking_assert (GET_MODE (op0) == vmode);
28152 gcc_checking_assert (GET_MODE (op1) == vmode);
28153 gcc_checking_assert (GET_MODE (sel) == vmode);
28154 gcc_checking_assert (TARGET_NEON);
28156 if (one_vector_p)
28158 if (vmode == V8QImode)
28159 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28160 else
28161 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28163 else
28165 rtx pair;
28167 if (vmode == V8QImode)
28169 pair = gen_reg_rtx (V16QImode);
28170 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28171 pair = gen_lowpart (TImode, pair);
28172 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28174 else
28176 pair = gen_reg_rtx (OImode);
28177 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28178 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28183 void
28184 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28186 machine_mode vmode = GET_MODE (target);
28187 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28188 bool one_vector_p = rtx_equal_p (op0, op1);
28189 rtx rmask[MAX_VECT_LEN], mask;
28191 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28192 numbering of elements for big-endian, we must reverse the order. */
28193 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28195 /* The VTBL instruction does not use a modulo index, so we must take care
28196 of that ourselves. */
28197 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28198 for (i = 0; i < nelt; ++i)
28199 rmask[i] = mask;
28200 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28201 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28203 arm_expand_vec_perm_1 (target, op0, op1, sel);
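/* For example, with a single V8QImode input the mask built above is 7, so
   every selector byte is reduced modulo 8 before the table lookup; with two
   distinct inputs it is 15, i.e. reduction modulo 16.  */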
28206 /* Generate or test for an insn that supports a constant permutation. */
28208 /* Recognize patterns for the VUZP insns. */
28210 static bool
28211 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28213 unsigned int i, odd, mask, nelt = d->nelt;
28214 rtx out0, out1, in0, in1;
28215 rtx (*gen)(rtx, rtx, rtx, rtx);
28217 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28218 return false;
28220 /* Note that these are little-endian tests. Adjust for big-endian later. */
28221 if (d->perm[0] == 0)
28222 odd = 0;
28223 else if (d->perm[0] == 1)
28224 odd = 1;
28225 else
28226 return false;
28227 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28229 for (i = 0; i < nelt; i++)
28231 unsigned elt = (i * 2 + odd) & mask;
28232 if (d->perm[i] != elt)
28233 return false;
28236 /* Success! */
28237 if (d->testing_p)
28238 return true;
28240 switch (d->vmode)
28242 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28243 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28244 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28245 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28246 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28247 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28248 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28249 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28250 default:
28251 gcc_unreachable ();
28254 in0 = d->op0;
28255 in1 = d->op1;
28256 if (BYTES_BIG_ENDIAN)
28258 std::swap (in0, in1);
28259 odd = !odd;
28262 out0 = d->target;
28263 out1 = gen_reg_rtx (d->vmode);
28264 if (odd)
28265 std::swap (out0, out1);
28267 emit_insn (gen (out0, in0, in1, out1));
28268 return true;
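/* Example of a selector this routine matches (V8QImode, two operands):
   { 0, 2, 4, 6, 8, 10, 12, 14 }, the even-numbered lanes of the concatenation
   of op0 and op1, recognized here with odd == 0 and emitted as a single
   VUZP.  */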
28271 /* Recognize patterns for the VZIP insns. */
28273 static bool
28274 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28276 unsigned int i, high, mask, nelt = d->nelt;
28277 rtx out0, out1, in0, in1;
28278 rtx (*gen)(rtx, rtx, rtx, rtx);
28280 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28281 return false;
28283 /* Note that these are little-endian tests. Adjust for big-endian later. */
28284 high = nelt / 2;
28285 if (d->perm[0] == high)
28286 ;
28287 else if (d->perm[0] == 0)
28288 high = 0;
28289 else
28290 return false;
28291 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28293 for (i = 0; i < nelt / 2; i++)
28295 unsigned elt = (i + high) & mask;
28296 if (d->perm[i * 2] != elt)
28297 return false;
28298 elt = (elt + nelt) & mask;
28299 if (d->perm[i * 2 + 1] != elt)
28300 return false;
28303 /* Success! */
28304 if (d->testing_p)
28305 return true;
28307 switch (d->vmode)
28309 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28310 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28311 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28312 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28313 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28314 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28315 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28316 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28317 default:
28318 gcc_unreachable ();
28321 in0 = d->op0;
28322 in1 = d->op1;
28323 if (BYTES_BIG_ENDIAN)
28325 std::swap (in0, in1);
28326 high = !high;
28329 out0 = d->target;
28330 out1 = gen_reg_rtx (d->vmode);
28331 if (high)
28332 std::swap (out0, out1);
28334 emit_insn (gen (out0, in0, in1, out1));
28335 return true;
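/* Example (V8QImode, two operands): the selector { 0, 8, 1, 9, 2, 10, 3, 11 }
   interleaves the low halves of op0 and op1 and is matched here with
   high == 0, mapping to a single VZIP.  */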
28338 /* Recognize patterns for the VREV insns. */
28340 static bool
28341 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28343 unsigned int i, j, diff, nelt = d->nelt;
28344 rtx (*gen)(rtx, rtx);
28346 if (!d->one_vector_p)
28347 return false;
28349 diff = d->perm[0];
28350 switch (diff)
28352 case 7:
28353 switch (d->vmode)
28355 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28356 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28357 default:
28358 return false;
28360 break;
28361 case 3:
28362 switch (d->vmode)
28364 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28365 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28366 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28367 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28368 default:
28369 return false;
28371 break;
28372 case 1:
28373 switch (d->vmode)
28375 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28376 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28377 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28378 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28379 case V4SImode: gen = gen_neon_vrev64v4si; break;
28380 case V2SImode: gen = gen_neon_vrev64v2si; break;
28381 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28382 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28383 default:
28384 return false;
28386 break;
28387 default:
28388 return false;
28391 for (i = 0; i < nelt ; i += diff + 1)
28392 for (j = 0; j <= diff; j += 1)
28394 /* This is guaranteed to be true as the value of diff
28395 is 7, 3, 1 and we should have enough elements in the
28396 queue to generate this. Getting a vector mask with a
28397 value of diff other than these values implies that
28398 something is wrong by the time we get here. */
28399 gcc_assert (i + j < nelt);
28400 if (d->perm[i + j] != i + diff - j)
28401 return false;
28404 /* Success! */
28405 if (d->testing_p)
28406 return true;
28408 emit_insn (gen (d->target, d->op0));
28409 return true;
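/* Example (V8QImode, single operand): the selector { 7, 6, 5, 4, 3, 2, 1, 0 }
   has diff == 7 and reverses the bytes within the 64-bit vector, so it is
   emitted as a VREV64.8.  */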
28412 /* Recognize patterns for the VTRN insns. */
28414 static bool
28415 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28417 unsigned int i, odd, mask, nelt = d->nelt;
28418 rtx out0, out1, in0, in1;
28419 rtx (*gen)(rtx, rtx, rtx, rtx);
28421 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28422 return false;
28424 /* Note that these are little-endian tests. Adjust for big-endian later. */
28425 if (d->perm[0] == 0)
28426 odd = 0;
28427 else if (d->perm[0] == 1)
28428 odd = 1;
28429 else
28430 return false;
28431 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28433 for (i = 0; i < nelt; i += 2)
28435 if (d->perm[i] != i + odd)
28436 return false;
28437 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28438 return false;
28441 /* Success! */
28442 if (d->testing_p)
28443 return true;
28445 switch (d->vmode)
28447 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28448 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28449 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28450 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28451 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28452 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28453 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28454 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28455 default:
28456 gcc_unreachable ();
28459 in0 = d->op0;
28460 in1 = d->op1;
28461 if (BYTES_BIG_ENDIAN)
28463 std::swap (in0, in1);
28464 odd = !odd;
28467 out0 = d->target;
28468 out1 = gen_reg_rtx (d->vmode);
28469 if (odd)
28470 std::swap (out0, out1);
28472 emit_insn (gen (out0, in0, in1, out1));
28473 return true;
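/* Example (V8QImode, two operands): the selector { 0, 8, 2, 10, 4, 12, 6, 14 }
   is matched here with odd == 0 and maps to a single VTRN.  */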
28476 /* Recognize patterns for the VEXT insns. */
28478 static bool
28479 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28481 unsigned int i, nelt = d->nelt;
28482 rtx (*gen) (rtx, rtx, rtx, rtx);
28483 rtx offset;
28485 unsigned int location;
28487 unsigned int next = d->perm[0] + 1;
28489 /* TODO: Handle GCC's numbering of elements for big-endian. */
28490 if (BYTES_BIG_ENDIAN)
28491 return false;
28493 /* Check if the extracted indexes are increasing by one. */
28494 for (i = 1; i < nelt; next++, i++)
28496 /* If we hit the most significant element of the 2nd vector in
28497 the previous iteration, no need to test further. */
28498 if (next == 2 * nelt)
28499 return false;
28501 /* If we are operating on only one vector: it could be a
28502 rotation. If there are only two elements of size < 64, let
28503 arm_evpc_neon_vrev catch it. */
28504 if (d->one_vector_p && (next == nelt))
28506 if ((nelt == 2) && (d->vmode != V2DImode))
28507 return false;
28508 else
28509 next = 0;
28512 if (d->perm[i] != next)
28513 return false;
28516 location = d->perm[0];
28518 switch (d->vmode)
28520 case V16QImode: gen = gen_neon_vextv16qi; break;
28521 case V8QImode: gen = gen_neon_vextv8qi; break;
28522 case V4HImode: gen = gen_neon_vextv4hi; break;
28523 case V8HImode: gen = gen_neon_vextv8hi; break;
28524 case V2SImode: gen = gen_neon_vextv2si; break;
28525 case V4SImode: gen = gen_neon_vextv4si; break;
28526 case V2SFmode: gen = gen_neon_vextv2sf; break;
28527 case V4SFmode: gen = gen_neon_vextv4sf; break;
28528 case V2DImode: gen = gen_neon_vextv2di; break;
28529 default:
28530 return false;
28533 /* Success! */
28534 if (d->testing_p)
28535 return true;
28537 offset = GEN_INT (location);
28538 emit_insn (gen (d->target, d->op0, d->op1, offset));
28539 return true;
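/* Example (V8QImode, two operands): the selector { 3, 4, 5, 6, 7, 8, 9, 10 }
   selects a contiguous run of the concatenated inputs starting at index 3,
   so it is emitted as a VEXT with offset #3.  */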
28542 /* The NEON VTBL instruction is a fully variable permutation that's even
28543 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28544 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28545 can do slightly better by expanding this as a constant where we don't
28546 have to apply a mask. */
28548 static bool
28549 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28551 rtx rperm[MAX_VECT_LEN], sel;
28552 machine_mode vmode = d->vmode;
28553 unsigned int i, nelt = d->nelt;
28555 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28556 numbering of elements for big-endian, we must reverse the order. */
28557 if (BYTES_BIG_ENDIAN)
28558 return false;
28560 if (d->testing_p)
28561 return true;
28563 /* Generic code will try constant permutation twice. Once with the
28564 original mode and again with the elements lowered to QImode.
28565 So wait and don't do the selector expansion ourselves. */
28566 if (vmode != V8QImode && vmode != V16QImode)
28567 return false;
28569 for (i = 0; i < nelt; ++i)
28570 rperm[i] = GEN_INT (d->perm[i]);
28571 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28572 sel = force_reg (vmode, sel);
28574 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28575 return true;
28578 static bool
28579 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28581 /* Check if the input mask matches vext before reordering the
28582 operands. */
28583 if (TARGET_NEON)
28584 if (arm_evpc_neon_vext (d))
28585 return true;
28587 /* The pattern matching functions above are written to look for a small
28588 number to begin the sequence (0, 1, N/2). If we begin with an index
28589 from the second operand, we can swap the operands. */
28590 if (d->perm[0] >= d->nelt)
28592 unsigned i, nelt = d->nelt;
28594 for (i = 0; i < nelt; ++i)
28595 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28597 std::swap (d->op0, d->op1);
28600 if (TARGET_NEON)
28602 if (arm_evpc_neon_vuzp (d))
28603 return true;
28604 if (arm_evpc_neon_vzip (d))
28605 return true;
28606 if (arm_evpc_neon_vrev (d))
28607 return true;
28608 if (arm_evpc_neon_vtrn (d))
28609 return true;
28610 return arm_evpc_neon_vtbl (d);
28612 return false;
28615 /* Expand a vec_perm_const pattern. */
28617 bool
28618 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28620 struct expand_vec_perm_d d;
28621 int i, nelt, which;
28623 d.target = target;
28624 d.op0 = op0;
28625 d.op1 = op1;
28627 d.vmode = GET_MODE (target);
28628 gcc_assert (VECTOR_MODE_P (d.vmode));
28629 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28630 d.testing_p = false;
28632 for (i = which = 0; i < nelt; ++i)
28634 rtx e = XVECEXP (sel, 0, i);
28635 int ei = INTVAL (e) & (2 * nelt - 1);
28636 which |= (ei < nelt ? 1 : 2);
28637 d.perm[i] = ei;
28640 switch (which)
28642 default:
28643 gcc_unreachable();
28645 case 3:
28646 d.one_vector_p = false;
28647 if (!rtx_equal_p (op0, op1))
28648 break;
28650 /* The elements of PERM do not suggest that only the first operand
28651 is used, but both operands are identical. Allow easier matching
28652 of the permutation by folding the permutation into the single
28653 input vector. */
28654 /* FALLTHRU */
28655 case 2:
28656 for (i = 0; i < nelt; ++i)
28657 d.perm[i] &= nelt - 1;
28658 d.op0 = op1;
28659 d.one_vector_p = true;
28660 break;
28662 case 1:
28663 d.op1 = op0;
28664 d.one_vector_p = true;
28665 break;
28668 return arm_expand_vec_perm_const_1 (&d);
28671 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28673 static bool
28674 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28675 const unsigned char *sel)
28677 struct expand_vec_perm_d d;
28678 unsigned int i, nelt, which;
28679 bool ret;
28681 d.vmode = vmode;
28682 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28683 d.testing_p = true;
28684 memcpy (d.perm, sel, nelt);
28686 /* Categorize the set of elements in the selector. */
28687 for (i = which = 0; i < nelt; ++i)
28689 unsigned char e = d.perm[i];
28690 gcc_assert (e < 2 * nelt);
28691 which |= (e < nelt ? 1 : 2);
28694 /* For all elements from second vector, fold the elements to first. */
28695 if (which == 2)
28696 for (i = 0; i < nelt; ++i)
28697 d.perm[i] -= nelt;
28699 /* Check whether the mask can be applied to the vector type. */
28700 d.one_vector_p = (which != 3);
28702 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28703 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28704 if (!d.one_vector_p)
28705 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28707 start_sequence ();
28708 ret = arm_expand_vec_perm_const_1 (&d);
28709 end_sequence ();
28711 return ret;
28714 bool
28715 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28717 /* If we are soft float and either have ldrd or the mode fits in a
28718 single word, then all auto increment forms are ok. */
28719 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28720 return true;
28722 switch (code)
28724 /* Post increment and Pre Decrement are supported for all
28725 instruction forms except for vector forms. */
28726 case ARM_POST_INC:
28727 case ARM_PRE_DEC:
28728 if (VECTOR_MODE_P (mode))
28730 if (code != ARM_PRE_DEC)
28731 return true;
28732 else
28733 return false;
28736 return true;
28738 case ARM_POST_DEC:
28739 case ARM_PRE_INC:
28740 /* Without LDRD, and with a mode larger than a
28741 word, there is no point in auto-incrementing
28742 because ldm and stm will not have these forms. */
28743 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28744 return false;
28746 /* Vector and floating point modes do not support
28747 these auto increment forms. */
28748 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28749 return false;
28751 return true;
28753 default:
28754 return false;
28758 return false;
28761 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
28762 on ARM, since we know that shifts by negative amounts are no-ops.
28763 Additionally, the default expansion code is not available or suitable
28764 for post-reload insn splits (this can occur when the register allocator
28765 chooses not to do a shift in NEON).
28767 This function is used in both initial expand and post-reload splits, and
28768 handles all kinds of 64-bit shifts.
28770 Input requirements:
28771 - It is safe for the input and output to be the same register, but
28772 early-clobber rules apply for the shift amount and scratch registers.
28773 - Shift by register requires both scratch registers. In all other cases
28774 the scratch registers may be NULL.
28775 - Ashiftrt by a register also clobbers the CC register. */
28776 void
28777 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28778 rtx amount, rtx scratch1, rtx scratch2)
28780 rtx out_high = gen_highpart (SImode, out);
28781 rtx out_low = gen_lowpart (SImode, out);
28782 rtx in_high = gen_highpart (SImode, in);
28783 rtx in_low = gen_lowpart (SImode, in);
28785 /* Terminology:
28786 in = the register pair containing the input value.
28787 out = the destination register pair.
28788 up = the high- or low-part of each pair.
28789 down = the opposite part to "up".
28790 In a shift, we can consider bits to shift from "up"-stream to
28791 "down"-stream, so in a left-shift "up" is the low-part and "down"
28792 is the high-part of each register pair. */
28794 rtx out_up = code == ASHIFT ? out_low : out_high;
28795 rtx out_down = code == ASHIFT ? out_high : out_low;
28796 rtx in_up = code == ASHIFT ? in_low : in_high;
28797 rtx in_down = code == ASHIFT ? in_high : in_low;
28799 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28800 gcc_assert (out
28801 && (REG_P (out) || GET_CODE (out) == SUBREG)
28802 && GET_MODE (out) == DImode);
28803 gcc_assert (in
28804 && (REG_P (in) || GET_CODE (in) == SUBREG)
28805 && GET_MODE (in) == DImode);
28806 gcc_assert (amount
28807 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28808 && GET_MODE (amount) == SImode)
28809 || CONST_INT_P (amount)));
28810 gcc_assert (scratch1 == NULL
28811 || (GET_CODE (scratch1) == SCRATCH)
28812 || (GET_MODE (scratch1) == SImode
28813 && REG_P (scratch1)));
28814 gcc_assert (scratch2 == NULL
28815 || (GET_CODE (scratch2) == SCRATCH)
28816 || (GET_MODE (scratch2) == SImode
28817 && REG_P (scratch2)));
28818 gcc_assert (!REG_P (out) || !REG_P (amount)
28819 || !HARD_REGISTER_P (out)
28820 || (REGNO (out) != REGNO (amount)
28821 && REGNO (out) + 1 != REGNO (amount)));
28823 /* Macros to make following code more readable. */
28824 #define SUB_32(DEST,SRC) \
28825 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28826 #define RSB_32(DEST,SRC) \
28827 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28828 #define SUB_S_32(DEST,SRC) \
28829 gen_addsi3_compare0 ((DEST), (SRC), \
28830 GEN_INT (-32))
28831 #define SET(DEST,SRC) \
28832 gen_rtx_SET ((DEST), (SRC))
28833 #define SHIFT(CODE,SRC,AMOUNT) \
28834 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28835 #define LSHIFT(CODE,SRC,AMOUNT) \
28836 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28837 SImode, (SRC), (AMOUNT))
28838 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28839 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28840 SImode, (SRC), (AMOUNT))
28841 #define ORR(A,B) \
28842 gen_rtx_IOR (SImode, (A), (B))
28843 #define BRANCH(COND,LABEL) \
28844 gen_arm_cond_branch ((LABEL), \
28845 gen_rtx_ ## COND (CCmode, cc_reg, \
28846 const0_rtx), \
28847 cc_reg)
28849 /* Shifts by register and shifts by constant are handled separately. */
28850 if (CONST_INT_P (amount))
28852 /* We have a shift-by-constant. */
28854 /* First, handle out-of-range shift amounts.
28855 In both cases we try to match the result that an ARM instruction in a
28856 shift-by-register would give. This helps reduce execution
28857 differences between optimization levels, but it won't stop other
28858 parts of the compiler doing different things. This is "undefined"
28859 behaviour, in any case. */
28860 if (INTVAL (amount) <= 0)
28861 emit_insn (gen_movdi (out, in));
28862 else if (INTVAL (amount) >= 64)
28864 if (code == ASHIFTRT)
28866 rtx const31_rtx = GEN_INT (31);
28867 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28868 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28870 else
28871 emit_insn (gen_movdi (out, const0_rtx));
28874 /* Now handle valid shifts. */
28875 else if (INTVAL (amount) < 32)
28877 /* Shifts by a constant less than 32. */
28878 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28880 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28881 emit_insn (SET (out_down,
28882 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28883 out_down)));
28884 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28886 else
28888 /* Shifts by a constant greater than 31. */
28889 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28891 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28892 if (code == ASHIFTRT)
28893 emit_insn (gen_ashrsi3 (out_up, in_up,
28894 GEN_INT (31)));
28895 else
28896 emit_insn (SET (out_up, const0_rtx));
28899 else
28901 /* We have a shift-by-register. */
28902 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28904 /* This alternative requires the scratch registers. */
28905 gcc_assert (scratch1 && REG_P (scratch1));
28906 gcc_assert (scratch2 && REG_P (scratch2));
28908 /* We will need the values "amount-32" and "32-amount" later.
28909 Swapping them around now allows the later code to be more general. */
28910 switch (code)
28912 case ASHIFT:
28913 emit_insn (SUB_32 (scratch1, amount));
28914 emit_insn (RSB_32 (scratch2, amount));
28915 break;
28916 case ASHIFTRT:
28917 emit_insn (RSB_32 (scratch1, amount));
28918 /* Also set CC = amount > 32. */
28919 emit_insn (SUB_S_32 (scratch2, amount));
28920 break;
28921 case LSHIFTRT:
28922 emit_insn (RSB_32 (scratch1, amount));
28923 emit_insn (SUB_32 (scratch2, amount));
28924 break;
28925 default:
28926 gcc_unreachable ();
28929 /* Emit code like this:
28931 arithmetic-left:
28932 out_down = in_down << amount;
28933 out_down = (in_up << (amount - 32)) | out_down;
28934 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28935 out_up = in_up << amount;
28937 arithmetic-right:
28938 out_down = in_down >> amount;
28939 out_down = (in_up << (32 - amount)) | out_down;
28940 if (amount < 32)
28941 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28942 out_up = in_up >> amount;
28944 logical-right:
28945 out_down = in_down >> amount;
28946 out_down = (in_up << (32 - amount)) | out_down;
28947 if (amount < 32)
28948 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28949 out_up = in_up >> amount;
28951 The ARM and Thumb2 variants are the same but implemented slightly
28952 differently. If this were only called during expand we could just
28953 use the Thumb2 case and let combine do the right thing, but this
28954 can also be called from post-reload splitters. */
28956 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28958 if (!TARGET_THUMB2)
28960 /* Emit code for ARM mode. */
28961 emit_insn (SET (out_down,
28962 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28963 if (code == ASHIFTRT)
28965 rtx_code_label *done_label = gen_label_rtx ();
28966 emit_jump_insn (BRANCH (LT, done_label));
28967 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28968 out_down)));
28969 emit_label (done_label);
28971 else
28972 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28973 out_down)));
28975 else
28977 /* Emit code for Thumb2 mode.
28978 Thumb2 can't do shift and or in one insn. */
28979 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28980 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28982 if (code == ASHIFTRT)
28984 rtx_code_label *done_label = gen_label_rtx ();
28985 emit_jump_insn (BRANCH (LT, done_label));
28986 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28987 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28988 emit_label (done_label);
28990 else
28992 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28993 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28997 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29000 #undef SUB_32
29001 #undef RSB_32
29002 #undef SUB_S_32
29003 #undef SET
29004 #undef SHIFT
29005 #undef LSHIFT
29006 #undef REV_LSHIFT
29007 #undef ORR
29008 #undef BRANCH
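/* Worked example of the shift-by-constant path above: for a DImode left
   shift by 40 the amount is >= 32, so adj_amount is 8 and the whole
   expansion reduces to out_high = in_low << 8 and out_low = 0.  */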
29011 /* Returns true if the pattern is a valid symbolic address, which is either a
29012 symbol_ref or (symbol_ref + addend).
29014 According to the ARM ELF ABI, the initial addend of REL-type relocations
29015 for MOVW and MOVT instructions is formed by interpreting the 16-bit
29016 literal field of the instruction as a 16-bit signed value in the range
29017 -32768 <= A < 32768. */
29019 bool
29020 arm_valid_symbolic_address_p (rtx addr)
29022 rtx xop0, xop1 = NULL_RTX;
29023 rtx tmp = addr;
29025 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29026 return true;
29028 /* (const (plus: symbol_ref const_int)) */
29029 if (GET_CODE (addr) == CONST)
29030 tmp = XEXP (addr, 0);
29032 if (GET_CODE (tmp) == PLUS)
29034 xop0 = XEXP (tmp, 0);
29035 xop1 = XEXP (tmp, 1);
29037 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29038 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29041 return false;
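/* For example, (const (plus (symbol_ref "foo") (const_int 0x7fff))) is
   accepted, while an addend of 0x8000 is rejected as it falls outside the
   signed 16-bit range checked above.  */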
29044 /* Returns true if COMPARISON is a valid comparison operation, and puts
29045 the operands into a form that is valid. */
29046 bool
29047 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29049 enum rtx_code code = GET_CODE (*comparison);
29050 int code_int;
29051 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29052 ? GET_MODE (*op2) : GET_MODE (*op1);
29054 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29056 if (code == UNEQ || code == LTGT)
29057 return false;
29059 code_int = (int)code;
29060 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29061 PUT_CODE (*comparison, (enum rtx_code)code_int);
29063 switch (mode)
29065 case SImode:
29066 if (!arm_add_operand (*op1, mode))
29067 *op1 = force_reg (mode, *op1);
29068 if (!arm_add_operand (*op2, mode))
29069 *op2 = force_reg (mode, *op2);
29070 return true;
29072 case DImode:
29073 if (!cmpdi_operand (*op1, mode))
29074 *op1 = force_reg (mode, *op1);
29075 if (!cmpdi_operand (*op2, mode))
29076 *op2 = force_reg (mode, *op2);
29077 return true;
29079 case SFmode:
29080 case DFmode:
29081 if (!arm_float_compare_operand (*op1, mode))
29082 *op1 = force_reg (mode, *op1);
29083 if (!arm_float_compare_operand (*op2, mode))
29084 *op2 = force_reg (mode, *op2);
29085 return true;
29086 default:
29087 break;
29090 return false;
29094 /* Maximum number of instructions to set block of memory. */
29095 static int
29096 arm_block_set_max_insns (void)
29098 if (optimize_function_for_size_p (cfun))
29099 return 4;
29100 else
29101 return current_tune->max_insns_inline_memset;
29104 /* Return TRUE if it's profitable to set block of memory for
29105 non-vectorized case. VAL is the value to set the memory
29106 with. LENGTH is the number of bytes to set. ALIGN is the
29107 alignment of the destination memory in bytes. UNALIGNED_P
29108 is TRUE if we can only set the memory with instructions
29109 meeting alignment requirements. USE_STRD_P is TRUE if we
29110 can use strd to set the memory. */
29111 static bool
29112 arm_block_set_non_vect_profit_p (rtx val,
29113 unsigned HOST_WIDE_INT length,
29114 unsigned HOST_WIDE_INT align,
29115 bool unaligned_p, bool use_strd_p)
29117 int num = 0;
29118 /* For a leftover of 0-7 bytes, this table gives the minimum number of
29119 strb/strh/str instructions needed to set it. */
29120 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29122 if (unaligned_p)
29124 num = arm_const_inline_cost (SET, val);
29125 num += length / align + length % align;
29127 else if (use_strd_p)
29129 num = arm_const_double_inline_cost (val);
29130 num += (length >> 3) + leftover[length & 7];
29132 else
29134 num = arm_const_inline_cost (SET, val);
29135 num += (length >> 2) + leftover[length & 3];
29138 /* We may be able to combine last pair STRH/STRB into a single STR
29139 by shifting one byte back. */
29140 if (unaligned_access && length > 3 && (length & 3) == 3)
29141 num--;
29143 return (num <= arm_block_set_max_insns ());
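/* For instance, setting 11 word-aligned bytes with plain word stores (the
   final case above) costs (11 >> 2) == 2 str instructions plus
   leftover[11 & 3] == 2 more (an strh and an strb), one fewer if unaligned
   access lets the trailing strh/strb merge into a single str, on top of
   whatever arm_const_inline_cost charges for forming the value; the block
   is only expanded inline if that total does not exceed
   arm_block_set_max_insns ().  */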
29146 /* Return TRUE if it's profitable to set block of memory for
29147 vectorized case. LENGTH is the number of bytes to set.
29148 ALIGN is the alignment of destination memory in bytes.
29149 MODE is the vector mode used to set the memory. */
29150 static bool
29151 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29152 unsigned HOST_WIDE_INT align,
29153 machine_mode mode)
29155 int num;
29156 bool unaligned_p = ((align & 3) != 0);
29157 unsigned int nelt = GET_MODE_NUNITS (mode);
29159 /* Instruction loading constant value. */
29160 num = 1;
29161 /* Instructions storing the memory. */
29162 num += (length + nelt - 1) / nelt;
29163 /* Instructions adjusting the address expression. Only need to
29164 adjust address expression if it's 4 bytes aligned and bytes
29165 leftover can only be stored by mis-aligned store instruction. */
29166 if (!unaligned_p && (length & 3) != 0)
29167 num++;
29169 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29170 if (!unaligned_p && mode == V16QImode)
29171 num--;
29173 return (num <= arm_block_set_max_insns ());
29176 /* Set a block of memory using vectorization instructions for the
29177 unaligned case. We fill the first LENGTH bytes of the memory
29178 area starting from DSTBASE with byte constant VALUE. ALIGN is
29179 the alignment requirement of memory. Return TRUE if succeeded. */
29180 static bool
29181 arm_block_set_unaligned_vect (rtx dstbase,
29182 unsigned HOST_WIDE_INT length,
29183 unsigned HOST_WIDE_INT value,
29184 unsigned HOST_WIDE_INT align)
29186 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29187 rtx dst, mem;
29188 rtx val_elt, val_vec, reg;
29189 rtx rval[MAX_VECT_LEN];
29190 rtx (*gen_func) (rtx, rtx);
29191 machine_mode mode;
29192 unsigned HOST_WIDE_INT v = value;
29194 gcc_assert ((align & 0x3) != 0);
29195 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29196 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29197 if (length >= nelt_v16)
29199 mode = V16QImode;
29200 gen_func = gen_movmisalignv16qi;
29202 else
29204 mode = V8QImode;
29205 gen_func = gen_movmisalignv8qi;
29207 nelt_mode = GET_MODE_NUNITS (mode);
29208 gcc_assert (length >= nelt_mode);
29209 /* Skip if it isn't profitable. */
29210 if (!arm_block_set_vect_profit_p (length, align, mode))
29211 return false;
29213 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29214 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29216 v = sext_hwi (v, BITS_PER_WORD);
29217 val_elt = GEN_INT (v);
29218 for (j = 0; j < nelt_mode; j++)
29219 rval[j] = val_elt;
29221 reg = gen_reg_rtx (mode);
29222 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29223 /* Emit instruction loading the constant value. */
29224 emit_move_insn (reg, val_vec);
29226 /* Handle nelt_mode bytes in a vector. */
29227 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29229 emit_insn ((*gen_func) (mem, reg));
29230 if (i + 2 * nelt_mode <= length)
29231 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29234 /* If there are at least nelt_v8 bytes leftover, we must be in
29235 V16QI mode. */
29236 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29238 /* Handle (8, 16) bytes leftover. */
29239 if (i + nelt_v8 < length)
29241 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29242 /* We are shifting bytes back, set the alignment accordingly. */
29243 if ((length & 1) != 0 && align >= 2)
29244 set_mem_align (mem, BITS_PER_UNIT);
29246 emit_insn (gen_movmisalignv16qi (mem, reg));
29248 /* Handle (0, 8] bytes leftover. */
29249 else if (i < length && i + nelt_v8 >= length)
29251 if (mode == V16QImode)
29253 reg = gen_lowpart (V8QImode, reg);
29254 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
29256 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29257 + (nelt_mode - nelt_v8))));
29258 /* We are shifting bytes back, set the alignment accordingly. */
29259 if ((length & 1) != 0 && align >= 2)
29260 set_mem_align (mem, BITS_PER_UNIT);
29262 emit_insn (gen_movmisalignv8qi (mem, reg));
29265 return true;
29268 /* Set a block of memory using vectorization instructions for the
29269 aligned case. We fill the first LENGTH bytes of the memory area
29270 starting from DSTBASE with byte constant VALUE. ALIGN is the
29271 alignment requirement of memory. Return TRUE on success. */
29272 static bool
29273 arm_block_set_aligned_vect (rtx dstbase,
29274 unsigned HOST_WIDE_INT length,
29275 unsigned HOST_WIDE_INT value,
29276 unsigned HOST_WIDE_INT align)
29278 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29279 rtx dst, addr, mem;
29280 rtx val_elt, val_vec, reg;
29281 rtx rval[MAX_VECT_LEN];
29282 machine_mode mode;
29283 unsigned HOST_WIDE_INT v = value;
29285 gcc_assert ((align & 0x3) == 0);
29286 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29287 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29288 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29289 mode = V16QImode;
29290 else
29291 mode = V8QImode;
29293 nelt_mode = GET_MODE_NUNITS (mode);
29294 gcc_assert (length >= nelt_mode);
29295 /* Skip if it isn't profitable. */
29296 if (!arm_block_set_vect_profit_p (length, align, mode))
29297 return false;
29299 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29301 v = sext_hwi (v, BITS_PER_WORD);
29302 val_elt = GEN_INT (v);
29303 for (j = 0; j < nelt_mode; j++)
29304 rval[j] = val_elt;
29306 reg = gen_reg_rtx (mode);
29307 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29308 /* Emit instruction loading the constant value. */
29309 emit_move_insn (reg, val_vec);
29311 i = 0;
29312 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29313 if (mode == V16QImode)
29315 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29316 emit_insn (gen_movmisalignv16qi (mem, reg));
29317 i += nelt_mode;
29318 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29319 if (i + nelt_v8 < length && i + nelt_v16 > length)
29321 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29322 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29323 /* We are shifting bytes back, set the alignment accordingly. */
29324 if ((length & 0x3) == 0)
29325 set_mem_align (mem, BITS_PER_UNIT * 4);
29326 else if ((length & 0x1) == 0)
29327 set_mem_align (mem, BITS_PER_UNIT * 2);
29328 else
29329 set_mem_align (mem, BITS_PER_UNIT);
29331 emit_insn (gen_movmisalignv16qi (mem, reg));
29332 return true;
29334 /* Fall through for bytes leftover. */
29335 mode = V8QImode;
29336 nelt_mode = GET_MODE_NUNITS (mode);
29337 reg = gen_lowpart (V8QImode, reg);
29340 /* Handle 8 bytes in a vector. */
29341 for (; (i + nelt_mode <= length); i += nelt_mode)
29343 addr = plus_constant (Pmode, dst, i);
29344 mem = adjust_automodify_address (dstbase, mode, addr, i);
29345 emit_move_insn (mem, reg);
29348 /* Handle single word leftover by shifting 4 bytes back. We can
29349 use aligned access for this case. */
29350 if (i + UNITS_PER_WORD == length)
29352 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29353 mem = adjust_automodify_address (dstbase, mode,
29354 addr, i - UNITS_PER_WORD);
29355 /* We are shifting 4 bytes back, set the alignment accordingly. */
29356 if (align > UNITS_PER_WORD)
29357 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29359 emit_move_insn (mem, reg);
29361 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29362 We have to use unaligned access for this case. */
29363 else if (i < length)
29365 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29366 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29367 /* We are shifting bytes back, set the alignment accordingly. */
29368 if ((length & 1) == 0)
29369 set_mem_align (mem, BITS_PER_UNIT * 2);
29370 else
29371 set_mem_align (mem, BITS_PER_UNIT);
29373 emit_insn (gen_movmisalignv8qi (mem, reg));
29376 return true;
29379 /* Set a block of memory using plain strh/strb instructions, only
29380 using instructions allowed by ALIGN on the processor. We fill the
29381 first LENGTH bytes of the memory area starting from DSTBASE
29382 with byte constant VALUE. ALIGN is the alignment requirement
29383 of memory. */
29384 static bool
29385 arm_block_set_unaligned_non_vect (rtx dstbase,
29386 unsigned HOST_WIDE_INT length,
29387 unsigned HOST_WIDE_INT value,
29388 unsigned HOST_WIDE_INT align)
29390 unsigned int i;
29391 rtx dst, addr, mem;
29392 rtx val_exp, val_reg, reg;
29393 machine_mode mode;
29394 HOST_WIDE_INT v = value;
29396 gcc_assert (align == 1 || align == 2);
29398 if (align == 2)
29399 v |= (value << BITS_PER_UNIT);
29401 v = sext_hwi (v, BITS_PER_WORD);
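/* For example, with align == 2 and value == 0x5A the word built above
   is 0x5A5A, which is then stored with HImode (strh) moves, plus a
   trailing QImode store if LENGTH is odd.  */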
29402 val_exp = GEN_INT (v);
29403 /* Skip if it isn't profitable. */
29404 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29405 align, true, false))
29406 return false;
29408 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29409 mode = (align == 2 ? HImode : QImode);
29410 val_reg = force_reg (SImode, val_exp);
29411 reg = gen_lowpart (mode, val_reg);
29413 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29415 addr = plus_constant (Pmode, dst, i);
29416 mem = adjust_automodify_address (dstbase, mode, addr, i);
29417 emit_move_insn (mem, reg);
29420 /* Handle single byte leftover. */
29421 if (i + 1 == length)
29423 reg = gen_lowpart (QImode, val_reg);
29424 addr = plus_constant (Pmode, dst, i);
29425 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29426 emit_move_insn (mem, reg);
29427 i++;
29430 gcc_assert (i == length);
29431 return true;
29434 /* Set a block of memory using plain strd/str/strh/strb instructions,
29435 to permit unaligned stores on processors which support unaligned
29436 semantics for those instructions. We fill the first LENGTH bytes
29437 of the memory area starting from DSTBASE with byte constant VALUE.
29438 ALIGN is the alignment requirement of memory. */
29439 static bool
29440 arm_block_set_aligned_non_vect (rtx dstbase,
29441 unsigned HOST_WIDE_INT length,
29442 unsigned HOST_WIDE_INT value,
29443 unsigned HOST_WIDE_INT align)
29445 unsigned int i;
29446 rtx dst, addr, mem;
29447 rtx val_exp, val_reg, reg;
29448 unsigned HOST_WIDE_INT v;
29449 bool use_strd_p;
29451 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29452 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29454 v = (value | (value << 8) | (value << 16) | (value << 24));
29455 if (length < UNITS_PER_WORD)
29456 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29458 if (use_strd_p)
29459 v |= (v << BITS_PER_WORD);
29460 else
29461 v = sext_hwi (v, BITS_PER_WORD);
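/* For example, value == 0xAB is replicated to 0xABABABAB; when
   use_strd_p it is duplicated again into the upper word for DImode
   strd stores, and for a short block such as length == 3 the mask
   above leaves only 0x00ABABAB.  */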
29463 val_exp = GEN_INT (v);
29464 /* Skip if it isn't profitable. */
29465 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29466 align, false, use_strd_p))
29468 if (!use_strd_p)
29469 return false;
29471 /* Try without strd. */
29472 v = (v >> BITS_PER_WORD);
29473 v = sext_hwi (v, BITS_PER_WORD);
29474 val_exp = GEN_INT (v);
29475 use_strd_p = false;
29476 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29477 align, false, use_strd_p))
29478 return false;
29481 i = 0;
29482 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29483 /* Handle double words using strd if possible. */
29484 if (use_strd_p)
29486 val_reg = force_reg (DImode, val_exp);
29487 reg = val_reg;
29488 for (; (i + 8 <= length); i += 8)
29490 addr = plus_constant (Pmode, dst, i);
29491 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29492 emit_move_insn (mem, reg);
29495 else
29496 val_reg = force_reg (SImode, val_exp);
29498 /* Handle words. */
29499 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29500 for (; (i + 4 <= length); i += 4)
29502 addr = plus_constant (Pmode, dst, i);
29503 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29504 if ((align & 3) == 0)
29505 emit_move_insn (mem, reg);
29506 else
29507 emit_insn (gen_unaligned_storesi (mem, reg));
29510 /* Merge last pair of STRH and STRB into a STR if possible. */
29511 if (unaligned_access && i > 0 && (i + 3) == length)
29513 addr = plus_constant (Pmode, dst, i - 1);
29514 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29515 /* We are shifting one byte back, set the alignment accordingly. */
29516 if ((align & 1) == 0)
29517 set_mem_align (mem, BITS_PER_UNIT);
29519 /* Most likely this is an unaligned access, and we can't tell at
29520 compilation time. */
29521 emit_insn (gen_unaligned_storesi (mem, reg));
29522 return true;
29525 /* Handle half word leftover. */
29526 if (i + 2 <= length)
29528 reg = gen_lowpart (HImode, val_reg);
29529 addr = plus_constant (Pmode, dst, i);
29530 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29531 if ((align & 1) == 0)
29532 emit_move_insn (mem, reg);
29533 else
29534 emit_insn (gen_unaligned_storehi (mem, reg));
29536 i += 2;
29539 /* Handle single byte leftover. */
29540 if (i + 1 == length)
29542 reg = gen_lowpart (QImode, val_reg);
29543 addr = plus_constant (Pmode, dst, i);
29544 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29545 emit_move_insn (mem, reg);
29548 return true;
29551 /* Set a block of memory using vectorization instructions for both
29552 aligned and unaligned cases. We fill the first LENGTH bytes of
29553 the memory area starting from DSTBASE with byte constant VALUE.
29554 ALIGN is the alignment requirement of memory. */
29555 static bool
29556 arm_block_set_vect (rtx dstbase,
29557 unsigned HOST_WIDE_INT length,
29558 unsigned HOST_WIDE_INT value,
29559 unsigned HOST_WIDE_INT align)
29561 /* Check whether we need to use unaligned store instruction. */
29562 if (((align & 3) != 0 || (length & 3) != 0)
29563 /* Check whether unaligned store instruction is available. */
29564 && (!unaligned_access || BYTES_BIG_ENDIAN))
29565 return false;
29567 if ((align & 3) == 0)
29568 return arm_block_set_aligned_vect (dstbase, length, value, align);
29569 else
29570 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29573 /* Expand a string store operation. First we try to do it using
29574 vectorization instructions, then with ARM unaligned access and
29575 double-word stores if profitable. OPERANDS[0] is the destination,
29576 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29577 initialize the memory, OPERANDS[3] is the known alignment of the
29578 destination. */
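/* For example (illustrative only), a 64-byte memset on a NEON target
   whose tuning sets string_ops_prefer_neon is first attempted with
   arm_block_set_vect below; blocks larger than 64 bytes are rejected
   here so the caller falls back to the generic expansion.  */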
29579 bool
29580 arm_gen_setmem (rtx *operands)
29582 rtx dstbase = operands[0];
29583 unsigned HOST_WIDE_INT length;
29584 unsigned HOST_WIDE_INT value;
29585 unsigned HOST_WIDE_INT align;
29587 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29588 return false;
29590 length = UINTVAL (operands[1]);
29591 if (length > 64)
29592 return false;
29594 value = (UINTVAL (operands[2]) & 0xFF);
29595 align = UINTVAL (operands[3]);
29596 if (TARGET_NEON && length >= 8
29597 && current_tune->string_ops_prefer_neon
29598 && arm_block_set_vect (dstbase, length, value, align))
29599 return true;
29601 if (!unaligned_access && (align & 3) != 0)
29602 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29604 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29608 static bool
29609 arm_macro_fusion_p (void)
29611 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
29615 static bool
29616 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29618 rtx set_dest;
29619 rtx prev_set = single_set (prev);
29620 rtx curr_set = single_set (curr);
29622 if (!prev_set
29623 || !curr_set)
29624 return false;
29626 if (any_condjump_p (curr))
29627 return false;
29629 if (!arm_macro_fusion_p ())
29630 return false;
29632 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT)
29634 /* We are trying to fuse
29635 movw imm / movt imm
29636 instructions as a group that gets scheduled together. */
29638 set_dest = SET_DEST (curr_set);
29640 if (GET_MODE (set_dest) != SImode)
29641 return false;
29643 /* We are trying to match:
29644 prev (movw) == (set (reg r0) (const_int imm16))
29645 curr (movt) == (set (zero_extract (reg r0)
29646 (const_int 16)
29647 (const_int 16))
29648 (const_int imm16_1))
29650 prev (movw) == (set (reg r1)
29651 (high (symbol_ref ("SYM"))))
29652 curr (movt) == (set (reg r0)
29653 (lo_sum (reg r1)
29654 (symbol_ref ("SYM")))) */
29655 if (GET_CODE (set_dest) == ZERO_EXTRACT)
29657 if (CONST_INT_P (SET_SRC (curr_set))
29658 && CONST_INT_P (SET_SRC (prev_set))
29659 && REG_P (XEXP (set_dest, 0))
29660 && REG_P (SET_DEST (prev_set))
29661 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29662 return true;
29664 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29665 && REG_P (SET_DEST (curr_set))
29666 && REG_P (SET_DEST (prev_set))
29667 && GET_CODE (SET_SRC (prev_set)) == HIGH
29668 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29669 return true;
29671 return false;
29674 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29676 static unsigned HOST_WIDE_INT
29677 arm_asan_shadow_offset (void)
29679 return (unsigned HOST_WIDE_INT) 1 << 29;
29683 /* This is a temporary fix for PR60655. Ideally we need
29684 to handle most of these cases in the generic part but
29685 currently we reject minus (..) (sym_ref). We try to
29686 ameliorate the case with minus (sym_ref1) (sym_ref2)
29687 where they are in the same section. */
29689 static bool
29690 arm_const_not_ok_for_debug_p (rtx p)
29692 tree decl_op0 = NULL;
29693 tree decl_op1 = NULL;
29695 if (GET_CODE (p) == MINUS)
29697 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29699 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29700 if (decl_op1
29701 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29702 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29704 if ((TREE_CODE (decl_op1) == VAR_DECL
29705 || TREE_CODE (decl_op1) == CONST_DECL)
29706 && (TREE_CODE (decl_op0) == VAR_DECL
29707 || TREE_CODE (decl_op0) == CONST_DECL))
29708 return (get_variable_section (decl_op1, false)
29709 != get_variable_section (decl_op0, false));
29711 if (TREE_CODE (decl_op1) == LABEL_DECL
29712 && TREE_CODE (decl_op0) == LABEL_DECL)
29713 return (DECL_CONTEXT (decl_op1)
29714 != DECL_CONTEXT (decl_op0));
29717 return true;
29721 return false;
29724 /* Return TRUE if X is a reference to a value in a constant pool. */
29725 extern bool
29726 arm_is_constant_pool_ref (rtx x)
29728 return (MEM_P (x)
29729 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29730 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29733 /* Remember the last target of arm_set_current_function. */
29734 static GTY(()) tree arm_previous_fndecl;
29736 /* Invalidate arm_previous_fndecl. */
29737 void
29738 arm_reset_previous_fndecl (void)
29740 arm_previous_fndecl = NULL_TREE;
29743 /* Establish appropriate back-end context for processing the function
29744 FNDECL. The argument might be NULL to indicate processing at top
29745 level, outside of any function scope. */
29746 static void
29747 arm_set_current_function (tree fndecl)
29749 if (!fndecl || fndecl == arm_previous_fndecl)
29750 return;
29752 tree old_tree = (arm_previous_fndecl
29753 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
29754 : NULL_TREE);
29756 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
29758 arm_previous_fndecl = fndecl;
29759 if (old_tree == new_tree)
29760 return;
29762 if (new_tree && new_tree != target_option_default_node)
29764 cl_target_option_restore (&global_options,
29765 TREE_TARGET_OPTION (new_tree));
29767 if (TREE_TARGET_GLOBALS (new_tree))
29768 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
29769 else
29770 TREE_TARGET_GLOBALS (new_tree)
29771 = save_target_globals_default_opts ();
29774 else if (old_tree && old_tree != target_option_default_node)
29776 new_tree = target_option_current_node;
29778 cl_target_option_restore (&global_options,
29779 TREE_TARGET_OPTION (new_tree));
29780 if (TREE_TARGET_GLOBALS (new_tree))
29781 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
29782 else if (new_tree == target_option_default_node)
29783 restore_target_globals (&default_target_globals);
29784 else
29785 TREE_TARGET_GLOBALS (new_tree)
29786 = save_target_globals_default_opts ();
29789 arm_option_params_internal ();
29792 /* Implement TARGET_OPTION_PRINT. */
29794 static void
29795 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
29797 int flags = ptr->x_target_flags;
29799 fprintf (file, "%*sselected arch %s\n", indent, "",
29800 TARGET_THUMB2_P (flags) ? "thumb2" :
29801 TARGET_THUMB_P (flags) ? "thumb1" :
29802 "arm");
29805 /* Hook to determine if one function can safely inline another. */
29807 static bool
29808 arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED)
29810 /* Override default hook: it is always OK to inline between different modes.
29811 Functions with mode-specific instructions, e.g. using asm, must be explicitly
29812 protected with noinline. */
29813 return true;
29816 /* Inner function to process the attribute((target(...))): take an argument and
29817 set the current options from that argument. If we have a list, recursively
29818 go over the list. */
29820 static bool
29821 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
29823 if (TREE_CODE (args) == TREE_LIST)
29825 bool ret = true;
29826 for (; args; args = TREE_CHAIN (args))
29827 if (TREE_VALUE (args)
29828 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
29829 ret = false;
29830 return ret;
29833 else if (TREE_CODE (args) != STRING_CST)
29835 error ("attribute %<target%> argument not a string");
29836 return false;
29839 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
29840 while (argstr && *argstr != '\0')
29842 while (ISSPACE (*argstr))
29843 argstr++;
29845 if (!strcmp (argstr, "thumb"))
29847 opts->x_target_flags |= MASK_THUMB;
29848 arm_option_check_internal (opts);
29849 return true;
29852 if (!strcmp (argstr, "arm"))
29854 opts->x_target_flags &= ~MASK_THUMB;
29855 arm_option_check_internal (opts);
29856 return true;
29859 warning (0, "attribute(target(\"%s\")) is unknown", argstr);
29860 return false;
29863 return false;
29866 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
29868 tree
29869 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
29870 struct gcc_options *opts_set)
29872 if (!arm_valid_target_attribute_rec (args, opts))
29873 return NULL_TREE;
29875 /* Do any overrides, such as global options arch=xxx. */
29876 arm_option_override_internal (opts, opts_set);
29878 return build_target_option_node (opts);
29881 static void
29882 add_attribute (const char * mode, tree *attributes)
29884 size_t len = strlen (mode);
29885 tree value = build_string (len, mode);
29887 TREE_TYPE (value) = build_array_type (char_type_node,
29888 build_index_type (size_int (len)));
29890 *attributes = tree_cons (get_identifier ("target"),
29891 build_tree_list (NULL_TREE, value),
29892 *attributes);
29895 /* For testing. Insert thumb or arm modes alternately on functions. */
29897 static void
29898 arm_insert_attributes (tree fndecl, tree * attributes)
29900 const char *mode;
29902 if (! TARGET_FLIP_THUMB)
29903 return;
29905 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
29906 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
29907 return;
29909 /* Nested definitions must inherit mode. */
29910 if (current_function_decl)
29912 mode = TARGET_THUMB ? "thumb" : "arm";
29913 add_attribute (mode, attributes);
29914 return;
29917 /* If there is already a setting, don't change it. */
29918 if (lookup_attribute ("target", *attributes) != NULL)
29919 return;
29921 mode = thumb_flipper ? "thumb" : "arm";
29922 add_attribute (mode, attributes);
29924 thumb_flipper = !thumb_flipper;
29927 /* Hook to validate attribute((target("string"))). */
29929 static bool
29930 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
29931 tree args, int ARG_UNUSED (flags))
29933 bool ret = true;
29934 struct gcc_options func_options;
29935 tree cur_tree, new_optimize;
29936 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
29938 /* Get the optimization options of the current function. */
29939 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
29941 /* If the function changed the optimization levels as well as setting target
29942 options, start with the optimizations specified. */
29943 if (!func_optimize)
29944 func_optimize = optimization_default_node;
29946 /* Init func_options. */
29947 memset (&func_options, 0, sizeof (func_options));
29948 init_options_struct (&func_options, NULL);
29949 lang_hooks.init_options_struct (&func_options);
29951 /* Initialize func_options to the defaults. */
29952 cl_optimization_restore (&func_options,
29953 TREE_OPTIMIZATION (func_optimize));
29955 cl_target_option_restore (&func_options,
29956 TREE_TARGET_OPTION (target_option_default_node));
29958 /* Set func_options flags with new target mode. */
29959 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
29960 &global_options_set);
29962 if (cur_tree == NULL_TREE)
29963 ret = false;
29965 new_optimize = build_optimization_node (&func_options);
29967 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
29969 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
29971 return ret;
29974 void
29975 arm_declare_function_name (FILE *stream, const char *name, tree decl)
29977 if (TARGET_UNIFIED_ASM)
29978 fprintf (stream, "\t.syntax unified\n");
29979 else
29980 fprintf (stream, "\t.syntax divided\n");
29982 if (TARGET_THUMB)
29984 if (is_called_in_ARM_mode (decl)
29985 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
29986 && cfun->is_thunk))
29987 fprintf (stream, "\t.code 32\n");
29988 else if (TARGET_THUMB1)
29989 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
29990 else
29991 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
29993 else
29994 fprintf (stream, "\t.arm\n");
29996 if (TARGET_POKE_FUNCTION_NAME)
29997 arm_poke_function_name (stream, (const char *) name);
30000 /* If MEM is in the form of [base+offset], extract the two parts
30001 of the address and store them in BASE and OFFSET; otherwise return false
30002 after clearing BASE and OFFSET. */
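/* For example, (mem (plus (reg r3) (const_int 8))) yields
   *BASE == (reg r3) and *OFFSET == (const_int 8), while a plain
   (mem (reg r3)) yields *OFFSET == const0_rtx.  */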
30004 static bool
30005 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30007 rtx addr;
30009 gcc_assert (MEM_P (mem));
30011 addr = XEXP (mem, 0);
30013 /* Strip off const from addresses like (const (addr)). */
30014 if (GET_CODE (addr) == CONST)
30015 addr = XEXP (addr, 0);
30017 if (GET_CODE (addr) == REG)
30019 *base = addr;
30020 *offset = const0_rtx;
30021 return true;
30024 if (GET_CODE (addr) == PLUS
30025 && GET_CODE (XEXP (addr, 0)) == REG
30026 && CONST_INT_P (XEXP (addr, 1)))
30028 *base = XEXP (addr, 0);
30029 *offset = XEXP (addr, 1);
30030 return true;
30033 *base = NULL_RTX;
30034 *offset = NULL_RTX;
30036 return false;
30039 /* If INSN is a load or store with an address in the form of [base+offset],
30040 extract the two parts into BASE and OFFSET. IS_LOAD is set
30041 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30042 otherwise return FALSE. */
30044 static bool
30045 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30047 rtx x, dest, src;
30049 gcc_assert (INSN_P (insn));
30050 x = PATTERN (insn);
30051 if (GET_CODE (x) != SET)
30052 return false;
30054 src = SET_SRC (x);
30055 dest = SET_DEST (x);
30056 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30058 *is_load = false;
30059 extract_base_offset_in_addr (dest, base, offset);
30061 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30063 *is_load = true;
30064 extract_base_offset_in_addr (src, base, offset);
30066 else
30067 return false;
30069 return (*base != NULL_RTX && *offset != NULL_RTX);
30072 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30074 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30075 and PRI are only calculated for these instructions. For other instructions,
30076 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
30077 instruction fusion can be supported by returning different priorities.
30079 It's important that irrelevant instructions get the largest FUSION_PRI. */
30081 static void
30082 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30083 int *fusion_pri, int *pri)
30085 int tmp, off_val;
30086 bool is_load;
30087 rtx base, offset;
30089 gcc_assert (INSN_P (insn));
30091 tmp = max_pri - 1;
30092 if (!fusion_load_store (insn, &base, &offset, &is_load))
30094 *pri = tmp;
30095 *fusion_pri = tmp;
30096 return;
30099 /* Load goes first. */
30100 if (is_load)
30101 *fusion_pri = tmp - 1;
30102 else
30103 *fusion_pri = tmp - 2;
30105 tmp /= 2;
30107 /* INSN with smaller base register goes first. */
30108 tmp -= ((REGNO (base) & 0xff) << 20);
30110 /* INSN with smaller offset goes first. */
30111 off_val = (int)(INTVAL (offset));
30112 if (off_val >= 0)
30113 tmp -= (off_val & 0xfffff);
30114 else
30115 tmp += ((- off_val) & 0xfffff);
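/* For example, two loads from the same base register at offsets 0 and
   4 receive the same FUSION_PRI (max_pri - 2) but different PRI; the
   offset-0 load gets the larger PRI, matching the 'smaller offset goes
   first' rule above.  */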
30117 *pri = tmp;
30118 return;
30120 #include "gt-arm.h"