gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "sched-int.h"
54 #include "target-def.h"
55 #include "debug.h"
56 #include "langhooks.h"
57 #include "df.h"
58 #include "intl.h"
59 #include "libfuncs.h"
60 #include "params.h"
61 #include "opts.h"
62 #include "dumpfile.h"
63 #include "gimple-expr.h"
64 #include "builtins.h"
66 /* Forward definitions of types. */
67 typedef struct minipool_node Mnode;
68 typedef struct minipool_fixup Mfix;
70 void (*arm_lang_output_object_attributes_hook)(void);
72 struct four_ints
74 int i[4];
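/* Note (added; not in the original source): optimal_immediate_sequence
   below fills one of these with the up-to-four immediates used to
   synthesise a constant.  */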
77 /* Forward function declarations. */
78 static bool arm_const_not_ok_for_debug_p (rtx);
79 static bool arm_lra_p (void);
80 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
81 static int arm_compute_static_chain_stack_bytes (void);
82 static arm_stack_offsets *arm_get_frame_offsets (void);
83 static void arm_add_gc_roots (void);
84 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
85 HOST_WIDE_INT, rtx, rtx, int, int);
86 static unsigned bit_count (unsigned long);
87 static int arm_address_register_rtx_p (rtx, int);
88 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
89 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
90 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
91 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
92 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
93 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
94 inline static int thumb1_index_register_rtx_p (rtx, int);
95 static int thumb_far_jump_used_p (void);
96 static bool thumb_force_lr_save (void);
97 static unsigned arm_size_return_regs (void);
98 static bool arm_assemble_integer (rtx, unsigned int, int);
99 static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
100 static void arm_print_operand (FILE *, rtx, int);
101 static void arm_print_operand_address (FILE *, rtx);
102 static bool arm_print_operand_punct_valid_p (unsigned char code);
103 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
104 static arm_cc get_arm_condition_code (rtx);
105 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
106 static const char *output_multi_immediate (rtx *, const char *, const char *,
107 int, HOST_WIDE_INT);
108 static const char *shift_op (rtx, HOST_WIDE_INT *);
109 static struct machine_function *arm_init_machine_status (void);
110 static void thumb_exit (FILE *, int);
111 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
112 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
113 static Mnode *add_minipool_forward_ref (Mfix *);
114 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
115 static Mnode *add_minipool_backward_ref (Mfix *);
116 static void assign_minipool_offsets (Mfix *);
117 static void arm_print_value (FILE *, rtx);
118 static void dump_minipool (rtx_insn *);
119 static int arm_barrier_cost (rtx);
120 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
121 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
122 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
123 enum machine_mode, rtx);
124 static void arm_reorg (void);
125 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
126 static unsigned long arm_compute_save_reg0_reg12_mask (void);
127 static unsigned long arm_compute_save_reg_mask (void);
128 static unsigned long arm_isr_value (tree);
129 static unsigned long arm_compute_func_type (void);
130 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
131 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
132 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
133 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
134 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
135 #endif
136 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
137 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
138 static int arm_comp_type_attributes (const_tree, const_tree);
139 static void arm_set_default_type_attributes (tree);
140 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
141 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
142 static int optimal_immediate_sequence (enum rtx_code code,
143 unsigned HOST_WIDE_INT val,
144 struct four_ints *return_sequence);
145 static int optimal_immediate_sequence_1 (enum rtx_code code,
146 unsigned HOST_WIDE_INT val,
147 struct four_ints *return_sequence,
148 int i);
149 static int arm_get_strip_length (int);
150 static bool arm_function_ok_for_sibcall (tree, tree);
151 static enum machine_mode arm_promote_function_mode (const_tree,
152 enum machine_mode, int *,
153 const_tree, int);
154 static bool arm_return_in_memory (const_tree, const_tree);
155 static rtx arm_function_value (const_tree, const_tree, bool);
156 static rtx arm_libcall_value_1 (enum machine_mode);
157 static rtx arm_libcall_value (enum machine_mode, const_rtx);
158 static bool arm_function_value_regno_p (const unsigned int);
159 static void arm_internal_label (FILE *, const char *, unsigned long);
160 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
161 tree);
162 static bool arm_have_conditional_execution (void);
163 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
164 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
165 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
166 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
167 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
168 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
169 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
170 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
171 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
172 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
173 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
174 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
175 static void arm_init_builtins (void);
176 static void arm_init_iwmmxt_builtins (void);
177 static rtx safe_vector_operand (rtx, enum machine_mode);
178 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
179 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
180 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
181 static tree arm_builtin_decl (unsigned, bool);
182 static void emit_constant_insn (rtx cond, rtx pattern);
183 static rtx_insn *emit_set_insn (rtx, rtx);
184 static rtx emit_multi_reg_push (unsigned long, unsigned long);
185 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
186 tree, bool);
187 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
188 const_tree, bool);
189 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
190 const_tree, bool);
191 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
192 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
193 const_tree);
194 static rtx aapcs_libcall_value (enum machine_mode);
195 static int aapcs_select_return_coproc (const_tree, const_tree);
197 #ifdef OBJECT_FORMAT_ELF
198 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
199 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
200 #endif
201 #ifndef ARM_PE
202 static void arm_encode_section_info (tree, rtx, int);
203 #endif
205 static void arm_file_end (void);
206 static void arm_file_start (void);
208 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 enum machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 static void arm_asm_init_sections (void);
223 #endif
224 static rtx arm_dwarf_register_span (rtx);
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
241 static bool arm_cannot_copy_insn_p (rtx_insn *);
242 static int arm_issue_rate (void);
243 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
244 static bool arm_output_addr_const_extra (FILE *, rtx);
245 static bool arm_allocate_stack_slots_for_args (void);
246 static bool arm_warn_func_return (tree);
247 static const char *arm_invalid_parameter_type (const_tree t);
248 static const char *arm_invalid_return_type (const_tree t);
249 static tree arm_promoted_type (const_tree t);
250 static tree arm_convert_to_type (tree type, tree expr);
251 static bool arm_scalar_mode_supported_p (enum machine_mode);
252 static bool arm_frame_pointer_required (void);
253 static bool arm_can_eliminate (const int, const int);
254 static void arm_asm_trampoline_template (FILE *);
255 static void arm_trampoline_init (rtx, tree, rtx);
256 static rtx arm_trampoline_adjust_address (rtx);
257 static rtx arm_pic_static_addr (rtx orig, rtx reg);
258 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
259 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
260 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
261 static bool arm_array_mode_supported_p (enum machine_mode,
262 unsigned HOST_WIDE_INT);
263 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
264 static bool arm_class_likely_spilled_p (reg_class_t);
265 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
266 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
267 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
268 const_tree type,
269 int misalignment,
270 bool is_packed);
271 static void arm_conditional_register_usage (void);
272 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
273 static unsigned int arm_autovectorize_vector_sizes (void);
274 static int arm_default_branch_cost (bool, bool);
275 static int arm_cortex_a5_branch_cost (bool, bool);
276 static int arm_cortex_m_branch_cost (bool, bool);
278 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
279 const unsigned char *sel);
281 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
282 tree vectype,
283 int misalign ATTRIBUTE_UNUSED);
284 static unsigned arm_add_stmt_cost (void *data, int count,
285 enum vect_cost_for_stmt kind,
286 struct _stmt_vec_info *stmt_info,
287 int misalign,
288 enum vect_cost_model_location where);
290 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
291 bool op0_preserve_value);
292 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
294 /* Table of machine attributes. */
295 static const struct attribute_spec arm_attribute_table[] =
297 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
298 affects_type_identity } */
299 /* Function calls made to this symbol must be done indirectly, because
300 it may lie outside of the 26 bit addressing range of a normal function
301 call. */
302 { "long_call", 0, 0, false, true, true, NULL, false },
303 /* Whereas these functions are always known to reside within the 26 bit
304 addressing range. */
305 { "short_call", 0, 0, false, true, true, NULL, false },
306 /* Specify the procedure call conventions for a function. */
307 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
308 false },
309 /* Interrupt Service Routines have special prologue and epilogue requirements. */
310 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
311 false },
312 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
313 false },
314 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
315 false },
316 #ifdef ARM_PE
317 /* ARM/PE has three new attributes:
318 interfacearm - ?
319 dllexport - for exporting a function/variable that will live in a dll
320 dllimport - for importing a function/variable from a dll
322 Microsoft allows multiple declspecs in one __declspec, separating
323 them with spaces. We do NOT support this. Instead, use __declspec
 324 multiple times. */
326 { "dllimport", 0, 0, true, false, false, NULL, false },
327 { "dllexport", 0, 0, true, false, false, NULL, false },
328 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
329 false },
330 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
331 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
332 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
333 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
334 false },
335 #endif
336 { NULL, 0, 0, false, false, false, NULL, false }
339 /* Initialize the GCC target structure. */
340 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 #undef TARGET_MERGE_DECL_ATTRIBUTES
342 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
343 #endif
345 #undef TARGET_LEGITIMIZE_ADDRESS
346 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
348 #undef TARGET_LRA_P
349 #define TARGET_LRA_P arm_lra_p
351 #undef TARGET_ATTRIBUTE_TABLE
352 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
354 #undef TARGET_ASM_FILE_START
355 #define TARGET_ASM_FILE_START arm_file_start
356 #undef TARGET_ASM_FILE_END
357 #define TARGET_ASM_FILE_END arm_file_end
359 #undef TARGET_ASM_ALIGNED_SI_OP
360 #define TARGET_ASM_ALIGNED_SI_OP NULL
361 #undef TARGET_ASM_INTEGER
362 #define TARGET_ASM_INTEGER arm_assemble_integer
364 #undef TARGET_PRINT_OPERAND
365 #define TARGET_PRINT_OPERAND arm_print_operand
366 #undef TARGET_PRINT_OPERAND_ADDRESS
367 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
368 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
369 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
371 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
372 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
374 #undef TARGET_ASM_FUNCTION_PROLOGUE
375 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
377 #undef TARGET_ASM_FUNCTION_EPILOGUE
378 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
380 #undef TARGET_OPTION_OVERRIDE
381 #define TARGET_OPTION_OVERRIDE arm_option_override
383 #undef TARGET_COMP_TYPE_ATTRIBUTES
384 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
386 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
387 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
389 #undef TARGET_SCHED_ADJUST_COST
390 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
392 #undef TARGET_SCHED_REORDER
393 #define TARGET_SCHED_REORDER arm_sched_reorder
395 #undef TARGET_REGISTER_MOVE_COST
396 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
398 #undef TARGET_MEMORY_MOVE_COST
399 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
401 #undef TARGET_ENCODE_SECTION_INFO
402 #ifdef ARM_PE
403 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
404 #else
405 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
406 #endif
408 #undef TARGET_STRIP_NAME_ENCODING
409 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
411 #undef TARGET_ASM_INTERNAL_LABEL
412 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
414 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
415 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
417 #undef TARGET_FUNCTION_VALUE
418 #define TARGET_FUNCTION_VALUE arm_function_value
420 #undef TARGET_LIBCALL_VALUE
421 #define TARGET_LIBCALL_VALUE arm_libcall_value
423 #undef TARGET_FUNCTION_VALUE_REGNO_P
424 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
426 #undef TARGET_ASM_OUTPUT_MI_THUNK
427 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
428 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
429 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
431 #undef TARGET_RTX_COSTS
432 #define TARGET_RTX_COSTS arm_rtx_costs
433 #undef TARGET_ADDRESS_COST
434 #define TARGET_ADDRESS_COST arm_address_cost
436 #undef TARGET_SHIFT_TRUNCATION_MASK
437 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
438 #undef TARGET_VECTOR_MODE_SUPPORTED_P
439 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
440 #undef TARGET_ARRAY_MODE_SUPPORTED_P
441 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
442 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
443 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
444 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
445 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
446 arm_autovectorize_vector_sizes
448 #undef TARGET_MACHINE_DEPENDENT_REORG
449 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
451 #undef TARGET_INIT_BUILTINS
452 #define TARGET_INIT_BUILTINS arm_init_builtins
453 #undef TARGET_EXPAND_BUILTIN
454 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
455 #undef TARGET_BUILTIN_DECL
456 #define TARGET_BUILTIN_DECL arm_builtin_decl
458 #undef TARGET_INIT_LIBFUNCS
459 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
461 #undef TARGET_PROMOTE_FUNCTION_MODE
462 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
463 #undef TARGET_PROMOTE_PROTOTYPES
464 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
465 #undef TARGET_PASS_BY_REFERENCE
466 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
467 #undef TARGET_ARG_PARTIAL_BYTES
468 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
469 #undef TARGET_FUNCTION_ARG
470 #define TARGET_FUNCTION_ARG arm_function_arg
471 #undef TARGET_FUNCTION_ARG_ADVANCE
472 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
473 #undef TARGET_FUNCTION_ARG_BOUNDARY
474 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
476 #undef TARGET_SETUP_INCOMING_VARARGS
477 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
479 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
480 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
482 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
483 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
484 #undef TARGET_TRAMPOLINE_INIT
485 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
486 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
487 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
489 #undef TARGET_WARN_FUNC_RETURN
490 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
492 #undef TARGET_DEFAULT_SHORT_ENUMS
493 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
495 #undef TARGET_ALIGN_ANON_BITFIELD
496 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
498 #undef TARGET_NARROW_VOLATILE_BITFIELD
499 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
501 #undef TARGET_CXX_GUARD_TYPE
502 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
504 #undef TARGET_CXX_GUARD_MASK_BIT
505 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
507 #undef TARGET_CXX_GET_COOKIE_SIZE
508 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
510 #undef TARGET_CXX_COOKIE_HAS_SIZE
511 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
513 #undef TARGET_CXX_CDTOR_RETURNS_THIS
514 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
516 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
517 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
519 #undef TARGET_CXX_USE_AEABI_ATEXIT
520 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
522 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
523 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
524 arm_cxx_determine_class_data_visibility
526 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
527 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
529 #undef TARGET_RETURN_IN_MSB
530 #define TARGET_RETURN_IN_MSB arm_return_in_msb
532 #undef TARGET_RETURN_IN_MEMORY
533 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
535 #undef TARGET_MUST_PASS_IN_STACK
536 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
538 #if ARM_UNWIND_INFO
539 #undef TARGET_ASM_UNWIND_EMIT
540 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
542 /* EABI unwinding tables use a different format for the typeinfo tables. */
543 #undef TARGET_ASM_TTYPE
544 #define TARGET_ASM_TTYPE arm_output_ttype
546 #undef TARGET_ARM_EABI_UNWINDER
547 #define TARGET_ARM_EABI_UNWINDER true
549 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
550 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
552 #undef TARGET_ASM_INIT_SECTIONS
553 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
554 #endif /* ARM_UNWIND_INFO */
556 #undef TARGET_DWARF_REGISTER_SPAN
557 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
559 #undef TARGET_CANNOT_COPY_INSN_P
560 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
562 #ifdef HAVE_AS_TLS
563 #undef TARGET_HAVE_TLS
564 #define TARGET_HAVE_TLS true
565 #endif
567 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
568 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
570 #undef TARGET_LEGITIMATE_CONSTANT_P
571 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
573 #undef TARGET_CANNOT_FORCE_CONST_MEM
574 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
576 #undef TARGET_MAX_ANCHOR_OFFSET
577 #define TARGET_MAX_ANCHOR_OFFSET 4095
579 /* The minimum is set such that the total size of the block
580 for a particular anchor is -4088 + 1 + 4095 bytes, which is
581 divisible by eight, ensuring natural spacing of anchors. */
582 #undef TARGET_MIN_ANCHOR_OFFSET
583 #define TARGET_MIN_ANCHOR_OFFSET -4088
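/* Sketch (added for illustration; not part of the original file): the
   anchor-range arithmetic above spelled out.  Offsets -4088 .. +4095
   cover 4088 + 1 + 4095 = 8184 bytes, and 8184 is a multiple of 8,
   which is what keeps consecutive anchors naturally spaced.  The helper
   below is hypothetical, disabled, and only restates that arithmetic.  */
#if 0
static int
anchor_block_size_is_multiple_of_eight (void)
{
  const long min_offset = -4088;
  const long max_offset = 4095;
  const long block_size = max_offset - min_offset + 1;  /* 8184 bytes.  */
  return block_size % 8 == 0;                           /* 8184 = 8 * 1023.  */
}
#endif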
585 #undef TARGET_SCHED_ISSUE_RATE
586 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
588 #undef TARGET_MANGLE_TYPE
589 #define TARGET_MANGLE_TYPE arm_mangle_type
591 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
592 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
594 #undef TARGET_BUILD_BUILTIN_VA_LIST
595 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
596 #undef TARGET_EXPAND_BUILTIN_VA_START
597 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
598 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
599 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
601 #ifdef HAVE_AS_TLS
602 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
603 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
604 #endif
606 #undef TARGET_LEGITIMATE_ADDRESS_P
607 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
609 #undef TARGET_PREFERRED_RELOAD_CLASS
610 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
612 #undef TARGET_INVALID_PARAMETER_TYPE
613 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
615 #undef TARGET_INVALID_RETURN_TYPE
616 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
618 #undef TARGET_PROMOTED_TYPE
619 #define TARGET_PROMOTED_TYPE arm_promoted_type
621 #undef TARGET_CONVERT_TO_TYPE
622 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
624 #undef TARGET_SCALAR_MODE_SUPPORTED_P
625 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
627 #undef TARGET_FRAME_POINTER_REQUIRED
628 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
630 #undef TARGET_CAN_ELIMINATE
631 #define TARGET_CAN_ELIMINATE arm_can_eliminate
633 #undef TARGET_CONDITIONAL_REGISTER_USAGE
634 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
636 #undef TARGET_CLASS_LIKELY_SPILLED_P
637 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
639 #undef TARGET_VECTORIZE_BUILTINS
640 #define TARGET_VECTORIZE_BUILTINS
642 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
643 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
644 arm_builtin_vectorized_function
646 #undef TARGET_VECTOR_ALIGNMENT
647 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
649 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
650 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
651 arm_vector_alignment_reachable
653 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
654 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
655 arm_builtin_support_vector_misalignment
657 #undef TARGET_PREFERRED_RENAME_CLASS
658 #define TARGET_PREFERRED_RENAME_CLASS \
659 arm_preferred_rename_class
661 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
662 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
663 arm_vectorize_vec_perm_const_ok
665 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
666 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
667 arm_builtin_vectorization_cost
668 #undef TARGET_VECTORIZE_ADD_STMT_COST
669 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
671 #undef TARGET_CANONICALIZE_COMPARISON
672 #define TARGET_CANONICALIZE_COMPARISON \
673 arm_canonicalize_comparison
675 #undef TARGET_ASAN_SHADOW_OFFSET
676 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
678 #undef MAX_INSN_PER_IT_BLOCK
679 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
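/* Note (added): a Thumb-2 IT instruction can predicate up to four
   following instructions; with -mrestrict-it (the ARMv8-preferred form,
   reflected in arm_restrict_it) the block is limited to a single
   instruction, hence the 1 : 4 above.  */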
681 #undef TARGET_CAN_USE_DOLOOP_P
682 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
684 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
685 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
687 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
688 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
690 struct gcc_target targetm = TARGET_INITIALIZER;
692 /* Obstack for minipool constant handling. */
693 static struct obstack minipool_obstack;
694 static char * minipool_startobj;
696 /* The maximum number of insns skipped which
697 will be conditionalised if possible. */
698 static int max_insns_skipped = 5;
700 extern FILE * asm_out_file;
702 /* True if we are currently building a constant table. */
703 int making_const_table;
705 /* The processor for which instructions should be scheduled. */
706 enum processor_type arm_tune = arm_none;
708 /* The current tuning set. */
709 const struct tune_params *current_tune;
711 /* Which floating point hardware to schedule for. */
712 int arm_fpu_attr;
 714 /* Which floating point hardware to use. */
715 const struct arm_fpu_desc *arm_fpu_desc;
717 /* Used for Thumb call_via trampolines. */
718 rtx thumb_call_via_label[14];
719 static int thumb_call_reg_needed;
721 /* Bit values used to identify processor capabilities. */
722 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
723 #define FL_ARCH3M (1 << 1) /* Extended multiply */
724 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
725 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
726 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
727 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
728 #define FL_THUMB (1 << 6) /* Thumb aware */
729 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
730 #define FL_STRONG (1 << 8) /* StrongARM */
731 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
732 #define FL_XSCALE (1 << 10) /* XScale */
733 /* spare (1 << 11) */
734 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
735 media instructions. */
736 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
737 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
738 Note: ARM6 & 7 derivatives only. */
739 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
740 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
741 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
742 profile. */
743 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
744 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
745 #define FL_NEON (1 << 20) /* Neon instructions. */
746 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
747 architecture. */
748 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
749 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
750 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
751 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
753 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
754 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
 756 /* Flags that only affect tuning, not available instructions. */
757 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
758 | FL_CO_PROC)
760 #define FL_FOR_ARCH2 FL_NOTM
761 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
762 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
763 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
764 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
765 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
766 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
767 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
768 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
769 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
770 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
771 #define FL_FOR_ARCH6J FL_FOR_ARCH6
772 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
773 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
774 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
775 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
776 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
777 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
778 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
779 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
780 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
781 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
782 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
783 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
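/* Sketch (added for illustration; not part of the original file): the
   FL_* capability bits accumulate through the FL_FOR_ARCH* macros, and
   code elsewhere in this file tests them with simple mask operations
   against insn_flags / tune_flags.  The helper below is hypothetical
   and disabled; it only demonstrates the composition, e.g.
   FL_FOR_ARCH7A picks up FL_THUMB2 via FL_FOR_ARCH6T2.  */
#if 0
static int
flags_include_thumb2 (unsigned long flags)
{
  return (flags & FL_THUMB2) != 0;   /* True for FL_FOR_ARCH7A, for example.  */
}
#endif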
785 /* The bits in this mask specify which
786 instructions we are allowed to generate. */
787 static unsigned long insn_flags = 0;
789 /* The bits in this mask specify which instruction scheduling options should
790 be used. */
791 static unsigned long tune_flags = 0;
793 /* The highest ARM architecture version supported by the
794 target. */
795 enum base_architecture arm_base_arch = BASE_ARCH_0;
797 /* The following are used in the arm.md file as equivalents to bits
798 in the above two flag variables. */
800 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
801 int arm_arch3m = 0;
803 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
804 int arm_arch4 = 0;
806 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
807 int arm_arch4t = 0;
809 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
810 int arm_arch5 = 0;
812 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
813 int arm_arch5e = 0;
815 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
816 int arm_arch6 = 0;
818 /* Nonzero if this chip supports the ARM 6K extensions. */
819 int arm_arch6k = 0;
821 /* Nonzero if instructions present in ARMv6-M can be used. */
822 int arm_arch6m = 0;
824 /* Nonzero if this chip supports the ARM 7 extensions. */
825 int arm_arch7 = 0;
827 /* Nonzero if instructions not present in the 'M' profile can be used. */
828 int arm_arch_notm = 0;
830 /* Nonzero if instructions present in ARMv7E-M can be used. */
831 int arm_arch7em = 0;
833 /* Nonzero if instructions present in ARMv8 can be used. */
834 int arm_arch8 = 0;
836 /* Nonzero if this chip can benefit from load scheduling. */
837 int arm_ld_sched = 0;
839 /* Nonzero if this chip is a StrongARM. */
840 int arm_tune_strongarm = 0;
842 /* Nonzero if this chip supports Intel Wireless MMX technology. */
843 int arm_arch_iwmmxt = 0;
845 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
846 int arm_arch_iwmmxt2 = 0;
848 /* Nonzero if this chip is an XScale. */
849 int arm_arch_xscale = 0;
851 /* Nonzero if tuning for XScale */
852 int arm_tune_xscale = 0;
854 /* Nonzero if we want to tune for stores that access the write-buffer.
855 This typically means an ARM6 or ARM7 with MMU or MPU. */
856 int arm_tune_wbuf = 0;
858 /* Nonzero if tuning for Cortex-A9. */
859 int arm_tune_cortex_a9 = 0;
861 /* Nonzero if generating Thumb instructions. */
862 int thumb_code = 0;
864 /* Nonzero if generating Thumb-1 instructions. */
865 int thumb1_code = 0;
867 /* Nonzero if we should define __THUMB_INTERWORK__ in the
868 preprocessor.
869 XXX This is a bit of a hack, it's intended to help work around
870 problems in GLD which doesn't understand that armv5t code is
871 interworking clean. */
872 int arm_cpp_interwork = 0;
874 /* Nonzero if chip supports Thumb 2. */
875 int arm_arch_thumb2;
877 /* Nonzero if chip supports integer division instruction. */
878 int arm_arch_arm_hwdiv;
879 int arm_arch_thumb_hwdiv;
 881 /* Nonzero if we should use Neon to handle 64-bit operations rather
882 than core registers. */
883 int prefer_neon_for_64bits = 0;
885 /* Nonzero if we shouldn't use literal pools. */
886 bool arm_disable_literal_pool = false;
888 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
889 we must report the mode of the memory reference from
890 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
891 enum machine_mode output_memory_reference_mode;
893 /* The register number to be used for the PIC offset register. */
894 unsigned arm_pic_register = INVALID_REGNUM;
896 enum arm_pcs arm_pcs_default;
898 /* For an explanation of these variables, see final_prescan_insn below. */
899 int arm_ccfsm_state;
900 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
901 enum arm_cond_code arm_current_cc;
903 rtx arm_target_insn;
904 int arm_target_label;
905 /* The number of conditionally executed insns, including the current insn. */
906 int arm_condexec_count = 0;
907 /* A bitmask specifying the patterns for the IT block.
908 Zero means do not output an IT block before this insn. */
909 int arm_condexec_mask = 0;
910 /* The number of bits used in arm_condexec_mask. */
911 int arm_condexec_masklen = 0;
913 /* Nonzero if chip supports the ARMv8 CRC instructions. */
914 int arm_arch_crc = 0;
916 /* The condition codes of the ARM, and the inverse function. */
917 static const char * const arm_condition_codes[] =
919 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
920 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
923 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
924 int arm_regs_in_sequence[] =
926 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
929 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
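/* Note (added): unified assembler syntax (UAL) spells the left-shift
   mnemonic "lsl"; the older divided syntax accepted the synonym "asl",
   hence the choice above.  */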
930 #define streq(string1, string2) (strcmp (string1, string2) == 0)
932 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
933 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
934 | (1 << PIC_OFFSET_TABLE_REGNUM)))
936 /* Initialization code. */
938 struct processors
940 const char *const name;
941 enum processor_type core;
942 const char *arch;
943 enum base_architecture base_arch;
944 const unsigned long flags;
945 const struct tune_params *const tune;
949 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
950 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
951 prefetch_slots, \
952 l1_size, \
953 l1_line_size
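/* Sketch (added for illustration): the two macros above simply expand
   into the three prefetch-related initializers of a tune_params entry,
   so ARM_PREFETCH_BENEFICIAL (4, 32, 32) (hypothetical values) stands
   for "4, 32, 32" (slots, L1 size, L1 line size), while
   ARM_PREFETCH_NOT_BENEFICIAL stands for "0, -1, -1".  */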
955 /* arm generic vectorizer costs. */
956 static const
957 struct cpu_vec_costs arm_default_vec_cost = {
958 1, /* scalar_stmt_cost. */
 959 1, /* scalar_load_cost. */
960 1, /* scalar_store_cost. */
961 1, /* vec_stmt_cost. */
962 1, /* vec_to_scalar_cost. */
963 1, /* scalar_to_vec_cost. */
964 1, /* vec_align_load_cost. */
965 1, /* vec_unalign_load_cost. */
966 1, /* vec_unalign_store_cost. */
967 1, /* vec_store_cost. */
968 3, /* cond_taken_branch_cost. */
969 1, /* cond_not_taken_branch_cost. */
972 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
973 #include "aarch-cost-tables.h"
977 const struct cpu_cost_table cortexa9_extra_costs =
979 /* ALU */
981 0, /* arith. */
982 0, /* logical. */
983 0, /* shift. */
984 COSTS_N_INSNS (1), /* shift_reg. */
985 COSTS_N_INSNS (1), /* arith_shift. */
986 COSTS_N_INSNS (2), /* arith_shift_reg. */
987 0, /* log_shift. */
988 COSTS_N_INSNS (1), /* log_shift_reg. */
989 COSTS_N_INSNS (1), /* extend. */
990 COSTS_N_INSNS (2), /* extend_arith. */
991 COSTS_N_INSNS (1), /* bfi. */
992 COSTS_N_INSNS (1), /* bfx. */
993 0, /* clz. */
994 0, /* rev. */
995 0, /* non_exec. */
996 true /* non_exec_costs_exec. */
999 /* MULT SImode */
1001 COSTS_N_INSNS (3), /* simple. */
1002 COSTS_N_INSNS (3), /* flag_setting. */
1003 COSTS_N_INSNS (2), /* extend. */
1004 COSTS_N_INSNS (3), /* add. */
1005 COSTS_N_INSNS (2), /* extend_add. */
1006 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1008 /* MULT DImode */
1010 0, /* simple (N/A). */
1011 0, /* flag_setting (N/A). */
1012 COSTS_N_INSNS (4), /* extend. */
1013 0, /* add (N/A). */
1014 COSTS_N_INSNS (4), /* extend_add. */
1015 0 /* idiv (N/A). */
1018 /* LD/ST */
1020 COSTS_N_INSNS (2), /* load. */
1021 COSTS_N_INSNS (2), /* load_sign_extend. */
1022 COSTS_N_INSNS (2), /* ldrd. */
1023 COSTS_N_INSNS (2), /* ldm_1st. */
1024 1, /* ldm_regs_per_insn_1st. */
1025 2, /* ldm_regs_per_insn_subsequent. */
1026 COSTS_N_INSNS (5), /* loadf. */
1027 COSTS_N_INSNS (5), /* loadd. */
1028 COSTS_N_INSNS (1), /* load_unaligned. */
1029 COSTS_N_INSNS (2), /* store. */
1030 COSTS_N_INSNS (2), /* strd. */
1031 COSTS_N_INSNS (2), /* stm_1st. */
1032 1, /* stm_regs_per_insn_1st. */
1033 2, /* stm_regs_per_insn_subsequent. */
1034 COSTS_N_INSNS (1), /* storef. */
1035 COSTS_N_INSNS (1), /* stored. */
1036 COSTS_N_INSNS (1) /* store_unaligned. */
1039 /* FP SFmode */
1041 COSTS_N_INSNS (14), /* div. */
1042 COSTS_N_INSNS (4), /* mult. */
1043 COSTS_N_INSNS (7), /* mult_addsub. */
1044 COSTS_N_INSNS (30), /* fma. */
1045 COSTS_N_INSNS (3), /* addsub. */
1046 COSTS_N_INSNS (1), /* fpconst. */
1047 COSTS_N_INSNS (1), /* neg. */
1048 COSTS_N_INSNS (3), /* compare. */
1049 COSTS_N_INSNS (3), /* widen. */
1050 COSTS_N_INSNS (3), /* narrow. */
1051 COSTS_N_INSNS (3), /* toint. */
1052 COSTS_N_INSNS (3), /* fromint. */
1053 COSTS_N_INSNS (3) /* roundint. */
1055 /* FP DFmode */
1057 COSTS_N_INSNS (24), /* div. */
1058 COSTS_N_INSNS (5), /* mult. */
1059 COSTS_N_INSNS (8), /* mult_addsub. */
1060 COSTS_N_INSNS (30), /* fma. */
1061 COSTS_N_INSNS (3), /* addsub. */
1062 COSTS_N_INSNS (1), /* fpconst. */
1063 COSTS_N_INSNS (1), /* neg. */
1064 COSTS_N_INSNS (3), /* compare. */
1065 COSTS_N_INSNS (3), /* widen. */
1066 COSTS_N_INSNS (3), /* narrow. */
1067 COSTS_N_INSNS (3), /* toint. */
1068 COSTS_N_INSNS (3), /* fromint. */
1069 COSTS_N_INSNS (3) /* roundint. */
1072 /* Vector */
1074 COSTS_N_INSNS (1) /* alu. */
1078 const struct cpu_cost_table cortexa8_extra_costs =
1080 /* ALU */
1082 0, /* arith. */
1083 0, /* logical. */
1084 COSTS_N_INSNS (1), /* shift. */
1085 0, /* shift_reg. */
1086 COSTS_N_INSNS (1), /* arith_shift. */
1087 0, /* arith_shift_reg. */
1088 COSTS_N_INSNS (1), /* log_shift. */
1089 0, /* log_shift_reg. */
1090 0, /* extend. */
1091 0, /* extend_arith. */
1092 0, /* bfi. */
1093 0, /* bfx. */
1094 0, /* clz. */
1095 0, /* rev. */
1096 0, /* non_exec. */
1097 true /* non_exec_costs_exec. */
1100 /* MULT SImode */
1102 COSTS_N_INSNS (1), /* simple. */
1103 COSTS_N_INSNS (1), /* flag_setting. */
1104 COSTS_N_INSNS (1), /* extend. */
1105 COSTS_N_INSNS (1), /* add. */
1106 COSTS_N_INSNS (1), /* extend_add. */
1107 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1109 /* MULT DImode */
1111 0, /* simple (N/A). */
1112 0, /* flag_setting (N/A). */
1113 COSTS_N_INSNS (2), /* extend. */
1114 0, /* add (N/A). */
1115 COSTS_N_INSNS (2), /* extend_add. */
1116 0 /* idiv (N/A). */
1119 /* LD/ST */
1121 COSTS_N_INSNS (1), /* load. */
1122 COSTS_N_INSNS (1), /* load_sign_extend. */
1123 COSTS_N_INSNS (1), /* ldrd. */
1124 COSTS_N_INSNS (1), /* ldm_1st. */
1125 1, /* ldm_regs_per_insn_1st. */
1126 2, /* ldm_regs_per_insn_subsequent. */
1127 COSTS_N_INSNS (1), /* loadf. */
1128 COSTS_N_INSNS (1), /* loadd. */
1129 COSTS_N_INSNS (1), /* load_unaligned. */
1130 COSTS_N_INSNS (1), /* store. */
1131 COSTS_N_INSNS (1), /* strd. */
1132 COSTS_N_INSNS (1), /* stm_1st. */
1133 1, /* stm_regs_per_insn_1st. */
1134 2, /* stm_regs_per_insn_subsequent. */
1135 COSTS_N_INSNS (1), /* storef. */
1136 COSTS_N_INSNS (1), /* stored. */
1137 COSTS_N_INSNS (1) /* store_unaligned. */
1140 /* FP SFmode */
1142 COSTS_N_INSNS (36), /* div. */
1143 COSTS_N_INSNS (11), /* mult. */
1144 COSTS_N_INSNS (20), /* mult_addsub. */
1145 COSTS_N_INSNS (30), /* fma. */
1146 COSTS_N_INSNS (9), /* addsub. */
1147 COSTS_N_INSNS (3), /* fpconst. */
1148 COSTS_N_INSNS (3), /* neg. */
1149 COSTS_N_INSNS (6), /* compare. */
1150 COSTS_N_INSNS (4), /* widen. */
1151 COSTS_N_INSNS (4), /* narrow. */
1152 COSTS_N_INSNS (8), /* toint. */
1153 COSTS_N_INSNS (8), /* fromint. */
1154 COSTS_N_INSNS (8) /* roundint. */
1156 /* FP DFmode */
1158 COSTS_N_INSNS (64), /* div. */
1159 COSTS_N_INSNS (16), /* mult. */
1160 COSTS_N_INSNS (25), /* mult_addsub. */
1161 COSTS_N_INSNS (30), /* fma. */
1162 COSTS_N_INSNS (9), /* addsub. */
1163 COSTS_N_INSNS (3), /* fpconst. */
1164 COSTS_N_INSNS (3), /* neg. */
1165 COSTS_N_INSNS (6), /* compare. */
1166 COSTS_N_INSNS (6), /* widen. */
1167 COSTS_N_INSNS (6), /* narrow. */
1168 COSTS_N_INSNS (8), /* toint. */
1169 COSTS_N_INSNS (8), /* fromint. */
1170 COSTS_N_INSNS (8) /* roundint. */
1173 /* Vector */
1175 COSTS_N_INSNS (1) /* alu. */
1179 const struct cpu_cost_table cortexa5_extra_costs =
1181 /* ALU */
1183 0, /* arith. */
1184 0, /* logical. */
1185 COSTS_N_INSNS (1), /* shift. */
1186 COSTS_N_INSNS (1), /* shift_reg. */
1187 COSTS_N_INSNS (1), /* arith_shift. */
1188 COSTS_N_INSNS (1), /* arith_shift_reg. */
1189 COSTS_N_INSNS (1), /* log_shift. */
1190 COSTS_N_INSNS (1), /* log_shift_reg. */
1191 COSTS_N_INSNS (1), /* extend. */
1192 COSTS_N_INSNS (1), /* extend_arith. */
1193 COSTS_N_INSNS (1), /* bfi. */
1194 COSTS_N_INSNS (1), /* bfx. */
1195 COSTS_N_INSNS (1), /* clz. */
1196 COSTS_N_INSNS (1), /* rev. */
1197 0, /* non_exec. */
1198 true /* non_exec_costs_exec. */
1202 /* MULT SImode */
1204 0, /* simple. */
1205 COSTS_N_INSNS (1), /* flag_setting. */
1206 COSTS_N_INSNS (1), /* extend. */
1207 COSTS_N_INSNS (1), /* add. */
1208 COSTS_N_INSNS (1), /* extend_add. */
1209 COSTS_N_INSNS (7) /* idiv. */
1211 /* MULT DImode */
1213 0, /* simple (N/A). */
1214 0, /* flag_setting (N/A). */
1215 COSTS_N_INSNS (1), /* extend. */
1216 0, /* add. */
1217 COSTS_N_INSNS (2), /* extend_add. */
1218 0 /* idiv (N/A). */
1221 /* LD/ST */
1223 COSTS_N_INSNS (1), /* load. */
1224 COSTS_N_INSNS (1), /* load_sign_extend. */
1225 COSTS_N_INSNS (6), /* ldrd. */
1226 COSTS_N_INSNS (1), /* ldm_1st. */
1227 1, /* ldm_regs_per_insn_1st. */
1228 2, /* ldm_regs_per_insn_subsequent. */
1229 COSTS_N_INSNS (2), /* loadf. */
1230 COSTS_N_INSNS (4), /* loadd. */
1231 COSTS_N_INSNS (1), /* load_unaligned. */
1232 COSTS_N_INSNS (1), /* store. */
1233 COSTS_N_INSNS (3), /* strd. */
1234 COSTS_N_INSNS (1), /* stm_1st. */
1235 1, /* stm_regs_per_insn_1st. */
1236 2, /* stm_regs_per_insn_subsequent. */
1237 COSTS_N_INSNS (2), /* storef. */
1238 COSTS_N_INSNS (2), /* stored. */
1239 COSTS_N_INSNS (1) /* store_unaligned. */
1242 /* FP SFmode */
1244 COSTS_N_INSNS (15), /* div. */
1245 COSTS_N_INSNS (3), /* mult. */
1246 COSTS_N_INSNS (7), /* mult_addsub. */
1247 COSTS_N_INSNS (7), /* fma. */
1248 COSTS_N_INSNS (3), /* addsub. */
1249 COSTS_N_INSNS (3), /* fpconst. */
1250 COSTS_N_INSNS (3), /* neg. */
1251 COSTS_N_INSNS (3), /* compare. */
1252 COSTS_N_INSNS (3), /* widen. */
1253 COSTS_N_INSNS (3), /* narrow. */
1254 COSTS_N_INSNS (3), /* toint. */
1255 COSTS_N_INSNS (3), /* fromint. */
1256 COSTS_N_INSNS (3) /* roundint. */
1258 /* FP DFmode */
1260 COSTS_N_INSNS (30), /* div. */
1261 COSTS_N_INSNS (6), /* mult. */
1262 COSTS_N_INSNS (10), /* mult_addsub. */
1263 COSTS_N_INSNS (7), /* fma. */
1264 COSTS_N_INSNS (3), /* addsub. */
1265 COSTS_N_INSNS (3), /* fpconst. */
1266 COSTS_N_INSNS (3), /* neg. */
1267 COSTS_N_INSNS (3), /* compare. */
1268 COSTS_N_INSNS (3), /* widen. */
1269 COSTS_N_INSNS (3), /* narrow. */
1270 COSTS_N_INSNS (3), /* toint. */
1271 COSTS_N_INSNS (3), /* fromint. */
1272 COSTS_N_INSNS (3) /* roundint. */
1275 /* Vector */
1277 COSTS_N_INSNS (1) /* alu. */
1282 const struct cpu_cost_table cortexa7_extra_costs =
1284 /* ALU */
1286 0, /* arith. */
1287 0, /* logical. */
1288 COSTS_N_INSNS (1), /* shift. */
1289 COSTS_N_INSNS (1), /* shift_reg. */
1290 COSTS_N_INSNS (1), /* arith_shift. */
1291 COSTS_N_INSNS (1), /* arith_shift_reg. */
1292 COSTS_N_INSNS (1), /* log_shift. */
1293 COSTS_N_INSNS (1), /* log_shift_reg. */
1294 COSTS_N_INSNS (1), /* extend. */
1295 COSTS_N_INSNS (1), /* extend_arith. */
1296 COSTS_N_INSNS (1), /* bfi. */
1297 COSTS_N_INSNS (1), /* bfx. */
1298 COSTS_N_INSNS (1), /* clz. */
1299 COSTS_N_INSNS (1), /* rev. */
1300 0, /* non_exec. */
1301 true /* non_exec_costs_exec. */
1305 /* MULT SImode */
1307 0, /* simple. */
1308 COSTS_N_INSNS (1), /* flag_setting. */
1309 COSTS_N_INSNS (1), /* extend. */
1310 COSTS_N_INSNS (1), /* add. */
1311 COSTS_N_INSNS (1), /* extend_add. */
1312 COSTS_N_INSNS (7) /* idiv. */
1314 /* MULT DImode */
1316 0, /* simple (N/A). */
1317 0, /* flag_setting (N/A). */
1318 COSTS_N_INSNS (1), /* extend. */
1319 0, /* add. */
1320 COSTS_N_INSNS (2), /* extend_add. */
1321 0 /* idiv (N/A). */
1324 /* LD/ST */
1326 COSTS_N_INSNS (1), /* load. */
1327 COSTS_N_INSNS (1), /* load_sign_extend. */
1328 COSTS_N_INSNS (3), /* ldrd. */
1329 COSTS_N_INSNS (1), /* ldm_1st. */
1330 1, /* ldm_regs_per_insn_1st. */
1331 2, /* ldm_regs_per_insn_subsequent. */
1332 COSTS_N_INSNS (2), /* loadf. */
1333 COSTS_N_INSNS (2), /* loadd. */
1334 COSTS_N_INSNS (1), /* load_unaligned. */
1335 COSTS_N_INSNS (1), /* store. */
1336 COSTS_N_INSNS (3), /* strd. */
1337 COSTS_N_INSNS (1), /* stm_1st. */
1338 1, /* stm_regs_per_insn_1st. */
1339 2, /* stm_regs_per_insn_subsequent. */
1340 COSTS_N_INSNS (2), /* storef. */
1341 COSTS_N_INSNS (2), /* stored. */
1342 COSTS_N_INSNS (1) /* store_unaligned. */
1345 /* FP SFmode */
1347 COSTS_N_INSNS (15), /* div. */
1348 COSTS_N_INSNS (3), /* mult. */
1349 COSTS_N_INSNS (7), /* mult_addsub. */
1350 COSTS_N_INSNS (7), /* fma. */
1351 COSTS_N_INSNS (3), /* addsub. */
1352 COSTS_N_INSNS (3), /* fpconst. */
1353 COSTS_N_INSNS (3), /* neg. */
1354 COSTS_N_INSNS (3), /* compare. */
1355 COSTS_N_INSNS (3), /* widen. */
1356 COSTS_N_INSNS (3), /* narrow. */
1357 COSTS_N_INSNS (3), /* toint. */
1358 COSTS_N_INSNS (3), /* fromint. */
1359 COSTS_N_INSNS (3) /* roundint. */
1361 /* FP DFmode */
1363 COSTS_N_INSNS (30), /* div. */
1364 COSTS_N_INSNS (6), /* mult. */
1365 COSTS_N_INSNS (10), /* mult_addsub. */
1366 COSTS_N_INSNS (7), /* fma. */
1367 COSTS_N_INSNS (3), /* addsub. */
1368 COSTS_N_INSNS (3), /* fpconst. */
1369 COSTS_N_INSNS (3), /* neg. */
1370 COSTS_N_INSNS (3), /* compare. */
1371 COSTS_N_INSNS (3), /* widen. */
1372 COSTS_N_INSNS (3), /* narrow. */
1373 COSTS_N_INSNS (3), /* toint. */
1374 COSTS_N_INSNS (3), /* fromint. */
1375 COSTS_N_INSNS (3) /* roundint. */
1378 /* Vector */
1380 COSTS_N_INSNS (1) /* alu. */
1384 const struct cpu_cost_table cortexa12_extra_costs =
1386 /* ALU */
1388 0, /* arith. */
1389 0, /* logical. */
1390 0, /* shift. */
1391 COSTS_N_INSNS (1), /* shift_reg. */
1392 COSTS_N_INSNS (1), /* arith_shift. */
1393 COSTS_N_INSNS (1), /* arith_shift_reg. */
1394 COSTS_N_INSNS (1), /* log_shift. */
1395 COSTS_N_INSNS (1), /* log_shift_reg. */
1396 0, /* extend. */
1397 COSTS_N_INSNS (1), /* extend_arith. */
1398 0, /* bfi. */
1399 COSTS_N_INSNS (1), /* bfx. */
1400 COSTS_N_INSNS (1), /* clz. */
1401 COSTS_N_INSNS (1), /* rev. */
1402 0, /* non_exec. */
1403 true /* non_exec_costs_exec. */
1405 /* MULT SImode */
1408 COSTS_N_INSNS (2), /* simple. */
1409 COSTS_N_INSNS (3), /* flag_setting. */
1410 COSTS_N_INSNS (2), /* extend. */
1411 COSTS_N_INSNS (3), /* add. */
1412 COSTS_N_INSNS (2), /* extend_add. */
1413 COSTS_N_INSNS (18) /* idiv. */
1415 /* MULT DImode */
1417 0, /* simple (N/A). */
1418 0, /* flag_setting (N/A). */
1419 COSTS_N_INSNS (3), /* extend. */
1420 0, /* add (N/A). */
1421 COSTS_N_INSNS (3), /* extend_add. */
1422 0 /* idiv (N/A). */
1425 /* LD/ST */
1427 COSTS_N_INSNS (3), /* load. */
1428 COSTS_N_INSNS (3), /* load_sign_extend. */
1429 COSTS_N_INSNS (3), /* ldrd. */
1430 COSTS_N_INSNS (3), /* ldm_1st. */
1431 1, /* ldm_regs_per_insn_1st. */
1432 2, /* ldm_regs_per_insn_subsequent. */
1433 COSTS_N_INSNS (3), /* loadf. */
1434 COSTS_N_INSNS (3), /* loadd. */
1435 0, /* load_unaligned. */
1436 0, /* store. */
1437 0, /* strd. */
1438 0, /* stm_1st. */
1439 1, /* stm_regs_per_insn_1st. */
1440 2, /* stm_regs_per_insn_subsequent. */
1441 COSTS_N_INSNS (2), /* storef. */
1442 COSTS_N_INSNS (2), /* stored. */
1443 0 /* store_unaligned. */
1446 /* FP SFmode */
1448 COSTS_N_INSNS (17), /* div. */
1449 COSTS_N_INSNS (4), /* mult. */
1450 COSTS_N_INSNS (8), /* mult_addsub. */
1451 COSTS_N_INSNS (8), /* fma. */
1452 COSTS_N_INSNS (4), /* addsub. */
1453 COSTS_N_INSNS (2), /* fpconst. */
1454 COSTS_N_INSNS (2), /* neg. */
1455 COSTS_N_INSNS (2), /* compare. */
1456 COSTS_N_INSNS (4), /* widen. */
1457 COSTS_N_INSNS (4), /* narrow. */
1458 COSTS_N_INSNS (4), /* toint. */
1459 COSTS_N_INSNS (4), /* fromint. */
1460 COSTS_N_INSNS (4) /* roundint. */
1462 /* FP DFmode */
1464 COSTS_N_INSNS (31), /* div. */
1465 COSTS_N_INSNS (4), /* mult. */
1466 COSTS_N_INSNS (8), /* mult_addsub. */
1467 COSTS_N_INSNS (8), /* fma. */
1468 COSTS_N_INSNS (4), /* addsub. */
1469 COSTS_N_INSNS (2), /* fpconst. */
1470 COSTS_N_INSNS (2), /* neg. */
1471 COSTS_N_INSNS (2), /* compare. */
1472 COSTS_N_INSNS (4), /* widen. */
1473 COSTS_N_INSNS (4), /* narrow. */
1474 COSTS_N_INSNS (4), /* toint. */
1475 COSTS_N_INSNS (4), /* fromint. */
1476 COSTS_N_INSNS (4) /* roundint. */
1479 /* Vector */
1481 COSTS_N_INSNS (1) /* alu. */
1485 const struct cpu_cost_table cortexa15_extra_costs =
1487 /* ALU */
1489 0, /* arith. */
1490 0, /* logical. */
1491 0, /* shift. */
1492 0, /* shift_reg. */
1493 COSTS_N_INSNS (1), /* arith_shift. */
1494 COSTS_N_INSNS (1), /* arith_shift_reg. */
1495 COSTS_N_INSNS (1), /* log_shift. */
1496 COSTS_N_INSNS (1), /* log_shift_reg. */
1497 0, /* extend. */
1498 COSTS_N_INSNS (1), /* extend_arith. */
1499 COSTS_N_INSNS (1), /* bfi. */
1500 0, /* bfx. */
1501 0, /* clz. */
1502 0, /* rev. */
1503 0, /* non_exec. */
1504 true /* non_exec_costs_exec. */
1506 /* MULT SImode */
1509 COSTS_N_INSNS (2), /* simple. */
1510 COSTS_N_INSNS (3), /* flag_setting. */
1511 COSTS_N_INSNS (2), /* extend. */
1512 COSTS_N_INSNS (2), /* add. */
1513 COSTS_N_INSNS (2), /* extend_add. */
1514 COSTS_N_INSNS (18) /* idiv. */
1516 /* MULT DImode */
1518 0, /* simple (N/A). */
1519 0, /* flag_setting (N/A). */
1520 COSTS_N_INSNS (3), /* extend. */
1521 0, /* add (N/A). */
1522 COSTS_N_INSNS (3), /* extend_add. */
1523 0 /* idiv (N/A). */
1526 /* LD/ST */
1528 COSTS_N_INSNS (3), /* load. */
1529 COSTS_N_INSNS (3), /* load_sign_extend. */
1530 COSTS_N_INSNS (3), /* ldrd. */
1531 COSTS_N_INSNS (4), /* ldm_1st. */
1532 1, /* ldm_regs_per_insn_1st. */
1533 2, /* ldm_regs_per_insn_subsequent. */
1534 COSTS_N_INSNS (4), /* loadf. */
1535 COSTS_N_INSNS (4), /* loadd. */
1536 0, /* load_unaligned. */
1537 0, /* store. */
1538 0, /* strd. */
1539 COSTS_N_INSNS (1), /* stm_1st. */
1540 1, /* stm_regs_per_insn_1st. */
1541 2, /* stm_regs_per_insn_subsequent. */
1542 0, /* storef. */
1543 0, /* stored. */
1544 0 /* store_unaligned. */
1547 /* FP SFmode */
1549 COSTS_N_INSNS (17), /* div. */
1550 COSTS_N_INSNS (4), /* mult. */
1551 COSTS_N_INSNS (8), /* mult_addsub. */
1552 COSTS_N_INSNS (8), /* fma. */
1553 COSTS_N_INSNS (4), /* addsub. */
1554 COSTS_N_INSNS (2), /* fpconst. */
1555 COSTS_N_INSNS (2), /* neg. */
1556 COSTS_N_INSNS (5), /* compare. */
1557 COSTS_N_INSNS (4), /* widen. */
1558 COSTS_N_INSNS (4), /* narrow. */
1559 COSTS_N_INSNS (4), /* toint. */
1560 COSTS_N_INSNS (4), /* fromint. */
1561 COSTS_N_INSNS (4) /* roundint. */
1563 /* FP DFmode */
1565 COSTS_N_INSNS (31), /* div. */
1566 COSTS_N_INSNS (4), /* mult. */
1567 COSTS_N_INSNS (8), /* mult_addsub. */
1568 COSTS_N_INSNS (8), /* fma. */
1569 COSTS_N_INSNS (4), /* addsub. */
1570 COSTS_N_INSNS (2), /* fpconst. */
1571 COSTS_N_INSNS (2), /* neg. */
1572 COSTS_N_INSNS (2), /* compare. */
1573 COSTS_N_INSNS (4), /* widen. */
1574 COSTS_N_INSNS (4), /* narrow. */
1575 COSTS_N_INSNS (4), /* toint. */
1576 COSTS_N_INSNS (4), /* fromint. */
1577 COSTS_N_INSNS (4) /* roundint. */
1580 /* Vector */
1582 COSTS_N_INSNS (1) /* alu. */
1586 const struct cpu_cost_table v7m_extra_costs =
1588 /* ALU */
1590 0, /* arith. */
1591 0, /* logical. */
1592 0, /* shift. */
1593 0, /* shift_reg. */
1594 0, /* arith_shift. */
1595 COSTS_N_INSNS (1), /* arith_shift_reg. */
1596 0, /* log_shift. */
1597 COSTS_N_INSNS (1), /* log_shift_reg. */
1598 0, /* extend. */
1599 COSTS_N_INSNS (1), /* extend_arith. */
1600 0, /* bfi. */
1601 0, /* bfx. */
1602 0, /* clz. */
1603 0, /* rev. */
1604 COSTS_N_INSNS (1), /* non_exec. */
1605 false /* non_exec_costs_exec. */
1608 /* MULT SImode */
1610 COSTS_N_INSNS (1), /* simple. */
1611 COSTS_N_INSNS (1), /* flag_setting. */
1612 COSTS_N_INSNS (2), /* extend. */
1613 COSTS_N_INSNS (1), /* add. */
1614 COSTS_N_INSNS (3), /* extend_add. */
1615 COSTS_N_INSNS (8) /* idiv. */
1617 /* MULT DImode */
1619 0, /* simple (N/A). */
1620 0, /* flag_setting (N/A). */
1621 COSTS_N_INSNS (2), /* extend. */
1622 0, /* add (N/A). */
1623 COSTS_N_INSNS (3), /* extend_add. */
1624 0 /* idiv (N/A). */
1627 /* LD/ST */
1629 COSTS_N_INSNS (2), /* load. */
1630 0, /* load_sign_extend. */
1631 COSTS_N_INSNS (3), /* ldrd. */
1632 COSTS_N_INSNS (2), /* ldm_1st. */
1633 1, /* ldm_regs_per_insn_1st. */
1634 1, /* ldm_regs_per_insn_subsequent. */
1635 COSTS_N_INSNS (2), /* loadf. */
1636 COSTS_N_INSNS (3), /* loadd. */
1637 COSTS_N_INSNS (1), /* load_unaligned. */
1638 COSTS_N_INSNS (2), /* store. */
1639 COSTS_N_INSNS (3), /* strd. */
1640 COSTS_N_INSNS (2), /* stm_1st. */
1641 1, /* stm_regs_per_insn_1st. */
1642 1, /* stm_regs_per_insn_subsequent. */
1643 COSTS_N_INSNS (2), /* storef. */
1644 COSTS_N_INSNS (3), /* stored. */
1645 COSTS_N_INSNS (1) /* store_unaligned. */
1648 /* FP SFmode */
1650 COSTS_N_INSNS (7), /* div. */
1651 COSTS_N_INSNS (2), /* mult. */
1652 COSTS_N_INSNS (5), /* mult_addsub. */
1653 COSTS_N_INSNS (3), /* fma. */
1654 COSTS_N_INSNS (1), /* addsub. */
1655 0, /* fpconst. */
1656 0, /* neg. */
1657 0, /* compare. */
1658 0, /* widen. */
1659 0, /* narrow. */
1660 0, /* toint. */
1661 0, /* fromint. */
1662 0 /* roundint. */
1664 /* FP DFmode */
1666 COSTS_N_INSNS (15), /* div. */
1667 COSTS_N_INSNS (5), /* mult. */
1668 COSTS_N_INSNS (7), /* mult_addsub. */
1669 COSTS_N_INSNS (7), /* fma. */
1670 COSTS_N_INSNS (3), /* addsub. */
1671 0, /* fpconst. */
1672 0, /* neg. */
1673 0, /* compare. */
1674 0, /* widen. */
1675 0, /* narrow. */
1676 0, /* toint. */
1677 0, /* fromint. */
1678 0 /* roundint. */
1681 /* Vector */
1683 COSTS_N_INSNS (1) /* alu. */
1687 const struct tune_params arm_slowmul_tune =
1689 arm_slowmul_rtx_costs,
1690 NULL,
1691 NULL, /* Sched adj cost. */
1692 3, /* Constant limit. */
1693 5, /* Max cond insns. */
1694 ARM_PREFETCH_NOT_BENEFICIAL,
1695 true, /* Prefer constant pool. */
1696 arm_default_branch_cost,
1697 false, /* Prefer LDRD/STRD. */
1698 {true, true}, /* Prefer non short circuit. */
1699 &arm_default_vec_cost, /* Vectorizer costs. */
1700 false, /* Prefer Neon for 64-bits bitops. */
1701 false, false, /* Prefer 32-bit encodings. */
1702 false, /* Prefer Neon for stringops. */
1703 8 /* Maximum insns to inline memset. */
1706 const struct tune_params arm_fastmul_tune =
1708 arm_fastmul_rtx_costs,
1709 NULL,
1710 NULL, /* Sched adj cost. */
1711 1, /* Constant limit. */
1712 5, /* Max cond insns. */
1713 ARM_PREFETCH_NOT_BENEFICIAL,
1714 true, /* Prefer constant pool. */
1715 arm_default_branch_cost,
1716 false, /* Prefer LDRD/STRD. */
1717 {true, true}, /* Prefer non short circuit. */
1718 &arm_default_vec_cost, /* Vectorizer costs. */
1719 false, /* Prefer Neon for 64-bits bitops. */
1720 false, false, /* Prefer 32-bit encodings. */
1721 false, /* Prefer Neon for stringops. */
1722 8 /* Maximum insns to inline memset. */
1725 /* StrongARM has early execution of branches, so a sequence that is worth
1726 skipping is shorter. Set max_insns_skipped to a lower value. */
1728 const struct tune_params arm_strongarm_tune =
1730 arm_fastmul_rtx_costs,
1731 NULL,
1732 NULL, /* Sched adj cost. */
1733 1, /* Constant limit. */
1734 3, /* Max cond insns. */
1735 ARM_PREFETCH_NOT_BENEFICIAL,
1736 true, /* Prefer constant pool. */
1737 arm_default_branch_cost,
1738 false, /* Prefer LDRD/STRD. */
1739 {true, true}, /* Prefer non short circuit. */
1740 &arm_default_vec_cost, /* Vectorizer costs. */
1741 false, /* Prefer Neon for 64-bits bitops. */
1742 false, false, /* Prefer 32-bit encodings. */
1743 false, /* Prefer Neon for stringops. */
1744 8 /* Maximum insns to inline memset. */
1747 const struct tune_params arm_xscale_tune =
1749 arm_xscale_rtx_costs,
1750 NULL,
1751 xscale_sched_adjust_cost,
1752 2, /* Constant limit. */
1753 3, /* Max cond insns. */
1754 ARM_PREFETCH_NOT_BENEFICIAL,
1755 true, /* Prefer constant pool. */
1756 arm_default_branch_cost,
1757 false, /* Prefer LDRD/STRD. */
1758 {true, true}, /* Prefer non short circuit. */
1759 &arm_default_vec_cost, /* Vectorizer costs. */
1760 false, /* Prefer Neon for 64-bits bitops. */
1761 false, false, /* Prefer 32-bit encodings. */
1762 false, /* Prefer Neon for stringops. */
1763 8 /* Maximum insns to inline memset. */
1766 const struct tune_params arm_9e_tune =
1768 arm_9e_rtx_costs,
1769 NULL,
1770 NULL, /* Sched adj cost. */
1771 1, /* Constant limit. */
1772 5, /* Max cond insns. */
1773 ARM_PREFETCH_NOT_BENEFICIAL,
1774 true, /* Prefer constant pool. */
1775 arm_default_branch_cost,
1776 false, /* Prefer LDRD/STRD. */
1777 {true, true}, /* Prefer non short circuit. */
1778 &arm_default_vec_cost, /* Vectorizer costs. */
1779 false, /* Prefer Neon for 64-bits bitops. */
1780 false, false, /* Prefer 32-bit encodings. */
1781 false, /* Prefer Neon for stringops. */
1782 8 /* Maximum insns to inline memset. */
1785 const struct tune_params arm_v6t2_tune =
1787 arm_9e_rtx_costs,
1788 NULL,
1789 NULL, /* Sched adj cost. */
1790 1, /* Constant limit. */
1791 5, /* Max cond insns. */
1792 ARM_PREFETCH_NOT_BENEFICIAL,
1793 false, /* Prefer constant pool. */
1794 arm_default_branch_cost,
1795 false, /* Prefer LDRD/STRD. */
1796 {true, true}, /* Prefer non short circuit. */
1797 &arm_default_vec_cost, /* Vectorizer costs. */
1798 false, /* Prefer Neon for 64-bits bitops. */
1799 false, false, /* Prefer 32-bit encodings. */
1800 false, /* Prefer Neon for stringops. */
1801 8 /* Maximum insns to inline memset. */
1804 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1805 const struct tune_params arm_cortex_tune =
1807 arm_9e_rtx_costs,
1808 &generic_extra_costs,
1809 NULL, /* Sched adj cost. */
1810 1, /* Constant limit. */
1811 5, /* Max cond insns. */
1812 ARM_PREFETCH_NOT_BENEFICIAL,
1813 false, /* Prefer constant pool. */
1814 arm_default_branch_cost,
1815 false, /* Prefer LDRD/STRD. */
1816 {true, true}, /* Prefer non short circuit. */
1817 &arm_default_vec_cost, /* Vectorizer costs. */
1818 false, /* Prefer Neon for 64-bits bitops. */
1819 false, false, /* Prefer 32-bit encodings. */
1820 false, /* Prefer Neon for stringops. */
1821 8 /* Maximum insns to inline memset. */
1824 const struct tune_params arm_cortex_a8_tune =
1826 arm_9e_rtx_costs,
1827 &cortexa8_extra_costs,
1828 NULL, /* Sched adj cost. */
1829 1, /* Constant limit. */
1830 5, /* Max cond insns. */
1831 ARM_PREFETCH_NOT_BENEFICIAL,
1832 false, /* Prefer constant pool. */
1833 arm_default_branch_cost,
1834 false, /* Prefer LDRD/STRD. */
1835 {true, true}, /* Prefer non short circuit. */
1836 &arm_default_vec_cost, /* Vectorizer costs. */
1837 false, /* Prefer Neon for 64-bits bitops. */
1838 false, false, /* Prefer 32-bit encodings. */
1839 true, /* Prefer Neon for stringops. */
1840 8 /* Maximum insns to inline memset. */
1843 const struct tune_params arm_cortex_a7_tune =
1845 arm_9e_rtx_costs,
1846 &cortexa7_extra_costs,
1847 NULL,
1848 1, /* Constant limit. */
1849 5, /* Max cond insns. */
1850 ARM_PREFETCH_NOT_BENEFICIAL,
1851 false, /* Prefer constant pool. */
1852 arm_default_branch_cost,
1853 false, /* Prefer LDRD/STRD. */
1854 {true, true}, /* Prefer non short circuit. */
1855 &arm_default_vec_cost, /* Vectorizer costs. */
1856 false, /* Prefer Neon for 64-bits bitops. */
1857 false, false, /* Prefer 32-bit encodings. */
1858 true, /* Prefer Neon for stringops. */
1859 8 /* Maximum insns to inline memset. */
1862 const struct tune_params arm_cortex_a15_tune =
1864 arm_9e_rtx_costs,
1865 &cortexa15_extra_costs,
1866 NULL, /* Sched adj cost. */
1867 1, /* Constant limit. */
1868 2, /* Max cond insns. */
1869 ARM_PREFETCH_NOT_BENEFICIAL,
1870 false, /* Prefer constant pool. */
1871 arm_default_branch_cost,
1872 true, /* Prefer LDRD/STRD. */
1873 {true, true}, /* Prefer non short circuit. */
1874 &arm_default_vec_cost, /* Vectorizer costs. */
1875 false, /* Prefer Neon for 64-bits bitops. */
1876 true, true, /* Prefer 32-bit encodings. */
1877 true, /* Prefer Neon for stringops. */
1878 8 /* Maximum insns to inline memset. */
1881 const struct tune_params arm_cortex_a53_tune =
1883 arm_9e_rtx_costs,
1884 &cortexa53_extra_costs,
1885 NULL, /* Scheduler cost adjustment. */
1886 1, /* Constant limit. */
1887 5, /* Max cond insns. */
1888 ARM_PREFETCH_NOT_BENEFICIAL,
1889 false, /* Prefer constant pool. */
1890 arm_default_branch_cost,
1891 false, /* Prefer LDRD/STRD. */
1892 {true, true}, /* Prefer non short circuit. */
1893 &arm_default_vec_cost, /* Vectorizer costs. */
1894 false, /* Prefer Neon for 64-bits bitops. */
1895 false, false, /* Prefer 32-bit encodings. */
1896 false, /* Prefer Neon for stringops. */
1897 8 /* Maximum insns to inline memset. */
1900 const struct tune_params arm_cortex_a57_tune =
1902 arm_9e_rtx_costs,
1903 &cortexa57_extra_costs,
1904 NULL, /* Scheduler cost adjustment. */
1905 1, /* Constant limit. */
1906 2, /* Max cond insns. */
1907 ARM_PREFETCH_NOT_BENEFICIAL,
1908 false, /* Prefer constant pool. */
1909 arm_default_branch_cost,
1910 true, /* Prefer LDRD/STRD. */
1911 {true, true}, /* Prefer non short circuit. */
1912 &arm_default_vec_cost, /* Vectorizer costs. */
1913 false, /* Prefer Neon for 64-bits bitops. */
1914 true, true, /* Prefer 32-bit encodings. */
1915 false, /* Prefer Neon for stringops. */
1916 8 /* Maximum insns to inline memset. */
1919 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1920 less appealing. Set max_insns_skipped to a low value. */
1922 const struct tune_params arm_cortex_a5_tune =
1924 arm_9e_rtx_costs,
1925 &cortexa5_extra_costs,
1926 NULL, /* Sched adj cost. */
1927 1, /* Constant limit. */
1928 1, /* Max cond insns. */
1929 ARM_PREFETCH_NOT_BENEFICIAL,
1930 false, /* Prefer constant pool. */
1931 arm_cortex_a5_branch_cost,
1932 false, /* Prefer LDRD/STRD. */
1933 {false, false}, /* Prefer non short circuit. */
1934 &arm_default_vec_cost, /* Vectorizer costs. */
1935 false, /* Prefer Neon for 64-bits bitops. */
1936 false, false, /* Prefer 32-bit encodings. */
1937 true, /* Prefer Neon for stringops. */
1938 8 /* Maximum insns to inline memset. */
1941 const struct tune_params arm_cortex_a9_tune =
1943 arm_9e_rtx_costs,
1944 &cortexa9_extra_costs,
1945 cortex_a9_sched_adjust_cost,
1946 1, /* Constant limit. */
1947 5, /* Max cond insns. */
1948 ARM_PREFETCH_BENEFICIAL(4,32,32),
1949 false, /* Prefer constant pool. */
1950 arm_default_branch_cost,
1951 false, /* Prefer LDRD/STRD. */
1952 {true, true}, /* Prefer non short circuit. */
1953 &arm_default_vec_cost, /* Vectorizer costs. */
1954 false, /* Prefer Neon for 64-bits bitops. */
1955 false, false, /* Prefer 32-bit encodings. */
1956 false, /* Prefer Neon for stringops. */
1957 8 /* Maximum insns to inline memset. */
1960 const struct tune_params arm_cortex_a12_tune =
1962 arm_9e_rtx_costs,
1963 &cortexa12_extra_costs,
1964 NULL,
1965 1, /* Constant limit. */
1966 5, /* Max cond insns. */
1967 ARM_PREFETCH_BENEFICIAL(4,32,32),
1968 false, /* Prefer constant pool. */
1969 arm_default_branch_cost,
1970 true, /* Prefer LDRD/STRD. */
1971 {true, true}, /* Prefer non short circuit. */
1972 &arm_default_vec_cost, /* Vectorizer costs. */
1973 false, /* Prefer Neon for 64-bits bitops. */
1974 false, false, /* Prefer 32-bit encodings. */
1975 true, /* Prefer Neon for stringops. */
1976 8 /* Maximum insns to inline memset. */
1979 /* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT each take a single
1980 cycle to execute (so two cycles for the pair).  An LDR from the constant pool
1981 also takes two cycles to execute, but mildly increases pipelining opportunity
1982 (consecutive loads/stores can be pipelined together, saving one cycle), and may
1983 also improve icache utilisation.  Hence we prefer the constant pool for such
1984 processors.  */
1986 const struct tune_params arm_v7m_tune =
1988 arm_9e_rtx_costs,
1989 &v7m_extra_costs,
1990 NULL, /* Sched adj cost. */
1991 1, /* Constant limit. */
1992 2, /* Max cond insns. */
1993 ARM_PREFETCH_NOT_BENEFICIAL,
1994 true, /* Prefer constant pool. */
1995 arm_cortex_m_branch_cost,
1996 false, /* Prefer LDRD/STRD. */
1997 {false, false}, /* Prefer non short circuit. */
1998 &arm_default_vec_cost, /* Vectorizer costs. */
1999 false, /* Prefer Neon for 64-bits bitops. */
2000 false, false, /* Prefer 32-bit encodings. */
2001 false, /* Prefer Neon for stringops. */
2002 8 /* Maximum insns to inline memset. */
2005 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2006 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2007 const struct tune_params arm_v6m_tune =
2009 arm_9e_rtx_costs,
2010 NULL,
2011 NULL, /* Sched adj cost. */
2012 1, /* Constant limit. */
2013 5, /* Max cond insns. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 false, /* Prefer constant pool. */
2016 arm_default_branch_cost,
2017 false, /* Prefer LDRD/STRD. */
2018 {false, false}, /* Prefer non short circuit. */
2019 &arm_default_vec_cost, /* Vectorizer costs. */
2020 false, /* Prefer Neon for 64-bits bitops. */
2021 false, false, /* Prefer 32-bit encodings. */
2022 false, /* Prefer Neon for stringops. */
2023 8 /* Maximum insns to inline memset. */
2026 const struct tune_params arm_fa726te_tune =
2028 arm_9e_rtx_costs,
2029 NULL,
2030 fa726te_sched_adjust_cost,
2031 1, /* Constant limit. */
2032 5, /* Max cond insns. */
2033 ARM_PREFETCH_NOT_BENEFICIAL,
2034 true, /* Prefer constant pool. */
2035 arm_default_branch_cost,
2036 false, /* Prefer LDRD/STRD. */
2037 {true, true}, /* Prefer non short circuit. */
2038 &arm_default_vec_cost, /* Vectorizer costs. */
2039 false, /* Prefer Neon for 64-bits bitops. */
2040 false, false, /* Prefer 32-bit encodings. */
2041 false, /* Prefer Neon for stringops. */
2042 8 /* Maximum insns to inline memset. */
2046 /* Not all of these give usefully different compilation alternatives,
2047 but there is no simple way of generalizing them. */
2048 static const struct processors all_cores[] =
2050 /* ARM Cores */
2051 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2052 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2053 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2054 #include "arm-cores.def"
2055 #undef ARM_CORE
2056 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
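/* Illustrative sketch only (the entry shown is hypothetical, not copied from
   arm-cores.def): an entry such as
     ARM_CORE ("example-core", examplecore, examplecore, 7A, FL_LDSCHED, cortex)
   would be expanded by the ARM_CORE macro above into the initializer
     {"example-core", examplecore, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_tune},
   i.e. the core name, its identifier, its base architecture, its own flags
   OR'd with the architecture's flags, and a pointer to its tuning table.  */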
2059 static const struct processors all_architectures[] =
2061 /* ARM Architectures */
2062 /* We don't specify tuning costs here as it will be figured out
2063 from the core. */
2065 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2066 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2067 #include "arm-arches.def"
2068 #undef ARM_ARCH
2069 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2073 /* These are populated as command-line arguments are processed, or NULL
2074 if not specified. */
2075 static const struct processors *arm_selected_arch;
2076 static const struct processors *arm_selected_cpu;
2077 static const struct processors *arm_selected_tune;
2079 /* The name of the preprocessor macro to define for this architecture. */
2081 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2083 /* Available values for -mfpu=. */
2085 static const struct arm_fpu_desc all_fpus[] =
2087 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2088 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2089 #include "arm-fpus.def"
2090 #undef ARM_FPU
2094 /* Supported TLS relocations. */
2096 enum tls_reloc {
2097 TLS_GD32,
2098 TLS_LDM32,
2099 TLS_LDO32,
2100 TLS_IE32,
2101 TLS_LE32,
2102 TLS_DESCSEQ /* GNU scheme */
2105 /* The maximum number of insns to be used when loading a constant. */
2106 inline static int
2107 arm_constant_limit (bool size_p)
2109 return size_p ? 1 : current_tune->constant_limit;
2112 /* Emit an insn that's a simple single-set. Both the operands must be known
2113 to be valid. */
2114 inline static rtx_insn *
2115 emit_set_insn (rtx x, rtx y)
2117 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2120 /* Return the number of bits set in VALUE. */
2121 static unsigned
2122 bit_count (unsigned long value)
2124 unsigned long count = 0;
2126 while (value)
2128 count++;
2129 value &= value - 1; /* Clear the least-significant set bit. */
2132 return count;
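/* Worked example (an editor's sketch, not from the original sources): for
   value == 0x29 (binary 101001) the loop runs three times, value going
   0x29 -> 0x28 -> 0x20 -> 0, so the function returns 3; each
   "value &= value - 1" step clears exactly one set bit (Kernighan's trick).  */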
2135 typedef struct
2137 enum machine_mode mode;
2138 const char *name;
2139 } arm_fixed_mode_set;
2141 /* A small helper for setting fixed-point library libfuncs. */
2143 static void
2144 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
2145 const char *funcname, const char *modename,
2146 int num_suffix)
2148 char buffer[50];
2150 if (num_suffix == 0)
2151 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2152 else
2153 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2155 set_optab_libfunc (optable, mode, buffer);
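/* Example of the naming scheme above (a sketch following the sprintf format):
   arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3) registers
   the libfunc name "__gnu_addqq3", while a num_suffix of 0 would drop the
   trailing digit and register "__gnu_addqq" instead.  */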
2158 static void
2159 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
2160 enum machine_mode from, const char *funcname,
2161 const char *toname, const char *fromname)
2163 char buffer[50];
2164 const char *maybe_suffix_2 = "";
2166 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2167 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2168 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2169 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2170 maybe_suffix_2 = "2";
2172 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2173 maybe_suffix_2);
2175 set_conv_libfunc (optable, to, from, buffer);
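/* Example of the naming scheme above (a sketch): converting from SQmode to
   DQmode via fract_optab passes funcname "fract", toname "dq" and fromname
   "sq"; both are signed fract modes, so maybe_suffix_2 becomes "2" and the
   registered name is "__gnu_fractsqdq2".  */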
2178 /* Set up library functions unique to ARM. */
2180 static void
2181 arm_init_libfuncs (void)
2183 /* For Linux, we have access to kernel support for atomic operations. */
2184 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2185 init_sync_libfuncs (2 * UNITS_PER_WORD);
2187 /* There are no special library functions unless we are using the
2188 ARM BPABI. */
2189 if (!TARGET_BPABI)
2190 return;
2192 /* The functions below are described in Section 4 of the "Run-Time
2193 ABI for the ARM architecture", Version 1.0. */
2195 /* Double-precision floating-point arithmetic. Table 2. */
2196 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2197 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2198 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2199 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2200 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2202 /* Double-precision comparisons. Table 3. */
2203 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2204 set_optab_libfunc (ne_optab, DFmode, NULL);
2205 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2206 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2207 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2208 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2209 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2211 /* Single-precision floating-point arithmetic. Table 4. */
2212 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2213 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2214 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2215 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2216 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2218 /* Single-precision comparisons. Table 5. */
2219 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2220 set_optab_libfunc (ne_optab, SFmode, NULL);
2221 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2222 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2223 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2224 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2225 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2227 /* Floating-point to integer conversions. Table 6. */
2228 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2229 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2230 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2231 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2232 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2233 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2234 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2235 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2237 /* Conversions between floating types. Table 7. */
2238 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2239 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2241 /* Integer to floating-point conversions. Table 8. */
2242 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2243 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2244 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2245 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2246 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2247 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2248 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2249 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2251 /* Long long. Table 9. */
2252 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2253 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2254 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2255 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2256 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2257 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2258 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2259 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2261 /* Integer (32/32->32) division. \S 4.3.1. */
2262 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2263 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2265 /* The divmod functions are designed so that they can be used for
2266 plain division, even though they return both the quotient and the
2267 remainder. The quotient is returned in the usual location (i.e.,
2268 r0 for SImode, {r0, r1} for DImode), just as would be expected
2269 for an ordinary division routine. Because the AAPCS calling
2270 conventions specify that all of { r0, r1, r2, r3 } are
2271 call-clobbered registers, there is no need to tell the compiler
2272 explicitly that those registers are clobbered by these
2273 routines. */
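/* Illustrative sketch of the point above (editor's example; the {r2, r3}
   location of the DImode remainder is assumed from the AAPCS): a plain
   division such as
     long long q = a / b;
   can be compiled as a call to __aeabi_ldivmod; the quotient comes back in
   {r0, r1}, exactly where a division-only routine would put it, and the
   remainder in {r2, r3} is simply ignored without extra clobber markings.  */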
2274 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2275 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2277 /* For SImode division the ABI provides div-without-mod routines,
2278 which are faster. */
2279 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2280 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2282 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2283 divmod libcalls instead. */
2284 set_optab_libfunc (smod_optab, DImode, NULL);
2285 set_optab_libfunc (umod_optab, DImode, NULL);
2286 set_optab_libfunc (smod_optab, SImode, NULL);
2287 set_optab_libfunc (umod_optab, SImode, NULL);
2289 /* Half-precision float operations. The compiler handles all operations
2290 with NULL libfuncs by converting to SFmode. */
2291 switch (arm_fp16_format)
2293 case ARM_FP16_FORMAT_IEEE:
2294 case ARM_FP16_FORMAT_ALTERNATIVE:
2296 /* Conversions. */
2297 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2298 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2299 ? "__gnu_f2h_ieee"
2300 : "__gnu_f2h_alternative"));
2301 set_conv_libfunc (sext_optab, SFmode, HFmode,
2302 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2303 ? "__gnu_h2f_ieee"
2304 : "__gnu_h2f_alternative"));
2306 /* Arithmetic. */
2307 set_optab_libfunc (add_optab, HFmode, NULL);
2308 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2309 set_optab_libfunc (smul_optab, HFmode, NULL);
2310 set_optab_libfunc (neg_optab, HFmode, NULL);
2311 set_optab_libfunc (sub_optab, HFmode, NULL);
2313 /* Comparisons. */
2314 set_optab_libfunc (eq_optab, HFmode, NULL);
2315 set_optab_libfunc (ne_optab, HFmode, NULL);
2316 set_optab_libfunc (lt_optab, HFmode, NULL);
2317 set_optab_libfunc (le_optab, HFmode, NULL);
2318 set_optab_libfunc (ge_optab, HFmode, NULL);
2319 set_optab_libfunc (gt_optab, HFmode, NULL);
2320 set_optab_libfunc (unord_optab, HFmode, NULL);
2321 break;
2323 default:
2324 break;
2327 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2329 const arm_fixed_mode_set fixed_arith_modes[] =
2331 { QQmode, "qq" },
2332 { UQQmode, "uqq" },
2333 { HQmode, "hq" },
2334 { UHQmode, "uhq" },
2335 { SQmode, "sq" },
2336 { USQmode, "usq" },
2337 { DQmode, "dq" },
2338 { UDQmode, "udq" },
2339 { TQmode, "tq" },
2340 { UTQmode, "utq" },
2341 { HAmode, "ha" },
2342 { UHAmode, "uha" },
2343 { SAmode, "sa" },
2344 { USAmode, "usa" },
2345 { DAmode, "da" },
2346 { UDAmode, "uda" },
2347 { TAmode, "ta" },
2348 { UTAmode, "uta" }
2350 const arm_fixed_mode_set fixed_conv_modes[] =
2352 { QQmode, "qq" },
2353 { UQQmode, "uqq" },
2354 { HQmode, "hq" },
2355 { UHQmode, "uhq" },
2356 { SQmode, "sq" },
2357 { USQmode, "usq" },
2358 { DQmode, "dq" },
2359 { UDQmode, "udq" },
2360 { TQmode, "tq" },
2361 { UTQmode, "utq" },
2362 { HAmode, "ha" },
2363 { UHAmode, "uha" },
2364 { SAmode, "sa" },
2365 { USAmode, "usa" },
2366 { DAmode, "da" },
2367 { UDAmode, "uda" },
2368 { TAmode, "ta" },
2369 { UTAmode, "uta" },
2370 { QImode, "qi" },
2371 { HImode, "hi" },
2372 { SImode, "si" },
2373 { DImode, "di" },
2374 { TImode, "ti" },
2375 { SFmode, "sf" },
2376 { DFmode, "df" }
2378 unsigned int i, j;
2380 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2382 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2383 "add", fixed_arith_modes[i].name, 3);
2384 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2385 "ssadd", fixed_arith_modes[i].name, 3);
2386 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2387 "usadd", fixed_arith_modes[i].name, 3);
2388 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2389 "sub", fixed_arith_modes[i].name, 3);
2390 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2391 "sssub", fixed_arith_modes[i].name, 3);
2392 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2393 "ussub", fixed_arith_modes[i].name, 3);
2394 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2395 "mul", fixed_arith_modes[i].name, 3);
2396 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2397 "ssmul", fixed_arith_modes[i].name, 3);
2398 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2399 "usmul", fixed_arith_modes[i].name, 3);
2400 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2401 "div", fixed_arith_modes[i].name, 3);
2402 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2403 "udiv", fixed_arith_modes[i].name, 3);
2404 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2405 "ssdiv", fixed_arith_modes[i].name, 3);
2406 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2407 "usdiv", fixed_arith_modes[i].name, 3);
2408 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2409 "neg", fixed_arith_modes[i].name, 2);
2410 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2411 "ssneg", fixed_arith_modes[i].name, 2);
2412 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2413 "usneg", fixed_arith_modes[i].name, 2);
2414 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2415 "ashl", fixed_arith_modes[i].name, 3);
2416 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2417 "ashr", fixed_arith_modes[i].name, 3);
2418 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2419 "lshr", fixed_arith_modes[i].name, 3);
2420 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2421 "ssashl", fixed_arith_modes[i].name, 3);
2422 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2423 "usashl", fixed_arith_modes[i].name, 3);
2424 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2425 "cmp", fixed_arith_modes[i].name, 2);
2428 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2429 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2431 if (i == j
2432 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2433 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2434 continue;
2436 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2437 fixed_conv_modes[j].mode, "fract",
2438 fixed_conv_modes[i].name,
2439 fixed_conv_modes[j].name);
2440 arm_set_fixed_conv_libfunc (satfract_optab,
2441 fixed_conv_modes[i].mode,
2442 fixed_conv_modes[j].mode, "satfract",
2443 fixed_conv_modes[i].name,
2444 fixed_conv_modes[j].name);
2445 arm_set_fixed_conv_libfunc (fractuns_optab,
2446 fixed_conv_modes[i].mode,
2447 fixed_conv_modes[j].mode, "fractuns",
2448 fixed_conv_modes[i].name,
2449 fixed_conv_modes[j].name);
2450 arm_set_fixed_conv_libfunc (satfractuns_optab,
2451 fixed_conv_modes[i].mode,
2452 fixed_conv_modes[j].mode, "satfractuns",
2453 fixed_conv_modes[i].name,
2454 fixed_conv_modes[j].name);
2458 if (TARGET_AAPCS_BASED)
2459 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2462 /* On AAPCS systems, this is the "struct __va_list". */
2463 static GTY(()) tree va_list_type;
2465 /* Return the type to use as __builtin_va_list. */
2466 static tree
2467 arm_build_builtin_va_list (void)
2469 tree va_list_name;
2470 tree ap_field;
2472 if (!TARGET_AAPCS_BASED)
2473 return std_build_builtin_va_list ();
2475 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2476 defined as:
2478 struct __va_list
2480 void *__ap;
2483 The C Library ABI further reinforces this definition in \S
2484 4.1.
2486 We must follow this definition exactly. The structure tag
2487 name is visible in C++ mangled names, and thus forms a part
2488 of the ABI. The field name may be used by people who
2489 #include <stdarg.h>. */
2490 /* Create the type. */
2491 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2492 /* Give it the required name. */
2493 va_list_name = build_decl (BUILTINS_LOCATION,
2494 TYPE_DECL,
2495 get_identifier ("__va_list"),
2496 va_list_type);
2497 DECL_ARTIFICIAL (va_list_name) = 1;
2498 TYPE_NAME (va_list_type) = va_list_name;
2499 TYPE_STUB_DECL (va_list_type) = va_list_name;
2500 /* Create the __ap field. */
2501 ap_field = build_decl (BUILTINS_LOCATION,
2502 FIELD_DECL,
2503 get_identifier ("__ap"),
2504 ptr_type_node);
2505 DECL_ARTIFICIAL (ap_field) = 1;
2506 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2507 TYPE_FIELDS (va_list_type) = ap_field;
2508 /* Compute its layout. */
2509 layout_type (va_list_type);
2511 return va_list_type;
2514 /* Return an expression of type "void *" pointing to the next
2515 available argument in a variable-argument list. VALIST is the
2516 user-level va_list object, of type __builtin_va_list. */
2517 static tree
2518 arm_extract_valist_ptr (tree valist)
2520 if (TREE_TYPE (valist) == error_mark_node)
2521 return error_mark_node;
2523 /* On an AAPCS target, the pointer is stored within "struct
2524 va_list". */
2525 if (TARGET_AAPCS_BASED)
2527 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2528 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2529 valist, ap_field, NULL_TREE);
2532 return valist;
2535 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2536 static void
2537 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2539 valist = arm_extract_valist_ptr (valist);
2540 std_expand_builtin_va_start (valist, nextarg);
2543 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2544 static tree
2545 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2546 gimple_seq *post_p)
2548 valist = arm_extract_valist_ptr (valist);
2549 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2552 /* Fix up any incompatible options that the user has specified. */
2553 static void
2554 arm_option_override (void)
2556 if (global_options_set.x_arm_arch_option)
2557 arm_selected_arch = &all_architectures[arm_arch_option];
2559 if (global_options_set.x_arm_cpu_option)
2561 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2562 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2565 if (global_options_set.x_arm_tune_option)
2566 arm_selected_tune = &all_cores[(int) arm_tune_option];
2568 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2569 SUBTARGET_OVERRIDE_OPTIONS;
2570 #endif
2572 if (arm_selected_arch)
2574 if (arm_selected_cpu)
2576 /* Check for conflict between mcpu and march. */
2577 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2579 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2580 arm_selected_cpu->name, arm_selected_arch->name);
2581 /* -march wins for code generation.
2582 -mcpu wins for default tuning. */
2583 if (!arm_selected_tune)
2584 arm_selected_tune = arm_selected_cpu;
2586 arm_selected_cpu = arm_selected_arch;
2588 else
2589 /* -mcpu wins. */
2590 arm_selected_arch = NULL;
2592 else
2593 /* Pick a CPU based on the architecture. */
2594 arm_selected_cpu = arm_selected_arch;
2597 /* If the user did not specify a processor, choose one for them. */
2598 if (!arm_selected_cpu)
2600 const struct processors * sel;
2601 unsigned int sought;
2603 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2604 if (!arm_selected_cpu->name)
2606 #ifdef SUBTARGET_CPU_DEFAULT
2607 /* Use the subtarget default CPU if none was specified by
2608 configure. */
2609 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2610 #endif
2611 /* Default to ARM6. */
2612 if (!arm_selected_cpu->name)
2613 arm_selected_cpu = &all_cores[arm6];
2616 sel = arm_selected_cpu;
2617 insn_flags = sel->flags;
2619 /* Now check to see if the user has specified some command line
2620 switch that requires certain abilities from the CPU. */
2621 sought = 0;
2623 if (TARGET_INTERWORK || TARGET_THUMB)
2625 sought |= (FL_THUMB | FL_MODE32);
2627 /* There are no ARM processors that support both APCS-26 and
2628 interworking. Therefore we force FL_MODE26 to be removed
2629 from insn_flags here (if it was set), so that the search
2630 below will always be able to find a compatible processor. */
2631 insn_flags &= ~FL_MODE26;
2634 if (sought != 0 && ((sought & insn_flags) != sought))
2636 /* Try to locate a CPU type that supports all of the abilities
2637 of the default CPU, plus the extra abilities requested by
2638 the user. */
2639 for (sel = all_cores; sel->name != NULL; sel++)
2640 if ((sel->flags & sought) == (sought | insn_flags))
2641 break;
2643 if (sel->name == NULL)
2645 unsigned current_bit_count = 0;
2646 const struct processors * best_fit = NULL;
2648 /* Ideally we would like to issue an error message here
2649 saying that it was not possible to find a CPU compatible
2650 with the default CPU, but which also supports the command
2651 line options specified by the programmer, and so they
2652 ought to use the -mcpu=<name> command line option to
2653 override the default CPU type.
2655 If we cannot find a cpu that has both the
2656 characteristics of the default cpu and the given
2657 command line options, we scan the array again looking
2658 for a best match. */
2659 for (sel = all_cores; sel->name != NULL; sel++)
2660 if ((sel->flags & sought) == sought)
2662 unsigned count;
2664 count = bit_count (sel->flags & insn_flags);
2666 if (count >= current_bit_count)
2668 best_fit = sel;
2669 current_bit_count = count;
2673 gcc_assert (best_fit);
2674 sel = best_fit;
2677 arm_selected_cpu = sel;
2681 gcc_assert (arm_selected_cpu);
2682 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2683 if (!arm_selected_tune)
2684 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2686 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
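/* For example (sketch): if the selected CPU's arch string is "7A", the
   sprintf above rewrites the "__ARM_ARCH_0UNK__" template declared earlier
   into "__ARM_ARCH_7A__".  */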
2687 insn_flags = arm_selected_cpu->flags;
2688 arm_base_arch = arm_selected_cpu->base_arch;
2690 arm_tune = arm_selected_tune->core;
2691 tune_flags = arm_selected_tune->flags;
2692 current_tune = arm_selected_tune->tune;
2694 /* Make sure that the processor choice does not conflict with any of the
2695 other command line choices. */
2696 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2697 error ("target CPU does not support ARM mode");
2699 /* BPABI targets use linker tricks to allow interworking on cores
2700 without thumb support. */
2701 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2703 warning (0, "target CPU does not support interworking" );
2704 target_flags &= ~MASK_INTERWORK;
2707 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2709 warning (0, "target CPU does not support THUMB instructions");
2710 target_flags &= ~MASK_THUMB;
2713 if (TARGET_APCS_FRAME && TARGET_THUMB)
2715 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2716 target_flags &= ~MASK_APCS_FRAME;
2719 /* Callee super interworking implies thumb interworking. Adding
2720 this to the flags here simplifies the logic elsewhere. */
2721 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2722 target_flags |= MASK_INTERWORK;
2724 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2725 from here where no function is being compiled currently. */
2726 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2727 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2729 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2730 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2732 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2734 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2735 target_flags |= MASK_APCS_FRAME;
2738 if (TARGET_POKE_FUNCTION_NAME)
2739 target_flags |= MASK_APCS_FRAME;
2741 if (TARGET_APCS_REENT && flag_pic)
2742 error ("-fpic and -mapcs-reent are incompatible");
2744 if (TARGET_APCS_REENT)
2745 warning (0, "APCS reentrant code not supported. Ignored");
2747 /* If this target is normally configured to use APCS frames, warn if they
2748 are turned off and debugging is turned on. */
2749 if (TARGET_ARM
2750 && write_symbols != NO_DEBUG
2751 && !TARGET_APCS_FRAME
2752 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2753 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2755 if (TARGET_APCS_FLOAT)
2756 warning (0, "passing floating point arguments in fp regs not yet supported");
2758 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2759 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2760 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2761 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2762 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2763 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2764 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2765 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2766 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2767 arm_arch6m = arm_arch6 && !arm_arch_notm;
2768 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2769 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2770 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2771 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2772 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2774 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2775 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2776 thumb_code = TARGET_ARM == 0;
2777 thumb1_code = TARGET_THUMB1 != 0;
2778 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2779 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2780 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2781 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2782 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2783 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2784 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2785 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2786 if (arm_restrict_it == 2)
2787 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2789 if (!TARGET_THUMB2)
2790 arm_restrict_it = 0;
2792 /* If we are not using the default (ARM mode) section anchor offset
2793 ranges, then set the correct ranges now. */
2794 if (TARGET_THUMB1)
2796 /* Thumb-1 LDR instructions cannot have negative offsets.
2797 Permissible positive offset ranges are 5-bit (for byte loads),
2798 6-bit (for halfword loads), or 7-bit (for word loads).
2799 Empirical results suggest a 7-bit anchor range gives the best
2800 overall code size. */
2801 targetm.min_anchor_offset = 0;
2802 targetm.max_anchor_offset = 127;
2804 else if (TARGET_THUMB2)
2806 /* The minimum is set such that the total size of the block
2807 for a particular anchor is 248 + 1 + 4095 bytes, which is
2808 divisible by eight, ensuring natural spacing of anchors. */
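/* Arithmetic check of the comment above (editor's note):
   248 + 1 + 4095 = 4344 = 8 * 543, so the block size is a multiple of eight.  */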
2809 targetm.min_anchor_offset = -248;
2810 targetm.max_anchor_offset = 4095;
2813 /* V5 code we generate is completely interworking capable, so we turn off
2814 TARGET_INTERWORK here to avoid many tests later on. */
2816 /* XXX However, we must pass the right pre-processor defines to CPP
2817 or GLD can get confused. This is a hack. */
2818 if (TARGET_INTERWORK)
2819 arm_cpp_interwork = 1;
2821 if (arm_arch5)
2822 target_flags &= ~MASK_INTERWORK;
2824 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2825 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2827 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2828 error ("iwmmxt abi requires an iwmmxt capable cpu");
2830 if (!global_options_set.x_arm_fpu_index)
2832 const char *target_fpu_name;
2833 bool ok;
2835 #ifdef FPUTYPE_DEFAULT
2836 target_fpu_name = FPUTYPE_DEFAULT;
2837 #else
2838 target_fpu_name = "vfp";
2839 #endif
2841 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2842 CL_TARGET);
2843 gcc_assert (ok);
2846 arm_fpu_desc = &all_fpus[arm_fpu_index];
2848 switch (arm_fpu_desc->model)
2850 case ARM_FP_MODEL_VFP:
2851 arm_fpu_attr = FPU_VFP;
2852 break;
2854 default:
2855 gcc_unreachable();
2858 if (TARGET_AAPCS_BASED)
2860 if (TARGET_CALLER_INTERWORKING)
2861 error ("AAPCS does not support -mcaller-super-interworking");
2862 else
2863 if (TARGET_CALLEE_INTERWORKING)
2864 error ("AAPCS does not support -mcallee-super-interworking");
2867 /* iWMMXt and NEON are incompatible. */
2868 if (TARGET_IWMMXT && TARGET_NEON)
2869 error ("iWMMXt and NEON are incompatible");
2871 /* iWMMXt unsupported under Thumb mode. */
2872 if (TARGET_THUMB && TARGET_IWMMXT)
2873 error ("iWMMXt unsupported under Thumb mode");
2875 /* __fp16 support currently assumes the core has ldrh. */
2876 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2877 sorry ("__fp16 and no ldrh");
2879 /* If soft-float is specified then don't use FPU. */
2880 if (TARGET_SOFT_FLOAT)
2881 arm_fpu_attr = FPU_NONE;
2883 if (TARGET_AAPCS_BASED)
2885 if (arm_abi == ARM_ABI_IWMMXT)
2886 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2887 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2888 && TARGET_HARD_FLOAT
2889 && TARGET_VFP)
2890 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2891 else
2892 arm_pcs_default = ARM_PCS_AAPCS;
2894 else
2896 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2897 sorry ("-mfloat-abi=hard and VFP");
2899 if (arm_abi == ARM_ABI_APCS)
2900 arm_pcs_default = ARM_PCS_APCS;
2901 else
2902 arm_pcs_default = ARM_PCS_ATPCS;
2905 /* For arm2/3 there is no need to do any scheduling if we are doing
2906 software floating-point. */
2907 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2908 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2910 /* Use the cp15 method if it is available. */
2911 if (target_thread_pointer == TP_AUTO)
2913 if (arm_arch6k && !TARGET_THUMB1)
2914 target_thread_pointer = TP_CP15;
2915 else
2916 target_thread_pointer = TP_SOFT;
2919 if (TARGET_HARD_TP && TARGET_THUMB1)
2920 error ("can not use -mtp=cp15 with 16-bit Thumb");
2922 /* Override the default structure alignment for AAPCS ABI. */
2923 if (!global_options_set.x_arm_structure_size_boundary)
2925 if (TARGET_AAPCS_BASED)
2926 arm_structure_size_boundary = 8;
2928 else
2930 if (arm_structure_size_boundary != 8
2931 && arm_structure_size_boundary != 32
2932 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2934 if (ARM_DOUBLEWORD_ALIGN)
2935 warning (0,
2936 "structure size boundary can only be set to 8, 32 or 64");
2937 else
2938 warning (0, "structure size boundary can only be set to 8 or 32");
2939 arm_structure_size_boundary
2940 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2944 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2946 error ("RTP PIC is incompatible with Thumb");
2947 flag_pic = 0;
2950 /* If stack checking is disabled, we can use r10 as the PIC register,
2951 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2952 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2954 if (TARGET_VXWORKS_RTP)
2955 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2956 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2959 if (flag_pic && TARGET_VXWORKS_RTP)
2960 arm_pic_register = 9;
2962 if (arm_pic_register_string != NULL)
2964 int pic_register = decode_reg_name (arm_pic_register_string);
2966 if (!flag_pic)
2967 warning (0, "-mpic-register= is useless without -fpic");
2969 /* Prevent the user from choosing an obviously stupid PIC register. */
2970 else if (pic_register < 0 || call_used_regs[pic_register]
2971 || pic_register == HARD_FRAME_POINTER_REGNUM
2972 || pic_register == STACK_POINTER_REGNUM
2973 || pic_register >= PC_REGNUM
2974 || (TARGET_VXWORKS_RTP
2975 && (unsigned int) pic_register != arm_pic_register))
2976 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2977 else
2978 arm_pic_register = pic_register;
2981 if (TARGET_VXWORKS_RTP
2982 && !global_options_set.x_arm_pic_data_is_text_relative)
2983 arm_pic_data_is_text_relative = 0;
2985 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2986 if (fix_cm3_ldrd == 2)
2988 if (arm_selected_cpu->core == cortexm3)
2989 fix_cm3_ldrd = 1;
2990 else
2991 fix_cm3_ldrd = 0;
2994 /* Enable -munaligned-access by default for
2995 - all ARMv6 architecture-based processors
2996 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2997 - ARMv8 architecture-based processors.
2999 Disable -munaligned-access by default for
3000 - all pre-ARMv6 architecture-based processors
3001 - ARMv6-M architecture-based processors. */
3003 if (unaligned_access == 2)
3005 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3006 unaligned_access = 1;
3007 else
3008 unaligned_access = 0;
3010 else if (unaligned_access == 1
3011 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3013 warning (0, "target CPU does not support unaligned accesses");
3014 unaligned_access = 0;
3017 if (TARGET_THUMB1 && flag_schedule_insns)
3019 /* Don't warn since it's on by default in -O2. */
3020 flag_schedule_insns = 0;
3023 if (optimize_size)
3025 /* If optimizing for size, bump the number of instructions that we
3026 are prepared to conditionally execute (even on a StrongARM). */
3027 max_insns_skipped = 6;
3029 /* For THUMB2, we limit the conditional sequence to one IT block. */
3030 if (TARGET_THUMB2)
3031 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3033 else
3034 max_insns_skipped = current_tune->max_insns_skipped;
3036 /* Hot/Cold partitioning is not currently supported, since we can't
3037 handle literal pool placement in that case. */
3038 if (flag_reorder_blocks_and_partition)
3040 inform (input_location,
3041 "-freorder-blocks-and-partition not supported on this architecture");
3042 flag_reorder_blocks_and_partition = 0;
3043 flag_reorder_blocks = 1;
3046 if (flag_pic)
3047 /* Hoisting PIC address calculations more aggressively provides a small,
3048 but measurable, size reduction for PIC code. Therefore, we decrease
3049 the bar for unrestricted expression hoisting to the cost of PIC address
3050 calculation, which is 2 instructions. */
3051 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3052 global_options.x_param_values,
3053 global_options_set.x_param_values);
3055 /* ARM EABI defaults to strict volatile bitfields. */
3056 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3057 && abi_version_at_least(2))
3058 flag_strict_volatile_bitfields = 1;
3060 /* Enable SW prefetching at -O3 for CPUs that have prefetch, and for which we
3061 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3062 if (flag_prefetch_loop_arrays < 0
3063 && HAVE_prefetch
3064 && optimize >= 3
3065 && current_tune->num_prefetch_slots > 0)
3066 flag_prefetch_loop_arrays = 1;
3068 /* Set up parameters to be used in the prefetching algorithm. Do not override the
3069 defaults unless we are tuning for a core we have researched values for. */
3070 if (current_tune->num_prefetch_slots > 0)
3071 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3072 current_tune->num_prefetch_slots,
3073 global_options.x_param_values,
3074 global_options_set.x_param_values);
3075 if (current_tune->l1_cache_line_size >= 0)
3076 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3077 current_tune->l1_cache_line_size,
3078 global_options.x_param_values,
3079 global_options_set.x_param_values);
3080 if (current_tune->l1_cache_size >= 0)
3081 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3082 current_tune->l1_cache_size,
3083 global_options.x_param_values,
3084 global_options_set.x_param_values);
3086 /* Use Neon rather than core registers to perform 64-bit
3087 operations. */
3088 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3089 if (use_neon_for_64bits == 1)
3090 prefer_neon_for_64bits = true;
3092 /* Use the alternative scheduling-pressure algorithm by default. */
3093 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3094 global_options.x_param_values,
3095 global_options_set.x_param_values);
3097 /* Disable shrink-wrap when optimizing function for size, since it tends to
3098 generate additional returns. */
3099 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3100 flag_shrink_wrap = false;
3101 /* TBD: Dwarf info for apcs frame is not handled yet. */
3102 if (TARGET_APCS_FRAME)
3103 flag_shrink_wrap = false;
3105 /* We only support -mslow-flash-data on armv7-m targets. */
3106 if (target_slow_flash_data
3107 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3108 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3109 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3111 /* Currently, for slow flash data, we just disable literal pools. */
3112 if (target_slow_flash_data)
3113 arm_disable_literal_pool = true;
3115 /* Register global variables with the garbage collector. */
3116 arm_add_gc_roots ();
3119 static void
3120 arm_add_gc_roots (void)
3122 gcc_obstack_init(&minipool_obstack);
3123 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3126 /* A table of known ARM exception types.
3127 For use with the interrupt function attribute. */
3129 typedef struct
3131 const char *const arg;
3132 const unsigned long return_value;
3134 isr_attribute_arg;
3136 static const isr_attribute_arg isr_attribute_args [] =
3138 { "IRQ", ARM_FT_ISR },
3139 { "irq", ARM_FT_ISR },
3140 { "FIQ", ARM_FT_FIQ },
3141 { "fiq", ARM_FT_FIQ },
3142 { "ABORT", ARM_FT_ISR },
3143 { "abort", ARM_FT_ISR },
3144 { "ABORT", ARM_FT_ISR },
3145 { "abort", ARM_FT_ISR },
3146 { "UNDEF", ARM_FT_EXCEPTION },
3147 { "undef", ARM_FT_EXCEPTION },
3148 { "SWI", ARM_FT_EXCEPTION },
3149 { "swi", ARM_FT_EXCEPTION },
3150 { NULL, ARM_FT_NORMAL }
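/* Usage sketch (an assumed, illustrative declaration; "uart_handler" is not
   from these sources): a function declared as
     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
   hands the string "IRQ" to arm_isr_value below, which this table maps to
   ARM_FT_ISR.  */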
3153 /* Returns the (interrupt) function type of the current
3154 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3156 static unsigned long
3157 arm_isr_value (tree argument)
3159 const isr_attribute_arg * ptr;
3160 const char * arg;
3162 if (!arm_arch_notm)
3163 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3165 /* No argument - default to IRQ. */
3166 if (argument == NULL_TREE)
3167 return ARM_FT_ISR;
3169 /* Get the value of the argument. */
3170 if (TREE_VALUE (argument) == NULL_TREE
3171 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3172 return ARM_FT_UNKNOWN;
3174 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3176 /* Check it against the list of known arguments. */
3177 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3178 if (streq (arg, ptr->arg))
3179 return ptr->return_value;
3181 /* An unrecognized interrupt type. */
3182 return ARM_FT_UNKNOWN;
3185 /* Computes the type of the current function. */
3187 static unsigned long
3188 arm_compute_func_type (void)
3190 unsigned long type = ARM_FT_UNKNOWN;
3191 tree a;
3192 tree attr;
3194 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3196 /* Decide if the current function is volatile. Such functions
3197 never return, and many memory cycles can be saved by not storing
3198 register values that will never be needed again. This optimization
3199 was added to speed up context switching in a kernel application. */
3200 if (optimize > 0
3201 && (TREE_NOTHROW (current_function_decl)
3202 || !(flag_unwind_tables
3203 || (flag_exceptions
3204 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3205 && TREE_THIS_VOLATILE (current_function_decl))
3206 type |= ARM_FT_VOLATILE;
3208 if (cfun->static_chain_decl != NULL)
3209 type |= ARM_FT_NESTED;
3211 attr = DECL_ATTRIBUTES (current_function_decl);
3213 a = lookup_attribute ("naked", attr);
3214 if (a != NULL_TREE)
3215 type |= ARM_FT_NAKED;
3217 a = lookup_attribute ("isr", attr);
3218 if (a == NULL_TREE)
3219 a = lookup_attribute ("interrupt", attr);
3221 if (a == NULL_TREE)
3222 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3223 else
3224 type |= arm_isr_value (TREE_VALUE (a));
3226 return type;
3229 /* Returns the type of the current function. */
3231 unsigned long
3232 arm_current_func_type (void)
3234 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3235 cfun->machine->func_type = arm_compute_func_type ();
3237 return cfun->machine->func_type;
3240 bool
3241 arm_allocate_stack_slots_for_args (void)
3243 /* Naked functions should not allocate stack slots for arguments. */
3244 return !IS_NAKED (arm_current_func_type ());
3247 static bool
3248 arm_warn_func_return (tree decl)
3250 /* Naked functions are implemented entirely in assembly, including the
3251 return sequence, so suppress warnings about this. */
3252 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3256 /* Output assembler code for a block containing the constant parts
3257 of a trampoline, leaving space for the variable parts.
3259 On the ARM (assuming r8 is the static chain regnum, and remembering that
3260 referencing pc adds an offset of 8), the trampoline looks like:
3261 ldr r8, [pc, #0]
3262 ldr pc, [pc]
3263 .word static chain value
3264 .word function's address
3265 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3267 static void
3268 arm_asm_trampoline_template (FILE *f)
3270 if (TARGET_ARM)
3272 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3273 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3275 else if (TARGET_THUMB2)
3277 /* The Thumb-2 trampoline is similar to the arm implementation.
3278 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3279 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3280 STATIC_CHAIN_REGNUM, PC_REGNUM);
3281 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3283 else
3285 ASM_OUTPUT_ALIGN (f, 2);
3286 fprintf (f, "\t.code\t16\n");
3287 fprintf (f, ".Ltrampoline_start:\n");
3288 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3289 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3290 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3291 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3292 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3293 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3295 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3296 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3299 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3301 static void
3302 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3304 rtx fnaddr, mem, a_tramp;
3306 emit_block_move (m_tramp, assemble_trampoline_template (),
3307 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3309 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3310 emit_move_insn (mem, chain_value);
3312 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3313 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3314 emit_move_insn (mem, fnaddr);
3316 a_tramp = XEXP (m_tramp, 0);
3317 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3318 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3319 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3322 /* Thumb trampolines should be entered in thumb mode, so set
3323 the bottom bit of the address. */
3325 static rtx
3326 arm_trampoline_adjust_address (rtx addr)
3328 if (TARGET_THUMB)
3329 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3330 NULL, 0, OPTAB_LIB_WIDEN);
3331 return addr;
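/* Sketch of the effect (the address is purely illustrative): on a Thumb
   target a trampoline at 0x20001000 is returned as 0x20001001; the IOR with
   const1_rtx sets the low bit so that branching to it enters Thumb state.  */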
3334 /* Return 1 if it is possible to return using a single instruction.
3335 If SIBLING is non-null, this is a test for a return before a sibling
3336 call. SIBLING is the call insn, so we can examine its register usage. */
3339 use_return_insn (int iscond, rtx sibling)
3341 int regno;
3342 unsigned int func_type;
3343 unsigned long saved_int_regs;
3344 unsigned HOST_WIDE_INT stack_adjust;
3345 arm_stack_offsets *offsets;
3347 /* Never use a return instruction before reload has run. */
3348 if (!reload_completed)
3349 return 0;
3351 func_type = arm_current_func_type ();
3353 /* Naked, volatile and stack alignment functions need special
3354 consideration. */
3355 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3356 return 0;
3358 /* So do interrupt functions that use the frame pointer and Thumb
3359 interrupt functions. */
3360 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3361 return 0;
3363 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3364 && !optimize_function_for_size_p (cfun))
3365 return 0;
3367 offsets = arm_get_frame_offsets ();
3368 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3370 /* As do variadic functions. */
3371 if (crtl->args.pretend_args_size
3372 || cfun->machine->uses_anonymous_args
3373 /* Or if the function calls __builtin_eh_return () */
3374 || crtl->calls_eh_return
3375 /* Or if the function calls alloca */
3376 || cfun->calls_alloca
3377 /* Or if there is a stack adjustment. However, if the stack pointer
3378 is saved on the stack, we can use a pre-incrementing stack load. */
3379 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3380 && stack_adjust == 4)))
3381 return 0;
3383 saved_int_regs = offsets->saved_regs_mask;
3385 /* Unfortunately, the insn
3387 ldmib sp, {..., sp, ...}
3389 triggers a bug on most SA-110 based devices, such that the stack
3390 pointer won't be correctly restored if the instruction takes a
3391 page fault. We work around this problem by popping r3 along with
3392 the other registers, since that is never slower than executing
3393 another instruction.
3395 We test for !arm_arch5 here, because code for any architecture
3396 less than this could potentially be run on one of the buggy
3397 chips. */
3398 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3400 /* Validate that r3 is a call-clobbered register (always true in
3401 the default abi) ... */
3402 if (!call_used_regs[3])
3403 return 0;
3405 /* ... that it isn't being used for a return value ... */
3406 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3407 return 0;
3409 /* ... or for a tail-call argument ... */
3410 if (sibling)
3412 gcc_assert (CALL_P (sibling));
3414 if (find_regno_fusage (sibling, USE, 3))
3415 return 0;
3418 /* ... and that there are no call-saved registers in r0-r2
3419 (always true in the default ABI). */
3420 if (saved_int_regs & 0x7)
3421 return 0;
3424 /* Can't be done if interworking with Thumb, and any registers have been
3425 stacked. */
3426 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3427 return 0;
3429 /* On StrongARM, conditional returns are expensive if they aren't
3430 taken and multiple registers have been stacked. */
3431 if (iscond && arm_tune_strongarm)
3433 /* Conditional return when just the LR is stored is a simple
3434 conditional-load instruction, that's not expensive. */
3435 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3436 return 0;
3438 if (flag_pic
3439 && arm_pic_register != INVALID_REGNUM
3440 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3441 return 0;
3444 /* If there are saved registers but the LR isn't saved, then we need
3445 two instructions for the return. */
3446 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3447 return 0;
3449 /* Can't be done if any of the VFP regs are pushed,
3450 since this also requires an insn. */
3451 if (TARGET_HARD_FLOAT && TARGET_VFP)
3452 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3453 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3454 return 0;
3456 if (TARGET_REALLY_IWMMXT)
3457 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3458 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3459 return 0;
3461 return 1;
3464 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3465 shrink-wrapping if possible. This is the case if we need to emit a
3466 prologue, which we can test by looking at the offsets. */
3467 bool
3468 use_simple_return_p (void)
3470 arm_stack_offsets *offsets;
3472 offsets = arm_get_frame_offsets ();
3473 return offsets->outgoing_args != 0;
3476 /* Return TRUE if int I is a valid immediate ARM constant. */
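/* For example, 0xFF, 0xFF000000 and 0x00F00000 are all valid ARM
   immediates (an 8-bit value rotated right by an even amount), while
   0xFFF and 0x101 are not, and must be synthesized from several
   instructions or loaded from memory.  Thumb-2 additionally accepts
   replicated byte patterns such as 0xAB00AB00 or 0xABABABAB.  */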
3479 const_ok_for_arm (HOST_WIDE_INT i)
3481 int lowbit;
3483 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3484 be all zero, or all one. */
3485 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3486 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3487 != ((~(unsigned HOST_WIDE_INT) 0)
3488 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3489 return FALSE;
3491 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3493 /* Fast return for 0 and small values. We must do this for zero, since
3494 the code below can't handle that one case. */
3495 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3496 return TRUE;
3498 /* Get the number of trailing zeros. */
3499 lowbit = ffs((int) i) - 1;
3501 /* Only even shifts are allowed in ARM mode so round down to the
3502 nearest even number. */
3503 if (TARGET_ARM)
3504 lowbit &= ~1;
3506 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3507 return TRUE;
3509 if (TARGET_ARM)
3511 /* Allow rotated constants in ARM mode. */
3512 if (lowbit <= 4
3513 && ((i & ~0xc000003f) == 0
3514 || (i & ~0xf000000f) == 0
3515 || (i & ~0xfc000003) == 0))
3516 return TRUE;
3518 else
3520 HOST_WIDE_INT v;
3522 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3523 v = i & 0xff;
3524 v |= v << 16;
3525 if (i == v || i == (v | (v << 8)))
3526 return TRUE;
3528 /* Allow repeated pattern 0xXY00XY00. */
3529 v = i & 0xff00;
3530 v |= v << 16;
3531 if (i == v)
3532 return TRUE;
3535 return FALSE;
3538 /* Return true if I is a valid constant for the operation CODE. */
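/* For example, a SET of 0xFFFFFF00 is acceptable because its complement
   0xFF can be loaded with MVN; a PLUS of 0xFFF is acceptable on Thumb-2
   because ADDW/SUBW take a 12-bit immediate; and an AND accepts any value
   whose complement is a valid immediate, since it can be done with BIC.  */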
3540 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3542 if (const_ok_for_arm (i))
3543 return 1;
3545 switch (code)
3547 case SET:
3548 /* See if we can use movw. */
3549 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3550 return 1;
3551 else
3552 /* Otherwise, try mvn. */
3553 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3555 case PLUS:
3556 /* See if we can use addw or subw. */
3557 if (TARGET_THUMB2
3558 && ((i & 0xfffff000) == 0
3559 || ((-i) & 0xfffff000) == 0))
3560 return 1;
3561 /* else fall through. */
3563 case COMPARE:
3564 case EQ:
3565 case NE:
3566 case GT:
3567 case LE:
3568 case LT:
3569 case GE:
3570 case GEU:
3571 case LTU:
3572 case GTU:
3573 case LEU:
3574 case UNORDERED:
3575 case ORDERED:
3576 case UNEQ:
3577 case UNGE:
3578 case UNLT:
3579 case UNGT:
3580 case UNLE:
3581 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3583 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3584 case XOR:
3585 return 0;
3587 case IOR:
3588 if (TARGET_THUMB2)
3589 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3590 return 0;
3592 case AND:
3593 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3595 default:
3596 gcc_unreachable ();
3600 /* Return true if I is a valid di mode constant for the operation CODE. */
3602 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3604 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3605 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3606 rtx hi = GEN_INT (hi_val);
3607 rtx lo = GEN_INT (lo_val);
3609 if (TARGET_THUMB1)
3610 return 0;
3612 switch (code)
3614 case AND:
3615 case IOR:
3616 case XOR:
3617 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3618 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3619 case PLUS:
3620 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3622 default:
3623 return 0;
3627 /* Emit a sequence of insns to handle a large constant.
3628 CODE is the code of the operation required, it can be any of SET, PLUS,
3629 IOR, AND, XOR, MINUS;
3630 MODE is the mode in which the operation is being performed;
3631 VAL is the integer to operate on;
3632 SOURCE is the other operand (a register, or a null-pointer for SET);
3633 SUBTARGETS means it is safe to create scratch registers if that will
3634 either produce a simpler sequence, or we will want to cse the values.
3635 Return value is the number of insns emitted. */
3637 /* ??? Tweak this for thumb2. */
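/* For example, on a pre-MOVT target a SET of 0xFFF (not a valid
   immediate) is synthesized as two instructions, e.g.

       mov   rD, #0xF00
       orr   rD, rD, #0xFF

   The exact split is chosen by optimal_immediate_sequence below.  */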
3639 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3640 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3642 rtx cond;
3644 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3645 cond = COND_EXEC_TEST (PATTERN (insn));
3646 else
3647 cond = NULL_RTX;
3649 if (subtargets || code == SET
3650 || (REG_P (target) && REG_P (source)
3651 && REGNO (target) != REGNO (source)))
3653 /* After arm_reorg has been called, we can't fix up expensive
3654 constants by pushing them into memory so we must synthesize
3655 them in-line, regardless of the cost. This is only likely to
3656 be more costly on chips that have load delay slots and we are
3657 compiling without running the scheduler (so no splitting
3658 occurred before the final instruction emission).
3660 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3662 if (!cfun->machine->after_arm_reorg
3663 && !cond
3664 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3665 1, 0)
3666 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3667 + (code != SET))))
3669 if (code == SET)
3671 /* Currently SET is the only monadic value for CODE; all
3672 the rest are dyadic. */
3673 if (TARGET_USE_MOVT)
3674 arm_emit_movpair (target, GEN_INT (val));
3675 else
3676 emit_set_insn (target, GEN_INT (val));
3678 return 1;
3680 else
3682 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3684 if (TARGET_USE_MOVT)
3685 arm_emit_movpair (temp, GEN_INT (val));
3686 else
3687 emit_set_insn (temp, GEN_INT (val));
3689 /* For MINUS, the value is subtracted from, since we never
3690 have subtraction of a constant. */
3691 if (code == MINUS)
3692 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3693 else
3694 emit_set_insn (target,
3695 gen_rtx_fmt_ee (code, mode, source, temp));
3696 return 2;
3701 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3705 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3706 ARM/THUMB2 immediates and add up to VAL.
3707 The function return value gives the number of insns required. */
3708 static int
3709 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3710 struct four_ints *return_sequence)
3712 int best_consecutive_zeros = 0;
3713 int i;
3714 int best_start = 0;
3715 int insns1, insns2;
3716 struct four_ints tmp_sequence;
3718 /* If we aren't targeting ARM, the best place to start is always at
3719 the bottom, otherwise look more closely. */
3720 if (TARGET_ARM)
3722 for (i = 0; i < 32; i += 2)
3724 int consecutive_zeros = 0;
3726 if (!(val & (3 << i)))
3728 while ((i < 32) && !(val & (3 << i)))
3730 consecutive_zeros += 2;
3731 i += 2;
3733 if (consecutive_zeros > best_consecutive_zeros)
3735 best_consecutive_zeros = consecutive_zeros;
3736 best_start = i - consecutive_zeros;
3738 i -= 2;
3743 /* So long as it won't require any more insns to do so, it's
3744 desirable to emit a small constant (in bits 0...9) in the last
3745 insn. This way there is more chance that it can be combined with
3746 a later addressing insn to form a pre-indexed load or store
3747 operation. Consider:
3749 *((volatile int *)0xe0000100) = 1;
3750 *((volatile int *)0xe0000110) = 2;
3752 We want this to wind up as:
3754 mov rA, #0xe0000000
3755 mov rB, #1
3756 str rB, [rA, #0x100]
3757 mov rB, #2
3758 str rB, [rA, #0x110]
3760 rather than having to synthesize both large constants from scratch.
3762 Therefore, we calculate how many insns would be required to emit
3763 the constant starting from `best_start', and also starting from
3764 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3765 yield a shorter sequence, we may as well use zero. */
3766 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3767 if (best_start != 0
3768 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3770 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3771 if (insns2 <= insns1)
3773 *return_sequence = tmp_sequence;
3774 insns1 = insns2;
3778 return insns1;
3781 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3782 static int
3783 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3784 struct four_ints *return_sequence, int i)
3786 int remainder = val & 0xffffffff;
3787 int insns = 0;
3789 /* Try and find a way of doing the job in either two or three
3790 instructions.
3792 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3793 location. We start at position I. This may be the MSB, or
3794 optimal_immediate_sequence may have positioned it at the largest block
3795 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3796 wrapping around to the top of the word when we drop off the bottom.
3797 In the worst case this code should produce no more than four insns.
3799 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3800 constants, shifted to any arbitrary location. We should always start
3801 at the MSB. */
3804 int end;
3805 unsigned int b1, b2, b3, b4;
3806 unsigned HOST_WIDE_INT result;
3807 int loc;
3809 gcc_assert (insns < 4);
3811 if (i <= 0)
3812 i += 32;
3814 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3815 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3817 loc = i;
3818 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3819 /* We can use addw/subw for the last 12 bits. */
3820 result = remainder;
3821 else
3823 /* Use an 8-bit shifted/rotated immediate. */
3824 end = i - 8;
3825 if (end < 0)
3826 end += 32;
3827 result = remainder & ((0x0ff << end)
3828 | ((i < end) ? (0xff >> (32 - end))
3829 : 0));
3830 i -= 8;
3833 else
3835 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3836 arbitrary shifts. */
3837 i -= TARGET_ARM ? 2 : 1;
3838 continue;
3841 /* Next, see if we can do a better job with a thumb2 replicated
3842 constant.
3844 We do it this way around to catch the cases like 0x01F001E0 where
3845 two 8-bit immediates would work, but a replicated constant would
3846 make it worse.
3848 TODO: 16-bit constants that don't clear all the bits, but still win.
3849 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3850 if (TARGET_THUMB2)
3852 b1 = (remainder & 0xff000000) >> 24;
3853 b2 = (remainder & 0x00ff0000) >> 16;
3854 b3 = (remainder & 0x0000ff00) >> 8;
3855 b4 = remainder & 0xff;
3857 if (loc > 24)
3859 /* The 8-bit immediate already found clears b1 (and maybe b2),
3860 but must leave b3 and b4 alone. */
3862 /* First try to find a 32-bit replicated constant that clears
3863 almost everything. We can assume that we can't do it in one,
3864 or else we wouldn't be here. */
3865 unsigned int tmp = b1 & b2 & b3 & b4;
3866 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3867 + (tmp << 24);
3868 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3869 + (tmp == b3) + (tmp == b4);
3870 if (tmp
3871 && (matching_bytes >= 3
3872 || (matching_bytes == 2
3873 && const_ok_for_op (remainder & ~tmp2, code))))
3875 /* At least 3 of the bytes match, and the fourth has at
3876 least as many bits set, or two of the bytes match
3877 and it will only require one more insn to finish. */
3878 result = tmp2;
3879 i = tmp != b1 ? 32
3880 : tmp != b2 ? 24
3881 : tmp != b3 ? 16
3882 : 8;
3885 /* Second, try to find a 16-bit replicated constant that can
3886 leave three of the bytes clear. If b2 or b4 is already
3887 zero, then we can. If the 8-bit from above would not
3888 clear b2 anyway, then we still win. */
3889 else if (b1 == b3 && (!b2 || !b4
3890 || (remainder & 0x00ff0000 & ~result)))
3892 result = remainder & 0xff00ff00;
3893 i = 24;
3896 else if (loc > 16)
3898 /* The 8-bit immediate already found clears b2 (and maybe b3)
3899 and we don't get here unless b1 is already clear, but it will
3900 leave b4 unchanged. */
3902 /* If we can clear b2 and b4 at once, then we win, since the
3903 8-bits couldn't possibly reach that far. */
3904 if (b2 == b4)
3906 result = remainder & 0x00ff00ff;
3907 i = 16;
3912 return_sequence->i[insns++] = result;
3913 remainder &= ~result;
3915 if (code == SET || code == MINUS)
3916 code = PLUS;
3918 while (remainder);
3920 return insns;
3923 /* Emit an instruction with the indicated PATTERN. If COND is
3924 non-NULL, conditionalize the execution of the instruction on COND
3925 being true. */
3927 static void
3928 emit_constant_insn (rtx cond, rtx pattern)
3930 if (cond)
3931 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3932 emit_insn (pattern);
3935 /* As above, but extra parameter GENERATE which, if clear, suppresses
3936 RTL generation. */
3938 static int
3939 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3940 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3941 int generate)
3943 int can_invert = 0;
3944 int can_negate = 0;
3945 int final_invert = 0;
3946 int i;
3947 int set_sign_bit_copies = 0;
3948 int clear_sign_bit_copies = 0;
3949 int clear_zero_bit_copies = 0;
3950 int set_zero_bit_copies = 0;
3951 int insns = 0, neg_insns, inv_insns;
3952 unsigned HOST_WIDE_INT temp1, temp2;
3953 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3954 struct four_ints *immediates;
3955 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3957 /* Find out which operations are safe for a given CODE. Also do a quick
3958 check for degenerate cases; these can occur when DImode operations
3959 are split. */
3960 switch (code)
3962 case SET:
3963 can_invert = 1;
3964 break;
3966 case PLUS:
3967 can_negate = 1;
3968 break;
3970 case IOR:
3971 if (remainder == 0xffffffff)
3973 if (generate)
3974 emit_constant_insn (cond,
3975 gen_rtx_SET (VOIDmode, target,
3976 GEN_INT (ARM_SIGN_EXTEND (val))));
3977 return 1;
3980 if (remainder == 0)
3982 if (reload_completed && rtx_equal_p (target, source))
3983 return 0;
3985 if (generate)
3986 emit_constant_insn (cond,
3987 gen_rtx_SET (VOIDmode, target, source));
3988 return 1;
3990 break;
3992 case AND:
3993 if (remainder == 0)
3995 if (generate)
3996 emit_constant_insn (cond,
3997 gen_rtx_SET (VOIDmode, target, const0_rtx));
3998 return 1;
4000 if (remainder == 0xffffffff)
4002 if (reload_completed && rtx_equal_p (target, source))
4003 return 0;
4004 if (generate)
4005 emit_constant_insn (cond,
4006 gen_rtx_SET (VOIDmode, target, source));
4007 return 1;
4009 can_invert = 1;
4010 break;
4012 case XOR:
4013 if (remainder == 0)
4015 if (reload_completed && rtx_equal_p (target, source))
4016 return 0;
4017 if (generate)
4018 emit_constant_insn (cond,
4019 gen_rtx_SET (VOIDmode, target, source));
4020 return 1;
4023 if (remainder == 0xffffffff)
4025 if (generate)
4026 emit_constant_insn (cond,
4027 gen_rtx_SET (VOIDmode, target,
4028 gen_rtx_NOT (mode, source)));
4029 return 1;
4031 final_invert = 1;
4032 break;
4034 case MINUS:
4035 /* We treat MINUS as (val - source), since (source - val) is always
4036 passed as (source + (-val)). */
4037 if (remainder == 0)
4039 if (generate)
4040 emit_constant_insn (cond,
4041 gen_rtx_SET (VOIDmode, target,
4042 gen_rtx_NEG (mode, source)));
4043 return 1;
4045 if (const_ok_for_arm (val))
4047 if (generate)
4048 emit_constant_insn (cond,
4049 gen_rtx_SET (VOIDmode, target,
4050 gen_rtx_MINUS (mode, GEN_INT (val),
4051 source)));
4052 return 1;
4055 break;
4057 default:
4058 gcc_unreachable ();
4061 /* If we can do it in one insn get out quickly. */
4062 if (const_ok_for_op (val, code))
4064 if (generate)
4065 emit_constant_insn (cond,
4066 gen_rtx_SET (VOIDmode, target,
4067 (source
4068 ? gen_rtx_fmt_ee (code, mode, source,
4069 GEN_INT (val))
4070 : GEN_INT (val))));
4071 return 1;
4074 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4075 insn. */
4076 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4077 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4079 if (generate)
4081 if (mode == SImode && i == 16)
4082 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4083 smaller insn. */
4084 emit_constant_insn (cond,
4085 gen_zero_extendhisi2
4086 (target, gen_lowpart (HImode, source)));
4087 else
4088 /* extzv only supports SImode, but we can coerce the operands
4089 into that mode. */
4090 emit_constant_insn (cond,
4091 gen_extzv_t2 (gen_lowpart (SImode, target),
4092 gen_lowpart (SImode, source),
4093 GEN_INT (i), const0_rtx));
4096 return 1;
4099 /* Calculate a few attributes that may be useful for specific
4100 optimizations. */
4101 /* Count number of leading zeros. */
4102 for (i = 31; i >= 0; i--)
4104 if ((remainder & (1 << i)) == 0)
4105 clear_sign_bit_copies++;
4106 else
4107 break;
4110 /* Count number of leading 1's. */
4111 for (i = 31; i >= 0; i--)
4113 if ((remainder & (1 << i)) != 0)
4114 set_sign_bit_copies++;
4115 else
4116 break;
4119 /* Count number of trailing zeros. */
4120 for (i = 0; i <= 31; i++)
4122 if ((remainder & (1 << i)) == 0)
4123 clear_zero_bit_copies++;
4124 else
4125 break;
4128 /* Count number of trailing 1's. */
4129 for (i = 0; i <= 31; i++)
4131 if ((remainder & (1 << i)) != 0)
4132 set_zero_bit_copies++;
4133 else
4134 break;
4137 switch (code)
4139 case SET:
4140 /* See if we can do this by sign_extending a constant that is known
4141 to be negative. This is a good way of doing it, since the shift
4142 may well merge into a subsequent insn. */
4143 if (set_sign_bit_copies > 1)
4145 if (const_ok_for_arm
4146 (temp1 = ARM_SIGN_EXTEND (remainder
4147 << (set_sign_bit_copies - 1))))
4149 if (generate)
4151 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4152 emit_constant_insn (cond,
4153 gen_rtx_SET (VOIDmode, new_src,
4154 GEN_INT (temp1)));
4155 emit_constant_insn (cond,
4156 gen_ashrsi3 (target, new_src,
4157 GEN_INT (set_sign_bit_copies - 1)));
4159 return 2;
4161 /* For an inverted constant, we will need to set the low bits,
4162 these will be shifted out of harm's way. */
4163 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4164 if (const_ok_for_arm (~temp1))
4166 if (generate)
4168 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4169 emit_constant_insn (cond,
4170 gen_rtx_SET (VOIDmode, new_src,
4171 GEN_INT (temp1)));
4172 emit_constant_insn (cond,
4173 gen_ashrsi3 (target, new_src,
4174 GEN_INT (set_sign_bit_copies - 1)));
4176 return 2;
4180 /* See if we can calculate the value as the difference between two
4181 valid immediates. */
4182 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4184 int topshift = clear_sign_bit_copies & ~1;
4186 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4187 & (0xff000000 >> topshift));
4189 /* If temp1 is zero, then that means the 9 most significant
4190 bits of remainder were 1 and we've caused it to overflow.
4191 When topshift is 0 we don't need to do anything since we
4192 can borrow from 'bit 32'. */
4193 if (temp1 == 0 && topshift != 0)
4194 temp1 = 0x80000000 >> (topshift - 1);
4196 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4198 if (const_ok_for_arm (temp2))
4200 if (generate)
4202 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4203 emit_constant_insn (cond,
4204 gen_rtx_SET (VOIDmode, new_src,
4205 GEN_INT (temp1)));
4206 emit_constant_insn (cond,
4207 gen_addsi3 (target, new_src,
4208 GEN_INT (-temp2)));
4211 return 2;
4215 /* See if we can generate this by setting the bottom (or the top)
4216 16 bits, and then shifting these into the other half of the
4217 word. We only look for the simplest cases, to do more would cost
4218 too much. Be careful, however, not to generate this when the
4219 alternative would take fewer insns. */
4220 if (val & 0xffff0000)
4222 temp1 = remainder & 0xffff0000;
4223 temp2 = remainder & 0x0000ffff;
4225 /* Overlaps outside this range are best done using other methods. */
4226 for (i = 9; i < 24; i++)
4228 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4229 && !const_ok_for_arm (temp2))
4231 rtx new_src = (subtargets
4232 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4233 : target);
4234 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4235 source, subtargets, generate);
4236 source = new_src;
4237 if (generate)
4238 emit_constant_insn
4239 (cond,
4240 gen_rtx_SET
4241 (VOIDmode, target,
4242 gen_rtx_IOR (mode,
4243 gen_rtx_ASHIFT (mode, source,
4244 GEN_INT (i)),
4245 source)));
4246 return insns + 1;
4250 /* Don't duplicate cases already considered. */
4251 for (i = 17; i < 24; i++)
4253 if (((temp1 | (temp1 >> i)) == remainder)
4254 && !const_ok_for_arm (temp1))
4256 rtx new_src = (subtargets
4257 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4258 : target);
4259 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4260 source, subtargets, generate);
4261 source = new_src;
4262 if (generate)
4263 emit_constant_insn
4264 (cond,
4265 gen_rtx_SET (VOIDmode, target,
4266 gen_rtx_IOR
4267 (mode,
4268 gen_rtx_LSHIFTRT (mode, source,
4269 GEN_INT (i)),
4270 source)));
4271 return insns + 1;
4275 break;
4277 case IOR:
4278 case XOR:
4279 /* If we have IOR or XOR, and the constant can be loaded in a
4280 single instruction, and we can find a temporary to put it in,
4281 then this can be done in two instructions instead of 3-4. */
4282 if (subtargets
4283 /* TARGET can't be NULL if SUBTARGETS is 0 */
4284 || (reload_completed && !reg_mentioned_p (target, source)))
4286 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4288 if (generate)
4290 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4292 emit_constant_insn (cond,
4293 gen_rtx_SET (VOIDmode, sub,
4294 GEN_INT (val)));
4295 emit_constant_insn (cond,
4296 gen_rtx_SET (VOIDmode, target,
4297 gen_rtx_fmt_ee (code, mode,
4298 source, sub)));
4300 return 2;
4304 if (code == XOR)
4305 break;
4307 /* Convert.
4308 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4309 and the remainder 0s, e.g. 0xfff00000)
4310 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4312 This can be done in 2 instructions by using shifts with mov or mvn.
4313 e.g. for
4314 x = x | 0xfff00000;
4315 we generate.
4316 mvn r0, r0, asl #12
4317 mvn r0, r0, lsr #12 */
4318 if (set_sign_bit_copies > 8
4319 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4321 if (generate)
4323 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4324 rtx shift = GEN_INT (set_sign_bit_copies);
4326 emit_constant_insn
4327 (cond,
4328 gen_rtx_SET (VOIDmode, sub,
4329 gen_rtx_NOT (mode,
4330 gen_rtx_ASHIFT (mode,
4331 source,
4332 shift))));
4333 emit_constant_insn
4334 (cond,
4335 gen_rtx_SET (VOIDmode, target,
4336 gen_rtx_NOT (mode,
4337 gen_rtx_LSHIFTRT (mode, sub,
4338 shift))));
4340 return 2;
4343 /* Convert
4344 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4346 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4348 E.g. for r0 = r0 | 0xfff
4349 mvn r0, r0, lsr #12
4350 mvn r0, r0, asl #12
4353 if (set_zero_bit_copies > 8
4354 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4356 if (generate)
4358 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4359 rtx shift = GEN_INT (set_zero_bit_copies);
4361 emit_constant_insn
4362 (cond,
4363 gen_rtx_SET (VOIDmode, sub,
4364 gen_rtx_NOT (mode,
4365 gen_rtx_LSHIFTRT (mode,
4366 source,
4367 shift))));
4368 emit_constant_insn
4369 (cond,
4370 gen_rtx_SET (VOIDmode, target,
4371 gen_rtx_NOT (mode,
4372 gen_rtx_ASHIFT (mode, sub,
4373 shift))));
4375 return 2;
4378 /* This will never be reached for Thumb2 because orn is a valid
4379 instruction. This is for Thumb1 and the ARM 32 bit cases.
4381 x = y | constant (such that ~constant is a valid constant)
4382 Transform this to
4383 x = ~(~y & ~constant).
4385 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4387 if (generate)
4389 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4390 emit_constant_insn (cond,
4391 gen_rtx_SET (VOIDmode, sub,
4392 gen_rtx_NOT (mode, source)));
4393 source = sub;
4394 if (subtargets)
4395 sub = gen_reg_rtx (mode);
4396 emit_constant_insn (cond,
4397 gen_rtx_SET (VOIDmode, sub,
4398 gen_rtx_AND (mode, source,
4399 GEN_INT (temp1))));
4400 emit_constant_insn (cond,
4401 gen_rtx_SET (VOIDmode, target,
4402 gen_rtx_NOT (mode, sub)));
4404 return 3;
4406 break;
4408 case AND:
4409 /* See if two shifts will do 2 or more insn's worth of work. */
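/* For example, an AND with a mask whose top 18 bits are clear can be done
   with "lsl #18; lsr #18" (clearing those bits), plus whatever is needed
   for the rest of the mask, which is often cheaper than synthesizing the
   full mask as immediates.  */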
4410 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4412 HOST_WIDE_INT shift_mask = ((0xffffffff
4413 << (32 - clear_sign_bit_copies))
4414 & 0xffffffff);
4416 if ((remainder | shift_mask) != 0xffffffff)
4418 if (generate)
4420 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4421 insns = arm_gen_constant (AND, mode, cond,
4422 remainder | shift_mask,
4423 new_src, source, subtargets, 1);
4424 source = new_src;
4426 else
4428 rtx targ = subtargets ? NULL_RTX : target;
4429 insns = arm_gen_constant (AND, mode, cond,
4430 remainder | shift_mask,
4431 targ, source, subtargets, 0);
4435 if (generate)
4437 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4438 rtx shift = GEN_INT (clear_sign_bit_copies);
4440 emit_insn (gen_ashlsi3 (new_src, source, shift));
4441 emit_insn (gen_lshrsi3 (target, new_src, shift));
4444 return insns + 2;
4447 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4449 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4451 if ((remainder | shift_mask) != 0xffffffff)
4453 if (generate)
4455 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4457 insns = arm_gen_constant (AND, mode, cond,
4458 remainder | shift_mask,
4459 new_src, source, subtargets, 1);
4460 source = new_src;
4462 else
4464 rtx targ = subtargets ? NULL_RTX : target;
4466 insns = arm_gen_constant (AND, mode, cond,
4467 remainder | shift_mask,
4468 targ, source, subtargets, 0);
4472 if (generate)
4474 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4475 rtx shift = GEN_INT (clear_zero_bit_copies);
4477 emit_insn (gen_lshrsi3 (new_src, source, shift));
4478 emit_insn (gen_ashlsi3 (target, new_src, shift));
4481 return insns + 2;
4484 break;
4486 default:
4487 break;
4490 /* Calculate what the instruction sequences would be if we generated it
4491 normally, negated, or inverted. */
4492 if (code == AND)
4493 /* AND cannot be split into multiple insns, so invert and use BIC. */
4494 insns = 99;
4495 else
4496 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4498 if (can_negate)
4499 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4500 &neg_immediates);
4501 else
4502 neg_insns = 99;
4504 if (can_invert || final_invert)
4505 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4506 &inv_immediates);
4507 else
4508 inv_insns = 99;
4510 immediates = &pos_immediates;
4512 /* Is the negated immediate sequence more efficient? */
4513 if (neg_insns < insns && neg_insns <= inv_insns)
4515 insns = neg_insns;
4516 immediates = &neg_immediates;
4518 else
4519 can_negate = 0;
4521 /* Is the inverted immediate sequence more efficient?
4522 We must allow for an extra NOT instruction for XOR operations, although
4523 there is some chance that the final 'mvn' will get optimized later. */
4524 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4526 insns = inv_insns;
4527 immediates = &inv_immediates;
4529 else
4531 can_invert = 0;
4532 final_invert = 0;
4535 /* Now output the chosen sequence as instructions. */
4536 if (generate)
4538 for (i = 0; i < insns; i++)
4540 rtx new_src, temp1_rtx;
4542 temp1 = immediates->i[i];
4544 if (code == SET || code == MINUS)
4545 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4546 else if ((final_invert || i < (insns - 1)) && subtargets)
4547 new_src = gen_reg_rtx (mode);
4548 else
4549 new_src = target;
4551 if (can_invert)
4552 temp1 = ~temp1;
4553 else if (can_negate)
4554 temp1 = -temp1;
4556 temp1 = trunc_int_for_mode (temp1, mode);
4557 temp1_rtx = GEN_INT (temp1);
4559 if (code == SET)
4561 else if (code == MINUS)
4562 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4563 else
4564 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4566 emit_constant_insn (cond,
4567 gen_rtx_SET (VOIDmode, new_src,
4568 temp1_rtx));
4569 source = new_src;
4571 if (code == SET)
4573 can_negate = can_invert;
4574 can_invert = 0;
4575 code = PLUS;
4577 else if (code == MINUS)
4578 code = PLUS;
4582 if (final_invert)
4584 if (generate)
4585 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4586 gen_rtx_NOT (mode, source)));
4587 insns++;
4590 return insns;
4593 /* Canonicalize a comparison so that we are more likely to recognize it.
4594 This can be done for a few constant compares, where we can make the
4595 immediate value easier to load. */
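/* For example, "x <= 0x3FF" cannot use CMP (or CMN) directly because
   neither 0x3FF nor -0x3FF is a valid immediate, but it can be rewritten
   as "x < 0x400", and 0x400 is.  */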
4597 static void
4598 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4599 bool op0_preserve_value)
4601 enum machine_mode mode;
4602 unsigned HOST_WIDE_INT i, maxval;
4604 mode = GET_MODE (*op0);
4605 if (mode == VOIDmode)
4606 mode = GET_MODE (*op1);
4608 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4610 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4611 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4612 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4613 for GTU/LEU in Thumb mode. */
4614 if (mode == DImode)
4616 rtx tem;
4618 if (*code == GT || *code == LE
4619 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4621 /* Missing comparison. First try to use an available
4622 comparison. */
4623 if (CONST_INT_P (*op1))
4625 i = INTVAL (*op1);
4626 switch (*code)
4628 case GT:
4629 case LE:
4630 if (i != maxval
4631 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4633 *op1 = GEN_INT (i + 1);
4634 *code = *code == GT ? GE : LT;
4635 return;
4637 break;
4638 case GTU:
4639 case LEU:
4640 if (i != ~((unsigned HOST_WIDE_INT) 0)
4641 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4643 *op1 = GEN_INT (i + 1);
4644 *code = *code == GTU ? GEU : LTU;
4645 return;
4647 break;
4648 default:
4649 gcc_unreachable ();
4653 /* If that did not work, reverse the condition. */
4654 if (!op0_preserve_value)
4656 tem = *op0;
4657 *op0 = *op1;
4658 *op1 = tem;
4659 *code = (int)swap_condition ((enum rtx_code)*code);
4662 return;
4665 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4666 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4667 to facilitate possible combining with a cmp into 'ands'. */
4668 if (mode == SImode
4669 && GET_CODE (*op0) == ZERO_EXTEND
4670 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4671 && GET_MODE (XEXP (*op0, 0)) == QImode
4672 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4673 && subreg_lowpart_p (XEXP (*op0, 0))
4674 && *op1 == const0_rtx)
4675 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4676 GEN_INT (255));
4678 /* Comparisons smaller than DImode. Only adjust comparisons against
4679 an out-of-range constant. */
4680 if (!CONST_INT_P (*op1)
4681 || const_ok_for_arm (INTVAL (*op1))
4682 || const_ok_for_arm (- INTVAL (*op1)))
4683 return;
4685 i = INTVAL (*op1);
4687 switch (*code)
4689 case EQ:
4690 case NE:
4691 return;
4693 case GT:
4694 case LE:
4695 if (i != maxval
4696 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4698 *op1 = GEN_INT (i + 1);
4699 *code = *code == GT ? GE : LT;
4700 return;
4702 break;
4704 case GE:
4705 case LT:
4706 if (i != ~maxval
4707 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4709 *op1 = GEN_INT (i - 1);
4710 *code = *code == GE ? GT : LE;
4711 return;
4713 break;
4715 case GTU:
4716 case LEU:
4717 if (i != ~((unsigned HOST_WIDE_INT) 0)
4718 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4720 *op1 = GEN_INT (i + 1);
4721 *code = *code == GTU ? GEU : LTU;
4722 return;
4724 break;
4726 case GEU:
4727 case LTU:
4728 if (i != 0
4729 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4731 *op1 = GEN_INT (i - 1);
4732 *code = *code == GEU ? GTU : LEU;
4733 return;
4735 break;
4737 default:
4738 gcc_unreachable ();
4743 /* Define how to find the value returned by a function. */
4745 static rtx
4746 arm_function_value(const_tree type, const_tree func,
4747 bool outgoing ATTRIBUTE_UNUSED)
4749 enum machine_mode mode;
4750 int unsignedp ATTRIBUTE_UNUSED;
4751 rtx r ATTRIBUTE_UNUSED;
4753 mode = TYPE_MODE (type);
4755 if (TARGET_AAPCS_BASED)
4756 return aapcs_allocate_return_reg (mode, type, func);
4758 /* Promote integer types. */
4759 if (INTEGRAL_TYPE_P (type))
4760 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4762 /* Promote small structs returned in a register to full-word size
4763 for big-endian AAPCS. */
4764 if (arm_return_in_msb (type))
4766 HOST_WIDE_INT size = int_size_in_bytes (type);
4767 if (size % UNITS_PER_WORD != 0)
4769 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4770 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4774 return arm_libcall_value_1 (mode);
4777 /* libcall hashtable helpers. */
4779 struct libcall_hasher : typed_noop_remove <rtx_def>
4781 typedef rtx_def value_type;
4782 typedef rtx_def compare_type;
4783 static inline hashval_t hash (const value_type *);
4784 static inline bool equal (const value_type *, const compare_type *);
4785 static inline void remove (value_type *);
4788 inline bool
4789 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4791 return rtx_equal_p (p1, p2);
4794 inline hashval_t
4795 libcall_hasher::hash (const value_type *p1)
4797 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4800 typedef hash_table<libcall_hasher> libcall_table_type;
4802 static void
4803 add_libcall (libcall_table_type *htab, rtx libcall)
4805 *htab->find_slot (libcall, INSERT) = libcall;
4808 static bool
4809 arm_libcall_uses_aapcs_base (const_rtx libcall)
4811 static bool init_done = false;
4812 static libcall_table_type *libcall_htab = NULL;
4814 if (!init_done)
4816 init_done = true;
4818 libcall_htab = new libcall_table_type (31);
4819 add_libcall (libcall_htab,
4820 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4821 add_libcall (libcall_htab,
4822 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4823 add_libcall (libcall_htab,
4824 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4825 add_libcall (libcall_htab,
4826 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4828 add_libcall (libcall_htab,
4829 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4830 add_libcall (libcall_htab,
4831 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4832 add_libcall (libcall_htab,
4833 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4834 add_libcall (libcall_htab,
4835 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4837 add_libcall (libcall_htab,
4838 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4839 add_libcall (libcall_htab,
4840 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4841 add_libcall (libcall_htab,
4842 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4843 add_libcall (libcall_htab,
4844 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4845 add_libcall (libcall_htab,
4846 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4847 add_libcall (libcall_htab,
4848 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4849 add_libcall (libcall_htab,
4850 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4851 add_libcall (libcall_htab,
4852 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4854 /* Values from double-precision helper functions are returned in core
4855 registers if the selected core only supports single-precision
4856 arithmetic, even if we are using the hard-float ABI. The same is
4857 true for single-precision helpers, but we will never be using the
4858 hard-float ABI on a CPU which doesn't support single-precision
4859 operations in hardware. */
4860 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4861 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4862 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4863 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4864 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4865 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4866 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4867 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4868 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4869 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4870 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4871 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4872 SFmode));
4873 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4874 DFmode));
4877 return libcall && libcall_htab->find (libcall) != NULL;
4880 static rtx
4881 arm_libcall_value_1 (enum machine_mode mode)
4883 if (TARGET_AAPCS_BASED)
4884 return aapcs_libcall_value (mode);
4885 else if (TARGET_IWMMXT_ABI
4886 && arm_vector_mode_supported_p (mode))
4887 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4888 else
4889 return gen_rtx_REG (mode, ARG_REGISTER (1));
4892 /* Define how to find the value returned by a library function
4893 assuming the value has mode MODE. */
4895 static rtx
4896 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4898 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4899 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4901 /* The following libcalls return their result in integer registers,
4902 even though they return a floating point value. */
4903 if (arm_libcall_uses_aapcs_base (libcall))
4904 return gen_rtx_REG (mode, ARG_REGISTER(1));
4908 return arm_libcall_value_1 (mode);
4911 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4913 static bool
4914 arm_function_value_regno_p (const unsigned int regno)
4916 if (regno == ARG_REGISTER (1)
4917 || (TARGET_32BIT
4918 && TARGET_AAPCS_BASED
4919 && TARGET_VFP
4920 && TARGET_HARD_FLOAT
4921 && regno == FIRST_VFP_REGNUM)
4922 || (TARGET_IWMMXT_ABI
4923 && regno == FIRST_IWMMXT_REGNUM))
4924 return true;
4926 return false;
4929 /* Determine the amount of memory needed to store the possible return
4930 registers of an untyped call. */
4932 arm_apply_result_size (void)
4934 int size = 16;
4936 if (TARGET_32BIT)
4938 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4939 size += 32;
4940 if (TARGET_IWMMXT_ABI)
4941 size += 8;
4944 return size;
4947 /* Decide whether TYPE should be returned in memory (true)
4948 or in a register (false). FNTYPE is the type of the function making
4949 the call. */
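/* For example, under the AAPCS a struct of two ints (8 bytes) is returned
   in memory, while a struct of two doubles can come back in VFP registers
   (d0-d1) when the hard-float co-processor return rules apply; under the
   old APCS any aggregate larger than one word also goes via memory.  */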
4950 static bool
4951 arm_return_in_memory (const_tree type, const_tree fntype)
4953 HOST_WIDE_INT size;
4955 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4957 if (TARGET_AAPCS_BASED)
4959 /* Simple, non-aggregate types (ie not including vectors and
4960 complex) are always returned in a register (or registers).
4961 We don't care about which register here, so we can short-cut
4962 some of the detail. */
4963 if (!AGGREGATE_TYPE_P (type)
4964 && TREE_CODE (type) != VECTOR_TYPE
4965 && TREE_CODE (type) != COMPLEX_TYPE)
4966 return false;
4968 /* Any return value that is no larger than one word can be
4969 returned in r0. */
4970 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4971 return false;
4973 /* Check any available co-processors to see if they accept the
4974 type as a register candidate (VFP, for example, can return
4975 some aggregates in consecutive registers). These aren't
4976 available if the call is variadic. */
4977 if (aapcs_select_return_coproc (type, fntype) >= 0)
4978 return false;
4980 /* Vector values should be returned using ARM registers, not
4981 memory (unless they're over 16 bytes, which will break since
4982 we only have four call-clobbered registers to play with). */
4983 if (TREE_CODE (type) == VECTOR_TYPE)
4984 return (size < 0 || size > (4 * UNITS_PER_WORD));
4986 /* The rest go in memory. */
4987 return true;
4990 if (TREE_CODE (type) == VECTOR_TYPE)
4991 return (size < 0 || size > (4 * UNITS_PER_WORD));
4993 if (!AGGREGATE_TYPE_P (type) &&
4994 (TREE_CODE (type) != VECTOR_TYPE))
4995 /* All simple types are returned in registers. */
4996 return false;
4998 if (arm_abi != ARM_ABI_APCS)
5000 /* ATPCS and later return aggregate types in memory only if they are
5001 larger than a word (or are variable size). */
5002 return (size < 0 || size > UNITS_PER_WORD);
5005 /* For the arm-wince targets we choose to be compatible with Microsoft's
5006 ARM and Thumb compilers, which always return aggregates in memory. */
5007 #ifndef ARM_WINCE
5008 /* All structures/unions bigger than one word are returned in memory.
5009 Also catch the case where int_size_in_bytes returns -1. In this case
5010 the aggregate is either huge or of variable size, and in either case
5011 we will want to return it via memory and not in a register. */
5012 if (size < 0 || size > UNITS_PER_WORD)
5013 return true;
5015 if (TREE_CODE (type) == RECORD_TYPE)
5017 tree field;
5019 /* For a struct the APCS says that we only return in a register
5020 if the type is 'integer like' and every addressable element
5021 has an offset of zero. For practical purposes this means
5022 that the structure can have at most one non bit-field element
5023 and that this element must be the first one in the structure. */
5025 /* Find the first field, ignoring non FIELD_DECL things which will
5026 have been created by C++. */
5027 for (field = TYPE_FIELDS (type);
5028 field && TREE_CODE (field) != FIELD_DECL;
5029 field = DECL_CHAIN (field))
5030 continue;
5032 if (field == NULL)
5033 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5035 /* Check that the first field is valid for returning in a register. */
5037 /* ... Floats are not allowed */
5038 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5039 return true;
5041 /* ... Aggregates that are not themselves valid for returning in
5042 a register are not allowed. */
5043 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5044 return true;
5046 /* Now check the remaining fields, if any. Only bitfields are allowed,
5047 since they are not addressable. */
5048 for (field = DECL_CHAIN (field);
5049 field;
5050 field = DECL_CHAIN (field))
5052 if (TREE_CODE (field) != FIELD_DECL)
5053 continue;
5055 if (!DECL_BIT_FIELD_TYPE (field))
5056 return true;
5059 return false;
5062 if (TREE_CODE (type) == UNION_TYPE)
5064 tree field;
5066 /* Unions can be returned in registers if every element is
5067 integral, or can be returned in an integer register. */
5068 for (field = TYPE_FIELDS (type);
5069 field;
5070 field = DECL_CHAIN (field))
5072 if (TREE_CODE (field) != FIELD_DECL)
5073 continue;
5075 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5076 return true;
5078 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5079 return true;
5082 return false;
5084 #endif /* not ARM_WINCE */
5086 /* Return all other types in memory. */
5087 return true;
5090 const struct pcs_attribute_arg
5092 const char *arg;
5093 enum arm_pcs value;
5094 } pcs_attribute_args[] =
5096 {"aapcs", ARM_PCS_AAPCS},
5097 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5098 #if 0
5099 /* We could recognize these, but changes would be needed elsewhere
5100 * to implement them. */
5101 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5102 {"atpcs", ARM_PCS_ATPCS},
5103 {"apcs", ARM_PCS_APCS},
5104 #endif
5105 {NULL, ARM_PCS_UNKNOWN}
5108 static enum arm_pcs
5109 arm_pcs_from_attribute (tree attr)
5111 const struct pcs_attribute_arg *ptr;
5112 const char *arg;
5114 /* Get the value of the argument. */
5115 if (TREE_VALUE (attr) == NULL_TREE
5116 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5117 return ARM_PCS_UNKNOWN;
5119 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5121 /* Check it against the list of known arguments. */
5122 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5123 if (streq (arg, ptr->arg))
5124 return ptr->value;
5126 /* An unrecognized PCS variant. */
5127 return ARM_PCS_UNKNOWN;
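/* The strings recognized here come from the "pcs" function attribute, e.g.

     double f (double) __attribute__ ((pcs ("aapcs-vfp")));

   which requests the VFP (hard-float) variant of the AAPCS for calls to f;
   arm_pcs_from_attribute maps such a string to the corresponding enum.  */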
5130 /* Get the PCS variant to use for this call. TYPE is the function's type
5131 specification, DECL is the specific declaration. DECL may be null if
5132 the call could be indirect or if this is a library call. */
5133 static enum arm_pcs
5134 arm_get_pcs_model (const_tree type, const_tree decl)
5136 bool user_convention = false;
5137 enum arm_pcs user_pcs = arm_pcs_default;
5138 tree attr;
5140 gcc_assert (type);
5142 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5143 if (attr)
5145 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5146 user_convention = true;
5149 if (TARGET_AAPCS_BASED)
5151 /* Detect varargs functions. These always use the base rules
5152 (no argument is ever a candidate for a co-processor
5153 register). */
5154 bool base_rules = stdarg_p (type);
5156 if (user_convention)
5158 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5159 sorry ("non-AAPCS derived PCS variant");
5160 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5161 error ("variadic functions must use the base AAPCS variant");
5164 if (base_rules)
5165 return ARM_PCS_AAPCS;
5166 else if (user_convention)
5167 return user_pcs;
5168 else if (decl && flag_unit_at_a_time)
5170 /* Local functions never leak outside this compilation unit,
5171 so we are free to use whatever conventions are
5172 appropriate. */
5173 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5174 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5175 if (i && i->local)
5176 return ARM_PCS_AAPCS_LOCAL;
5179 else if (user_convention && user_pcs != arm_pcs_default)
5180 sorry ("PCS variant");
5182 /* For everything else we use the target's default. */
5183 return arm_pcs_default;
5187 static void
5188 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5189 const_tree fntype ATTRIBUTE_UNUSED,
5190 rtx libcall ATTRIBUTE_UNUSED,
5191 const_tree fndecl ATTRIBUTE_UNUSED)
5193 /* Record the unallocated VFP registers. */
5194 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5195 pcum->aapcs_vfp_reg_alloc = 0;
5198 /* Walk down the type tree of TYPE counting consecutive base elements.
5199 If *MODEP is VOIDmode, then set it to the first valid floating point
5200 type. If a non-floating point type is found, or if a floating point
5201 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5202 otherwise return the count in the sub-tree. */
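/* For example, "struct { double x, y; }" yields 2 with *MODEP set to
   DFmode (a homogeneous aggregate of two doubles), "_Complex double"
   yields 2, and "struct { float f; double d; }" yields -1 because the
   base types differ.  */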
5203 static int
5204 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5206 enum machine_mode mode;
5207 HOST_WIDE_INT size;
5209 switch (TREE_CODE (type))
5211 case REAL_TYPE:
5212 mode = TYPE_MODE (type);
5213 if (mode != DFmode && mode != SFmode)
5214 return -1;
5216 if (*modep == VOIDmode)
5217 *modep = mode;
5219 if (*modep == mode)
5220 return 1;
5222 break;
5224 case COMPLEX_TYPE:
5225 mode = TYPE_MODE (TREE_TYPE (type));
5226 if (mode != DFmode && mode != SFmode)
5227 return -1;
5229 if (*modep == VOIDmode)
5230 *modep = mode;
5232 if (*modep == mode)
5233 return 2;
5235 break;
5237 case VECTOR_TYPE:
5238 /* Use V2SImode and V4SImode as representatives of all 64-bit
5239 and 128-bit vector types, whether or not those modes are
5240 supported with the present options. */
5241 size = int_size_in_bytes (type);
5242 switch (size)
5244 case 8:
5245 mode = V2SImode;
5246 break;
5247 case 16:
5248 mode = V4SImode;
5249 break;
5250 default:
5251 return -1;
5254 if (*modep == VOIDmode)
5255 *modep = mode;
5257 /* Vector modes are considered to be opaque: two vectors are
5258 equivalent for the purposes of being homogeneous aggregates
5259 if they are the same size. */
5260 if (*modep == mode)
5261 return 1;
5263 break;
5265 case ARRAY_TYPE:
5267 int count;
5268 tree index = TYPE_DOMAIN (type);
5270 /* Can't handle incomplete types nor sizes that are not
5271 fixed. */
5272 if (!COMPLETE_TYPE_P (type)
5273 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5274 return -1;
5276 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5277 if (count == -1
5278 || !index
5279 || !TYPE_MAX_VALUE (index)
5280 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5281 || !TYPE_MIN_VALUE (index)
5282 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5283 || count < 0)
5284 return -1;
5286 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5287 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5289 /* There must be no padding. */
5290 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5291 return -1;
5293 return count;
5296 case RECORD_TYPE:
5298 int count = 0;
5299 int sub_count;
5300 tree field;
5302 /* Can't handle incomplete types nor sizes that are not
5303 fixed. */
5304 if (!COMPLETE_TYPE_P (type)
5305 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5306 return -1;
5308 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5310 if (TREE_CODE (field) != FIELD_DECL)
5311 continue;
5313 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5314 if (sub_count < 0)
5315 return -1;
5316 count += sub_count;
5319 /* There must be no padding. */
5320 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5321 return -1;
5323 return count;
5326 case UNION_TYPE:
5327 case QUAL_UNION_TYPE:
5329 /* These aren't very interesting except in a degenerate case. */
5330 int count = 0;
5331 int sub_count;
5332 tree field;
5334 /* Can't handle incomplete types nor sizes that are not
5335 fixed. */
5336 if (!COMPLETE_TYPE_P (type)
5337 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5338 return -1;
5340 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5342 if (TREE_CODE (field) != FIELD_DECL)
5343 continue;
5345 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5346 if (sub_count < 0)
5347 return -1;
5348 count = count > sub_count ? count : sub_count;
5351 /* There must be no padding. */
5352 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5353 return -1;
5355 return count;
5358 default:
5359 break;
5362 return -1;
5365 /* Return true if PCS_VARIANT should use VFP registers. */
5366 static bool
5367 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5369 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5371 static bool seen_thumb1_vfp = false;
5373 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5375 sorry ("Thumb-1 hard-float VFP ABI");
5376 /* sorry() is not immediately fatal, so only display this once. */
5377 seen_thumb1_vfp = true;
5380 return true;
5383 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5384 return false;
5386 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5387 (TARGET_VFP_DOUBLE || !is_double));
5390 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5391 suitable for passing or returning in VFP registers for the PCS
5392 variant selected. If it is, then *BASE_MODE is updated to contain
5393 a machine mode describing each element of the argument's type and
5394 *COUNT to hold the number of such elements. */
5395 static bool
5396 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5397 enum machine_mode mode, const_tree type,
5398 enum machine_mode *base_mode, int *count)
5400 enum machine_mode new_mode = VOIDmode;
5402 /* If we have the type information, prefer that to working things
5403 out from the mode. */
5404 if (type)
5406 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5408 if (ag_count > 0 && ag_count <= 4)
5409 *count = ag_count;
5410 else
5411 return false;
5413 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5414 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5415 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5417 *count = 1;
5418 new_mode = mode;
5420 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5422 *count = 2;
5423 new_mode = (mode == DCmode ? DFmode : SFmode);
5425 else
5426 return false;
5429 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5430 return false;
5432 *base_mode = new_mode;
5433 return true;
5436 static bool
5437 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5438 enum machine_mode mode, const_tree type)
5440 int count ATTRIBUTE_UNUSED;
5441 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5443 if (!use_vfp_abi (pcs_variant, false))
5444 return false;
5445 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5446 &ag_mode, &count);
5449 static bool
5450 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5451 const_tree type)
5453 if (!use_vfp_abi (pcum->pcs_variant, false))
5454 return false;
5456 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5457 &pcum->aapcs_vfp_rmode,
5458 &pcum->aapcs_vfp_rcount);
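/* Allocate the first block of free VFP registers that can hold the
   argument, recording the allocation as a bit-mask over the
   single-precision argument registers (s0-s15 under the AAPCS).  For
   example, a homogeneous aggregate of two doubles needs four consecutive
   S registers, so it takes s0-s3 (d0-d1) if they are free and records a
   mask of 0xf.  */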
5461 static bool
5462 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5463 const_tree type ATTRIBUTE_UNUSED)
5465 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5466 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5467 int regno;
5469 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5470 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5472 pcum->aapcs_vfp_reg_alloc = mask << regno;
5473 if (mode == BLKmode
5474 || (mode == TImode && ! TARGET_NEON)
5475 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5477 int i;
5478 int rcount = pcum->aapcs_vfp_rcount;
5479 int rshift = shift;
5480 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5481 rtx par;
5482 if (!TARGET_NEON)
5484 /* Avoid using unsupported vector modes. */
5485 if (rmode == V2SImode)
5486 rmode = DImode;
5487 else if (rmode == V4SImode)
5489 rmode = DImode;
5490 rcount *= 2;
5491 rshift /= 2;
5494 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5495 for (i = 0; i < rcount; i++)
5497 rtx tmp = gen_rtx_REG (rmode,
5498 FIRST_VFP_REGNUM + regno + i * rshift);
5499 tmp = gen_rtx_EXPR_LIST
5500 (VOIDmode, tmp,
5501 GEN_INT (i * GET_MODE_SIZE (rmode)));
5502 XVECEXP (par, 0, i) = tmp;
5505 pcum->aapcs_reg = par;
5507 else
5508 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5509 return true;
5511 return false;
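/* Illustration only, not part of the original file: a simplified,
   self-contained model of the allocation loop above.  The AAPCS VFP
   variant tracks the 16 single-precision argument registers (s0-s15) as
   a bitmask; an argument needs SHIFT * RCOUNT consecutive free bits
   starting at a multiple of SHIFT.  Names here are hypothetical.  */
static int
model_vfp_alloc (unsigned free_mask, int shift, int rcount)
{
  unsigned mask = (1u << (shift * rcount)) - 1;
  int regno;

  for (regno = 0; regno < 16; regno += shift)
    if (((free_mask >> regno) & mask) == mask)
      return regno;   /* e.g. one DFmode arg: shift 2, mask 0x3, regs s(regno)/s(regno+1) */

  return -1;          /* no suitable block free: the argument goes on the stack */
}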
5514 static rtx
5515 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5516 enum machine_mode mode,
5517 const_tree type ATTRIBUTE_UNUSED)
5519 if (!use_vfp_abi (pcs_variant, false))
5520 return NULL;
5522 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5524 int count;
5525 enum machine_mode ag_mode;
5526 int i;
5527 rtx par;
5528 int shift;
5530 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5531 &ag_mode, &count);
5533 if (!TARGET_NEON)
5535 if (ag_mode == V2SImode)
5536 ag_mode = DImode;
5537 else if (ag_mode == V4SImode)
5539 ag_mode = DImode;
5540 count *= 2;
5543 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5544 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5545 for (i = 0; i < count; i++)
5547 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5548 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5549 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5550 XVECEXP (par, 0, i) = tmp;
5553 return par;
5556 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5559 static void
5560 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5561 enum machine_mode mode ATTRIBUTE_UNUSED,
5562 const_tree type ATTRIBUTE_UNUSED)
5564 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5565 pcum->aapcs_vfp_reg_alloc = 0;
5566 return;
5569 #define AAPCS_CP(X) \
5571 aapcs_ ## X ## _cum_init, \
5572 aapcs_ ## X ## _is_call_candidate, \
5573 aapcs_ ## X ## _allocate, \
5574 aapcs_ ## X ## _is_return_candidate, \
5575 aapcs_ ## X ## _allocate_return_reg, \
5576 aapcs_ ## X ## _advance \
5579 /* Table of co-processors that can be used to pass arguments in
5580 registers. Ideally no argument should be a candidate for more than
5581 one co-processor table entry, but the table is processed in order
5582 and stops after the first match. If that entry then fails to put
5583 the argument into a co-processor register, the argument will go on
5584 the stack. */
5585 static struct
5587 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5588 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5590 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5591 BLKmode) is a candidate for this co-processor's registers; this
5592 function should ignore any position-dependent state in
5593 CUMULATIVE_ARGS and only use call-type dependent information. */
5594 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5596 /* Return true if the argument does get a co-processor register; it
5597 should set aapcs_reg to an RTX of the register allocated as is
5598 required for a return from FUNCTION_ARG. */
5599 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5601 /* Return true if a result of mode MODE (or type TYPE if MODE is
5602 BLKmode) can be returned in this co-processor's registers.
5603 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5605 /* Allocate and return an RTX element to hold the return type of a
5606 call; this routine must not fail and will only be called if
5607 is_return_candidate returned true with the same parameters. */
5608 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5610 /* Finish processing this argument and prepare to start processing
5611 the next one. */
5612 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5613 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5615 AAPCS_CP(vfp)
5618 #undef AAPCS_CP
5620 static int
5621 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5622 const_tree type)
5624 int i;
5626 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5627 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5628 return i;
5630 return -1;
5633 static int
5634 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5636 /* We aren't passed a decl, so we can't check that a call is local.
5637 However, it isn't clear that that would be a win anyway, since it
5638 might limit some tail-calling opportunities. */
5639 enum arm_pcs pcs_variant;
5641 if (fntype)
5643 const_tree fndecl = NULL_TREE;
5645 if (TREE_CODE (fntype) == FUNCTION_DECL)
5647 fndecl = fntype;
5648 fntype = TREE_TYPE (fntype);
5651 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5653 else
5654 pcs_variant = arm_pcs_default;
5656 if (pcs_variant != ARM_PCS_AAPCS)
5658 int i;
5660 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5661 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5662 TYPE_MODE (type),
5663 type))
5664 return i;
5666 return -1;
5669 static rtx
5670 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5671 const_tree fntype)
5673 /* We aren't passed a decl, so we can't check that a call is local.
5674 However, it isn't clear that that would be a win anyway, since it
5675 might limit some tail-calling opportunities. */
5676 enum arm_pcs pcs_variant;
5677 int unsignedp ATTRIBUTE_UNUSED;
5679 if (fntype)
5681 const_tree fndecl = NULL_TREE;
5683 if (TREE_CODE (fntype) == FUNCTION_DECL)
5685 fndecl = fntype;
5686 fntype = TREE_TYPE (fntype);
5689 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5691 else
5692 pcs_variant = arm_pcs_default;
5694 /* Promote integer types. */
5695 if (type && INTEGRAL_TYPE_P (type))
5696 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5698 if (pcs_variant != ARM_PCS_AAPCS)
5700 int i;
5702 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5703 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5704 type))
5705 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5706 mode, type);
5709 /* Promotes small structs returned in a register to full-word size
5710 for big-endian AAPCS. */
5711 if (type && arm_return_in_msb (type))
5713 HOST_WIDE_INT size = int_size_in_bytes (type);
5714 if (size % UNITS_PER_WORD != 0)
5716 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5717 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5721 return gen_rtx_REG (mode, R0_REGNUM);
5724 static rtx
5725 aapcs_libcall_value (enum machine_mode mode)
5727 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5728 && GET_MODE_SIZE (mode) <= 4)
5729 mode = SImode;
5731 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5734 /* Lay out a function argument using the AAPCS rules. The rule
5735 numbers referred to here are those in the AAPCS. */
5736 static void
5737 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5738 const_tree type, bool named)
5740 int nregs, nregs2;
5741 int ncrn;
5743 /* We only need to do this once per argument. */
5744 if (pcum->aapcs_arg_processed)
5745 return;
5747 pcum->aapcs_arg_processed = true;
5749 /* Special case: if named is false then we are handling an incoming
5750 anonymous argument which is on the stack. */
5751 if (!named)
5752 return;
5754 /* Is this a potential co-processor register candidate? */
5755 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5757 int slot = aapcs_select_call_coproc (pcum, mode, type);
5758 pcum->aapcs_cprc_slot = slot;
5760 /* We don't have to apply any of the rules from part B of the
5761 preparation phase, these are handled elsewhere in the
5762 compiler. */
5764 if (slot >= 0)
5766 /* A Co-processor register candidate goes either in its own
5767 class of registers or on the stack. */
5768 if (!pcum->aapcs_cprc_failed[slot])
5770 /* C1.cp - Try to allocate the argument to co-processor
5771 registers. */
5772 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5773 return;
5775 /* C2.cp - Put the argument on the stack and note that we
5776 can't assign any more candidates in this slot. We also
5777 need to note that we have allocated stack space, so that
5778 we won't later try to split a non-cprc candidate between
5779 core registers and the stack. */
5780 pcum->aapcs_cprc_failed[slot] = true;
5781 pcum->can_split = false;
5784 /* We didn't get a register, so this argument goes on the
5785 stack. */
5786 gcc_assert (pcum->can_split == false);
5787 return;
5791 /* C3 - For double-word aligned arguments, round the NCRN up to the
5792 next even number. */
5793 ncrn = pcum->aapcs_ncrn;
5794 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5795 ncrn++;
5797 nregs = ARM_NUM_REGS2(mode, type);
5799 /* Sigh, this test should really assert that nregs > 0, but a GCC
5800 extension allows empty structs and then gives them empty size; it
5801 then allows such a structure to be passed by value. For some of
5802 the code below we have to pretend that such an argument has
5803 non-zero size so that we 'locate' it correctly either in
5804 registers or on the stack. */
5805 gcc_assert (nregs >= 0);
5807 nregs2 = nregs ? nregs : 1;
5809 /* C4 - Argument fits entirely in core registers. */
5810 if (ncrn + nregs2 <= NUM_ARG_REGS)
5812 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5813 pcum->aapcs_next_ncrn = ncrn + nregs;
5814 return;
5817 /* C5 - Some core registers left and there are no arguments already
5818 on the stack: split this argument between the remaining core
5819 registers and the stack. */
5820 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5822 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5823 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5824 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5825 return;
5828 /* C6 - NCRN is set to 4. */
5829 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5831 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5832 return;
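/* Illustration only, not part of the original source: how the C3-C5 rules
   above lay out arguments in core registers under the base AAPCS variant
   (hypothetical declaration).  */
extern void callee (int a, long long b, int c);
/* a -> r0        (C4: fits in core registers; NCRN becomes 1)
   b -> r2-r3     (C3: doubleword alignment rounds NCRN up from 1 to 2)
   c -> stack     (C6/C7: NCRN is already 4 after b)  */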
5835 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5836 for a call to a function whose data type is FNTYPE.
5837 For a library call, FNTYPE is NULL. */
5838 void
5839 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5840 rtx libname,
5841 tree fndecl ATTRIBUTE_UNUSED)
5843 /* Long call handling. */
5844 if (fntype)
5845 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5846 else
5847 pcum->pcs_variant = arm_pcs_default;
5849 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5851 if (arm_libcall_uses_aapcs_base (libname))
5852 pcum->pcs_variant = ARM_PCS_AAPCS;
5854 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5855 pcum->aapcs_reg = NULL_RTX;
5856 pcum->aapcs_partial = 0;
5857 pcum->aapcs_arg_processed = false;
5858 pcum->aapcs_cprc_slot = -1;
5859 pcum->can_split = true;
5861 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5863 int i;
5865 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5867 pcum->aapcs_cprc_failed[i] = false;
5868 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5871 return;
5874 /* Legacy ABIs */
5876 /* On the ARM, the offset starts at 0. */
5877 pcum->nregs = 0;
5878 pcum->iwmmxt_nregs = 0;
5879 pcum->can_split = true;
5881 /* Varargs vectors are treated the same as long long.
5882 named_count avoids having to change the way arm handles 'named' */
5883 pcum->named_count = 0;
5884 pcum->nargs = 0;
5886 if (TARGET_REALLY_IWMMXT && fntype)
5888 tree fn_arg;
5890 for (fn_arg = TYPE_ARG_TYPES (fntype);
5891 fn_arg;
5892 fn_arg = TREE_CHAIN (fn_arg))
5893 pcum->named_count += 1;
5895 if (! pcum->named_count)
5896 pcum->named_count = INT_MAX;
5900 /* Return true if we use LRA instead of reload pass. */
5901 static bool
5902 arm_lra_p (void)
5904 return arm_lra_flag;
5907 /* Return true if mode/type need doubleword alignment. */
5908 static bool
5909 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5911 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5912 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
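/* Illustration only, not part of the original source: examples of types
   whose alignment exceeds the 32-bit PARM_BOUNDARY and therefore make the
   callers of this predicate round the next argument register up to an
   even number (hypothetical declarations).  */
extern long long ll_arg;                                   /* 64-bit alignment */
struct aligned8 { int i; } __attribute__ ((aligned (8)));  /* user-specified   */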
5916 /* Determine where to put an argument to a function.
5917 Value is zero to push the argument on the stack,
5918 or a hard register in which to store the argument.
5920 MODE is the argument's machine mode.
5921 TYPE is the data type of the argument (as a tree).
5922 This is null for libcalls where that information may
5923 not be available.
5924 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5925 the preceding args and about the function being called.
5926 NAMED is nonzero if this argument is a named parameter
5927 (otherwise it is an extra parameter matching an ellipsis).
5929 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5930 other arguments are passed on the stack. If (NAMED == 0) (which happens
5931 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5932 defined), say it is passed on the stack (function_prologue will
5933 indeed make it be passed on the stack if necessary). */
5935 static rtx
5936 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5937 const_tree type, bool named)
5939 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5940 int nregs;
5942 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5943 a call insn (op3 of a call_value insn). */
5944 if (mode == VOIDmode)
5945 return const0_rtx;
5947 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5949 aapcs_layout_arg (pcum, mode, type, named);
5950 return pcum->aapcs_reg;
5953 /* Varargs vectors are treated the same as long long.
5954 named_count avoids having to change the way arm handles 'named' */
5955 if (TARGET_IWMMXT_ABI
5956 && arm_vector_mode_supported_p (mode)
5957 && pcum->named_count > pcum->nargs + 1)
5959 if (pcum->iwmmxt_nregs <= 9)
5960 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5961 else
5963 pcum->can_split = false;
5964 return NULL_RTX;
5968 /* Put doubleword aligned quantities in even register pairs. */
5969 if (pcum->nregs & 1
5970 && ARM_DOUBLEWORD_ALIGN
5971 && arm_needs_doubleword_align (mode, type))
5972 pcum->nregs++;
5974 /* Only allow splitting an arg between regs and memory if all preceding
5975 args were allocated to regs. For args passed by reference we only count
5976 the reference pointer. */
5977 if (pcum->can_split)
5978 nregs = 1;
5979 else
5980 nregs = ARM_NUM_REGS2 (mode, type);
5982 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5983 return NULL_RTX;
5985 return gen_rtx_REG (mode, pcum->nregs);
5988 static unsigned int
5989 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5991 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5992 ? DOUBLEWORD_ALIGNMENT
5993 : PARM_BOUNDARY);
5996 static int
5997 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5998 tree type, bool named)
6000 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6001 int nregs = pcum->nregs;
6003 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6005 aapcs_layout_arg (pcum, mode, type, named);
6006 return pcum->aapcs_partial;
6009 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6010 return 0;
6012 if (NUM_ARG_REGS > nregs
6013 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6014 && pcum->can_split)
6015 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6017 return 0;
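/* Illustration only, not part of the original source (hypothetical
   declarations): an argument that straddles the register/stack boundary.
   Both the AAPCS path and the legacy path above report 8 partial bytes.  */
struct sixteen { int v[4]; };
extern void callee2 (int a, int b, struct sixteen s);
/* a -> r0, b -> r1; s gets r2-r3 for its first 8 bytes and the remaining
   8 bytes go on the stack.  */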
6020 /* Update the data in PCUM to advance over an argument
6021 of mode MODE and data type TYPE.
6022 (TYPE is null for libcalls where that information may not be available.) */
6024 static void
6025 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
6026 const_tree type, bool named)
6028 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6030 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6032 aapcs_layout_arg (pcum, mode, type, named);
6034 if (pcum->aapcs_cprc_slot >= 0)
6036 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6037 type);
6038 pcum->aapcs_cprc_slot = -1;
6041 /* Generic stuff. */
6042 pcum->aapcs_arg_processed = false;
6043 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6044 pcum->aapcs_reg = NULL_RTX;
6045 pcum->aapcs_partial = 0;
6047 else
6049 pcum->nargs += 1;
6050 if (arm_vector_mode_supported_p (mode)
6051 && pcum->named_count > pcum->nargs
6052 && TARGET_IWMMXT_ABI)
6053 pcum->iwmmxt_nregs += 1;
6054 else
6055 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6059 /* Variable sized types are passed by reference. This is a GCC
6060 extension to the ARM ABI. */
6062 static bool
6063 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6064 enum machine_mode mode ATTRIBUTE_UNUSED,
6065 const_tree type, bool named ATTRIBUTE_UNUSED)
6067 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6070 /* Encode the current state of the #pragma [no_]long_calls. */
6071 typedef enum
6073 OFF, /* No #pragma [no_]long_calls is in effect. */
6074 LONG, /* #pragma long_calls is in effect. */
6075 SHORT /* #pragma no_long_calls is in effect. */
6076 } arm_pragma_enum;
6078 static arm_pragma_enum arm_pragma_long_calls = OFF;
6080 void
6081 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6083 arm_pragma_long_calls = LONG;
6086 void
6087 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6089 arm_pragma_long_calls = SHORT;
6092 void
6093 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6095 arm_pragma_long_calls = OFF;
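/* Illustration only, not part of the original source: how the pragma
   handlers above are driven from user code.  */
#pragma long_calls
extern void far_away (void);   /* declared while #pragma long_calls is active */
#pragma no_long_calls
extern void nearby (void);     /* declared while #pragma no_long_calls is active */
#pragma long_calls_off         /* back to the command-line default */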
6098 /* Handle an attribute requiring a FUNCTION_DECL;
6099 arguments as in struct attribute_spec.handler. */
6100 static tree
6101 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6102 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6104 if (TREE_CODE (*node) != FUNCTION_DECL)
6106 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6107 name);
6108 *no_add_attrs = true;
6111 return NULL_TREE;
6114 /* Handle an "interrupt" or "isr" attribute;
6115 arguments as in struct attribute_spec.handler. */
6116 static tree
6117 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6118 bool *no_add_attrs)
6120 if (DECL_P (*node))
6122 if (TREE_CODE (*node) != FUNCTION_DECL)
6124 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6125 name);
6126 *no_add_attrs = true;
6128 /* FIXME: the argument if any is checked for type attributes;
6129 should it be checked for decl ones? */
6131 else
6133 if (TREE_CODE (*node) == FUNCTION_TYPE
6134 || TREE_CODE (*node) == METHOD_TYPE)
6136 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6138 warning (OPT_Wattributes, "%qE attribute ignored",
6139 name);
6140 *no_add_attrs = true;
6143 else if (TREE_CODE (*node) == POINTER_TYPE
6144 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6145 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6146 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6148 *node = build_variant_type_copy (*node);
6149 TREE_TYPE (*node) = build_type_attribute_variant
6150 (TREE_TYPE (*node),
6151 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6152 *no_add_attrs = true;
6154 else
6156 /* Possibly pass this attribute on from the type to a decl. */
6157 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6158 | (int) ATTR_FLAG_FUNCTION_NEXT
6159 | (int) ATTR_FLAG_ARRAY_NEXT))
6161 *no_add_attrs = true;
6162 return tree_cons (name, args, NULL_TREE);
6164 else
6166 warning (OPT_Wattributes, "%qE attribute ignored",
6167 name);
6172 return NULL_TREE;
6175 /* Handle a "pcs" attribute; arguments as in struct
6176 attribute_spec.handler. */
6177 static tree
6178 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6179 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6181 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6183 warning (OPT_Wattributes, "%qE attribute ignored", name);
6184 *no_add_attrs = true;
6186 return NULL_TREE;
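/* Illustration only, not part of the original source: the "pcs" attribute
   handled above selects the calling convention for individual functions
   (hypothetical declarations).  */
extern double vfp_abi_fn (double) __attribute__ ((pcs ("aapcs-vfp")));
extern int    base_abi_fn (int)   __attribute__ ((pcs ("aapcs")));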
6189 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6190 /* Handle the "notshared" attribute. This attribute is another way of
6191 requesting hidden visibility. ARM's compiler supports
6192 "__declspec(notshared)"; we support the same thing via an
6193 attribute. */
6195 static tree
6196 arm_handle_notshared_attribute (tree *node,
6197 tree name ATTRIBUTE_UNUSED,
6198 tree args ATTRIBUTE_UNUSED,
6199 int flags ATTRIBUTE_UNUSED,
6200 bool *no_add_attrs)
6202 tree decl = TYPE_NAME (*node);
6204 if (decl)
6206 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6207 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6208 *no_add_attrs = false;
6210 return NULL_TREE;
6212 #endif
6214 /* Return 0 if the attributes for two types are incompatible, 1 if they
6215 are compatible, and 2 if they are nearly compatible (which causes a
6216 warning to be generated). */
6217 static int
6218 arm_comp_type_attributes (const_tree type1, const_tree type2)
6220 int l1, l2, s1, s2;
6222 /* Check for mismatch of non-default calling convention. */
6223 if (TREE_CODE (type1) != FUNCTION_TYPE)
6224 return 1;
6226 /* Check for mismatched call attributes. */
6227 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6228 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6229 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6230 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6232 /* Only bother to check if an attribute is defined. */
6233 if (l1 | l2 | s1 | s2)
6235 /* If one type has an attribute, the other must have the same attribute. */
6236 if ((l1 != l2) || (s1 != s2))
6237 return 0;
6239 /* Disallow mixed attributes. */
6240 if ((l1 & s2) || (l2 & s1))
6241 return 0;
6244 /* Check for mismatched ISR attribute. */
6245 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6246 if (! l1)
6247 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6248 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6249 if (! l2)
6250 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6251 if (l1 != l2)
6252 return 0;
6254 return 1;
6257 /* Assigns default attributes to newly defined type. This is used to
6258 set short_call/long_call attributes for function types of
6259 functions defined inside corresponding #pragma scopes. */
6260 static void
6261 arm_set_default_type_attributes (tree type)
6263 /* Add __attribute__ ((long_call)) to all functions, when
6264 inside #pragma long_calls or __attribute__ ((short_call)),
6265 when inside #pragma no_long_calls. */
6266 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6268 tree type_attr_list, attr_name;
6269 type_attr_list = TYPE_ATTRIBUTES (type);
6271 if (arm_pragma_long_calls == LONG)
6272 attr_name = get_identifier ("long_call");
6273 else if (arm_pragma_long_calls == SHORT)
6274 attr_name = get_identifier ("short_call");
6275 else
6276 return;
6278 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6279 TYPE_ATTRIBUTES (type) = type_attr_list;
6283 /* Return true if DECL is known to be linked into section SECTION. */
6285 static bool
6286 arm_function_in_section_p (tree decl, section *section)
6288 /* We can only be certain about functions defined in the same
6289 compilation unit. */
6290 if (!TREE_STATIC (decl))
6291 return false;
6293 /* Make sure that SYMBOL always binds to the definition in this
6294 compilation unit. */
6295 if (!targetm.binds_local_p (decl))
6296 return false;
6298 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6299 if (!DECL_SECTION_NAME (decl))
6301 /* Make sure that we will not create a unique section for DECL. */
6302 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6303 return false;
6306 return function_section (decl) == section;
6309 /* Return nonzero if a 32-bit "long_call" should be generated for
6310 a call from the current function to DECL. We generate a long_call
6311 if the function:
6313 a. has an __attribute__ ((long_call))
6314 or b. is within the scope of a #pragma long_calls
6315 or c. the -mlong-calls command line switch has been specified
6317 However we do not generate a long call if the function:
6319 d. has an __attribute__ ((short_call))
6320 or e. is inside the scope of a #pragma no_long_calls
6321 or f. is defined in the same section as the current function. */
6323 bool
6324 arm_is_long_call_p (tree decl)
6326 tree attrs;
6328 if (!decl)
6329 return TARGET_LONG_CALLS;
6331 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6332 if (lookup_attribute ("short_call", attrs))
6333 return false;
6335 /* For "f", be conservative, and only cater for cases in which the
6336 whole of the current function is placed in the same section. */
6337 if (!flag_reorder_blocks_and_partition
6338 && TREE_CODE (decl) == FUNCTION_DECL
6339 && arm_function_in_section_p (decl, current_function_section ()))
6340 return false;
6342 if (lookup_attribute ("long_call", attrs))
6343 return true;
6345 return TARGET_LONG_CALLS;
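/* Illustration only, not part of the original source: the attributes
   consulted by arm_is_long_call_p (hypothetical declarations).  */
extern void far_func (void)  __attribute__ ((long_call));   /* rule (a) */
extern void near_func (void) __attribute__ ((short_call));  /* rule (d) */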
6348 /* Return nonzero if it is ok to make a tail-call to DECL. */
6349 static bool
6350 arm_function_ok_for_sibcall (tree decl, tree exp)
6352 unsigned long func_type;
6354 if (cfun->machine->sibcall_blocked)
6355 return false;
6357 /* Never tailcall something if we are generating code for Thumb-1. */
6358 if (TARGET_THUMB1)
6359 return false;
6361 /* The PIC register is live on entry to VxWorks PLT entries, so we
6362 must make the call before restoring the PIC register. */
6363 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6364 return false;
6366 /* If we are interworking and the function is not declared static
6367 then we can't tail-call it unless we know that it exists in this
6368 compilation unit (since it might be a Thumb routine). */
6369 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6370 && !TREE_ASM_WRITTEN (decl))
6371 return false;
6373 func_type = arm_current_func_type ();
6374 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6375 if (IS_INTERRUPT (func_type))
6376 return false;
6378 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6380 /* Check that the return value locations are the same. For
6381 example that we aren't returning a value from the sibling in
6382 a VFP register but then need to transfer it to a core
6383 register. */
6384 rtx a, b;
6386 a = arm_function_value (TREE_TYPE (exp), decl, false);
6387 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6388 cfun->decl, false);
6389 if (!rtx_equal_p (a, b))
6390 return false;
6393 /* Never tailcall if function may be called with a misaligned SP. */
6394 if (IS_STACKALIGN (func_type))
6395 return false;
6397 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6398 references should become a NOP. Don't convert such calls into
6399 sibling calls. */
6400 if (TARGET_AAPCS_BASED
6401 && arm_abi == ARM_ABI_AAPCS
6402 && decl
6403 && DECL_WEAK (decl))
6404 return false;
6406 /* Everything else is ok. */
6407 return true;
6411 /* Addressing mode support functions. */
6413 /* Return nonzero if X is a legitimate immediate operand when compiling
6414 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6416 legitimate_pic_operand_p (rtx x)
6418 if (GET_CODE (x) == SYMBOL_REF
6419 || (GET_CODE (x) == CONST
6420 && GET_CODE (XEXP (x, 0)) == PLUS
6421 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6422 return 0;
6424 return 1;
6427 /* Record that the current function needs a PIC register. Initialize
6428 cfun->machine->pic_reg if we have not already done so. */
6430 static void
6431 require_pic_register (void)
6433 /* A lot of the logic here is made obscure by the fact that this
6434 routine gets called as part of the rtx cost estimation process.
6435 We don't want those calls to affect any assumptions about the real
6436 function; and further, we can't call entry_of_function() until we
6437 start the real expansion process. */
6438 if (!crtl->uses_pic_offset_table)
6440 gcc_assert (can_create_pseudo_p ());
6441 if (arm_pic_register != INVALID_REGNUM
6442 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6444 if (!cfun->machine->pic_reg)
6445 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6447 /* Play games to avoid marking the function as needing pic
6448 if we are being called as part of the cost-estimation
6449 process. */
6450 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6451 crtl->uses_pic_offset_table = 1;
6453 else
6455 rtx_insn *seq, *insn;
6457 if (!cfun->machine->pic_reg)
6458 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6460 /* Play games to avoid marking the function as needing pic
6461 if we are being called as part of the cost-estimation
6462 process. */
6463 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6465 crtl->uses_pic_offset_table = 1;
6466 start_sequence ();
6468 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6469 && arm_pic_register > LAST_LO_REGNUM)
6470 emit_move_insn (cfun->machine->pic_reg,
6471 gen_rtx_REG (Pmode, arm_pic_register));
6472 else
6473 arm_load_pic_register (0UL);
6475 seq = get_insns ();
6476 end_sequence ();
6478 for (insn = seq; insn; insn = NEXT_INSN (insn))
6479 if (INSN_P (insn))
6480 INSN_LOCATION (insn) = prologue_location;
6482 /* We can be called during expansion of PHI nodes, where
6483 we can't yet emit instructions directly in the final
6484 insn stream. Queue the insns on the entry edge, they will
6485 be committed after everything else is expanded. */
6486 insert_insn_on_edge (seq,
6487 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6494 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6496 if (GET_CODE (orig) == SYMBOL_REF
6497 || GET_CODE (orig) == LABEL_REF)
6499 rtx insn;
6501 if (reg == 0)
6503 gcc_assert (can_create_pseudo_p ());
6504 reg = gen_reg_rtx (Pmode);
6507 /* VxWorks does not impose a fixed gap between segments; the run-time
6508 gap can be different from the object-file gap. We therefore can't
6509 use GOTOFF unless we are absolutely sure that the symbol is in the
6510 same segment as the GOT. Unfortunately, the flexibility of linker
6511 scripts means that we can't be sure of that in general, so assume
6512 that GOTOFF is never valid on VxWorks. */
6513 if ((GET_CODE (orig) == LABEL_REF
6514 || (GET_CODE (orig) == SYMBOL_REF &&
6515 SYMBOL_REF_LOCAL_P (orig)))
6516 && NEED_GOT_RELOC
6517 && arm_pic_data_is_text_relative)
6518 insn = arm_pic_static_addr (orig, reg);
6519 else
6521 rtx pat;
6522 rtx mem;
6524 /* If this function doesn't have a pic register, create one now. */
6525 require_pic_register ();
6527 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6529 /* Make the MEM as close to a constant as possible. */
6530 mem = SET_SRC (pat);
6531 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6532 MEM_READONLY_P (mem) = 1;
6533 MEM_NOTRAP_P (mem) = 1;
6535 insn = emit_insn (pat);
6538 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6539 by loop. */
6540 set_unique_reg_note (insn, REG_EQUAL, orig);
6542 return reg;
6544 else if (GET_CODE (orig) == CONST)
6546 rtx base, offset;
6548 if (GET_CODE (XEXP (orig, 0)) == PLUS
6549 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6550 return orig;
6552 /* Handle the case where we have: const (UNSPEC_TLS). */
6553 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6554 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6555 return orig;
6557 /* Handle the case where we have:
6558 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6559 CONST_INT. */
6560 if (GET_CODE (XEXP (orig, 0)) == PLUS
6561 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6562 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6564 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6565 return orig;
6568 if (reg == 0)
6570 gcc_assert (can_create_pseudo_p ());
6571 reg = gen_reg_rtx (Pmode);
6574 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6576 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6577 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6578 base == reg ? 0 : reg);
6580 if (CONST_INT_P (offset))
6582 /* The base register doesn't really matter, we only want to
6583 test the index for the appropriate mode. */
6584 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6586 gcc_assert (can_create_pseudo_p ());
6587 offset = force_reg (Pmode, offset);
6590 if (CONST_INT_P (offset))
6591 return plus_constant (Pmode, base, INTVAL (offset));
6594 if (GET_MODE_SIZE (mode) > 4
6595 && (GET_MODE_CLASS (mode) == MODE_INT
6596 || TARGET_SOFT_FLOAT))
6598 emit_insn (gen_addsi3 (reg, base, offset));
6599 return reg;
6602 return gen_rtx_PLUS (Pmode, base, offset);
6605 return orig;
6609 /* Find a spare register to use during the prolog of a function. */
6611 static int
6612 thumb_find_work_register (unsigned long pushed_regs_mask)
6614 int reg;
6616 /* Check the argument registers first as these are call-used. The
6617 register allocation order means that sometimes r3 might be used
6618 but earlier argument registers might not, so check them all. */
6619 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6620 if (!df_regs_ever_live_p (reg))
6621 return reg;
6623 /* Before going on to check the call-saved registers we can try a couple
6624 more ways of deducing that r3 is available. The first is when we are
6625 pushing anonymous arguments onto the stack and we have less than 4
6626 registers worth of fixed arguments(*). In this case r3 will be part of
6627 the variable argument list and so we can be sure that it will be
6628 pushed right at the start of the function. Hence it will be available
6629 for the rest of the prologue.
6630 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6631 if (cfun->machine->uses_anonymous_args
6632 && crtl->args.pretend_args_size > 0)
6633 return LAST_ARG_REGNUM;
6635 /* The other case is when we have fixed arguments but less than 4 registers
6636 worth. In this case r3 might be used in the body of the function, but
6637 it is not being used to convey an argument into the function. In theory
6638 we could just check crtl->args.size to see how many bytes are
6639 being passed in argument registers, but it seems that it is unreliable.
6640 Sometimes it will have the value 0 when in fact arguments are being
6641 passed. (See testcase execute/20021111-1.c for an example). So we also
6642 check the args_info.nregs field as well. The problem with this field is
6643 that it makes no allowances for arguments that are passed to the
6644 function but which are not used. Hence we could miss an opportunity
6645 when a function has an unused argument in r3. But it is better to be
6646 safe than to be sorry. */
6647 if (! cfun->machine->uses_anonymous_args
6648 && crtl->args.size >= 0
6649 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6650 && (TARGET_AAPCS_BASED
6651 ? crtl->args.info.aapcs_ncrn < 4
6652 : crtl->args.info.nregs < 4))
6653 return LAST_ARG_REGNUM;
6655 /* Otherwise look for a call-saved register that is going to be pushed. */
6656 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6657 if (pushed_regs_mask & (1 << reg))
6658 return reg;
6660 if (TARGET_THUMB2)
6662 /* Thumb-2 can use high regs. */
6663 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6664 if (pushed_regs_mask & (1 << reg))
6665 return reg;
6667 /* Something went wrong - thumb_compute_save_reg_mask()
6668 should have arranged for a suitable register to be pushed. */
6669 gcc_unreachable ();
6672 static GTY(()) int pic_labelno;
6674 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6675 low register. */
6677 void
6678 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6680 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6682 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6683 return;
6685 gcc_assert (flag_pic);
6687 pic_reg = cfun->machine->pic_reg;
6688 if (TARGET_VXWORKS_RTP)
6690 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6691 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6692 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6694 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6696 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6697 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6699 else
6701 /* We use an UNSPEC rather than a LABEL_REF because this label
6702 never appears in the code stream. */
6704 labelno = GEN_INT (pic_labelno++);
6705 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6706 l1 = gen_rtx_CONST (VOIDmode, l1);
6708 /* On the ARM the PC register contains 'dot + 8' at the time of the
6709 addition, on the Thumb it is 'dot + 4'. */
6710 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6711 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6712 UNSPEC_GOTSYM_OFF);
6713 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6715 if (TARGET_32BIT)
6717 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6719 else /* TARGET_THUMB1 */
6721 if (arm_pic_register != INVALID_REGNUM
6722 && REGNO (pic_reg) > LAST_LO_REGNUM)
6724 /* We will have pushed the pic register, so we should always be
6725 able to find a work register. */
6726 pic_tmp = gen_rtx_REG (SImode,
6727 thumb_find_work_register (saved_regs));
6728 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6729 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6730 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6732 else if (arm_pic_register != INVALID_REGNUM
6733 && arm_pic_register > LAST_LO_REGNUM
6734 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6736 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6737 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6738 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6740 else
6741 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6745 /* Need to emit this whether or not we obey regdecls,
6746 since setjmp/longjmp can cause life info to screw up. */
6747 emit_use (pic_reg);
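/* Illustration only, not part of the original source: for the common
   (non-VxWorks, ARM-state) case the sequence emitted above typically
   assembles to something like

       ldr     rN, .LCP            @ .LCP: .word _GLOBAL_OFFSET_TABLE_-(.LPIC0+8)
   .LPIC0:
       add     rN, pc, rN          @ pc reads as '.LPIC0 + 8' in ARM state

   leaving rN pointing at the GOT.  Label and register names are
   illustrative.  */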
6750 /* Generate code to load the address of a static var when flag_pic is set. */
6751 static rtx
6752 arm_pic_static_addr (rtx orig, rtx reg)
6754 rtx l1, labelno, offset_rtx, insn;
6756 gcc_assert (flag_pic);
6758 /* We use an UNSPEC rather than a LABEL_REF because this label
6759 never appears in the code stream. */
6760 labelno = GEN_INT (pic_labelno++);
6761 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6762 l1 = gen_rtx_CONST (VOIDmode, l1);
6764 /* On the ARM the PC register contains 'dot + 8' at the time of the
6765 addition, on the Thumb it is 'dot + 4'. */
6766 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6767 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6768 UNSPEC_SYMBOL_OFFSET);
6769 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6771 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6772 return insn;
6775 /* Return nonzero if X is valid as an ARM state addressing register. */
6776 static int
6777 arm_address_register_rtx_p (rtx x, int strict_p)
6779 int regno;
6781 if (!REG_P (x))
6782 return 0;
6784 regno = REGNO (x);
6786 if (strict_p)
6787 return ARM_REGNO_OK_FOR_BASE_P (regno);
6789 return (regno <= LAST_ARM_REGNUM
6790 || regno >= FIRST_PSEUDO_REGISTER
6791 || regno == FRAME_POINTER_REGNUM
6792 || regno == ARG_POINTER_REGNUM);
6795 /* Return TRUE if this rtx is the difference of a symbol and a label,
6796 and will reduce to a PC-relative relocation in the object file.
6797 Expressions like this can be left alone when generating PIC, rather
6798 than forced through the GOT. */
6799 static int
6800 pcrel_constant_p (rtx x)
6802 if (GET_CODE (x) == MINUS)
6803 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6805 return FALSE;
6808 /* Return true if X will surely end up in an index register after next
6809 splitting pass. */
6810 static bool
6811 will_be_in_index_register (const_rtx x)
6813 /* arm.md: calculate_pic_address will split this into a register. */
6814 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6817 /* Return nonzero if X is a valid ARM state address operand. */
6819 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6820 int strict_p)
6822 bool use_ldrd;
6823 enum rtx_code code = GET_CODE (x);
6825 if (arm_address_register_rtx_p (x, strict_p))
6826 return 1;
6828 use_ldrd = (TARGET_LDRD
6829 && (mode == DImode
6830 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6832 if (code == POST_INC || code == PRE_DEC
6833 || ((code == PRE_INC || code == POST_DEC)
6834 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6835 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6837 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6838 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6839 && GET_CODE (XEXP (x, 1)) == PLUS
6840 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6842 rtx addend = XEXP (XEXP (x, 1), 1);
6844 /* Don't allow ldrd post increment by register because it's hard
6845 to fixup invalid register choices. */
6846 if (use_ldrd
6847 && GET_CODE (x) == POST_MODIFY
6848 && REG_P (addend))
6849 return 0;
6851 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6852 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6855 /* After reload constants split into minipools will have addresses
6856 from a LABEL_REF. */
6857 else if (reload_completed
6858 && (code == LABEL_REF
6859 || (code == CONST
6860 && GET_CODE (XEXP (x, 0)) == PLUS
6861 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6862 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6863 return 1;
6865 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6866 return 0;
6868 else if (code == PLUS)
6870 rtx xop0 = XEXP (x, 0);
6871 rtx xop1 = XEXP (x, 1);
6873 return ((arm_address_register_rtx_p (xop0, strict_p)
6874 && ((CONST_INT_P (xop1)
6875 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6876 || (!strict_p && will_be_in_index_register (xop1))))
6877 || (arm_address_register_rtx_p (xop1, strict_p)
6878 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6881 #if 0
6882 /* Reload currently can't handle MINUS, so disable this for now */
6883 else if (GET_CODE (x) == MINUS)
6885 rtx xop0 = XEXP (x, 0);
6886 rtx xop1 = XEXP (x, 1);
6888 return (arm_address_register_rtx_p (xop0, strict_p)
6889 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6891 #endif
6893 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6894 && code == SYMBOL_REF
6895 && CONSTANT_POOL_ADDRESS_P (x)
6896 && ! (flag_pic
6897 && symbol_mentioned_p (get_pool_constant (x))
6898 && ! pcrel_constant_p (get_pool_constant (x))))
6899 return 1;
6901 return 0;
6904 /* Return nonzero if X is a valid Thumb-2 address operand. */
6905 static int
6906 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6908 bool use_ldrd;
6909 enum rtx_code code = GET_CODE (x);
6911 if (arm_address_register_rtx_p (x, strict_p))
6912 return 1;
6914 use_ldrd = (TARGET_LDRD
6915 && (mode == DImode
6916 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6918 if (code == POST_INC || code == PRE_DEC
6919 || ((code == PRE_INC || code == POST_DEC)
6920 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6921 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6923 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6924 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6925 && GET_CODE (XEXP (x, 1)) == PLUS
6926 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6928 /* Thumb-2 only has autoincrement by constant. */
6929 rtx addend = XEXP (XEXP (x, 1), 1);
6930 HOST_WIDE_INT offset;
6932 if (!CONST_INT_P (addend))
6933 return 0;
6935 offset = INTVAL(addend);
6936 if (GET_MODE_SIZE (mode) <= 4)
6937 return (offset > -256 && offset < 256);
6939 return (use_ldrd && offset > -1024 && offset < 1024
6940 && (offset & 3) == 0);
6943 /* After reload constants split into minipools will have addresses
6944 from a LABEL_REF. */
6945 else if (reload_completed
6946 && (code == LABEL_REF
6947 || (code == CONST
6948 && GET_CODE (XEXP (x, 0)) == PLUS
6949 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6950 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6951 return 1;
6953 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6954 return 0;
6956 else if (code == PLUS)
6958 rtx xop0 = XEXP (x, 0);
6959 rtx xop1 = XEXP (x, 1);
6961 return ((arm_address_register_rtx_p (xop0, strict_p)
6962 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6963 || (!strict_p && will_be_in_index_register (xop1))))
6964 || (arm_address_register_rtx_p (xop1, strict_p)
6965 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6968 /* Normally we can assign constant values to target registers without
6969 the help of a constant pool. But there are cases where we have to use
6970 a constant pool, for example:
6971 1) assigning a label to a register;
6972 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6974 A constant-pool access of the form:
6975 (set (reg r0) (mem (symbol_ref (".LC0"))))
6976 will cause the literal pool to be used (later, in arm_reorg).
6977 So here we mark such a form as invalid, and the compiler
6978 will then adjust it into:
6979 (set (reg r0) (symbol_ref (".LC0")))
6980 (set (reg r0) (mem (reg r0))).
6981 No extra register is required, and (mem (reg r0)) won't cause the use
6982 of literal pools. */
6983 else if (arm_disable_literal_pool && code == SYMBOL_REF
6984 && CONSTANT_POOL_ADDRESS_P (x))
6985 return 0;
6987 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6988 && code == SYMBOL_REF
6989 && CONSTANT_POOL_ADDRESS_P (x)
6990 && ! (flag_pic
6991 && symbol_mentioned_p (get_pool_constant (x))
6992 && ! pcrel_constant_p (get_pool_constant (x))))
6993 return 1;
6995 return 0;
6998 /* Return nonzero if INDEX is valid for an address index operand in
6999 ARM state. */
7000 static int
7001 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
7002 int strict_p)
7004 HOST_WIDE_INT range;
7005 enum rtx_code code = GET_CODE (index);
7007 /* Standard coprocessor addressing modes. */
7008 if (TARGET_HARD_FLOAT
7009 && TARGET_VFP
7010 && (mode == SFmode || mode == DFmode))
7011 return (code == CONST_INT && INTVAL (index) < 1024
7012 && INTVAL (index) > -1024
7013 && (INTVAL (index) & 3) == 0);
7015 /* For quad modes, we restrict the constant offset to be slightly less
7016 than what the instruction format permits. We do this because for
7017 quad mode moves, we will actually decompose them into two separate
7018 double-mode reads or writes. INDEX must therefore be a valid
7019 (double-mode) offset and so should INDEX+8. */
7020 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7021 return (code == CONST_INT
7022 && INTVAL (index) < 1016
7023 && INTVAL (index) > -1024
7024 && (INTVAL (index) & 3) == 0);
7026 /* We have no such constraint on double mode offsets, so we permit the
7027 full range of the instruction format. */
7028 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7029 return (code == CONST_INT
7030 && INTVAL (index) < 1024
7031 && INTVAL (index) > -1024
7032 && (INTVAL (index) & 3) == 0);
7034 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7035 return (code == CONST_INT
7036 && INTVAL (index) < 1024
7037 && INTVAL (index) > -1024
7038 && (INTVAL (index) & 3) == 0);
7040 if (arm_address_register_rtx_p (index, strict_p)
7041 && (GET_MODE_SIZE (mode) <= 4))
7042 return 1;
7044 if (mode == DImode || mode == DFmode)
7046 if (code == CONST_INT)
7048 HOST_WIDE_INT val = INTVAL (index);
7050 if (TARGET_LDRD)
7051 return val > -256 && val < 256;
7052 else
7053 return val > -4096 && val < 4092;
7056 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7059 if (GET_MODE_SIZE (mode) <= 4
7060 && ! (arm_arch4
7061 && (mode == HImode
7062 || mode == HFmode
7063 || (mode == QImode && outer == SIGN_EXTEND))))
7065 if (code == MULT)
7067 rtx xiop0 = XEXP (index, 0);
7068 rtx xiop1 = XEXP (index, 1);
7070 return ((arm_address_register_rtx_p (xiop0, strict_p)
7071 && power_of_two_operand (xiop1, SImode))
7072 || (arm_address_register_rtx_p (xiop1, strict_p)
7073 && power_of_two_operand (xiop0, SImode)));
7075 else if (code == LSHIFTRT || code == ASHIFTRT
7076 || code == ASHIFT || code == ROTATERT)
7078 rtx op = XEXP (index, 1);
7080 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7081 && CONST_INT_P (op)
7082 && INTVAL (op) > 0
7083 && INTVAL (op) <= 31);
7087 /* For ARM v4 we may be doing a sign-extend operation during the
7088 load. */
7089 if (arm_arch4)
7091 if (mode == HImode
7092 || mode == HFmode
7093 || (outer == SIGN_EXTEND && mode == QImode))
7094 range = 256;
7095 else
7096 range = 4096;
7098 else
7099 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7101 return (code == CONST_INT
7102 && INTVAL (index) < range
7103 && INTVAL (index) > -range);
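/* Illustration only, not part of the original source: index forms accepted
   above in ARM state (exact immediate ranges depend on the mode, as
   computed by the code).

       ldr   r0, [r1, #4092]         @ word load, immediate within +/-4095
       ldrh  r0, [r1, #254]          @ halfword (ARMv4+), immediate within +/-255
       ldr   r0, [r1, r2, lsl #2]    @ register index scaled by a power of two
       ldrd  r0, r1, [r2, #248]      @ DImode with LDRD, immediate within +/-255  */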
7106 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7107 index operand. i.e. 1, 2, 4 or 8. */
7108 static bool
7109 thumb2_index_mul_operand (rtx op)
7111 HOST_WIDE_INT val;
7113 if (!CONST_INT_P (op))
7114 return false;
7116 val = INTVAL(op);
7117 return (val == 1 || val == 2 || val == 4 || val == 8);
7120 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7121 static int
7122 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
7124 enum rtx_code code = GET_CODE (index);
7126 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7127 /* Standard coprocessor addressing modes. */
7128 if (TARGET_HARD_FLOAT
7129 && TARGET_VFP
7130 && (mode == SFmode || mode == DFmode))
7131 return (code == CONST_INT && INTVAL (index) < 1024
7132 /* Thumb-2 allows only > -256 index range for its core register
7133 load/stores. Since we allow SF/DF in core registers, we have
7134 to use the intersection between -256~4096 (core) and -1024~1024
7135 (coprocessor). */
7136 && INTVAL (index) > -256
7137 && (INTVAL (index) & 3) == 0);
7139 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7141 /* For DImode assume values will usually live in core regs
7142 and only allow LDRD addressing modes. */
7143 if (!TARGET_LDRD || mode != DImode)
7144 return (code == CONST_INT
7145 && INTVAL (index) < 1024
7146 && INTVAL (index) > -1024
7147 && (INTVAL (index) & 3) == 0);
7150 /* For quad modes, we restrict the constant offset to be slightly less
7151 than what the instruction format permits. We do this because for
7152 quad mode moves, we will actually decompose them into two separate
7153 double-mode reads or writes. INDEX must therefore be a valid
7154 (double-mode) offset and so should INDEX+8. */
7155 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7156 return (code == CONST_INT
7157 && INTVAL (index) < 1016
7158 && INTVAL (index) > -1024
7159 && (INTVAL (index) & 3) == 0);
7161 /* We have no such constraint on double mode offsets, so we permit the
7162 full range of the instruction format. */
7163 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7164 return (code == CONST_INT
7165 && INTVAL (index) < 1024
7166 && INTVAL (index) > -1024
7167 && (INTVAL (index) & 3) == 0);
7169 if (arm_address_register_rtx_p (index, strict_p)
7170 && (GET_MODE_SIZE (mode) <= 4))
7171 return 1;
7173 if (mode == DImode || mode == DFmode)
7175 if (code == CONST_INT)
7177 HOST_WIDE_INT val = INTVAL (index);
7178 /* ??? Can we assume ldrd for thumb2? */
7179 /* Thumb-2 ldrd only has reg+const addressing modes. */
7180 /* ldrd supports offsets of +-1020.
7181 However the ldr fallback does not. */
7182 return val > -256 && val < 256 && (val & 3) == 0;
7184 else
7185 return 0;
7188 if (code == MULT)
7190 rtx xiop0 = XEXP (index, 0);
7191 rtx xiop1 = XEXP (index, 1);
7193 return ((arm_address_register_rtx_p (xiop0, strict_p)
7194 && thumb2_index_mul_operand (xiop1))
7195 || (arm_address_register_rtx_p (xiop1, strict_p)
7196 && thumb2_index_mul_operand (xiop0)));
7198 else if (code == ASHIFT)
7200 rtx op = XEXP (index, 1);
7202 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7203 && CONST_INT_P (op)
7204 && INTVAL (op) > 0
7205 && INTVAL (op) <= 3);
7208 return (code == CONST_INT
7209 && INTVAL (index) < 4096
7210 && INTVAL (index) > -256);
7213 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7214 static int
7215 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
7217 int regno;
7219 if (!REG_P (x))
7220 return 0;
7222 regno = REGNO (x);
7224 if (strict_p)
7225 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7227 return (regno <= LAST_LO_REGNUM
7228 || regno > LAST_VIRTUAL_REGISTER
7229 || regno == FRAME_POINTER_REGNUM
7230 || (GET_MODE_SIZE (mode) >= 4
7231 && (regno == STACK_POINTER_REGNUM
7232 || regno >= FIRST_PSEUDO_REGISTER
7233 || x == hard_frame_pointer_rtx
7234 || x == arg_pointer_rtx)));
7237 /* Return nonzero if x is a legitimate index register. This is the case
7238 for any base register that can access a QImode object. */
7239 inline static int
7240 thumb1_index_register_rtx_p (rtx x, int strict_p)
7242 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7245 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7247 The AP may be eliminated to either the SP or the FP, so we use the
7248 least common denominator, e.g. SImode, and offsets from 0 to 64.
7250 ??? Verify whether the above is the right approach.
7252 ??? Also, the FP may be eliminated to the SP, so perhaps that
7253 needs special handling also.
7255 ??? Look at how the mips16 port solves this problem. It probably uses
7256 better ways to solve some of these problems.
7258 Although it is not incorrect, we don't accept QImode and HImode
7259 addresses based on the frame pointer or arg pointer until the
7260 reload pass starts. This is so that eliminating such addresses
7261 into stack based ones won't produce impossible code. */
7263 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
7265 /* ??? Not clear if this is right. Experiment. */
7266 if (GET_MODE_SIZE (mode) < 4
7267 && !(reload_in_progress || reload_completed)
7268 && (reg_mentioned_p (frame_pointer_rtx, x)
7269 || reg_mentioned_p (arg_pointer_rtx, x)
7270 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7271 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7272 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7273 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7274 return 0;
7276 /* Accept any base register. SP only in SImode or larger. */
7277 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7278 return 1;
7280 /* This is PC relative data before arm_reorg runs. */
7281 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7282 && GET_CODE (x) == SYMBOL_REF
7283 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7284 return 1;
7286 /* This is PC relative data after arm_reorg runs. */
7287 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7288 && reload_completed
7289 && (GET_CODE (x) == LABEL_REF
7290 || (GET_CODE (x) == CONST
7291 && GET_CODE (XEXP (x, 0)) == PLUS
7292 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7293 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7294 return 1;
7296 /* Post-inc indexing only supported for SImode and larger. */
7297 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7298 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7299 return 1;
7301 else if (GET_CODE (x) == PLUS)
7303 /* REG+REG address can be any two index registers. */
7304 /* We disallow FRAME+REG addressing since we know that FRAME
7305 will be replaced with STACK, and SP relative addressing only
7306 permits SP+OFFSET. */
7307 if (GET_MODE_SIZE (mode) <= 4
7308 && XEXP (x, 0) != frame_pointer_rtx
7309 && XEXP (x, 1) != frame_pointer_rtx
7310 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7311 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7312 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7313 return 1;
7315 /* REG+const has 5-7 bit offset for non-SP registers. */
7316 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7317 || XEXP (x, 0) == arg_pointer_rtx)
7318 && CONST_INT_P (XEXP (x, 1))
7319 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7320 return 1;
7322 /* REG+const has 10-bit offset for SP, but only SImode and
7323 larger is supported. */
7324 /* ??? Should probably check for DI/DFmode overflow here
7325 just like GO_IF_LEGITIMATE_OFFSET does. */
7326 else if (REG_P (XEXP (x, 0))
7327 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7328 && GET_MODE_SIZE (mode) >= 4
7329 && CONST_INT_P (XEXP (x, 1))
7330 && INTVAL (XEXP (x, 1)) >= 0
7331 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7332 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7333 return 1;
7335 else if (REG_P (XEXP (x, 0))
7336 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7337 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7338 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7339 && REGNO (XEXP (x, 0))
7340 <= LAST_VIRTUAL_POINTER_REGISTER))
7341 && GET_MODE_SIZE (mode) >= 4
7342 && CONST_INT_P (XEXP (x, 1))
7343 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7344 return 1;
7347 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7348 && GET_MODE_SIZE (mode) == 4
7349 && GET_CODE (x) == SYMBOL_REF
7350 && CONSTANT_POOL_ADDRESS_P (x)
7351 && ! (flag_pic
7352 && symbol_mentioned_p (get_pool_constant (x))
7353 && ! pcrel_constant_p (get_pool_constant (x))))
7354 return 1;
7356 return 0;
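/* A rough summary of the cases above (illustrative, not a normative list):
   plain (reg) is accepted for any mode (SP only for SImode and wider);
   PC-relative constant-pool references, and after reload label-based
   addresses, are accepted for word-sized or larger data; (post_inc reg)
   is accepted for SImode and wider; (plus reg reg) is accepted for sizes
   up to a word when neither operand is the frame pointer; (plus reg const)
   must satisfy thumb_legitimate_offset_p below; (plus sp const) needs a
   word-aligned offset with offset + size <= 1024 for SImode and wider;
   plus a few frame/arg-pointer and literal-pool special cases.  */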
7359 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7360 instruction of mode MODE. */
7362 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7364 switch (GET_MODE_SIZE (mode))
7366 case 1:
7367 return val >= 0 && val < 32;
7369 case 2:
7370 return val >= 0 && val < 64 && (val & 1) == 0;
7372 default:
7373 return (val >= 0
7374 && (val + GET_MODE_SIZE (mode)) <= 128
7375 && (val & 3) == 0);
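/* Consequences of the ranges above, for illustration: a QImode access may
   use offsets 0..31, an HImode access even offsets 0..62, and an SImode
   access multiples of 4 in 0..124.  So offset 62 is valid for HImode but
   not for SImode (not a multiple of 4), and offset 126 is valid for
   neither.  */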
7379 bool
7380 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7382 if (TARGET_ARM)
7383 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7384 else if (TARGET_THUMB2)
7385 return thumb2_legitimate_address_p (mode, x, strict_p);
7386 else /* if (TARGET_THUMB1) */
7387 return thumb1_legitimate_address_p (mode, x, strict_p);
7390 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7392 Given an rtx X being reloaded into a reg required to be
7393 in class CLASS, return the class of reg to actually use.
7394 In general this is just CLASS, but for the Thumb core registers and
7395 immediate constants we prefer a LO_REGS class or a subset. */
7397 static reg_class_t
7398 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7400 if (TARGET_32BIT)
7401 return rclass;
7402 else
7404 if (rclass == GENERAL_REGS)
7405 return LO_REGS;
7406 else
7407 return rclass;
7411 /* Build the SYMBOL_REF for __tls_get_addr. */
7413 static GTY(()) rtx tls_get_addr_libfunc;
7415 static rtx
7416 get_tls_get_addr (void)
7418 if (!tls_get_addr_libfunc)
7419 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7420 return tls_get_addr_libfunc;
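/* Load the thread pointer into TARGET, creating a new pseudo if TARGET is
   NULL.  With a hardware thread pointer the load_tp_hard pattern can place
   the value in any register; the soft variant is a helper call that returns
   the value in r0, which is copied out immediately below.  */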
7424 arm_load_tp (rtx target)
7426 if (!target)
7427 target = gen_reg_rtx (SImode);
7429 if (TARGET_HARD_TP)
7431 /* Can return in any reg. */
7432 emit_insn (gen_load_tp_hard (target));
7434 else
7436 /* Always returned in r0. Immediately copy the result into a pseudo,
7437 otherwise other uses of r0 (e.g. setting up function arguments) may
7438 clobber the value. */
7440 rtx tmp;
7442 emit_insn (gen_load_tp_soft ());
7444 tmp = gen_rtx_REG (SImode, 0);
7445 emit_move_insn (target, tmp);
7447 return target;
7450 static rtx
7451 load_tls_operand (rtx x, rtx reg)
7453 rtx tmp;
7455 if (reg == NULL_RTX)
7456 reg = gen_reg_rtx (SImode);
7458 tmp = gen_rtx_CONST (SImode, x);
7460 emit_move_insn (reg, tmp);
7462 return reg;
7465 static rtx
7466 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7468 rtx insns, label, labelno, sum;
7470 gcc_assert (reloc != TLS_DESCSEQ);
7471 start_sequence ();
7473 labelno = GEN_INT (pic_labelno++);
7474 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7475 label = gen_rtx_CONST (VOIDmode, label);
7477 sum = gen_rtx_UNSPEC (Pmode,
7478 gen_rtvec (4, x, GEN_INT (reloc), label,
7479 GEN_INT (TARGET_ARM ? 8 : 4)),
7480 UNSPEC_TLS);
7481 reg = load_tls_operand (sum, reg);
7483 if (TARGET_ARM)
7484 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7485 else
7486 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7488 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7489 LCT_PURE, /* LCT_CONST? */
7490 Pmode, 1, reg, Pmode);
7492 insns = get_insns ();
7493 end_sequence ();
7495 return insns;
7498 static rtx
7499 arm_tls_descseq_addr (rtx x, rtx reg)
7501 rtx labelno = GEN_INT (pic_labelno++);
7502 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7503 rtx sum = gen_rtx_UNSPEC (Pmode,
7504 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7505 gen_rtx_CONST (VOIDmode, label),
7506 GEN_INT (!TARGET_ARM)),
7507 UNSPEC_TLS);
7508 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7510 emit_insn (gen_tlscall (x, labelno));
7511 if (!reg)
7512 reg = gen_reg_rtx (SImode);
7513 else
7514 gcc_assert (REGNO (reg) != 0);
7516 emit_move_insn (reg, reg0);
7518 return reg;
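/* Legitimize a reference to the TLS symbol X, using REG as a scratch or
   result register when it is non-null.  Each TLS model (global-dynamic,
   local-dynamic, initial-exec, local-exec) gets its own access sequence;
   the GNU2 descriptor sequence is used for the dynamic models when
   TARGET_GNU2_TLS is set.  */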
7522 legitimize_tls_address (rtx x, rtx reg)
7524 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7525 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7527 switch (model)
7529 case TLS_MODEL_GLOBAL_DYNAMIC:
7530 if (TARGET_GNU2_TLS)
7532 reg = arm_tls_descseq_addr (x, reg);
7534 tp = arm_load_tp (NULL_RTX);
7536 dest = gen_rtx_PLUS (Pmode, tp, reg);
7538 else
7540 /* Original scheme */
7541 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7542 dest = gen_reg_rtx (Pmode);
7543 emit_libcall_block (insns, dest, ret, x);
7545 return dest;
7547 case TLS_MODEL_LOCAL_DYNAMIC:
7548 if (TARGET_GNU2_TLS)
7550 reg = arm_tls_descseq_addr (x, reg);
7552 tp = arm_load_tp (NULL_RTX);
7554 dest = gen_rtx_PLUS (Pmode, tp, reg);
7556 else
7558 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7560 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7561 share the LDM result with other LD model accesses. */
7562 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7563 UNSPEC_TLS);
7564 dest = gen_reg_rtx (Pmode);
7565 emit_libcall_block (insns, dest, ret, eqv);
7567 /* Load the addend. */
7568 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7569 GEN_INT (TLS_LDO32)),
7570 UNSPEC_TLS);
7571 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7572 dest = gen_rtx_PLUS (Pmode, dest, addend);
7574 return dest;
7576 case TLS_MODEL_INITIAL_EXEC:
7577 labelno = GEN_INT (pic_labelno++);
7578 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7579 label = gen_rtx_CONST (VOIDmode, label);
7580 sum = gen_rtx_UNSPEC (Pmode,
7581 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7582 GEN_INT (TARGET_ARM ? 8 : 4)),
7583 UNSPEC_TLS);
7584 reg = load_tls_operand (sum, reg);
7586 if (TARGET_ARM)
7587 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7588 else if (TARGET_THUMB2)
7589 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7590 else
7592 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7593 emit_move_insn (reg, gen_const_mem (SImode, reg));
7596 tp = arm_load_tp (NULL_RTX);
7598 return gen_rtx_PLUS (Pmode, tp, reg);
7600 case TLS_MODEL_LOCAL_EXEC:
7601 tp = arm_load_tp (NULL_RTX);
7603 reg = gen_rtx_UNSPEC (Pmode,
7604 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7605 UNSPEC_TLS);
7606 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7608 return gen_rtx_PLUS (Pmode, tp, reg);
7610 default:
7611 abort ();
7615 /* Try machine-dependent ways of modifying an illegitimate address
7616 to be legitimate. If we find one, return the new, valid address. */
7618 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7620 if (arm_tls_referenced_p (x))
7622 rtx addend = NULL;
7624 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7626 addend = XEXP (XEXP (x, 0), 1);
7627 x = XEXP (XEXP (x, 0), 0);
7630 if (GET_CODE (x) != SYMBOL_REF)
7631 return x;
7633 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7635 x = legitimize_tls_address (x, NULL_RTX);
7637 if (addend)
7639 x = gen_rtx_PLUS (SImode, x, addend);
7640 orig_x = x;
7642 else
7643 return x;
7646 if (!TARGET_ARM)
7648 /* TODO: legitimize_address for Thumb2. */
7649 if (TARGET_THUMB2)
7650 return x;
7651 return thumb_legitimize_address (x, orig_x, mode);
7654 if (GET_CODE (x) == PLUS)
7656 rtx xop0 = XEXP (x, 0);
7657 rtx xop1 = XEXP (x, 1);
7659 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7660 xop0 = force_reg (SImode, xop0);
7662 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7663 && !symbol_mentioned_p (xop1))
7664 xop1 = force_reg (SImode, xop1);
7666 if (ARM_BASE_REGISTER_RTX_P (xop0)
7667 && CONST_INT_P (xop1))
7669 HOST_WIDE_INT n, low_n;
7670 rtx base_reg, val;
7671 n = INTVAL (xop1);
7673 /* VFP addressing modes actually allow greater offsets, but for
7674 now we just stick with the lowest common denominator. */
7675 if (mode == DImode
7676 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7678 low_n = n & 0x0f;
7679 n &= ~0x0f;
7680 if (low_n > 4)
7682 n += 16;
7683 low_n -= 16;
7686 else
7688 low_n = ((mode) == TImode ? 0
7689 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7690 n -= low_n;
7693 base_reg = gen_reg_rtx (SImode);
7694 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7695 emit_move_insn (base_reg, val);
7696 x = plus_constant (Pmode, base_reg, low_n);
7698 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7699 x = gen_rtx_PLUS (SImode, xop0, xop1);
7702 /* XXX We don't allow MINUS any more -- see comment in
7703 arm_legitimate_address_outer_p (). */
7704 else if (GET_CODE (x) == MINUS)
7706 rtx xop0 = XEXP (x, 0);
7707 rtx xop1 = XEXP (x, 1);
7709 if (CONSTANT_P (xop0))
7710 xop0 = force_reg (SImode, xop0);
7712 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7713 xop1 = force_reg (SImode, xop1);
7715 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7716 x = gen_rtx_MINUS (SImode, xop0, xop1);
7719 /* Make sure to take full advantage of the pre-indexed addressing mode
7720 with absolute addresses which often allows for the base register to
7721 be factorized for multiple adjacent memory references, and it might
7722 even allow for the minipool to be avoided entirely. */
7723 else if (CONST_INT_P (x) && optimize > 0)
7725 unsigned int bits;
7726 HOST_WIDE_INT mask, base, index;
7727 rtx base_reg;
7729 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7730 use an 8-bit index. So let's use a 12-bit index for SImode only and
7731 hope that arm_gen_constant will enable ldrb to use more bits. */
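/* A worked instance of the split below (illustrative): for SImode and
   x == 0x1234, bits == 12 gives mask == 0xfff, base == 0x1000 and
   index == 0x234; only one bit of the base is set, so the negative-index
   adjustment is skipped and we emit base_reg = 0x1000, address
   = (plus base_reg 0x234).  */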
7732 bits = (mode == SImode) ? 12 : 8;
7733 mask = (1 << bits) - 1;
7734 base = INTVAL (x) & ~mask;
7735 index = INTVAL (x) & mask;
7736 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7738 /* It'll most probably be more efficient to generate the base
7739 with more bits set and use a negative index instead. */
7740 base |= mask;
7741 index -= mask;
7743 base_reg = force_reg (SImode, GEN_INT (base));
7744 x = plus_constant (Pmode, base_reg, index);
7747 if (flag_pic)
7749 /* We need to find and carefully transform any SYMBOL and LABEL
7750 references; so go back to the original address expression. */
7751 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7753 if (new_x != orig_x)
7754 x = new_x;
7757 return x;
7761 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7762 to be legitimate. If we find one, return the new, valid address. */
7764 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7766 if (GET_CODE (x) == PLUS
7767 && CONST_INT_P (XEXP (x, 1))
7768 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7769 || INTVAL (XEXP (x, 1)) < 0))
7771 rtx xop0 = XEXP (x, 0);
7772 rtx xop1 = XEXP (x, 1);
7773 HOST_WIDE_INT offset = INTVAL (xop1);
7775 /* Try and fold the offset into a biasing of the base register and
7776 then offsetting that. Don't do this when optimizing for space
7777 since it can cause too many CSEs. */
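/* Illustration of the folding below for an SImode access (size 4) with
   offset 300: the offset is >= 256, so delta == 300 - 252 == 48; the base
   register is biased by 252 and the memory reference then uses the
   in-range offset #48.  */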
7778 if (optimize_size && offset >= 0
7779 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7781 HOST_WIDE_INT delta;
7783 if (offset >= 256)
7784 delta = offset - (256 - GET_MODE_SIZE (mode));
7785 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7786 delta = 31 * GET_MODE_SIZE (mode);
7787 else
7788 delta = offset & (~31 * GET_MODE_SIZE (mode));
7790 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7791 NULL_RTX);
7792 x = plus_constant (Pmode, xop0, delta);
7794 else if (offset < 0 && offset > -256)
7795 /* Small negative offsets are best done with a subtract before the
7796 dereference, since forcing these into a register normally takes two
7797 instructions. */
7798 x = force_operand (x, NULL_RTX);
7799 else
7801 /* For the remaining cases, force the constant into a register. */
7802 xop1 = force_reg (SImode, xop1);
7803 x = gen_rtx_PLUS (SImode, xop0, xop1);
7806 else if (GET_CODE (x) == PLUS
7807 && s_register_operand (XEXP (x, 1), SImode)
7808 && !s_register_operand (XEXP (x, 0), SImode))
7810 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7812 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7815 if (flag_pic)
7817 /* We need to find and carefully transform any SYMBOL and LABEL
7818 references; so go back to the original address expression. */
7819 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7821 if (new_x != orig_x)
7822 x = new_x;
7825 return x;
7828 bool
7829 arm_legitimize_reload_address (rtx *p,
7830 enum machine_mode mode,
7831 int opnum, int type,
7832 int ind_levels ATTRIBUTE_UNUSED)
7834 /* We must recognize output that we have already generated ourselves. */
7835 if (GET_CODE (*p) == PLUS
7836 && GET_CODE (XEXP (*p, 0)) == PLUS
7837 && REG_P (XEXP (XEXP (*p, 0), 0))
7838 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7839 && CONST_INT_P (XEXP (*p, 1)))
7841 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7842 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7843 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7844 return true;
7847 if (GET_CODE (*p) == PLUS
7848 && REG_P (XEXP (*p, 0))
7849 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7850 /* If the base register is equivalent to a constant, let the generic
7851 code handle it. Otherwise we will run into problems if a future
7852 reload pass decides to rematerialize the constant. */
7853 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7854 && CONST_INT_P (XEXP (*p, 1)))
7856 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7857 HOST_WIDE_INT low, high;
7859 /* Detect coprocessor load/stores. */
7860 bool coproc_p = ((TARGET_HARD_FLOAT
7861 && TARGET_VFP
7862 && (mode == SFmode || mode == DFmode))
7863 || (TARGET_REALLY_IWMMXT
7864 && VALID_IWMMXT_REG_MODE (mode))
7865 || (TARGET_NEON
7866 && (VALID_NEON_DREG_MODE (mode)
7867 || VALID_NEON_QREG_MODE (mode))));
7869 /* For some conditions, bail out when the lower two bits of the offset are nonzero (unaligned). */
7870 if ((val & 0x3) != 0
7871 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7872 && (coproc_p
7873 /* For DI, and DF under soft-float: */
7874 || ((mode == DImode || mode == DFmode)
7875 /* Without ldrd, we use stm/ldm, which does not
7876 fare well with unaligned bits. */
7877 && (! TARGET_LDRD
7878 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7879 || TARGET_THUMB2))))
7880 return false;
7882 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7883 where the (reg+high) part gets turned into a reload add insn,
7884 we try to decompose the index into high/low values that can often
7885 also lead to better reload CSE.
7886 For example:
7887 ldr r0, [r2, #4100] // Offset too large
7888 ldr r1, [r2, #4104] // Offset too large
7890 is best reloaded as:
7891 add t1, r2, #4096
7892 ldr r0, [t1, #4]
7893 add t2, r2, #4096
7894 ldr r1, [t2, #8]
7896 which post-reload CSE can simplify in most cases to eliminate the
7897 second add instruction:
7898 add t1, r2, #4096
7899 ldr r0, [t1, #4]
7900 ldr r1, [t1, #8]
7902 The idea here is that we want to split out the bits of the constant
7903 as a mask, rather than by subtracting the maximum offset that the
7904 respective type of load/store instruction can handle.
7906 When encountering negative offsets, we can still use them even if
7907 the overall offset is positive; sometimes this may lead to an immediate
7908 that can be constructed with fewer instructions.
7909 For example:
7910 ldr r0, [r2, #0x3FFFFC]
7912 This is best reloaded as:
7913 add t1, r2, #0x400000
7914 ldr r0, [t1, #-4]
7916 The trick for spotting this for a load insn with N bits of offset
7917 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7918 negative offset that is going to make bit N and all the bits below
7919 it become zero in the remainder part.
7921 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7922 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7923 used in most cases of ARM load/store instructions. */
7925 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7926 (((VAL) & ((1 << (N)) - 1)) \
7927 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7928 : 0)
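/* A worked instance of the macro, using the 0x3FFFFC example above with a
   12-bit offset field:
     SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
       == ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000
       == (0x1FFC ^ 0x1000) - 0x1000
       == 0xFFC - 0x1000
       == -4,
   so the high part becomes 0x3FFFFC - (-4) == 0x400000 and the access is
   emitted as [base, #-4], matching the reload sequence shown earlier.  */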
7930 if (coproc_p)
7932 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7934 /* NEON quad-word load/stores are made of two double-word accesses,
7935 so the valid index range is reduced by 8. Treat as 9-bit range if
7936 we go over it. */
7937 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7938 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7940 else if (GET_MODE_SIZE (mode) == 8)
7942 if (TARGET_LDRD)
7943 low = (TARGET_THUMB2
7944 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7945 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7946 else
7947 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7948 to access doublewords. The supported load/store offsets are
7949 -8, -4, and 4, which we try to produce here. */
7950 low = ((val & 0xf) ^ 0x8) - 0x8;
7952 else if (GET_MODE_SIZE (mode) < 8)
7954 /* NEON element load/stores do not have an offset. */
7955 if (TARGET_NEON_FP16 && mode == HFmode)
7956 return false;
7958 if (TARGET_THUMB2)
7960 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7961 Try the wider 12-bit range first, and re-try if the result
7962 is out of range. */
7963 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7964 if (low < -255)
7965 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7967 else
7969 if (mode == HImode || mode == HFmode)
7971 if (arm_arch4)
7972 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7973 else
7975 /* The storehi/movhi_bytes fallbacks can use only
7976 [-4094,+4094] of the full ldrb/strb index range. */
7977 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7978 if (low == 4095 || low == -4095)
7979 return false;
7982 else
7983 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7986 else
7987 return false;
7989 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7990 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7991 - (unsigned HOST_WIDE_INT) 0x80000000);
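/* The xor/subtract above sign-extends the low 32 bits of VAL - LOW to the
   full width of HOST_WIDE_INT, so HIGH is the 32-bit wrap-around result
   even when HOST_WIDE_INT is 64 bits wide.  */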
7992 /* Check for overflow or zero */
7993 if (low == 0 || high == 0 || (high + low != val))
7994 return false;
7996 /* Reload the high part into a base reg; leave the low part
7997 in the mem.
7998 Note that replacing this gen_rtx_PLUS with plus_constant is
7999 wrong in this case because we rely on the
8000 (plus (plus reg c1) c2) structure being preserved so that
8001 XEXP (*p, 0) in push_reload below uses the correct term. */
8002 *p = gen_rtx_PLUS (GET_MODE (*p),
8003 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8004 GEN_INT (high)),
8005 GEN_INT (low));
8006 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8007 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8008 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8009 return true;
8012 return false;
8016 thumb_legitimize_reload_address (rtx *x_p,
8017 enum machine_mode mode,
8018 int opnum, int type,
8019 int ind_levels ATTRIBUTE_UNUSED)
8021 rtx x = *x_p;
8023 if (GET_CODE (x) == PLUS
8024 && GET_MODE_SIZE (mode) < 4
8025 && REG_P (XEXP (x, 0))
8026 && XEXP (x, 0) == stack_pointer_rtx
8027 && CONST_INT_P (XEXP (x, 1))
8028 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8030 rtx orig_x = x;
8032 x = copy_rtx (x);
8033 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8034 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8035 return x;
8038 /* If both registers are hi-regs, then it's better to reload the
8039 entire expression rather than each register individually. That
8040 only requires one reload register rather than two. */
8041 if (GET_CODE (x) == PLUS
8042 && REG_P (XEXP (x, 0))
8043 && REG_P (XEXP (x, 1))
8044 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8045 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8047 rtx orig_x = x;
8049 x = copy_rtx (x);
8050 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8051 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8052 return x;
8055 return NULL;
8058 /* Test for various thread-local symbols. */
8060 /* Helper for arm_tls_referenced_p. */
8062 static int
8063 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
8065 if (GET_CODE (*x) == SYMBOL_REF)
8066 return SYMBOL_REF_TLS_MODEL (*x) != 0;
8068 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8069 TLS offsets, not real symbol references. */
8070 if (GET_CODE (*x) == UNSPEC
8071 && XINT (*x, 1) == UNSPEC_TLS)
8072 return -1;
8074 return 0;
8077 /* Return TRUE if X contains any TLS symbol references. */
8079 bool
8080 arm_tls_referenced_p (rtx x)
8082 if (! TARGET_HAVE_TLS)
8083 return false;
8085 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
8088 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8090 On the ARM, allow any integer (invalid ones are removed later by insn
8091 patterns), nice doubles and symbol_refs which refer to the function's
8092 constant pool XXX.
8094 When generating pic allow anything. */
8096 static bool
8097 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
8099 /* At present, we have no support for Neon structure constants, so forbid
8100 them here. It might be possible to handle simple cases like 0 and -1
8101 in future. */
8102 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8103 return false;
8105 return flag_pic || !label_mentioned_p (x);
8108 static bool
8109 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8111 return (CONST_INT_P (x)
8112 || CONST_DOUBLE_P (x)
8113 || CONSTANT_ADDRESS_P (x)
8114 || flag_pic);
8117 static bool
8118 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
8120 return (!arm_cannot_force_const_mem (mode, x)
8121 && (TARGET_32BIT
8122 ? arm_legitimate_constant_p_1 (mode, x)
8123 : thumb_legitimate_constant_p (mode, x)));
8126 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8128 static bool
8129 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8131 rtx base, offset;
8133 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8135 split_const (x, &base, &offset);
8136 if (GET_CODE (base) == SYMBOL_REF
8137 && !offset_within_block_p (base, INTVAL (offset)))
8138 return true;
8140 return arm_tls_referenced_p (x);
8143 #define REG_OR_SUBREG_REG(X) \
8144 (REG_P (X) \
8145 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8147 #define REG_OR_SUBREG_RTX(X) \
8148 (REG_P (X) ? (X) : SUBREG_REG (X))
8150 static inline int
8151 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8153 enum machine_mode mode = GET_MODE (x);
8154 int total, words;
8156 switch (code)
8158 case ASHIFT:
8159 case ASHIFTRT:
8160 case LSHIFTRT:
8161 case ROTATERT:
8162 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8164 case PLUS:
8165 case MINUS:
8166 case COMPARE:
8167 case NEG:
8168 case NOT:
8169 return COSTS_N_INSNS (1);
8171 case MULT:
8172 if (CONST_INT_P (XEXP (x, 1)))
8174 int cycles = 0;
8175 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8177 while (i)
8179 i >>= 2;
8180 cycles++;
8182 return COSTS_N_INSNS (2) + cycles;
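/* E.g. a multiplier of 256 needs five iterations of the loop above
   (256 -> 64 -> 16 -> 4 -> 1 -> 0), giving COSTS_N_INSNS (2) + 5.  */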
8184 return COSTS_N_INSNS (1) + 16;
8186 case SET:
8187 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8188 the mode. */
8189 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8190 return (COSTS_N_INSNS (words)
8191 + 4 * ((MEM_P (SET_SRC (x)))
8192 + MEM_P (SET_DEST (x))));
8194 case CONST_INT:
8195 if (outer == SET)
8197 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8198 return 0;
8199 if (thumb_shiftable_const (INTVAL (x)))
8200 return COSTS_N_INSNS (2);
8201 return COSTS_N_INSNS (3);
8203 else if ((outer == PLUS || outer == COMPARE)
8204 && INTVAL (x) < 256 && INTVAL (x) > -256)
8205 return 0;
8206 else if ((outer == IOR || outer == XOR || outer == AND)
8207 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8208 return COSTS_N_INSNS (1);
8209 else if (outer == AND)
8211 int i;
8212 /* This duplicates the tests in the andsi3 expander. */
8213 for (i = 9; i <= 31; i++)
8214 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8215 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8216 return COSTS_N_INSNS (2);
8218 else if (outer == ASHIFT || outer == ASHIFTRT
8219 || outer == LSHIFTRT)
8220 return 0;
8221 return COSTS_N_INSNS (2);
8223 case CONST:
8224 case CONST_DOUBLE:
8225 case LABEL_REF:
8226 case SYMBOL_REF:
8227 return COSTS_N_INSNS (3);
8229 case UDIV:
8230 case UMOD:
8231 case DIV:
8232 case MOD:
8233 return 100;
8235 case TRUNCATE:
8236 return 99;
8238 case AND:
8239 case XOR:
8240 case IOR:
8241 /* XXX guess. */
8242 return 8;
8244 case MEM:
8245 /* XXX another guess. */
8246 /* Memory costs quite a lot for the first word, but subsequent words
8247 load at the equivalent of a single insn each. */
8248 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8249 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8250 ? 4 : 0));
8252 case IF_THEN_ELSE:
8253 /* XXX a guess. */
8254 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8255 return 14;
8256 return 2;
8258 case SIGN_EXTEND:
8259 case ZERO_EXTEND:
8260 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8261 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8263 if (mode == SImode)
8264 return total;
8266 if (arm_arch6)
8267 return total + COSTS_N_INSNS (1);
8269 /* Assume a two-shift sequence. Increase the cost slightly so
8270 we prefer actual shifts over an extend operation. */
8271 return total + 1 + COSTS_N_INSNS (2);
8273 default:
8274 return 99;
8278 static inline bool
8279 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8281 enum machine_mode mode = GET_MODE (x);
8282 enum rtx_code subcode;
8283 rtx operand;
8284 enum rtx_code code = GET_CODE (x);
8285 *total = 0;
8287 switch (code)
8289 case MEM:
8290 /* Memory costs quite a lot for the first word, but subsequent words
8291 load at the equivalent of a single insn each. */
8292 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8293 return true;
8295 case DIV:
8296 case MOD:
8297 case UDIV:
8298 case UMOD:
8299 if (TARGET_HARD_FLOAT && mode == SFmode)
8300 *total = COSTS_N_INSNS (2);
8301 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8302 *total = COSTS_N_INSNS (4);
8303 else
8304 *total = COSTS_N_INSNS (20);
8305 return false;
8307 case ROTATE:
8308 if (REG_P (XEXP (x, 1)))
8309 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8310 else if (!CONST_INT_P (XEXP (x, 1)))
8311 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8313 /* Fall through */
8314 case ROTATERT:
8315 if (mode != SImode)
8317 *total += COSTS_N_INSNS (4);
8318 return true;
8321 /* Fall through */
8322 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8323 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8324 if (mode == DImode)
8326 *total += COSTS_N_INSNS (3);
8327 return true;
8330 *total += COSTS_N_INSNS (1);
8331 /* Increase the cost of complex shifts because they aren't any faster,
8332 and they reduce dual issue opportunities. */
8333 if (arm_tune_cortex_a9
8334 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8335 ++*total;
8337 return true;
8339 case MINUS:
8340 if (mode == DImode)
8342 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8343 if (CONST_INT_P (XEXP (x, 0))
8344 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8346 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8347 return true;
8350 if (CONST_INT_P (XEXP (x, 1))
8351 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8353 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8354 return true;
8357 return false;
8360 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8362 if (TARGET_HARD_FLOAT
8363 && (mode == SFmode
8364 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8366 *total = COSTS_N_INSNS (1);
8367 if (CONST_DOUBLE_P (XEXP (x, 0))
8368 && arm_const_double_rtx (XEXP (x, 0)))
8370 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8371 return true;
8374 if (CONST_DOUBLE_P (XEXP (x, 1))
8375 && arm_const_double_rtx (XEXP (x, 1)))
8377 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8378 return true;
8381 return false;
8383 *total = COSTS_N_INSNS (20);
8384 return false;
8387 *total = COSTS_N_INSNS (1);
8388 if (CONST_INT_P (XEXP (x, 0))
8389 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8391 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8392 return true;
8395 subcode = GET_CODE (XEXP (x, 1));
8396 if (subcode == ASHIFT || subcode == ASHIFTRT
8397 || subcode == LSHIFTRT
8398 || subcode == ROTATE || subcode == ROTATERT)
8400 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8401 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8402 return true;
8405 /* A shift as a part of RSB costs no more than RSB itself. */
8406 if (GET_CODE (XEXP (x, 0)) == MULT
8407 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8409 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8410 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8411 return true;
8414 if (subcode == MULT
8415 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8417 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8418 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8419 return true;
8422 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8423 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8425 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8426 if (REG_P (XEXP (XEXP (x, 1), 0))
8427 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8428 *total += COSTS_N_INSNS (1);
8430 return true;
8433 /* Fall through */
8435 case PLUS:
8436 if (code == PLUS && arm_arch6 && mode == SImode
8437 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8438 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8440 *total = COSTS_N_INSNS (1);
8441 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8442 0, speed);
8443 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8444 return true;
8447 /* MLA: All arguments must be registers. We filter out
8448 multiplication by a power of two, so that we fall through to
8449 the code below. */
8450 if (GET_CODE (XEXP (x, 0)) == MULT
8451 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8453 /* The cost comes from the cost of the multiply. */
8454 return false;
8457 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8459 if (TARGET_HARD_FLOAT
8460 && (mode == SFmode
8461 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8463 *total = COSTS_N_INSNS (1);
8464 if (CONST_DOUBLE_P (XEXP (x, 1))
8465 && arm_const_double_rtx (XEXP (x, 1)))
8467 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8468 return true;
8471 return false;
8474 *total = COSTS_N_INSNS (20);
8475 return false;
8478 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8479 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8481 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8482 if (REG_P (XEXP (XEXP (x, 0), 0))
8483 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8484 *total += COSTS_N_INSNS (1);
8485 return true;
8488 /* Fall through */
8490 case AND: case XOR: case IOR:
8492 /* Normally the frame registers will be spilt into reg+const during
8493 reload, so it is a bad idea to combine them with other instructions,
8494 since then they might not be moved outside of loops. As a compromise
8495 we allow integration with ops that have a constant as their second
8496 operand. */
8497 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8498 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8499 && !CONST_INT_P (XEXP (x, 1)))
8500 *total = COSTS_N_INSNS (1);
8502 if (mode == DImode)
8504 *total += COSTS_N_INSNS (2);
8505 if (CONST_INT_P (XEXP (x, 1))
8506 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8508 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8509 return true;
8512 return false;
8515 *total += COSTS_N_INSNS (1);
8516 if (CONST_INT_P (XEXP (x, 1))
8517 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8519 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8520 return true;
8522 subcode = GET_CODE (XEXP (x, 0));
8523 if (subcode == ASHIFT || subcode == ASHIFTRT
8524 || subcode == LSHIFTRT
8525 || subcode == ROTATE || subcode == ROTATERT)
8527 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8528 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8529 return true;
8532 if (subcode == MULT
8533 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8535 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8536 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8537 return true;
8540 if (subcode == UMIN || subcode == UMAX
8541 || subcode == SMIN || subcode == SMAX)
8543 *total = COSTS_N_INSNS (3);
8544 return true;
8547 return false;
8549 case MULT:
8550 /* This should have been handled by the CPU specific routines. */
8551 gcc_unreachable ();
8553 case TRUNCATE:
8554 if (arm_arch3m && mode == SImode
8555 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8556 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8557 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8558 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8559 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8560 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8562 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8563 return true;
8565 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8566 return false;
8568 case NEG:
8569 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8571 if (TARGET_HARD_FLOAT
8572 && (mode == SFmode
8573 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8575 *total = COSTS_N_INSNS (1);
8576 return false;
8578 *total = COSTS_N_INSNS (2);
8579 return false;
8582 /* Fall through */
8583 case NOT:
8584 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8585 if (mode == SImode && code == NOT)
8587 subcode = GET_CODE (XEXP (x, 0));
8588 if (subcode == ASHIFT || subcode == ASHIFTRT
8589 || subcode == LSHIFTRT
8590 || subcode == ROTATE || subcode == ROTATERT
8591 || (subcode == MULT
8592 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8594 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8595 /* Register shifts cost an extra cycle. */
8596 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8597 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8598 subcode, 1, speed);
8599 return true;
8603 return false;
8605 case IF_THEN_ELSE:
8606 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8608 *total = COSTS_N_INSNS (4);
8609 return true;
8612 operand = XEXP (x, 0);
8614 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8615 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8616 && REG_P (XEXP (operand, 0))
8617 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8618 *total += COSTS_N_INSNS (1);
8619 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8620 + rtx_cost (XEXP (x, 2), code, 2, speed));
8621 return true;
8623 case NE:
8624 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8626 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8627 return true;
8629 goto scc_insn;
8631 case GE:
8632 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8633 && mode == SImode && XEXP (x, 1) == const0_rtx)
8635 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8636 return true;
8638 goto scc_insn;
8640 case LT:
8641 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8642 && mode == SImode && XEXP (x, 1) == const0_rtx)
8644 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8645 return true;
8647 goto scc_insn;
8649 case EQ:
8650 case GT:
8651 case LE:
8652 case GEU:
8653 case LTU:
8654 case GTU:
8655 case LEU:
8656 case UNORDERED:
8657 case ORDERED:
8658 case UNEQ:
8659 case UNGE:
8660 case UNLT:
8661 case UNGT:
8662 case UNLE:
8663 scc_insn:
8664 /* SCC insns. If the comparison has already been performed,
8665 they cost 2 instructions. Otherwise they need
8666 an additional comparison before them. */
8667 *total = COSTS_N_INSNS (2);
8668 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8670 return true;
8673 /* Fall through */
8674 case COMPARE:
8675 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8677 *total = 0;
8678 return true;
8681 *total += COSTS_N_INSNS (1);
8682 if (CONST_INT_P (XEXP (x, 1))
8683 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8685 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8686 return true;
8689 subcode = GET_CODE (XEXP (x, 0));
8690 if (subcode == ASHIFT || subcode == ASHIFTRT
8691 || subcode == LSHIFTRT
8692 || subcode == ROTATE || subcode == ROTATERT)
8694 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8695 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8696 return true;
8699 if (subcode == MULT
8700 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8702 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8703 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8704 return true;
8707 return false;
8709 case UMIN:
8710 case UMAX:
8711 case SMIN:
8712 case SMAX:
8713 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8714 if (!CONST_INT_P (XEXP (x, 1))
8715 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8716 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8717 return true;
8719 case ABS:
8720 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8722 if (TARGET_HARD_FLOAT
8723 && (mode == SFmode
8724 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8726 *total = COSTS_N_INSNS (1);
8727 return false;
8729 *total = COSTS_N_INSNS (20);
8730 return false;
8732 *total = COSTS_N_INSNS (1);
8733 if (mode == DImode)
8734 *total += COSTS_N_INSNS (3);
8735 return false;
8737 case SIGN_EXTEND:
8738 case ZERO_EXTEND:
8739 *total = 0;
8740 if (GET_MODE_CLASS (mode) == MODE_INT)
8742 rtx op = XEXP (x, 0);
8743 enum machine_mode opmode = GET_MODE (op);
8745 if (mode == DImode)
8746 *total += COSTS_N_INSNS (1);
8748 if (opmode != SImode)
8750 if (MEM_P (op))
8752 /* If !arm_arch4, we use one of the extendhisi2_mem
8753 or movhi_bytes patterns for HImode. For a QImode
8754 sign extension, we first zero-extend from memory
8755 and then perform a shift sequence. */
8756 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8757 *total += COSTS_N_INSNS (2);
8759 else if (arm_arch6)
8760 *total += COSTS_N_INSNS (1);
8762 /* We don't have the necessary insn, so we need to perform some
8763 other operation. */
8764 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8765 /* An and with constant 255. */
8766 *total += COSTS_N_INSNS (1);
8767 else
8768 /* A shift sequence. Increase costs slightly to avoid
8769 combining two shifts into an extend operation. */
8770 *total += COSTS_N_INSNS (2) + 1;
8773 return false;
8776 switch (GET_MODE (XEXP (x, 0)))
8778 case V8QImode:
8779 case V4HImode:
8780 case V2SImode:
8781 case V4QImode:
8782 case V2HImode:
8783 *total = COSTS_N_INSNS (1);
8784 return false;
8786 default:
8787 gcc_unreachable ();
8789 gcc_unreachable ();
8791 case ZERO_EXTRACT:
8792 case SIGN_EXTRACT:
8793 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8794 return true;
8796 case CONST_INT:
8797 if (const_ok_for_arm (INTVAL (x))
8798 || const_ok_for_arm (~INTVAL (x)))
8799 *total = COSTS_N_INSNS (1);
8800 else
8801 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8802 INTVAL (x), NULL_RTX,
8803 NULL_RTX, 0, 0));
8804 return true;
8806 case CONST:
8807 case LABEL_REF:
8808 case SYMBOL_REF:
8809 *total = COSTS_N_INSNS (3);
8810 return true;
8812 case HIGH:
8813 *total = COSTS_N_INSNS (1);
8814 return true;
8816 case LO_SUM:
8817 *total = COSTS_N_INSNS (1);
8818 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8819 return true;
8821 case CONST_DOUBLE:
8822 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8823 && (mode == SFmode || !TARGET_VFP_SINGLE))
8824 *total = COSTS_N_INSNS (1);
8825 else
8826 *total = COSTS_N_INSNS (4);
8827 return true;
8829 case SET:
8830 /* The vec_extract patterns accept memory operands that require an
8831 address reload. Account for the cost of that reload to give the
8832 auto-inc-dec pass an incentive to try to replace them. */
8833 if (TARGET_NEON && MEM_P (SET_DEST (x))
8834 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8836 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8837 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8838 *total += COSTS_N_INSNS (1);
8839 return true;
8841 /* Likewise for the vec_set patterns. */
8842 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8843 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8844 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8846 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8847 *total = rtx_cost (mem, code, 0, speed);
8848 if (!neon_vector_mem_operand (mem, 2, true))
8849 *total += COSTS_N_INSNS (1);
8850 return true;
8852 return false;
8854 case UNSPEC:
8855 /* We cost this as high as our memory costs to allow this to
8856 be hoisted from loops. */
8857 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8859 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8861 return true;
8863 case CONST_VECTOR:
8864 if (TARGET_NEON
8865 && TARGET_HARD_FLOAT
8866 && outer == SET
8867 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8868 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8869 *total = COSTS_N_INSNS (1);
8870 else
8871 *total = COSTS_N_INSNS (4);
8872 return true;
8874 default:
8875 *total = COSTS_N_INSNS (4);
8876 return false;
8880 /* Estimates the size cost of thumb1 instructions.
8881 For now most of the code is copied from thumb1_rtx_costs. We need more
8882 fine-grained tuning when we have more related test cases. */
8883 static inline int
8884 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8886 enum machine_mode mode = GET_MODE (x);
8887 int words;
8889 switch (code)
8891 case ASHIFT:
8892 case ASHIFTRT:
8893 case LSHIFTRT:
8894 case ROTATERT:
8895 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8897 case PLUS:
8898 case MINUS:
8899 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8900 patterns defined by RTL expansion, especially for the expansion of
8901 multiplication. */
8902 if ((GET_CODE (XEXP (x, 0)) == MULT
8903 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8904 || (GET_CODE (XEXP (x, 1)) == MULT
8905 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8906 return COSTS_N_INSNS (2);
8907 /* On purpose fall through for normal RTX. */
8908 case COMPARE:
8909 case NEG:
8910 case NOT:
8911 return COSTS_N_INSNS (1);
8913 case MULT:
8914 if (CONST_INT_P (XEXP (x, 1)))
8916 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8917 into a register first. */
8918 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8919 return COSTS_N_INSNS (1) + const_size;
8921 return COSTS_N_INSNS (1);
8923 case SET:
8924 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8925 the mode. */
8926 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8927 return (COSTS_N_INSNS (words)
8928 + 4 * ((MEM_P (SET_SRC (x)))
8929 + MEM_P (SET_DEST (x))));
8931 case CONST_INT:
8932 if (outer == SET)
8934 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8935 return COSTS_N_INSNS (1);
8936 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8937 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8938 return COSTS_N_INSNS (2);
8939 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8940 if (thumb_shiftable_const (INTVAL (x)))
8941 return COSTS_N_INSNS (2);
8942 return COSTS_N_INSNS (3);
8944 else if ((outer == PLUS || outer == COMPARE)
8945 && INTVAL (x) < 256 && INTVAL (x) > -256)
8946 return 0;
8947 else if ((outer == IOR || outer == XOR || outer == AND)
8948 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8949 return COSTS_N_INSNS (1);
8950 else if (outer == AND)
8952 int i;
8953 /* This duplicates the tests in the andsi3 expander. */
8954 for (i = 9; i <= 31; i++)
8955 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8956 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8957 return COSTS_N_INSNS (2);
8959 else if (outer == ASHIFT || outer == ASHIFTRT
8960 || outer == LSHIFTRT)
8961 return 0;
8962 return COSTS_N_INSNS (2);
8964 case CONST:
8965 case CONST_DOUBLE:
8966 case LABEL_REF:
8967 case SYMBOL_REF:
8968 return COSTS_N_INSNS (3);
8970 case UDIV:
8971 case UMOD:
8972 case DIV:
8973 case MOD:
8974 return 100;
8976 case TRUNCATE:
8977 return 99;
8979 case AND:
8980 case XOR:
8981 case IOR:
8982 /* XXX guess. */
8983 return 8;
8985 case MEM:
8986 /* XXX another guess. */
8987 /* Memory costs quite a lot for the first word, but subsequent words
8988 load at the equivalent of a single insn each. */
8989 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8990 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8991 ? 4 : 0));
8993 case IF_THEN_ELSE:
8994 /* XXX a guess. */
8995 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8996 return 14;
8997 return 2;
8999 case ZERO_EXTEND:
9000 /* XXX still guessing. */
9001 switch (GET_MODE (XEXP (x, 0)))
9003 case QImode:
9004 return (1 + (mode == DImode ? 4 : 0)
9005 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9007 case HImode:
9008 return (4 + (mode == DImode ? 4 : 0)
9009 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9011 case SImode:
9012 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9014 default:
9015 return 99;
9018 default:
9019 return 99;
9023 /* RTX costs when optimizing for size. */
9024 static bool
9025 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9026 int *total)
9028 enum machine_mode mode = GET_MODE (x);
9029 if (TARGET_THUMB1)
9031 *total = thumb1_size_rtx_costs (x, code, outer_code);
9032 return true;
9035 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9036 switch (code)
9038 case MEM:
9039 /* A memory access costs 1 insn if the mode is small or the address is
9040 a single register; otherwise it costs one insn per word. */
9041 if (REG_P (XEXP (x, 0)))
9042 *total = COSTS_N_INSNS (1);
9043 else if (flag_pic
9044 && GET_CODE (XEXP (x, 0)) == PLUS
9045 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9046 /* This will be split into two instructions.
9047 See arm.md:calculate_pic_address. */
9048 *total = COSTS_N_INSNS (2);
9049 else
9050 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9051 return true;
9053 case DIV:
9054 case MOD:
9055 case UDIV:
9056 case UMOD:
9057 /* Needs a libcall, so it costs about this. */
9058 *total = COSTS_N_INSNS (2);
9059 return false;
9061 case ROTATE:
9062 if (mode == SImode && REG_P (XEXP (x, 1)))
9064 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9065 return true;
9067 /* Fall through */
9068 case ROTATERT:
9069 case ASHIFT:
9070 case LSHIFTRT:
9071 case ASHIFTRT:
9072 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9074 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9075 return true;
9077 else if (mode == SImode)
9079 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9080 /* Slightly disparage register shifts, but not by much. */
9081 if (!CONST_INT_P (XEXP (x, 1)))
9082 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9083 return true;
9086 /* Needs a libcall. */
9087 *total = COSTS_N_INSNS (2);
9088 return false;
9090 case MINUS:
9091 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9092 && (mode == SFmode || !TARGET_VFP_SINGLE))
9094 *total = COSTS_N_INSNS (1);
9095 return false;
9098 if (mode == SImode)
9100 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9101 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9103 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9104 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9105 || subcode1 == ROTATE || subcode1 == ROTATERT
9106 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9107 || subcode1 == ASHIFTRT)
9109 /* It's just the cost of the two operands. */
9110 *total = 0;
9111 return false;
9114 *total = COSTS_N_INSNS (1);
9115 return false;
9118 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9119 return false;
9121 case PLUS:
9122 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9123 && (mode == SFmode || !TARGET_VFP_SINGLE))
9125 *total = COSTS_N_INSNS (1);
9126 return false;
9129 /* A shift as a part of ADD costs nothing. */
9130 if (GET_CODE (XEXP (x, 0)) == MULT
9131 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9133 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9134 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9135 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9136 return true;
9139 /* Fall through */
9140 case AND: case XOR: case IOR:
9141 if (mode == SImode)
9143 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9145 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9146 || subcode == LSHIFTRT || subcode == ASHIFTRT
9147 || (code == AND && subcode == NOT))
9149 /* It's just the cost of the two operands. */
9150 *total = 0;
9151 return false;
9155 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9156 return false;
9158 case MULT:
9159 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9160 return false;
9162 case NEG:
9163 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9164 && (mode == SFmode || !TARGET_VFP_SINGLE))
9166 *total = COSTS_N_INSNS (1);
9167 return false;
9170 /* Fall through */
9171 case NOT:
9172 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9174 return false;
9176 case IF_THEN_ELSE:
9177 *total = 0;
9178 return false;
9180 case COMPARE:
9181 if (cc_register (XEXP (x, 0), VOIDmode))
9182 * total = 0;
9183 else
9184 *total = COSTS_N_INSNS (1);
9185 return false;
9187 case ABS:
9188 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9189 && (mode == SFmode || !TARGET_VFP_SINGLE))
9190 *total = COSTS_N_INSNS (1);
9191 else
9192 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9193 return false;
9195 case SIGN_EXTEND:
9196 case ZERO_EXTEND:
9197 return arm_rtx_costs_1 (x, outer_code, total, 0);
9199 case CONST_INT:
9200 if (const_ok_for_arm (INTVAL (x)))
9201 /* A multiplication by a constant requires another instruction
9202 to load the constant to a register. */
9203 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9204 ? 1 : 0);
9205 else if (const_ok_for_arm (~INTVAL (x)))
9206 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9207 else if (const_ok_for_arm (-INTVAL (x)))
9209 if (outer_code == COMPARE || outer_code == PLUS
9210 || outer_code == MINUS)
9211 *total = 0;
9212 else
9213 *total = COSTS_N_INSNS (1);
9215 else
9216 *total = COSTS_N_INSNS (2);
9217 return true;
9219 case CONST:
9220 case LABEL_REF:
9221 case SYMBOL_REF:
9222 *total = COSTS_N_INSNS (2);
9223 return true;
9225 case CONST_DOUBLE:
9226 *total = COSTS_N_INSNS (4);
9227 return true;
9229 case CONST_VECTOR:
9230 if (TARGET_NEON
9231 && TARGET_HARD_FLOAT
9232 && outer_code == SET
9233 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9234 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9235 *total = COSTS_N_INSNS (1);
9236 else
9237 *total = COSTS_N_INSNS (4);
9238 return true;
9240 case HIGH:
9241 case LO_SUM:
9242 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9243 cost of these slightly. */
9244 *total = COSTS_N_INSNS (1) + 1;
9245 return true;
9247 case SET:
9248 return false;
9250 default:
9251 if (mode != VOIDmode)
9252 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9253 else
9254 *total = COSTS_N_INSNS (4); /* Who knows? */
9255 return false;
9259 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9260 operand, then return the operand that is being shifted. If the shift
9261 is not by a constant, then set *SHIFT_REG to the shift count operand.
9262 Return NULL if OP is not a shifter operand. */
9263 static rtx
9264 shifter_op_p (rtx op, rtx *shift_reg)
9266 enum rtx_code code = GET_CODE (op);
9268 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9269 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9270 return XEXP (op, 0);
9271 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9272 return XEXP (op, 0);
9273 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9274 || code == ASHIFTRT)
9276 if (!CONST_INT_P (XEXP (op, 1)))
9277 *shift_reg = XEXP (op, 1);
9278 return XEXP (op, 0);
9281 return NULL;
9284 static bool
9285 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9287 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9288 gcc_assert (GET_CODE (x) == UNSPEC);
9290 switch (XINT (x, 1))
9292 case UNSPEC_UNALIGNED_LOAD:
9293 /* We can only do unaligned loads into the integer unit, and we can't
9294 use LDM or LDRD. */
9295 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9296 if (speed_p)
9297 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9298 + extra_cost->ldst.load_unaligned);
9300 #ifdef NOT_YET
9301 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9302 ADDR_SPACE_GENERIC, speed_p);
9303 #endif
9304 return true;
9306 case UNSPEC_UNALIGNED_STORE:
9307 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9308 if (speed_p)
9309 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9310 + extra_cost->ldst.store_unaligned);
9312 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9313 #ifdef NOT_YET
9314 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9315 ADDR_SPACE_GENERIC, speed_p);
9316 #endif
9317 return true;
9319 case UNSPEC_VRINTZ:
9320 case UNSPEC_VRINTP:
9321 case UNSPEC_VRINTM:
9322 case UNSPEC_VRINTR:
9323 case UNSPEC_VRINTX:
9324 case UNSPEC_VRINTA:
9325 *cost = COSTS_N_INSNS (1);
9326 if (speed_p)
9327 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9329 return true;
9330 default:
9331 *cost = COSTS_N_INSNS (2);
9332 break;
9334 return false;
9337 /* Cost of a libcall. We assume one insn per argument, an amount for the
9338 call (one insn for -Os) and then one for processing the result. */
9339 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
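/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (4) when optimizing for
   size and COSTS_N_INSNS (20) when optimizing for speed.  */

/* Helper for the narrow-mode PLUS/MINUS cases below: if operand IDX of X
   is a shift-left that can be merged into the arithmetic instruction, add
   the cost of the combined shift-and-operate form (using the register- or
   immediate-shift figure from the cost table) plus the costs of the two
   operands, and return TRUE from the containing function.  */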
9341 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9342 do \
9344 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9345 if (shift_op != NULL \
9346 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9348 if (shift_reg) \
9350 if (speed_p) \
9351 *cost += extra_cost->alu.arith_shift_reg; \
9352 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9354 else if (speed_p) \
9355 *cost += extra_cost->alu.arith_shift; \
9357 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9358 + rtx_cost (XEXP (x, 1 - IDX), \
9359 OP, 1, speed_p)); \
9360 return true; \
9363 while (0);
9365 /* RTX costs. Make an estimate of the cost of executing the operation
9366 X, which is contained within an operation with code OUTER_CODE.
9367 SPEED_P indicates whether the cost desired is the performance cost,
9368 or the size cost. The estimate is stored in COST and the return
9369 value is TRUE if the cost calculation is final, or FALSE if the
9370 caller should recurse through the operands of X to add additional
9371 costs.
9373 We currently make no attempt to model the size savings of Thumb-2
9374 16-bit instructions. At the normal points in compilation where
9375 this code is called we have no measure of whether the condition
9376 flags are live or not, and thus no realistic way to determine what
9377 the size will eventually be. */
9378 static bool
9379 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9380 const struct cpu_cost_table *extra_cost,
9381 int *cost, bool speed_p)
9383 enum machine_mode mode = GET_MODE (x);
9385 if (TARGET_THUMB1)
9387 if (speed_p)
9388 *cost = thumb1_rtx_costs (x, code, outer_code);
9389 else
9390 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9391 return true;
9394 switch (code)
9396 case SET:
9397 *cost = 0;
9398 /* SET RTXs don't have a mode so we get it from the destination. */
9399 mode = GET_MODE (SET_DEST (x));
9401 if (REG_P (SET_SRC (x))
9402 && REG_P (SET_DEST (x)))
9404 /* Assume that most copies can be done with a single insn,
9405 unless we don't have HW FP, in which case everything
9406 larger than word mode will require two insns. */
9407 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9408 && GET_MODE_SIZE (mode) > 4)
9409 || mode == DImode)
9410 ? 2 : 1);
9411 /* Conditional register moves can be encoded
9412 in 16 bits in Thumb mode. */
9413 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9414 *cost >>= 1;
9416 return true;
9419 if (CONST_INT_P (SET_SRC (x)))
9421 /* Handle CONST_INT here, since the value doesn't have a mode
9422 and we would otherwise be unable to work out the true cost. */
9423 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9424 outer_code = SET;
9425 /* Slightly lower the cost of setting a core reg to a constant.
9426 This helps break up chains and allows for better scheduling. */
9427 if (REG_P (SET_DEST (x))
9428 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9429 *cost -= 1;
9430 x = SET_SRC (x);
9431 /* Immediate moves with an immediate in the range [0, 255] can be
9432 encoded in 16 bits in Thumb mode. */
9433 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9434 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9435 *cost >>= 1;
9436 goto const_int_cost;
9439 return false;
9441 case MEM:
9442 /* A memory access costs 1 insn if the mode is small, or the address is
9443 a single register, otherwise it costs one insn per word. */
9444 if (REG_P (XEXP (x, 0)))
9445 *cost = COSTS_N_INSNS (1);
9446 else if (flag_pic
9447 && GET_CODE (XEXP (x, 0)) == PLUS
9448 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9449 /* This will be split into two instructions.
9450 See arm.md:calculate_pic_address. */
9451 *cost = COSTS_N_INSNS (2);
9452 else
9453 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9455 /* For speed optimizations, add the costs of the address and
9456 accessing memory. */
9457 if (speed_p)
9458 #ifdef NOT_YET
9459 *cost += (extra_cost->ldst.load
9460 + arm_address_cost (XEXP (x, 0), mode,
9461 ADDR_SPACE_GENERIC, speed_p));
9462 #else
9463 *cost += extra_cost->ldst.load;
9464 #endif
9465 return true;
9467 case PARALLEL:
9469 /* Calculations of LDM costs are complex. We assume an initial cost
9470 (ldm_1st) which will load the number of registers mentioned in
9471 ldm_regs_per_insn_1st registers; then each additional
9472 ldm_regs_per_insn_subsequent registers cost one more insn. The
9473 formula for N regs is thus:
9475 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9476 + ldm_regs_per_insn_subsequent - 1)
9477 / ldm_regs_per_insn_subsequent).
9479 Additional costs may also be added for addressing. A similar
9480 formula is used for STM. */
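/* Worked example with illustrative values (not taken from any tuning
   table): if ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent
   == 2, an 8-register LDM costs ldm_1st + COSTS_N_INSNS ((6 + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (3). */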
9482 bool is_ldm = load_multiple_operation (x, SImode);
9483 bool is_stm = store_multiple_operation (x, SImode);
9485 *cost = COSTS_N_INSNS (1);
9487 if (is_ldm || is_stm)
9489 if (speed_p)
9491 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9492 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9493 ? extra_cost->ldst.ldm_regs_per_insn_1st
9494 : extra_cost->ldst.stm_regs_per_insn_1st;
9495 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9496 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9497 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9499 *cost += regs_per_insn_1st
9500 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9501 + regs_per_insn_sub - 1)
9502 / regs_per_insn_sub);
9503 return true;
9507 return false;
9509 case DIV:
9510 case UDIV:
9511 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9512 && (mode == SFmode || !TARGET_VFP_SINGLE))
9513 *cost = COSTS_N_INSNS (speed_p
9514 ? extra_cost->fp[mode != SFmode].div : 1);
9515 else if (mode == SImode && TARGET_IDIV)
9516 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9517 else
9518 *cost = LIBCALL_COST (2);
9519 return false; /* All arguments must be in registers. */
9521 case MOD:
9522 case UMOD:
9523 *cost = LIBCALL_COST (2);
9524 return false; /* All arguments must be in registers. */
9526 case ROTATE:
9527 if (mode == SImode && REG_P (XEXP (x, 1)))
9529 *cost = (COSTS_N_INSNS (2)
9530 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9531 if (speed_p)
9532 *cost += extra_cost->alu.shift_reg;
9533 return true;
9535 /* Fall through */
9536 case ROTATERT:
9537 case ASHIFT:
9538 case LSHIFTRT:
9539 case ASHIFTRT:
9540 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9542 *cost = (COSTS_N_INSNS (3)
9543 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9544 if (speed_p)
9545 *cost += 2 * extra_cost->alu.shift;
9546 return true;
9548 else if (mode == SImode)
9550 *cost = (COSTS_N_INSNS (1)
9551 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9552 /* Slightly disparage register shifts at -Os, but not by much. */
9553 if (!CONST_INT_P (XEXP (x, 1)))
9554 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9555 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9556 return true;
9558 else if (GET_MODE_CLASS (mode) == MODE_INT
9559 && GET_MODE_SIZE (mode) < 4)
9561 if (code == ASHIFT)
9563 *cost = (COSTS_N_INSNS (1)
9564 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9565 /* Slightly disparage register shifts at -Os, but not by
9566 much. */
9567 if (!CONST_INT_P (XEXP (x, 1)))
9568 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9569 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9571 else if (code == LSHIFTRT || code == ASHIFTRT)
9573 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9575 /* Can use SBFX/UBFX. */
9576 *cost = COSTS_N_INSNS (1);
9577 if (speed_p)
9578 *cost += extra_cost->alu.bfx;
9579 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9581 else
9583 *cost = COSTS_N_INSNS (2);
9584 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9585 if (speed_p)
9587 if (CONST_INT_P (XEXP (x, 1)))
9588 *cost += 2 * extra_cost->alu.shift;
9589 else
9590 *cost += (extra_cost->alu.shift
9591 + extra_cost->alu.shift_reg);
9593 else
9594 /* Slightly disparage register shifts. */
9595 *cost += !CONST_INT_P (XEXP (x, 1));
9598 else /* Rotates. */
9600 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9601 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9602 if (speed_p)
9604 if (CONST_INT_P (XEXP (x, 1)))
9605 *cost += (2 * extra_cost->alu.shift
9606 + extra_cost->alu.log_shift);
9607 else
9608 *cost += (extra_cost->alu.shift
9609 + extra_cost->alu.shift_reg
9610 + extra_cost->alu.log_shift_reg);
9613 return true;
9616 *cost = LIBCALL_COST (2);
9617 return false;
9619 case BSWAP:
9620 if (arm_arch6)
9622 if (mode == SImode)
9624 *cost = COSTS_N_INSNS (1);
9625 if (speed_p)
9626 *cost += extra_cost->alu.rev;
9628 return false;
9631 else
9633 /* No rev instruction available. Look at arm_legacy_rev
9634 and thumb_legacy_rev for the form of RTL used then. */
9635 if (TARGET_THUMB)
9637 *cost = COSTS_N_INSNS (10);
9639 if (speed_p)
9641 *cost += 6 * extra_cost->alu.shift;
9642 *cost += 3 * extra_cost->alu.logical;
9645 else
9647 *cost = COSTS_N_INSNS (5);
9649 if (speed_p)
9651 *cost += 2 * extra_cost->alu.shift;
9652 *cost += extra_cost->alu.arith_shift;
9653 *cost += 2 * extra_cost->alu.logical;
9656 return true;
9658 return false;
9660 case MINUS:
9661 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9662 && (mode == SFmode || !TARGET_VFP_SINGLE))
9664 *cost = COSTS_N_INSNS (1);
9665 if (GET_CODE (XEXP (x, 0)) == MULT
9666 || GET_CODE (XEXP (x, 1)) == MULT)
9668 rtx mul_op0, mul_op1, sub_op;
9670 if (speed_p)
9671 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9673 if (GET_CODE (XEXP (x, 0)) == MULT)
9675 mul_op0 = XEXP (XEXP (x, 0), 0);
9676 mul_op1 = XEXP (XEXP (x, 0), 1);
9677 sub_op = XEXP (x, 1);
9679 else
9681 mul_op0 = XEXP (XEXP (x, 1), 0);
9682 mul_op1 = XEXP (XEXP (x, 1), 1);
9683 sub_op = XEXP (x, 0);
9686 /* The first operand of the multiply may be optionally
9687 negated. */
9688 if (GET_CODE (mul_op0) == NEG)
9689 mul_op0 = XEXP (mul_op0, 0);
9691 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9692 + rtx_cost (mul_op1, code, 0, speed_p)
9693 + rtx_cost (sub_op, code, 0, speed_p));
9695 return true;
9698 if (speed_p)
9699 *cost += extra_cost->fp[mode != SFmode].addsub;
9700 return false;
9703 if (mode == SImode)
9705 rtx shift_by_reg = NULL;
9706 rtx shift_op;
9707 rtx non_shift_op;
9709 *cost = COSTS_N_INSNS (1);
9711 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9712 if (shift_op == NULL)
9714 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9715 non_shift_op = XEXP (x, 0);
9717 else
9718 non_shift_op = XEXP (x, 1);
9720 if (shift_op != NULL)
9722 if (shift_by_reg != NULL)
9724 if (speed_p)
9725 *cost += extra_cost->alu.arith_shift_reg;
9726 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9728 else if (speed_p)
9729 *cost += extra_cost->alu.arith_shift;
9731 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9732 + rtx_cost (non_shift_op, code, 0, speed_p));
9733 return true;
9736 if (arm_arch_thumb2
9737 && GET_CODE (XEXP (x, 1)) == MULT)
9739 /* MLS. */
9740 if (speed_p)
9741 *cost += extra_cost->mult[0].add;
9742 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9743 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9744 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9745 return true;
9748 if (CONST_INT_P (XEXP (x, 0)))
9750 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9751 INTVAL (XEXP (x, 0)), NULL_RTX,
9752 NULL_RTX, 1, 0);
9753 *cost = COSTS_N_INSNS (insns);
9754 if (speed_p)
9755 *cost += insns * extra_cost->alu.arith;
9756 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9757 return true;
9760 return false;
9763 if (GET_MODE_CLASS (mode) == MODE_INT
9764 && GET_MODE_SIZE (mode) < 4)
9766 rtx shift_op, shift_reg;
9767 shift_reg = NULL;
9769 /* We check both sides of the MINUS for shifter operands since,
9770 unlike PLUS, it's not commutative. */
9772 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9773 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9775 /* Slightly disparage, as we might need to widen the result. */
9776 *cost = 1 + COSTS_N_INSNS (1);
9777 if (speed_p)
9778 *cost += extra_cost->alu.arith;
9780 if (CONST_INT_P (XEXP (x, 0)))
9782 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9783 return true;
9786 return false;
9789 if (mode == DImode)
9791 *cost = COSTS_N_INSNS (2);
9793 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9795 rtx op1 = XEXP (x, 1);
9797 if (speed_p)
9798 *cost += 2 * extra_cost->alu.arith;
9800 if (GET_CODE (op1) == ZERO_EXTEND)
9801 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9802 else
9803 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9804 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9805 0, speed_p);
9806 return true;
9808 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9810 if (speed_p)
9811 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9812 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9813 0, speed_p)
9814 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9815 return true;
9817 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9818 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9820 if (speed_p)
9821 *cost += (extra_cost->alu.arith
9822 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9823 ? extra_cost->alu.arith
9824 : extra_cost->alu.arith_shift));
9825 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9826 + rtx_cost (XEXP (XEXP (x, 1), 0),
9827 GET_CODE (XEXP (x, 1)), 0, speed_p));
9828 return true;
9831 if (speed_p)
9832 *cost += 2 * extra_cost->alu.arith;
9833 return false;
9836 /* Vector mode? */
9838 *cost = LIBCALL_COST (2);
9839 return false;
9841 case PLUS:
9842 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9843 && (mode == SFmode || !TARGET_VFP_SINGLE))
9845 *cost = COSTS_N_INSNS (1);
9846 if (GET_CODE (XEXP (x, 0)) == MULT)
9848 rtx mul_op0, mul_op1, add_op;
9850 if (speed_p)
9851 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9853 mul_op0 = XEXP (XEXP (x, 0), 0);
9854 mul_op1 = XEXP (XEXP (x, 0), 1);
9855 add_op = XEXP (x, 1);
9857 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9858 + rtx_cost (mul_op1, code, 0, speed_p)
9859 + rtx_cost (add_op, code, 0, speed_p));
9861 return true;
9864 if (speed_p)
9865 *cost += extra_cost->fp[mode != SFmode].addsub;
9866 return false;
9868 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9870 *cost = LIBCALL_COST (2);
9871 return false;
9874 /* Narrow modes can be synthesized in SImode, but the range
9875 of useful sub-operations is limited. Check for shift operations
9876 on one of the operands. Only left shifts can be used in the
9877 narrow modes. */
9878 if (GET_MODE_CLASS (mode) == MODE_INT
9879 && GET_MODE_SIZE (mode) < 4)
9881 rtx shift_op, shift_reg;
9882 shift_reg = NULL;
9884 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9886 if (CONST_INT_P (XEXP (x, 1)))
9888 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9889 INTVAL (XEXP (x, 1)), NULL_RTX,
9890 NULL_RTX, 1, 0);
9891 *cost = COSTS_N_INSNS (insns);
9892 if (speed_p)
9893 *cost += insns * extra_cost->alu.arith;
9894 /* Slightly penalize a narrow operation as the result may
9895 need widening. */
9896 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9897 return true;
9900 /* Slightly penalize a narrow operation as the result may
9901 need widening. */
9902 *cost = 1 + COSTS_N_INSNS (1);
9903 if (speed_p)
9904 *cost += extra_cost->alu.arith;
9906 return false;
9909 if (mode == SImode)
9911 rtx shift_op, shift_reg;
9913 *cost = COSTS_N_INSNS (1);
9914 if (TARGET_INT_SIMD
9915 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9916 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9918 /* UXTA[BH] or SXTA[BH]. */
9919 if (speed_p)
9920 *cost += extra_cost->alu.extend_arith;
9921 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9922 speed_p)
9923 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9924 return true;
9927 shift_reg = NULL;
9928 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9929 if (shift_op != NULL)
9931 if (shift_reg)
9933 if (speed_p)
9934 *cost += extra_cost->alu.arith_shift_reg;
9935 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9937 else if (speed_p)
9938 *cost += extra_cost->alu.arith_shift;
9940 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9941 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9942 return true;
9944 if (GET_CODE (XEXP (x, 0)) == MULT)
9946 rtx mul_op = XEXP (x, 0);
9948 *cost = COSTS_N_INSNS (1);
9950 if (TARGET_DSP_MULTIPLY
9951 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9952 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9953 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9954 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9955 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9956 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9957 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9958 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9959 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9960 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9961 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9962 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9963 == 16))))))
9965 /* SMLA[BT][BT]. */
9966 if (speed_p)
9967 *cost += extra_cost->mult[0].extend_add;
9968 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9969 SIGN_EXTEND, 0, speed_p)
9970 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9971 SIGN_EXTEND, 0, speed_p)
9972 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9973 return true;
9976 if (speed_p)
9977 *cost += extra_cost->mult[0].add;
9978 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9979 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9980 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9981 return true;
9983 if (CONST_INT_P (XEXP (x, 1)))
9985 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9986 INTVAL (XEXP (x, 1)), NULL_RTX,
9987 NULL_RTX, 1, 0);
9988 *cost = COSTS_N_INSNS (insns);
9989 if (speed_p)
9990 *cost += insns * extra_cost->alu.arith;
9991 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9992 return true;
9994 return false;
9997 if (mode == DImode)
9999 if (arm_arch3m
10000 && GET_CODE (XEXP (x, 0)) == MULT
10001 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10002 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10003 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10004 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10006 *cost = COSTS_N_INSNS (1);
10007 if (speed_p)
10008 *cost += extra_cost->mult[1].extend_add;
10009 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10010 ZERO_EXTEND, 0, speed_p)
10011 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10012 ZERO_EXTEND, 0, speed_p)
10013 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10014 return true;
10017 *cost = COSTS_N_INSNS (2);
10019 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10020 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10022 if (speed_p)
10023 *cost += (extra_cost->alu.arith
10024 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10025 ? extra_cost->alu.arith
10026 : extra_cost->alu.arith_shift));
10028 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10029 speed_p)
10030 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10031 return true;
10034 if (speed_p)
10035 *cost += 2 * extra_cost->alu.arith;
10036 return false;
10039 /* Vector mode? */
10040 *cost = LIBCALL_COST (2);
10041 return false;
10042 case IOR:
10043 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10045 *cost = COSTS_N_INSNS (1);
10046 if (speed_p)
10047 *cost += extra_cost->alu.rev;
10049 return true;
10051 /* Fall through. */
10052 case AND: case XOR:
10053 if (mode == SImode)
10055 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10056 rtx op0 = XEXP (x, 0);
10057 rtx shift_op, shift_reg;
10059 *cost = COSTS_N_INSNS (1);
10061 if (subcode == NOT
10062 && (code == AND
10063 || (code == IOR && TARGET_THUMB2)))
10064 op0 = XEXP (op0, 0);
10066 shift_reg = NULL;
10067 shift_op = shifter_op_p (op0, &shift_reg);
10068 if (shift_op != NULL)
10070 if (shift_reg)
10072 if (speed_p)
10073 *cost += extra_cost->alu.log_shift_reg;
10074 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10076 else if (speed_p)
10077 *cost += extra_cost->alu.log_shift;
10079 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10080 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10081 return true;
10084 if (CONST_INT_P (XEXP (x, 1)))
10086 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10087 INTVAL (XEXP (x, 1)), NULL_RTX,
10088 NULL_RTX, 1, 0);
10090 *cost = COSTS_N_INSNS (insns);
10091 if (speed_p)
10092 *cost += insns * extra_cost->alu.logical;
10093 *cost += rtx_cost (op0, code, 0, speed_p);
10094 return true;
10097 if (speed_p)
10098 *cost += extra_cost->alu.logical;
10099 *cost += (rtx_cost (op0, code, 0, speed_p)
10100 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10101 return true;
10104 if (mode == DImode)
10106 rtx op0 = XEXP (x, 0);
10107 enum rtx_code subcode = GET_CODE (op0);
10109 *cost = COSTS_N_INSNS (2);
10111 if (subcode == NOT
10112 && (code == AND
10113 || (code == IOR && TARGET_THUMB2)))
10114 op0 = XEXP (op0, 0);
10116 if (GET_CODE (op0) == ZERO_EXTEND)
10118 if (speed_p)
10119 *cost += 2 * extra_cost->alu.logical;
10121 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10122 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10123 return true;
10125 else if (GET_CODE (op0) == SIGN_EXTEND)
10127 if (speed_p)
10128 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10130 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10131 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10132 return true;
10135 if (speed_p)
10136 *cost += 2 * extra_cost->alu.logical;
10138 return true;
10140 /* Vector mode? */
10142 *cost = LIBCALL_COST (2);
10143 return false;
10145 case MULT:
10146 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10147 && (mode == SFmode || !TARGET_VFP_SINGLE))
10149 rtx op0 = XEXP (x, 0);
10151 *cost = COSTS_N_INSNS (1);
10153 if (GET_CODE (op0) == NEG)
10154 op0 = XEXP (op0, 0);
10156 if (speed_p)
10157 *cost += extra_cost->fp[mode != SFmode].mult;
10159 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10160 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10161 return true;
10163 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10165 *cost = LIBCALL_COST (2);
10166 return false;
10169 if (mode == SImode)
10171 *cost = COSTS_N_INSNS (1);
10172 if (TARGET_DSP_MULTIPLY
10173 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10174 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10175 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10176 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10177 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10178 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10179 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10180 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10181 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10182 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10183 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10184 && (INTVAL (XEXP (XEXP (x, 1), 1))
10185 == 16))))))
10187 /* SMUL[TB][TB]. */
10188 if (speed_p)
10189 *cost += extra_cost->mult[0].extend;
10190 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10191 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10192 return true;
10194 if (speed_p)
10195 *cost += extra_cost->mult[0].simple;
10196 return false;
10199 if (mode == DImode)
10201 if (arm_arch3m
10202 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10203 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10204 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10205 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10207 *cost = COSTS_N_INSNS (1);
10208 if (speed_p)
10209 *cost += extra_cost->mult[1].extend;
10210 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10211 ZERO_EXTEND, 0, speed_p)
10212 + rtx_cost (XEXP (XEXP (x, 1), 0),
10213 ZERO_EXTEND, 0, speed_p));
10214 return true;
10217 *cost = LIBCALL_COST (2);
10218 return false;
10221 /* Vector mode? */
10222 *cost = LIBCALL_COST (2);
10223 return false;
10225 case NEG:
10226 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10227 && (mode == SFmode || !TARGET_VFP_SINGLE))
10229 *cost = COSTS_N_INSNS (1);
10230 if (speed_p)
10231 *cost += extra_cost->fp[mode != SFmode].neg;
10233 return false;
10235 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10237 *cost = LIBCALL_COST (1);
10238 return false;
10241 if (mode == SImode)
10243 if (GET_CODE (XEXP (x, 0)) == ABS)
10245 *cost = COSTS_N_INSNS (2);
10246 /* Assume the non-flag-changing variant. */
10247 if (speed_p)
10248 *cost += (extra_cost->alu.log_shift
10249 + extra_cost->alu.arith_shift);
10250 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10251 return true;
10254 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10255 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10257 *cost = COSTS_N_INSNS (2);
10258 /* No extra cost for MOV imm and MVN imm. */
10259 /* If the comparison op is using the flags, there's no further
10260 cost, otherwise we need to add the cost of the comparison. */
10261 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10262 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10263 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10265 *cost += (COSTS_N_INSNS (1)
10266 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10267 speed_p)
10268 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10269 speed_p));
10270 if (speed_p)
10271 *cost += extra_cost->alu.arith;
10273 return true;
10275 *cost = COSTS_N_INSNS (1);
10276 if (speed_p)
10277 *cost += extra_cost->alu.arith;
10278 return false;
10281 if (GET_MODE_CLASS (mode) == MODE_INT
10282 && GET_MODE_SIZE (mode) < 4)
10284 /* Slightly disparage, as we might need an extend operation. */
10285 *cost = 1 + COSTS_N_INSNS (1);
10286 if (speed_p)
10287 *cost += extra_cost->alu.arith;
10288 return false;
10291 if (mode == DImode)
10293 *cost = COSTS_N_INSNS (2);
10294 if (speed_p)
10295 *cost += 2 * extra_cost->alu.arith;
10296 return false;
10299 /* Vector mode? */
10300 *cost = LIBCALL_COST (1);
10301 return false;
10303 case NOT:
10304 if (mode == SImode)
10306 rtx shift_op;
10307 rtx shift_reg = NULL;
10309 *cost = COSTS_N_INSNS (1);
10310 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10312 if (shift_op)
10314 if (shift_reg != NULL)
10316 if (speed_p)
10317 *cost += extra_cost->alu.log_shift_reg;
10318 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10320 else if (speed_p)
10321 *cost += extra_cost->alu.log_shift;
10322 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10323 return true;
10326 if (speed_p)
10327 *cost += extra_cost->alu.logical;
10328 return false;
10330 if (mode == DImode)
10332 *cost = COSTS_N_INSNS (2);
10333 return false;
10336 /* Vector mode? */
10338 *cost += LIBCALL_COST (1);
10339 return false;
10341 case IF_THEN_ELSE:
10343 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10345 *cost = COSTS_N_INSNS (4);
10346 return true;
10348 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10349 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10351 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10352 /* Assume that if one arm of the if_then_else is a register,
10353 that it will be tied with the result and eliminate the
10354 conditional insn. */
10355 if (REG_P (XEXP (x, 1)))
10356 *cost += op2cost;
10357 else if (REG_P (XEXP (x, 2)))
10358 *cost += op1cost;
10359 else
10361 if (speed_p)
10363 if (extra_cost->alu.non_exec_costs_exec)
10364 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10365 else
10366 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10368 else
10369 *cost += op1cost + op2cost;
10372 return true;
10374 case COMPARE:
10375 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10376 *cost = 0;
10377 else
10379 enum machine_mode op0mode;
10380 /* We'll mostly assume that the cost of a compare is the cost of the
10381 LHS. However, there are some notable exceptions. */
10383 /* Floating point compares are never done as side-effects. */
10384 op0mode = GET_MODE (XEXP (x, 0));
10385 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10386 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10388 *cost = COSTS_N_INSNS (1);
10389 if (speed_p)
10390 *cost += extra_cost->fp[op0mode != SFmode].compare;
10392 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10394 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10395 return true;
10398 return false;
10400 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10402 *cost = LIBCALL_COST (2);
10403 return false;
10406 /* DImode compares normally take two insns. */
10407 if (op0mode == DImode)
10409 *cost = COSTS_N_INSNS (2);
10410 if (speed_p)
10411 *cost += 2 * extra_cost->alu.arith;
10412 return false;
10415 if (op0mode == SImode)
10417 rtx shift_op;
10418 rtx shift_reg;
10420 if (XEXP (x, 1) == const0_rtx
10421 && !(REG_P (XEXP (x, 0))
10422 || (GET_CODE (XEXP (x, 0)) == SUBREG
10423 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10425 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10427 /* Multiply operations that set the flags are often
10428 significantly more expensive. */
10429 if (speed_p
10430 && GET_CODE (XEXP (x, 0)) == MULT
10431 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10432 *cost += extra_cost->mult[0].flag_setting;
10434 if (speed_p
10435 && GET_CODE (XEXP (x, 0)) == PLUS
10436 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10437 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10438 0), 1), mode))
10439 *cost += extra_cost->mult[0].flag_setting;
10440 return true;
10443 shift_reg = NULL;
10444 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10445 if (shift_op != NULL)
10447 *cost = COSTS_N_INSNS (1);
10448 if (shift_reg != NULL)
10450 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10451 if (speed_p)
10452 *cost += extra_cost->alu.arith_shift_reg;
10454 else if (speed_p)
10455 *cost += extra_cost->alu.arith_shift;
10456 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10457 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10458 return true;
10461 *cost = COSTS_N_INSNS (1);
10462 if (speed_p)
10463 *cost += extra_cost->alu.arith;
10464 if (CONST_INT_P (XEXP (x, 1))
10465 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10467 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10468 return true;
10470 return false;
10473 /* Vector mode? */
10475 *cost = LIBCALL_COST (2);
10476 return false;
10478 return true;
10480 case EQ:
10481 case NE:
10482 case LT:
10483 case LE:
10484 case GT:
10485 case GE:
10486 case LTU:
10487 case LEU:
10488 case GEU:
10489 case GTU:
10490 case ORDERED:
10491 case UNORDERED:
10492 case UNEQ:
10493 case UNLE:
10494 case UNLT:
10495 case UNGE:
10496 case UNGT:
10497 case LTGT:
10498 if (outer_code == SET)
10500 /* Is it a store-flag operation? */
10501 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10502 && XEXP (x, 1) == const0_rtx)
10504 /* Thumb also needs an IT insn. */
10505 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10506 return true;
10508 if (XEXP (x, 1) == const0_rtx)
10510 switch (code)
10512 case LT:
10513 /* LSR Rd, Rn, #31. */
10514 *cost = COSTS_N_INSNS (1);
10515 if (speed_p)
10516 *cost += extra_cost->alu.shift;
10517 break;
10519 case EQ:
10520 /* RSBS T1, Rn, #0
10521 ADC Rd, Rn, T1. */
10523 case NE:
10524 /* SUBS T1, Rn, #1
10525 SBC Rd, Rn, T1. */
10526 *cost = COSTS_N_INSNS (2);
10527 break;
10529 case LE:
10530 /* RSBS T1, Rn, Rn, LSR #31
10531 ADC Rd, Rn, T1. */
10532 *cost = COSTS_N_INSNS (2);
10533 if (speed_p)
10534 *cost += extra_cost->alu.arith_shift;
10535 break;
10537 case GT:
10538 /* RSB Rd, Rn, Rn, ASR #1
10539 LSR Rd, Rd, #31. */
10540 *cost = COSTS_N_INSNS (2);
10541 if (speed_p)
10542 *cost += (extra_cost->alu.arith_shift
10543 + extra_cost->alu.shift);
10544 break;
10546 case GE:
10547 /* ASR Rd, Rn, #31
10548 ADD Rd, Rn, #1. */
10549 *cost = COSTS_N_INSNS (2);
10550 if (speed_p)
10551 *cost += extra_cost->alu.shift;
10552 break;
10554 default:
10555 /* Remaining cases are either meaningless or would take
10556 three insns anyway. */
10557 *cost = COSTS_N_INSNS (3);
10558 break;
10560 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10561 return true;
10563 else
10565 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10566 if (CONST_INT_P (XEXP (x, 1))
10567 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10569 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10570 return true;
10573 return false;
10576 /* Not directly inside a set. If it involves the condition code
10577 register it must be the condition for a branch, cond_exec or
10578 I_T_E operation. Since the comparison is performed elsewhere
10579 this is just the control part which has no additional
10580 cost. */
10581 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10582 && XEXP (x, 1) == const0_rtx)
10584 *cost = 0;
10585 return true;
10587 return false;
10589 case ABS:
10590 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10591 && (mode == SFmode || !TARGET_VFP_SINGLE))
10593 *cost = COSTS_N_INSNS (1);
10594 if (speed_p)
10595 *cost += extra_cost->fp[mode != SFmode].neg;
10597 return false;
10599 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10601 *cost = LIBCALL_COST (1);
10602 return false;
10605 if (mode == SImode)
10607 *cost = COSTS_N_INSNS (1);
10608 if (speed_p)
10609 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10610 return false;
10612 /* Vector mode? */
10613 *cost = LIBCALL_COST (1);
10614 return false;
10616 case SIGN_EXTEND:
10617 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10618 && MEM_P (XEXP (x, 0)))
10620 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10622 if (mode == DImode)
10623 *cost += COSTS_N_INSNS (1);
10625 if (!speed_p)
10626 return true;
10628 if (GET_MODE (XEXP (x, 0)) == SImode)
10629 *cost += extra_cost->ldst.load;
10630 else
10631 *cost += extra_cost->ldst.load_sign_extend;
10633 if (mode == DImode)
10634 *cost += extra_cost->alu.shift;
10636 return true;
10639 /* Widening from less than 32-bits requires an extend operation. */
10640 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10642 /* We have SXTB/SXTH. */
10643 *cost = COSTS_N_INSNS (1);
10644 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10645 if (speed_p)
10646 *cost += extra_cost->alu.extend;
10648 else if (GET_MODE (XEXP (x, 0)) != SImode)
10650 /* Needs two shifts. */
10651 *cost = COSTS_N_INSNS (2);
10652 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10653 if (speed_p)
10654 *cost += 2 * extra_cost->alu.shift;
10657 /* Widening beyond 32-bits requires one more insn. */
10658 if (mode == DImode)
10660 *cost += COSTS_N_INSNS (1);
10661 if (speed_p)
10662 *cost += extra_cost->alu.shift;
10665 return true;
10667 case ZERO_EXTEND:
10668 if ((arm_arch4
10669 || GET_MODE (XEXP (x, 0)) == SImode
10670 || GET_MODE (XEXP (x, 0)) == QImode)
10671 && MEM_P (XEXP (x, 0)))
10673 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10675 if (mode == DImode)
10676 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10678 return true;
10681 /* Widening from less than 32-bits requires an extend operation. */
10682 if (GET_MODE (XEXP (x, 0)) == QImode)
10684 /* UXTB can be a shorter instruction in Thumb2, but it might
10685 be slower than the AND Rd, Rn, #255 alternative. When
10686 optimizing for speed it should never be slower to use
10687 AND, and we don't really model 16-bit vs 32-bit insns
10688 here. */
10689 *cost = COSTS_N_INSNS (1);
10690 if (speed_p)
10691 *cost += extra_cost->alu.logical;
10693 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10695 /* We have UXTB/UXTH. */
10696 *cost = COSTS_N_INSNS (1);
10697 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10698 if (speed_p)
10699 *cost += extra_cost->alu.extend;
10701 else if (GET_MODE (XEXP (x, 0)) != SImode)
10703 /* Needs two shifts. It's marginally preferable to use
10704 shifts rather than two BIC instructions as the second
10705 shift may merge with a subsequent insn as a shifter
10706 op. */
10707 *cost = COSTS_N_INSNS (2);
10708 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10709 if (speed_p)
10710 *cost += 2 * extra_cost->alu.shift;
10712 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10713 *cost = COSTS_N_INSNS (1);
10715 /* Widening beyond 32-bits requires one more insn. */
10716 if (mode == DImode)
10718 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10721 return true;
10723 case CONST_INT:
10724 *cost = 0;
10725 /* CONST_INT has no mode, so we cannot tell for sure how many
10726 insns are really going to be needed. The best we can do is
10727 look at the value passed. If it fits in SImode, then assume
10728 that's the mode it will be used for. Otherwise assume it
10729 will be used in DImode. */
10730 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10731 mode = SImode;
10732 else
10733 mode = DImode;
10735 /* Avoid blowing up in arm_gen_constant (). */
10736 if (!(outer_code == PLUS
10737 || outer_code == AND
10738 || outer_code == IOR
10739 || outer_code == XOR
10740 || outer_code == MINUS))
10741 outer_code = SET;
10743 const_int_cost:
10744 if (mode == SImode)
10746 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10747 INTVAL (x), NULL, NULL,
10748 0, 0));
10749 /* Extra costs? */
10751 else
10753 *cost += COSTS_N_INSNS (arm_gen_constant
10754 (outer_code, SImode, NULL,
10755 trunc_int_for_mode (INTVAL (x), SImode),
10756 NULL, NULL, 0, 0)
10757 + arm_gen_constant (outer_code, SImode, NULL,
10758 INTVAL (x) >> 32, NULL,
10759 NULL, 0, 0));
10760 /* Extra costs? */
10763 return true;
10765 case CONST:
10766 case LABEL_REF:
10767 case SYMBOL_REF:
10768 if (speed_p)
10770 if (arm_arch_thumb2 && !flag_pic)
10771 *cost = COSTS_N_INSNS (2);
10772 else
10773 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10775 else
10776 *cost = COSTS_N_INSNS (2);
10778 if (flag_pic)
10780 *cost += COSTS_N_INSNS (1);
10781 if (speed_p)
10782 *cost += extra_cost->alu.arith;
10785 return true;
10787 case CONST_FIXED:
10788 *cost = COSTS_N_INSNS (4);
10789 /* Fixme. */
10790 return true;
10792 case CONST_DOUBLE:
10793 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10794 && (mode == SFmode || !TARGET_VFP_SINGLE))
10796 if (vfp3_const_double_rtx (x))
10798 *cost = COSTS_N_INSNS (1);
10799 if (speed_p)
10800 *cost += extra_cost->fp[mode == DFmode].fpconst;
10801 return true;
10804 if (speed_p)
10806 *cost = COSTS_N_INSNS (1);
10807 if (mode == DFmode)
10808 *cost += extra_cost->ldst.loadd;
10809 else
10810 *cost += extra_cost->ldst.loadf;
10812 else
10813 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10815 return true;
10817 *cost = COSTS_N_INSNS (4);
10818 return true;
10820 case CONST_VECTOR:
10821 /* Fixme. */
10822 if (TARGET_NEON
10823 && TARGET_HARD_FLOAT
10824 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10825 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10826 *cost = COSTS_N_INSNS (1);
10827 else
10828 *cost = COSTS_N_INSNS (4);
10829 return true;
10831 case HIGH:
10832 case LO_SUM:
10833 *cost = COSTS_N_INSNS (1);
10834 /* When optimizing for size, we prefer constant pool entries to
10835 MOVW/MOVT pairs, so bump the cost of these slightly. */
10836 if (!speed_p)
10837 *cost += 1;
10838 return true;
10840 case CLZ:
10841 *cost = COSTS_N_INSNS (1);
10842 if (speed_p)
10843 *cost += extra_cost->alu.clz;
10844 return false;
10846 case SMIN:
10847 if (XEXP (x, 1) == const0_rtx)
10849 *cost = COSTS_N_INSNS (1);
10850 if (speed_p)
10851 *cost += extra_cost->alu.log_shift;
10852 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10853 return true;
10855 /* Fall through. */
10856 case SMAX:
10857 case UMIN:
10858 case UMAX:
10859 *cost = COSTS_N_INSNS (2);
10860 return false;
10862 case TRUNCATE:
10863 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10864 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10865 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10866 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10867 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10868 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10869 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10870 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10871 == ZERO_EXTEND))))
10873 *cost = COSTS_N_INSNS (1);
10874 if (speed_p)
10875 *cost += extra_cost->mult[1].extend;
10876 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10877 speed_p)
10878 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10879 0, speed_p));
10880 return true;
10882 *cost = LIBCALL_COST (1);
10883 return false;
10885 case UNSPEC:
10886 return arm_unspec_cost (x, outer_code, speed_p, cost);
10888 case PC:
10889 /* Reading the PC is like reading any other register. Writing it
10890 is more expensive, but we take that into account elsewhere. */
10891 *cost = 0;
10892 return true;
10894 case ZERO_EXTRACT:
10895 /* TODO: Simple zero_extract of bottom bits using AND. */
10896 /* Fall through. */
10897 case SIGN_EXTRACT:
10898 if (arm_arch6
10899 && mode == SImode
10900 && CONST_INT_P (XEXP (x, 1))
10901 && CONST_INT_P (XEXP (x, 2)))
10903 *cost = COSTS_N_INSNS (1);
10904 if (speed_p)
10905 *cost += extra_cost->alu.bfx;
10906 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10907 return true;
10909 /* Without UBFX/SBFX, need to resort to shift operations. */
10910 *cost = COSTS_N_INSNS (2);
10911 if (speed_p)
10912 *cost += 2 * extra_cost->alu.shift;
10913 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10914 return true;
10916 case FLOAT_EXTEND:
10917 if (TARGET_HARD_FLOAT)
10919 *cost = COSTS_N_INSNS (1);
10920 if (speed_p)
10921 *cost += extra_cost->fp[mode == DFmode].widen;
10922 if (!TARGET_FPU_ARMV8
10923 && GET_MODE (XEXP (x, 0)) == HFmode)
10925 /* Pre v8, widening HF->DF is a two-step process, first
10926 widening to SFmode. */
10927 *cost += COSTS_N_INSNS (1);
10928 if (speed_p)
10929 *cost += extra_cost->fp[0].widen;
10931 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10932 return true;
10935 *cost = LIBCALL_COST (1);
10936 return false;
10938 case FLOAT_TRUNCATE:
10939 if (TARGET_HARD_FLOAT)
10941 *cost = COSTS_N_INSNS (1);
10942 if (speed_p)
10943 *cost += extra_cost->fp[mode == DFmode].narrow;
10944 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10945 return true;
10946 /* Vector modes? */
10948 *cost = LIBCALL_COST (1);
10949 return false;
10951 case FMA:
10952 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10954 rtx op0 = XEXP (x, 0);
10955 rtx op1 = XEXP (x, 1);
10956 rtx op2 = XEXP (x, 2);
10958 *cost = COSTS_N_INSNS (1);
10960 /* vfms or vfnma. */
10961 if (GET_CODE (op0) == NEG)
10962 op0 = XEXP (op0, 0);
10964 /* vfnms or vfnma. */
10965 if (GET_CODE (op2) == NEG)
10966 op2 = XEXP (op2, 0);
10968 *cost += rtx_cost (op0, FMA, 0, speed_p);
10969 *cost += rtx_cost (op1, FMA, 1, speed_p);
10970 *cost += rtx_cost (op2, FMA, 2, speed_p);
10972 if (speed_p)
10973 *cost += extra_cost->fp[mode == DFmode].fma;
10975 return true;
10978 *cost = LIBCALL_COST (3);
10979 return false;
10981 case FIX:
10982 case UNSIGNED_FIX:
10983 if (TARGET_HARD_FLOAT)
10985 if (GET_MODE_CLASS (mode) == MODE_INT)
10987 *cost = COSTS_N_INSNS (1);
10988 if (speed_p)
10989 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10990 /* Strip off the 'cost' of rounding towards zero. */
10991 if (GET_CODE (XEXP (x, 0)) == FIX)
10992 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10993 else
10994 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10995 /* ??? Increase the cost to deal with transferring from
10996 FP -> CORE registers? */
10997 return true;
10999 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11000 && TARGET_FPU_ARMV8)
11002 *cost = COSTS_N_INSNS (1);
11003 if (speed_p)
11004 *cost += extra_cost->fp[mode == DFmode].roundint;
11005 return false;
11007 /* Vector costs? */
11009 *cost = LIBCALL_COST (1);
11010 return false;
11012 case FLOAT:
11013 case UNSIGNED_FLOAT:
11014 if (TARGET_HARD_FLOAT)
11016 /* ??? Increase the cost to deal with transferring from CORE
11017 -> FP registers? */
11018 *cost = COSTS_N_INSNS (1);
11019 if (speed_p)
11020 *cost += extra_cost->fp[mode == DFmode].fromint;
11021 return false;
11023 *cost = LIBCALL_COST (1);
11024 return false;
11026 case CALL:
11027 *cost = COSTS_N_INSNS (1);
11028 return true;
11030 case ASM_OPERANDS:
11032 /* Just a guess: the number of instructions in the asm template
11033 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11034 though (see PR60663). */
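/* For example, an asm whose template expands to two instructions and
   which has three inputs would be costed at COSTS_N_INSNS (5). */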
11035 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11036 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11038 *cost = COSTS_N_INSNS (asm_length + num_operands);
11039 return true;
11041 default:
11042 if (mode != VOIDmode)
11043 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11044 else
11045 *cost = COSTS_N_INSNS (4); /* Who knows? */
11046 return false;
11050 #undef HANDLE_NARROW_SHIFT_ARITH
11052 /* RTX costs. Dispatch to the cost functions selected by the current tuning, for either speed or size. */
11053 static bool
11054 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11055 int *total, bool speed)
11057 bool result;
11059 if (TARGET_OLD_RTX_COSTS
11060 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11062 /* Old way. (Deprecated.) */
11063 if (!speed)
11064 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11065 (enum rtx_code) outer_code, total);
11066 else
11067 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11068 (enum rtx_code) outer_code, total,
11069 speed);
11071 else
11073 /* New way. */
11074 if (current_tune->insn_extra_cost)
11075 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11076 (enum rtx_code) outer_code,
11077 current_tune->insn_extra_cost,
11078 total, speed);
11079 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11080 && current_tune->insn_extra_cost == NULL */
11081 else
11082 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11083 (enum rtx_code) outer_code,
11084 &generic_extra_costs, total, speed);
11087 if (dump_file && (dump_flags & TDF_DETAILS))
11089 print_rtl_single (dump_file, x);
11090 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11091 *total, result ? "final" : "partial");
11093 return result;
11096 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11097 supported on any "slowmul" cores, so it can be ignored. */
11099 static bool
11100 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11101 int *total, bool speed)
11103 enum machine_mode mode = GET_MODE (x);
11105 if (TARGET_THUMB)
11107 *total = thumb1_rtx_costs (x, code, outer_code);
11108 return true;
11111 switch (code)
11113 case MULT:
11114 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11115 || mode == DImode)
11117 *total = COSTS_N_INSNS (20);
11118 return false;
11121 if (CONST_INT_P (XEXP (x, 1)))
11123 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11124 & (unsigned HOST_WIDE_INT) 0xffffffff);
11125 int cost, const_ok = const_ok_for_arm (i);
11126 int j, booth_unit_size;
11128 /* Tune as appropriate. */
11129 cost = const_ok ? 4 : 8;
11130 booth_unit_size = 2;
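/* With a 2-bit Booth unit the loop below iterates once per two
   significant bits of the constant, so e.g. a constant with bit 31 set
   adds 16 to the base cost of 4 (or 8 when it is not const_ok). */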
11131 for (j = 0; i && j < 32; j += booth_unit_size)
11133 i >>= booth_unit_size;
11134 cost++;
11137 *total = COSTS_N_INSNS (cost);
11138 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11139 return true;
11142 *total = COSTS_N_INSNS (20);
11143 return false;
11145 default:
11146 return arm_rtx_costs_1 (x, outer_code, total, speed);
11151 /* RTX cost for cores with a fast multiply unit (M variants). */
11153 static bool
11154 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11155 int *total, bool speed)
11157 enum machine_mode mode = GET_MODE (x);
11159 if (TARGET_THUMB1)
11161 *total = thumb1_rtx_costs (x, code, outer_code);
11162 return true;
11165 /* ??? should thumb2 use different costs? */
11166 switch (code)
11168 case MULT:
11169 /* There is no point basing this on the tuning, since it is always the
11170 fast variant if it exists at all. */
11171 if (mode == DImode
11172 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11173 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11174 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11176 *total = COSTS_N_INSNS (2);
11177 return false;
11181 if (mode == DImode)
11183 *total = COSTS_N_INSNS (5);
11184 return false;
11187 if (CONST_INT_P (XEXP (x, 1)))
11189 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11190 & (unsigned HOST_WIDE_INT) 0xffffffff);
11191 int cost, const_ok = const_ok_for_arm (i);
11192 int j, booth_unit_size;
11194 /* Tune as appropriate. */
11195 cost = const_ok ? 4 : 8;
11196 booth_unit_size = 8;
11197 for (j = 0; i && j < 32; j += booth_unit_size)
11199 i >>= booth_unit_size;
11200 cost++;
11203 *total = COSTS_N_INSNS (cost);
11204 return false;
11207 if (mode == SImode)
11209 *total = COSTS_N_INSNS (4);
11210 return false;
11213 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11215 if (TARGET_HARD_FLOAT
11216 && (mode == SFmode
11217 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11219 *total = COSTS_N_INSNS (1);
11220 return false;
11224 /* Requires a lib call */
11225 *total = COSTS_N_INSNS (20);
11226 return false;
11228 default:
11229 return arm_rtx_costs_1 (x, outer_code, total, speed);
11234 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11235 so it can be ignored. */
11237 static bool
11238 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11239 int *total, bool speed)
11241 enum machine_mode mode = GET_MODE (x);
11243 if (TARGET_THUMB)
11245 *total = thumb1_rtx_costs (x, code, outer_code);
11246 return true;
11249 switch (code)
11251 case COMPARE:
11252 if (GET_CODE (XEXP (x, 0)) != MULT)
11253 return arm_rtx_costs_1 (x, outer_code, total, speed);
11255 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11256 will stall until the multiplication is complete. */
11257 *total = COSTS_N_INSNS (3);
11258 return false;
11260 case MULT:
11261 /* There is no point basing this on the tuning, since it is always the
11262 fast variant if it exists at all. */
11263 if (mode == DImode
11264 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11265 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11266 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11268 *total = COSTS_N_INSNS (2);
11269 return false;
11273 if (mode == DImode)
11275 *total = COSTS_N_INSNS (5);
11276 return false;
11279 if (CONST_INT_P (XEXP (x, 1)))
11281 /* If operand 1 is a constant we can more accurately
11282 calculate the cost of the multiply. The multiplier can
11283 retire 15 bits on the first cycle and a further 12 on the
11284 second. We do, of course, have to load the constant into
11285 a register first. */
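/* Concretely, after the sign folding below: a constant using only the
   low 15 bits is costed at COSTS_N_INSNS (1), one that also uses bits
   15-26 at COSTS_N_INSNS (2), and one that reaches into bits 27-31 at
   COSTS_N_INSNS (3). */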
11286 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11287 /* There's a general overhead of one cycle. */
11288 int cost = 1;
11289 unsigned HOST_WIDE_INT masked_const;
11291 if (i & 0x80000000)
11292 i = ~i;
11294 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11296 masked_const = i & 0xffff8000;
11297 if (masked_const != 0)
11299 cost++;
11300 masked_const = i & 0xf8000000;
11301 if (masked_const != 0)
11302 cost++;
11304 *total = COSTS_N_INSNS (cost);
11305 return false;
11308 if (mode == SImode)
11310 *total = COSTS_N_INSNS (3);
11311 return false;
11314 /* Requires a lib call */
11315 *total = COSTS_N_INSNS (20);
11316 return false;
11318 default:
11319 return arm_rtx_costs_1 (x, outer_code, total, speed);
11324 /* RTX costs for 9e (and later) cores. */
11326 static bool
11327 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11328 int *total, bool speed)
11330 enum machine_mode mode = GET_MODE (x);
11332 if (TARGET_THUMB1)
11334 switch (code)
11336 case MULT:
11337 *total = COSTS_N_INSNS (3);
11338 return true;
11340 default:
11341 *total = thumb1_rtx_costs (x, code, outer_code);
11342 return true;
11346 switch (code)
11348 case MULT:
11349 /* There is no point basing this on the tuning, since it is always the
11350 fast variant if it exists at all. */
11351 if (mode == DImode
11352 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11353 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11354 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11356 *total = COSTS_N_INSNS (2);
11357 return false;
11361 if (mode == DImode)
11363 *total = COSTS_N_INSNS (5);
11364 return false;
11367 if (mode == SImode)
11369 *total = COSTS_N_INSNS (2);
11370 return false;
11373 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11375 if (TARGET_HARD_FLOAT
11376 && (mode == SFmode
11377 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11379 *total = COSTS_N_INSNS (1);
11380 return false;
11384 *total = COSTS_N_INSNS (20);
11385 return false;
11387 default:
11388 return arm_rtx_costs_1 (x, outer_code, total, speed);
11391 /* All address computations that can be done are free, but rtx cost returns
11392 the same for practically all of them. So we weight the different types
11393 of address here in the order (most pref first):
11394 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
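/* The weights used below: 0 for auto-increment/decrement addressing,
   2 for register plus constant, 3 for a sum involving another arithmetic
   operation, 4 for any other PLUS, 10 for MEM, LABEL_REF or SYMBOL_REF
   addresses, and 6 for everything else (e.g. a bare register). */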
11395 static inline int
11396 arm_arm_address_cost (rtx x)
11398 enum rtx_code c = GET_CODE (x);
11400 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11401 return 0;
11402 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11403 return 10;
11405 if (c == PLUS)
11407 if (CONST_INT_P (XEXP (x, 1)))
11408 return 2;
11410 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11411 return 3;
11413 return 4;
11416 return 6;
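/* Likewise for Thumb-1 (!TARGET_32BIT) addresses: a bare register, or a
   register plus a constant offset, costs 1; any other address costs 2. */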
11419 static inline int
11420 arm_thumb_address_cost (rtx x)
11422 enum rtx_code c = GET_CODE (x);
11424 if (c == REG)
11425 return 1;
11426 if (c == PLUS
11427 && REG_P (XEXP (x, 0))
11428 && CONST_INT_P (XEXP (x, 1)))
11429 return 1;
11431 return 2;
11434 static int
11435 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11436 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11438 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11441 /* Adjust cost hook for XScale. */
11442 static bool
11443 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11445 /* Some true dependencies can have a higher cost depending
11446 on precisely how certain input operands are used. */
11447 if (REG_NOTE_KIND (link) == 0
11448 && recog_memoized (insn) >= 0
11449 && recog_memoized (dep) >= 0)
11451 int shift_opnum = get_attr_shift (insn);
11452 enum attr_type attr_type = get_attr_type (dep);
11454 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11455 operand for INSN. If we have a shifted input operand and the
11456 instruction we depend on is another ALU instruction, then we may
11457 have to account for an additional stall. */
11458 if (shift_opnum != 0
11459 && (attr_type == TYPE_ALU_SHIFT_IMM
11460 || attr_type == TYPE_ALUS_SHIFT_IMM
11461 || attr_type == TYPE_LOGIC_SHIFT_IMM
11462 || attr_type == TYPE_LOGICS_SHIFT_IMM
11463 || attr_type == TYPE_ALU_SHIFT_REG
11464 || attr_type == TYPE_ALUS_SHIFT_REG
11465 || attr_type == TYPE_LOGIC_SHIFT_REG
11466 || attr_type == TYPE_LOGICS_SHIFT_REG
11467 || attr_type == TYPE_MOV_SHIFT
11468 || attr_type == TYPE_MVN_SHIFT
11469 || attr_type == TYPE_MOV_SHIFT_REG
11470 || attr_type == TYPE_MVN_SHIFT_REG))
11472 rtx shifted_operand;
11473 int opno;
11475 /* Get the shifted operand. */
11476 extract_insn (insn);
11477 shifted_operand = recog_data.operand[shift_opnum];
11479 /* Iterate over all the operands in DEP. If we write an operand
11480 that overlaps with SHIFTED_OPERAND, then we have to increase the
11481 cost of this dependency. */
11482 extract_insn (dep);
11483 preprocess_constraints (dep);
11484 for (opno = 0; opno < recog_data.n_operands; opno++)
11486 /* We can ignore strict inputs. */
11487 if (recog_data.operand_type[opno] == OP_IN)
11488 continue;
11490 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11491 shifted_operand))
11493 *cost = 2;
11494 return false;
11499 return true;
11502 /* Adjust cost hook for Cortex A9. */
11503 static bool
11504 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11506 switch (REG_NOTE_KIND (link))
11508 case REG_DEP_ANTI:
11509 *cost = 0;
11510 return false;
11512 case REG_DEP_TRUE:
11513 case REG_DEP_OUTPUT:
11514 if (recog_memoized (insn) >= 0
11515 && recog_memoized (dep) >= 0)
11517 if (GET_CODE (PATTERN (insn)) == SET)
11519 if (GET_MODE_CLASS
11520 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11521 || GET_MODE_CLASS
11522 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11524 enum attr_type attr_type_insn = get_attr_type (insn);
11525 enum attr_type attr_type_dep = get_attr_type (dep);
11527 /* By default all dependencies of the form
11528 s0 = s0 <op> s1
11529 s0 = s0 <op> s2
11530 have an extra latency of 1 cycle because
11531 of the input and output dependency in this
11532 case. However this gets modeled as a true
11533 dependency and hence all these checks. */
11534 if (REG_P (SET_DEST (PATTERN (insn)))
11535 && REG_P (SET_DEST (PATTERN (dep)))
11536 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11537 SET_DEST (PATTERN (dep))))
11539 /* FMACS is a special case where the dependent
11540 instruction can be issued 3 cycles before
11541 the normal latency in case of an output
11542 dependency. */
11543 if ((attr_type_insn == TYPE_FMACS
11544 || attr_type_insn == TYPE_FMACD)
11545 && (attr_type_dep == TYPE_FMACS
11546 || attr_type_dep == TYPE_FMACD))
11548 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11549 *cost = insn_default_latency (dep) - 3;
11550 else
11551 *cost = insn_default_latency (dep);
11552 return false;
11554 else
11556 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11557 *cost = insn_default_latency (dep) + 1;
11558 else
11559 *cost = insn_default_latency (dep);
11561 return false;
11566 break;
11568 default:
11569 gcc_unreachable ();
11572 return true;
11575 /* Adjust cost hook for FA726TE. */
11576 static bool
11577 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11579 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11580 has a penalty of 3. */
11581 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11582 && recog_memoized (insn) >= 0
11583 && recog_memoized (dep) >= 0
11584 && get_attr_conds (dep) == CONDS_SET)
11586 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11587 if (get_attr_conds (insn) == CONDS_USE
11588 && get_attr_type (insn) != TYPE_BRANCH)
11590 *cost = 3;
11591 return false;
11594 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11595 || get_attr_conds (insn) == CONDS_USE)
11597 *cost = 0;
11598 return false;
11602 return true;
11605 /* Implement TARGET_REGISTER_MOVE_COST.
11607 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11608 such a move is typically more expensive than a single memory access. We set
11609 the cost to less than two memory accesses so that floating
11610 point to integer conversion does not go through memory. */
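/* (Hence the cost of 15 for VFP<->core moves below: less than twice
   the TARGET_32BIT memory move cost of 10 returned by
   arm_memory_move_cost.) */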
11613 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11614 reg_class_t from, reg_class_t to)
11616 if (TARGET_32BIT)
11618 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11619 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11620 return 15;
11621 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11622 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11623 return 4;
11624 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11625 return 20;
11626 else
11627 return 2;
11629 else
11631 if (from == HI_REGS || to == HI_REGS)
11632 return 4;
11633 else
11634 return 2;
11638 /* Implement TARGET_MEMORY_MOVE_COST. */
11641 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11642 bool in ATTRIBUTE_UNUSED)
11644 if (TARGET_32BIT)
11645 return 10;
11646 else
11648 if (GET_MODE_SIZE (mode) < 4)
11649 return 8;
11650 else
11651 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
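/* So, for example, an SImode access through LO_REGS on Thumb-1 is
   costed at 8 and through any other class at 16, against a flat 10 for
   32-bit targets. */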
11655 /* Vectorizer cost model implementation. */
11657 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11658 static int
11659 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11660 tree vectype,
11661 int misalign ATTRIBUTE_UNUSED)
11663 unsigned elements;
11665 switch (type_of_cost)
11667 case scalar_stmt:
11668 return current_tune->vec_costs->scalar_stmt_cost;
11670 case scalar_load:
11671 return current_tune->vec_costs->scalar_load_cost;
11673 case scalar_store:
11674 return current_tune->vec_costs->scalar_store_cost;
11676 case vector_stmt:
11677 return current_tune->vec_costs->vec_stmt_cost;
11679 case vector_load:
11680 return current_tune->vec_costs->vec_align_load_cost;
11682 case vector_store:
11683 return current_tune->vec_costs->vec_store_cost;
11685 case vec_to_scalar:
11686 return current_tune->vec_costs->vec_to_scalar_cost;
11688 case scalar_to_vec:
11689 return current_tune->vec_costs->scalar_to_vec_cost;
11691 case unaligned_load:
11692 return current_tune->vec_costs->vec_unalign_load_cost;
11694 case unaligned_store:
11695 return current_tune->vec_costs->vec_unalign_store_cost;
11697 case cond_branch_taken:
11698 return current_tune->vec_costs->cond_taken_branch_cost;
11700 case cond_branch_not_taken:
11701 return current_tune->vec_costs->cond_not_taken_branch_cost;
11703 case vec_perm:
11704 case vec_promote_demote:
11705 return current_tune->vec_costs->vec_stmt_cost;
11707 case vec_construct:
11708 elements = TYPE_VECTOR_SUBPARTS (vectype);
11709 return elements / 2 + 1;
11711 default:
11712 gcc_unreachable ();
11716 /* Implement targetm.vectorize.add_stmt_cost. */
11718 static unsigned
11719 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11720 struct _stmt_vec_info *stmt_info, int misalign,
11721 enum vect_cost_model_location where)
11723 unsigned *cost = (unsigned *) data;
11724 unsigned retval = 0;
11726 if (flag_vect_cost_model)
11728 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11729 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11731 /* Statements in an inner loop relative to the loop being
11732 vectorized are weighted more heavily. The value here is
11733 arbitrary and could potentially be improved with analysis. */
11734 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11735 count *= 50; /* FIXME. */
11737 retval = (unsigned) (count * stmt_cost);
11738 cost[where] += retval;
11741 return retval;
11744 /* Return true if and only if this insn can dual-issue only as older. */
11745 static bool
11746 cortexa7_older_only (rtx_insn *insn)
11748 if (recog_memoized (insn) < 0)
11749 return false;
11751 switch (get_attr_type (insn))
11753 case TYPE_ALU_DSP_REG:
11754 case TYPE_ALU_SREG:
11755 case TYPE_ALUS_SREG:
11756 case TYPE_LOGIC_REG:
11757 case TYPE_LOGICS_REG:
11758 case TYPE_ADC_REG:
11759 case TYPE_ADCS_REG:
11760 case TYPE_ADR:
11761 case TYPE_BFM:
11762 case TYPE_REV:
11763 case TYPE_MVN_REG:
11764 case TYPE_SHIFT_IMM:
11765 case TYPE_SHIFT_REG:
11766 case TYPE_LOAD_BYTE:
11767 case TYPE_LOAD1:
11768 case TYPE_STORE1:
11769 case TYPE_FFARITHS:
11770 case TYPE_FADDS:
11771 case TYPE_FFARITHD:
11772 case TYPE_FADDD:
11773 case TYPE_FMOV:
11774 case TYPE_F_CVT:
11775 case TYPE_FCMPS:
11776 case TYPE_FCMPD:
11777 case TYPE_FCONSTS:
11778 case TYPE_FCONSTD:
11779 case TYPE_FMULS:
11780 case TYPE_FMACS:
11781 case TYPE_FMULD:
11782 case TYPE_FMACD:
11783 case TYPE_FDIVS:
11784 case TYPE_FDIVD:
11785 case TYPE_F_MRC:
11786 case TYPE_F_MRRC:
11787 case TYPE_F_FLAG:
11788 case TYPE_F_LOADS:
11789 case TYPE_F_STORES:
11790 return true;
11791 default:
11792 return false;
11796 /* Return true if and only if this insn can dual-issue as younger. */
11797 static bool
11798 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11800 if (recog_memoized (insn) < 0)
11802 if (verbose > 5)
11803 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11804 return false;
11807 switch (get_attr_type (insn))
11809 case TYPE_ALU_IMM:
11810 case TYPE_ALUS_IMM:
11811 case TYPE_LOGIC_IMM:
11812 case TYPE_LOGICS_IMM:
11813 case TYPE_EXTEND:
11814 case TYPE_MVN_IMM:
11815 case TYPE_MOV_IMM:
11816 case TYPE_MOV_REG:
11817 case TYPE_MOV_SHIFT:
11818 case TYPE_MOV_SHIFT_REG:
11819 case TYPE_BRANCH:
11820 case TYPE_CALL:
11821 return true;
11822 default:
11823 return false;
11828 /* Look for an instruction that can dual issue only as an older
11829 instruction, and move it in front of any instructions that can
11830 dual-issue as younger, while preserving the relative order of all
11831 other instructions in the ready list. This is a heuristic to help
11832 dual-issue in later cycles, by postponing issue of more flexible
11833 instructions. This heuristic may affect dual issue opportunities
11834 in the current cycle. */
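/* As a concrete (illustrative) case: if the next insns due to issue are
   Y1 (younger), Y2 (younger) and O (older-only), the scan below records Y1
   as the first younger insn and O as the first older-only insn, and the
   rotation at the end yields the issue order O, Y1, Y2: the inflexible
   insn goes first, leaving the flexible ones available to pair up in
   later cycles.  */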
11835 static void
11836 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11837 int *n_readyp, int clock)
11839 int i;
11840 int first_older_only = -1, first_younger = -1;
11842 if (verbose > 5)
11843 fprintf (file,
11844 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11845 clock,
11846 *n_readyp);
11848 /* Traverse the ready list from the head (the instruction to issue
11849 first), looking for the first instruction that can issue as
11850 younger and the first instruction that can dual-issue only as
11851 older. */
11852 for (i = *n_readyp - 1; i >= 0; i--)
11854 rtx_insn *insn = ready[i];
11855 if (cortexa7_older_only (insn))
11857 first_older_only = i;
11858 if (verbose > 5)
11859 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11860 break;
11862 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11863 first_younger = i;
11866 /* Nothing to reorder because either no younger insn found or insn
11867 that can dual-issue only as older appears before any insn that
11868 can dual-issue as younger. */
11869 if (first_younger == -1)
11871 if (verbose > 5)
11872 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11873 return;
11876 /* Nothing to reorder because no older-only insn in the ready list. */
11877 if (first_older_only == -1)
11879 if (verbose > 5)
11880 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11881 return;
11884 /* Move first_older_only insn before first_younger. */
11885 if (verbose > 5)
11886 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11887 INSN_UID(ready [first_older_only]),
11888 INSN_UID(ready [first_younger]));
11889 rtx_insn *first_older_only_insn = ready [first_older_only];
11890 for (i = first_older_only; i < first_younger; i++)
11892 ready[i] = ready[i+1];
11895 ready[i] = first_older_only_insn;
11896 return;
11899 /* Implement TARGET_SCHED_REORDER. */
11900 static int
11901 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11902 int clock)
11904 switch (arm_tune)
11906 case cortexa7:
11907 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11908 break;
11909 default:
11910 /* Do nothing for other cores. */
11911 break;
11914 return arm_issue_rate ();
11917 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11918 It corrects the value of COST based on the relationship between
11919 INSN and DEP through the dependence LINK. It returns the new
11920 value. There is a per-core adjust_cost hook to adjust scheduler costs
11921 and the per-core hook can choose to completely override the generic
11922 adjust_cost function. Only put bits of code into arm_adjust_cost that
11923 are common across all cores. */
11924 static int
11925 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11927 rtx i_pat, d_pat;
11929 /* When generating Thumb-1 code, we want to place flag-setting operations
11930 close to a conditional branch which depends on them, so that we can
11931 omit the comparison. */
11932 if (TARGET_THUMB1
11933 && REG_NOTE_KIND (link) == 0
11934 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11935 && recog_memoized (dep) >= 0
11936 && get_attr_conds (dep) == CONDS_SET)
11937 return 0;
11939 if (current_tune->sched_adjust_cost != NULL)
11941 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11942 return cost;
11945 /* XXX Is this strictly true? */
11946 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11947 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11948 return 0;
11950 /* Call insns don't incur a stall, even if they follow a load. */
11951 if (REG_NOTE_KIND (link) == 0
11952 && CALL_P (insn))
11953 return 1;
11955 if ((i_pat = single_set (insn)) != NULL
11956 && MEM_P (SET_SRC (i_pat))
11957 && (d_pat = single_set (dep)) != NULL
11958 && MEM_P (SET_DEST (d_pat)))
11960 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11961 /* This is a load after a store; there is no conflict if the load reads
11962 from a cached area. Assume that loads from the stack, and from the
11963 constant pool are cached, and that others will miss. This is a
11964 hack. */
11966 if ((GET_CODE (src_mem) == SYMBOL_REF
11967 && CONSTANT_POOL_ADDRESS_P (src_mem))
11968 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11969 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11970 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11971 return 1;
11974 return cost;
11978 arm_max_conditional_execute (void)
11980 return max_insns_skipped;
11983 static int
11984 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11986 if (TARGET_32BIT)
11987 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11988 else
11989 return (optimize > 0) ? 2 : 0;
11992 static int
11993 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11995 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11998 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11999 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12000 sequences of non-executed instructions in IT blocks probably take the same
12001 amount of time as executed instructions (and the IT instruction itself takes
12002 space in icache). This function was experimentally determined to give good
12003 results on a popular embedded benchmark. */
12005 static int
12006 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12008 return (TARGET_32BIT && speed_p) ? 1
12009 : arm_default_branch_cost (speed_p, predictable_p);
12012 static bool fp_consts_inited = false;
12014 static REAL_VALUE_TYPE value_fp0;
12016 static void
12017 init_fp_table (void)
12019 REAL_VALUE_TYPE r;
12021 r = REAL_VALUE_ATOF ("0", DFmode);
12022 value_fp0 = r;
12023 fp_consts_inited = true;
12026 /* Return TRUE if rtx X is a valid immediate FP constant. */
12028 arm_const_double_rtx (rtx x)
12030 REAL_VALUE_TYPE r;
12032 if (!fp_consts_inited)
12033 init_fp_table ();
12035 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12036 if (REAL_VALUE_MINUS_ZERO (r))
12037 return 0;
12039 if (REAL_VALUES_EQUAL (r, value_fp0))
12040 return 1;
12042 return 0;
12045 /* VFPv3 has a fairly wide range of representable immediates, formed from
12046 "quarter-precision" floating-point values. These can be evaluated using this
12047 formula (with ^ for exponentiation):
12049 -1^s * n * 2^-r
12051 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12052 16 <= n <= 31 and 0 <= r <= 7.
12054 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12056 - A (most-significant) is the sign bit.
12057 - BCD are the exponent (encoded as r XOR 3).
12058 - EFGH are the mantissa (encoded as n - 16).
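/* Illustrative sketch (not part of GCC): decoding an ABCDEFGH byte back into
   the value it encodes, straight from the formula above.  The function name
   is hypothetical and exists only for this example.  */

static double
vfp3_quarter_precision_decode (unsigned char imm8)
{
  int sign = (imm8 >> 7) & 1;        /* A: the sign bit s.  */
  int r = ((imm8 >> 4) & 7) ^ 3;     /* BCD holds r XOR 3.  */
  int n = (imm8 & 0xf) + 16;         /* EFGH holds n - 16.  */

  /* -1^s * n * 2^-r; r is at most 7, so dividing by (1 << r) is safe.  */
  return (sign ? -1.0 : 1.0) * (double) n / (double) (1 << r);
}

/* For instance, imm8 == 0x00 decodes to 16 * 2^-3 == 2.0 under this scheme.  */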
12061 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12062 fconst[sd] instruction, or -1 if X isn't suitable. */
12063 static int
12064 vfp3_const_double_index (rtx x)
12066 REAL_VALUE_TYPE r, m;
12067 int sign, exponent;
12068 unsigned HOST_WIDE_INT mantissa, mant_hi;
12069 unsigned HOST_WIDE_INT mask;
12070 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12071 bool fail;
12073 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12074 return -1;
12076 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12078 /* We can't represent these things, so detect them first. */
12079 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12080 return -1;
12082 /* Extract sign, exponent and mantissa. */
12083 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12084 r = real_value_abs (&r);
12085 exponent = REAL_EXP (&r);
12086 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12087 highest (sign) bit, with a fixed binary point at bit point_pos.
12088 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12089 bits for the mantissa, this may fail (low bits would be lost). */
12090 real_ldexp (&m, &r, point_pos - exponent);
12091 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12092 mantissa = w.elt (0);
12093 mant_hi = w.elt (1);
12095 /* If there are bits set in the low part of the mantissa, we can't
12096 represent this value. */
12097 if (mantissa != 0)
12098 return -1;
12100 /* Now make it so that mantissa contains the most-significant bits, and move
12101 the point_pos to indicate that the least-significant bits have been
12102 discarded. */
12103 point_pos -= HOST_BITS_PER_WIDE_INT;
12104 mantissa = mant_hi;
12106 /* We can permit four significant bits of mantissa only, plus a high bit
12107 which is always 1. */
12108 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12109 if ((mantissa & mask) != 0)
12110 return -1;
12112 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12113 mantissa >>= point_pos - 5;
12115 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12116 floating-point immediate zero with Neon using an integer-zero load, but
12117 that case is handled elsewhere.) */
12118 if (mantissa == 0)
12119 return -1;
12121 gcc_assert (mantissa >= 16 && mantissa <= 31);
12123 /* The value of 5 here would be 4 if GCC used an IEEE754-like encoding (where
12124 normalized significands are in the range [1, 2)). (Our mantissa is shifted
12125 left 4 places at this point relative to normalized IEEE754 values). GCC
12126 internally uses [0.5, 1) (see real.c), so the exponent returned from
12127 REAL_EXP must be altered. */
12128 exponent = 5 - exponent;
12130 if (exponent < 0 || exponent > 7)
12131 return -1;
12133 /* Sign, mantissa and exponent are now in the correct form to plug into the
12134 formula described in the comment above. */
12135 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12138 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12140 vfp3_const_double_rtx (rtx x)
12142 if (!TARGET_VFP3)
12143 return 0;
12145 return vfp3_const_double_index (x) != -1;
12148 /* Recognize immediates which can be used in various Neon instructions. Legal
12149 immediates are described by the following table (for VMVN variants, the
12150 bitwise inverse of the constant shown is recognized. In either case, VMOV
12151 is output and the correct instruction to use for a given constant is chosen
12152 by the assembler). The constant shown is replicated across all elements of
12153 the destination vector.
12155 insn elems variant constant (binary)
12156 ---- ----- ------- -----------------
12157 vmov i32 0 00000000 00000000 00000000 abcdefgh
12158 vmov i32 1 00000000 00000000 abcdefgh 00000000
12159 vmov i32 2 00000000 abcdefgh 00000000 00000000
12160 vmov i32 3 abcdefgh 00000000 00000000 00000000
12161 vmov i16 4 00000000 abcdefgh
12162 vmov i16 5 abcdefgh 00000000
12163 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12164 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12165 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12166 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12167 vmvn i16 10 00000000 abcdefgh
12168 vmvn i16 11 abcdefgh 00000000
12169 vmov i32 12 00000000 00000000 abcdefgh 11111111
12170 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12171 vmov i32 14 00000000 abcdefgh 11111111 11111111
12172 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12173 vmov i8 16 abcdefgh
12174 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12175 eeeeeeee ffffffff gggggggg hhhhhhhh
12176 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12177 vmov f32 19 00000000 00000000 00000000 00000000
12179 For case 18, B = !b. Representable values are exactly those accepted by
12180 vfp3_const_double_index, but are output as floating-point numbers rather
12181 than indices.
12183 For case 19, we will change it to vmov.i32 when assembling.
12185 Variants 0-5 (inclusive) may also be used as immediates for the second
12186 operand of VORR/VBIC instructions.
12188 The INVERSE argument causes the bitwise inverse of the given operand to be
12189 recognized instead (used for recognizing legal immediates for the VAND/VORN
12190 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12191 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12192 output, rather than the real insns vbic/vorr).
12194 INVERSE makes no difference to the recognition of float vectors.
12196 The return value is the variant of immediate as shown in the above table, or
12197 -1 if the given value doesn't match any of the listed patterns.
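/* Worked example (illustrative): a V4SI CONST_VECTOR whose elements are all
   0x000000ab splats to the repeating byte pattern ab 00 00 00, which is
   variant 0 above (vmov.i32, abcdefgh = 0xab, element width 32); the same
   elements set to 0x0000ab00 instead match variant 1.  */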
12199 static int
12200 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
12201 rtx *modconst, int *elementwidth)
12203 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12204 matches = 1; \
12205 for (i = 0; i < idx; i += (STRIDE)) \
12206 if (!(TEST)) \
12207 matches = 0; \
12208 if (matches) \
12210 immtype = (CLASS); \
12211 elsize = (ELSIZE); \
12212 break; \
12215 unsigned int i, elsize = 0, idx = 0, n_elts;
12216 unsigned int innersize;
12217 unsigned char bytes[16];
12218 int immtype = -1, matches;
12219 unsigned int invmask = inverse ? 0xff : 0;
12220 bool vector = GET_CODE (op) == CONST_VECTOR;
12222 if (vector)
12224 n_elts = CONST_VECTOR_NUNITS (op);
12225 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12227 else
12229 n_elts = 1;
12230 if (mode == VOIDmode)
12231 mode = DImode;
12232 innersize = GET_MODE_SIZE (mode);
12235 /* Vectors of float constants. */
12236 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12238 rtx el0 = CONST_VECTOR_ELT (op, 0);
12239 REAL_VALUE_TYPE r0;
12241 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12242 return -1;
12244 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12246 for (i = 1; i < n_elts; i++)
12248 rtx elt = CONST_VECTOR_ELT (op, i);
12249 REAL_VALUE_TYPE re;
12251 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12253 if (!REAL_VALUES_EQUAL (r0, re))
12254 return -1;
12257 if (modconst)
12258 *modconst = CONST_VECTOR_ELT (op, 0);
12260 if (elementwidth)
12261 *elementwidth = 0;
12263 if (el0 == CONST0_RTX (GET_MODE (el0)))
12264 return 19;
12265 else
12266 return 18;
12269 /* Splat vector constant out into a byte vector. */
12270 for (i = 0; i < n_elts; i++)
12272 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12273 unsigned HOST_WIDE_INT elpart;
12274 unsigned int part, parts;
12276 if (CONST_INT_P (el))
12278 elpart = INTVAL (el);
12279 parts = 1;
12281 else if (CONST_DOUBLE_P (el))
12283 elpart = CONST_DOUBLE_LOW (el);
12284 parts = 2;
12286 else
12287 gcc_unreachable ();
12289 for (part = 0; part < parts; part++)
12291 unsigned int byte;
12292 for (byte = 0; byte < innersize; byte++)
12294 bytes[idx++] = (elpart & 0xff) ^ invmask;
12295 elpart >>= BITS_PER_UNIT;
12297 if (CONST_DOUBLE_P (el))
12298 elpart = CONST_DOUBLE_HIGH (el);
12302 /* Sanity check. */
12303 gcc_assert (idx == GET_MODE_SIZE (mode));
12307 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12308 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12310 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12311 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12313 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12314 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12316 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12317 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12319 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12321 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12323 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12324 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12326 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12327 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12329 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12330 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12332 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12333 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12335 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12337 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12339 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12340 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12342 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12343 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12345 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12346 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12348 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12349 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12351 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12353 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12354 && bytes[i] == bytes[(i + 8) % idx]);
12356 while (0);
12358 if (immtype == -1)
12359 return -1;
12361 if (elementwidth)
12362 *elementwidth = elsize;
12364 if (modconst)
12366 unsigned HOST_WIDE_INT imm = 0;
12368 /* Un-invert bytes of recognized vector, if necessary. */
12369 if (invmask != 0)
12370 for (i = 0; i < idx; i++)
12371 bytes[i] ^= invmask;
12373 if (immtype == 17)
12375 /* FIXME: Broken on 32-bit H_W_I hosts. */
12376 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12378 for (i = 0; i < 8; i++)
12379 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12380 << (i * BITS_PER_UNIT);
12382 *modconst = GEN_INT (imm);
12384 else
12386 unsigned HOST_WIDE_INT imm = 0;
12388 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12389 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12391 *modconst = GEN_INT (imm);
12395 return immtype;
12396 #undef CHECK
12399 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12400 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12401 float elements), and a modified constant (whatever should be output for a
12402 VMOV) in *MODCONST. */
12405 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12406 rtx *modconst, int *elementwidth)
12408 rtx tmpconst;
12409 int tmpwidth;
12410 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12412 if (retval == -1)
12413 return 0;
12415 if (modconst)
12416 *modconst = tmpconst;
12418 if (elementwidth)
12419 *elementwidth = tmpwidth;
12421 return 1;
12424 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12425 the immediate is valid, write a constant suitable for using as an operand
12426 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12427 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12430 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12431 rtx *modconst, int *elementwidth)
12433 rtx tmpconst;
12434 int tmpwidth;
12435 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12437 if (retval < 0 || retval > 5)
12438 return 0;
12440 if (modconst)
12441 *modconst = tmpconst;
12443 if (elementwidth)
12444 *elementwidth = tmpwidth;
12446 return 1;
12449 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12450 the immediate is valid, write a constant suitable for using as an operand
12451 to VSHR/VSHL to *MODCONST and the corresponding element width to
12452 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right
12453 shift, because the two forms have different limits. */
12456 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12457 rtx *modconst, int *elementwidth,
12458 bool isleftshift)
12460 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12461 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12462 unsigned HOST_WIDE_INT last_elt = 0;
12463 unsigned HOST_WIDE_INT maxshift;
12465 /* Split vector constant out into a byte vector. */
12466 for (i = 0; i < n_elts; i++)
12468 rtx el = CONST_VECTOR_ELT (op, i);
12469 unsigned HOST_WIDE_INT elpart;
12471 if (CONST_INT_P (el))
12472 elpart = INTVAL (el);
12473 else if (CONST_DOUBLE_P (el))
12474 return 0;
12475 else
12476 gcc_unreachable ();
12478 if (i != 0 && elpart != last_elt)
12479 return 0;
12481 last_elt = elpart;
12484 /* Shift less than element size. */
12485 maxshift = innersize * 8;
12487 if (isleftshift)
12489 /* Left shift immediate value can be from 0 to <size>-1. */
12490 if (last_elt >= maxshift)
12491 return 0;
12493 else
12495 /* Right shift immediate value can be from 1 to <size>. */
12496 if (last_elt == 0 || last_elt > maxshift)
12497 return 0;
12500 if (elementwidth)
12501 *elementwidth = innersize * 8;
12503 if (modconst)
12504 *modconst = CONST_VECTOR_ELT (op, 0);
12506 return 1;
12509 /* Return a string suitable for output of Neon immediate logic operation
12510 MNEM. */
12512 char *
12513 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12514 int inverse, int quad)
12516 int width, is_valid;
12517 static char templ[40];
12519 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12521 gcc_assert (is_valid != 0);
12523 if (quad)
12524 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12525 else
12526 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12528 return templ;
12531 /* Return a string suitable for output of Neon immediate shift operation
12532 (VSHR or VSHL) MNEM. */
12534 char *
12535 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12536 enum machine_mode mode, int quad,
12537 bool isleftshift)
12539 int width, is_valid;
12540 static char templ[40];
12542 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12543 gcc_assert (is_valid != 0);
12545 if (quad)
12546 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12547 else
12548 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12550 return templ;
12553 /* Output a sequence of pairwise operations to implement a reduction.
12554 NOTE: We do "too much work" here, because pairwise operations work on two
12555 registers-worth of operands in one go. Unfortunately we do not think those
12556 extra calculations can be exploited to do the full operation in fewer steps.
12557 Although all vector elements of the result but the first are ignored, we
12558 actually calculate the same result in each of the elements. An alternative
12559 such as initially loading a vector with zero to use as each of the second
12560 operands would use up an additional register and take an extra instruction,
12561 for no particular gain. */
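/* Worked example (illustrative), assuming the REDUC generator emits a
   pairwise add: for a four-element vector {a, b, c, d} the first step
   computes {a+b, c+d, a+b, c+d} and the second {a+b+c+d, ...}, after which
   only element 0 of OP0 is meaningful to the caller.  */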
12563 void
12564 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12565 rtx (*reduc) (rtx, rtx, rtx))
12567 enum machine_mode inner = GET_MODE_INNER (mode);
12568 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12569 rtx tmpsum = op1;
12571 for (i = parts / 2; i >= 1; i /= 2)
12573 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12574 emit_insn (reduc (dest, tmpsum, tmpsum));
12575 tmpsum = dest;
12579 /* If VALS is a vector constant that can be loaded into a register
12580 using VDUP, generate instructions to do so and return an RTX to
12581 assign to the register. Otherwise return NULL_RTX. */
12583 static rtx
12584 neon_vdup_constant (rtx vals)
12586 enum machine_mode mode = GET_MODE (vals);
12587 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12588 int n_elts = GET_MODE_NUNITS (mode);
12589 bool all_same = true;
12590 rtx x;
12591 int i;
12593 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12594 return NULL_RTX;
12596 for (i = 0; i < n_elts; ++i)
12598 x = XVECEXP (vals, 0, i);
12599 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12600 all_same = false;
12603 if (!all_same)
12604 /* The elements are not all the same. We could handle repeating
12605 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12606 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12607 vdup.i16). */
12608 return NULL_RTX;
12610 /* We can load this constant by using VDUP and a constant in a
12611 single ARM register. This will be cheaper than a vector
12612 load. */
12614 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12615 return gen_rtx_VEC_DUPLICATE (mode, x);
12618 /* Generate code to load VALS, which is a PARALLEL containing only
12619 constants (for vec_init) or CONST_VECTOR, efficiently into a
12620 register. Returns an RTX to copy into the register, or NULL_RTX
12621 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12624 neon_make_constant (rtx vals)
12626 enum machine_mode mode = GET_MODE (vals);
12627 rtx target;
12628 rtx const_vec = NULL_RTX;
12629 int n_elts = GET_MODE_NUNITS (mode);
12630 int n_const = 0;
12631 int i;
12633 if (GET_CODE (vals) == CONST_VECTOR)
12634 const_vec = vals;
12635 else if (GET_CODE (vals) == PARALLEL)
12637 /* A CONST_VECTOR must contain only CONST_INTs and
12638 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12639 Only store valid constants in a CONST_VECTOR. */
12640 for (i = 0; i < n_elts; ++i)
12642 rtx x = XVECEXP (vals, 0, i);
12643 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12644 n_const++;
12646 if (n_const == n_elts)
12647 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12649 else
12650 gcc_unreachable ();
12652 if (const_vec != NULL
12653 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12654 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12655 return const_vec;
12656 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12657 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12658 pipeline cycle; creating the constant takes one or two ARM
12659 pipeline cycles. */
12660 return target;
12661 else if (const_vec != NULL_RTX)
12662 /* Load from constant pool. On Cortex-A8 this takes two cycles
12663 (for either double or quad vectors). We cannot take advantage
12664 of single-cycle VLD1 because we need a PC-relative addressing
12665 mode. */
12666 return const_vec;
12667 else
12668 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12669 We cannot construct an initializer. */
12670 return NULL_RTX;
12673 /* Initialize vector TARGET to VALS. */
12675 void
12676 neon_expand_vector_init (rtx target, rtx vals)
12678 enum machine_mode mode = GET_MODE (target);
12679 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12680 int n_elts = GET_MODE_NUNITS (mode);
12681 int n_var = 0, one_var = -1;
12682 bool all_same = true;
12683 rtx x, mem;
12684 int i;
12686 for (i = 0; i < n_elts; ++i)
12688 x = XVECEXP (vals, 0, i);
12689 if (!CONSTANT_P (x))
12690 ++n_var, one_var = i;
12692 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12693 all_same = false;
12696 if (n_var == 0)
12698 rtx constant = neon_make_constant (vals);
12699 if (constant != NULL_RTX)
12701 emit_move_insn (target, constant);
12702 return;
12706 /* Splat a single non-constant element if we can. */
12707 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12709 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12710 emit_insn (gen_rtx_SET (VOIDmode, target,
12711 gen_rtx_VEC_DUPLICATE (mode, x)));
12712 return;
12715 /* One field is non-constant. Load constant then overwrite varying
12716 field. This is more efficient than using the stack. */
12717 if (n_var == 1)
12719 rtx copy = copy_rtx (vals);
12720 rtx index = GEN_INT (one_var);
12722 /* Load constant part of vector, substitute neighboring value for
12723 varying element. */
12724 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12725 neon_expand_vector_init (target, copy);
12727 /* Insert variable. */
12728 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12729 switch (mode)
12731 case V8QImode:
12732 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12733 break;
12734 case V16QImode:
12735 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12736 break;
12737 case V4HImode:
12738 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12739 break;
12740 case V8HImode:
12741 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12742 break;
12743 case V2SImode:
12744 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12745 break;
12746 case V4SImode:
12747 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12748 break;
12749 case V2SFmode:
12750 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12751 break;
12752 case V4SFmode:
12753 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12754 break;
12755 case V2DImode:
12756 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12757 break;
12758 default:
12759 gcc_unreachable ();
12761 return;
12764 /* Construct the vector in memory one field at a time
12765 and load the whole vector. */
12766 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12767 for (i = 0; i < n_elts; i++)
12768 emit_move_insn (adjust_address_nv (mem, inner_mode,
12769 i * GET_MODE_SIZE (inner_mode)),
12770 XVECEXP (vals, 0, i));
12771 emit_move_insn (target, mem);
12774 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12775 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12776 reported source locations are bogus. */
12778 static void
12779 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12780 const char *err)
12782 HOST_WIDE_INT lane;
12784 gcc_assert (CONST_INT_P (operand));
12786 lane = INTVAL (operand);
12788 if (lane < low || lane >= high)
12789 error (err);
12792 /* Bounds-check lanes. */
12794 void
12795 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12797 bounds_check (operand, low, high, "lane out of range");
12800 /* Bounds-check constants. */
12802 void
12803 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12805 bounds_check (operand, low, high, "constant out of range");
12808 HOST_WIDE_INT
12809 neon_element_bits (enum machine_mode mode)
12811 if (mode == DImode)
12812 return GET_MODE_BITSIZE (mode);
12813 else
12814 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12818 /* Predicates for `match_operand' and `match_operator'. */
12820 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12821 WB is true if full writeback address modes are allowed and is false
12822 if limited writeback address modes (POST_INC and PRE_DEC) are
12823 allowed. */
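/* For instance (illustrative): (mem:DF (post_inc:SI (reg:SI rN))) is
   accepted whatever WB is, (mem:DF (pre_inc:SI (reg:SI rN))) and the
   POST_MODIFY/PRE_MODIFY forms are accepted only when WB is true, and a
   (plus (reg) (const_int)) address is rejected unless the offset is a
   multiple of 4 strictly between -1024 and 1024.  */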
12826 arm_coproc_mem_operand (rtx op, bool wb)
12828 rtx ind;
12830 /* Reject eliminable registers. */
12831 if (! (reload_in_progress || reload_completed || lra_in_progress)
12832 && ( reg_mentioned_p (frame_pointer_rtx, op)
12833 || reg_mentioned_p (arg_pointer_rtx, op)
12834 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12835 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12836 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12837 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12838 return FALSE;
12840 /* Constants are converted into offsets from labels. */
12841 if (!MEM_P (op))
12842 return FALSE;
12844 ind = XEXP (op, 0);
12846 if (reload_completed
12847 && (GET_CODE (ind) == LABEL_REF
12848 || (GET_CODE (ind) == CONST
12849 && GET_CODE (XEXP (ind, 0)) == PLUS
12850 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12851 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12852 return TRUE;
12854 /* Match: (mem (reg)). */
12855 if (REG_P (ind))
12856 return arm_address_register_rtx_p (ind, 0);
12858 Autoincrement addressing modes. POST_INC and PRE_DEC are
12859 acceptable in any case (subject to verification by
12860 arm_address_register_rtx_p). We need WB to be true to accept
12861 PRE_INC and POST_DEC. */
12862 if (GET_CODE (ind) == POST_INC
12863 || GET_CODE (ind) == PRE_DEC
12864 || (wb
12865 && (GET_CODE (ind) == PRE_INC
12866 || GET_CODE (ind) == POST_DEC)))
12867 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12869 if (wb
12870 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12871 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12872 && GET_CODE (XEXP (ind, 1)) == PLUS
12873 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12874 ind = XEXP (ind, 1);
12876 /* Match:
12877 (plus (reg)
12878 (const)). */
12879 if (GET_CODE (ind) == PLUS
12880 && REG_P (XEXP (ind, 0))
12881 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12882 && CONST_INT_P (XEXP (ind, 1))
12883 && INTVAL (XEXP (ind, 1)) > -1024
12884 && INTVAL (XEXP (ind, 1)) < 1024
12885 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12886 return TRUE;
12888 return FALSE;
12891 /* Return TRUE if OP is a memory operand which we can load or store a vector
12892 to/from. TYPE is one of the following values:
12893 0 - Vector load/store (vldr)
12894 1 - Core registers (ldm)
12895 2 - Element/structure loads (vld1)
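/* For example (illustrative): (mem:V2SI (post_inc:SI (reg:SI rN))) is
   accepted for TYPE 0 and TYPE 2 but not for TYPE 1, while a post-increment
   by register, (post_modify (reg rN) (plus (reg rN) (reg rM))), is only
   accepted for TYPE 2 (vld1).  */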
12898 neon_vector_mem_operand (rtx op, int type, bool strict)
12900 rtx ind;
12902 /* Reject eliminable registers. */
12903 if (! (reload_in_progress || reload_completed)
12904 && ( reg_mentioned_p (frame_pointer_rtx, op)
12905 || reg_mentioned_p (arg_pointer_rtx, op)
12906 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12907 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12908 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12909 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12910 return !strict;
12912 /* Constants are converted into offsets from labels. */
12913 if (!MEM_P (op))
12914 return FALSE;
12916 ind = XEXP (op, 0);
12918 if (reload_completed
12919 && (GET_CODE (ind) == LABEL_REF
12920 || (GET_CODE (ind) == CONST
12921 && GET_CODE (XEXP (ind, 0)) == PLUS
12922 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12923 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12924 return TRUE;
12926 /* Match: (mem (reg)). */
12927 if (REG_P (ind))
12928 return arm_address_register_rtx_p (ind, 0);
12930 /* Allow post-increment with Neon registers. */
12931 if ((type != 1 && GET_CODE (ind) == POST_INC)
12932 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12933 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12935 /* Allow post-increment by register for VLDn */
12936 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12937 && GET_CODE (XEXP (ind, 1)) == PLUS
12938 && REG_P (XEXP (XEXP (ind, 1), 1)))
12939 return true;
12941 /* Match:
12942 (plus (reg)
12943 (const)). */
12944 if (type == 0
12945 && GET_CODE (ind) == PLUS
12946 && REG_P (XEXP (ind, 0))
12947 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12948 && CONST_INT_P (XEXP (ind, 1))
12949 && INTVAL (XEXP (ind, 1)) > -1024
12950 /* For quad modes, we restrict the constant offset to be slightly less
12951 than what the instruction format permits. We have no such constraint
12952 on double mode offsets. (This must match arm_legitimate_index_p.) */
12953 && (INTVAL (XEXP (ind, 1))
12954 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12955 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12956 return TRUE;
12958 return FALSE;
12961 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12962 type. */
12964 neon_struct_mem_operand (rtx op)
12966 rtx ind;
12968 /* Reject eliminable registers. */
12969 if (! (reload_in_progress || reload_completed)
12970 && ( reg_mentioned_p (frame_pointer_rtx, op)
12971 || reg_mentioned_p (arg_pointer_rtx, op)
12972 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12973 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12974 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12975 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12976 return FALSE;
12978 /* Constants are converted into offsets from labels. */
12979 if (!MEM_P (op))
12980 return FALSE;
12982 ind = XEXP (op, 0);
12984 if (reload_completed
12985 && (GET_CODE (ind) == LABEL_REF
12986 || (GET_CODE (ind) == CONST
12987 && GET_CODE (XEXP (ind, 0)) == PLUS
12988 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12989 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12990 return TRUE;
12992 /* Match: (mem (reg)). */
12993 if (REG_P (ind))
12994 return arm_address_register_rtx_p (ind, 0);
12996 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12997 if (GET_CODE (ind) == POST_INC
12998 || GET_CODE (ind) == PRE_DEC)
12999 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13001 return FALSE;
13004 /* Return true if X is a register that will be eliminated later on. */
13006 arm_eliminable_register (rtx x)
13008 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13009 || REGNO (x) == ARG_POINTER_REGNUM
13010 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13011 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13014 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13015 coprocessor registers. Otherwise return NO_REGS. */
13017 enum reg_class
13018 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
13020 if (mode == HFmode)
13022 if (!TARGET_NEON_FP16)
13023 return GENERAL_REGS;
13024 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13025 return NO_REGS;
13026 return GENERAL_REGS;
13029 /* The neon move patterns handle all legitimate vector and struct
13030 addresses. */
13031 if (TARGET_NEON
13032 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13033 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13034 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13035 || VALID_NEON_STRUCT_MODE (mode)))
13036 return NO_REGS;
13038 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13039 return NO_REGS;
13041 return GENERAL_REGS;
13044 /* Values which must be returned in the most-significant end of the return
13045 register. */
13047 static bool
13048 arm_return_in_msb (const_tree valtype)
13050 return (TARGET_AAPCS_BASED
13051 && BYTES_BIG_ENDIAN
13052 && (AGGREGATE_TYPE_P (valtype)
13053 || TREE_CODE (valtype) == COMPLEX_TYPE
13054 || FIXED_POINT_TYPE_P (valtype)));
13057 /* Return TRUE if X references a SYMBOL_REF. */
13059 symbol_mentioned_p (rtx x)
13061 const char * fmt;
13062 int i;
13064 if (GET_CODE (x) == SYMBOL_REF)
13065 return 1;
13067 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13068 are constant offsets, not symbols. */
13069 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13070 return 0;
13072 fmt = GET_RTX_FORMAT (GET_CODE (x));
13074 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13076 if (fmt[i] == 'E')
13078 int j;
13080 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13081 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13082 return 1;
13084 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13085 return 1;
13088 return 0;
13091 /* Return TRUE if X references a LABEL_REF. */
13093 label_mentioned_p (rtx x)
13095 const char * fmt;
13096 int i;
13098 if (GET_CODE (x) == LABEL_REF)
13099 return 1;
13101 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13102 instruction, but they are constant offsets, not symbols. */
13103 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13104 return 0;
13106 fmt = GET_RTX_FORMAT (GET_CODE (x));
13107 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13109 if (fmt[i] == 'E')
13111 int j;
13113 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13114 if (label_mentioned_p (XVECEXP (x, i, j)))
13115 return 1;
13117 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13118 return 1;
13121 return 0;
13125 tls_mentioned_p (rtx x)
13127 switch (GET_CODE (x))
13129 case CONST:
13130 return tls_mentioned_p (XEXP (x, 0));
13132 case UNSPEC:
13133 if (XINT (x, 1) == UNSPEC_TLS)
13134 return 1;
13136 default:
13137 return 0;
13141 /* Must not copy any rtx that uses a pc-relative address. */
13143 static int
13144 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
13146 if (GET_CODE (*x) == UNSPEC
13147 && (XINT (*x, 1) == UNSPEC_PIC_BASE
13148 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
13149 return 1;
13150 return 0;
13153 static bool
13154 arm_cannot_copy_insn_p (rtx_insn *insn)
13156 /* The tls call insn cannot be copied, as it is paired with a data
13157 word. */
13158 if (recog_memoized (insn) == CODE_FOR_tlscall)
13159 return true;
13161 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
13164 enum rtx_code
13165 minmax_code (rtx x)
13167 enum rtx_code code = GET_CODE (x);
13169 switch (code)
13171 case SMAX:
13172 return GE;
13173 case SMIN:
13174 return LE;
13175 case UMIN:
13176 return LEU;
13177 case UMAX:
13178 return GEU;
13179 default:
13180 gcc_unreachable ();
13184 /* Match pair of min/max operators that can be implemented via usat/ssat. */
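/* Illustrative sketch (not part of GCC): the saturation range implied by the
   MASK and SIGNED_SAT values computed below.  The helper is hypothetical and
   exists only to make the mapping concrete: a clamp to [0, 255] yields
   MASK = 8 with SIGNED_SAT false (usat-style), while a clamp to [-128, 127]
   yields MASK = 8 with SIGNED_SAT true (ssat-style).  */

static void
sat_range_model (int mask, bool signed_sat, HOST_WIDE_INT *lo, HOST_WIDE_INT *hi)
{
  if (signed_sat)
    {
      *lo = -((HOST_WIDE_INT) 1 << (mask - 1));      /* mask 8 -> -128.  */
      *hi = ((HOST_WIDE_INT) 1 << (mask - 1)) - 1;   /* mask 8 -> 127.  */
    }
  else
    {
      *lo = 0;                                       /* usat always starts at 0.  */
      *hi = ((HOST_WIDE_INT) 1 << mask) - 1;         /* mask 8 -> 255.  */
    }
}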
13186 bool
13187 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13188 int *mask, bool *signed_sat)
13190 /* The high bound must be a power of two minus one. */
13191 int log = exact_log2 (INTVAL (hi_bound) + 1);
13192 if (log == -1)
13193 return false;
13195 /* The low bound is either zero (for usat) or one less than the
13196 negation of the high bound (for ssat). */
13197 if (INTVAL (lo_bound) == 0)
13199 if (mask)
13200 *mask = log;
13201 if (signed_sat)
13202 *signed_sat = false;
13204 return true;
13207 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13209 if (mask)
13210 *mask = log + 1;
13211 if (signed_sat)
13212 *signed_sat = true;
13214 return true;
13217 return false;
13220 /* Return 1 if memory locations are adjacent. */
13222 adjacent_mem_locations (rtx a, rtx b)
13224 /* We don't guarantee to preserve the order of these memory refs. */
13225 if (volatile_refs_p (a) || volatile_refs_p (b))
13226 return 0;
13228 if ((REG_P (XEXP (a, 0))
13229 || (GET_CODE (XEXP (a, 0)) == PLUS
13230 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13231 && (REG_P (XEXP (b, 0))
13232 || (GET_CODE (XEXP (b, 0)) == PLUS
13233 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13235 HOST_WIDE_INT val0 = 0, val1 = 0;
13236 rtx reg0, reg1;
13237 int val_diff;
13239 if (GET_CODE (XEXP (a, 0)) == PLUS)
13241 reg0 = XEXP (XEXP (a, 0), 0);
13242 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13244 else
13245 reg0 = XEXP (a, 0);
13247 if (GET_CODE (XEXP (b, 0)) == PLUS)
13249 reg1 = XEXP (XEXP (b, 0), 0);
13250 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13252 else
13253 reg1 = XEXP (b, 0);
13255 /* Don't accept any offset that will require multiple
13256 instructions to handle, since this would cause the
13257 arith_adjacentmem pattern to output an overlong sequence. */
13258 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13259 return 0;
13261 /* Don't allow an eliminable register: register elimination can make
13262 the offset too large. */
13263 if (arm_eliminable_register (reg0))
13264 return 0;
13266 val_diff = val1 - val0;
13268 if (arm_ld_sched)
13270 /* If the target has load delay slots, then there's no benefit
13271 to using an ldm instruction unless the offset is zero and
13272 we are optimizing for size. */
13273 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13274 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13275 && (val_diff == 4 || val_diff == -4));
13278 return ((REGNO (reg0) == REGNO (reg1))
13279 && (val_diff == 4 || val_diff == -4));
13282 return 0;
13285 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13286 for load operations, false for store operations. CONSECUTIVE is true
13287 if the register numbers in the operation must be consecutive in the register
13288 bank. RETURN_PC is true if value is to be loaded in PC.
13289 The pattern we are trying to match for load is:
13290 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13291 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13294 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13296 where
13297 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13298 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13299 3. If consecutive is TRUE, then for kth register being loaded,
13300 REGNO (R_dk) = REGNO (R_d0) + k.
13301 The pattern for store is similar. */
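/* A concrete (illustrative) instance of the load form above, for two SImode
   registers and a zero offset, would be:

   (parallel [(set (reg:SI 4) (mem:SI (reg:SI 0)))
              (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))])

   which satisfies rules 1 and 2, and also rule 3 when CONSECUTIVE is true.  */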
13302 bool
13303 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
13304 bool consecutive, bool return_pc)
13306 HOST_WIDE_INT count = XVECLEN (op, 0);
13307 rtx reg, mem, addr;
13308 unsigned regno;
13309 unsigned first_regno;
13310 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13311 rtx elt;
13312 bool addr_reg_in_reglist = false;
13313 bool update = false;
13314 int reg_increment;
13315 int offset_adj;
13316 int regs_per_val;
13318 /* If not in SImode, then registers must be consecutive
13319 (e.g., VLDM instructions for DFmode). */
13320 gcc_assert ((mode == SImode) || consecutive);
13321 /* Setting return_pc for stores is illegal. */
13322 gcc_assert (!return_pc || load);
13324 /* Set up the increments and the regs per val based on the mode. */
13325 reg_increment = GET_MODE_SIZE (mode);
13326 regs_per_val = reg_increment / 4;
13327 offset_adj = return_pc ? 1 : 0;
13329 if (count <= 1
13330 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13331 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13332 return false;
13334 /* Check if this is a write-back. */
13335 elt = XVECEXP (op, 0, offset_adj);
13336 if (GET_CODE (SET_SRC (elt)) == PLUS)
13338 i++;
13339 base = 1;
13340 update = true;
13342 /* The offset adjustment must be the number of registers being
13343 popped times the size of a single register. */
13344 if (!REG_P (SET_DEST (elt))
13345 || !REG_P (XEXP (SET_SRC (elt), 0))
13346 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13347 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13348 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13349 ((count - 1 - offset_adj) * reg_increment))
13350 return false;
13353 i = i + offset_adj;
13354 base = base + offset_adj;
13355 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13356 success depends on the type: VLDM can do just one reg,
13357 LDM must do at least two. */
13358 if ((count <= i) && (mode == SImode))
13359 return false;
13361 elt = XVECEXP (op, 0, i - 1);
13362 if (GET_CODE (elt) != SET)
13363 return false;
13365 if (load)
13367 reg = SET_DEST (elt);
13368 mem = SET_SRC (elt);
13370 else
13372 reg = SET_SRC (elt);
13373 mem = SET_DEST (elt);
13376 if (!REG_P (reg) || !MEM_P (mem))
13377 return false;
13379 regno = REGNO (reg);
13380 first_regno = regno;
13381 addr = XEXP (mem, 0);
13382 if (GET_CODE (addr) == PLUS)
13384 if (!CONST_INT_P (XEXP (addr, 1)))
13385 return false;
13387 offset = INTVAL (XEXP (addr, 1));
13388 addr = XEXP (addr, 0);
13391 if (!REG_P (addr))
13392 return false;
13394 /* Don't allow SP to be loaded unless it is also the base register. It
13395 guarantees that SP is reset correctly when an LDM instruction
13396 is interrupted. Otherwise, we might end up with a corrupt stack. */
13397 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13398 return false;
13400 for (; i < count; i++)
13402 elt = XVECEXP (op, 0, i);
13403 if (GET_CODE (elt) != SET)
13404 return false;
13406 if (load)
13408 reg = SET_DEST (elt);
13409 mem = SET_SRC (elt);
13411 else
13413 reg = SET_SRC (elt);
13414 mem = SET_DEST (elt);
13417 if (!REG_P (reg)
13418 || GET_MODE (reg) != mode
13419 || REGNO (reg) <= regno
13420 || (consecutive
13421 && (REGNO (reg) !=
13422 (unsigned int) (first_regno + regs_per_val * (i - base))))
13423 /* Don't allow SP to be loaded unless it is also the base register. It
13424 guarantees that SP is reset correctly when an LDM instruction
13425 is interrupted. Otherwise, we might end up with a corrupt stack. */
13426 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13427 || !MEM_P (mem)
13428 || GET_MODE (mem) != mode
13429 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13430 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13431 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13432 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13433 offset + (i - base) * reg_increment))
13434 && (!REG_P (XEXP (mem, 0))
13435 || offset + (i - base) * reg_increment != 0)))
13436 return false;
13438 regno = REGNO (reg);
13439 if (regno == REGNO (addr))
13440 addr_reg_in_reglist = true;
13443 if (load)
13445 if (update && addr_reg_in_reglist)
13446 return false;
13448 /* For Thumb-1, the address register is always modified - either by write-back
13449 or by explicit load. If the pattern does not describe an update,
13450 then the address register must be in the list of loaded registers. */
13451 if (TARGET_THUMB1)
13452 return update || addr_reg_in_reglist;
13455 return true;
13458 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13459 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13460 instruction. ADD_OFFSET is nonzero if the base address register needs
13461 to be modified with an add instruction before we can use it. */
13463 static bool
13464 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13465 int nops, HOST_WIDE_INT add_offset)
13467 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13468 if the offset isn't small enough. The reason 2 ldrs are faster
13469 is because these ARMs are able to do more than one cache access
13470 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13471 whilst the ARM8 has a double bandwidth cache. This means that
13472 these cores can do both an instruction fetch and a data fetch in
13473 a single cycle, so the trick of calculating the address into a
13474 scratch register (one of the result regs) and then doing a load
13475 multiple actually becomes slower (and no smaller in code size).
13476 That is the transformation
13478 ldr rd1, [rbase + offset]
13479 ldr rd2, [rbase + offset + 4]
13483 add rd1, rbase, offset
13484 ldmia rd1, {rd1, rd2}
13486 produces worse code -- '3 cycles + any stalls on rd2' instead of
13487 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13488 access per cycle, the first sequence could never complete in less
13489 than 6 cycles, whereas the ldm sequence would only take 5 and
13490 would make better use of sequential accesses if not hitting the
13491 cache.
13493 We cheat here and test 'arm_ld_sched' which we currently know to
13494 only be true for the ARM8, ARM9 and StrongARM. If this ever
13495 changes, then the test below needs to be reworked. */
13496 if (nops == 2 && arm_ld_sched && add_offset != 0)
13497 return false;
13499 /* XScale has load-store double instructions, but they have stricter
13500 alignment requirements than load-store multiple, so we cannot
13501 use them.
13503 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13504 the pipeline until completion.
13506 NREGS CYCLES
         1     3
         2     4
         3     5
         4     6
13512 An ldr instruction takes 1-3 cycles, but does not block the
13513 pipeline.
13515 NREGS CYCLES
13516 1 1-3
13517 2 2-6
13518 3 3-9
13519 4 4-12
13521 Best case ldr will always win. However, the more ldr instructions
13522 we issue, the less likely we are to be able to schedule them well.
13523 Using ldr instructions also increases code size.
13525 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13526 for counts of 3 or 4 regs. */
13527 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13528 return false;
13529 return true;
13532 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13533 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13534 an array ORDER which describes the sequence to use when accessing the
13535 offsets that produces an ascending order. In this sequence, each
13536 offset must be larger by exactly 4 than the previous one. ORDER[0]
13537 must have been filled in with the lowest offset by the caller.
13538 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13539 we use to verify that ORDER produces an ascending order of registers.
13540 Return true if it was possible to construct such an order, false if
13541 not. */
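/* Worked example (illustrative): with NOPS == 4 and UNSORTED_OFFSETS
   {8, 0, 4, 12}, the caller seeds ORDER[0] = 1 (the offset 0); the loop
   then locates the unique offsets 4, 8 and 12 in turn, giving ORDER =
   {1, 2, 0, 3}.  If UNSORTED_REGS were {1, 3, 2, 4}, the reordered register
   sequence would start 3, 2, which is not ascending, so the function would
   return false.  */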
13543 static bool
13544 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13545 int *unsorted_regs)
13547 int i;
13548 for (i = 1; i < nops; i++)
13550 int j;
13552 order[i] = order[i - 1];
13553 for (j = 0; j < nops; j++)
13554 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13556 /* We must find exactly one offset that is higher than the
13557 previous one by 4. */
13558 if (order[i] != order[i - 1])
13559 return false;
13560 order[i] = j;
13562 if (order[i] == order[i - 1])
13563 return false;
13564 /* The register numbers must be ascending. */
13565 if (unsorted_regs != NULL
13566 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13567 return false;
13569 return true;
13572 /* Used to determine in a peephole whether a sequence of load
13573 instructions can be changed into a load-multiple instruction.
13574 NOPS is the number of separate load instructions we are examining. The
13575 first NOPS entries in OPERANDS are the destination registers, the
13576 next NOPS entries are memory operands. If this function is
13577 successful, *BASE is set to the common base register of the memory
13578 accesses; *LOAD_OFFSET is set to the first memory location's offset
13579 from that base register.
13580 REGS is an array filled in with the destination register numbers.
13581 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13582 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13583 the sequence of registers in REGS matches the loads from ascending memory
13584 locations, and the function verifies that the register numbers are
13585 themselves ascending. If CHECK_REGS is false, the register numbers
13586 are stored in the order they are found in the operands. */
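/* For example, the pair

	ldr	r4, [r0, #4]
	ldr	r5, [r0, #8]

   shares base register r0, has offsets ascending by 4 starting at 4 and
   ascending destination registers, so on ARM this typically returns 2
   (the ldmib case), subject to the profitability check.  */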
13587 static int
13588 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13589 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13591 int unsorted_regs[MAX_LDM_STM_OPS];
13592 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13593 int order[MAX_LDM_STM_OPS];
13594 rtx base_reg_rtx = NULL;
13595 int base_reg = -1;
13596 int i, ldm_case;
13598 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13599 easily extended if required. */
13600 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13602 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13604 /* Loop over the operands and check that the memory references are
13605 suitable (i.e. immediate offsets from the same base register). At
13606 the same time, extract the target register, and the memory
13607 offsets. */
13608 for (i = 0; i < nops; i++)
13610 rtx reg;
13611 rtx offset;
13613 /* Convert a subreg of a mem into the mem itself. */
13614 if (GET_CODE (operands[nops + i]) == SUBREG)
13615 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13617 gcc_assert (MEM_P (operands[nops + i]));
13619 /* Don't reorder volatile memory references; it doesn't seem worth
13620 looking for the case where the order is ok anyway. */
13621 if (MEM_VOLATILE_P (operands[nops + i]))
13622 return 0;
13624 offset = const0_rtx;
13626 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13627 || (GET_CODE (reg) == SUBREG
13628 && REG_P (reg = SUBREG_REG (reg))))
13629 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13630 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13631 || (GET_CODE (reg) == SUBREG
13632 && REG_P (reg = SUBREG_REG (reg))))
13633 && (CONST_INT_P (offset
13634 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13636 if (i == 0)
13638 base_reg = REGNO (reg);
13639 base_reg_rtx = reg;
13640 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13641 return 0;
13643 else if (base_reg != (int) REGNO (reg))
13644 /* Not addressed from the same base register. */
13645 return 0;
13647 unsorted_regs[i] = (REG_P (operands[i])
13648 ? REGNO (operands[i])
13649 : REGNO (SUBREG_REG (operands[i])));
13651 /* If it isn't an integer register, or if it overwrites the
13652 base register but isn't the last insn in the list, then
13653 we can't do this. */
13654 if (unsorted_regs[i] < 0
13655 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13656 || unsorted_regs[i] > 14
13657 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13658 return 0;
13660 /* Don't allow SP to be loaded unless it is also the base
13661 register. It guarantees that SP is reset correctly when
13662 an LDM instruction is interrupted. Otherwise, we might
13663 end up with a corrupt stack. */
13664 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13665 return 0;
13667 unsorted_offsets[i] = INTVAL (offset);
13668 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13669 order[0] = i;
13671 else
13672 /* Not a suitable memory address. */
13673 return 0;
13676 /* All the useful information has now been extracted from the
13677 operands into unsorted_regs and unsorted_offsets; additionally,
13678 order[0] has been set to the lowest offset in the list. Sort
13679 the offsets into order, verifying that they are adjacent, and
13680 check that the register numbers are ascending. */
13681 if (!compute_offset_order (nops, unsorted_offsets, order,
13682 check_regs ? unsorted_regs : NULL))
13683 return 0;
13685 if (saved_order)
13686 memcpy (saved_order, order, sizeof order);
13688 if (base)
13690 *base = base_reg;
13692 for (i = 0; i < nops; i++)
13693 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13695 *load_offset = unsorted_offsets[order[0]];
13698 if (TARGET_THUMB1
13699 && !peep2_reg_dead_p (nops, base_reg_rtx))
13700 return 0;
13702 if (unsorted_offsets[order[0]] == 0)
13703 ldm_case = 1; /* ldmia */
13704 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13705 ldm_case = 2; /* ldmib */
13706 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13707 ldm_case = 3; /* ldmda */
13708 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13709 ldm_case = 4; /* ldmdb */
13710 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13711 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13712 ldm_case = 5;
13713 else
13714 return 0;
13716 if (!multiple_operation_profitable_p (false, nops,
13717 ldm_case == 5
13718 ? unsorted_offsets[order[0]] : 0))
13719 return 0;
13721 return ldm_case;
13724 /* Used to determine in a peephole whether a sequence of store instructions can
13725 be changed into a store-multiple instruction.
13726 NOPS is the number of separate store instructions we are examining.
13727 NOPS_TOTAL is the total number of instructions recognized by the peephole
13728 pattern.
13729 The first NOPS entries in OPERANDS are the source registers, the next
13730 NOPS entries are memory operands. If this function is successful, *BASE is
13731 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13732 to the first memory location's offset from that base register. REGS is an
13733 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13734 likewise filled with the corresponding rtx's.
13735 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13736 numbers to an ascending order of stores.
13737 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13738 from ascending memory locations, and the function verifies that the register
13739 numbers are themselves ascending. If CHECK_REGS is false, the register
13740 numbers are stored in the order they are found in the operands. */
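/* As an example, two stores at offsets -8 and -4 from a common base, with
   the register stored at -8 numbered below the one stored at -4, give
   stm_case 4 (stmdb) on 32-bit targets, again subject to the
   profitability check.  */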
13741 static int
13742 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13743 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13744 HOST_WIDE_INT *load_offset, bool check_regs)
13746 int unsorted_regs[MAX_LDM_STM_OPS];
13747 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13748 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13749 int order[MAX_LDM_STM_OPS];
13750 int base_reg = -1;
13751 rtx base_reg_rtx = NULL;
13752 int i, stm_case;
13754 /* Write back of base register is currently only supported for Thumb 1. */
13755 int base_writeback = TARGET_THUMB1;
13757 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13758 easily extended if required. */
13759 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13761 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13763 /* Loop over the operands and check that the memory references are
13764 suitable (i.e. immediate offsets from the same base register). At
13765 the same time, extract the source register, and the memory
13766 offsets. */
13767 for (i = 0; i < nops; i++)
13769 rtx reg;
13770 rtx offset;
13772 /* Convert a subreg of a mem into the mem itself. */
13773 if (GET_CODE (operands[nops + i]) == SUBREG)
13774 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13776 gcc_assert (MEM_P (operands[nops + i]));
13778 /* Don't reorder volatile memory references; it doesn't seem worth
13779 looking for the case where the order is ok anyway. */
13780 if (MEM_VOLATILE_P (operands[nops + i]))
13781 return 0;
13783 offset = const0_rtx;
13785 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13786 || (GET_CODE (reg) == SUBREG
13787 && REG_P (reg = SUBREG_REG (reg))))
13788 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13789 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13790 || (GET_CODE (reg) == SUBREG
13791 && REG_P (reg = SUBREG_REG (reg))))
13792 && (CONST_INT_P (offset
13793 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13795 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13796 ? operands[i] : SUBREG_REG (operands[i]));
13797 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13799 if (i == 0)
13801 base_reg = REGNO (reg);
13802 base_reg_rtx = reg;
13803 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13804 return 0;
13806 else if (base_reg != (int) REGNO (reg))
13807 /* Not addressed from the same base register. */
13808 return 0;
13810 /* If it isn't an integer register, then we can't do this. */
13811 if (unsorted_regs[i] < 0
13812 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13813 /* The effects are unpredictable if the base register is
13814 both updated and stored. */
13815 || (base_writeback && unsorted_regs[i] == base_reg)
13816 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13817 || unsorted_regs[i] > 14)
13818 return 0;
13820 unsorted_offsets[i] = INTVAL (offset);
13821 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13822 order[0] = i;
13824 else
13825 /* Not a suitable memory address. */
13826 return 0;
13829 /* All the useful information has now been extracted from the
13830 operands into unsorted_regs and unsorted_offsets; additionally,
13831 order[0] has been set to the lowest offset in the list. Sort
13832 the offsets into order, verifying that they are adjacent, and
13833 check that the register numbers are ascending. */
13834 if (!compute_offset_order (nops, unsorted_offsets, order,
13835 check_regs ? unsorted_regs : NULL))
13836 return 0;
13838 if (saved_order)
13839 memcpy (saved_order, order, sizeof order);
13841 if (base)
13843 *base = base_reg;
13845 for (i = 0; i < nops; i++)
13847 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13848 if (reg_rtxs)
13849 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13852 *load_offset = unsorted_offsets[order[0]];
13855 if (TARGET_THUMB1
13856 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13857 return 0;
13859 if (unsorted_offsets[order[0]] == 0)
13860 stm_case = 1; /* stmia */
13861 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13862 stm_case = 2; /* stmib */
13863 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13864 stm_case = 3; /* stmda */
13865 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13866 stm_case = 4; /* stmdb */
13867 else
13868 return 0;
13870 if (!multiple_operation_profitable_p (false, nops, 0))
13871 return 0;
13873 return stm_case;
13876 /* Routines for use in generating RTL. */
13878 /* Generate a load-multiple instruction. COUNT is the number of loads in
13879 the instruction; REGS and MEMS are arrays containing the operands.
13880 BASEREG is the base register to be used in addressing the memory operands.
13881 WBACK_OFFSET is nonzero if the instruction should update the base
13882 register. */
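/* As a sketch, a profitable two-register load with a writeback offset of
   8 is emitted as a single PARALLEL of roughly this shape:

	(parallel [(set (reg basereg) (plus (reg basereg) (const_int 8)))
		   (set (reg r4) (mem ...))
		   (set (reg r5) (mem ...))])

   whereas an unprofitable one is expanded as individual moves instead.  */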
13884 static rtx
13885 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13886 HOST_WIDE_INT wback_offset)
13888 int i = 0, j;
13889 rtx result;
13891 if (!multiple_operation_profitable_p (false, count, 0))
13893 rtx seq;
13895 start_sequence ();
13897 for (i = 0; i < count; i++)
13898 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13900 if (wback_offset != 0)
13901 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13903 seq = get_insns ();
13904 end_sequence ();
13906 return seq;
13909 result = gen_rtx_PARALLEL (VOIDmode,
13910 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13911 if (wback_offset != 0)
13913 XVECEXP (result, 0, 0)
13914 = gen_rtx_SET (VOIDmode, basereg,
13915 plus_constant (Pmode, basereg, wback_offset));
13916 i = 1;
13917 count++;
13920 for (j = 0; i < count; i++, j++)
13921 XVECEXP (result, 0, i)
13922 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13924 return result;
13927 /* Generate a store-multiple instruction. COUNT is the number of stores in
13928 the instruction; REGS and MEMS are arrays containing the operands.
13929 BASEREG is the base register to be used in addressing the memory operands.
13930 WBACK_OFFSET is nonzero if the instruction should update the base
13931 register. */
13933 static rtx
13934 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13935 HOST_WIDE_INT wback_offset)
13937 int i = 0, j;
13938 rtx result;
13940 if (GET_CODE (basereg) == PLUS)
13941 basereg = XEXP (basereg, 0);
13943 if (!multiple_operation_profitable_p (false, count, 0))
13945 rtx seq;
13947 start_sequence ();
13949 for (i = 0; i < count; i++)
13950 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13952 if (wback_offset != 0)
13953 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13955 seq = get_insns ();
13956 end_sequence ();
13958 return seq;
13961 result = gen_rtx_PARALLEL (VOIDmode,
13962 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13963 if (wback_offset != 0)
13965 XVECEXP (result, 0, 0)
13966 = gen_rtx_SET (VOIDmode, basereg,
13967 plus_constant (Pmode, basereg, wback_offset));
13968 i = 1;
13969 count++;
13972 for (j = 0; i < count; i++, j++)
13973 XVECEXP (result, 0, i)
13974 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13976 return result;
13979 /* Generate either a load-multiple or a store-multiple instruction. This
13980 function can be used in situations where we can start with a single MEM
13981 rtx and adjust its address upwards.
13982 COUNT is the number of operations in the instruction, not counting a
13983 possible update of the base register. REGS is an array containing the
13984 register operands.
13985 BASEREG is the base register to be used in addressing the memory operands,
13986 which are constructed from BASEMEM.
13987 WRITE_BACK specifies whether the generated instruction should include an
13988 update of the base register.
13989 OFFSETP is used to pass an offset to and from this function; this offset
13990 is not used when constructing the address (instead BASEMEM should have an
13991 appropriate offset in its address); it is used only for setting
13992 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
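/* Typical use (see arm_gen_movmemqi below): to load four consecutive words
   from SRC with base-register writeback,

	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));

   which also advances *OFFSETP (srcoffset here) by 16.  */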
13994 static rtx
13995 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13996 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13998 rtx mems[MAX_LDM_STM_OPS];
13999 HOST_WIDE_INT offset = *offsetp;
14000 int i;
14002 gcc_assert (count <= MAX_LDM_STM_OPS);
14004 if (GET_CODE (basereg) == PLUS)
14005 basereg = XEXP (basereg, 0);
14007 for (i = 0; i < count; i++)
14009 rtx addr = plus_constant (Pmode, basereg, i * 4);
14010 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14011 offset += 4;
14014 if (write_back)
14015 *offsetp = offset;
14017 if (is_load)
14018 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14019 write_back ? 4 * count : 0);
14020 else
14021 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14022 write_back ? 4 * count : 0);
14026 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14027 rtx basemem, HOST_WIDE_INT *offsetp)
14029 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14030 offsetp);
14034 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14035 rtx basemem, HOST_WIDE_INT *offsetp)
14037 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14038 offsetp);
14041 /* Called from a peephole2 expander to turn a sequence of loads into an
14042 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14043 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14044 is true if we can reorder the registers because they are used commutatively
14045 subsequently.
14046 Returns true iff we could generate a new instruction. */
14048 bool
14049 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14051 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14052 rtx mems[MAX_LDM_STM_OPS];
14053 int i, j, base_reg;
14054 rtx base_reg_rtx;
14055 HOST_WIDE_INT offset;
14056 int write_back = FALSE;
14057 int ldm_case;
14058 rtx addr;
14060 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14061 &base_reg, &offset, !sort_regs);
14063 if (ldm_case == 0)
14064 return false;
14066 if (sort_regs)
14067 for (i = 0; i < nops - 1; i++)
14068 for (j = i + 1; j < nops; j++)
14069 if (regs[i] > regs[j])
14071 int t = regs[i];
14072 regs[i] = regs[j];
14073 regs[j] = t;
14075 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14077 if (TARGET_THUMB1)
14079 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14080 gcc_assert (ldm_case == 1 || ldm_case == 5);
14081 write_back = TRUE;
14084 if (ldm_case == 5)
14086 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14087 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14088 offset = 0;
14089 if (!TARGET_THUMB1)
14091 base_reg = regs[0];
14092 base_reg_rtx = newbase;
14096 for (i = 0; i < nops; i++)
14098 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14099 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14100 SImode, addr, 0);
14102 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14103 write_back ? offset + i * 4 : 0));
14104 return true;
14107 /* Called from a peephole2 expander to turn a sequence of stores into an
14108 STM instruction. OPERANDS are the operands found by the peephole matcher;
14109 NOPS indicates how many separate stores we are trying to combine.
14110 Returns true iff we could generate a new instruction. */
14112 bool
14113 gen_stm_seq (rtx *operands, int nops)
14115 int i;
14116 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14117 rtx mems[MAX_LDM_STM_OPS];
14118 int base_reg;
14119 rtx base_reg_rtx;
14120 HOST_WIDE_INT offset;
14121 int write_back = FALSE;
14122 int stm_case;
14123 rtx addr;
14124 bool base_reg_dies;
14126 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14127 mem_order, &base_reg, &offset, true);
14129 if (stm_case == 0)
14130 return false;
14132 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14134 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14135 if (TARGET_THUMB1)
14137 gcc_assert (base_reg_dies);
14138 write_back = TRUE;
14141 if (stm_case == 5)
14143 gcc_assert (base_reg_dies);
14144 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14145 offset = 0;
14148 addr = plus_constant (Pmode, base_reg_rtx, offset);
14150 for (i = 0; i < nops; i++)
14152 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14153 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14154 SImode, addr, 0);
14156 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14157 write_back ? offset + i * 4 : 0));
14158 return true;
14161 /* Called from a peephole2 expander to turn a sequence of stores that are
14162 preceded by constant loads into an STM instruction. OPERANDS are the
14163 operands found by the peephole matcher; NOPS indicates how many
14164 separate stores we are trying to combine; there are 2 * NOPS
14165 instructions in the peephole.
14166 Returns true iff we could generate a new instruction. */
14168 bool
14169 gen_const_stm_seq (rtx *operands, int nops)
14171 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14172 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14173 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14174 rtx mems[MAX_LDM_STM_OPS];
14175 int base_reg;
14176 rtx base_reg_rtx;
14177 HOST_WIDE_INT offset;
14178 int write_back = FALSE;
14179 int stm_case;
14180 rtx addr;
14181 bool base_reg_dies;
14182 int i, j;
14183 HARD_REG_SET allocated;
14185 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14186 mem_order, &base_reg, &offset, false);
14188 if (stm_case == 0)
14189 return false;
14191 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14193 /* If the same register is used more than once, try to find a free
14194 register. */
14195 CLEAR_HARD_REG_SET (allocated);
14196 for (i = 0; i < nops; i++)
14198 for (j = i + 1; j < nops; j++)
14199 if (regs[i] == regs[j])
14201 rtx t = peep2_find_free_register (0, nops * 2,
14202 TARGET_THUMB1 ? "l" : "r",
14203 SImode, &allocated);
14204 if (t == NULL_RTX)
14205 return false;
14206 reg_rtxs[i] = t;
14207 regs[i] = REGNO (t);
14211 /* Compute an ordering that maps the register numbers to an ascending
14212 sequence. */
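/* E.g. if regs is {3, 1, 2}, reg_order becomes {1, 2, 0}: index 1 holds
   the smallest register number, then index 2, then index 0.  */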
14213 reg_order[0] = 0;
14214 for (i = 0; i < nops; i++)
14215 if (regs[i] < regs[reg_order[0]])
14216 reg_order[0] = i;
14218 for (i = 1; i < nops; i++)
14220 int this_order = reg_order[i - 1];
14221 for (j = 0; j < nops; j++)
14222 if (regs[j] > regs[reg_order[i - 1]]
14223 && (this_order == reg_order[i - 1]
14224 || regs[j] < regs[this_order]))
14225 this_order = j;
14226 reg_order[i] = this_order;
14229 /* Ensure that registers that must be live after the instruction end
14230 up with the correct value. */
14231 for (i = 0; i < nops; i++)
14233 int this_order = reg_order[i];
14234 if ((this_order != mem_order[i]
14235 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14236 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14237 return false;
14240 /* Load the constants. */
14241 for (i = 0; i < nops; i++)
14243 rtx op = operands[2 * nops + mem_order[i]];
14244 sorted_regs[i] = regs[reg_order[i]];
14245 emit_move_insn (reg_rtxs[reg_order[i]], op);
14248 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14250 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14251 if (TARGET_THUMB1)
14253 gcc_assert (base_reg_dies);
14254 write_back = TRUE;
14257 if (stm_case == 5)
14259 gcc_assert (base_reg_dies);
14260 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14261 offset = 0;
14264 addr = plus_constant (Pmode, base_reg_rtx, offset);
14266 for (i = 0; i < nops; i++)
14268 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14269 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14270 SImode, addr, 0);
14272 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14273 write_back ? offset + i * 4 : 0));
14274 return true;
14277 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14278 unaligned copies on processors which support unaligned semantics for those
14279 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14280 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14281 An interleave factor of 1 (the minimum) will perform no interleaving.
14282 Load/store multiple are used for aligned addresses where possible. */
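/* With INTERLEAVE_FACTOR == 2 and a word-aligned source, for instance,
   the first iteration of the main copy loop below emits roughly

	ldmia	src!, {r0, r1}
	str	r0, [dst]		@ unaligned store
	str	r1, [dst, #4]

   moving 8 bytes per iteration (with the store offsets growing on later
   iterations) while hiding some of the load latency.  */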
14284 static void
14285 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14286 HOST_WIDE_INT length,
14287 unsigned int interleave_factor)
14289 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14290 int *regnos = XALLOCAVEC (int, interleave_factor);
14291 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14292 HOST_WIDE_INT i, j;
14293 HOST_WIDE_INT remaining = length, words;
14294 rtx halfword_tmp = NULL, byte_tmp = NULL;
14295 rtx dst, src;
14296 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14297 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14298 HOST_WIDE_INT srcoffset, dstoffset;
14299 HOST_WIDE_INT src_autoinc, dst_autoinc;
14300 rtx mem, addr;
14302 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14304 /* Use hard registers if we have aligned source or destination so we can use
14305 load/store multiple with contiguous registers. */
14306 if (dst_aligned || src_aligned)
14307 for (i = 0; i < interleave_factor; i++)
14308 regs[i] = gen_rtx_REG (SImode, i);
14309 else
14310 for (i = 0; i < interleave_factor; i++)
14311 regs[i] = gen_reg_rtx (SImode);
14313 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14314 src = copy_addr_to_reg (XEXP (srcbase, 0));
14316 srcoffset = dstoffset = 0;
14318 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14319 For copying the last bytes we want to subtract this offset again. */
14320 src_autoinc = dst_autoinc = 0;
14322 for (i = 0; i < interleave_factor; i++)
14323 regnos[i] = i;
14325 /* Copy BLOCK_SIZE_BYTES chunks. */
14327 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14329 /* Load words. */
14330 if (src_aligned && interleave_factor > 1)
14332 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14333 TRUE, srcbase, &srcoffset));
14334 src_autoinc += UNITS_PER_WORD * interleave_factor;
14336 else
14338 for (j = 0; j < interleave_factor; j++)
14340 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14341 - src_autoinc));
14342 mem = adjust_automodify_address (srcbase, SImode, addr,
14343 srcoffset + j * UNITS_PER_WORD);
14344 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14346 srcoffset += block_size_bytes;
14349 /* Store words. */
14350 if (dst_aligned && interleave_factor > 1)
14352 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14353 TRUE, dstbase, &dstoffset));
14354 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14356 else
14358 for (j = 0; j < interleave_factor; j++)
14360 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14361 - dst_autoinc));
14362 mem = adjust_automodify_address (dstbase, SImode, addr,
14363 dstoffset + j * UNITS_PER_WORD);
14364 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14366 dstoffset += block_size_bytes;
14369 remaining -= block_size_bytes;
14372 /* Copy any whole words left (note these aren't interleaved with any
14373 subsequent halfword/byte load/stores in the interests of simplicity). */
14375 words = remaining / UNITS_PER_WORD;
14377 gcc_assert (words < interleave_factor);
14379 if (src_aligned && words > 1)
14381 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14382 &srcoffset));
14383 src_autoinc += UNITS_PER_WORD * words;
14385 else
14387 for (j = 0; j < words; j++)
14389 addr = plus_constant (Pmode, src,
14390 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14391 mem = adjust_automodify_address (srcbase, SImode, addr,
14392 srcoffset + j * UNITS_PER_WORD);
14393 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14395 srcoffset += words * UNITS_PER_WORD;
14398 if (dst_aligned && words > 1)
14400 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14401 &dstoffset));
14402 dst_autoinc += words * UNITS_PER_WORD;
14404 else
14406 for (j = 0; j < words; j++)
14408 addr = plus_constant (Pmode, dst,
14409 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14410 mem = adjust_automodify_address (dstbase, SImode, addr,
14411 dstoffset + j * UNITS_PER_WORD);
14412 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14414 dstoffset += words * UNITS_PER_WORD;
14417 remaining -= words * UNITS_PER_WORD;
14419 gcc_assert (remaining < 4);
14421 /* Copy a halfword if necessary. */
14423 if (remaining >= 2)
14425 halfword_tmp = gen_reg_rtx (SImode);
14427 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14428 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14429 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14431 /* Either write out immediately, or delay until we've loaded the last
14432 byte, depending on interleave factor. */
14433 if (interleave_factor == 1)
14435 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14436 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14437 emit_insn (gen_unaligned_storehi (mem,
14438 gen_lowpart (HImode, halfword_tmp)));
14439 halfword_tmp = NULL;
14440 dstoffset += 2;
14443 remaining -= 2;
14444 srcoffset += 2;
14447 gcc_assert (remaining < 2);
14449 /* Copy last byte. */
14451 if ((remaining & 1) != 0)
14453 byte_tmp = gen_reg_rtx (SImode);
14455 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14456 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14457 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14459 if (interleave_factor == 1)
14461 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14462 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14463 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14464 byte_tmp = NULL;
14465 dstoffset++;
14468 remaining--;
14469 srcoffset++;
14472 /* Store last halfword if we haven't done so already. */
14474 if (halfword_tmp)
14476 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14477 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14478 emit_insn (gen_unaligned_storehi (mem,
14479 gen_lowpart (HImode, halfword_tmp)));
14480 dstoffset += 2;
14483 /* Likewise for last byte. */
14485 if (byte_tmp)
14487 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14488 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14489 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14490 dstoffset++;
14493 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14496 /* From mips_adjust_block_mem:
14498 Helper function for doing a loop-based block operation on memory
14499 reference MEM. Each iteration of the loop will operate on LENGTH
14500 bytes of MEM.
14502 Create a new base register for use within the loop and point it to
14503 the start of MEM. Create a new memory reference that uses this
14504 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14506 static void
14507 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14508 rtx *loop_mem)
14510 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14512 /* Although the new mem does not refer to a known location,
14513 it does keep up to LENGTH bytes of alignment. */
14514 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14515 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14518 /* From mips_block_move_loop:
14520 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14521 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14522 the memory regions do not overlap. */
14524 static void
14525 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14526 unsigned int interleave_factor,
14527 HOST_WIDE_INT bytes_per_iter)
14529 rtx src_reg, dest_reg, final_src, test;
14530 HOST_WIDE_INT leftover;
14532 leftover = length % bytes_per_iter;
14533 length -= leftover;
14535 /* Create registers and memory references for use within the loop. */
14536 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14537 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14539 /* Calculate the value that SRC_REG should have after the last iteration of
14540 the loop. */
14541 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14542 0, 0, OPTAB_WIDEN);
14544 /* Emit the start of the loop. */
14545 rtx_code_label *label = gen_label_rtx ();
14546 emit_label (label);
14548 /* Emit the loop body. */
14549 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14550 interleave_factor);
14552 /* Move on to the next block. */
14553 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14554 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14556 /* Emit the loop condition. */
14557 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14558 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14560 /* Mop up any left-over bytes. */
14561 if (leftover)
14562 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14565 /* Emit a block move when either the source or destination is unaligned (not
14566 aligned to a four-byte boundary). This may need further tuning depending on
14567 core type, optimize_size setting, etc. */
14569 static int
14570 arm_movmemqi_unaligned (rtx *operands)
14572 HOST_WIDE_INT length = INTVAL (operands[2]);
14574 if (optimize_size)
14576 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14577 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14578 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14579 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14580 or dst_aligned though: allow more interleaving in those cases since the
14581 resulting code can be smaller. */
14582 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14583 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14585 if (length > 12)
14586 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14587 interleave_factor, bytes_per_iter);
14588 else
14589 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14590 interleave_factor);
14592 else
14594 /* Note that the loop created by arm_block_move_unaligned_loop may be
14595 subject to loop unrolling, which makes tuning this condition a little
14596 redundant. */
14597 if (length > 32)
14598 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14599 else
14600 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14603 return 1;
14607 arm_gen_movmemqi (rtx *operands)
14609 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14610 HOST_WIDE_INT srcoffset, dstoffset;
14611 int i;
14612 rtx src, dst, srcbase, dstbase;
14613 rtx part_bytes_reg = NULL;
14614 rtx mem;
14616 if (!CONST_INT_P (operands[2])
14617 || !CONST_INT_P (operands[3])
14618 || INTVAL (operands[2]) > 64)
14619 return 0;
14621 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14622 return arm_movmemqi_unaligned (operands);
14624 if (INTVAL (operands[3]) & 3)
14625 return 0;
14627 dstbase = operands[0];
14628 srcbase = operands[1];
14630 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14631 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14633 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14634 out_words_to_go = INTVAL (operands[2]) / 4;
14635 last_bytes = INTVAL (operands[2]) & 3;
14636 dstoffset = srcoffset = 0;
14638 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14639 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14641 for (i = 0; in_words_to_go >= 2; i+=4)
14643 if (in_words_to_go > 4)
14644 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14645 TRUE, srcbase, &srcoffset));
14646 else
14647 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14648 src, FALSE, srcbase,
14649 &srcoffset));
14651 if (out_words_to_go)
14653 if (out_words_to_go > 4)
14654 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14655 TRUE, dstbase, &dstoffset));
14656 else if (out_words_to_go != 1)
14657 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14658 out_words_to_go, dst,
14659 (last_bytes == 0
14660 ? FALSE : TRUE),
14661 dstbase, &dstoffset));
14662 else
14664 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14665 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14666 if (last_bytes != 0)
14668 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14669 dstoffset += 4;
14674 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14675 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14678 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14679 if (out_words_to_go)
14681 rtx sreg;
14683 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14684 sreg = copy_to_reg (mem);
14686 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14687 emit_move_insn (mem, sreg);
14688 in_words_to_go--;
14690 gcc_assert (!in_words_to_go); /* Sanity check */
14693 if (in_words_to_go)
14695 gcc_assert (in_words_to_go > 0);
14697 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14698 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14701 gcc_assert (!last_bytes || part_bytes_reg);
14703 if (BYTES_BIG_ENDIAN && last_bytes)
14705 rtx tmp = gen_reg_rtx (SImode);
14707 /* The bytes we want are in the top end of the word. */
14708 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14709 GEN_INT (8 * (4 - last_bytes))));
14710 part_bytes_reg = tmp;
14712 while (last_bytes)
14714 mem = adjust_automodify_address (dstbase, QImode,
14715 plus_constant (Pmode, dst,
14716 last_bytes - 1),
14717 dstoffset + last_bytes - 1);
14718 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14720 if (--last_bytes)
14722 tmp = gen_reg_rtx (SImode);
14723 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14724 part_bytes_reg = tmp;
14729 else
14731 if (last_bytes > 1)
14733 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14734 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14735 last_bytes -= 2;
14736 if (last_bytes)
14738 rtx tmp = gen_reg_rtx (SImode);
14739 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14740 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14741 part_bytes_reg = tmp;
14742 dstoffset += 2;
14746 if (last_bytes)
14748 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14749 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14753 return 1;
14756 /* Helper for gen_movmem_ldrd_strd. Return a copy of the memory rtx MEM
14757 with its address advanced by the size of its mode. */
14758 inline static rtx
14759 next_consecutive_mem (rtx mem)
14761 enum machine_mode mode = GET_MODE (mem);
14762 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14763 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14765 return adjust_automodify_address (mem, mode, addr, offset);
14768 /* Copy using LDRD/STRD instructions whenever possible.
14769 Returns true upon success. */
14770 bool
14771 gen_movmem_ldrd_strd (rtx *operands)
14773 unsigned HOST_WIDE_INT len;
14774 HOST_WIDE_INT align;
14775 rtx src, dst, base;
14776 rtx reg0;
14777 bool src_aligned, dst_aligned;
14778 bool src_volatile, dst_volatile;
14780 gcc_assert (CONST_INT_P (operands[2]));
14781 gcc_assert (CONST_INT_P (operands[3]));
14783 len = UINTVAL (operands[2]);
14784 if (len > 64)
14785 return false;
14787 /* Maximum alignment we can assume for both src and dst buffers. */
14788 align = INTVAL (operands[3]);
14790 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14791 return false;
14793 /* Place src and dst addresses in registers
14794 and update the corresponding mem rtx. */
14795 dst = operands[0];
14796 dst_volatile = MEM_VOLATILE_P (dst);
14797 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14798 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14799 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14801 src = operands[1];
14802 src_volatile = MEM_VOLATILE_P (src);
14803 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14804 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14805 src = adjust_automodify_address (src, VOIDmode, base, 0);
14807 if (!unaligned_access && !(src_aligned && dst_aligned))
14808 return false;
14810 if (src_volatile || dst_volatile)
14811 return false;
14813 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14814 if (!(dst_aligned || src_aligned))
14815 return arm_gen_movmemqi (operands);
14817 src = adjust_address (src, DImode, 0);
14818 dst = adjust_address (dst, DImode, 0);
14819 while (len >= 8)
14821 len -= 8;
14822 reg0 = gen_reg_rtx (DImode);
14823 if (src_aligned)
14824 emit_move_insn (reg0, src);
14825 else
14826 emit_insn (gen_unaligned_loaddi (reg0, src));
14828 if (dst_aligned)
14829 emit_move_insn (dst, reg0);
14830 else
14831 emit_insn (gen_unaligned_storedi (dst, reg0));
14833 src = next_consecutive_mem (src);
14834 dst = next_consecutive_mem (dst);
14837 gcc_assert (len < 8);
14838 if (len >= 4)
14840 /* More than a word but less than a double-word to copy. Copy a word. */
14841 reg0 = gen_reg_rtx (SImode);
14842 src = adjust_address (src, SImode, 0);
14843 dst = adjust_address (dst, SImode, 0);
14844 if (src_aligned)
14845 emit_move_insn (reg0, src);
14846 else
14847 emit_insn (gen_unaligned_loadsi (reg0, src));
14849 if (dst_aligned)
14850 emit_move_insn (dst, reg0);
14851 else
14852 emit_insn (gen_unaligned_storesi (dst, reg0));
14854 src = next_consecutive_mem (src);
14855 dst = next_consecutive_mem (dst);
14856 len -= 4;
14859 if (len == 0)
14860 return true;
14862 /* Copy the remaining bytes. */
14863 if (len >= 2)
14865 dst = adjust_address (dst, HImode, 0);
14866 src = adjust_address (src, HImode, 0);
14867 reg0 = gen_reg_rtx (SImode);
14868 if (src_aligned)
14869 emit_insn (gen_zero_extendhisi2 (reg0, src));
14870 else
14871 emit_insn (gen_unaligned_loadhiu (reg0, src));
14873 if (dst_aligned)
14874 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14875 else
14876 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14878 src = next_consecutive_mem (src);
14879 dst = next_consecutive_mem (dst);
14880 if (len == 2)
14881 return true;
14884 dst = adjust_address (dst, QImode, 0);
14885 src = adjust_address (src, QImode, 0);
14886 reg0 = gen_reg_rtx (QImode);
14887 emit_move_insn (reg0, src);
14888 emit_move_insn (dst, reg0);
14889 return true;
14892 /* Select a dominance comparison mode if possible for a test of the general
14893 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14894 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14895 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14896 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14897 In all cases OP will be either EQ or NE, but we don't need to know which
14898 here. If we are unable to support a dominance comparison we return
14899 CC mode. This will then fail to match for the RTL expressions that
14900 generate this call. */
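/* For instance, combining two EQ comparisons with COND_OR == DOM_CC_X_AND_Y
   (i.e. testing A == B && C == D) yields CC_DEQmode via the EQ case in the
   switch below.  */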
14901 enum machine_mode
14902 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14904 enum rtx_code cond1, cond2;
14905 int swapped = 0;
14907 /* Currently we will probably get the wrong result if the individual
14908 comparisons are not simple. This also ensures that it is safe to
14909 reverse a comparison if necessary. */
14910 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14911 != CCmode)
14912 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14913 != CCmode))
14914 return CCmode;
14916 /* The if_then_else variant of this tests the second condition if the
14917 first passes, but is true if the first fails. Reverse the first
14918 condition to get a true "inclusive-or" expression. */
14919 if (cond_or == DOM_CC_NX_OR_Y)
14920 cond1 = reverse_condition (cond1);
14922 /* If the comparisons are not equal, and one doesn't dominate the other,
14923 then we can't do this. */
14924 if (cond1 != cond2
14925 && !comparison_dominates_p (cond1, cond2)
14926 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14927 return CCmode;
14929 if (swapped)
14931 enum rtx_code temp = cond1;
14932 cond1 = cond2;
14933 cond2 = temp;
14936 switch (cond1)
14938 case EQ:
14939 if (cond_or == DOM_CC_X_AND_Y)
14940 return CC_DEQmode;
14942 switch (cond2)
14944 case EQ: return CC_DEQmode;
14945 case LE: return CC_DLEmode;
14946 case LEU: return CC_DLEUmode;
14947 case GE: return CC_DGEmode;
14948 case GEU: return CC_DGEUmode;
14949 default: gcc_unreachable ();
14952 case LT:
14953 if (cond_or == DOM_CC_X_AND_Y)
14954 return CC_DLTmode;
14956 switch (cond2)
14958 case LT:
14959 return CC_DLTmode;
14960 case LE:
14961 return CC_DLEmode;
14962 case NE:
14963 return CC_DNEmode;
14964 default:
14965 gcc_unreachable ();
14968 case GT:
14969 if (cond_or == DOM_CC_X_AND_Y)
14970 return CC_DGTmode;
14972 switch (cond2)
14974 case GT:
14975 return CC_DGTmode;
14976 case GE:
14977 return CC_DGEmode;
14978 case NE:
14979 return CC_DNEmode;
14980 default:
14981 gcc_unreachable ();
14984 case LTU:
14985 if (cond_or == DOM_CC_X_AND_Y)
14986 return CC_DLTUmode;
14988 switch (cond2)
14990 case LTU:
14991 return CC_DLTUmode;
14992 case LEU:
14993 return CC_DLEUmode;
14994 case NE:
14995 return CC_DNEmode;
14996 default:
14997 gcc_unreachable ();
15000 case GTU:
15001 if (cond_or == DOM_CC_X_AND_Y)
15002 return CC_DGTUmode;
15004 switch (cond2)
15006 case GTU:
15007 return CC_DGTUmode;
15008 case GEU:
15009 return CC_DGEUmode;
15010 case NE:
15011 return CC_DNEmode;
15012 default:
15013 gcc_unreachable ();
15016 /* The remaining cases only occur when both comparisons are the
15017 same. */
15018 case NE:
15019 gcc_assert (cond1 == cond2);
15020 return CC_DNEmode;
15022 case LE:
15023 gcc_assert (cond1 == cond2);
15024 return CC_DLEmode;
15026 case GE:
15027 gcc_assert (cond1 == cond2);
15028 return CC_DGEmode;
15030 case LEU:
15031 gcc_assert (cond1 == cond2);
15032 return CC_DLEUmode;
15034 case GEU:
15035 gcc_assert (cond1 == cond2);
15036 return CC_DGEUmode;
15038 default:
15039 gcc_unreachable ();
15043 enum machine_mode
15044 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15046 /* All floating point compares return CCFP if it is an equality
15047 comparison, and CCFPE otherwise. */
15048 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15050 switch (op)
15052 case EQ:
15053 case NE:
15054 case UNORDERED:
15055 case ORDERED:
15056 case UNLT:
15057 case UNLE:
15058 case UNGT:
15059 case UNGE:
15060 case UNEQ:
15061 case LTGT:
15062 return CCFPmode;
15064 case LT:
15065 case LE:
15066 case GT:
15067 case GE:
15068 return CCFPEmode;
15070 default:
15071 gcc_unreachable ();
15075 /* A compare with a shifted operand. Because of canonicalization, the
15076 comparison will have to be swapped when we emit the assembler. */
15077 if (GET_MODE (y) == SImode
15078 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15079 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15080 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15081 || GET_CODE (x) == ROTATERT))
15082 return CC_SWPmode;
15084 /* This operation is performed swapped, but since we only rely on the Z
15085 flag we don't need an additional mode. */
15086 if (GET_MODE (y) == SImode
15087 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15088 && GET_CODE (x) == NEG
15089 && (op == EQ || op == NE))
15090 return CC_Zmode;
15092 /* This is a special case that is used by combine to allow a
15093 comparison of a shifted byte load to be split into a zero-extend
15094 followed by a comparison of the shifted integer (only valid for
15095 equalities and unsigned inequalities). */
15096 if (GET_MODE (x) == SImode
15097 && GET_CODE (x) == ASHIFT
15098 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15099 && GET_CODE (XEXP (x, 0)) == SUBREG
15100 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15101 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15102 && (op == EQ || op == NE
15103 || op == GEU || op == GTU || op == LTU || op == LEU)
15104 && CONST_INT_P (y))
15105 return CC_Zmode;
15107 /* A construct for a conditional compare, if the false arm contains
15108 0, then both conditions must be true, otherwise either condition
15109 must be true. Not all conditions are possible, so CCmode is
15110 returned if it can't be done. */
15111 if (GET_CODE (x) == IF_THEN_ELSE
15112 && (XEXP (x, 2) == const0_rtx
15113 || XEXP (x, 2) == const1_rtx)
15114 && COMPARISON_P (XEXP (x, 0))
15115 && COMPARISON_P (XEXP (x, 1)))
15116 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15117 INTVAL (XEXP (x, 2)));
15119 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15120 if (GET_CODE (x) == AND
15121 && (op == EQ || op == NE)
15122 && COMPARISON_P (XEXP (x, 0))
15123 && COMPARISON_P (XEXP (x, 1)))
15124 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15125 DOM_CC_X_AND_Y);
15127 if (GET_CODE (x) == IOR
15128 && (op == EQ || op == NE)
15129 && COMPARISON_P (XEXP (x, 0))
15130 && COMPARISON_P (XEXP (x, 1)))
15131 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15132 DOM_CC_X_OR_Y);
15134 /* An operation (on Thumb) where we want to test for a single bit.
15135 This is done by shifting that bit up into the top bit of a
15136 scratch register; we can then branch on the sign bit. */
15137 if (TARGET_THUMB1
15138 && GET_MODE (x) == SImode
15139 && (op == EQ || op == NE)
15140 && GET_CODE (x) == ZERO_EXTRACT
15141 && XEXP (x, 1) == const1_rtx)
15142 return CC_Nmode;
15144 /* An operation that sets the condition codes as a side-effect, the
15145 V flag is not set correctly, so we can only use comparisons where
15146 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15147 instead.) */
15148 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15149 if (GET_MODE (x) == SImode
15150 && y == const0_rtx
15151 && (op == EQ || op == NE || op == LT || op == GE)
15152 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15153 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15154 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15155 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15156 || GET_CODE (x) == LSHIFTRT
15157 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15158 || GET_CODE (x) == ROTATERT
15159 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15160 return CC_NOOVmode;
15162 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15163 return CC_Zmode;
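/* A comparison of the form (LTU (plus A B) A), or its GEU inverse, is how
   an unsigned add-overflow test is typically represented; only the carry
   flag is needed, hence CC_Cmode.  */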
15165 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15166 && GET_CODE (x) == PLUS
15167 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15168 return CC_Cmode;
15170 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15172 switch (op)
15174 case EQ:
15175 case NE:
15176 /* A DImode comparison against zero can be implemented by
15177 or'ing the two halves together. */
15178 if (y == const0_rtx)
15179 return CC_Zmode;
15181 /* We can do an equality test in three Thumb instructions. */
15182 if (!TARGET_32BIT)
15183 return CC_Zmode;
15185 /* FALLTHROUGH */
15187 case LTU:
15188 case LEU:
15189 case GTU:
15190 case GEU:
15191 /* DImode unsigned comparisons can be implemented by cmp +
15192 cmpeq without a scratch register. Not worth doing in
15193 Thumb-2. */
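/* That is, roughly

	cmp	xhi, yhi
	cmpeq	xlo, ylo

   comparing the high words first and the low words only when the high
   words are equal.  */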
15194 if (TARGET_32BIT)
15195 return CC_CZmode;
15197 /* FALLTHROUGH */
15199 case LT:
15200 case LE:
15201 case GT:
15202 case GE:
15203 /* DImode signed and unsigned comparisons can be implemented
15204 by cmp + sbcs with a scratch register, but that does not
15205 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15206 gcc_assert (op != EQ && op != NE);
15207 return CC_NCVmode;
15209 default:
15210 gcc_unreachable ();
15214 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15215 return GET_MODE (x);
15217 return CCmode;
15220 /* X and Y are two things to compare using CODE. Emit the compare insn and
15221 return the rtx for register 0 in the proper mode. FP means this is a
15222 floating point compare: I don't think that it is needed on the arm. */
15224 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15226 enum machine_mode mode;
15227 rtx cc_reg;
15228 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15230 /* We might have X as a constant, Y as a register because of the predicates
15231 used for cmpdi. If so, force X to a register here. */
15232 if (dimode_comparison && !REG_P (x))
15233 x = force_reg (DImode, x);
15235 mode = SELECT_CC_MODE (code, x, y);
15236 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15238 if (dimode_comparison
15239 && mode != CC_CZmode)
15241 rtx clobber, set;
15243 /* To compare two non-zero values for equality, XOR them and
15244 then compare against zero. Not used for ARM mode; there
15245 CC_CZmode is cheaper. */
15246 if (mode == CC_Zmode && y != const0_rtx)
15248 gcc_assert (!reload_completed);
15249 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15250 y = const0_rtx;
15253 /* A scratch register is required. */
15254 if (reload_completed)
15255 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15256 else
15257 scratch = gen_rtx_SCRATCH (SImode);
15259 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15260 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15261 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15263 else
15264 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15266 return cc_reg;
15269 /* Generate a sequence of insns that will generate the correct return
15270 address mask depending on the physical architecture that the program
15271 is running on. */
15273 arm_gen_return_addr_mask (void)
15275 rtx reg = gen_reg_rtx (Pmode);
15277 emit_insn (gen_return_addr_mask (reg));
15278 return reg;
15281 void
15282 arm_reload_in_hi (rtx *operands)
15284 rtx ref = operands[1];
15285 rtx base, scratch;
15286 HOST_WIDE_INT offset = 0;
15288 if (GET_CODE (ref) == SUBREG)
15290 offset = SUBREG_BYTE (ref);
15291 ref = SUBREG_REG (ref);
15294 if (REG_P (ref))
15296 /* We have a pseudo which has been spilt onto the stack; there
15297 are two cases here: the first where there is a simple
15298 stack-slot replacement and a second where the stack-slot is
15299 out of range, or is used as a subreg. */
15300 if (reg_equiv_mem (REGNO (ref)))
15302 ref = reg_equiv_mem (REGNO (ref));
15303 base = find_replacement (&XEXP (ref, 0));
15305 else
15306 /* The slot is out of range, or was dressed up in a SUBREG. */
15307 base = reg_equiv_address (REGNO (ref));
15309 else
15310 base = find_replacement (&XEXP (ref, 0));
15312 /* Handle the case where the address is too complex to be offset by 1. */
15313 if (GET_CODE (base) == MINUS
15314 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15316 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15318 emit_set_insn (base_plus, base);
15319 base = base_plus;
15321 else if (GET_CODE (base) == PLUS)
15323 /* The addend must be CONST_INT, or we would have dealt with it above. */
15324 HOST_WIDE_INT hi, lo;
15326 offset += INTVAL (XEXP (base, 1));
15327 base = XEXP (base, 0);
15329 /* Rework the address into a legal sequence of insns. */
15330 /* Valid range for lo is -4095 -> 4095 */
15331 lo = (offset >= 0
15332 ? (offset & 0xfff)
15333 : -((-offset) & 0xfff));
15335 /* Corner case, if lo is the max offset then we would be out of range
15336 once we have added the additional 1 below, so bump the msb into the
15337 pre-loading insn(s). */
15338 if (lo == 4095)
15339 lo &= 0x7ff;
15341 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15342 ^ (HOST_WIDE_INT) 0x80000000)
15343 - (HOST_WIDE_INT) 0x80000000);
15345 gcc_assert (hi + lo == offset);
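/* E.g. an offset of 5000 splits as lo = 904, hi = 4096; an offset of 4095
   hits the corner case above and splits as lo = 2047, hi = 2048, so that
   lo + 1 still fits in the 12-bit range.  */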
15347 if (hi != 0)
15349 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15351 /* Get the base address; addsi3 knows how to handle constants
15352 that require more than one insn. */
15353 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15354 base = base_plus;
15355 offset = lo;
15359 /* Operands[2] may overlap operands[0] (though it won't overlap
15360 operands[1]); that's why we asked for a DImode reg -- so we can
15361 use the half that does not overlap. */
15362 if (REGNO (operands[2]) == REGNO (operands[0]))
15363 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15364 else
15365 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15367 emit_insn (gen_zero_extendqisi2 (scratch,
15368 gen_rtx_MEM (QImode,
15369 plus_constant (Pmode, base,
15370 offset))));
15371 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15372 gen_rtx_MEM (QImode,
15373 plus_constant (Pmode, base,
15374 offset + 1))));
15375 if (!BYTES_BIG_ENDIAN)
15376 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15377 gen_rtx_IOR (SImode,
15378 gen_rtx_ASHIFT
15379 (SImode,
15380 gen_rtx_SUBREG (SImode, operands[0], 0),
15381 GEN_INT (8)),
15382 scratch));
15383 else
15384 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15385 gen_rtx_IOR (SImode,
15386 gen_rtx_ASHIFT (SImode, scratch,
15387 GEN_INT (8)),
15388 gen_rtx_SUBREG (SImode, operands[0], 0)));
15391 /* Handle storing a half-word to memory during reload by synthesizing as two
15392 byte stores. Take care not to clobber the input values until after we
15393 have moved them somewhere safe. This code assumes that if the DImode
15394 scratch in operands[2] overlaps either the input value or output address
15395 in some way, then that value must die in this insn (we absolutely need
15396 two scratch registers for some corner cases). */
15397 void
15398 arm_reload_out_hi (rtx *operands)
15400 rtx ref = operands[0];
15401 rtx outval = operands[1];
15402 rtx base, scratch;
15403 HOST_WIDE_INT offset = 0;
15405 if (GET_CODE (ref) == SUBREG)
15407 offset = SUBREG_BYTE (ref);
15408 ref = SUBREG_REG (ref);
15411 if (REG_P (ref))
15413 /* We have a pseudo which has been spilt onto the stack; there
15414 are two cases here: the first where there is a simple
15415 stack-slot replacement and a second where the stack-slot is
15416 out of range, or is used as a subreg. */
15417 if (reg_equiv_mem (REGNO (ref)))
15419 ref = reg_equiv_mem (REGNO (ref));
15420 base = find_replacement (&XEXP (ref, 0));
15422 else
15423 /* The slot is out of range, or was dressed up in a SUBREG. */
15424 base = reg_equiv_address (REGNO (ref));
15426 else
15427 base = find_replacement (&XEXP (ref, 0));
15429 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15431 /* Handle the case where the address is too complex to be offset by 1. */
15432 if (GET_CODE (base) == MINUS
15433 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15435 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15437 /* Be careful not to destroy OUTVAL. */
15438 if (reg_overlap_mentioned_p (base_plus, outval))
15440 /* Updating base_plus might destroy outval, see if we can
15441 swap the scratch and base_plus. */
15442 if (!reg_overlap_mentioned_p (scratch, outval))
15444 rtx tmp = scratch;
15445 scratch = base_plus;
15446 base_plus = tmp;
15448 else
15450 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15452 /* Be conservative and copy OUTVAL into the scratch now,
15453 this should only be necessary if outval is a subreg
15454 of something larger than a word. */
15455 /* XXX Might this clobber base? I can't see how it can,
15456 since scratch is known to overlap with OUTVAL, and
15457 must be wider than a word. */
15458 emit_insn (gen_movhi (scratch_hi, outval));
15459 outval = scratch_hi;
15463 emit_set_insn (base_plus, base);
15464 base = base_plus;
15466 else if (GET_CODE (base) == PLUS)
15468 /* The addend must be CONST_INT, or we would have dealt with it above. */
15469 HOST_WIDE_INT hi, lo;
15471 offset += INTVAL (XEXP (base, 1));
15472 base = XEXP (base, 0);
15474 /* Rework the address into a legal sequence of insns. */
15475 /* Valid range for lo is -4095 -> 4095 */
15476 lo = (offset >= 0
15477 ? (offset & 0xfff)
15478 : -((-offset) & 0xfff));
15480 /* Corner case: if lo is the max offset then we would be out of range
15481 once we have added the additional 1 below, so bump the msb into the
15482 pre-loading insn(s). */
15483 if (lo == 4095)
15484 lo &= 0x7ff;
15486 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15487 ^ (HOST_WIDE_INT) 0x80000000)
15488 - (HOST_WIDE_INT) 0x80000000);
15490 gcc_assert (hi + lo == offset);
15492 if (hi != 0)
15494 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15496 /* Be careful not to destroy OUTVAL. */
15497 if (reg_overlap_mentioned_p (base_plus, outval))
15499 /* Updating base_plus might destroy outval, see if we
15500 can swap the scratch and base_plus. */
15501 if (!reg_overlap_mentioned_p (scratch, outval))
15503 rtx tmp = scratch;
15504 scratch = base_plus;
15505 base_plus = tmp;
15507 else
15509 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15511 /* Be conservative and copy outval into scratch now,
15512 this should only be necessary if outval is a
15513 subreg of something larger than a word. */
15514 /* XXX Might this clobber base? I can't see how it
15515 can, since scratch is known to overlap with
15516 outval. */
15517 emit_insn (gen_movhi (scratch_hi, outval));
15518 outval = scratch_hi;
15522 /* Get the base address; addsi3 knows how to handle constants
15523 that require more than one insn. */
15524 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15525 base = base_plus;
15526 offset = lo;
15530 if (BYTES_BIG_ENDIAN)
15532 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15533 plus_constant (Pmode, base,
15534 offset + 1)),
15535 gen_lowpart (QImode, outval)));
15536 emit_insn (gen_lshrsi3 (scratch,
15537 gen_rtx_SUBREG (SImode, outval, 0),
15538 GEN_INT (8)));
15539 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15540 offset)),
15541 gen_lowpart (QImode, scratch)));
15543 else
15545 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15546 offset)),
15547 gen_lowpart (QImode, outval)));
15548 emit_insn (gen_lshrsi3 (scratch,
15549 gen_rtx_SUBREG (SImode, outval, 0),
15550 GEN_INT (8)));
15551 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15552 plus_constant (Pmode, base,
15553 offset + 1)),
15554 gen_lowpart (QImode, scratch)));
15558 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15559 (padded to the size of a word) should be passed in a register. */
15561 static bool
15562 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15564 if (TARGET_AAPCS_BASED)
15565 return must_pass_in_stack_var_size (mode, type);
15566 else
15567 return must_pass_in_stack_var_size_or_pad (mode, type);
15571 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15572 Return true if an argument passed on the stack should be padded upwards,
15573 i.e. if the least-significant byte has useful data.
15574 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15575 aggregate types are placed in the lowest memory address. */
15577 bool
15578 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15580 if (!TARGET_AAPCS_BASED)
15581 return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;
15583 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15584 return false;
15586 return true;
15590 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15591 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15592 register has useful data, and return the opposite if the most
15593 significant byte does. */
15595 bool
15596 arm_pad_reg_upward (enum machine_mode mode,
15597 tree type, int first ATTRIBUTE_UNUSED)
15599 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15601 /* For AAPCS, small aggregates, small fixed-point types,
15602 and small complex types are always padded upwards. */
15603 if (type)
15605 if ((AGGREGATE_TYPE_P (type)
15606 || TREE_CODE (type) == COMPLEX_TYPE
15607 || FIXED_POINT_TYPE_P (type))
15608 && int_size_in_bytes (type) <= 4)
15609 return true;
15611 else
15613 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15614 && GET_MODE_SIZE (mode) <= 4)
15615 return true;
15619 /* Otherwise, use default padding. */
15620 return !BYTES_BIG_ENDIAN;
15623 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15624 assuming that the address in the base register is word aligned. */
15625 bool
15626 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15628 HOST_WIDE_INT max_offset;
15630 /* Offset must be a multiple of 4 in Thumb mode. */
15631 if (TARGET_THUMB2 && ((offset & 3) != 0))
15632 return false;
15634 if (TARGET_THUMB2)
15635 max_offset = 1020;
15636 else if (TARGET_ARM)
15637 max_offset = 255;
15638 else
15639 return false;
15641 return ((offset <= max_offset) && (offset >= -max_offset));
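/* For instance, with the limits above an offset of 252 is accepted and
   256 rejected when compiling for ARM state, while Thumb-2 accepts 1020
   but rejects 1022, which is inside the 1020 limit but not a multiple
   of 4.  (Illustrative values only.)  */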
15644 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15645 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15646 Assumes that the address in the base register RN is word aligned. Pattern
15647 guarantees that both memory accesses use the same base register,
15648 the offsets are constants within the range, and the gap between the offsets is 4.
15649 If reload is complete, check that the registers are legal. WBACK indicates whether
15650 address is updated. LOAD indicates whether memory access is load or store. */
15651 bool
15652 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15653 bool wback, bool load)
15655 unsigned int t, t2, n;
15657 if (!reload_completed)
15658 return true;
15660 if (!offset_ok_for_ldrd_strd (offset))
15661 return false;
15663 t = REGNO (rt);
15664 t2 = REGNO (rt2);
15665 n = REGNO (rn);
15667 if ((TARGET_THUMB2)
15668 && ((wback && (n == t || n == t2))
15669 || (t == SP_REGNUM)
15670 || (t == PC_REGNUM)
15671 || (t2 == SP_REGNUM)
15672 || (t2 == PC_REGNUM)
15673 || (!load && (n == PC_REGNUM))
15674 || (load && (t == t2))
15675 /* Triggers Cortex-M3 LDRD errata. */
15676 || (!wback && load && fix_cm3_ldrd && (n == t))))
15677 return false;
15679 if ((TARGET_ARM)
15680 && ((wback && (n == t || n == t2))
15681 || (t2 == PC_REGNUM)
15682 || (t % 2 != 0) /* First destination register is not even. */
15683 || (t2 != t + 1)
15684 /* PC can be used as base register (for offset addressing only),
15685 but it is deprecated. */
15686 || (n == PC_REGNUM)))
15687 return false;
15689 return true;
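/* As an illustration of the ARM-state checks above: ldrd r0, r1, [r2]
   is acceptable (even first register, consecutive pair, base not PC),
   whereas ldrd r1, r2, [r3] is rejected because the first destination
   register is odd.  */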
15692 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15693 operand MEM's address contains an immediate offset from the base
15694 register and has no side effects, in which case it sets BASE and
15695 OFFSET accordingly. */
15696 static bool
15697 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15699 rtx addr;
15701 gcc_assert (base != NULL && offset != NULL);
15703 /* TODO: Handle more general memory operand patterns, such as
15704 PRE_DEC and PRE_INC. */
15706 if (side_effects_p (mem))
15707 return false;
15709 /* Can't deal with subregs. */
15710 if (GET_CODE (mem) == SUBREG)
15711 return false;
15713 gcc_assert (MEM_P (mem));
15715 *offset = const0_rtx;
15717 addr = XEXP (mem, 0);
15719 /* If addr isn't valid for DImode, then we can't handle it. */
15720 if (!arm_legitimate_address_p (DImode, addr,
15721 reload_in_progress || reload_completed))
15722 return false;
15724 if (REG_P (addr))
15726 *base = addr;
15727 return true;
15729 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15731 *base = XEXP (addr, 0);
15732 *offset = XEXP (addr, 1);
15733 return (REG_P (*base) && CONST_INT_P (*offset));
15736 return false;
15739 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15741 /* Called from a peephole2 to replace two word-size accesses with a
15742 single LDRD/STRD instruction. Returns true iff we can generate a
15743 new instruction sequence. That is, both accesses use the same base
15744 register and the gap between constant offsets is 4. This function
15745 may reorder its operands to match ldrd/strd RTL templates.
15746 OPERANDS are the operands found by the peephole matcher;
15747 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15748 corresponding memory operands. LOAD indicates whether the access
15749 is load or store. CONST_STORE indicates a store of constant
15750 integer values held in OPERANDS[4,5] and assumes that the pattern
15751 is four insns long, for the purpose of checking dead registers.
15752 COMMUTE indicates that register operands may be reordered. */
15753 bool
15754 gen_operands_ldrd_strd (rtx *operands, bool load,
15755 bool const_store, bool commute)
15757 int nops = 2;
15758 HOST_WIDE_INT offsets[2], offset;
15759 rtx base = NULL_RTX;
15760 rtx cur_base, cur_offset, tmp;
15761 int i, gap;
15762 HARD_REG_SET regset;
15764 gcc_assert (!const_store || !load);
15765 /* Check that the memory references are immediate offsets from the
15766 same base register. Extract the base register, the destination
15767 registers, and the corresponding memory offsets. */
15768 for (i = 0; i < nops; i++)
15770 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15771 return false;
15773 if (i == 0)
15774 base = cur_base;
15775 else if (REGNO (base) != REGNO (cur_base))
15776 return false;
15778 offsets[i] = INTVAL (cur_offset);
15779 if (GET_CODE (operands[i]) == SUBREG)
15781 tmp = SUBREG_REG (operands[i]);
15782 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15783 operands[i] = tmp;
15787 /* Make sure there is no dependency between the individual loads. */
15788 if (load && REGNO (operands[0]) == REGNO (base))
15789 return false; /* RAW */
15791 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15792 return false; /* WAW */
15794 /* If the same input register is used in both stores
15795 when storing different constants, try to find a free register.
15796 For example, the code
15797 mov r0, 0
15798 str r0, [r2]
15799 mov r0, 1
15800 str r0, [r2, #4]
15801 can be transformed into
15802 mov r1, 0
15803 strd r1, r0, [r2]
15804 in Thumb mode assuming that r1 is free. */
15805 if (const_store
15806 && REGNO (operands[0]) == REGNO (operands[1])
15807 && INTVAL (operands[4]) != INTVAL (operands[5]))
15809 if (TARGET_THUMB2)
15811 CLEAR_HARD_REG_SET (regset);
15812 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15813 if (tmp == NULL_RTX)
15814 return false;
15816 /* Use the new register in the first load to ensure that
15817 if the original input register is not dead after peephole,
15818 then it will have the correct constant value. */
15819 operands[0] = tmp;
15821 else if (TARGET_ARM)
15823 return false;
15824 int regno = REGNO (operands[0]);
15825 if (!peep2_reg_dead_p (4, operands[0]))
15827 /* When the input register is even and is not dead after the
15828 pattern, it has to hold the second constant but we cannot
15829 form a legal STRD in ARM mode with this register as the second
15830 register. */
15831 if (regno % 2 == 0)
15832 return false;
15834 /* Is regno-1 free? */
15835 SET_HARD_REG_SET (regset);
15836 CLEAR_HARD_REG_BIT(regset, regno - 1);
15837 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15838 if (tmp == NULL_RTX)
15839 return false;
15841 operands[0] = tmp;
15843 else
15845 /* Find a DImode register. */
15846 CLEAR_HARD_REG_SET (regset);
15847 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15848 if (tmp != NULL_RTX)
15850 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15851 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15853 else
15855 /* Can we use the input register to form a DI register? */
15856 SET_HARD_REG_SET (regset);
15857 CLEAR_HARD_REG_BIT(regset,
15858 regno % 2 == 0 ? regno + 1 : regno - 1);
15859 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15860 if (tmp == NULL_RTX)
15861 return false;
15862 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15866 gcc_assert (operands[0] != NULL_RTX);
15867 gcc_assert (operands[1] != NULL_RTX);
15868 gcc_assert (REGNO (operands[0]) % 2 == 0);
15869 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15873 /* Make sure the instructions are ordered with lower memory access first. */
15874 if (offsets[0] > offsets[1])
15876 gap = offsets[0] - offsets[1];
15877 offset = offsets[1];
15879 /* Swap the instructions such that lower memory is accessed first. */
15880 SWAP_RTX (operands[0], operands[1]);
15881 SWAP_RTX (operands[2], operands[3]);
15882 if (const_store)
15883 SWAP_RTX (operands[4], operands[5]);
15885 else
15887 gap = offsets[1] - offsets[0];
15888 offset = offsets[0];
15891 /* Make sure accesses are to consecutive memory locations. */
15892 if (gap != 4)
15893 return false;
15895 /* Make sure we generate legal instructions. */
15896 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15897 false, load))
15898 return true;
15900 /* In Thumb state, where registers are almost unconstrained, there
15901 is little hope of fixing it. */
15902 if (TARGET_THUMB2)
15903 return false;
15905 if (load && commute)
15907 /* Try reordering registers. */
15908 SWAP_RTX (operands[0], operands[1]);
15909 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15910 false, load))
15911 return true;
15914 if (const_store)
15916 /* If input registers are dead after this pattern, they can be
15917 reordered or replaced by other registers that are free in the
15918 current pattern. */
15919 if (!peep2_reg_dead_p (4, operands[0])
15920 || !peep2_reg_dead_p (4, operands[1]))
15921 return false;
15923 /* Try to reorder the input registers. */
15924 /* For example, the code
15925 mov r0, 0
15926 mov r1, 1
15927 str r1, [r2]
15928 str r0, [r2, #4]
15929 can be transformed into
15930 mov r1, 0
15931 mov r0, 1
15932 strd r0, [r2]
15934 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15935 false, false))
15937 SWAP_RTX (operands[0], operands[1]);
15938 return true;
15941 /* Try to find a free DI register. */
15942 CLEAR_HARD_REG_SET (regset);
15943 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15944 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15945 while (true)
15947 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15948 if (tmp == NULL_RTX)
15949 return false;
15951 /* DREG must be an even-numbered register in DImode.
15952 Split it into SI registers. */
15953 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15954 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15955 gcc_assert (operands[0] != NULL_RTX);
15956 gcc_assert (operands[1] != NULL_RTX);
15957 gcc_assert (REGNO (operands[0]) % 2 == 0);
15958 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15960 return (operands_ok_ldrd_strd (operands[0], operands[1],
15961 base, offset,
15962 false, load));
15966 return false;
15968 #undef SWAP_RTX
15973 /* Print a symbolic form of X to the debug file, F. */
15974 static void
15975 arm_print_value (FILE *f, rtx x)
15977 switch (GET_CODE (x))
15979 case CONST_INT:
15980 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15981 return;
15983 case CONST_DOUBLE:
15984 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15985 return;
15987 case CONST_VECTOR:
15989 int i;
15991 fprintf (f, "<");
15992 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15994 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15995 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15996 fputc (',', f);
15998 fprintf (f, ">");
16000 return;
16002 case CONST_STRING:
16003 fprintf (f, "\"%s\"", XSTR (x, 0));
16004 return;
16006 case SYMBOL_REF:
16007 fprintf (f, "`%s'", XSTR (x, 0));
16008 return;
16010 case LABEL_REF:
16011 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16012 return;
16014 case CONST:
16015 arm_print_value (f, XEXP (x, 0));
16016 return;
16018 case PLUS:
16019 arm_print_value (f, XEXP (x, 0));
16020 fprintf (f, "+");
16021 arm_print_value (f, XEXP (x, 1));
16022 return;
16024 case PC:
16025 fprintf (f, "pc");
16026 return;
16028 default:
16029 fprintf (f, "????");
16030 return;
16034 /* Routines for manipulation of the constant pool. */
16036 /* Arm instructions cannot load a large constant directly into a
16037 register; they have to come from a pc relative load. The constant
16038 must therefore be placed in the addressable range of the pc
16039 relative load. Depending on the precise pc relative load
16040 instruction the range is somewhere between 256 bytes and 4k. This
16041 means that we often have to dump a constant inside a function, and
16042 generate code to branch around it.
16044 It is important to minimize this, since the branches will slow
16045 things down and make the code larger.
16047 Normally we can hide the table after an existing unconditional
16048 branch so that there is no interruption of the flow, but in the
16049 worst case the code looks like this:
16051 ldr rn, L1
16053 b L2
16054 align
16055 L1: .long value
16059 ldr rn, L3
16061 b L4
16062 align
16063 L3: .long value
16067 We fix this by performing a scan after scheduling, which notices
16068 which instructions need to have their operands fetched from the
16069 constant table and builds the table.
16071 The algorithm starts by building a table of all the constants that
16072 need fixing up and all the natural barriers in the function (places
16073 where a constant table can be dropped without breaking the flow).
16074 For each fixup we note how far the pc-relative replacement will be
16075 able to reach and the offset of the instruction into the function.
16077 Having built the table we then group the fixes together to form
16078 tables that are as large as possible (subject to addressing
16079 constraints) and emit each table of constants after the last
16080 barrier that is within range of all the instructions in the group.
16081 If a group does not contain a barrier, then we forcibly create one
16082 by inserting a jump instruction into the flow. Once the table has
16083 been inserted, the insns are then modified to reference the
16084 relevant entry in the pool.
16086 Possible enhancements to the algorithm (not implemented) are:
16088 1) For some processors and object formats, there may be benefit in
16089 aligning the pools to the start of cache lines; this alignment
16090 would need to be taken into account when calculating addressability
16091 of a pool. */
16093 /* These typedefs are located at the start of this file, so that
16094 they can be used in the prototypes there. This comment is to
16095 remind readers of that fact so that the following structures
16096 can be understood more easily.
16098 typedef struct minipool_node Mnode;
16099 typedef struct minipool_fixup Mfix; */
16101 struct minipool_node
16103 /* Doubly linked chain of entries. */
16104 Mnode * next;
16105 Mnode * prev;
16106 /* The maximum offset into the code at which this entry can be placed. While
16107 pushing fixes for forward references, all entries are sorted in order
16108 of increasing max_address. */
16109 HOST_WIDE_INT max_address;
16110 /* Similarly for an entry inserted for a backwards ref. */
16111 HOST_WIDE_INT min_address;
16112 /* The number of fixes referencing this entry. This can become zero
16113 if we "unpush" an entry. In this case we ignore the entry when we
16114 come to emit the code. */
16115 int refcount;
16116 /* The offset from the start of the minipool. */
16117 HOST_WIDE_INT offset;
16118 /* The value in the table. */
16119 rtx value;
16120 /* The mode of value. */
16121 enum machine_mode mode;
16122 /* The size of the value. With iWMMXt enabled
16123 sizes > 4 also imply an alignment of 8 bytes. */
16124 int fix_size;
16127 struct minipool_fixup
16129 Mfix * next;
16130 rtx_insn * insn;
16131 HOST_WIDE_INT address;
16132 rtx * loc;
16133 enum machine_mode mode;
16134 int fix_size;
16135 rtx value;
16136 Mnode * minipool;
16137 HOST_WIDE_INT forwards;
16138 HOST_WIDE_INT backwards;
16141 /* Fixes less than a word need padding out to a word boundary. */
16142 #define MINIPOOL_FIX_SIZE(mode) \
16143 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
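/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   are both 4, since sub-word fixes are padded to a full word, while
   MINIPOOL_FIX_SIZE (DImode) is 8 and MINIPOOL_FIX_SIZE (V16QImode) is 16.  */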
16145 static Mnode * minipool_vector_head;
16146 static Mnode * minipool_vector_tail;
16147 static rtx_code_label *minipool_vector_label;
16148 static int minipool_pad;
16150 /* The linked list of all minipool fixes required for this function. */
16151 Mfix * minipool_fix_head;
16152 Mfix * minipool_fix_tail;
16153 /* The fix entry for the current minipool, once it has been placed. */
16154 Mfix * minipool_barrier;
16156 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16157 #define JUMP_TABLES_IN_TEXT_SECTION 0
16158 #endif
16160 static HOST_WIDE_INT
16161 get_jump_table_size (rtx_jump_table_data *insn)
16163 /* ADDR_VECs only take room if read-only data goes into the text
16164 section. */
16165 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16167 rtx body = PATTERN (insn);
16168 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16169 HOST_WIDE_INT size;
16170 HOST_WIDE_INT modesize;
16172 modesize = GET_MODE_SIZE (GET_MODE (body));
16173 size = modesize * XVECLEN (body, elt);
16174 switch (modesize)
16176 case 1:
16177 /* Round up size of TBB table to a halfword boundary. */
16178 size = (size + 1) & ~(HOST_WIDE_INT)1;
16179 break;
16180 case 2:
16181 /* No padding necessary for TBH. */
16182 break;
16183 case 4:
16184 /* Add two bytes for alignment on Thumb. */
16185 if (TARGET_THUMB)
16186 size += 2;
16187 break;
16188 default:
16189 gcc_unreachable ();
16191 return size;
16194 return 0;
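/* As an illustration of the sizing above: a QImode (TBB-style)
   ADDR_DIFF_VEC with five entries occupies 5 bytes, rounded up to 6 for
   the halfword boundary, while a five-entry HImode (TBH-style) table
   needs exactly 10 bytes and no padding.  */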
16197 /* Return the maximum amount of padding that will be inserted before
16198 label LABEL. */
16200 static HOST_WIDE_INT
16201 get_label_padding (rtx label)
16203 HOST_WIDE_INT align, min_insn_size;
16205 align = 1 << label_to_alignment (label);
16206 min_insn_size = TARGET_THUMB ? 2 : 4;
16207 return align > min_insn_size ? align - min_insn_size : 0;
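/* For instance, a label that must be aligned to 8 bytes in Thumb code
   (minimum insn size 2) may be preceded by up to 6 bytes of padding,
   whereas a 4-byte-aligned label in ARM code needs none, every ARM insn
   already being 4 bytes long.  */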
16210 /* Move a minipool fix MP from its current location to before MAX_MP.
16211 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16212 constraints may need updating. */
16213 static Mnode *
16214 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16215 HOST_WIDE_INT max_address)
16217 /* The code below assumes these are different. */
16218 gcc_assert (mp != max_mp);
16220 if (max_mp == NULL)
16222 if (max_address < mp->max_address)
16223 mp->max_address = max_address;
16225 else
16227 if (max_address > max_mp->max_address - mp->fix_size)
16228 mp->max_address = max_mp->max_address - mp->fix_size;
16229 else
16230 mp->max_address = max_address;
16232 /* Unlink MP from its current position. Since max_mp is non-null,
16233 mp->prev must be non-null. */
16234 mp->prev->next = mp->next;
16235 if (mp->next != NULL)
16236 mp->next->prev = mp->prev;
16237 else
16238 minipool_vector_tail = mp->prev;
16240 /* Re-insert it before MAX_MP. */
16241 mp->next = max_mp;
16242 mp->prev = max_mp->prev;
16243 max_mp->prev = mp;
16245 if (mp->prev != NULL)
16246 mp->prev->next = mp;
16247 else
16248 minipool_vector_head = mp;
16251 /* Save the new entry. */
16252 max_mp = mp;
16254 /* Scan over the preceding entries and adjust their addresses as
16255 required. */
16256 while (mp->prev != NULL
16257 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16259 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16260 mp = mp->prev;
16263 return max_mp;
16266 /* Add a constant to the minipool for a forward reference. Returns the
16267 node added or NULL if the constant will not fit in this pool. */
16268 static Mnode *
16269 add_minipool_forward_ref (Mfix *fix)
16271 /* If set, max_mp is the first pool_entry that has a lower
16272 constraint than the one we are trying to add. */
16273 Mnode * max_mp = NULL;
16274 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16275 Mnode * mp;
16277 /* If the minipool starts before the end of FIX->INSN then this FIX
16278 cannot be placed into the current pool. Furthermore, adding the
16279 new constant pool entry may cause the pool to start FIX_SIZE bytes
16280 earlier. */
16281 if (minipool_vector_head &&
16282 (fix->address + get_attr_length (fix->insn)
16283 >= minipool_vector_head->max_address - fix->fix_size))
16284 return NULL;
16286 /* Scan the pool to see if a constant with the same value has
16287 already been added. While we are doing this, also note the
16288 location where we must insert the constant if it doesn't already
16289 exist. */
16290 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16292 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16293 && fix->mode == mp->mode
16294 && (!LABEL_P (fix->value)
16295 || (CODE_LABEL_NUMBER (fix->value)
16296 == CODE_LABEL_NUMBER (mp->value)))
16297 && rtx_equal_p (fix->value, mp->value))
16299 /* More than one fix references this entry. */
16300 mp->refcount++;
16301 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16304 /* Note the insertion point if necessary. */
16305 if (max_mp == NULL
16306 && mp->max_address > max_address)
16307 max_mp = mp;
16309 /* If we are inserting an 8-byte aligned quantity and
16310 we have not already found an insertion point, then
16311 make sure that all such 8-byte aligned quantities are
16312 placed at the start of the pool. */
16313 if (ARM_DOUBLEWORD_ALIGN
16314 && max_mp == NULL
16315 && fix->fix_size >= 8
16316 && mp->fix_size < 8)
16318 max_mp = mp;
16319 max_address = mp->max_address;
16323 /* The value is not currently in the minipool, so we need to create
16324 a new entry for it. If MAX_MP is NULL, the entry will be put on
16325 the end of the list since the placement is less constrained than
16326 any existing entry. Otherwise, we insert the new fix before
16327 MAX_MP and, if necessary, adjust the constraints on the other
16328 entries. */
16329 mp = XNEW (Mnode);
16330 mp->fix_size = fix->fix_size;
16331 mp->mode = fix->mode;
16332 mp->value = fix->value;
16333 mp->refcount = 1;
16334 /* Not yet required for a backwards ref. */
16335 mp->min_address = -65536;
16337 if (max_mp == NULL)
16339 mp->max_address = max_address;
16340 mp->next = NULL;
16341 mp->prev = minipool_vector_tail;
16343 if (mp->prev == NULL)
16345 minipool_vector_head = mp;
16346 minipool_vector_label = gen_label_rtx ();
16348 else
16349 mp->prev->next = mp;
16351 minipool_vector_tail = mp;
16353 else
16355 if (max_address > max_mp->max_address - mp->fix_size)
16356 mp->max_address = max_mp->max_address - mp->fix_size;
16357 else
16358 mp->max_address = max_address;
16360 mp->next = max_mp;
16361 mp->prev = max_mp->prev;
16362 max_mp->prev = mp;
16363 if (mp->prev != NULL)
16364 mp->prev->next = mp;
16365 else
16366 minipool_vector_head = mp;
16369 /* Save the new entry. */
16370 max_mp = mp;
16372 /* Scan over the preceding entries and adjust their addresses as
16373 required. */
16374 while (mp->prev != NULL
16375 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16377 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16378 mp = mp->prev;
16381 return max_mp;
16384 static Mnode *
16385 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16386 HOST_WIDE_INT min_address)
16388 HOST_WIDE_INT offset;
16390 /* The code below assumes these are different. */
16391 gcc_assert (mp != min_mp);
16393 if (min_mp == NULL)
16395 if (min_address > mp->min_address)
16396 mp->min_address = min_address;
16398 else
16400 /* We will adjust this below if it is too loose. */
16401 mp->min_address = min_address;
16403 /* Unlink MP from its current position. Since min_mp is non-null,
16404 mp->next must be non-null. */
16405 mp->next->prev = mp->prev;
16406 if (mp->prev != NULL)
16407 mp->prev->next = mp->next;
16408 else
16409 minipool_vector_head = mp->next;
16411 /* Reinsert it after MIN_MP. */
16412 mp->prev = min_mp;
16413 mp->next = min_mp->next;
16414 min_mp->next = mp;
16415 if (mp->next != NULL)
16416 mp->next->prev = mp;
16417 else
16418 minipool_vector_tail = mp;
16421 min_mp = mp;
16423 offset = 0;
16424 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16426 mp->offset = offset;
16427 if (mp->refcount > 0)
16428 offset += mp->fix_size;
16430 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16431 mp->next->min_address = mp->min_address + mp->fix_size;
16434 return min_mp;
16437 /* Add a constant to the minipool for a backward reference. Returns the
16438 node added or NULL if the constant will not fit in this pool.
16440 Note that the code for insertion for a backwards reference can be
16441 somewhat confusing because the calculated offsets for each fix do
16442 not take into account the size of the pool (which is still under
16443 construction). */
16444 static Mnode *
16445 add_minipool_backward_ref (Mfix *fix)
16447 /* If set, min_mp is the last pool_entry that has a lower constraint
16448 than the one we are trying to add. */
16449 Mnode *min_mp = NULL;
16450 /* This can be negative, since it is only a constraint. */
16451 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16452 Mnode *mp;
16454 /* If we can't reach the current pool from this insn, or if we can't
16455 insert this entry at the end of the pool without pushing other
16456 fixes out of range, then we don't try. This ensures that we
16457 can't fail later on. */
16458 if (min_address >= minipool_barrier->address
16459 || (minipool_vector_tail->min_address + fix->fix_size
16460 >= minipool_barrier->address))
16461 return NULL;
16463 /* Scan the pool to see if a constant with the same value has
16464 already been added. While we are doing this, also note the
16465 location where we must insert the constant if it doesn't already
16466 exist. */
16467 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16469 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16470 && fix->mode == mp->mode
16471 && (!LABEL_P (fix->value)
16472 || (CODE_LABEL_NUMBER (fix->value)
16473 == CODE_LABEL_NUMBER (mp->value)))
16474 && rtx_equal_p (fix->value, mp->value)
16475 /* Check that there is enough slack to move this entry to the
16476 end of the table (this is conservative). */
16477 && (mp->max_address
16478 > (minipool_barrier->address
16479 + minipool_vector_tail->offset
16480 + minipool_vector_tail->fix_size)))
16482 mp->refcount++;
16483 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16486 if (min_mp != NULL)
16487 mp->min_address += fix->fix_size;
16488 else
16490 /* Note the insertion point if necessary. */
16491 if (mp->min_address < min_address)
16493 /* For now, we do not allow the insertion of 8-byte alignment
16494 requiring nodes anywhere but at the start of the pool. */
16495 if (ARM_DOUBLEWORD_ALIGN
16496 && fix->fix_size >= 8 && mp->fix_size < 8)
16497 return NULL;
16498 else
16499 min_mp = mp;
16501 else if (mp->max_address
16502 < minipool_barrier->address + mp->offset + fix->fix_size)
16504 /* Inserting before this entry would push the fix beyond
16505 its maximum address (which can happen if we have
16506 re-located a forwards fix); force the new fix to come
16507 after it. */
16508 if (ARM_DOUBLEWORD_ALIGN
16509 && fix->fix_size >= 8 && mp->fix_size < 8)
16510 return NULL;
16511 else
16513 min_mp = mp;
16514 min_address = mp->min_address + fix->fix_size;
16517 /* Do not insert a non-8-byte aligned quantity before 8-byte
16518 aligned quantities. */
16519 else if (ARM_DOUBLEWORD_ALIGN
16520 && fix->fix_size < 8
16521 && mp->fix_size >= 8)
16523 min_mp = mp;
16524 min_address = mp->min_address + fix->fix_size;
16529 /* We need to create a new entry. */
16530 mp = XNEW (Mnode);
16531 mp->fix_size = fix->fix_size;
16532 mp->mode = fix->mode;
16533 mp->value = fix->value;
16534 mp->refcount = 1;
16535 mp->max_address = minipool_barrier->address + 65536;
16537 mp->min_address = min_address;
16539 if (min_mp == NULL)
16541 mp->prev = NULL;
16542 mp->next = minipool_vector_head;
16544 if (mp->next == NULL)
16546 minipool_vector_tail = mp;
16547 minipool_vector_label = gen_label_rtx ();
16549 else
16550 mp->next->prev = mp;
16552 minipool_vector_head = mp;
16554 else
16556 mp->next = min_mp->next;
16557 mp->prev = min_mp;
16558 min_mp->next = mp;
16560 if (mp->next != NULL)
16561 mp->next->prev = mp;
16562 else
16563 minipool_vector_tail = mp;
16566 /* Save the new entry. */
16567 min_mp = mp;
16569 if (mp->prev)
16570 mp = mp->prev;
16571 else
16572 mp->offset = 0;
16574 /* Scan over the following entries and adjust their offsets. */
16575 while (mp->next != NULL)
16577 if (mp->next->min_address < mp->min_address + mp->fix_size)
16578 mp->next->min_address = mp->min_address + mp->fix_size;
16580 if (mp->refcount)
16581 mp->next->offset = mp->offset + mp->fix_size;
16582 else
16583 mp->next->offset = mp->offset;
16585 mp = mp->next;
16588 return min_mp;
16591 static void
16592 assign_minipool_offsets (Mfix *barrier)
16594 HOST_WIDE_INT offset = 0;
16595 Mnode *mp;
16597 minipool_barrier = barrier;
16599 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16601 mp->offset = offset;
16603 if (mp->refcount > 0)
16604 offset += mp->fix_size;
16608 /* Output the literal table. */
16609 static void
16610 dump_minipool (rtx_insn *scan)
16612 Mnode * mp;
16613 Mnode * nmp;
16614 int align64 = 0;
16616 if (ARM_DOUBLEWORD_ALIGN)
16617 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16618 if (mp->refcount > 0 && mp->fix_size >= 8)
16620 align64 = 1;
16621 break;
16624 if (dump_file)
16625 fprintf (dump_file,
16626 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16627 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16629 scan = emit_label_after (gen_label_rtx (), scan);
16630 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16631 scan = emit_label_after (minipool_vector_label, scan);
16633 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16635 if (mp->refcount > 0)
16637 if (dump_file)
16639 fprintf (dump_file,
16640 ";; Offset %u, min %ld, max %ld ",
16641 (unsigned) mp->offset, (unsigned long) mp->min_address,
16642 (unsigned long) mp->max_address);
16643 arm_print_value (dump_file, mp->value);
16644 fputc ('\n', dump_file);
16647 switch (mp->fix_size)
16649 #ifdef HAVE_consttable_1
16650 case 1:
16651 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16652 break;
16654 #endif
16655 #ifdef HAVE_consttable_2
16656 case 2:
16657 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16658 break;
16660 #endif
16661 #ifdef HAVE_consttable_4
16662 case 4:
16663 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16664 break;
16666 #endif
16667 #ifdef HAVE_consttable_8
16668 case 8:
16669 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16670 break;
16672 #endif
16673 #ifdef HAVE_consttable_16
16674 case 16:
16675 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16676 break;
16678 #endif
16679 default:
16680 gcc_unreachable ();
16684 nmp = mp->next;
16685 free (mp);
16688 minipool_vector_head = minipool_vector_tail = NULL;
16689 scan = emit_insn_after (gen_consttable_end (), scan);
16690 scan = emit_barrier_after (scan);
16693 /* Return the cost of forcibly inserting a barrier after INSN. */
16694 static int
16695 arm_barrier_cost (rtx insn)
16697 /* Basing the location of the pool on the loop depth is preferable,
16698 but at the moment, the basic block information seems to be
16699 corrupted by this stage of the compilation. */
16700 int base_cost = 50;
16701 rtx next = next_nonnote_insn (insn);
16703 if (next != NULL && LABEL_P (next))
16704 base_cost -= 20;
16706 switch (GET_CODE (insn))
16708 case CODE_LABEL:
16709 /* It will always be better to place the table before the label, rather
16710 than after it. */
16711 return 50;
16713 case INSN:
16714 case CALL_INSN:
16715 return base_cost;
16717 case JUMP_INSN:
16718 return base_cost - 10;
16720 default:
16721 return base_cost + 10;
16725 /* Find the best place in the insn stream in the range
16726 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16727 Create the barrier by inserting a jump and add a new fix entry for
16728 it. */
16729 static Mfix *
16730 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16732 HOST_WIDE_INT count = 0;
16733 rtx_barrier *barrier;
16734 rtx_insn *from = fix->insn;
16735 /* The instruction after which we will insert the jump. */
16736 rtx_insn *selected = NULL;
16737 int selected_cost;
16738 /* The address at which the jump instruction will be placed. */
16739 HOST_WIDE_INT selected_address;
16740 Mfix * new_fix;
16741 HOST_WIDE_INT max_count = max_address - fix->address;
16742 rtx_code_label *label = gen_label_rtx ();
16744 selected_cost = arm_barrier_cost (from);
16745 selected_address = fix->address;
16747 while (from && count < max_count)
16749 rtx_jump_table_data *tmp;
16750 int new_cost;
16752 /* This code shouldn't have been called if there was a natural barrier
16753 within range. */
16754 gcc_assert (!BARRIER_P (from));
16756 /* Count the length of this insn. This must stay in sync with the
16757 code that pushes minipool fixes. */
16758 if (LABEL_P (from))
16759 count += get_label_padding (from);
16760 else
16761 count += get_attr_length (from);
16763 /* If there is a jump table, add its length. */
16764 if (tablejump_p (from, NULL, &tmp))
16766 count += get_jump_table_size (tmp);
16768 /* Jump tables aren't in a basic block, so base the cost on
16769 the dispatch insn. If we select this location, we will
16770 still put the pool after the table. */
16771 new_cost = arm_barrier_cost (from);
16773 if (count < max_count
16774 && (!selected || new_cost <= selected_cost))
16776 selected = tmp;
16777 selected_cost = new_cost;
16778 selected_address = fix->address + count;
16781 /* Continue after the dispatch table. */
16782 from = NEXT_INSN (tmp);
16783 continue;
16786 new_cost = arm_barrier_cost (from);
16788 if (count < max_count
16789 && (!selected || new_cost <= selected_cost))
16791 selected = from;
16792 selected_cost = new_cost;
16793 selected_address = fix->address + count;
16796 from = NEXT_INSN (from);
16799 /* Make sure that we found a place to insert the jump. */
16800 gcc_assert (selected);
16802 /* Make sure we do not split a call and its corresponding
16803 CALL_ARG_LOCATION note. */
16804 if (CALL_P (selected))
16806 rtx_insn *next = NEXT_INSN (selected);
16807 if (next && NOTE_P (next)
16808 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16809 selected = next;
16812 /* Create a new JUMP_INSN that branches around a barrier. */
16813 from = emit_jump_insn_after (gen_jump (label), selected);
16814 JUMP_LABEL (from) = label;
16815 barrier = emit_barrier_after (from);
16816 emit_label_after (label, barrier);
16818 /* Create a minipool barrier entry for the new barrier. */
16819 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16820 new_fix->insn = barrier;
16821 new_fix->address = selected_address;
16822 new_fix->next = fix->next;
16823 fix->next = new_fix;
16825 return new_fix;
16828 /* Record that there is a natural barrier in the insn stream at
16829 ADDRESS. */
16830 static void
16831 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16833 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16835 fix->insn = insn;
16836 fix->address = address;
16838 fix->next = NULL;
16839 if (minipool_fix_head != NULL)
16840 minipool_fix_tail->next = fix;
16841 else
16842 minipool_fix_head = fix;
16844 minipool_fix_tail = fix;
16847 /* Record INSN, which will need fixing up to load a value from the
16848 minipool. ADDRESS is the offset of the insn from the start of the
16849 function; LOC is a pointer to the part of the insn which requires
16850 fixing; VALUE is the constant that must be loaded, which is of type
16851 MODE. */
16852 static void
16853 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16854 enum machine_mode mode, rtx value)
16856 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16858 fix->insn = insn;
16859 fix->address = address;
16860 fix->loc = loc;
16861 fix->mode = mode;
16862 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16863 fix->value = value;
16864 fix->forwards = get_attr_pool_range (insn);
16865 fix->backwards = get_attr_neg_pool_range (insn);
16866 fix->minipool = NULL;
16868 /* If an insn doesn't have a range defined for it, then it isn't
16869 expecting to be reworked by this code. Better to stop now than
16870 to generate duff assembly code. */
16871 gcc_assert (fix->forwards || fix->backwards);
16873 /* If an entry requires 8-byte alignment then assume all constant pools
16874 require 4 bytes of padding. Trying to do this later on a per-pool
16875 basis is awkward because existing pool entries have to be modified. */
16876 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16877 minipool_pad = 4;
16879 if (dump_file)
16881 fprintf (dump_file,
16882 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16883 GET_MODE_NAME (mode),
16884 INSN_UID (insn), (unsigned long) address,
16885 -1 * (long)fix->backwards, (long)fix->forwards);
16886 arm_print_value (dump_file, fix->value);
16887 fprintf (dump_file, "\n");
16890 /* Add it to the chain of fixes. */
16891 fix->next = NULL;
16893 if (minipool_fix_head != NULL)
16894 minipool_fix_tail->next = fix;
16895 else
16896 minipool_fix_head = fix;
16898 minipool_fix_tail = fix;
16901 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16902 Returns the number of insns needed, or 99 if we always want to synthesize
16903 the value. */
16905 arm_max_const_double_inline_cost ()
16907 /* Let the value get synthesized to avoid the use of literal pools. */
16908 if (arm_disable_literal_pool)
16909 return 99;
16911 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16914 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16915 Returns the number of insns needed, or 99 if we don't know how to
16916 do it. */
16918 arm_const_double_inline_cost (rtx val)
16920 rtx lowpart, highpart;
16921 enum machine_mode mode;
16923 mode = GET_MODE (val);
16925 if (mode == VOIDmode)
16926 mode = DImode;
16928 gcc_assert (GET_MODE_SIZE (mode) == 8);
16930 lowpart = gen_lowpart (SImode, val);
16931 highpart = gen_highpart_mode (SImode, mode, val);
16933 gcc_assert (CONST_INT_P (lowpart));
16934 gcc_assert (CONST_INT_P (highpart));
16936 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16937 NULL_RTX, NULL_RTX, 0, 0)
16938 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16939 NULL_RTX, NULL_RTX, 0, 0));
16942 /* Cost of loading a SImode constant. */
16943 static inline int
16944 arm_const_inline_cost (enum rtx_code code, rtx val)
16946 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16947 NULL_RTX, NULL_RTX, 1, 0);
16950 /* Return true if it is worthwhile to split a 64-bit constant into two
16951 32-bit operations. This is the case if optimizing for size, or
16952 if we have load delay slots, or if one 32-bit part can be done with
16953 a single data operation. */
16954 bool
16955 arm_const_double_by_parts (rtx val)
16957 enum machine_mode mode = GET_MODE (val);
16958 rtx part;
16960 if (optimize_size || arm_ld_sched)
16961 return true;
16963 if (mode == VOIDmode)
16964 mode = DImode;
16966 part = gen_highpart_mode (SImode, mode, val);
16968 gcc_assert (CONST_INT_P (part));
16970 if (const_ok_for_arm (INTVAL (part))
16971 || const_ok_for_arm (~INTVAL (part)))
16972 return true;
16974 part = gen_lowpart (SImode, val);
16976 gcc_assert (CONST_INT_P (part));
16978 if (const_ok_for_arm (INTVAL (part))
16979 || const_ok_for_arm (~INTVAL (part)))
16980 return true;
16982 return false;
16985 /* Return true if it is possible to inline both the high and low parts
16986 of a 64-bit constant into 32-bit data processing instructions. */
16987 bool
16988 arm_const_double_by_immediates (rtx val)
16990 enum machine_mode mode = GET_MODE (val);
16991 rtx part;
16993 if (mode == VOIDmode)
16994 mode = DImode;
16996 part = gen_highpart_mode (SImode, mode, val);
16998 gcc_assert (CONST_INT_P (part));
17000 if (!const_ok_for_arm (INTVAL (part)))
17001 return false;
17003 part = gen_lowpart (SImode, val);
17005 gcc_assert (CONST_INT_P (part));
17007 if (!const_ok_for_arm (INTVAL (part)))
17008 return false;
17010 return true;
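/* For example, the 64-bit value 0x0000001200000034 can be built entirely
   from immediates, since both 0x12 and 0x34 are valid data-processing
   constants, whereas a value whose low word is 0x12345678 cannot, that
   word not being expressible as a single rotated 8-bit immediate.  */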
17013 /* Scan INSN and note any of its operands that need fixing.
17014 If DO_PUSHES is false we do not actually push any of the fixups
17015 needed. */
17016 static void
17017 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17019 int opno;
17021 extract_insn (insn);
17023 if (!constrain_operands (1))
17024 fatal_insn_not_found (insn);
17026 if (recog_data.n_alternatives == 0)
17027 return;
17029 /* Fill in recog_op_alt with information about the constraints of
17030 this insn. */
17031 preprocess_constraints (insn);
17033 const operand_alternative *op_alt = which_op_alt ();
17034 for (opno = 0; opno < recog_data.n_operands; opno++)
17036 /* Things we need to fix can only occur in inputs. */
17037 if (recog_data.operand_type[opno] != OP_IN)
17038 continue;
17040 /* If this alternative is a memory reference, then any mention
17041 of constants in this alternative is really to fool reload
17042 into allowing us to accept one there. We need to fix them up
17043 now so that we output the right code. */
17044 if (op_alt[opno].memory_ok)
17046 rtx op = recog_data.operand[opno];
17048 if (CONSTANT_P (op))
17050 if (do_pushes)
17051 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17052 recog_data.operand_mode[opno], op);
17054 else if (MEM_P (op)
17055 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17056 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17058 if (do_pushes)
17060 rtx cop = avoid_constant_pool_reference (op);
17062 /* Casting the address of something to a mode narrower
17063 than a word can cause avoid_constant_pool_reference()
17064 to return the pool reference itself. That's no good to
17065 us here. Let's just hope that we can use the
17066 constant pool value directly. */
17067 if (op == cop)
17068 cop = get_pool_constant (XEXP (op, 0));
17070 push_minipool_fix (insn, address,
17071 recog_data.operand_loc[opno],
17072 recog_data.operand_mode[opno], cop);
17079 return;
17082 /* Rewrite move insn into subtract of 0 if the condition codes will
17083 be useful in the next conditional jump insn. */
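/* Roughly speaking: if a block ends in a compare-and-branch against zero
   of some register, and the candidate flag-setting insn before it is a
   simple low-register move involving that register, the move is rewritten
   as a subtract of zero, e.g.

	movs	r1, r2		==>	subs	r1, r2, #0

   so that the flags it sets make the explicit comparison with zero
   redundant.  */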
17085 static void
17086 thumb1_reorg (void)
17088 basic_block bb;
17090 FOR_EACH_BB_FN (bb, cfun)
17092 rtx dest, src;
17093 rtx pat, op0, set = NULL;
17094 rtx_insn *prev, *insn = BB_END (bb);
17095 bool insn_clobbered = false;
17097 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17098 insn = PREV_INSN (insn);
17100 /* Find the last cbranchsi4_insn in basic block BB. */
17101 if (insn == BB_HEAD (bb)
17102 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17103 continue;
17105 /* Get the register with which we are comparing. */
17106 pat = PATTERN (insn);
17107 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17109 /* Find the first flag setting insn before INSN in basic block BB. */
17110 gcc_assert (insn != BB_HEAD (bb));
17111 for (prev = PREV_INSN (insn);
17112 (!insn_clobbered
17113 && prev != BB_HEAD (bb)
17114 && (NOTE_P (prev)
17115 || DEBUG_INSN_P (prev)
17116 || ((set = single_set (prev)) != NULL
17117 && get_attr_conds (prev) == CONDS_NOCOND)));
17118 prev = PREV_INSN (prev))
17120 if (reg_set_p (op0, prev))
17121 insn_clobbered = true;
17124 /* Skip if op0 is clobbered by insn other than prev. */
17125 if (insn_clobbered)
17126 continue;
17128 if (!set)
17129 continue;
17131 dest = SET_DEST (set);
17132 src = SET_SRC (set);
17133 if (!low_register_operand (dest, SImode)
17134 || !low_register_operand (src, SImode))
17135 continue;
17137 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17138 in INSN. Both src and dest of the move insn are checked. */
17139 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17141 dest = copy_rtx (dest);
17142 src = copy_rtx (src);
17143 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17144 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17145 INSN_CODE (prev) = -1;
17146 /* Set test register in INSN to dest. */
17147 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17148 INSN_CODE (insn) = -1;
17153 /* Convert instructions to their cc-clobbering variant if possible, since
17154 that allows us to use smaller encodings. */
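/* For example, when the condition flags are dead at that point,
   'add r0, r1, r2' on low registers can become the flag-setting
   'adds r0, r1, r2', and 'mov r3, #10' can become 'movs r3, #10',
   both of which have 16-bit encodings.  */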
17156 static void
17157 thumb2_reorg (void)
17159 basic_block bb;
17160 regset_head live;
17162 INIT_REG_SET (&live);
17164 /* We are freeing block_for_insn in the toplev to keep compatibility
17165 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17166 compute_bb_for_insn ();
17167 df_analyze ();
17169 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17171 FOR_EACH_BB_FN (bb, cfun)
17173 if (current_tune->disparage_flag_setting_t16_encodings
17174 && optimize_bb_for_speed_p (bb))
17175 continue;
17177 rtx_insn *insn;
17178 Convert_Action action = SKIP;
17179 Convert_Action action_for_partial_flag_setting
17180 = (current_tune->disparage_partial_flag_setting_t16_encodings
17181 && optimize_bb_for_speed_p (bb))
17182 ? SKIP : CONV;
17184 COPY_REG_SET (&live, DF_LR_OUT (bb));
17185 df_simulate_initialize_backwards (bb, &live);
17186 FOR_BB_INSNS_REVERSE (bb, insn)
17188 if (NONJUMP_INSN_P (insn)
17189 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17190 && GET_CODE (PATTERN (insn)) == SET)
17192 action = SKIP;
17193 rtx pat = PATTERN (insn);
17194 rtx dst = XEXP (pat, 0);
17195 rtx src = XEXP (pat, 1);
17196 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17198 if (!OBJECT_P (src))
17199 op0 = XEXP (src, 0);
17201 if (BINARY_P (src))
17202 op1 = XEXP (src, 1);
17204 if (low_register_operand (dst, SImode))
17206 switch (GET_CODE (src))
17208 case PLUS:
17209 /* Adding two registers and storing the result
17210 in the first source is already a 16-bit
17211 operation. */
17212 if (rtx_equal_p (dst, op0)
17213 && register_operand (op1, SImode))
17214 break;
17216 if (low_register_operand (op0, SImode))
17218 /* ADDS <Rd>,<Rn>,<Rm> */
17219 if (low_register_operand (op1, SImode))
17220 action = CONV;
17221 /* ADDS <Rdn>,#<imm8> */
17222 /* SUBS <Rdn>,#<imm8> */
17223 else if (rtx_equal_p (dst, op0)
17224 && CONST_INT_P (op1)
17225 && IN_RANGE (INTVAL (op1), -255, 255))
17226 action = CONV;
17227 /* ADDS <Rd>,<Rn>,#<imm3> */
17228 /* SUBS <Rd>,<Rn>,#<imm3> */
17229 else if (CONST_INT_P (op1)
17230 && IN_RANGE (INTVAL (op1), -7, 7))
17231 action = CONV;
17233 /* ADCS <Rd>, <Rn> */
17234 else if (GET_CODE (XEXP (src, 0)) == PLUS
17235 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17236 && low_register_operand (XEXP (XEXP (src, 0), 1),
17237 SImode)
17238 && COMPARISON_P (op1)
17239 && cc_register (XEXP (op1, 0), VOIDmode)
17240 && maybe_get_arm_condition_code (op1) == ARM_CS
17241 && XEXP (op1, 1) == const0_rtx)
17242 action = CONV;
17243 break;
17245 case MINUS:
17246 /* RSBS <Rd>,<Rn>,#0
17247 Not handled here: see NEG below. */
17248 /* SUBS <Rd>,<Rn>,#<imm3>
17249 SUBS <Rdn>,#<imm8>
17250 Not handled here: see PLUS above. */
17251 /* SUBS <Rd>,<Rn>,<Rm> */
17252 if (low_register_operand (op0, SImode)
17253 && low_register_operand (op1, SImode))
17254 action = CONV;
17255 break;
17257 case MULT:
17258 /* MULS <Rdm>,<Rn>,<Rdm>
17259 As an exception to the rule, this is only used
17260 when optimizing for size since MULS is slow on all
17261 known implementations. We do not even want to use
17262 MULS in cold code, if optimizing for speed, so we
17263 test the global flag here. */
17264 if (!optimize_size)
17265 break;
17266 /* else fall through. */
17267 case AND:
17268 case IOR:
17269 case XOR:
17270 /* ANDS <Rdn>,<Rm> */
17271 if (rtx_equal_p (dst, op0)
17272 && low_register_operand (op1, SImode))
17273 action = action_for_partial_flag_setting;
17274 else if (rtx_equal_p (dst, op1)
17275 && low_register_operand (op0, SImode))
17276 action = action_for_partial_flag_setting == SKIP
17277 ? SKIP : SWAP_CONV;
17278 break;
17280 case ASHIFTRT:
17281 case ASHIFT:
17282 case LSHIFTRT:
17283 /* ASRS <Rdn>,<Rm> */
17284 /* LSRS <Rdn>,<Rm> */
17285 /* LSLS <Rdn>,<Rm> */
17286 if (rtx_equal_p (dst, op0)
17287 && low_register_operand (op1, SImode))
17288 action = action_for_partial_flag_setting;
17289 /* ASRS <Rd>,<Rm>,#<imm5> */
17290 /* LSRS <Rd>,<Rm>,#<imm5> */
17291 /* LSLS <Rd>,<Rm>,#<imm5> */
17292 else if (low_register_operand (op0, SImode)
17293 && CONST_INT_P (op1)
17294 && IN_RANGE (INTVAL (op1), 0, 31))
17295 action = action_for_partial_flag_setting;
17296 break;
17298 case ROTATERT:
17299 /* RORS <Rdn>,<Rm> */
17300 if (rtx_equal_p (dst, op0)
17301 && low_register_operand (op1, SImode))
17302 action = action_for_partial_flag_setting;
17303 break;
17305 case NOT:
17306 /* MVNS <Rd>,<Rm> */
17307 if (low_register_operand (op0, SImode))
17308 action = action_for_partial_flag_setting;
17309 break;
17311 case NEG:
17312 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17313 if (low_register_operand (op0, SImode))
17314 action = CONV;
17315 break;
17317 case CONST_INT:
17318 /* MOVS <Rd>,#<imm8> */
17319 if (CONST_INT_P (src)
17320 && IN_RANGE (INTVAL (src), 0, 255))
17321 action = action_for_partial_flag_setting;
17322 break;
17324 case REG:
17325 /* MOVS and MOV<c> with registers have different
17326 encodings, so are not relevant here. */
17327 break;
17329 default:
17330 break;
17334 if (action != SKIP)
17336 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17337 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17338 rtvec vec;
17340 if (action == SWAP_CONV)
17342 src = copy_rtx (src);
17343 XEXP (src, 0) = op1;
17344 XEXP (src, 1) = op0;
17345 pat = gen_rtx_SET (VOIDmode, dst, src);
17346 vec = gen_rtvec (2, pat, clobber);
17348 else /* action == CONV */
17349 vec = gen_rtvec (2, pat, clobber);
17351 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17352 INSN_CODE (insn) = -1;
17356 if (NONDEBUG_INSN_P (insn))
17357 df_simulate_one_insn_backwards (bb, insn, &live);
17361 CLEAR_REG_SET (&live);
17364 /* Gcc puts the pool in the wrong place for ARM, since we can only
17365 load from addresses within a limited distance of the pc. We do some
17366 special munging to move the constant pool values to the correct
17367 point in the code. */
17368 static void
17369 arm_reorg (void)
17371 rtx_insn *insn;
17372 HOST_WIDE_INT address = 0;
17373 Mfix * fix;
17375 if (TARGET_THUMB1)
17376 thumb1_reorg ();
17377 else if (TARGET_THUMB2)
17378 thumb2_reorg ();
17380 /* Ensure all insns that must be split have been split at this point.
17381 Otherwise, the pool placement code below may compute incorrect
17382 insn lengths. Note that when optimizing, all insns have already
17383 been split at this point. */
17384 if (!optimize)
17385 split_all_insns_noflow ();
17387 minipool_fix_head = minipool_fix_tail = NULL;
17389 /* The first insn must always be a note, or the code below won't
17390 scan it properly. */
17391 insn = get_insns ();
17392 gcc_assert (NOTE_P (insn));
17393 minipool_pad = 0;
17395 /* Scan all the insns and record the operands that will need fixing. */
17396 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17398 if (BARRIER_P (insn))
17399 push_minipool_barrier (insn, address);
17400 else if (INSN_P (insn))
17402 rtx_jump_table_data *table;
17404 note_invalid_constants (insn, address, true);
17405 address += get_attr_length (insn);
17407 /* If the insn is a vector jump, add the size of the table
17408 and skip the table. */
17409 if (tablejump_p (insn, NULL, &table))
17411 address += get_jump_table_size (table);
17412 insn = table;
17415 else if (LABEL_P (insn))
17416 /* Add the worst-case padding due to alignment. We don't add
17417 the _current_ padding because the minipool insertions
17418 themselves might change it. */
17419 address += get_label_padding (insn);
17422 fix = minipool_fix_head;
17424 /* Now scan the fixups and perform the required changes. */
17425 while (fix)
17427 Mfix * ftmp;
17428 Mfix * fdel;
17429 Mfix * last_added_fix;
17430 Mfix * last_barrier = NULL;
17431 Mfix * this_fix;
17433 /* Skip any further barriers before the next fix. */
17434 while (fix && BARRIER_P (fix->insn))
17435 fix = fix->next;
17437 /* No more fixes. */
17438 if (fix == NULL)
17439 break;
17441 last_added_fix = NULL;
17443 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17445 if (BARRIER_P (ftmp->insn))
17447 if (ftmp->address >= minipool_vector_head->max_address)
17448 break;
17450 last_barrier = ftmp;
17452 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17453 break;
17455 last_added_fix = ftmp; /* Keep track of the last fix added. */
17458 /* If we found a barrier, drop back to that; any fixes that we
17459 could have reached but come after the barrier will now go in
17460 the next mini-pool. */
17461 if (last_barrier != NULL)
17463 /* Reduce the refcount for those fixes that won't go into this
17464 pool after all. */
17465 for (fdel = last_barrier->next;
17466 fdel && fdel != ftmp;
17467 fdel = fdel->next)
17469 fdel->minipool->refcount--;
17470 fdel->minipool = NULL;
17473 ftmp = last_barrier;
17475 else
17477 /* ftmp is the first fix that we can't fit into this pool and
17478 there are no natural barriers that we could use. Insert a
17479 new barrier in the code somewhere between the previous
17480 fix and this one, and arrange to jump around it. */
17481 HOST_WIDE_INT max_address;
17483 /* The last item on the list of fixes must be a barrier, so
17484 we can never run off the end of the list of fixes without
17485 last_barrier being set. */
17486 gcc_assert (ftmp);
17488 max_address = minipool_vector_head->max_address;
17489 /* Check that there isn't another fix in range that
17490 we couldn't fit into this pool because the pool was
17491 already too large: we need to put the pool before such an
17492 instruction. The pool itself may come just after the
17493 fix because create_fix_barrier also allows space for a
17494 jump instruction. */
17495 if (ftmp->address < max_address)
17496 max_address = ftmp->address + 1;
17498 last_barrier = create_fix_barrier (last_added_fix, max_address);
17501 assign_minipool_offsets (last_barrier);
17503 while (ftmp)
17505 if (!BARRIER_P (ftmp->insn)
17506 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17507 == NULL))
17508 break;
17510 ftmp = ftmp->next;
17513 /* Scan over the fixes we have identified for this pool, fixing them
17514 up and adding the constants to the pool itself. */
17515 for (this_fix = fix; this_fix && ftmp != this_fix;
17516 this_fix = this_fix->next)
17517 if (!BARRIER_P (this_fix->insn))
17519 rtx addr
17520 = plus_constant (Pmode,
17521 gen_rtx_LABEL_REF (VOIDmode,
17522 minipool_vector_label),
17523 this_fix->minipool->offset);
17524 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17527 dump_minipool (last_barrier->insn);
17528 fix = ftmp;
17531 /* From now on we must synthesize any constants that we can't handle
17532 directly. This can happen if the RTL gets split during final
17533 instruction generation. */
17534 cfun->machine->after_arm_reorg = 1;
17536 /* Free the minipool memory. */
17537 obstack_free (&minipool_obstack, minipool_startobj);
17540 /* Routines to output assembly language. */
17542 /* Return string representation of passed in real value. */
17543 static const char *
17544 fp_const_from_val (REAL_VALUE_TYPE *r)
17546 if (!fp_consts_inited)
17547 init_fp_table ();
17549 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17550 return "0";
17553 /* OPERANDS[0] is the entire list of insns that constitute pop,
17554 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17555 is in the list, UPDATE is true iff the list contains explicit
17556 update of base register. */
17557 void
17558 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17559 bool update)
17561 int i;
17562 char pattern[100];
17563 int offset;
17564 const char *conditional;
17565 int num_saves = XVECLEN (operands[0], 0);
17566 unsigned int regno;
17567 unsigned int regno_base = REGNO (operands[1]);
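/* The return insn and the explicit base-register update, when present,
   occupy the leading elements of the PARALLEL; OFFSET skips past them
   to the first register being popped.  */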
17569 offset = 0;
17570 offset += update ? 1 : 0;
17571 offset += return_pc ? 1 : 0;
17573 /* Is the base register in the list? */
17574 for (i = offset; i < num_saves; i++)
17576 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17577 /* If SP is in the list, then the base register must be SP. */
17578 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17579 /* If base register is in the list, there must be no explicit update. */
17580 if (regno == regno_base)
17581 gcc_assert (!update);
17584 conditional = reverse ? "%?%D0" : "%?%d0";
17585 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17587 /* Output pop (not ldmfd) because it has a shorter encoding. */
17588 gcc_assert (update);
17589 sprintf (pattern, "pop%s\t{", conditional);
17591 else
17593 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17594 It's just a convention; their semantics are identical. */
17595 if (regno_base == SP_REGNUM)
17596 sprintf (pattern, "ldm%sfd\t", conditional);
17597 else if (TARGET_UNIFIED_ASM)
17598 sprintf (pattern, "ldmia%s\t", conditional);
17599 else
17600 sprintf (pattern, "ldm%sia\t", conditional);
17602 strcat (pattern, reg_names[regno_base]);
17603 if (update)
17604 strcat (pattern, "!, {");
17605 else
17606 strcat (pattern, ", {");
17609 /* Output the first destination register. */
17610 strcat (pattern,
17611 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17613 /* Output the rest of the destination registers. */
17614 for (i = offset + 1; i < num_saves; i++)
17616 strcat (pattern, ", ");
17617 strcat (pattern,
17618 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17621 strcat (pattern, "}");
17623 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17624 strcat (pattern, "^");
17626 output_asm_insn (pattern, &cond);
17630 /* Output the assembly for a store multiple. */
17632 const char *
17633 vfp_output_vstmd (rtx * operands)
17635 char pattern[100];
17636 int p;
17637 int base;
17638 int i;
17639 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17640 ? XEXP (operands[0], 0)
17641 : XEXP (XEXP (operands[0], 0), 0);
17642 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17644 if (push_p)
17645 strcpy (pattern, "vpush%?.64\t{%P1");
17646 else
17647 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17649 p = strlen (pattern);
17651 gcc_assert (REG_P (operands[1]));
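/* operands[1] is the first D register of the block; the pattern stores
   consecutive registers, so the rest of the register list can be built
   up by name from BASE.  */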
17653 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17654 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17656 p += sprintf (&pattern[p], ", d%d", base + i);
17658 strcpy (&pattern[p], "}");
17660 output_asm_insn (pattern, operands);
17661 return "";
17665 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17666 number of bytes pushed. */
17668 static int
17669 vfp_emit_fstmd (int base_reg, int count)
17671 rtx par;
17672 rtx dwarf;
17673 rtx tmp, reg;
17674 int i;
17676 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17677 register pairs are stored by a store multiple insn. We avoid this
17678 by pushing an extra pair. */
17679 if (count == 2 && !arm_arch6)
17681 if (base_reg == LAST_VFP_REGNUM - 3)
17682 base_reg -= 2;
17683 count++;
17686 /* FSTMD may not store more than 16 doubleword registers at once. Split
17687 larger stores into multiple parts (up to a maximum of two, in
17688 practice). */
17689 if (count > 16)
17691 int saved;
17692 /* NOTE: base_reg is an internal register number, so each D register
17693 counts as 2. */
17694 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17695 saved += vfp_emit_fstmd (base_reg, 16);
17696 return saved;
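/* Element 0 of the PARALLEL is the block store itself: a store to a
   PRE_MODIFY address that drops SP by the size of the whole block, with
   the registers carried in an UNSPEC.  DWARF holds the equivalent SP
   adjustment and the individual register stores so the unwinder sees
   the real effect of the insn.  */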
17699 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17700 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17702 reg = gen_rtx_REG (DFmode, base_reg);
17703 base_reg += 2;
17705 XVECEXP (par, 0, 0)
17706 = gen_rtx_SET (VOIDmode,
17707 gen_frame_mem
17708 (BLKmode,
17709 gen_rtx_PRE_MODIFY (Pmode,
17710 stack_pointer_rtx,
17711 plus_constant
17712 (Pmode, stack_pointer_rtx,
17713 - (count * 8)))
17715 gen_rtx_UNSPEC (BLKmode,
17716 gen_rtvec (1, reg),
17717 UNSPEC_PUSH_MULT));
17719 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17720 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17721 RTX_FRAME_RELATED_P (tmp) = 1;
17722 XVECEXP (dwarf, 0, 0) = tmp;
17724 tmp = gen_rtx_SET (VOIDmode,
17725 gen_frame_mem (DFmode, stack_pointer_rtx),
17726 reg);
17727 RTX_FRAME_RELATED_P (tmp) = 1;
17728 XVECEXP (dwarf, 0, 1) = tmp;
17730 for (i = 1; i < count; i++)
17732 reg = gen_rtx_REG (DFmode, base_reg);
17733 base_reg += 2;
17734 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17736 tmp = gen_rtx_SET (VOIDmode,
17737 gen_frame_mem (DFmode,
17738 plus_constant (Pmode,
17739 stack_pointer_rtx,
17740 i * 8)),
17741 reg);
17742 RTX_FRAME_RELATED_P (tmp) = 1;
17743 XVECEXP (dwarf, 0, i + 1) = tmp;
17746 par = emit_insn (par);
17747 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17748 RTX_FRAME_RELATED_P (par) = 1;
17750 return count * 8;
17753 /* Emit a call instruction with pattern PAT. ADDR is the address of
17754 the call target. */
17756 void
17757 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17759 rtx insn;
17761 insn = emit_call_insn (pat);
17763 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17764 If the call might use such an entry, add a use of the PIC register
17765 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17766 if (TARGET_VXWORKS_RTP
17767 && flag_pic
17768 && !sibcall
17769 && GET_CODE (addr) == SYMBOL_REF
17770 && (SYMBOL_REF_DECL (addr)
17771 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17772 : !SYMBOL_REF_LOCAL_P (addr)))
17774 require_pic_register ();
17775 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17778 if (TARGET_AAPCS_BASED)
17780 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17781 linker. We need to add an IP clobber to allow setting
17782 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17783 is not needed since it's a fixed register. */
17784 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17785 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17789 /* Output a 'call' insn. */
17790 const char *
17791 output_call (rtx *operands)
17793 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17795 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17796 if (REGNO (operands[0]) == LR_REGNUM)
17798 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17799 output_asm_insn ("mov%?\t%0, %|lr", operands);
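/* Set up the return address by hand: in ARM state the PC reads as the
   address of the current instruction plus 8, so "mov lr, pc" leaves LR
   pointing at the instruction after the branch emitted below.  */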
17802 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17804 if (TARGET_INTERWORK || arm_arch4t)
17805 output_asm_insn ("bx%?\t%0", operands);
17806 else
17807 output_asm_insn ("mov%?\t%|pc, %0", operands);
17809 return "";
17812 /* Output a 'call' insn that is a reference in memory. This is
17813 disabled for ARMv5 and we prefer a blx instead because otherwise
17814 there's a significant performance overhead. */
17815 const char *
17816 output_call_mem (rtx *operands)
17818 gcc_assert (!arm_arch5);
17819 if (TARGET_INTERWORK)
17821 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17822 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17823 output_asm_insn ("bx%?\t%|ip", operands);
17825 else if (regno_use_in (LR_REGNUM, operands[0]))
17827 /* LR is used in the memory address. We load the address in the
17828 first instruction. It's safe to use IP as the target of the
17829 load since the call will kill it anyway. */
17830 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17831 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17832 if (arm_arch4t)
17833 output_asm_insn ("bx%?\t%|ip", operands);
17834 else
17835 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17837 else
17839 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17840 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17843 return "";
17847 /* Output a move from arm registers to arm registers of a long double
17848 OPERANDS[0] is the destination.
17849 OPERANDS[1] is the source. */
17850 const char *
17851 output_mov_long_double_arm_from_arm (rtx *operands)
17853 /* We have to be careful here because the two might overlap. */
17854 int dest_start = REGNO (operands[0]);
17855 int src_start = REGNO (operands[1]);
17856 rtx ops[2];
17857 int i;
17859 if (dest_start < src_start)
17861 for (i = 0; i < 3; i++)
17863 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17864 ops[1] = gen_rtx_REG (SImode, src_start + i);
17865 output_asm_insn ("mov%?\t%0, %1", ops);
17868 else
17870 for (i = 2; i >= 0; i--)
17872 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17873 ops[1] = gen_rtx_REG (SImode, src_start + i);
17874 output_asm_insn ("mov%?\t%0, %1", ops);
17878 return "";
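/* Emit RTL to load DEST with SRC as a pair of 16-bit immediate moves
   (movw/movt style).  A constant is split into its low and high halves;
   any other value is emitted as a HIGH/LO_SUM pair.  */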
17881 void
17882 arm_emit_movpair (rtx dest, rtx src)
17884 /* If the src is an immediate, simplify it. */
17885 if (CONST_INT_P (src))
17887 HOST_WIDE_INT val = INTVAL (src);
17888 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17889 if ((val >> 16) & 0x0000ffff)
17890 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17891 GEN_INT (16)),
17892 GEN_INT ((val >> 16) & 0x0000ffff));
17893 return;
17895 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17896 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17899 /* Output a move between double words. It must be REG<-MEM
17900 or MEM<-REG. */
17901 const char *
17902 output_move_double (rtx *operands, bool emit, int *count)
17904 enum rtx_code code0 = GET_CODE (operands[0]);
17905 enum rtx_code code1 = GET_CODE (operands[1]);
17906 rtx otherops[3];
17907 if (count)
17908 *count = 1;
17910 /* The only case when this might happen is when
17911 you are looking at the length of a DImode instruction
17912 that has an invalid constant in it. */
17913 if (code0 == REG && code1 != MEM)
17915 gcc_assert (!emit);
17916 *count = 2;
17917 return "";
17920 if (code0 == REG)
17922 unsigned int reg0 = REGNO (operands[0]);
17924 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17926 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17928 switch (GET_CODE (XEXP (operands[1], 0)))
17930 case REG:
17932 if (emit)
17934 if (TARGET_LDRD
17935 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17936 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17937 else
17938 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17940 break;
17942 case PRE_INC:
17943 gcc_assert (TARGET_LDRD);
17944 if (emit)
17945 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17946 break;
17948 case PRE_DEC:
17949 if (emit)
17951 if (TARGET_LDRD)
17952 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17953 else
17954 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17956 break;
17958 case POST_INC:
17959 if (emit)
17961 if (TARGET_LDRD)
17962 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17963 else
17964 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17966 break;
17968 case POST_DEC:
17969 gcc_assert (TARGET_LDRD);
17970 if (emit)
17971 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17972 break;
17974 case PRE_MODIFY:
17975 case POST_MODIFY:
17976 /* Autoincrement addressing modes should never have overlapping
17977 base and destination registers, and overlapping index registers
17978 are already prohibited, so this doesn't need to worry about
17979 fix_cm3_ldrd. */
17980 otherops[0] = operands[0];
17981 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17982 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17984 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17986 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17988 /* Registers overlap so split out the increment. */
17989 if (emit)
17991 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17992 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17994 if (count)
17995 *count = 2;
17997 else
17999 /* Use a single insn if we can.
18000 FIXME: IWMMXT allows offsets larger than ldrd can
18001 handle, fix these up with a pair of ldr. */
18002 if (TARGET_THUMB2
18003 || !CONST_INT_P (otherops[2])
18004 || (INTVAL (otherops[2]) > -256
18005 && INTVAL (otherops[2]) < 256))
18007 if (emit)
18008 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18010 else
18012 if (emit)
18014 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18015 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18017 if (count)
18018 *count = 2;
18023 else
18025 /* Use a single insn if we can.
18026 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18027 fix these up with a pair of ldr. */
18028 if (TARGET_THUMB2
18029 || !CONST_INT_P (otherops[2])
18030 || (INTVAL (otherops[2]) > -256
18031 && INTVAL (otherops[2]) < 256))
18033 if (emit)
18034 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18036 else
18038 if (emit)
18040 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18041 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18043 if (count)
18044 *count = 2;
18047 break;
18049 case LABEL_REF:
18050 case CONST:
18051 /* We might be able to use ldrd %0, %1 here. However the range is
18052 different to ldr/adr, and it is broken on some ARMv7-M
18053 implementations. */
18054 /* Use the second register of the pair to avoid problematic
18055 overlap. */
18056 otherops[1] = operands[1];
18057 if (emit)
18058 output_asm_insn ("adr%?\t%0, %1", otherops);
18059 operands[1] = otherops[0];
18060 if (emit)
18062 if (TARGET_LDRD)
18063 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18064 else
18065 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18068 if (count)
18069 *count = 2;
18070 break;
18072 /* ??? This needs checking for thumb2. */
18073 default:
18074 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18075 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18077 otherops[0] = operands[0];
18078 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18079 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18081 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18083 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
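/* Without ldrd, a constant offset of -8, -4 or 4 can still be handled
   in a single instruction by using the ldmdb / ldmda / ldmib addressing
   variants (the da and ib forms are not available in Thumb-2).  */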
18085 switch ((int) INTVAL (otherops[2]))
18087 case -8:
18088 if (emit)
18089 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18090 return "";
18091 case -4:
18092 if (TARGET_THUMB2)
18093 break;
18094 if (emit)
18095 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18096 return "";
18097 case 4:
18098 if (TARGET_THUMB2)
18099 break;
18100 if (emit)
18101 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18102 return "";
18105 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18106 operands[1] = otherops[0];
18107 if (TARGET_LDRD
18108 && (REG_P (otherops[2])
18109 || TARGET_THUMB2
18110 || (CONST_INT_P (otherops[2])
18111 && INTVAL (otherops[2]) > -256
18112 && INTVAL (otherops[2]) < 256)))
18114 if (reg_overlap_mentioned_p (operands[0],
18115 otherops[2]))
18117 rtx tmp;
18118 /* Swap base and index registers over to
18119 avoid a conflict. */
18120 tmp = otherops[1];
18121 otherops[1] = otherops[2];
18122 otherops[2] = tmp;
18124 /* If both registers conflict, it will usually
18125 have been fixed by a splitter. */
18126 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18127 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18129 if (emit)
18131 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18132 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18134 if (count)
18135 *count = 2;
18137 else
18139 otherops[0] = operands[0];
18140 if (emit)
18141 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18143 return "";
18146 if (CONST_INT_P (otherops[2]))
18148 if (emit)
18150 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18151 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18152 else
18153 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18156 else
18158 if (emit)
18159 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18162 else
18164 if (emit)
18165 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18168 if (count)
18169 *count = 2;
18171 if (TARGET_LDRD)
18172 return "ldr%(d%)\t%0, [%1]";
18174 return "ldm%(ia%)\t%1, %M0";
18176 else
18178 otherops[1] = adjust_address (operands[1], SImode, 4);
18179 /* Take care of overlapping base/data reg. */
18180 if (reg_mentioned_p (operands[0], operands[1]))
18182 if (emit)
18184 output_asm_insn ("ldr%?\t%0, %1", otherops);
18185 output_asm_insn ("ldr%?\t%0, %1", operands);
18187 if (count)
18188 *count = 2;
18191 else
18193 if (emit)
18195 output_asm_insn ("ldr%?\t%0, %1", operands);
18196 output_asm_insn ("ldr%?\t%0, %1", otherops);
18198 if (count)
18199 *count = 2;
18204 else
18206 /* Constraints should ensure this. */
18207 gcc_assert (code0 == MEM && code1 == REG);
18208 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18209 || (TARGET_ARM && TARGET_LDRD));
18211 switch (GET_CODE (XEXP (operands[0], 0)))
18213 case REG:
18214 if (emit)
18216 if (TARGET_LDRD)
18217 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18218 else
18219 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18221 break;
18223 case PRE_INC:
18224 gcc_assert (TARGET_LDRD);
18225 if (emit)
18226 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18227 break;
18229 case PRE_DEC:
18230 if (emit)
18232 if (TARGET_LDRD)
18233 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18234 else
18235 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18237 break;
18239 case POST_INC:
18240 if (emit)
18242 if (TARGET_LDRD)
18243 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18244 else
18245 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18247 break;
18249 case POST_DEC:
18250 gcc_assert (TARGET_LDRD);
18251 if (emit)
18252 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18253 break;
18255 case PRE_MODIFY:
18256 case POST_MODIFY:
18257 otherops[0] = operands[1];
18258 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18259 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18261 /* IWMMXT allows offsets larger than strd can handle;
18262 fix these up with a pair of str. */
18263 if (!TARGET_THUMB2
18264 && CONST_INT_P (otherops[2])
18265 && (INTVAL(otherops[2]) <= -256
18266 || INTVAL(otherops[2]) >= 256))
18268 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18270 if (emit)
18272 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18273 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18275 if (count)
18276 *count = 2;
18278 else
18280 if (emit)
18282 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18283 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18285 if (count)
18286 *count = 2;
18289 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18291 if (emit)
18292 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18294 else
18296 if (emit)
18297 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18299 break;
18301 case PLUS:
18302 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18303 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18305 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18307 case -8:
18308 if (emit)
18309 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18310 return "";
18312 case -4:
18313 if (TARGET_THUMB2)
18314 break;
18315 if (emit)
18316 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18317 return "";
18319 case 4:
18320 if (TARGET_THUMB2)
18321 break;
18322 if (emit)
18323 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18324 return "";
18327 if (TARGET_LDRD
18328 && (REG_P (otherops[2])
18329 || TARGET_THUMB2
18330 || (CONST_INT_P (otherops[2])
18331 && INTVAL (otherops[2]) > -256
18332 && INTVAL (otherops[2]) < 256)))
18334 otherops[0] = operands[1];
18335 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18336 if (emit)
18337 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18338 return "";
18340 /* Fall through */
18342 default:
18343 otherops[0] = adjust_address (operands[0], SImode, 4);
18344 otherops[1] = operands[1];
18345 if (emit)
18347 output_asm_insn ("str%?\t%1, %0", operands);
18348 output_asm_insn ("str%?\t%H1, %0", otherops);
18350 if (count)
18351 *count = 2;
18355 return "";
18358 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18359 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18361 const char *
18362 output_move_quad (rtx *operands)
18364 if (REG_P (operands[0]))
18366 /* Load, or reg->reg move. */
18368 if (MEM_P (operands[1]))
18370 switch (GET_CODE (XEXP (operands[1], 0)))
18372 case REG:
18373 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18374 break;
18376 case LABEL_REF:
18377 case CONST:
18378 output_asm_insn ("adr%?\t%0, %1", operands);
18379 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18380 break;
18382 default:
18383 gcc_unreachable ();
18386 else
18388 rtx ops[2];
18389 int dest, src, i;
18391 gcc_assert (REG_P (operands[1]));
18393 dest = REGNO (operands[0]);
18394 src = REGNO (operands[1]);
18396 /* This seems pretty dumb, but hopefully GCC won't try to do it
18397 very often. */
18398 if (dest < src)
18399 for (i = 0; i < 4; i++)
18401 ops[0] = gen_rtx_REG (SImode, dest + i);
18402 ops[1] = gen_rtx_REG (SImode, src + i);
18403 output_asm_insn ("mov%?\t%0, %1", ops);
18405 else
18406 for (i = 3; i >= 0; i--)
18408 ops[0] = gen_rtx_REG (SImode, dest + i);
18409 ops[1] = gen_rtx_REG (SImode, src + i);
18410 output_asm_insn ("mov%?\t%0, %1", ops);
18414 else
18416 gcc_assert (MEM_P (operands[0]));
18417 gcc_assert (REG_P (operands[1]));
18418 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18420 switch (GET_CODE (XEXP (operands[0], 0)))
18422 case REG:
18423 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18424 break;
18426 default:
18427 gcc_unreachable ();
18431 return "";
18434 /* Output a VFP load or store instruction. */
18436 const char *
18437 output_move_vfp (rtx *operands)
18439 rtx reg, mem, addr, ops[2];
18440 int load = REG_P (operands[0]);
18441 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18442 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18443 const char *templ;
18444 char buff[50];
18445 enum machine_mode mode;
18447 reg = operands[!load];
18448 mem = operands[load];
18450 mode = GET_MODE (reg);
18452 gcc_assert (REG_P (reg));
18453 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18454 gcc_assert (mode == SFmode
18455 || mode == DFmode
18456 || mode == SImode
18457 || mode == DImode
18458 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18459 gcc_assert (MEM_P (mem));
18461 addr = XEXP (mem, 0);
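/* Choose an assembler template; the sprintf below fills in "ld" or
   "st", the "32" or "64" access size, and the "P" operand modifier
   needed to print a double-precision register operand.  */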
18463 switch (GET_CODE (addr))
18465 case PRE_DEC:
18466 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18467 ops[0] = XEXP (addr, 0);
18468 ops[1] = reg;
18469 break;
18471 case POST_INC:
18472 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18473 ops[0] = XEXP (addr, 0);
18474 ops[1] = reg;
18475 break;
18477 default:
18478 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18479 ops[0] = reg;
18480 ops[1] = mem;
18481 break;
18484 sprintf (buff, templ,
18485 load ? "ld" : "st",
18486 dp ? "64" : "32",
18487 dp ? "P" : "",
18488 integer_p ? "\t%@ int" : "");
18489 output_asm_insn (buff, ops);
18491 return "";
18494 /* Output a Neon double-word or quad-word load or store, or a load
18495 or store for larger structure modes.
18497 WARNING: The ordering of elements is weird in big-endian mode,
18498 because the EABI requires that vectors stored in memory appear
18499 as though they were stored by a VSTM instruction.
18500 GCC RTL defines element ordering based on in-memory order.
18501 This can be different from the architectural ordering of elements
18502 within a NEON register. The intrinsics defined in arm_neon.h use the
18503 NEON register element ordering, not the GCC RTL element ordering.
18505 For example, the in-memory ordering of a big-endian quadword
18506 vector with 16-bit elements when stored from register pair {d0,d1}
18507 will be (lowest address first, d0[N] is NEON register element N):
18509 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18511 When necessary, quadword registers (dN, dN+1) are moved to ARM
18512 registers from rN in the order:
18514 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18516 So that STM/LDM can be used on vectors in ARM registers, and the
18517 same memory layout will result as if VSTM/VLDM were used.
18519 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18520 possible, which allows use of appropriate alignment tags.
18521 Note that the choice of "64" is independent of the actual vector
18522 element size; this size simply ensures that the behavior is
18523 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18525 Due to limitations of those instructions, use of VST1.64/VLD1.64
18526 is not possible if:
18527 - the address contains PRE_DEC, or
18528 - the mode refers to more than 4 double-word registers
18530 In those cases, it would be possible to replace VSTM/VLDM by a
18531 sequence of instructions; this is not currently implemented since
18532 this is not certain to actually improve performance. */
18534 const char *
18535 output_move_neon (rtx *operands)
18537 rtx reg, mem, addr, ops[2];
18538 int regno, nregs, load = REG_P (operands[0]);
18539 const char *templ;
18540 char buff[50];
18541 enum machine_mode mode;
18543 reg = operands[!load];
18544 mem = operands[load];
18546 mode = GET_MODE (reg);
18548 gcc_assert (REG_P (reg));
18549 regno = REGNO (reg);
18550 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18551 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18552 || NEON_REGNO_OK_FOR_QUAD (regno));
18553 gcc_assert (VALID_NEON_DREG_MODE (mode)
18554 || VALID_NEON_QREG_MODE (mode)
18555 || VALID_NEON_STRUCT_MODE (mode));
18556 gcc_assert (MEM_P (mem));
18558 addr = XEXP (mem, 0);
18560 /* Strip off const from addresses like (const (plus (...))). */
18561 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18562 addr = XEXP (addr, 0);
18564 switch (GET_CODE (addr))
18566 case POST_INC:
18567 /* We have to use vldm / vstm for too-large modes. */
18568 if (nregs > 4)
18570 templ = "v%smia%%?\t%%0!, %%h1";
18571 ops[0] = XEXP (addr, 0);
18573 else
18575 templ = "v%s1.64\t%%h1, %%A0";
18576 ops[0] = mem;
18578 ops[1] = reg;
18579 break;
18581 case PRE_DEC:
18582 /* We have to use vldm / vstm in this case, since there is no
18583 pre-decrement form of the vld1 / vst1 instructions. */
18584 templ = "v%smdb%%?\t%%0!, %%h1";
18585 ops[0] = XEXP (addr, 0);
18586 ops[1] = reg;
18587 break;
18589 case POST_MODIFY:
18590 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18591 gcc_unreachable ();
18593 case REG:
18594 /* We have to use vldm / vstm for too-large modes. */
18595 if (nregs > 1)
18597 if (nregs > 4)
18598 templ = "v%smia%%?\t%%m0, %%h1";
18599 else
18600 templ = "v%s1.64\t%%h1, %%A0";
18602 ops[0] = mem;
18603 ops[1] = reg;
18604 break;
18606 /* Fall through. */
18607 case LABEL_REF:
18608 case PLUS:
18610 int i;
18611 int overlap = -1;
18612 for (i = 0; i < nregs; i++)
18614 /* We're only using DImode here because it's a convenient size. */
18615 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18616 ops[1] = adjust_address (mem, DImode, 8 * i);
18617 if (reg_overlap_mentioned_p (ops[0], mem))
18619 gcc_assert (overlap == -1);
18620 overlap = i;
18622 else
18624 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18625 output_asm_insn (buff, ops);
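/* Any register that overlaps the address is handled last: for a load
   this ensures the address register is not overwritten before the
   other words have been transferred.  */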
18628 if (overlap != -1)
18630 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18631 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18632 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18633 output_asm_insn (buff, ops);
18636 return "";
18639 default:
18640 gcc_unreachable ();
18643 sprintf (buff, templ, load ? "ld" : "st");
18644 output_asm_insn (buff, ops);
18646 return "";
18649 /* Compute and return the length of neon_mov<mode>, where <mode> is
18650 one of VSTRUCT modes: EI, OI, CI or XI. */
18652 arm_attr_length_move_neon (rtx_insn *insn)
18654 rtx reg, mem, addr;
18655 int load;
18656 enum machine_mode mode;
18658 extract_insn_cached (insn);
18660 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18662 mode = GET_MODE (recog_data.operand[0]);
18663 switch (mode)
18665 case EImode:
18666 case OImode:
18667 return 8;
18668 case CImode:
18669 return 12;
18670 case XImode:
18671 return 16;
18672 default:
18673 gcc_unreachable ();
18677 load = REG_P (recog_data.operand[0]);
18678 reg = recog_data.operand[!load];
18679 mem = recog_data.operand[load];
18681 gcc_assert (MEM_P (mem));
18683 mode = GET_MODE (reg);
18684 addr = XEXP (mem, 0);
18686 /* Strip off const from addresses like (const (plus (...))). */
18687 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18688 addr = XEXP (addr, 0);
18690 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18692 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18693 return insns * 4;
18695 else
18696 return 4;
18699 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18700 return zero. */
18703 arm_address_offset_is_imm (rtx_insn *insn)
18705 rtx mem, addr;
18707 extract_insn_cached (insn);
18709 if (REG_P (recog_data.operand[0]))
18710 return 0;
18712 mem = recog_data.operand[0];
18714 gcc_assert (MEM_P (mem));
18716 addr = XEXP (mem, 0);
18718 if (REG_P (addr)
18719 || (GET_CODE (addr) == PLUS
18720 && REG_P (XEXP (addr, 0))
18721 && CONST_INT_P (XEXP (addr, 1))))
18722 return 1;
18723 else
18724 return 0;
18727 /* Output an ADD r, s, #n where n may be too big for one instruction.
18728 If adding zero to one register, output nothing. */
18729 const char *
18730 output_add_immediate (rtx *operands)
18732 HOST_WIDE_INT n = INTVAL (operands[2]);
18734 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18736 if (n < 0)
18737 output_multi_immediate (operands,
18738 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18739 -n);
18740 else
18741 output_multi_immediate (operands,
18742 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18746 return "";
18749 /* Output a multiple immediate operation.
18750 OPERANDS is the vector of operands referred to in the output patterns.
18751 INSTR1 is the output pattern to use for the first constant.
18752 INSTR2 is the output pattern to use for subsequent constants.
18753 IMMED_OP is the index of the constant slot in OPERANDS.
18754 N is the constant value. */
18755 static const char *
18756 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18757 int immed_op, HOST_WIDE_INT n)
18759 #if HOST_BITS_PER_WIDE_INT > 32
18760 n &= 0xffffffff;
18761 #endif
18763 if (n == 0)
18765 /* Quick and easy output. */
18766 operands[immed_op] = const0_rtx;
18767 output_asm_insn (instr1, operands);
18769 else
18771 int i;
18772 const char * instr = instr1;
18774 /* Note that n is never zero here (which would give no output). */
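/* Each iteration peels off the next 8-bit chunk of N, aligned to an
   even bit position so that it is a valid ARM rotated immediate, and
   then skips past that chunk.  */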
18775 for (i = 0; i < 32; i += 2)
18777 if (n & (3 << i))
18779 operands[immed_op] = GEN_INT (n & (255 << i));
18780 output_asm_insn (instr, operands);
18781 instr = instr2;
18782 i += 6;
18787 return "";
18790 /* Return the name of a shifter operation. */
18791 static const char *
18792 arm_shift_nmem(enum rtx_code code)
18794 switch (code)
18796 case ASHIFT:
18797 return ARM_LSL_NAME;
18799 case ASHIFTRT:
18800 return "asr";
18802 case LSHIFTRT:
18803 return "lsr";
18805 case ROTATERT:
18806 return "ror";
18808 default:
18809 abort();
18813 /* Return the appropriate ARM instruction for the operation code.
18814 The returned result should not be overwritten. OP is the rtx of the
18815 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18816 was shifted. */
18817 const char *
18818 arithmetic_instr (rtx op, int shift_first_arg)
18820 switch (GET_CODE (op))
18822 case PLUS:
18823 return "add";
18825 case MINUS:
18826 return shift_first_arg ? "rsb" : "sub";
18828 case IOR:
18829 return "orr";
18831 case XOR:
18832 return "eor";
18834 case AND:
18835 return "and";
18837 case ASHIFT:
18838 case ASHIFTRT:
18839 case LSHIFTRT:
18840 case ROTATERT:
18841 return arm_shift_nmem(GET_CODE(op));
18843 default:
18844 gcc_unreachable ();
18848 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18849 for the operation code. The returned result should not be overwritten.
18850 OP is the rtx of the shift.
18851 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18852 shift amount if the shift is by a constant. */
18853 static const char *
18854 shift_op (rtx op, HOST_WIDE_INT *amountp)
18856 const char * mnem;
18857 enum rtx_code code = GET_CODE (op);
18859 switch (code)
18861 case ROTATE:
18862 if (!CONST_INT_P (XEXP (op, 1)))
18864 output_operand_lossage ("invalid shift operand");
18865 return NULL;
18868 code = ROTATERT;
18869 *amountp = 32 - INTVAL (XEXP (op, 1));
18870 mnem = "ror";
18871 break;
18873 case ASHIFT:
18874 case ASHIFTRT:
18875 case LSHIFTRT:
18876 case ROTATERT:
18877 mnem = arm_shift_nmem(code);
18878 if (CONST_INT_P (XEXP (op, 1)))
18880 *amountp = INTVAL (XEXP (op, 1));
18882 else if (REG_P (XEXP (op, 1)))
18884 *amountp = -1;
18885 return mnem;
18887 else
18889 output_operand_lossage ("invalid shift operand");
18890 return NULL;
18892 break;
18894 case MULT:
18895 /* We never have to worry about the amount being other than a
18896 power of 2, since this case can never be reloaded from a reg. */
18897 if (!CONST_INT_P (XEXP (op, 1)))
18899 output_operand_lossage ("invalid shift operand");
18900 return NULL;
18903 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18905 /* Amount must be a power of two. */
18906 if (*amountp & (*amountp - 1))
18908 output_operand_lossage ("invalid shift operand");
18909 return NULL;
18912 *amountp = int_log2 (*amountp);
18913 return ARM_LSL_NAME;
18915 default:
18916 output_operand_lossage ("invalid shift operand");
18917 return NULL;
18920 /* This is not 100% correct, but follows from the desire to merge
18921 multiplication by a power of 2 with the recognizer for a
18922 shift. >=32 is not a valid shift for "lsl", so we must try to
18923 output a shift that produces the correct arithmetical result.
18924 Using lsr #32 is identical except for the fact that the carry bit
18925 is not set correctly if we set the flags; but we never use the
18926 carry bit from such an operation, so we can ignore that. */
18927 if (code == ROTATERT)
18928 /* Rotate is just modulo 32. */
18929 *amountp &= 31;
18930 else if (*amountp != (*amountp & 31))
18932 if (code == ASHIFT)
18933 mnem = "lsr";
18934 *amountp = 32;
18937 /* Shifts of 0 are no-ops. */
18938 if (*amountp == 0)
18939 return NULL;
18941 return mnem;
18944 /* Obtain the shift count from POWER, which must be an exact power of two. */
18946 static HOST_WIDE_INT
18947 int_log2 (HOST_WIDE_INT power)
18949 HOST_WIDE_INT shift = 0;
18951 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18953 gcc_assert (shift <= 31);
18954 shift++;
18957 return shift;
18960 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18961 because /bin/as is horribly restrictive. The judgement about
18962 whether or not each character is 'printable' (and can be output as
18963 is) or not (and must be printed with an octal escape) must be made
18964 with reference to the *host* character set -- the situation is
18965 similar to that discussed in the comments above pp_c_char in
18966 c-pretty-print.c. */
18968 #define MAX_ASCII_LEN 51
18970 void
18971 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18973 int i;
18974 int len_so_far = 0;
18976 fputs ("\t.ascii\t\"", stream);
18978 for (i = 0; i < len; i++)
18980 int c = p[i];
18982 if (len_so_far >= MAX_ASCII_LEN)
18984 fputs ("\"\n\t.ascii\t\"", stream);
18985 len_so_far = 0;
18988 if (ISPRINT (c))
18990 if (c == '\\' || c == '\"')
18992 putc ('\\', stream);
18993 len_so_far++;
18995 putc (c, stream);
18996 len_so_far++;
18998 else
19000 fprintf (stream, "\\%03o", c);
19001 len_so_far += 4;
19005 fputs ("\"\n", stream);
19008 /* Compute the register save mask for registers 0 through 12
19009 inclusive. This code is used by arm_compute_save_reg_mask. */
19011 static unsigned long
19012 arm_compute_save_reg0_reg12_mask (void)
19014 unsigned long func_type = arm_current_func_type ();
19015 unsigned long save_reg_mask = 0;
19016 unsigned int reg;
19018 if (IS_INTERRUPT (func_type))
19020 unsigned int max_reg;
19021 /* Interrupt functions must not corrupt any registers,
19022 even call clobbered ones. If this is a leaf function
19023 we can just examine the registers used by the RTL, but
19024 otherwise we have to assume that whatever function is
19025 called might clobber anything, and so we have to save
19026 all the call-clobbered registers as well. */
19027 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19028 /* FIQ handlers have registers r8 - r12 banked, so
19029 we only need to check r0 - r7. Normal ISRs only
19030 bank r13 and r14, so we must check up to r12.
19031 r13 is the stack pointer which is always preserved,
19032 so we do not need to consider it here. */
19033 max_reg = 7;
19034 else
19035 max_reg = 12;
19037 for (reg = 0; reg <= max_reg; reg++)
19038 if (df_regs_ever_live_p (reg)
19039 || (! crtl->is_leaf && call_used_regs[reg]))
19040 save_reg_mask |= (1 << reg);
19042 /* Also save the pic base register if necessary. */
19043 if (flag_pic
19044 && !TARGET_SINGLE_PIC_BASE
19045 && arm_pic_register != INVALID_REGNUM
19046 && crtl->uses_pic_offset_table)
19047 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19049 else if (IS_VOLATILE(func_type))
19051 /* For noreturn functions we historically omitted register saves
19052 altogether. However this really messes up debugging. As a
19053 compromise save just the frame pointers. Combined with the link
19054 register saved elsewhere this should be sufficient to get
19055 a backtrace. */
19056 if (frame_pointer_needed)
19057 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19058 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19059 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19060 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19061 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19063 else
19065 /* In the normal case we only need to save those registers
19066 which are call saved and which are used by this function. */
19067 for (reg = 0; reg <= 11; reg++)
19068 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19069 save_reg_mask |= (1 << reg);
19071 /* Handle the frame pointer as a special case. */
19072 if (frame_pointer_needed)
19073 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19075 /* If we aren't loading the PIC register,
19076 don't stack it even though it may be live. */
19077 if (flag_pic
19078 && !TARGET_SINGLE_PIC_BASE
19079 && arm_pic_register != INVALID_REGNUM
19080 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19081 || crtl->uses_pic_offset_table))
19082 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19084 /* The prologue will copy SP into R0, so save it. */
19085 if (IS_STACKALIGN (func_type))
19086 save_reg_mask |= 1;
19089 /* Save registers so the exception handler can modify them. */
19090 if (crtl->calls_eh_return)
19092 unsigned int i;
19094 for (i = 0; ; i++)
19096 reg = EH_RETURN_DATA_REGNO (i);
19097 if (reg == INVALID_REGNUM)
19098 break;
19099 save_reg_mask |= 1 << reg;
19103 return save_reg_mask;
19106 /* Return true if r3 is live at the start of the function. */
19108 static bool
19109 arm_r3_live_at_start_p (void)
19111 /* Just look at cfg info, which is still close enough to correct at this
19112 point. This gives false positives for broken functions that might use
19113 uninitialized data that happens to be allocated in r3, but who cares? */
19114 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19117 /* Compute the number of bytes used to store the static chain register on the
19118 stack, above the stack frame. We need to know this accurately to get the
19119 alignment of the rest of the stack frame correct. */
19121 static int
19122 arm_compute_static_chain_stack_bytes (void)
19124 /* See the defining assertion in arm_expand_prologue. */
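/* When those conditions hold the prologue cannot use r3 as a scratch
   to carry the static chain (which arrives in IP) across the frame
   setup, so IP is stored on the stack just above the frame instead.  */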
19125 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19126 && IS_NESTED (arm_current_func_type ())
19127 && arm_r3_live_at_start_p ()
19128 && crtl->args.pretend_args_size == 0)
19129 return 4;
19131 return 0;
19134 /* Compute a bit mask of which registers need to be
19135 saved on the stack for the current function.
19136 This is used by arm_get_frame_offsets, which may add extra registers. */
19138 static unsigned long
19139 arm_compute_save_reg_mask (void)
19141 unsigned int save_reg_mask = 0;
19142 unsigned long func_type = arm_current_func_type ();
19143 unsigned int reg;
19145 if (IS_NAKED (func_type))
19146 /* This should never really happen. */
19147 return 0;
19149 /* If we are creating a stack frame, then we must save the frame pointer,
19150 IP (which will hold the old stack pointer), LR and the PC. */
19151 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19152 save_reg_mask |=
19153 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19154 | (1 << IP_REGNUM)
19155 | (1 << LR_REGNUM)
19156 | (1 << PC_REGNUM);
19158 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19160 /* Decide if we need to save the link register.
19161 Interrupt routines have their own banked link register,
19162 so they never need to save it.
19163 Otherwise if we do not use the link register we do not need to save
19164 it. If we are pushing other registers onto the stack however, we
19165 can save an instruction in the epilogue by pushing the link register
19166 now and then popping it back into the PC. This incurs extra memory
19167 accesses though, so we only do it when optimizing for size, and only
19168 if we know that we will not need a fancy return sequence. */
19169 if (df_regs_ever_live_p (LR_REGNUM)
19170 || (save_reg_mask
19171 && optimize_size
19172 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19173 && !crtl->calls_eh_return))
19174 save_reg_mask |= 1 << LR_REGNUM;
19176 if (cfun->machine->lr_save_eliminated)
19177 save_reg_mask &= ~ (1 << LR_REGNUM);
19179 if (TARGET_REALLY_IWMMXT
19180 && ((bit_count (save_reg_mask)
19181 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19182 arm_compute_static_chain_stack_bytes())
19183 ) % 2) != 0)
19185 /* The total number of registers that are going to be pushed
19186 onto the stack is odd. We need to ensure that the stack
19187 is 64-bit aligned before we start to save iWMMXt registers,
19188 and also before we start to create locals. (A local variable
19189 might be a double or long long which we will load/store using
19190 an iWMMXt instruction). Therefore we need to push another
19191 ARM register, so that the stack will be 64-bit aligned. We
19192 try to avoid using the arg registers (r0 - r3) as they might be
19193 used to pass values in a tail call. */
19194 for (reg = 4; reg <= 12; reg++)
19195 if ((save_reg_mask & (1 << reg)) == 0)
19196 break;
19198 if (reg <= 12)
19199 save_reg_mask |= (1 << reg);
19200 else
19202 cfun->machine->sibcall_blocked = 1;
19203 save_reg_mask |= (1 << 3);
19207 /* We may need to push an additional register for use in initializing the
19208 PIC base register. */
19209 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19210 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19212 reg = thumb_find_work_register (1 << 4);
19213 if (!call_used_regs[reg])
19214 save_reg_mask |= (1 << reg);
19217 return save_reg_mask;
19221 /* Compute a bit mask of which registers need to be
19222 saved on the stack for the current function. */
19223 static unsigned long
19224 thumb1_compute_save_reg_mask (void)
19226 unsigned long mask;
19227 unsigned reg;
19229 mask = 0;
19230 for (reg = 0; reg < 12; reg ++)
19231 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19232 mask |= 1 << reg;
19234 if (flag_pic
19235 && !TARGET_SINGLE_PIC_BASE
19236 && arm_pic_register != INVALID_REGNUM
19237 && crtl->uses_pic_offset_table)
19238 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19240 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19241 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19242 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19244 /* LR will also be pushed if any lo regs are pushed. */
19245 if (mask & 0xff || thumb_force_lr_save ())
19246 mask |= (1 << LR_REGNUM);
19248 /* Make sure we have a low work register if we need one.
19249 We will need one if we are going to push a high register,
19250 but we are not currently intending to push a low register. */
19251 if ((mask & 0xff) == 0
19252 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19254 /* Use thumb_find_work_register to choose which register
19255 we will use. If the register is live then we will
19256 have to push it. Use LAST_LO_REGNUM as our fallback
19257 choice for the register to select. */
19258 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19259 /* Make sure the register returned by thumb_find_work_register is
19260 not part of the return value. */
19261 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19262 reg = LAST_LO_REGNUM;
19264 if (! call_used_regs[reg])
19265 mask |= 1 << reg;
19268 /* The 504 below is 8 bytes less than 512 because there are two possible
19269 alignment words. We can't tell here if they will be present or not so we
19270 have to play it safe and assume that they are. */
19271 if ((CALLER_INTERWORKING_SLOT_SIZE +
19272 ROUND_UP_WORD (get_frame_size ()) +
19273 crtl->outgoing_args_size) >= 504)
19275 /* This is the same as the code in thumb1_expand_prologue() which
19276 determines which register to use for stack decrement. */
19277 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19278 if (mask & (1 << reg))
19279 break;
19281 if (reg > LAST_LO_REGNUM)
19283 /* Make sure we have a register available for stack decrement. */
19284 mask |= 1 << LAST_LO_REGNUM;
19288 return mask;
19292 /* Return the number of bytes required to save VFP registers. */
19293 static int
19294 arm_get_vfp_saved_size (void)
19296 unsigned int regno;
19297 int count;
19298 int saved;
19300 saved = 0;
19301 /* Space for saved VFP registers. */
19302 if (TARGET_HARD_FLOAT && TARGET_VFP)
19304 count = 0;
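/* Count maximal runs of consecutive live register pairs; each run is
   saved with fstmd, so the ARM10 VFPr1 workaround (an extra pair when
   a run is exactly two pairs) is applied per run, as in
   vfp_emit_fstmd.  */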
19305 for (regno = FIRST_VFP_REGNUM;
19306 regno < LAST_VFP_REGNUM;
19307 regno += 2)
19309 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19310 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19312 if (count > 0)
19314 /* Work around the ARM10 VFPr1 bug. */
19315 if (count == 2 && !arm_arch6)
19316 count++;
19317 saved += count * 8;
19319 count = 0;
19321 else
19322 count++;
19324 if (count > 0)
19326 if (count == 2 && !arm_arch6)
19327 count++;
19328 saved += count * 8;
19331 return saved;
19335 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19336 everything bar the final return instruction. If simple_return is true,
19337 then do not output epilogue, because it has already been emitted in RTL. */
19338 const char *
19339 output_return_instruction (rtx operand, bool really_return, bool reverse,
19340 bool simple_return)
19342 char conditional[10];
19343 char instr[100];
19344 unsigned reg;
19345 unsigned long live_regs_mask;
19346 unsigned long func_type;
19347 arm_stack_offsets *offsets;
19349 func_type = arm_current_func_type ();
19351 if (IS_NAKED (func_type))
19352 return "";
19354 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19356 /* If this function was declared non-returning, and we have
19357 found a tail call, then we have to trust that the called
19358 function won't return. */
19359 if (really_return)
19361 rtx ops[2];
19363 /* Otherwise, trap an attempted return by aborting. */
19364 ops[0] = operand;
19365 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19366 : "abort");
19367 assemble_external_libcall (ops[1]);
19368 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19371 return "";
19374 gcc_assert (!cfun->calls_alloca || really_return);
19376 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19378 cfun->machine->return_used_this_function = 1;
19380 offsets = arm_get_frame_offsets ();
19381 live_regs_mask = offsets->saved_regs_mask;
19383 if (!simple_return && live_regs_mask)
19385 const char * return_reg;
19387 /* If we do not have any special requirements for function exit
19388 (e.g. interworking) then we can load the return address
19389 directly into the PC. Otherwise we must load it into LR. */
19390 if (really_return
19391 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19392 return_reg = reg_names[PC_REGNUM];
19393 else
19394 return_reg = reg_names[LR_REGNUM];
19396 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19398 /* There are three possible reasons for the IP register
19399 being saved. 1) a stack frame was created, in which case
19400 IP contains the old stack pointer, or 2) an ISR routine
19401 corrupted it, or 3) it was saved to align the stack on
19402 iWMMXt. In case 1, restore IP into SP, otherwise just
19403 restore IP. */
19404 if (frame_pointer_needed)
19406 live_regs_mask &= ~ (1 << IP_REGNUM);
19407 live_regs_mask |= (1 << SP_REGNUM);
19409 else
19410 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19413 /* On some ARM architectures it is faster to use LDR rather than
19414 LDM to load a single register. On other architectures, the
19415 cost is the same. In 26 bit mode, or for exception handlers,
19416 we have to use LDM to load the PC so that the CPSR is also
19417 restored. */
19418 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19419 if (live_regs_mask == (1U << reg))
19420 break;
19422 if (reg <= LAST_ARM_REGNUM
19423 && (reg != LR_REGNUM
19424 || ! really_return
19425 || ! IS_INTERRUPT (func_type)))
19427 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19428 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19430 else
19432 char *p;
19433 int first = 1;
19435 /* Generate the load multiple instruction to restore the
19436 registers. Note we can get here, even if
19437 frame_pointer_needed is true, but only if sp already
19438 points to the base of the saved core registers. */
19439 if (live_regs_mask & (1 << SP_REGNUM))
19441 unsigned HOST_WIDE_INT stack_adjust;
19443 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19444 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19446 if (stack_adjust && arm_arch5 && TARGET_ARM)
19447 if (TARGET_UNIFIED_ASM)
19448 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19449 else
19450 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19451 else
19453 /* If we can't use ldmib (SA110 bug),
19454 then try to pop r3 instead. */
19455 if (stack_adjust)
19456 live_regs_mask |= 1 << 3;
19458 if (TARGET_UNIFIED_ASM)
19459 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19460 else
19461 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19464 else
19465 if (TARGET_UNIFIED_ASM)
19466 sprintf (instr, "pop%s\t{", conditional);
19467 else
19468 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19470 p = instr + strlen (instr);
19472 for (reg = 0; reg <= SP_REGNUM; reg++)
19473 if (live_regs_mask & (1 << reg))
19475 int l = strlen (reg_names[reg]);
19477 if (first)
19478 first = 0;
19479 else
19481 memcpy (p, ", ", 2);
19482 p += 2;
19485 memcpy (p, "%|", 2);
19486 memcpy (p + 2, reg_names[reg], l);
19487 p += l + 2;
19490 if (live_regs_mask & (1 << LR_REGNUM))
19492 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19493 /* If returning from an interrupt, restore the CPSR. */
19494 if (IS_INTERRUPT (func_type))
19495 strcat (p, "^");
19497 else
19498 strcpy (p, "}");
19501 output_asm_insn (instr, & operand);
19503 /* See if we need to generate an extra instruction to
19504 perform the actual function return. */
19505 if (really_return
19506 && func_type != ARM_FT_INTERWORKED
19507 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19509 /* The return has already been handled
19510 by loading the LR into the PC. */
19511 return "";
19515 if (really_return)
19517 switch ((int) ARM_FUNC_TYPE (func_type))
19519 case ARM_FT_ISR:
19520 case ARM_FT_FIQ:
19521 /* ??? This is wrong for unified assembly syntax. */
19522 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19523 break;
19525 case ARM_FT_INTERWORKED:
19526 sprintf (instr, "bx%s\t%%|lr", conditional);
19527 break;
19529 case ARM_FT_EXCEPTION:
19530 /* ??? This is wrong for unified assembly syntax. */
19531 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19532 break;
19534 default:
19535 /* Use bx if it's available. */
19536 if (arm_arch5 || arm_arch4t)
19537 sprintf (instr, "bx%s\t%%|lr", conditional);
19538 else
19539 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19540 break;
19543 output_asm_insn (instr, & operand);
19546 return "";
19549 /* Write the function name into the code section, directly preceding
19550 the function prologue.
19552 Code will be output similar to this:
19554 .ascii "arm_poke_function_name", 0
19555 .align
19557 .word 0xff000000 + (t1 - t0)
19558 arm_poke_function_name
19559 mov ip, sp
19560 stmfd sp!, {fp, ip, lr, pc}
19561 sub fp, ip, #4
19563 When performing a stack backtrace, code can inspect the value
19564 of 'pc' stored at 'fp' + 0. If the trace function then looks
19565 at location pc - 12 and the top 8 bits are set, then we know
19566 that there is a function name embedded immediately preceding this
19567 location and that its length is ((pc[-3]) & ~0xff000000).
19569 We assume that pc is declared as a pointer to an unsigned long.
19571 It is of no benefit to output the function name if we are assembling
19572 a leaf function. These function types will not contain a stack
19573 backtrace structure, therefore it is not possible to determine the
19574 function name. */
19575 void
19576 arm_poke_function_name (FILE *stream, const char *name)
19578 unsigned long alignlength;
19579 unsigned long length;
19580 rtx x;
19582 length = strlen (name) + 1;
19583 alignlength = ROUND_UP_WORD (length);
19585 ASM_OUTPUT_ASCII (stream, name, length);
19586 ASM_OUTPUT_ALIGN (stream, 2);
19587 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19588 assemble_aligned_integer (UNITS_PER_WORD, x);
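/* A purely illustrative example (the function name "foo" is hypothetical):
   for a function named "foo", length is 4 (including the NUL) and rounds
   up to 4, so the directives emitted would look something like

	.ascii	"foo\000"
	.align	2
	.word	0xff000004

   immediately preceding the function's prologue.  */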
19591 /* Place some comments into the assembler stream
19592 describing the current function. */
19593 static void
19594 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19596 unsigned long func_type;
19598 /* ??? Do we want to print some of the below anyway? */
19599 if (TARGET_THUMB1)
19600 return;
19602 /* Sanity check. */
19603 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19605 func_type = arm_current_func_type ();
19607 switch ((int) ARM_FUNC_TYPE (func_type))
19609 default:
19610 case ARM_FT_NORMAL:
19611 break;
19612 case ARM_FT_INTERWORKED:
19613 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19614 break;
19615 case ARM_FT_ISR:
19616 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19617 break;
19618 case ARM_FT_FIQ:
19619 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19620 break;
19621 case ARM_FT_EXCEPTION:
19622 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19623 break;
19626 if (IS_NAKED (func_type))
19627 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19629 if (IS_VOLATILE (func_type))
19630 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19632 if (IS_NESTED (func_type))
19633 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19634 if (IS_STACKALIGN (func_type))
19635 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19637 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19638 crtl->args.size,
19639 crtl->args.pretend_args_size, frame_size);
19641 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19642 frame_pointer_needed,
19643 cfun->machine->uses_anonymous_args);
19645 if (cfun->machine->lr_save_eliminated)
19646 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19648 if (crtl->calls_eh_return)
19649 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19653 static void
19654 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19655 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19657 arm_stack_offsets *offsets;
19659 if (TARGET_THUMB1)
19661 int regno;
19663 /* Emit any call-via-reg trampolines that are needed for v4t support
19664 of call_reg and call_value_reg type insns. */
19665 for (regno = 0; regno < LR_REGNUM; regno++)
19667 rtx label = cfun->machine->call_via[regno];
19669 if (label != NULL)
19671 switch_to_section (function_section (current_function_decl));
19672 targetm.asm_out.internal_label (asm_out_file, "L",
19673 CODE_LABEL_NUMBER (label));
19674 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19678 /* ??? Probably not safe to set this here, since it assumes that a
19679 function will be emitted as assembly immediately after we generate
19680 RTL for it. This does not happen for inline functions. */
19681 cfun->machine->return_used_this_function = 0;
19683 else /* TARGET_32BIT */
19685 /* We need to take into account any stack-frame rounding. */
19686 offsets = arm_get_frame_offsets ();
19688 gcc_assert (!use_return_insn (FALSE, NULL)
19689 || (cfun->machine->return_used_this_function != 0)
19690 || offsets->saved_regs == offsets->outgoing_args
19691 || frame_pointer_needed);
19695 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19696 STR and STRD. If an even number of registers is being pushed, one
19697 or more STRD patterns are created for each register pair. If an
19698 odd number of registers is pushed, emit an initial STR followed by
19699 as many STRD instructions as are needed. This works best when the
19700 stack is initially 64-bit aligned (the normal case), since it
19701 ensures that each STRD is also 64-bit aligned. */
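/* As a rough illustration (the register choice is hypothetical): for a
   saved_regs_mask containing r4, r5 and r6 (an odd count), the expected
   sequence is something like

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   i.e. the lone register is pushed first with writeback that allocates
   all twelve bytes, leaving the following STRD doubleword aligned.  */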
19702 static void
19703 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19705 int num_regs = 0;
19706 int i;
19707 int regno;
19708 rtx par = NULL_RTX;
19709 rtx dwarf = NULL_RTX;
19710 rtx tmp;
19711 bool first = true;
19713 num_regs = bit_count (saved_regs_mask);
19715 /* Must be at least one register to save, and can't save SP or PC. */
19716 gcc_assert (num_regs > 0 && num_regs <= 14);
19717 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19718 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19720 /* Create sequence for DWARF info. All the frame-related data for
19721 debugging is held in this wrapper. */
19722 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19724 /* Describe the stack adjustment. */
19725 tmp = gen_rtx_SET (VOIDmode,
19726 stack_pointer_rtx,
19727 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19728 RTX_FRAME_RELATED_P (tmp) = 1;
19729 XVECEXP (dwarf, 0, 0) = tmp;
19731 /* Find the first register. */
19732 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19735 i = 0;
19737 /* If there's an odd number of registers to push, start off by
19738 pushing a single register. This ensures that subsequent strd
19739 operations are dword aligned (assuming that SP was originally
19740 64-bit aligned). */
19741 if ((num_regs & 1) != 0)
19743 rtx reg, mem, insn;
19745 reg = gen_rtx_REG (SImode, regno);
19746 if (num_regs == 1)
19747 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19748 stack_pointer_rtx));
19749 else
19750 mem = gen_frame_mem (Pmode,
19751 gen_rtx_PRE_MODIFY
19752 (Pmode, stack_pointer_rtx,
19753 plus_constant (Pmode, stack_pointer_rtx,
19754 -4 * num_regs)));
19756 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19757 RTX_FRAME_RELATED_P (tmp) = 1;
19758 insn = emit_insn (tmp);
19759 RTX_FRAME_RELATED_P (insn) = 1;
19760 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19761 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19762 reg);
19763 RTX_FRAME_RELATED_P (tmp) = 1;
19764 i++;
19765 regno++;
19766 XVECEXP (dwarf, 0, i) = tmp;
19767 first = false;
19770 while (i < num_regs)
19771 if (saved_regs_mask & (1 << regno))
19773 rtx reg1, reg2, mem1, mem2;
19774 rtx tmp0, tmp1, tmp2;
19775 int regno2;
19777 /* Find the register to pair with this one. */
19778 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19779 regno2++)
19782 reg1 = gen_rtx_REG (SImode, regno);
19783 reg2 = gen_rtx_REG (SImode, regno2);
19785 if (first)
19787 rtx insn;
19789 first = false;
19790 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19791 stack_pointer_rtx,
19792 -4 * num_regs));
19793 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19794 stack_pointer_rtx,
19795 -4 * (num_regs - 1)));
19796 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19797 plus_constant (Pmode, stack_pointer_rtx,
19798 -4 * (num_regs)));
19799 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19800 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19801 RTX_FRAME_RELATED_P (tmp0) = 1;
19802 RTX_FRAME_RELATED_P (tmp1) = 1;
19803 RTX_FRAME_RELATED_P (tmp2) = 1;
19804 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19805 XVECEXP (par, 0, 0) = tmp0;
19806 XVECEXP (par, 0, 1) = tmp1;
19807 XVECEXP (par, 0, 2) = tmp2;
19808 insn = emit_insn (par);
19809 RTX_FRAME_RELATED_P (insn) = 1;
19810 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19812 else
19814 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19815 stack_pointer_rtx,
19816 4 * i));
19817 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19818 stack_pointer_rtx,
19819 4 * (i + 1)));
19820 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19821 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19822 RTX_FRAME_RELATED_P (tmp1) = 1;
19823 RTX_FRAME_RELATED_P (tmp2) = 1;
19824 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19825 XVECEXP (par, 0, 0) = tmp1;
19826 XVECEXP (par, 0, 1) = tmp2;
19827 emit_insn (par);
19830 /* Create unwind information. This is an approximation. */
19831 tmp1 = gen_rtx_SET (VOIDmode,
19832 gen_frame_mem (Pmode,
19833 plus_constant (Pmode,
19834 stack_pointer_rtx,
19835 4 * i)),
19836 reg1);
19837 tmp2 = gen_rtx_SET (VOIDmode,
19838 gen_frame_mem (Pmode,
19839 plus_constant (Pmode,
19840 stack_pointer_rtx,
19841 4 * (i + 1))),
19842 reg2);
19844 RTX_FRAME_RELATED_P (tmp1) = 1;
19845 RTX_FRAME_RELATED_P (tmp2) = 1;
19846 XVECEXP (dwarf, 0, i + 1) = tmp1;
19847 XVECEXP (dwarf, 0, i + 2) = tmp2;
19848 i += 2;
19849 regno = regno2 + 1;
19851 else
19852 regno++;
19854 return;
19857 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19858 whenever possible, otherwise it emits single-word stores. The first store
19859 also allocates stack space for all saved registers, using writeback with
19860 post-addressing mode. All other stores use offset addressing. If no STRD
19861 can be emitted, this function emits a sequence of single-word stores,
19862 and not an STM as before, because single-word stores provide more scheduling
19863 freedom and can be turned into an STM by peephole optimizations. */
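/* For illustration only (registers chosen arbitrarily): with a
   saved_regs_mask containing r4, r5 and r6, the expected sequence is
   something like

	strd	r4, r5, [sp, #-12]!
	str	r6, [sp, #8]

   where the first store allocates the whole 12-byte area with writeback
   and the remaining store uses plain offset addressing.  */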
19864 static void
19865 arm_emit_strd_push (unsigned long saved_regs_mask)
19867 int num_regs = 0;
19868 int i, j, dwarf_index = 0;
19869 int offset = 0;
19870 rtx dwarf = NULL_RTX;
19871 rtx insn = NULL_RTX;
19872 rtx tmp, mem;
19874 /* TODO: More efficient code can be emitted by changing the
19875 layout, e.g., first push all pairs that can use STRD to keep the
19876 stack aligned, and then push all other registers. */
19877 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19878 if (saved_regs_mask & (1 << i))
19879 num_regs++;
19881 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19882 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19883 gcc_assert (num_regs > 0);
19885 /* Create sequence for DWARF info. */
19886 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19888 /* For dwarf info, we generate explicit stack update. */
19889 tmp = gen_rtx_SET (VOIDmode,
19890 stack_pointer_rtx,
19891 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19892 RTX_FRAME_RELATED_P (tmp) = 1;
19893 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19895 /* Save registers. */
19896 offset = - 4 * num_regs;
19897 j = 0;
19898 while (j <= LAST_ARM_REGNUM)
19899 if (saved_regs_mask & (1 << j))
19901 if ((j % 2 == 0)
19902 && (saved_regs_mask & (1 << (j + 1))))
19904 /* Current register and next register form a register pair for
19905 which STRD can be generated. */
19906 if (offset < 0)
19908 /* Allocate stack space for all saved registers. */
19909 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19910 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19911 mem = gen_frame_mem (DImode, tmp);
19912 offset = 0;
19914 else if (offset > 0)
19915 mem = gen_frame_mem (DImode,
19916 plus_constant (Pmode,
19917 stack_pointer_rtx,
19918 offset));
19919 else
19920 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19922 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19923 RTX_FRAME_RELATED_P (tmp) = 1;
19924 tmp = emit_insn (tmp);
19926 /* Record the first store insn. */
19927 if (dwarf_index == 1)
19928 insn = tmp;
19930 /* Generate dwarf info. */
19931 mem = gen_frame_mem (SImode,
19932 plus_constant (Pmode,
19933 stack_pointer_rtx,
19934 offset));
19935 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19936 RTX_FRAME_RELATED_P (tmp) = 1;
19937 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19939 mem = gen_frame_mem (SImode,
19940 plus_constant (Pmode,
19941 stack_pointer_rtx,
19942 offset + 4));
19943 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19944 RTX_FRAME_RELATED_P (tmp) = 1;
19945 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19947 offset += 8;
19948 j += 2;
19950 else
19952 /* Emit a single word store. */
19953 if (offset < 0)
19955 /* Allocate stack space for all saved registers. */
19956 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19957 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19958 mem = gen_frame_mem (SImode, tmp);
19959 offset = 0;
19961 else if (offset > 0)
19962 mem = gen_frame_mem (SImode,
19963 plus_constant (Pmode,
19964 stack_pointer_rtx,
19965 offset));
19966 else
19967 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19969 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19970 RTX_FRAME_RELATED_P (tmp) = 1;
19971 tmp = emit_insn (tmp);
19973 /* Record the first store insn. */
19974 if (dwarf_index == 1)
19975 insn = tmp;
19977 /* Generate dwarf info. */
19978 mem = gen_frame_mem (SImode,
19979 plus_constant(Pmode,
19980 stack_pointer_rtx,
19981 offset));
19982 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19983 RTX_FRAME_RELATED_P (tmp) = 1;
19984 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19986 offset += 4;
19987 j += 1;
19990 else
19991 j++;
19993 /* Attach dwarf info to the first insn we generate. */
19994 gcc_assert (insn != NULL_RTX);
19995 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19996 RTX_FRAME_RELATED_P (insn) = 1;
19999 /* Generate and emit an insn that we will recognize as a push_multi.
20000 Unfortunately, since this insn does not reflect very well the actual
20001 semantics of the operation, we need to annotate the insn for the benefit
20002 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20003 MASK for registers that should be annotated for DWARF2 frame unwind
20004 information. */
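/* By way of example (registers chosen arbitrarily): a MASK of
   {r4, r5, lr} produces a single instruction that assembles to roughly

	push	{r4, r5, lr}

   (i.e. stmfd sp!, {r4, r5, lr}), while the attached DWARF note
   describes the equivalent sp decrement of 12 followed by the three
   individual stores, as laid out in the comment inside the function
   below.  */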
20005 static rtx
20006 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20008 int num_regs = 0;
20009 int num_dwarf_regs = 0;
20010 int i, j;
20011 rtx par;
20012 rtx dwarf;
20013 int dwarf_par_index;
20014 rtx tmp, reg;
20016 /* We don't record the PC in the dwarf frame information. */
20017 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20019 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20021 if (mask & (1 << i))
20022 num_regs++;
20023 if (dwarf_regs_mask & (1 << i))
20024 num_dwarf_regs++;
20027 gcc_assert (num_regs && num_regs <= 16);
20028 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20030 /* For the body of the insn we are going to generate an UNSPEC in
20031 parallel with several USEs. This allows the insn to be recognized
20032 by the push_multi pattern in the arm.md file.
20034 The body of the insn looks something like this:
20036 (parallel [
20037 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20038 (const_int:SI <num>)))
20039 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20040 (use (reg:SI XX))
20041 (use (reg:SI YY))
20045 For the frame note however, we try to be more explicit and actually
20046 show each register being stored into the stack frame, plus a (single)
20047 decrement of the stack pointer. We do it this way in order to be
20048 friendly to the stack unwinding code, which only wants to see a single
20049 stack decrement per instruction. The RTL we generate for the note looks
20050 something like this:
20052 (sequence [
20053 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20054 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20055 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20056 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20060 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20061 instead we'd have a parallel expression detailing all
20062 the stores to the various memory addresses so that debug
20063 information is more up-to-date. Remember however while writing
20064 this to take care of the constraints with the push instruction.
20066 Note also that this has to be taken care of for the VFP registers.
20068 For more see PR43399. */
20070 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20071 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20072 dwarf_par_index = 1;
20074 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20076 if (mask & (1 << i))
20078 reg = gen_rtx_REG (SImode, i);
20080 XVECEXP (par, 0, 0)
20081 = gen_rtx_SET (VOIDmode,
20082 gen_frame_mem
20083 (BLKmode,
20084 gen_rtx_PRE_MODIFY (Pmode,
20085 stack_pointer_rtx,
20086 plus_constant
20087 (Pmode, stack_pointer_rtx,
20088 -4 * num_regs))
20090 gen_rtx_UNSPEC (BLKmode,
20091 gen_rtvec (1, reg),
20092 UNSPEC_PUSH_MULT));
20094 if (dwarf_regs_mask & (1 << i))
20096 tmp = gen_rtx_SET (VOIDmode,
20097 gen_frame_mem (SImode, stack_pointer_rtx),
20098 reg);
20099 RTX_FRAME_RELATED_P (tmp) = 1;
20100 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20103 break;
20107 for (j = 1, i++; j < num_regs; i++)
20109 if (mask & (1 << i))
20111 reg = gen_rtx_REG (SImode, i);
20113 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20115 if (dwarf_regs_mask & (1 << i))
20118 = gen_rtx_SET (VOIDmode,
20119 gen_frame_mem
20120 (SImode,
20121 plus_constant (Pmode, stack_pointer_rtx,
20122 4 * j)),
20123 reg);
20124 RTX_FRAME_RELATED_P (tmp) = 1;
20125 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20128 j++;
20132 par = emit_insn (par);
20134 tmp = gen_rtx_SET (VOIDmode,
20135 stack_pointer_rtx,
20136 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20137 RTX_FRAME_RELATED_P (tmp) = 1;
20138 XVECEXP (dwarf, 0, 0) = tmp;
20140 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20142 return par;
20145 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20146 SIZE is the offset to be adjusted.
20147 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20148 static void
20149 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20151 rtx dwarf;
20153 RTX_FRAME_RELATED_P (insn) = 1;
20154 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20155 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20158 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20159 SAVED_REGS_MASK shows which registers need to be restored.
20161 Unfortunately, since this insn does not reflect very well the actual
20162 semantics of the operation, we need to annotate the insn for the benefit
20163 of DWARF2 frame unwind information. */
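/* As a rough illustration (registers chosen arbitrarily): for a
   SAVED_REGS_MASK of {r4, r5, pc} this emits a single jump insn that
   assembles to something like

	pop	{r4, r5, pc}

   whereas without PC in the mask an ordinary insn is emitted instead and
   a REG_CFA_ADJUST_CFA note records the stack pointer adjustment.  */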
20164 static void
20165 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20167 int num_regs = 0;
20168 int i, j;
20169 rtx par;
20170 rtx dwarf = NULL_RTX;
20171 rtx tmp, reg;
20172 bool return_in_pc;
20173 int offset_adj;
20174 int emit_update;
20176 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20177 offset_adj = return_in_pc ? 1 : 0;
20178 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20179 if (saved_regs_mask & (1 << i))
20180 num_regs++;
20182 gcc_assert (num_regs && num_regs <= 16);
20184 /* If SP is in reglist, then we don't emit SP update insn. */
20185 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20187 /* The parallel needs to hold num_regs SETs
20188 and one SET for the stack update. */
20189 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20191 if (return_in_pc)
20193 tmp = ret_rtx;
20194 XVECEXP (par, 0, 0) = tmp;
20197 if (emit_update)
20199 /* Increment the stack pointer, based on there being
20200 num_regs 4-byte registers to restore. */
20201 tmp = gen_rtx_SET (VOIDmode,
20202 stack_pointer_rtx,
20203 plus_constant (Pmode,
20204 stack_pointer_rtx,
20205 4 * num_regs));
20206 RTX_FRAME_RELATED_P (tmp) = 1;
20207 XVECEXP (par, 0, offset_adj) = tmp;
20210 /* Now restore every reg, which may include PC. */
20211 for (j = 0, i = 0; j < num_regs; i++)
20212 if (saved_regs_mask & (1 << i))
20214 reg = gen_rtx_REG (SImode, i);
20215 if ((num_regs == 1) && emit_update && !return_in_pc)
20217 /* Emit single load with writeback. */
20218 tmp = gen_frame_mem (SImode,
20219 gen_rtx_POST_INC (Pmode,
20220 stack_pointer_rtx));
20221 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20222 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20223 return;
20226 tmp = gen_rtx_SET (VOIDmode,
20227 reg,
20228 gen_frame_mem
20229 (SImode,
20230 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20231 RTX_FRAME_RELATED_P (tmp) = 1;
20232 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20234 /* We need to maintain a sequence for DWARF info too. As dwarf info
20235 should not have PC, skip PC. */
20236 if (i != PC_REGNUM)
20237 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20239 j++;
20242 if (return_in_pc)
20243 par = emit_jump_insn (par);
20244 else
20245 par = emit_insn (par);
20247 REG_NOTES (par) = dwarf;
20248 if (!return_in_pc)
20249 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20250 stack_pointer_rtx, stack_pointer_rtx);
20253 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20254 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20256 Unfortunately, since this insn does not reflect very well the actual
20257 semantics of the operation, we need to annotate the insn for the benefit
20258 of DWARF2 frame unwind information. */
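/* A purely illustrative example (registers chosen arbitrarily): popping
   four D-registers starting at d8 with BASE_REG being sp assembles to
   something like

	vldm	sp!, {d8-d11}

   and a request for more than 16 D-registers is split into two such
   instructions by the recursive calls below.  */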
20259 static void
20260 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20262 int i, j;
20263 rtx par;
20264 rtx dwarf = NULL_RTX;
20265 rtx tmp, reg;
20267 gcc_assert (num_regs && num_regs <= 32);
20269 /* Workaround ARM10 VFPr1 bug. */
20270 if (num_regs == 2 && !arm_arch6)
20272 if (first_reg == 15)
20273 first_reg--;
20275 num_regs++;
20278 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20279 there could be up to 32 D-registers to restore.
20280 If there are more than 16 D-registers, make two recursive calls,
20281 each of which emits one pop_multi instruction. */
20282 if (num_regs > 16)
20284 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20285 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20286 return;
20289 /* The parallel needs to hold num_regs SETs
20290 and one SET for the stack update. */
20291 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20293 /* Increment the stack pointer, based on there being
20294 num_regs 8-byte registers to restore. */
20295 tmp = gen_rtx_SET (VOIDmode,
20296 base_reg,
20297 plus_constant (Pmode, base_reg, 8 * num_regs));
20298 RTX_FRAME_RELATED_P (tmp) = 1;
20299 XVECEXP (par, 0, 0) = tmp;
20301 /* Now show every reg that will be restored, using a SET for each. */
20302 for (j = 0, i=first_reg; j < num_regs; i += 2)
20304 reg = gen_rtx_REG (DFmode, i);
20306 tmp = gen_rtx_SET (VOIDmode,
20307 reg,
20308 gen_frame_mem
20309 (DFmode,
20310 plus_constant (Pmode, base_reg, 8 * j)));
20311 RTX_FRAME_RELATED_P (tmp) = 1;
20312 XVECEXP (par, 0, j + 1) = tmp;
20314 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20316 j++;
20319 par = emit_insn (par);
20320 REG_NOTES (par) = dwarf;
20322 /* Make sure the CFA isn't left on IP_REGNUM, to allow unwinding from FP. */
20323 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20325 RTX_FRAME_RELATED_P (par) = 1;
20326 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20328 else
20329 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20330 base_reg, base_reg);
20333 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20334 even number of registers is being popped, multiple LDRD patterns are created
20335 for all register pairs. If an odd number of registers is popped, the last
20336 register is loaded using an LDR pattern. */
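/* As a rough illustration (registers chosen arbitrarily): for a
   saved_regs_mask of {r4, r5, r6, r7} the expected sequence is
   something like

	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16

   and when PC is in the mask the final register is instead loaded with a
   post-incrementing LDR such as "ldr pc, [sp], #4".  */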
20337 static void
20338 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20340 int num_regs = 0;
20341 int i, j;
20342 rtx par = NULL_RTX;
20343 rtx dwarf = NULL_RTX;
20344 rtx tmp, reg, tmp1;
20345 bool return_in_pc;
20347 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20348 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20349 if (saved_regs_mask & (1 << i))
20350 num_regs++;
20352 gcc_assert (num_regs && num_regs <= 16);
20354 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20355 to be popped. So, if num_regs is even, now it will become odd,
20356 and we can generate pop with PC. If num_regs is odd, it will be
20357 even now, and ldr with return can be generated for PC. */
20358 if (return_in_pc)
20359 num_regs--;
20361 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20363 /* Var j iterates over all the registers to gather all the registers in
20364 saved_regs_mask. Var i gives index of saved registers in stack frame.
20365 A PARALLEL RTX of register-pair is created here, so that pattern for
20366 LDRD can be matched. As PC is always last register to be popped, and
20367 we have already decremented num_regs if PC, we don't have to worry
20368 about PC in this loop. */
20369 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20370 if (saved_regs_mask & (1 << j))
20372 /* Create RTX for memory load. */
20373 reg = gen_rtx_REG (SImode, j);
20374 tmp = gen_rtx_SET (SImode,
20375 reg,
20376 gen_frame_mem (SImode,
20377 plus_constant (Pmode,
20378 stack_pointer_rtx, 4 * i)));
20379 RTX_FRAME_RELATED_P (tmp) = 1;
20381 if (i % 2 == 0)
20383 /* When saved-register index (i) is even, the RTX to be emitted is
20384 yet to be created. Hence create it first. The LDRD pattern we
20385 are generating is :
20386 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20387 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20388 where target registers need not be consecutive. */
20389 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20390 dwarf = NULL_RTX;
20393 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20394 added as 0th element and if i is odd, reg_i is added as 1st element
20395 of LDRD pattern shown above. */
20396 XVECEXP (par, 0, (i % 2)) = tmp;
20397 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20399 if ((i % 2) == 1)
20401 /* When saved-register index (i) is odd, RTXs for both the registers
20402 to be loaded are generated in above given LDRD pattern, and the
20403 pattern can be emitted now. */
20404 par = emit_insn (par);
20405 REG_NOTES (par) = dwarf;
20406 RTX_FRAME_RELATED_P (par) = 1;
20409 i++;
20412 /* If the number of registers pushed is odd AND return_in_pc is false, OR the
20413 number of registers is even AND return_in_pc is true, the last register is
20414 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20415 then use LDR with post increment. */
20417 /* Increment the stack pointer, based on there being
20418 num_regs 4-byte registers to restore. */
20419 tmp = gen_rtx_SET (VOIDmode,
20420 stack_pointer_rtx,
20421 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20422 RTX_FRAME_RELATED_P (tmp) = 1;
20423 tmp = emit_insn (tmp);
20424 if (!return_in_pc)
20426 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20427 stack_pointer_rtx, stack_pointer_rtx);
20430 dwarf = NULL_RTX;
20432 if (((num_regs % 2) == 1 && !return_in_pc)
20433 || ((num_regs % 2) == 0 && return_in_pc))
20435 /* Scan for the single register to be popped. Skip until the saved
20436 register is found. */
20437 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20439 /* Gen LDR with post increment here. */
20440 tmp1 = gen_rtx_MEM (SImode,
20441 gen_rtx_POST_INC (SImode,
20442 stack_pointer_rtx));
20443 set_mem_alias_set (tmp1, get_frame_alias_set ());
20445 reg = gen_rtx_REG (SImode, j);
20446 tmp = gen_rtx_SET (SImode, reg, tmp1);
20447 RTX_FRAME_RELATED_P (tmp) = 1;
20448 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20450 if (return_in_pc)
20452 /* If return_in_pc, j must be PC_REGNUM. */
20453 gcc_assert (j == PC_REGNUM);
20454 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20455 XVECEXP (par, 0, 0) = ret_rtx;
20456 XVECEXP (par, 0, 1) = tmp;
20457 par = emit_jump_insn (par);
20459 else
20461 par = emit_insn (tmp);
20462 REG_NOTES (par) = dwarf;
20463 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20464 stack_pointer_rtx, stack_pointer_rtx);
20468 else if ((num_regs % 2) == 1 && return_in_pc)
20470 /* There are 2 registers to be popped. So, generate the pattern
20471 pop_multiple_with_stack_update_and_return to pop in PC. */
20472 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20475 return;
20478 /* LDRD in ARM mode needs consecutive registers as operands. This function
20479 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20480 offset addressing and then generates one separate stack update. This provides
20481 more scheduling freedom, compared to writeback on every load. However,
20482 if the function returns using load into PC directly
20483 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20484 before the last load. TODO: Add a peephole optimization to recognize
20485 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20486 peephole optimization to merge the load at stack-offset zero
20487 with the stack update instruction using load with writeback
20488 in post-index addressing mode. */
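/* For illustration only (registers chosen arbitrarily): for a
   saved_regs_mask of {r4, r5, r6} the expected sequence is something like

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

   and if PC were also to be popped, the stack update would be followed by
   an "ldr pc, [sp], #4" style load that performs the return.  */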
20489 static void
20490 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20492 int j = 0;
20493 int offset = 0;
20494 rtx par = NULL_RTX;
20495 rtx dwarf = NULL_RTX;
20496 rtx tmp, mem;
20498 /* Restore saved registers. */
20499 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20500 j = 0;
20501 while (j <= LAST_ARM_REGNUM)
20502 if (saved_regs_mask & (1 << j))
20504 if ((j % 2) == 0
20505 && (saved_regs_mask & (1 << (j + 1)))
20506 && (j + 1) != PC_REGNUM)
20508 /* Current register and next register form register pair for which
20509 LDRD can be generated. PC is always the last register popped, and
20510 we handle it separately. */
20511 if (offset > 0)
20512 mem = gen_frame_mem (DImode,
20513 plus_constant (Pmode,
20514 stack_pointer_rtx,
20515 offset));
20516 else
20517 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20519 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20520 tmp = emit_insn (tmp);
20521 RTX_FRAME_RELATED_P (tmp) = 1;
20523 /* Generate dwarf info. */
20525 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20526 gen_rtx_REG (SImode, j),
20527 NULL_RTX);
20528 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20529 gen_rtx_REG (SImode, j + 1),
20530 dwarf);
20532 REG_NOTES (tmp) = dwarf;
20534 offset += 8;
20535 j += 2;
20537 else if (j != PC_REGNUM)
20539 /* Emit a single word load. */
20540 if (offset > 0)
20541 mem = gen_frame_mem (SImode,
20542 plus_constant (Pmode,
20543 stack_pointer_rtx,
20544 offset));
20545 else
20546 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20548 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20549 tmp = emit_insn (tmp);
20550 RTX_FRAME_RELATED_P (tmp) = 1;
20552 /* Generate dwarf info. */
20553 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20554 gen_rtx_REG (SImode, j),
20555 NULL_RTX);
20557 offset += 4;
20558 j += 1;
20560 else /* j == PC_REGNUM */
20561 j++;
20563 else
20564 j++;
20566 /* Update the stack. */
20567 if (offset > 0)
20569 tmp = gen_rtx_SET (Pmode,
20570 stack_pointer_rtx,
20571 plus_constant (Pmode,
20572 stack_pointer_rtx,
20573 offset));
20574 tmp = emit_insn (tmp);
20575 arm_add_cfa_adjust_cfa_note (tmp, offset,
20576 stack_pointer_rtx, stack_pointer_rtx);
20577 offset = 0;
20580 if (saved_regs_mask & (1 << PC_REGNUM))
20582 /* Only PC is to be popped. */
20583 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20584 XVECEXP (par, 0, 0) = ret_rtx;
20585 tmp = gen_rtx_SET (SImode,
20586 gen_rtx_REG (SImode, PC_REGNUM),
20587 gen_frame_mem (SImode,
20588 gen_rtx_POST_INC (SImode,
20589 stack_pointer_rtx)));
20590 RTX_FRAME_RELATED_P (tmp) = 1;
20591 XVECEXP (par, 0, 1) = tmp;
20592 par = emit_jump_insn (par);
20594 /* Generate dwarf info. */
20595 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20596 gen_rtx_REG (SImode, PC_REGNUM),
20597 NULL_RTX);
20598 REG_NOTES (par) = dwarf;
20599 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20600 stack_pointer_rtx, stack_pointer_rtx);
20604 /* Calculate the size of the return value that is passed in registers. */
20605 static unsigned
20606 arm_size_return_regs (void)
20608 enum machine_mode mode;
20610 if (crtl->return_rtx != 0)
20611 mode = GET_MODE (crtl->return_rtx);
20612 else
20613 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20615 return GET_MODE_SIZE (mode);
20618 /* Return true if the current function needs to save/restore LR. */
20619 static bool
20620 thumb_force_lr_save (void)
20622 return !cfun->machine->lr_save_eliminated
20623 && (!leaf_function_p ()
20624 || thumb_far_jump_used_p ()
20625 || df_regs_ever_live_p (LR_REGNUM));
20628 /* We do not know if r3 will be available because
20629 there may be an indirect tailcall happening in this
20630 particular case. */
20631 static bool
20632 is_indirect_tailcall_p (rtx call)
20634 rtx pat = PATTERN (call);
20636 /* Indirect tail call. */
20637 pat = XVECEXP (pat, 0, 0);
20638 if (GET_CODE (pat) == SET)
20639 pat = SET_SRC (pat);
20641 pat = XEXP (XEXP (pat, 0), 0);
20642 return REG_P (pat);
20645 /* Return true if r3 is used by any of the tail call insns in the
20646 current function. */
20647 static bool
20648 any_sibcall_could_use_r3 (void)
20650 edge_iterator ei;
20651 edge e;
20653 if (!crtl->tail_call_emit)
20654 return false;
20655 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20656 if (e->flags & EDGE_SIBCALL)
20658 rtx call = BB_END (e->src);
20659 if (!CALL_P (call))
20660 call = prev_nonnote_nondebug_insn (call);
20661 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20662 if (find_regno_fusage (call, USE, 3)
20663 || is_indirect_tailcall_p (call))
20664 return true;
20666 return false;
20670 /* Compute the distance from register FROM to register TO.
20671 These can be the arg pointer (26), the soft frame pointer (25),
20672 the stack pointer (13) or the hard frame pointer (11).
20673 In thumb mode r7 is used as the soft frame pointer, if needed.
20674 Typical stack layout looks like this:
20676 old stack pointer -> | |
20677 ----
20678 | | \
20679 | | saved arguments for
20680 | | vararg functions
20681 | | /
20683 hard FP & arg pointer -> | | \
20684 | | stack
20685 | | frame
20686 | | /
20688 | | \
20689 | | call saved
20690 | | registers
20691 soft frame pointer -> | | /
20693 | | \
20694 | | local
20695 | | variables
20696 locals base pointer -> | | /
20698 | | \
20699 | | outgoing
20700 | | arguments
20701 current stack pointer -> | | /
20704 For a given function some or all of these stack components
20705 may not be needed, giving rise to the possibility of
20706 eliminating some of the registers.
20708 The values returned by this function must reflect the behavior
20709 of arm_expand_prologue() and arm_compute_save_reg_mask().
20711 The sign of the number returned reflects the direction of stack
20712 growth, so the values are positive for all eliminations except
20713 from the soft frame pointer to the hard frame pointer.
20715 SFP may point just inside the local variables block to ensure correct
20716 alignment. */
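/* Purely illustrative example (the numbers are hypothetical and assume an
   ARM-state function with no frame pointer, no varargs, no static chain
   and a CALLER_INTERWORKING_SLOT_SIZE of zero): a function that saves
   {r4, lr}, has 16 bytes of locals and 8 bytes of outgoing arguments gets

	saved_args = 0, saved_regs = 8, soft_frame = 8,
	locals_base = 24, outgoing_args = 32

   giving elimination offsets of 28 from the arg pointer to sp, 24 from
   the soft frame pointer to sp, and 8 from the arg pointer to the soft
   frame pointer.  */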
20719 /* Calculate stack offsets. These are used to calculate register elimination
20720 offsets and in prologue/epilogue code. Also calculates which registers
20721 should be saved. */
20723 static arm_stack_offsets *
20724 arm_get_frame_offsets (void)
20726 struct arm_stack_offsets *offsets;
20727 unsigned long func_type;
20728 int leaf;
20729 int saved;
20730 int core_saved;
20731 HOST_WIDE_INT frame_size;
20732 int i;
20734 offsets = &cfun->machine->stack_offsets;
20736 /* We need to know if we are a leaf function. Unfortunately, it
20737 is possible to be called after start_sequence has been called,
20738 which causes get_insns to return the insns for the sequence,
20739 not the function, which will cause leaf_function_p to return
20740 the incorrect result.
20742 Fortunately, we only need to know about leaf functions once reload has completed, and the
20743 frame size cannot be changed after that time, so we can safely
20744 use the cached value. */
20746 if (reload_completed)
20747 return offsets;
20749 /* Initially this is the size of the local variables. It will be translated
20750 into an offset once we have determined the size of preceding data. */
20751 frame_size = ROUND_UP_WORD (get_frame_size ());
20753 leaf = leaf_function_p ();
20755 /* Space for variadic functions. */
20756 offsets->saved_args = crtl->args.pretend_args_size;
20758 /* In Thumb mode this is incorrect, but never used. */
20759 offsets->frame
20760 = (offsets->saved_args
20761 + arm_compute_static_chain_stack_bytes ()
20762 + (frame_pointer_needed ? 4 : 0));
20764 if (TARGET_32BIT)
20766 unsigned int regno;
20768 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20769 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20770 saved = core_saved;
20772 /* We know that SP will be doubleword aligned on entry, and we must
20773 preserve that condition at any subroutine call. We also require the
20774 soft frame pointer to be doubleword aligned. */
20776 if (TARGET_REALLY_IWMMXT)
20778 /* Check for the call-saved iWMMXt registers. */
20779 for (regno = FIRST_IWMMXT_REGNUM;
20780 regno <= LAST_IWMMXT_REGNUM;
20781 regno++)
20782 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20783 saved += 8;
20786 func_type = arm_current_func_type ();
20787 /* Space for saved VFP registers. */
20788 if (! IS_VOLATILE (func_type)
20789 && TARGET_HARD_FLOAT && TARGET_VFP)
20790 saved += arm_get_vfp_saved_size ();
20792 else /* TARGET_THUMB1 */
20794 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20795 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20796 saved = core_saved;
20797 if (TARGET_BACKTRACE)
20798 saved += 16;
20801 /* Saved registers include the stack frame. */
20802 offsets->saved_regs
20803 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20804 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20806 /* A leaf function does not need any stack alignment if it has nothing
20807 on the stack. */
20808 if (leaf && frame_size == 0
20809 /* However if it calls alloca(), we have a dynamically allocated
20810 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20811 && ! cfun->calls_alloca)
20813 offsets->outgoing_args = offsets->soft_frame;
20814 offsets->locals_base = offsets->soft_frame;
20815 return offsets;
20818 /* Ensure SFP has the correct alignment. */
20819 if (ARM_DOUBLEWORD_ALIGN
20820 && (offsets->soft_frame & 7))
20822 offsets->soft_frame += 4;
20823 /* Try to align stack by pushing an extra reg. Don't bother doing this
20824 when there is a stack frame as the alignment will be rolled into
20825 the normal stack adjustment. */
20826 if (frame_size + crtl->outgoing_args_size == 0)
20828 int reg = -1;
20830 /* Register r3 is caller-saved. Normally it does not need to be
20831 saved on entry by the prologue. However if we choose to save
20832 it for padding then we may confuse the compiler into thinking
20833 a prologue sequence is required when in fact it is not. This
20834 will occur when shrink-wrapping if r3 is used as a scratch
20835 register and there are no other callee-saved writes.
20837 This situation can be avoided when other callee-saved registers
20838 are available and r3 is not mandatory if we choose a callee-saved
20839 register for padding. */
20840 bool prefer_callee_reg_p = false;
20842 /* If it is safe to use r3, then do so. This sometimes
20843 generates better code on Thumb-2 by avoiding the need to
20844 use 32-bit push/pop instructions. */
20845 if (! any_sibcall_could_use_r3 ()
20846 && arm_size_return_regs () <= 12
20847 && (offsets->saved_regs_mask & (1 << 3)) == 0
20848 && (TARGET_THUMB2
20849 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20851 reg = 3;
20852 if (!TARGET_THUMB2)
20853 prefer_callee_reg_p = true;
20855 if (reg == -1
20856 || prefer_callee_reg_p)
20858 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20860 /* Avoid fixed registers; they may be changed at
20861 arbitrary times so it's unsafe to restore them
20862 during the epilogue. */
20863 if (!fixed_regs[i]
20864 && (offsets->saved_regs_mask & (1 << i)) == 0)
20866 reg = i;
20867 break;
20872 if (reg != -1)
20874 offsets->saved_regs += 4;
20875 offsets->saved_regs_mask |= (1 << reg);
20880 offsets->locals_base = offsets->soft_frame + frame_size;
20881 offsets->outgoing_args = (offsets->locals_base
20882 + crtl->outgoing_args_size);
20884 if (ARM_DOUBLEWORD_ALIGN)
20886 /* Ensure SP remains doubleword aligned. */
20887 if (offsets->outgoing_args & 7)
20888 offsets->outgoing_args += 4;
20889 gcc_assert (!(offsets->outgoing_args & 7));
20892 return offsets;
20896 /* Calculate the relative offsets for the different stack pointers. Positive
20897 offsets are in the direction of stack growth. */
20899 HOST_WIDE_INT
20900 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20902 arm_stack_offsets *offsets;
20904 offsets = arm_get_frame_offsets ();
20906 /* OK, now we have enough information to compute the distances.
20907 There must be an entry in these switch tables for each pair
20908 of registers in ELIMINABLE_REGS, even if some of the entries
20909 seem to be redundant or useless. */
20910 switch (from)
20912 case ARG_POINTER_REGNUM:
20913 switch (to)
20915 case THUMB_HARD_FRAME_POINTER_REGNUM:
20916 return 0;
20918 case FRAME_POINTER_REGNUM:
20919 /* This is the reverse of the soft frame pointer
20920 to hard frame pointer elimination below. */
20921 return offsets->soft_frame - offsets->saved_args;
20923 case ARM_HARD_FRAME_POINTER_REGNUM:
20924 /* This is only non-zero in the case where the static chain register
20925 is stored above the frame. */
20926 return offsets->frame - offsets->saved_args - 4;
20928 case STACK_POINTER_REGNUM:
20929 /* If nothing has been pushed on the stack at all
20930 then this will return -4. This *is* correct! */
20931 return offsets->outgoing_args - (offsets->saved_args + 4);
20933 default:
20934 gcc_unreachable ();
20936 gcc_unreachable ();
20938 case FRAME_POINTER_REGNUM:
20939 switch (to)
20941 case THUMB_HARD_FRAME_POINTER_REGNUM:
20942 return 0;
20944 case ARM_HARD_FRAME_POINTER_REGNUM:
20945 /* The hard frame pointer points to the top entry in the
20946 stack frame. The soft frame pointer points to the bottom entry
20947 in the stack frame. If there is no stack frame at all,
20948 then they are identical. */
20950 return offsets->frame - offsets->soft_frame;
20952 case STACK_POINTER_REGNUM:
20953 return offsets->outgoing_args - offsets->soft_frame;
20955 default:
20956 gcc_unreachable ();
20958 gcc_unreachable ();
20960 default:
20961 /* You cannot eliminate from the stack pointer.
20962 In theory you could eliminate from the hard frame
20963 pointer to the stack pointer, but this will never
20964 happen, since if a stack frame is not needed the
20965 hard frame pointer will never be used. */
20966 gcc_unreachable ();
20970 /* Given FROM and TO register numbers, say whether this elimination is
20971 allowed. Frame pointer elimination is automatically handled.
20973 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20974 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20975 pointer, we must eliminate FRAME_POINTER_REGNUM into
20976 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20977 ARG_POINTER_REGNUM. */
20979 bool
20980 arm_can_eliminate (const int from, const int to)
20982 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20983 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20984 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20985 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20986 true);
20989 /* Emit RTL to save coprocessor registers on function entry. Returns the
20990 number of bytes pushed. */
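/* As a rough illustration (registers chosen arbitrarily): if only d8-d11
   are used in the function and are not call-clobbered, the VFP loop below
   finds a single contiguous run and emits one store-multiple for it via
   vfp_emit_fstmd, roughly

	vstmdb	sp!, {d8-d11}

   whereas disjoint runs of live D-registers produce one such store per
   run.  */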
20992 static int
20993 arm_save_coproc_regs(void)
20995 int saved_size = 0;
20996 unsigned reg;
20997 unsigned start_reg;
20998 rtx insn;
21000 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21001 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21003 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21004 insn = gen_rtx_MEM (V2SImode, insn);
21005 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21006 RTX_FRAME_RELATED_P (insn) = 1;
21007 saved_size += 8;
21010 if (TARGET_HARD_FLOAT && TARGET_VFP)
21012 start_reg = FIRST_VFP_REGNUM;
21014 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21016 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21017 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21019 if (start_reg != reg)
21020 saved_size += vfp_emit_fstmd (start_reg,
21021 (reg - start_reg) / 2);
21022 start_reg = reg + 2;
21025 if (start_reg != reg)
21026 saved_size += vfp_emit_fstmd (start_reg,
21027 (reg - start_reg) / 2);
21029 return saved_size;
21033 /* Set the Thumb frame pointer from the stack pointer. */
21035 static void
21036 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21038 HOST_WIDE_INT amount;
21039 rtx insn, dwarf;
21041 amount = offsets->outgoing_args - offsets->locals_base;
21042 if (amount < 1024)
21043 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21044 stack_pointer_rtx, GEN_INT (amount)));
21045 else
21047 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21048 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21049 expects the first two operands to be the same. */
21050 if (TARGET_THUMB2)
21052 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21053 stack_pointer_rtx,
21054 hard_frame_pointer_rtx));
21056 else
21058 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21059 hard_frame_pointer_rtx,
21060 stack_pointer_rtx));
21062 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21063 plus_constant (Pmode, stack_pointer_rtx, amount));
21064 RTX_FRAME_RELATED_P (dwarf) = 1;
21065 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21068 RTX_FRAME_RELATED_P (insn) = 1;
21071 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21072 function. */
21073 void
21074 arm_expand_prologue (void)
21076 rtx amount;
21077 rtx insn;
21078 rtx ip_rtx;
21079 unsigned long live_regs_mask;
21080 unsigned long func_type;
21081 int fp_offset = 0;
21082 int saved_pretend_args = 0;
21083 int saved_regs = 0;
21084 unsigned HOST_WIDE_INT args_to_push;
21085 arm_stack_offsets *offsets;
21087 func_type = arm_current_func_type ();
21089 /* Naked functions don't have prologues. */
21090 if (IS_NAKED (func_type))
21091 return;
21093 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21094 args_to_push = crtl->args.pretend_args_size;
21096 /* Compute which register we will have to save onto the stack. */
21097 offsets = arm_get_frame_offsets ();
21098 live_regs_mask = offsets->saved_regs_mask;
21100 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21102 if (IS_STACKALIGN (func_type))
21104 rtx r0, r1;
21106 /* Handle a word-aligned stack pointer. We generate the following:
21108 mov r0, sp
21109 bic r1, r0, #7
21110 mov sp, r1
21111 <save and restore r0 in normal prologue/epilogue>
21112 mov sp, r0
21113 bx lr
21115 The unwinder doesn't need to know about the stack realignment.
21116 Just tell it we saved SP in r0. */
21117 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21119 r0 = gen_rtx_REG (SImode, 0);
21120 r1 = gen_rtx_REG (SImode, 1);
21122 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21123 RTX_FRAME_RELATED_P (insn) = 1;
21124 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21126 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21128 /* ??? The CFA changes here, which may cause GDB to conclude that it
21129 has entered a different function. That said, the unwind info is
21130 correct, individually, before and after this instruction because
21131 we've described the save of SP, which will override the default
21132 handling of SP as restoring from the CFA. */
21133 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21136 /* For APCS frames, if IP register is clobbered
21137 when creating frame, save that register in a special
21138 way. */
21139 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21141 if (IS_INTERRUPT (func_type))
21143 /* Interrupt functions must not corrupt any registers.
21144 Creating a frame pointer however, corrupts the IP
21145 register, so we must push it first. */
21146 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21148 /* Do not set RTX_FRAME_RELATED_P on this insn.
21149 The dwarf stack unwinding code only wants to see one
21150 stack decrement per function, and this is not it. If
21151 this instruction is labeled as being part of the frame
21152 creation sequence then dwarf2out_frame_debug_expr will
21153 die when it encounters the assignment of IP to FP
21154 later on, since the use of SP here establishes SP as
21155 the CFA register and not IP.
21157 Anyway this instruction is not really part of the stack
21158 frame creation although it is part of the prologue. */
21160 else if (IS_NESTED (func_type))
21162 /* The static chain register is the same as the IP register
21163 used as a scratch register during stack frame creation.
21164 To get around this, we need to find somewhere to store IP
21165 whilst the frame is being created. We try the following
21166 places in order:
21168 1. The last argument register r3 if it is available.
21169 2. A slot on the stack above the frame if there are no
21170 arguments to push onto the stack.
21171 3. Register r3 again, after pushing the argument registers
21172 onto the stack, if this is a varargs function.
21173 4. The last slot on the stack created for the arguments to
21174 push, if this isn't a varargs function.
21176 Note - we only need to tell the dwarf2 backend about the SP
21177 adjustment in the second variant; the static chain register
21178 doesn't need to be unwound, as it doesn't contain a value
21179 inherited from the caller. */
21181 if (!arm_r3_live_at_start_p ())
21182 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21183 else if (args_to_push == 0)
21185 rtx addr, dwarf;
21187 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21188 saved_regs += 4;
21190 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21191 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21192 fp_offset = 4;
21194 /* Just tell the dwarf backend that we adjusted SP. */
21195 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21196 plus_constant (Pmode, stack_pointer_rtx,
21197 -fp_offset));
21198 RTX_FRAME_RELATED_P (insn) = 1;
21199 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21201 else
21203 /* Store the args on the stack. */
21204 if (cfun->machine->uses_anonymous_args)
21206 insn
21207 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21208 (0xf0 >> (args_to_push / 4)) & 0xf);
21209 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21210 saved_pretend_args = 1;
21212 else
21214 rtx addr, dwarf;
21216 if (args_to_push == 4)
21217 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21218 else
21219 addr
21220 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21221 plus_constant (Pmode,
21222 stack_pointer_rtx,
21223 -args_to_push));
21225 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21227 /* Just tell the dwarf backend that we adjusted SP. */
21228 dwarf
21229 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21230 plus_constant (Pmode, stack_pointer_rtx,
21231 -args_to_push));
21232 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21235 RTX_FRAME_RELATED_P (insn) = 1;
21236 fp_offset = args_to_push;
21237 args_to_push = 0;
21241 insn = emit_set_insn (ip_rtx,
21242 plus_constant (Pmode, stack_pointer_rtx,
21243 fp_offset));
21244 RTX_FRAME_RELATED_P (insn) = 1;
21247 if (args_to_push)
21249 /* Push the argument registers, or reserve space for them. */
21250 if (cfun->machine->uses_anonymous_args)
21251 insn = emit_multi_reg_push
21252 ((0xf0 >> (args_to_push / 4)) & 0xf,
21253 (0xf0 >> (args_to_push / 4)) & 0xf);
21254 else
21255 insn = emit_insn
21256 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21257 GEN_INT (- args_to_push)));
21258 RTX_FRAME_RELATED_P (insn) = 1;
21261 /* If this is an interrupt service routine, and the link register
21262 is going to be pushed, and we're not generating an extra
21263 push of IP (needed when a frame is needed and the frame layout is APCS),
21264 subtracting four from LR now will mean that the function return
21265 can be done with a single instruction. */
21266 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21267 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21268 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21269 && TARGET_ARM)
21271 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21273 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21276 if (live_regs_mask)
21278 unsigned long dwarf_regs_mask = live_regs_mask;
21280 saved_regs += bit_count (live_regs_mask) * 4;
21281 if (optimize_size && !frame_pointer_needed
21282 && saved_regs == offsets->saved_regs - offsets->saved_args)
21284 /* If no coprocessor registers are being pushed and we don't have
21285 to worry about a frame pointer then push extra registers to
21286 create the stack frame. This is done in a way that does not
21287 alter the frame layout, so is independent of the epilogue. */
21288 int n;
21289 int frame;
21290 n = 0;
21291 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21292 n++;
21293 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21294 if (frame && n * 4 >= frame)
21296 n = frame / 4;
21297 live_regs_mask |= (1 << n) - 1;
21298 saved_regs += frame;
21302 if (TARGET_LDRD
21303 && current_tune->prefer_ldrd_strd
21304 && !optimize_function_for_size_p (cfun))
21306 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21307 if (TARGET_THUMB2)
21308 thumb2_emit_strd_push (live_regs_mask);
21309 else if (TARGET_ARM
21310 && !TARGET_APCS_FRAME
21311 && !IS_INTERRUPT (func_type))
21312 arm_emit_strd_push (live_regs_mask);
21313 else
21315 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21316 RTX_FRAME_RELATED_P (insn) = 1;
21319 else
21321 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21322 RTX_FRAME_RELATED_P (insn) = 1;
21326 if (! IS_VOLATILE (func_type))
21327 saved_regs += arm_save_coproc_regs ();
21329 if (frame_pointer_needed && TARGET_ARM)
21331 /* Create the new frame pointer. */
21332 if (TARGET_APCS_FRAME)
21334 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21335 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21336 RTX_FRAME_RELATED_P (insn) = 1;
21338 if (IS_NESTED (func_type))
21340 /* Recover the static chain register. */
21341 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21342 insn = gen_rtx_REG (SImode, 3);
21343 else
21345 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21346 insn = gen_frame_mem (SImode, insn);
21348 emit_set_insn (ip_rtx, insn);
21349 /* Add a USE to stop propagate_one_insn() from barfing. */
21350 emit_insn (gen_force_register_use (ip_rtx));
21353 else
21355 insn = GEN_INT (saved_regs - 4);
21356 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21357 stack_pointer_rtx, insn));
21358 RTX_FRAME_RELATED_P (insn) = 1;
21362 if (flag_stack_usage_info)
21363 current_function_static_stack_size
21364 = offsets->outgoing_args - offsets->saved_args;
21366 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21368 /* This add can produce multiple insns for a large constant, so we
21369 need to get tricky. */
21370 rtx_insn *last = get_last_insn ();
21372 amount = GEN_INT (offsets->saved_args + saved_regs
21373 - offsets->outgoing_args);
21375 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21376 amount));
21379 last = last ? NEXT_INSN (last) : get_insns ();
21380 RTX_FRAME_RELATED_P (last) = 1;
21382 while (last != insn);
21384 /* If the frame pointer is needed, emit a special barrier that
21385 will prevent the scheduler from moving stores to the frame
21386 before the stack adjustment. */
21387 if (frame_pointer_needed)
21388 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21389 hard_frame_pointer_rtx));
21393 if (frame_pointer_needed && TARGET_THUMB2)
21394 thumb_set_frame_pointer (offsets);
21396 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21398 unsigned long mask;
21400 mask = live_regs_mask;
21401 mask &= THUMB2_WORK_REGS;
21402 if (!IS_NESTED (func_type))
21403 mask |= (1 << IP_REGNUM);
21404 arm_load_pic_register (mask);
21407 /* If we are profiling, make sure no instructions are scheduled before
21408 the call to mcount. Similarly if the user has requested no
21409 scheduling in the prolog. Similarly if we want non-call exceptions
21410 using the EABI unwinder, to prevent faulting instructions from being
21411 swapped with a stack adjustment. */
21412 if (crtl->profile || !TARGET_SCHED_PROLOG
21413 || (arm_except_unwind_info (&global_options) == UI_TARGET
21414 && cfun->can_throw_non_call_exceptions))
21415 emit_insn (gen_blockage ());
21417 /* If the link register is being kept alive, with the return address in it,
21418 then make sure that it does not get reused by the ce2 pass. */
21419 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21420 cfun->machine->lr_save_eliminated = 1;
21423 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21424 static void
21425 arm_print_condition (FILE *stream)
21427 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21429 /* Branch conversion is not implemented for Thumb-2. */
21430 if (TARGET_THUMB)
21432 output_operand_lossage ("predicated Thumb instruction");
21433 return;
21435 if (current_insn_predicate != NULL)
21437 output_operand_lossage
21438 ("predicated instruction in conditional sequence");
21439 return;
21442 fputs (arm_condition_codes[arm_current_cc], stream);
21444 else if (current_insn_predicate)
21446 enum arm_cond_code code;
21448 if (TARGET_THUMB1)
21450 output_operand_lossage ("predicated Thumb instruction");
21451 return;
21454 code = get_arm_condition_code (current_insn_predicate);
21455 fputs (arm_condition_codes[code], stream);
21460 /* Globally reserved letters: acln
21461 Punctuation letters currently used: @_|?().!#
21462 Lower case letters currently used: bcdefhimpqtvwxyz
21463 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21464 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21466 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21468 If CODE is 'd', then the X is a condition operand and the instruction
21469 should only be executed if the condition is true.
21470 If CODE is 'D', then the X is a condition operand and the instruction
21471 should only be executed if the condition is false: however, if the mode
21472 of the comparison is CCFPEmode, then always execute the instruction -- we
21473 do this because in these circumstances !GE does not necessarily imply LT;
21474 in these cases the instruction pattern will take care to make sure that
21475 an instruction containing %d will follow, thereby undoing the effects of
21476 doing this instruction unconditionally.
21477 If CODE is 'N' then X is a floating point operand that must be negated
21478 before output.
21479 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21480 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
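/* Illustrative examples (hypothetical operands, not taken from any specific
   pattern): in an output template "%?" expands to the current condition
   code via arm_print_condition; "%B2" prints the bitwise inverse of a
   CONST_INT operand 2; "%M0" prints a register operand 0 as an ldm/stm
   style range such as "{r4-r5}" covering all words of its mode.  */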
21481 static void
21482 arm_print_operand (FILE *stream, rtx x, int code)
21484 switch (code)
21486 case '@':
21487 fputs (ASM_COMMENT_START, stream);
21488 return;
21490 case '_':
21491 fputs (user_label_prefix, stream);
21492 return;
21494 case '|':
21495 fputs (REGISTER_PREFIX, stream);
21496 return;
21498 case '?':
21499 arm_print_condition (stream);
21500 return;
21502 case '(':
21503 /* Nothing in unified syntax, otherwise the current condition code. */
21504 if (!TARGET_UNIFIED_ASM)
21505 arm_print_condition (stream);
21506 break;
21508 case ')':
21509 /* The current condition code in unified syntax, otherwise nothing. */
21510 if (TARGET_UNIFIED_ASM)
21511 arm_print_condition (stream);
21512 break;
21514 case '.':
21515 /* The current condition code for a condition code setting instruction.
21516 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21517 if (TARGET_UNIFIED_ASM)
21519 fputc('s', stream);
21520 arm_print_condition (stream);
21522 else
21524 arm_print_condition (stream);
21525 fputc('s', stream);
21527 return;
21529 case '!':
21530 /* If the instruction is conditionally executed then print
21531 the current condition code, otherwise print 's'. */
21532 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21533 if (current_insn_predicate)
21534 arm_print_condition (stream);
21535 else
21536 fputc('s', stream);
21537 break;
21539 /* %# is a "break" sequence. It doesn't output anything, but is used to
21540 separate e.g. operand numbers from following text, if that text consists
21541 of further digits which we don't want to be part of the operand
21542 number. */
21543 case '#':
21544 return;
21546 case 'N':
21548 REAL_VALUE_TYPE r;
21549 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21550 r = real_value_negate (&r);
21551 fprintf (stream, "%s", fp_const_from_val (&r));
21553 return;
21555 /* An integer or symbol address without a preceding # sign. */
21556 case 'c':
21557 switch (GET_CODE (x))
21559 case CONST_INT:
21560 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21561 break;
21563 case SYMBOL_REF:
21564 output_addr_const (stream, x);
21565 break;
21567 case CONST:
21568 if (GET_CODE (XEXP (x, 0)) == PLUS
21569 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21571 output_addr_const (stream, x);
21572 break;
21574 /* Fall through. */
21576 default:
21577 output_operand_lossage ("Unsupported operand for code '%c'", code);
21579 return;
21581 /* An integer that we want to print in HEX. */
21582 case 'x':
21583 switch (GET_CODE (x))
21585 case CONST_INT:
21586 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21587 break;
21589 default:
21590 output_operand_lossage ("Unsupported operand for code '%c'", code);
21592 return;
21594 case 'B':
21595 if (CONST_INT_P (x))
21597 HOST_WIDE_INT val;
21598 val = ARM_SIGN_EXTEND (~INTVAL (x));
21599 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21601 else
21603 putc ('~', stream);
21604 output_addr_const (stream, x);
21606 return;
21608 case 'b':
21609 /* Print the log2 of a CONST_INT. */
21611 HOST_WIDE_INT val;
21613 if (!CONST_INT_P (x)
21614 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21615 output_operand_lossage ("Unsupported operand for code '%c'", code);
21616 else
21617 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21619 return;
21621 case 'L':
21622 /* The low 16 bits of an immediate constant. */
21623 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21624 return;
21626 case 'i':
21627 fprintf (stream, "%s", arithmetic_instr (x, 1));
21628 return;
21630 case 'I':
21631 fprintf (stream, "%s", arithmetic_instr (x, 0));
21632 return;
21634 case 'S':
21636 HOST_WIDE_INT val;
21637 const char *shift;
21639 shift = shift_op (x, &val);
21641 if (shift)
21643 fprintf (stream, ", %s ", shift);
21644 if (val == -1)
21645 arm_print_operand (stream, XEXP (x, 1), 0);
21646 else
21647 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21650 return;
21652 /* An explanation of the 'Q', 'R' and 'H' register operands:
21654 In a pair of registers containing a DI or DF value the 'Q'
21655 operand returns the register number of the register containing
21656 the least significant part of the value. The 'R' operand returns
21657 the register number of the register containing the most
21658 significant part of the value.
21660 The 'H' operand returns the higher of the two register numbers.
21661 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21662 same as the 'Q' operand, since the most significant part of the
21663 value is held in the lower-numbered register. The reverse is true
21664 on systems where WORDS_BIG_ENDIAN is false.
21666 The purpose of these operands is to distinguish between cases
21667 where the endian-ness of the values is important (for example
21668 when they are added together), and cases where the endian-ness
21669 is irrelevant, but the order of register operations is important.
21670 For example when loading a value from memory into a register
21671 pair, the endian-ness does not matter. Provided that the value
21672 from the lower memory address is put into the lower numbered
21673 register, and the value from the higher address is put into the
21674 higher numbered register, the load will work regardless of whether
21675 the value being loaded is big-wordian or little-wordian. The
21676 order of the two register loads can matter however, if the address
21677 of the memory location is actually held in one of the registers
21678 being overwritten by the load.
21680 The 'Q' and 'R' constraints are also available for 64-bit
21681 constants. */
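/* Worked example: for a DImode value held in the register pair r0/r1 with
   WORDS_BIG_ENDIAN false, '%Q' prints r0 (least significant word), '%R'
   prints r1 (most significant word) and '%H' prints r1.  With
   WORDS_BIG_ENDIAN true, '%Q' prints r1, '%R' prints r0 and '%H' still
   prints r1.  */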
21682 case 'Q':
21683 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21685 rtx part = gen_lowpart (SImode, x);
21686 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21687 return;
21690 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21692 output_operand_lossage ("invalid operand for code '%c'", code);
21693 return;
21696 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21697 return;
21699 case 'R':
21700 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21702 enum machine_mode mode = GET_MODE (x);
21703 rtx part;
21705 if (mode == VOIDmode)
21706 mode = DImode;
21707 part = gen_highpart_mode (SImode, mode, x);
21708 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21709 return;
21712 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21714 output_operand_lossage ("invalid operand for code '%c'", code);
21715 return;
21718 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21719 return;
21721 case 'H':
21722 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21724 output_operand_lossage ("invalid operand for code '%c'", code);
21725 return;
21728 asm_fprintf (stream, "%r", REGNO (x) + 1);
21729 return;
21731 case 'J':
21732 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21734 output_operand_lossage ("invalid operand for code '%c'", code);
21735 return;
21738 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21739 return;
21741 case 'K':
21742 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21744 output_operand_lossage ("invalid operand for code '%c'", code);
21745 return;
21748 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21749 return;
21751 case 'm':
21752 asm_fprintf (stream, "%r",
21753 REG_P (XEXP (x, 0))
21754 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21755 return;
21757 case 'M':
21758 asm_fprintf (stream, "{%r-%r}",
21759 REGNO (x),
21760 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21761 return;
21763 /* Like 'M', but writing doubleword vector registers, for use by Neon
21764 insns. */
21765 case 'h':
21767 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21768 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21769 if (numregs == 1)
21770 asm_fprintf (stream, "{d%d}", regno);
21771 else
21772 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21774 return;
21776 case 'd':
21777 /* CONST_TRUE_RTX means always -- that's the default. */
21778 if (x == const_true_rtx)
21779 return;
21781 if (!COMPARISON_P (x))
21783 output_operand_lossage ("invalid operand for code '%c'", code);
21784 return;
21787 fputs (arm_condition_codes[get_arm_condition_code (x)],
21788 stream);
21789 return;
21791 case 'D':
21792 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21793 want to do that. */
21794 if (x == const_true_rtx)
21796 output_operand_lossage ("instruction never executed");
21797 return;
21799 if (!COMPARISON_P (x))
21801 output_operand_lossage ("invalid operand for code '%c'", code);
21802 return;
21805 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21806 (get_arm_condition_code (x))],
21807 stream);
21808 return;
21810 case 's':
21811 case 'V':
21812 case 'W':
21813 case 'X':
21814 case 'Y':
21815 case 'Z':
21816 /* Former Maverick support, removed after GCC-4.7. */
21817 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21818 return;
21820 case 'U':
21821 if (!REG_P (x)
21822 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21823 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21824 /* Bad value for wCG register number. */
21826 output_operand_lossage ("invalid operand for code '%c'", code);
21827 return;
21830 else
21831 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21832 return;
21834 /* Print an iWMMXt control register name. */
21835 case 'w':
21836 if (!CONST_INT_P (x)
21837 || INTVAL (x) < 0
21838 || INTVAL (x) >= 16)
21839 /* Bad value for wC register number. */
21841 output_operand_lossage ("invalid operand for code '%c'", code);
21842 return;
21845 else
21847 static const char * wc_reg_names [16] =
21849 "wCID", "wCon", "wCSSF", "wCASF",
21850 "wC4", "wC5", "wC6", "wC7",
21851 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21852 "wC12", "wC13", "wC14", "wC15"
21855 fputs (wc_reg_names [INTVAL (x)], stream);
21857 return;
21859 /* Print the high single-precision register of a VFP double-precision
21860 register. */
21861 case 'p':
21863 enum machine_mode mode = GET_MODE (x);
21864 int regno;
21866 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21868 output_operand_lossage ("invalid operand for code '%c'", code);
21869 return;
21872 regno = REGNO (x);
21873 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21875 output_operand_lossage ("invalid operand for code '%c'", code);
21876 return;
21879 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21881 return;
21883 /* Print a VFP/Neon double precision or quad precision register name. */
21884 case 'P':
21885 case 'q':
21887 enum machine_mode mode = GET_MODE (x);
21888 int is_quad = (code == 'q');
21889 int regno;
21891 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21893 output_operand_lossage ("invalid operand for code '%c'", code);
21894 return;
21897 if (!REG_P (x)
21898 || !IS_VFP_REGNUM (REGNO (x)))
21900 output_operand_lossage ("invalid operand for code '%c'", code);
21901 return;
21904 regno = REGNO (x);
21905 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21906 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21908 output_operand_lossage ("invalid operand for code '%c'", code);
21909 return;
21912 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21913 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21915 return;
21917 /* These two codes print the low/high doubleword register of a Neon quad
21918 register, respectively. For pair-structure types, can also print
21919 low/high quadword registers. */
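/* Worked example: for a V4SImode value in q1 (i.e. d2/d3), '%e' prints d2
   and '%f' prints d3.  */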
21920 case 'e':
21921 case 'f':
21923 enum machine_mode mode = GET_MODE (x);
21924 int regno;
21926 if ((GET_MODE_SIZE (mode) != 16
21927 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21929 output_operand_lossage ("invalid operand for code '%c'", code);
21930 return;
21933 regno = REGNO (x);
21934 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21936 output_operand_lossage ("invalid operand for code '%c'", code);
21937 return;
21940 if (GET_MODE_SIZE (mode) == 16)
21941 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21942 + (code == 'f' ? 1 : 0));
21943 else
21944 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21945 + (code == 'f' ? 1 : 0));
21947 return;
21949 /* Print a VFPv3 floating-point constant, represented as an integer
21950 index. */
21951 case 'G':
21953 int index = vfp3_const_double_index (x);
21954 gcc_assert (index != -1);
21955 fprintf (stream, "%d", index);
21957 return;
21959 /* Print bits representing opcode features for Neon.
21961 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21962 and polynomials as unsigned.
21964 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21966 Bit 2 is 1 for rounding functions, 0 otherwise. */
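/* Worked example (illustrative): a bits value of 5 (binary 101) denotes a
   signed, non-float, rounding operation, so '%T' and '%t' print 's', '%F'
   prints 'i' and '%O' prints 'r'.  */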
21968 /* Identify the type as 's', 'u', 'p' or 'f'. */
21969 case 'T':
21971 HOST_WIDE_INT bits = INTVAL (x);
21972 fputc ("uspf"[bits & 3], stream);
21974 return;
21976 /* Likewise, but signed and unsigned integers are both 'i'. */
21977 case 'F':
21979 HOST_WIDE_INT bits = INTVAL (x);
21980 fputc ("iipf"[bits & 3], stream);
21982 return;
21984 /* As for 'T', but emit 'u' instead of 'p'. */
21985 case 't':
21987 HOST_WIDE_INT bits = INTVAL (x);
21988 fputc ("usuf"[bits & 3], stream);
21990 return;
21992 /* Bit 2: rounding (vs none). */
21993 case 'O':
21995 HOST_WIDE_INT bits = INTVAL (x);
21996 fputs ((bits & 4) != 0 ? "r" : "", stream);
21998 return;
22000 /* Memory operand for vld1/vst1 instruction. */
22001 case 'A':
22003 rtx addr;
22004 bool postinc = FALSE;
22005 rtx postinc_reg = NULL;
22006 unsigned align, memsize, align_bits;
22008 gcc_assert (MEM_P (x));
22009 addr = XEXP (x, 0);
22010 if (GET_CODE (addr) == POST_INC)
22012 postinc = 1;
22013 addr = XEXP (addr, 0);
22015 if (GET_CODE (addr) == POST_MODIFY)
22017 postinc_reg = XEXP( XEXP (addr, 1), 1);
22018 addr = XEXP (addr, 0);
22020 asm_fprintf (stream, "[%r", REGNO (addr));
22022 /* We know the alignment of this access, so we can emit a hint in the
22023 instruction (for some alignments) as an aid to the memory subsystem
22024 of the target. */
22025 align = MEM_ALIGN (x) >> 3;
22026 memsize = MEM_SIZE (x);
22028 /* Only certain alignment specifiers are supported by the hardware. */
22029 if (memsize == 32 && (align % 32) == 0)
22030 align_bits = 256;
22031 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22032 align_bits = 128;
22033 else if (memsize >= 8 && (align % 8) == 0)
22034 align_bits = 64;
22035 else
22036 align_bits = 0;
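/* For example (illustrative): a 16-byte access through an address known to
   be 128-bit aligned is printed as "[rN:128]", giving the hardware an
   alignment hint.  */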
22038 if (align_bits != 0)
22039 asm_fprintf (stream, ":%d", align_bits);
22041 asm_fprintf (stream, "]");
22043 if (postinc)
22044 fputs("!", stream);
22045 if (postinc_reg)
22046 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22048 return;
22050 case 'C':
22052 rtx addr;
22054 gcc_assert (MEM_P (x));
22055 addr = XEXP (x, 0);
22056 gcc_assert (REG_P (addr));
22057 asm_fprintf (stream, "[%r]", REGNO (addr));
22059 return;
22061 /* Translate an S register number into a D register number and element index. */
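/* For example, s1 overlaps the high half of d0 and is printed as "d0[1]",
   while s2 is printed as "d1[0]".  */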
22062 case 'y':
22064 enum machine_mode mode = GET_MODE (x);
22065 int regno;
22067 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22069 output_operand_lossage ("invalid operand for code '%c'", code);
22070 return;
22073 regno = REGNO (x);
22074 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22076 output_operand_lossage ("invalid operand for code '%c'", code);
22077 return;
22080 regno = regno - FIRST_VFP_REGNUM;
22081 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22083 return;
22085 case 'v':
22086 gcc_assert (CONST_DOUBLE_P (x));
22087 int result;
22088 result = vfp3_const_double_for_fract_bits (x);
22089 if (result == 0)
22090 result = vfp3_const_double_for_bits (x);
22091 fprintf (stream, "#%d", result);
22092 return;
22094 /* Register specifier for vld1.16/vst1.16. Translate the S register
22095 number into a D register number and element index. */
22096 case 'z':
22098 enum machine_mode mode = GET_MODE (x);
22099 int regno;
22101 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22103 output_operand_lossage ("invalid operand for code '%c'", code);
22104 return;
22107 regno = REGNO (x);
22108 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22110 output_operand_lossage ("invalid operand for code '%c'", code);
22111 return;
22114 regno = regno - FIRST_VFP_REGNUM;
22115 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22117 return;
22119 default:
22120 if (x == 0)
22122 output_operand_lossage ("missing operand");
22123 return;
22126 switch (GET_CODE (x))
22128 case REG:
22129 asm_fprintf (stream, "%r", REGNO (x));
22130 break;
22132 case MEM:
22133 output_memory_reference_mode = GET_MODE (x);
22134 output_address (XEXP (x, 0));
22135 break;
22137 case CONST_DOUBLE:
22139 char fpstr[20];
22140 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22141 sizeof (fpstr), 0, 1);
22142 fprintf (stream, "#%s", fpstr);
22144 break;
22146 default:
22147 gcc_assert (GET_CODE (x) != NEG);
22148 fputc ('#', stream);
22149 if (GET_CODE (x) == HIGH)
22151 fputs (":lower16:", stream);
22152 x = XEXP (x, 0);
22155 output_addr_const (stream, x);
22156 break;
22161 /* Target hook for printing a memory address. */
22162 static void
22163 arm_print_operand_address (FILE *stream, rtx x)
22165 if (TARGET_32BIT)
22167 int is_minus = GET_CODE (x) == MINUS;
22169 if (REG_P (x))
22170 asm_fprintf (stream, "[%r]", REGNO (x));
22171 else if (GET_CODE (x) == PLUS || is_minus)
22173 rtx base = XEXP (x, 0);
22174 rtx index = XEXP (x, 1);
22175 HOST_WIDE_INT offset = 0;
22176 if (!REG_P (base)
22177 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22179 /* Ensure that BASE is a register. */
22180 /* (one of them must be). */
22181 /* Also ensure the SP is not used as an index register. */
22182 rtx temp = base;
22183 base = index;
22184 index = temp;
22186 switch (GET_CODE (index))
22188 case CONST_INT:
22189 offset = INTVAL (index);
22190 if (is_minus)
22191 offset = -offset;
22192 asm_fprintf (stream, "[%r, #%wd]",
22193 REGNO (base), offset);
22194 break;
22196 case REG:
22197 asm_fprintf (stream, "[%r, %s%r]",
22198 REGNO (base), is_minus ? "-" : "",
22199 REGNO (index));
22200 break;
22202 case MULT:
22203 case ASHIFTRT:
22204 case LSHIFTRT:
22205 case ASHIFT:
22206 case ROTATERT:
22208 asm_fprintf (stream, "[%r, %s%r",
22209 REGNO (base), is_minus ? "-" : "",
22210 REGNO (XEXP (index, 0)));
22211 arm_print_operand (stream, index, 'S');
22212 fputs ("]", stream);
22213 break;
22216 default:
22217 gcc_unreachable ();
22220 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22221 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22223 extern enum machine_mode output_memory_reference_mode;
22225 gcc_assert (REG_P (XEXP (x, 0)));
22227 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22228 asm_fprintf (stream, "[%r, #%s%d]!",
22229 REGNO (XEXP (x, 0)),
22230 GET_CODE (x) == PRE_DEC ? "-" : "",
22231 GET_MODE_SIZE (output_memory_reference_mode));
22232 else
22233 asm_fprintf (stream, "[%r], #%s%d",
22234 REGNO (XEXP (x, 0)),
22235 GET_CODE (x) == POST_DEC ? "-" : "",
22236 GET_MODE_SIZE (output_memory_reference_mode));
22238 else if (GET_CODE (x) == PRE_MODIFY)
22240 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22241 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22242 asm_fprintf (stream, "#%wd]!",
22243 INTVAL (XEXP (XEXP (x, 1), 1)));
22244 else
22245 asm_fprintf (stream, "%r]!",
22246 REGNO (XEXP (XEXP (x, 1), 1)));
22248 else if (GET_CODE (x) == POST_MODIFY)
22250 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22251 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22252 asm_fprintf (stream, "#%wd",
22253 INTVAL (XEXP (XEXP (x, 1), 1)));
22254 else
22255 asm_fprintf (stream, "%r",
22256 REGNO (XEXP (XEXP (x, 1), 1)));
22258 else output_addr_const (stream, x);
22260 else
22262 if (REG_P (x))
22263 asm_fprintf (stream, "[%r]", REGNO (x));
22264 else if (GET_CODE (x) == POST_INC)
22265 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22266 else if (GET_CODE (x) == PLUS)
22268 gcc_assert (REG_P (XEXP (x, 0)));
22269 if (CONST_INT_P (XEXP (x, 1)))
22270 asm_fprintf (stream, "[%r, #%wd]",
22271 REGNO (XEXP (x, 0)),
22272 INTVAL (XEXP (x, 1)));
22273 else
22274 asm_fprintf (stream, "[%r, %r]",
22275 REGNO (XEXP (x, 0)),
22276 REGNO (XEXP (x, 1)));
22278 else
22279 output_addr_const (stream, x);
22283 /* Target hook for indicating whether a punctuation character for
22284 TARGET_PRINT_OPERAND is valid. */
22285 static bool
22286 arm_print_operand_punct_valid_p (unsigned char code)
22288 return (code == '@' || code == '|' || code == '.'
22289 || code == '(' || code == ')' || code == '#'
22290 || (TARGET_32BIT && (code == '?'))
22291 || (TARGET_THUMB2 && (code == '!'))
22292 || (TARGET_THUMB && (code == '_')));
22295 /* Target hook for assembling integer objects. The ARM version needs to
22296 handle word-sized values specially. */
22297 static bool
22298 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22300 enum machine_mode mode;
22302 if (size == UNITS_PER_WORD && aligned_p)
22304 fputs ("\t.word\t", asm_out_file);
22305 output_addr_const (asm_out_file, x);
22307 /* Mark symbols as position independent. We only do this in the
22308 .text segment, not in the .data segment. */
22309 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22310 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22312 /* See legitimize_pic_address for an explanation of the
22313 TARGET_VXWORKS_RTP check. */
22314 if (!arm_pic_data_is_text_relative
22315 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22316 fputs ("(GOT)", asm_out_file);
22317 else
22318 fputs ("(GOTOFF)", asm_out_file);
22320 fputc ('\n', asm_out_file);
22321 return true;
22324 mode = GET_MODE (x);
22326 if (arm_vector_mode_supported_p (mode))
22328 int i, units;
22330 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22332 units = CONST_VECTOR_NUNITS (x);
22333 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22335 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22336 for (i = 0; i < units; i++)
22338 rtx elt = CONST_VECTOR_ELT (x, i);
22339 assemble_integer
22340 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22342 else
22343 for (i = 0; i < units; i++)
22345 rtx elt = CONST_VECTOR_ELT (x, i);
22346 REAL_VALUE_TYPE rval;
22348 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22350 assemble_real
22351 (rval, GET_MODE_INNER (mode),
22352 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22355 return true;
22358 return default_assemble_integer (x, size, aligned_p);
22361 static void
22362 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22364 section *s;
22366 if (!TARGET_AAPCS_BASED)
22368 (is_ctor ?
22369 default_named_section_asm_out_constructor
22370 : default_named_section_asm_out_destructor) (symbol, priority);
22371 return;
22374 /* Put these in the .init_array section, using a special relocation. */
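/* For example (illustrative): a constructor registered with priority 101
   is placed in a section named ".init_array.00101", and its entry is
   emitted as a .word with a (target1) relocation.  */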
22375 if (priority != DEFAULT_INIT_PRIORITY)
22377 char buf[18];
22378 sprintf (buf, "%s.%.5u",
22379 is_ctor ? ".init_array" : ".fini_array",
22380 priority);
22381 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22383 else if (is_ctor)
22384 s = ctors_section;
22385 else
22386 s = dtors_section;
22388 switch_to_section (s);
22389 assemble_align (POINTER_SIZE);
22390 fputs ("\t.word\t", asm_out_file);
22391 output_addr_const (asm_out_file, symbol);
22392 fputs ("(target1)\n", asm_out_file);
22395 /* Add a function to the list of static constructors. */
22397 static void
22398 arm_elf_asm_constructor (rtx symbol, int priority)
22400 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22403 /* Add a function to the list of static destructors. */
22405 static void
22406 arm_elf_asm_destructor (rtx symbol, int priority)
22408 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22411 /* A finite state machine takes care of noticing whether or not instructions
22412 can be conditionally executed, and thus decreases execution time and code
22413 size by deleting branch instructions. The fsm is controlled by
22414 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22416 /* The states of the fsm controlling condition codes are:
22417 0: normal, do nothing special
22418 1: make ASM_OUTPUT_OPCODE not output this instruction
22419 2: make ASM_OUTPUT_OPCODE not output this instruction
22420 3: make instructions conditional
22421 4: make instructions conditional
22423 State transitions (state->state by whom under condition):
22424 0 -> 1 final_prescan_insn if the `target' is a label
22425 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22426 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22427 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22428 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22429 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22430 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22431 (the target insn is arm_target_insn).
22433 If the jump clobbers the conditions then we use states 2 and 4.
22435 A similar thing can be done with conditional return insns.
22437 XXX In case the `target' is an unconditional branch, this conditionalising
22438 of the instructions always reduces code size, but not always execution
22439 time. But then, I want to reduce the code size to somewhere near what
22440 /bin/cc produces. */
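/* Illustrative example (hypothetical code): a sequence such as

        cmp     r0, #0
        beq     .L1
        add     r1, r1, #1
   .L1:

   can be rewritten by this machinery as

        cmp     r0, #0
        addne   r1, r1, #1

   with the branch deleted and the skipped instruction made conditional.  */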
22442 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22443 instructions. When a COND_EXEC instruction is seen the subsequent
22444 instructions are scanned so that multiple conditional instructions can be
22445 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22446 specify the length and true/false mask for the IT block. These will be
22447 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
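/* Worked example (illustrative): with arm_current_cc == ARM_EQ,
   arm_condexec_masklen == 3 and arm_condexec_mask == 0x5 (binary 101),
   thumb2_asm_output_opcode emits "itet eq"; the first and third
   instructions of the block execute if EQ, the second if NE.  */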
22449 /* Returns the index of the ARM condition code string in
22450 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22451 COMPARISON should be an rtx like `(eq (...) (...))'. */
22453 enum arm_cond_code
22454 maybe_get_arm_condition_code (rtx comparison)
22456 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
22457 enum arm_cond_code code;
22458 enum rtx_code comp_code = GET_CODE (comparison);
22460 if (GET_MODE_CLASS (mode) != MODE_CC)
22461 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22462 XEXP (comparison, 1));
22464 switch (mode)
22466 case CC_DNEmode: code = ARM_NE; goto dominance;
22467 case CC_DEQmode: code = ARM_EQ; goto dominance;
22468 case CC_DGEmode: code = ARM_GE; goto dominance;
22469 case CC_DGTmode: code = ARM_GT; goto dominance;
22470 case CC_DLEmode: code = ARM_LE; goto dominance;
22471 case CC_DLTmode: code = ARM_LT; goto dominance;
22472 case CC_DGEUmode: code = ARM_CS; goto dominance;
22473 case CC_DGTUmode: code = ARM_HI; goto dominance;
22474 case CC_DLEUmode: code = ARM_LS; goto dominance;
22475 case CC_DLTUmode: code = ARM_CC;
22477 dominance:
22478 if (comp_code == EQ)
22479 return ARM_INVERSE_CONDITION_CODE (code);
22480 if (comp_code == NE)
22481 return code;
22482 return ARM_NV;
22484 case CC_NOOVmode:
22485 switch (comp_code)
22487 case NE: return ARM_NE;
22488 case EQ: return ARM_EQ;
22489 case GE: return ARM_PL;
22490 case LT: return ARM_MI;
22491 default: return ARM_NV;
22494 case CC_Zmode:
22495 switch (comp_code)
22497 case NE: return ARM_NE;
22498 case EQ: return ARM_EQ;
22499 default: return ARM_NV;
22502 case CC_Nmode:
22503 switch (comp_code)
22505 case NE: return ARM_MI;
22506 case EQ: return ARM_PL;
22507 default: return ARM_NV;
22510 case CCFPEmode:
22511 case CCFPmode:
22512 /* We can handle all cases except UNEQ and LTGT. */
22513 switch (comp_code)
22515 case GE: return ARM_GE;
22516 case GT: return ARM_GT;
22517 case LE: return ARM_LS;
22518 case LT: return ARM_MI;
22519 case NE: return ARM_NE;
22520 case EQ: return ARM_EQ;
22521 case ORDERED: return ARM_VC;
22522 case UNORDERED: return ARM_VS;
22523 case UNLT: return ARM_LT;
22524 case UNLE: return ARM_LE;
22525 case UNGT: return ARM_HI;
22526 case UNGE: return ARM_PL;
22527 /* UNEQ and LTGT do not have a representation. */
22528 case UNEQ: /* Fall through. */
22529 case LTGT: /* Fall through. */
22530 default: return ARM_NV;
22533 case CC_SWPmode:
22534 switch (comp_code)
22536 case NE: return ARM_NE;
22537 case EQ: return ARM_EQ;
22538 case GE: return ARM_LE;
22539 case GT: return ARM_LT;
22540 case LE: return ARM_GE;
22541 case LT: return ARM_GT;
22542 case GEU: return ARM_LS;
22543 case GTU: return ARM_CC;
22544 case LEU: return ARM_CS;
22545 case LTU: return ARM_HI;
22546 default: return ARM_NV;
22549 case CC_Cmode:
22550 switch (comp_code)
22552 case LTU: return ARM_CS;
22553 case GEU: return ARM_CC;
22554 default: return ARM_NV;
22557 case CC_CZmode:
22558 switch (comp_code)
22560 case NE: return ARM_NE;
22561 case EQ: return ARM_EQ;
22562 case GEU: return ARM_CS;
22563 case GTU: return ARM_HI;
22564 case LEU: return ARM_LS;
22565 case LTU: return ARM_CC;
22566 default: return ARM_NV;
22569 case CC_NCVmode:
22570 switch (comp_code)
22572 case GE: return ARM_GE;
22573 case LT: return ARM_LT;
22574 case GEU: return ARM_CS;
22575 case LTU: return ARM_CC;
22576 default: return ARM_NV;
22579 case CCmode:
22580 switch (comp_code)
22582 case NE: return ARM_NE;
22583 case EQ: return ARM_EQ;
22584 case GE: return ARM_GE;
22585 case GT: return ARM_GT;
22586 case LE: return ARM_LE;
22587 case LT: return ARM_LT;
22588 case GEU: return ARM_CS;
22589 case GTU: return ARM_HI;
22590 case LEU: return ARM_LS;
22591 case LTU: return ARM_CC;
22592 default: return ARM_NV;
22595 default: gcc_unreachable ();
22599 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22600 static enum arm_cond_code
22601 get_arm_condition_code (rtx comparison)
22603 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22604 gcc_assert (code != ARM_NV);
22605 return code;
22608 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22609 instructions. */
22610 void
22611 thumb2_final_prescan_insn (rtx_insn *insn)
22613 rtx_insn *first_insn = insn;
22614 rtx body = PATTERN (insn);
22615 rtx predicate;
22616 enum arm_cond_code code;
22617 int n;
22618 int mask;
22619 int max;
22621 /* max_insns_skipped in the tune was already taken into account in the
22622 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22623 just emit the IT blocks as best we can. It does not make sense to split
22624 the IT blocks. */
22625 max = MAX_INSN_PER_IT_BLOCK;
22627 /* Remove the previous insn from the count of insns to be output. */
22628 if (arm_condexec_count)
22629 arm_condexec_count--;
22631 /* Nothing to do if we are already inside a conditional block. */
22632 if (arm_condexec_count)
22633 return;
22635 if (GET_CODE (body) != COND_EXEC)
22636 return;
22638 /* Conditional jumps are implemented directly. */
22639 if (JUMP_P (insn))
22640 return;
22642 predicate = COND_EXEC_TEST (body);
22643 arm_current_cc = get_arm_condition_code (predicate);
22645 n = get_attr_ce_count (insn);
22646 arm_condexec_count = 1;
22647 arm_condexec_mask = (1 << n) - 1;
22648 arm_condexec_masklen = n;
22649 /* See if subsequent instructions can be combined into the same block. */
22650 for (;;)
22652 insn = next_nonnote_insn (insn);
22654 /* Jumping into the middle of an IT block is illegal, so a label or
22655 barrier terminates the block. */
22656 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22657 break;
22659 body = PATTERN (insn);
22660 /* USE and CLOBBER aren't really insns, so just skip them. */
22661 if (GET_CODE (body) == USE
22662 || GET_CODE (body) == CLOBBER)
22663 continue;
22665 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22666 if (GET_CODE (body) != COND_EXEC)
22667 break;
22668 /* Maximum number of conditionally executed instructions in a block. */
22669 n = get_attr_ce_count (insn);
22670 if (arm_condexec_masklen + n > max)
22671 break;
22673 predicate = COND_EXEC_TEST (body);
22674 code = get_arm_condition_code (predicate);
22675 mask = (1 << n) - 1;
22676 if (arm_current_cc == code)
22677 arm_condexec_mask |= (mask << arm_condexec_masklen);
22678 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22679 break;
22681 arm_condexec_count++;
22682 arm_condexec_masklen += n;
22684 /* A jump must be the last instruction in a conditional block. */
22685 if (JUMP_P (insn))
22686 break;
22688 /* Restore recog_data (getting the attributes of other insns can
22689 destroy this array, but final.c assumes that it remains intact
22690 across this call). */
22691 extract_constrain_insn_cached (first_insn);
22694 void
22695 arm_final_prescan_insn (rtx_insn *insn)
22697 /* BODY will hold the body of INSN. */
22698 rtx body = PATTERN (insn);
22700 /* This will be 1 if trying to repeat the trick, and things need to be
22701 reversed if it appears to fail. */
22702 int reverse = 0;
22704 /* If we start with a return insn, we only succeed if we find another one. */
22705 int seeking_return = 0;
22706 enum rtx_code return_code = UNKNOWN;
22708 /* START_INSN will hold the insn from where we start looking. This is the
22709 first insn after the following code_label if REVERSE is true. */
22710 rtx_insn *start_insn = insn;
22712 /* If in state 4, check if the target branch is reached, in order to
22713 change back to state 0. */
22714 if (arm_ccfsm_state == 4)
22716 if (insn == arm_target_insn)
22718 arm_target_insn = NULL;
22719 arm_ccfsm_state = 0;
22721 return;
22724 /* If in state 3, it is possible to repeat the trick, if this insn is an
22725 unconditional branch to a label, and immediately following this branch
22726 is the previous target label which is only used once, and the label this
22727 branch jumps to is not too far off. */
22728 if (arm_ccfsm_state == 3)
22730 if (simplejump_p (insn))
22732 start_insn = next_nonnote_insn (start_insn);
22733 if (BARRIER_P (start_insn))
22735 /* XXX Isn't this always a barrier? */
22736 start_insn = next_nonnote_insn (start_insn);
22738 if (LABEL_P (start_insn)
22739 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22740 && LABEL_NUSES (start_insn) == 1)
22741 reverse = TRUE;
22742 else
22743 return;
22745 else if (ANY_RETURN_P (body))
22747 start_insn = next_nonnote_insn (start_insn);
22748 if (BARRIER_P (start_insn))
22749 start_insn = next_nonnote_insn (start_insn);
22750 if (LABEL_P (start_insn)
22751 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22752 && LABEL_NUSES (start_insn) == 1)
22754 reverse = TRUE;
22755 seeking_return = 1;
22756 return_code = GET_CODE (body);
22758 else
22759 return;
22761 else
22762 return;
22765 gcc_assert (!arm_ccfsm_state || reverse);
22766 if (!JUMP_P (insn))
22767 return;
22769 /* This jump might be paralleled with a clobber of the condition codes;
22770 the jump should always come first. */
22771 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22772 body = XVECEXP (body, 0, 0);
22774 if (reverse
22775 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22776 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22778 int insns_skipped;
22779 int fail = FALSE, succeed = FALSE;
22780 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22781 int then_not_else = TRUE;
22782 rtx_insn *this_insn = start_insn;
22783 rtx label = 0;
22785 /* Register the insn jumped to. */
22786 if (reverse)
22788 if (!seeking_return)
22789 label = XEXP (SET_SRC (body), 0);
22791 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22792 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22793 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22795 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22796 then_not_else = FALSE;
22798 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22800 seeking_return = 1;
22801 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22803 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22805 seeking_return = 1;
22806 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22807 then_not_else = FALSE;
22809 else
22810 gcc_unreachable ();
22812 /* See how many insns this branch skips, and what kind of insns. If all
22813 insns are okay, and the label or unconditional branch to the same
22814 label is not too far away, succeed. */
22815 for (insns_skipped = 0;
22816 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22818 rtx scanbody;
22820 this_insn = next_nonnote_insn (this_insn);
22821 if (!this_insn)
22822 break;
22824 switch (GET_CODE (this_insn))
22826 case CODE_LABEL:
22827 /* Succeed if it is the target label, otherwise fail since
22828 control falls in from somewhere else. */
22829 if (this_insn == label)
22831 arm_ccfsm_state = 1;
22832 succeed = TRUE;
22834 else
22835 fail = TRUE;
22836 break;
22838 case BARRIER:
22839 /* Succeed if the following insn is the target label.
22840 Otherwise fail.
22841 If return insns are used then the last insn in a function
22842 will be a barrier. */
22843 this_insn = next_nonnote_insn (this_insn);
22844 if (this_insn && this_insn == label)
22846 arm_ccfsm_state = 1;
22847 succeed = TRUE;
22849 else
22850 fail = TRUE;
22851 break;
22853 case CALL_INSN:
22854 /* The AAPCS says that conditional calls should not be
22855 used since they make interworking inefficient (the
22856 linker can't transform BL<cond> into BLX). That's
22857 only a problem if the machine has BLX. */
22858 if (arm_arch5)
22860 fail = TRUE;
22861 break;
22864 /* Succeed if the following insn is the target label, or
22865 if the following two insns are a barrier and the
22866 target label. */
22867 this_insn = next_nonnote_insn (this_insn);
22868 if (this_insn && BARRIER_P (this_insn))
22869 this_insn = next_nonnote_insn (this_insn);
22871 if (this_insn && this_insn == label
22872 && insns_skipped < max_insns_skipped)
22874 arm_ccfsm_state = 1;
22875 succeed = TRUE;
22877 else
22878 fail = TRUE;
22879 break;
22881 case JUMP_INSN:
22882 /* If this is an unconditional branch to the same label, succeed.
22883 If it is to another label, do nothing. If it is conditional,
22884 fail. */
22885 /* XXX Probably, the tests for SET and the PC are
22886 unnecessary. */
22888 scanbody = PATTERN (this_insn);
22889 if (GET_CODE (scanbody) == SET
22890 && GET_CODE (SET_DEST (scanbody)) == PC)
22892 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22893 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22895 arm_ccfsm_state = 2;
22896 succeed = TRUE;
22898 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22899 fail = TRUE;
22901 /* Fail if a conditional return is undesirable (e.g. on a
22902 StrongARM), but still allow this if optimizing for size. */
22903 else if (GET_CODE (scanbody) == return_code
22904 && !use_return_insn (TRUE, NULL)
22905 && !optimize_size)
22906 fail = TRUE;
22907 else if (GET_CODE (scanbody) == return_code)
22909 arm_ccfsm_state = 2;
22910 succeed = TRUE;
22912 else if (GET_CODE (scanbody) == PARALLEL)
22914 switch (get_attr_conds (this_insn))
22916 case CONDS_NOCOND:
22917 break;
22918 default:
22919 fail = TRUE;
22920 break;
22923 else
22924 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22926 break;
22928 case INSN:
22929 /* Instructions using or affecting the condition codes make it
22930 fail. */
22931 scanbody = PATTERN (this_insn);
22932 if (!(GET_CODE (scanbody) == SET
22933 || GET_CODE (scanbody) == PARALLEL)
22934 || get_attr_conds (this_insn) != CONDS_NOCOND)
22935 fail = TRUE;
22936 break;
22938 default:
22939 break;
22942 if (succeed)
22944 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22945 arm_target_label = CODE_LABEL_NUMBER (label);
22946 else
22948 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22950 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22952 this_insn = next_nonnote_insn (this_insn);
22953 gcc_assert (!this_insn
22954 || (!BARRIER_P (this_insn)
22955 && !LABEL_P (this_insn)));
22957 if (!this_insn)
22959 /* Oh, dear! We ran off the end -- give up. */
22960 extract_constrain_insn_cached (insn);
22961 arm_ccfsm_state = 0;
22962 arm_target_insn = NULL;
22963 return;
22965 arm_target_insn = this_insn;
22968 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22969 what it was. */
22970 if (!reverse)
22971 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22973 if (reverse || then_not_else)
22974 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22977 /* Restore recog_data (getting the attributes of other insns can
22978 destroy this array, but final.c assumes that it remains intact
22979 across this call). */
22980 extract_constrain_insn_cached (insn);
22984 /* Output IT instructions. */
22985 void
22986 thumb2_asm_output_opcode (FILE * stream)
22988 char buff[5];
22989 int n;
22991 if (arm_condexec_mask)
22993 for (n = 0; n < arm_condexec_masklen; n++)
22994 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22995 buff[n] = 0;
22996 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22997 arm_condition_codes[arm_current_cc]);
22998 arm_condexec_mask = 0;
23002 /* Returns true if REGNO is a valid register
23003 for holding a quantity of type MODE. */
23005 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
23007 if (GET_MODE_CLASS (mode) == MODE_CC)
23008 return (regno == CC_REGNUM
23009 || (TARGET_HARD_FLOAT && TARGET_VFP
23010 && regno == VFPCC_REGNUM));
23012 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23013 return false;
23015 if (TARGET_THUMB1)
23016 /* For the Thumb we only allow values bigger than SImode in
23017 registers 0 - 6, so that there is always a second low
23018 register available to hold the upper part of the value.
23019 We probably ought to ensure that the register is the
23020 start of an even numbered register pair. */
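/* For example, a DImode value may start in any of r0-r6 (occupying that
   register and the next), but may not start in r7 or a high register.  */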
23021 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23023 if (TARGET_HARD_FLOAT && TARGET_VFP
23024 && IS_VFP_REGNUM (regno))
23026 if (mode == SFmode || mode == SImode)
23027 return VFP_REGNO_OK_FOR_SINGLE (regno);
23029 if (mode == DFmode)
23030 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23032 /* VFP registers can hold HFmode values, but there is no point in
23033 putting them there unless we have hardware conversion insns. */
23034 if (mode == HFmode)
23035 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23037 if (TARGET_NEON)
23038 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23039 || (VALID_NEON_QREG_MODE (mode)
23040 && NEON_REGNO_OK_FOR_QUAD (regno))
23041 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23042 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23043 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23044 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23045 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23047 return FALSE;
23050 if (TARGET_REALLY_IWMMXT)
23052 if (IS_IWMMXT_GR_REGNUM (regno))
23053 return mode == SImode;
23055 if (IS_IWMMXT_REGNUM (regno))
23056 return VALID_IWMMXT_REG_MODE (mode);
23059 /* We allow almost any value to be stored in the general registers.
23060 Restrict doubleword quantities to even register pairs in ARM state
23061 so that we can use ldrd. Do not allow very large Neon structure
23062 opaque modes in general registers; they would use too many. */
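/* For example, in ARM state with TARGET_LDRD a DImode value may live in
   the even-starting pair r4/r5 but not in the odd-starting pair r5/r6,
   so that ldrd/strd can always be used to access it.  */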
23063 if (regno <= LAST_ARM_REGNUM)
23065 if (ARM_NUM_REGS (mode) > 4)
23066 return FALSE;
23068 if (TARGET_THUMB2)
23069 return TRUE;
23071 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23074 if (regno == FRAME_POINTER_REGNUM
23075 || regno == ARG_POINTER_REGNUM)
23076 /* We only allow integers in the fake hard registers. */
23077 return GET_MODE_CLASS (mode) == MODE_INT;
23079 return FALSE;
23082 /* Implement MODES_TIEABLE_P. */
23084 bool
23085 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
23087 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23088 return true;
23090 /* We specifically want to allow elements of "structure" modes to
23091 be tieable to the structure. This more general condition allows
23092 other rarer situations too. */
23093 if (TARGET_NEON
23094 && (VALID_NEON_DREG_MODE (mode1)
23095 || VALID_NEON_QREG_MODE (mode1)
23096 || VALID_NEON_STRUCT_MODE (mode1))
23097 && (VALID_NEON_DREG_MODE (mode2)
23098 || VALID_NEON_QREG_MODE (mode2)
23099 || VALID_NEON_STRUCT_MODE (mode2)))
23100 return true;
23102 return false;
23105 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23106 not used in arm mode. */
23108 enum reg_class
23109 arm_regno_class (int regno)
23111 if (regno == PC_REGNUM)
23112 return NO_REGS;
23114 if (TARGET_THUMB1)
23116 if (regno == STACK_POINTER_REGNUM)
23117 return STACK_REG;
23118 if (regno == CC_REGNUM)
23119 return CC_REG;
23120 if (regno < 8)
23121 return LO_REGS;
23122 return HI_REGS;
23125 if (TARGET_THUMB2 && regno < 8)
23126 return LO_REGS;
23128 if ( regno <= LAST_ARM_REGNUM
23129 || regno == FRAME_POINTER_REGNUM
23130 || regno == ARG_POINTER_REGNUM)
23131 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23133 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23134 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23136 if (IS_VFP_REGNUM (regno))
23138 if (regno <= D7_VFP_REGNUM)
23139 return VFP_D0_D7_REGS;
23140 else if (regno <= LAST_LO_VFP_REGNUM)
23141 return VFP_LO_REGS;
23142 else
23143 return VFP_HI_REGS;
23146 if (IS_IWMMXT_REGNUM (regno))
23147 return IWMMXT_REGS;
23149 if (IS_IWMMXT_GR_REGNUM (regno))
23150 return IWMMXT_GR_REGS;
23152 return NO_REGS;
23155 /* Handle a special case when computing the offset
23156 of an argument from the frame pointer. */
23158 arm_debugger_arg_offset (int value, rtx addr)
23160 rtx_insn *insn;
23162 /* We are only interested if dbxout_parms() failed to compute the offset. */
23163 if (value != 0)
23164 return 0;
23166 /* We can only cope with the case where the address is held in a register. */
23167 if (!REG_P (addr))
23168 return 0;
23170 /* If we are using the frame pointer to point at the argument, then
23171 an offset of 0 is correct. */
23172 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23173 return 0;
23175 /* If we are using the stack pointer to point at the
23176 argument, then an offset of 0 is correct. */
23177 /* ??? Check this is consistent with thumb2 frame layout. */
23178 if ((TARGET_THUMB || !frame_pointer_needed)
23179 && REGNO (addr) == SP_REGNUM)
23180 return 0;
23182 /* Oh dear. The argument is pointed to by a register rather
23183 than being held in a register, or being stored at a known
23184 offset from the frame pointer. Since GDB only understands
23185 those two kinds of argument we must translate the address
23186 held in the register into an offset from the frame pointer.
23187 We do this by searching through the insns for the function
23188 looking to see where this register gets its value. If the
23189 register is initialized from the frame pointer plus an offset
23190 then we are in luck and we can continue, otherwise we give up.
23192 This code is exercised by producing debugging information
23193 for a function with arguments like this:
23195 double func (double a, double b, int c, double d) {return d;}
23197 Without this code the stab for parameter 'd' will be set to
23198 an offset of 0 from the frame pointer, rather than 8. */
23200 /* The if() statement says:
23202 If the insn is a normal instruction
23203 and if the insn is setting the value in a register
23204 and if the register being set is the register holding the address of the argument
23205 and if the address is computed by an addition
23206 that involves adding to a register
23207 which is the frame pointer
23208 a constant integer
23210 then... */
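/* In other words, we look for an insn whose pattern has the form
   (set (reg ADDR) (plus (reg HARD_FRAME_POINTER_REGNUM) (const_int N)))
   and, if found, N is the offset we report.  */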
23212 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23214 if ( NONJUMP_INSN_P (insn)
23215 && GET_CODE (PATTERN (insn)) == SET
23216 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23217 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23218 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23219 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23220 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23223 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23225 break;
23229 if (value == 0)
23231 debug_rtx (addr);
23232 warning (0, "unable to compute real location of stacked parameter");
23233 value = 8; /* XXX magic hack */
23236 return value;
23239 typedef enum {
23240 T_V8QI,
23241 T_V4HI,
23242 T_V4HF,
23243 T_V2SI,
23244 T_V2SF,
23245 T_DI,
23246 T_V16QI,
23247 T_V8HI,
23248 T_V4SI,
23249 T_V4SF,
23250 T_V2DI,
23251 T_TI,
23252 T_EI,
23253 T_OI,
23254 T_MAX /* Size of enum. Keep last. */
23255 } neon_builtin_type_mode;
23257 #define TYPE_MODE_BIT(X) (1 << (X))
23259 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23260 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23261 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23262 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23263 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23264 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23266 #define v8qi_UP T_V8QI
23267 #define v4hi_UP T_V4HI
23268 #define v4hf_UP T_V4HF
23269 #define v2si_UP T_V2SI
23270 #define v2sf_UP T_V2SF
23271 #define di_UP T_DI
23272 #define v16qi_UP T_V16QI
23273 #define v8hi_UP T_V8HI
23274 #define v4si_UP T_V4SI
23275 #define v4sf_UP T_V4SF
23276 #define v2di_UP T_V2DI
23277 #define ti_UP T_TI
23278 #define ei_UP T_EI
23279 #define oi_UP T_OI
23281 #define UP(X) X##_UP
23283 typedef enum {
23284 NEON_BINOP,
23285 NEON_TERNOP,
23286 NEON_UNOP,
23287 NEON_BSWAP,
23288 NEON_GETLANE,
23289 NEON_SETLANE,
23290 NEON_CREATE,
23291 NEON_RINT,
23292 NEON_COPYSIGNF,
23293 NEON_DUP,
23294 NEON_DUPLANE,
23295 NEON_COMBINE,
23296 NEON_SPLIT,
23297 NEON_LANEMUL,
23298 NEON_LANEMULL,
23299 NEON_LANEMULH,
23300 NEON_LANEMAC,
23301 NEON_SCALARMUL,
23302 NEON_SCALARMULL,
23303 NEON_SCALARMULH,
23304 NEON_SCALARMAC,
23305 NEON_CONVERT,
23306 NEON_FLOAT_WIDEN,
23307 NEON_FLOAT_NARROW,
23308 NEON_FIXCONV,
23309 NEON_SELECT,
23310 NEON_REINTERP,
23311 NEON_VTBL,
23312 NEON_VTBX,
23313 NEON_LOAD1,
23314 NEON_LOAD1LANE,
23315 NEON_STORE1,
23316 NEON_STORE1LANE,
23317 NEON_LOADSTRUCT,
23318 NEON_LOADSTRUCTLANE,
23319 NEON_STORESTRUCT,
23320 NEON_STORESTRUCTLANE,
23321 NEON_LOGICBINOP,
23322 NEON_SHIFTINSERT,
23323 NEON_SHIFTIMM,
23324 NEON_SHIFTACC
23325 } neon_itype;
23327 typedef struct {
23328 const char *name;
23329 const neon_itype itype;
23330 const neon_builtin_type_mode mode;
23331 const enum insn_code code;
23332 unsigned int fcode;
23333 } neon_builtin_datum;
23335 #define CF(N,X) CODE_FOR_neon_##N##X
23337 #define VAR1(T, N, A) \
23338 {#N, NEON_##T, UP (A), CF (N, A), 0}
23339 #define VAR2(T, N, A, B) \
23340 VAR1 (T, N, A), \
23341 {#N, NEON_##T, UP (B), CF (N, B), 0}
23342 #define VAR3(T, N, A, B, C) \
23343 VAR2 (T, N, A, B), \
23344 {#N, NEON_##T, UP (C), CF (N, C), 0}
23345 #define VAR4(T, N, A, B, C, D) \
23346 VAR3 (T, N, A, B, C), \
23347 {#N, NEON_##T, UP (D), CF (N, D), 0}
23348 #define VAR5(T, N, A, B, C, D, E) \
23349 VAR4 (T, N, A, B, C, D), \
23350 {#N, NEON_##T, UP (E), CF (N, E), 0}
23351 #define VAR6(T, N, A, B, C, D, E, F) \
23352 VAR5 (T, N, A, B, C, D, E), \
23353 {#N, NEON_##T, UP (F), CF (N, F), 0}
23354 #define VAR7(T, N, A, B, C, D, E, F, G) \
23355 VAR6 (T, N, A, B, C, D, E, F), \
23356 {#N, NEON_##T, UP (G), CF (N, G), 0}
23357 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23358 VAR7 (T, N, A, B, C, D, E, F, G), \
23359 {#N, NEON_##T, UP (H), CF (N, H), 0}
23360 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23361 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23362 {#N, NEON_##T, UP (I), CF (N, I), 0}
23363 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23364 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23365 {#N, NEON_##T, UP (J), CF (N, J), 0}
23367 /* The NEON builtin data can be found in arm_neon_builtins.def.
23368 The mode entries in the following table correspond to the "key" type of the
23369 instruction variant, i.e. equivalent to that which would be specified after
23370 the assembler mnemonic, which usually refers to the last vector operand.
23371 (Signed/unsigned/polynomial types are not differentiated between though, and
23372 are all mapped onto the same mode for a given element size.) The modes
23373 listed per instruction should be the same as those defined for that
23374 instruction's pattern in neon.md. */
23376 static neon_builtin_datum neon_builtin_data[] =
23378 #include "arm_neon_builtins.def"
23381 #undef CF
23382 #undef VAR1
23383 #undef VAR2
23384 #undef VAR3
23385 #undef VAR4
23386 #undef VAR5
23387 #undef VAR6
23388 #undef VAR7
23389 #undef VAR8
23390 #undef VAR9
23391 #undef VAR10
23393 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23394 #define VAR1(T, N, A) \
23395 CF (N, A)
23396 #define VAR2(T, N, A, B) \
23397 VAR1 (T, N, A), \
23398 CF (N, B)
23399 #define VAR3(T, N, A, B, C) \
23400 VAR2 (T, N, A, B), \
23401 CF (N, C)
23402 #define VAR4(T, N, A, B, C, D) \
23403 VAR3 (T, N, A, B, C), \
23404 CF (N, D)
23405 #define VAR5(T, N, A, B, C, D, E) \
23406 VAR4 (T, N, A, B, C, D), \
23407 CF (N, E)
23408 #define VAR6(T, N, A, B, C, D, E, F) \
23409 VAR5 (T, N, A, B, C, D, E), \
23410 CF (N, F)
23411 #define VAR7(T, N, A, B, C, D, E, F, G) \
23412 VAR6 (T, N, A, B, C, D, E, F), \
23413 CF (N, G)
23414 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23415 VAR7 (T, N, A, B, C, D, E, F, G), \
23416 CF (N, H)
23417 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23418 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23419 CF (N, I)
23420 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23421 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23422 CF (N, J)
23423 enum arm_builtins
23425 ARM_BUILTIN_GETWCGR0,
23426 ARM_BUILTIN_GETWCGR1,
23427 ARM_BUILTIN_GETWCGR2,
23428 ARM_BUILTIN_GETWCGR3,
23430 ARM_BUILTIN_SETWCGR0,
23431 ARM_BUILTIN_SETWCGR1,
23432 ARM_BUILTIN_SETWCGR2,
23433 ARM_BUILTIN_SETWCGR3,
23435 ARM_BUILTIN_WZERO,
23437 ARM_BUILTIN_WAVG2BR,
23438 ARM_BUILTIN_WAVG2HR,
23439 ARM_BUILTIN_WAVG2B,
23440 ARM_BUILTIN_WAVG2H,
23442 ARM_BUILTIN_WACCB,
23443 ARM_BUILTIN_WACCH,
23444 ARM_BUILTIN_WACCW,
23446 ARM_BUILTIN_WMACS,
23447 ARM_BUILTIN_WMACSZ,
23448 ARM_BUILTIN_WMACU,
23449 ARM_BUILTIN_WMACUZ,
23451 ARM_BUILTIN_WSADB,
23452 ARM_BUILTIN_WSADBZ,
23453 ARM_BUILTIN_WSADH,
23454 ARM_BUILTIN_WSADHZ,
23456 ARM_BUILTIN_WALIGNI,
23457 ARM_BUILTIN_WALIGNR0,
23458 ARM_BUILTIN_WALIGNR1,
23459 ARM_BUILTIN_WALIGNR2,
23460 ARM_BUILTIN_WALIGNR3,
23462 ARM_BUILTIN_TMIA,
23463 ARM_BUILTIN_TMIAPH,
23464 ARM_BUILTIN_TMIABB,
23465 ARM_BUILTIN_TMIABT,
23466 ARM_BUILTIN_TMIATB,
23467 ARM_BUILTIN_TMIATT,
23469 ARM_BUILTIN_TMOVMSKB,
23470 ARM_BUILTIN_TMOVMSKH,
23471 ARM_BUILTIN_TMOVMSKW,
23473 ARM_BUILTIN_TBCSTB,
23474 ARM_BUILTIN_TBCSTH,
23475 ARM_BUILTIN_TBCSTW,
23477 ARM_BUILTIN_WMADDS,
23478 ARM_BUILTIN_WMADDU,
23480 ARM_BUILTIN_WPACKHSS,
23481 ARM_BUILTIN_WPACKWSS,
23482 ARM_BUILTIN_WPACKDSS,
23483 ARM_BUILTIN_WPACKHUS,
23484 ARM_BUILTIN_WPACKWUS,
23485 ARM_BUILTIN_WPACKDUS,
23487 ARM_BUILTIN_WADDB,
23488 ARM_BUILTIN_WADDH,
23489 ARM_BUILTIN_WADDW,
23490 ARM_BUILTIN_WADDSSB,
23491 ARM_BUILTIN_WADDSSH,
23492 ARM_BUILTIN_WADDSSW,
23493 ARM_BUILTIN_WADDUSB,
23494 ARM_BUILTIN_WADDUSH,
23495 ARM_BUILTIN_WADDUSW,
23496 ARM_BUILTIN_WSUBB,
23497 ARM_BUILTIN_WSUBH,
23498 ARM_BUILTIN_WSUBW,
23499 ARM_BUILTIN_WSUBSSB,
23500 ARM_BUILTIN_WSUBSSH,
23501 ARM_BUILTIN_WSUBSSW,
23502 ARM_BUILTIN_WSUBUSB,
23503 ARM_BUILTIN_WSUBUSH,
23504 ARM_BUILTIN_WSUBUSW,
23506 ARM_BUILTIN_WAND,
23507 ARM_BUILTIN_WANDN,
23508 ARM_BUILTIN_WOR,
23509 ARM_BUILTIN_WXOR,
23511 ARM_BUILTIN_WCMPEQB,
23512 ARM_BUILTIN_WCMPEQH,
23513 ARM_BUILTIN_WCMPEQW,
23514 ARM_BUILTIN_WCMPGTUB,
23515 ARM_BUILTIN_WCMPGTUH,
23516 ARM_BUILTIN_WCMPGTUW,
23517 ARM_BUILTIN_WCMPGTSB,
23518 ARM_BUILTIN_WCMPGTSH,
23519 ARM_BUILTIN_WCMPGTSW,
23521 ARM_BUILTIN_TEXTRMSB,
23522 ARM_BUILTIN_TEXTRMSH,
23523 ARM_BUILTIN_TEXTRMSW,
23524 ARM_BUILTIN_TEXTRMUB,
23525 ARM_BUILTIN_TEXTRMUH,
23526 ARM_BUILTIN_TEXTRMUW,
23527 ARM_BUILTIN_TINSRB,
23528 ARM_BUILTIN_TINSRH,
23529 ARM_BUILTIN_TINSRW,
23531 ARM_BUILTIN_WMAXSW,
23532 ARM_BUILTIN_WMAXSH,
23533 ARM_BUILTIN_WMAXSB,
23534 ARM_BUILTIN_WMAXUW,
23535 ARM_BUILTIN_WMAXUH,
23536 ARM_BUILTIN_WMAXUB,
23537 ARM_BUILTIN_WMINSW,
23538 ARM_BUILTIN_WMINSH,
23539 ARM_BUILTIN_WMINSB,
23540 ARM_BUILTIN_WMINUW,
23541 ARM_BUILTIN_WMINUH,
23542 ARM_BUILTIN_WMINUB,
23544 ARM_BUILTIN_WMULUM,
23545 ARM_BUILTIN_WMULSM,
23546 ARM_BUILTIN_WMULUL,
23548 ARM_BUILTIN_PSADBH,
23549 ARM_BUILTIN_WSHUFH,
23551 ARM_BUILTIN_WSLLH,
23552 ARM_BUILTIN_WSLLW,
23553 ARM_BUILTIN_WSLLD,
23554 ARM_BUILTIN_WSRAH,
23555 ARM_BUILTIN_WSRAW,
23556 ARM_BUILTIN_WSRAD,
23557 ARM_BUILTIN_WSRLH,
23558 ARM_BUILTIN_WSRLW,
23559 ARM_BUILTIN_WSRLD,
23560 ARM_BUILTIN_WRORH,
23561 ARM_BUILTIN_WRORW,
23562 ARM_BUILTIN_WRORD,
23563 ARM_BUILTIN_WSLLHI,
23564 ARM_BUILTIN_WSLLWI,
23565 ARM_BUILTIN_WSLLDI,
23566 ARM_BUILTIN_WSRAHI,
23567 ARM_BUILTIN_WSRAWI,
23568 ARM_BUILTIN_WSRADI,
23569 ARM_BUILTIN_WSRLHI,
23570 ARM_BUILTIN_WSRLWI,
23571 ARM_BUILTIN_WSRLDI,
23572 ARM_BUILTIN_WRORHI,
23573 ARM_BUILTIN_WRORWI,
23574 ARM_BUILTIN_WRORDI,
23576 ARM_BUILTIN_WUNPCKIHB,
23577 ARM_BUILTIN_WUNPCKIHH,
23578 ARM_BUILTIN_WUNPCKIHW,
23579 ARM_BUILTIN_WUNPCKILB,
23580 ARM_BUILTIN_WUNPCKILH,
23581 ARM_BUILTIN_WUNPCKILW,
23583 ARM_BUILTIN_WUNPCKEHSB,
23584 ARM_BUILTIN_WUNPCKEHSH,
23585 ARM_BUILTIN_WUNPCKEHSW,
23586 ARM_BUILTIN_WUNPCKEHUB,
23587 ARM_BUILTIN_WUNPCKEHUH,
23588 ARM_BUILTIN_WUNPCKEHUW,
23589 ARM_BUILTIN_WUNPCKELSB,
23590 ARM_BUILTIN_WUNPCKELSH,
23591 ARM_BUILTIN_WUNPCKELSW,
23592 ARM_BUILTIN_WUNPCKELUB,
23593 ARM_BUILTIN_WUNPCKELUH,
23594 ARM_BUILTIN_WUNPCKELUW,
23596 ARM_BUILTIN_WABSB,
23597 ARM_BUILTIN_WABSH,
23598 ARM_BUILTIN_WABSW,
23600 ARM_BUILTIN_WADDSUBHX,
23601 ARM_BUILTIN_WSUBADDHX,
23603 ARM_BUILTIN_WABSDIFFB,
23604 ARM_BUILTIN_WABSDIFFH,
23605 ARM_BUILTIN_WABSDIFFW,
23607 ARM_BUILTIN_WADDCH,
23608 ARM_BUILTIN_WADDCW,
23610 ARM_BUILTIN_WAVG4,
23611 ARM_BUILTIN_WAVG4R,
23613 ARM_BUILTIN_WMADDSX,
23614 ARM_BUILTIN_WMADDUX,
23616 ARM_BUILTIN_WMADDSN,
23617 ARM_BUILTIN_WMADDUN,
23619 ARM_BUILTIN_WMULWSM,
23620 ARM_BUILTIN_WMULWUM,
23622 ARM_BUILTIN_WMULWSMR,
23623 ARM_BUILTIN_WMULWUMR,
23625 ARM_BUILTIN_WMULWL,
23627 ARM_BUILTIN_WMULSMR,
23628 ARM_BUILTIN_WMULUMR,
23630 ARM_BUILTIN_WQMULM,
23631 ARM_BUILTIN_WQMULMR,
23633 ARM_BUILTIN_WQMULWM,
23634 ARM_BUILTIN_WQMULWMR,
23636 ARM_BUILTIN_WADDBHUSM,
23637 ARM_BUILTIN_WADDBHUSL,
23639 ARM_BUILTIN_WQMIABB,
23640 ARM_BUILTIN_WQMIABT,
23641 ARM_BUILTIN_WQMIATB,
23642 ARM_BUILTIN_WQMIATT,
23644 ARM_BUILTIN_WQMIABBN,
23645 ARM_BUILTIN_WQMIABTN,
23646 ARM_BUILTIN_WQMIATBN,
23647 ARM_BUILTIN_WQMIATTN,
23649 ARM_BUILTIN_WMIABB,
23650 ARM_BUILTIN_WMIABT,
23651 ARM_BUILTIN_WMIATB,
23652 ARM_BUILTIN_WMIATT,
23654 ARM_BUILTIN_WMIABBN,
23655 ARM_BUILTIN_WMIABTN,
23656 ARM_BUILTIN_WMIATBN,
23657 ARM_BUILTIN_WMIATTN,
23659 ARM_BUILTIN_WMIAWBB,
23660 ARM_BUILTIN_WMIAWBT,
23661 ARM_BUILTIN_WMIAWTB,
23662 ARM_BUILTIN_WMIAWTT,
23664 ARM_BUILTIN_WMIAWBBN,
23665 ARM_BUILTIN_WMIAWBTN,
23666 ARM_BUILTIN_WMIAWTBN,
23667 ARM_BUILTIN_WMIAWTTN,
23669 ARM_BUILTIN_WMERGE,
23671 ARM_BUILTIN_CRC32B,
23672 ARM_BUILTIN_CRC32H,
23673 ARM_BUILTIN_CRC32W,
23674 ARM_BUILTIN_CRC32CB,
23675 ARM_BUILTIN_CRC32CH,
23676 ARM_BUILTIN_CRC32CW,
23678 ARM_BUILTIN_GET_FPSCR,
23679 ARM_BUILTIN_SET_FPSCR,
23681 #undef CRYPTO1
23682 #undef CRYPTO2
23683 #undef CRYPTO3
23685 #define CRYPTO1(L, U, M1, M2) \
23686 ARM_BUILTIN_CRYPTO_##U,
23687 #define CRYPTO2(L, U, M1, M2, M3) \
23688 ARM_BUILTIN_CRYPTO_##U,
23689 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23690 ARM_BUILTIN_CRYPTO_##U,
23692 #include "crypto.def"
23694 #undef CRYPTO1
23695 #undef CRYPTO2
23696 #undef CRYPTO3
23698 #include "arm_neon_builtins.def"
23700 ,ARM_BUILTIN_MAX
23701 };
23703 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
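/* Sketch of the resulting numbering: if neon_builtin_data has N entries,
   the NEON enumerators are the last N values before ARM_BUILTIN_MAX, so they
   occupy function codes ARM_BUILTIN_NEON_BASE .. ARM_BUILTIN_MAX - 1 and
   table index i corresponds to function code ARM_BUILTIN_NEON_BASE + i;
   arm_init_neon_builtins relies on this when it assigns d->fcode.  */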
23705 #undef CF
23706 #undef VAR1
23707 #undef VAR2
23708 #undef VAR3
23709 #undef VAR4
23710 #undef VAR5
23711 #undef VAR6
23712 #undef VAR7
23713 #undef VAR8
23714 #undef VAR9
23715 #undef VAR10
23717 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23719 #define NUM_DREG_TYPES 5
23720 #define NUM_QREG_TYPES 6
23722 static void
23723 arm_init_neon_builtins (void)
23724 {
23725 unsigned int i, fcode;
23726 tree decl;
23728 tree neon_intQI_type_node;
23729 tree neon_intHI_type_node;
23730 tree neon_floatHF_type_node;
23731 tree neon_polyQI_type_node;
23732 tree neon_polyHI_type_node;
23733 tree neon_intSI_type_node;
23734 tree neon_intDI_type_node;
23735 tree neon_intUTI_type_node;
23736 tree neon_float_type_node;
23738 tree intQI_pointer_node;
23739 tree intHI_pointer_node;
23740 tree intSI_pointer_node;
23741 tree intDI_pointer_node;
23742 tree float_pointer_node;
23744 tree const_intQI_node;
23745 tree const_intHI_node;
23746 tree const_intSI_node;
23747 tree const_intDI_node;
23748 tree const_float_node;
23750 tree const_intQI_pointer_node;
23751 tree const_intHI_pointer_node;
23752 tree const_intSI_pointer_node;
23753 tree const_intDI_pointer_node;
23754 tree const_float_pointer_node;
23756 tree V8QI_type_node;
23757 tree V4HI_type_node;
23758 tree V4UHI_type_node;
23759 tree V4HF_type_node;
23760 tree V2SI_type_node;
23761 tree V2USI_type_node;
23762 tree V2SF_type_node;
23763 tree V16QI_type_node;
23764 tree V8HI_type_node;
23765 tree V8UHI_type_node;
23766 tree V4SI_type_node;
23767 tree V4USI_type_node;
23768 tree V4SF_type_node;
23769 tree V2DI_type_node;
23770 tree V2UDI_type_node;
23772 tree intUQI_type_node;
23773 tree intUHI_type_node;
23774 tree intUSI_type_node;
23775 tree intUDI_type_node;
23777 tree intEI_type_node;
23778 tree intOI_type_node;
23779 tree intCI_type_node;
23780 tree intXI_type_node;
23782 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23783 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23784 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23786 /* Create distinguished type nodes for NEON vector element types,
23787 and pointers to values of such types, so we can detect them later. */
23788 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23789 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23790 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23791 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23792 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23793 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23794 neon_float_type_node = make_node (REAL_TYPE);
23795 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23796 layout_type (neon_float_type_node);
23797 neon_floatHF_type_node = make_node (REAL_TYPE);
23798 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23799 layout_type (neon_floatHF_type_node);
23801 /* Define typedefs which exactly correspond to the modes we are basing vector
23802 types on. If you change these names you'll need to change
23803 the table used by arm_mangle_type too. */
23804 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23805 "__builtin_neon_qi");
23806 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23807 "__builtin_neon_hi");
23808 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23809 "__builtin_neon_hf");
23810 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23811 "__builtin_neon_si");
23812 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23813 "__builtin_neon_sf");
23814 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23815 "__builtin_neon_di");
23816 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23817 "__builtin_neon_poly8");
23818 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23819 "__builtin_neon_poly16");
23821 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23822 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23823 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23824 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23825 float_pointer_node = build_pointer_type (neon_float_type_node);
23827 /* Next create constant-qualified versions of the above types. */
23828 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23829 TYPE_QUAL_CONST);
23830 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23831 TYPE_QUAL_CONST);
23832 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23833 TYPE_QUAL_CONST);
23834 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23835 TYPE_QUAL_CONST);
23836 const_float_node = build_qualified_type (neon_float_type_node,
23837 TYPE_QUAL_CONST);
23839 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23840 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23841 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23842 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23843 const_float_pointer_node = build_pointer_type (const_float_node);
23845 /* Unsigned integer types for various mode sizes. */
23846 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23847 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23848 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23849 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23850 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23851 /* Now create vector types based on our NEON element types. */
23852 /* 64-bit vectors. */
23853 V8QI_type_node =
23854 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23855 V4HI_type_node =
23856 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23857 V4UHI_type_node =
23858 build_vector_type_for_mode (intUHI_type_node, V4HImode);
23859 V4HF_type_node =
23860 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23861 V2SI_type_node =
23862 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23863 V2USI_type_node =
23864 build_vector_type_for_mode (intUSI_type_node, V2SImode);
23865 V2SF_type_node =
23866 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23867 /* 128-bit vectors. */
23868 V16QI_type_node =
23869 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23870 V8HI_type_node =
23871 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23872 V8UHI_type_node =
23873 build_vector_type_for_mode (intUHI_type_node, V8HImode);
23874 V4SI_type_node =
23875 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23876 V4USI_type_node =
23877 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23878 V4SF_type_node =
23879 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23880 V2DI_type_node =
23881 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23882 V2UDI_type_node =
23883 build_vector_type_for_mode (intUDI_type_node, V2DImode);
23886 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23887 "__builtin_neon_uqi");
23888 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23889 "__builtin_neon_uhi");
23890 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23891 "__builtin_neon_usi");
23892 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23893 "__builtin_neon_udi");
23894 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23895 "__builtin_neon_poly64");
23896 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23897 "__builtin_neon_poly128");
23899 /* Opaque integer types for structures of vectors. */
23900 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23901 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23902 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23903 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23905 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23906 "__builtin_neon_ti");
23907 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23908 "__builtin_neon_ei");
23909 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23910 "__builtin_neon_oi");
23911 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23912 "__builtin_neon_ci");
23913 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23914 "__builtin_neon_xi");
23916 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23917 {
23919 tree V16UQI_type_node =
23920 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23922 tree v16uqi_ftype_v16uqi
23923 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23925 tree v16uqi_ftype_v16uqi_v16uqi
23926 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23927 V16UQI_type_node, NULL_TREE);
23929 tree v4usi_ftype_v4usi
23930 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23932 tree v4usi_ftype_v4usi_v4usi
23933 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23934 V4USI_type_node, NULL_TREE);
23936 tree v4usi_ftype_v4usi_v4usi_v4usi
23937 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23938 V4USI_type_node, V4USI_type_node, NULL_TREE);
23940 tree uti_ftype_udi_udi
23941 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23942 intUDI_type_node, NULL_TREE);
23944 #undef CRYPTO1
23945 #undef CRYPTO2
23946 #undef CRYPTO3
23947 #undef C
23948 #undef N
23949 #undef CF
23950 #undef FT1
23951 #undef FT2
23952 #undef FT3
23954 #define C(U) \
23955 ARM_BUILTIN_CRYPTO_##U
23956 #define N(L) \
23957 "__builtin_arm_crypto_"#L
23958 #define FT1(R, A) \
23959 R##_ftype_##A
23960 #define FT2(R, A1, A2) \
23961 R##_ftype_##A1##_##A2
23962 #define FT3(R, A1, A2, A3) \
23963 R##_ftype_##A1##_##A2##_##A3
23964 #define CRYPTO1(L, U, R, A) \
23965 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23966 C (U), BUILT_IN_MD, \
23967 NULL, NULL_TREE);
23968 #define CRYPTO2(L, U, R, A1, A2) \
23969 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23970 C (U), BUILT_IN_MD, \
23971 NULL, NULL_TREE);
23973 #define CRYPTO3(L, U, R, A1, A2, A3) \
23974 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23975 C (U), BUILT_IN_MD, \
23976 NULL, NULL_TREE);
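/* For illustration (hypothetical entry, shown only to make the expansion
   concrete): a line CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi) in
   crypto.def would expand here, via the C, N and FTn helpers above, to
     arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESD]
       = add_builtin_function ("__builtin_arm_crypto_aesd",
                               v16uqi_ftype_v16uqi_v16uqi,
                               ARM_BUILTIN_CRYPTO_AESD, BUILT_IN_MD,
                               NULL, NULL_TREE);  */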
23977 #include "crypto.def"
23979 #undef CRYPTO1
23980 #undef CRYPTO2
23981 #undef CRYPTO3
23982 #undef C
23983 #undef N
23984 #undef FT1
23985 #undef FT2
23986 #undef FT3
23987 }
23988 dreg_types[0] = V8QI_type_node;
23989 dreg_types[1] = V4HI_type_node;
23990 dreg_types[2] = V2SI_type_node;
23991 dreg_types[3] = V2SF_type_node;
23992 dreg_types[4] = neon_intDI_type_node;
23994 qreg_types[0] = V16QI_type_node;
23995 qreg_types[1] = V8HI_type_node;
23996 qreg_types[2] = V4SI_type_node;
23997 qreg_types[3] = V4SF_type_node;
23998 qreg_types[4] = V2DI_type_node;
23999 qreg_types[5] = neon_intUTI_type_node;
24001 for (i = 0; i < NUM_QREG_TYPES; i++)
24003 int j;
24004 for (j = 0; j < NUM_QREG_TYPES; j++)
24006 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
24007 reinterp_ftype_dreg[i][j]
24008 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
24010 reinterp_ftype_qreg[i][j]
24011 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
24015 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
24016 i < ARRAY_SIZE (neon_builtin_data);
24017 i++, fcode++)
24019 neon_builtin_datum *d = &neon_builtin_data[i];
24021 const char* const modenames[] = {
24022 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
24023 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
24024 "ti", "ei", "oi"
24026 char namebuf[60];
24027 tree ftype = NULL;
24028 int is_load = 0, is_store = 0;
24030 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
24032 d->fcode = fcode;
24034 switch (d->itype)
24036 case NEON_LOAD1:
24037 case NEON_LOAD1LANE:
24038 case NEON_LOADSTRUCT:
24039 case NEON_LOADSTRUCTLANE:
24040 is_load = 1;
24041 /* Fall through. */
24042 case NEON_STORE1:
24043 case NEON_STORE1LANE:
24044 case NEON_STORESTRUCT:
24045 case NEON_STORESTRUCTLANE:
24046 if (!is_load)
24047 is_store = 1;
24048 /* Fall through. */
24049 case NEON_UNOP:
24050 case NEON_RINT:
24051 case NEON_BINOP:
24052 case NEON_LOGICBINOP:
24053 case NEON_SHIFTINSERT:
24054 case NEON_TERNOP:
24055 case NEON_GETLANE:
24056 case NEON_SETLANE:
24057 case NEON_CREATE:
24058 case NEON_DUP:
24059 case NEON_DUPLANE:
24060 case NEON_SHIFTIMM:
24061 case NEON_SHIFTACC:
24062 case NEON_COMBINE:
24063 case NEON_SPLIT:
24064 case NEON_CONVERT:
24065 case NEON_FIXCONV:
24066 case NEON_LANEMUL:
24067 case NEON_LANEMULL:
24068 case NEON_LANEMULH:
24069 case NEON_LANEMAC:
24070 case NEON_SCALARMUL:
24071 case NEON_SCALARMULL:
24072 case NEON_SCALARMULH:
24073 case NEON_SCALARMAC:
24074 case NEON_SELECT:
24075 case NEON_VTBL:
24076 case NEON_VTBX:
24078 int k;
24079 tree return_type = void_type_node, args = void_list_node;
24081 /* Build a function type directly from the insn_data for
24082 this builtin. The build_function_type() function takes
24083 care of removing duplicates for us. */
24084 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
24086 tree eltype;
24088 if (is_load && k == 1)
24090 /* Neon load patterns always have the memory
24091 operand in the operand 1 position. */
24092 gcc_assert (insn_data[d->code].operand[k].predicate
24093 == neon_struct_operand);
24095 switch (d->mode)
24097 case T_V8QI:
24098 case T_V16QI:
24099 eltype = const_intQI_pointer_node;
24100 break;
24102 case T_V4HI:
24103 case T_V8HI:
24104 eltype = const_intHI_pointer_node;
24105 break;
24107 case T_V2SI:
24108 case T_V4SI:
24109 eltype = const_intSI_pointer_node;
24110 break;
24112 case T_V2SF:
24113 case T_V4SF:
24114 eltype = const_float_pointer_node;
24115 break;
24117 case T_DI:
24118 case T_V2DI:
24119 eltype = const_intDI_pointer_node;
24120 break;
24122 default: gcc_unreachable ();
24125 else if (is_store && k == 0)
24127 /* Similarly, Neon store patterns use operand 0 as
24128 the memory location to store to. */
24129 gcc_assert (insn_data[d->code].operand[k].predicate
24130 == neon_struct_operand);
24132 switch (d->mode)
24134 case T_V8QI:
24135 case T_V16QI:
24136 eltype = intQI_pointer_node;
24137 break;
24139 case T_V4HI:
24140 case T_V8HI:
24141 eltype = intHI_pointer_node;
24142 break;
24144 case T_V2SI:
24145 case T_V4SI:
24146 eltype = intSI_pointer_node;
24147 break;
24149 case T_V2SF:
24150 case T_V4SF:
24151 eltype = float_pointer_node;
24152 break;
24154 case T_DI:
24155 case T_V2DI:
24156 eltype = intDI_pointer_node;
24157 break;
24159 default: gcc_unreachable ();
24162 else
24164 switch (insn_data[d->code].operand[k].mode)
24166 case VOIDmode: eltype = void_type_node; break;
24167 /* Scalars. */
24168 case QImode: eltype = neon_intQI_type_node; break;
24169 case HImode: eltype = neon_intHI_type_node; break;
24170 case SImode: eltype = neon_intSI_type_node; break;
24171 case SFmode: eltype = neon_float_type_node; break;
24172 case DImode: eltype = neon_intDI_type_node; break;
24173 case TImode: eltype = intTI_type_node; break;
24174 case EImode: eltype = intEI_type_node; break;
24175 case OImode: eltype = intOI_type_node; break;
24176 case CImode: eltype = intCI_type_node; break;
24177 case XImode: eltype = intXI_type_node; break;
24178 /* 64-bit vectors. */
24179 case V8QImode: eltype = V8QI_type_node; break;
24180 case V4HImode: eltype = V4HI_type_node; break;
24181 case V2SImode: eltype = V2SI_type_node; break;
24182 case V2SFmode: eltype = V2SF_type_node; break;
24183 /* 128-bit vectors. */
24184 case V16QImode: eltype = V16QI_type_node; break;
24185 case V8HImode: eltype = V8HI_type_node; break;
24186 case V4SImode: eltype = V4SI_type_node; break;
24187 case V4SFmode: eltype = V4SF_type_node; break;
24188 case V2DImode: eltype = V2DI_type_node; break;
24189 default: gcc_unreachable ();
24193 if (k == 0 && !is_store)
24194 return_type = eltype;
24195 else
24196 args = tree_cons (NULL_TREE, eltype, args);
24199 ftype = build_function_type (return_type, args);
24201 break;
24203 case NEON_REINTERP:
24205 /* We iterate over NUM_DREG_TYPES doubleword types,
24206 then NUM_QREG_TYPES quadword types.
24207 V4HF is not a type used in reinterpret, so we translate
24208 d->mode to the correct index in reinterp_ftype_dreg. */
24209 bool qreg_p
24210 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24211 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24212 % NUM_QREG_TYPES;
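/* Example (assuming the T_* enumerators follow the modenames order above):
   a D-reg variant with d->mode == T_V2SI gives
   rhs == (3 - 1) % NUM_QREG_TYPES == 2, so rhs indexes dreg_types[2],
   i.e. V2SI_type_node, correctly skipping the unused V4HF slot.  */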
24213 switch (insn_data[d->code].operand[0].mode)
24215 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24216 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24217 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24218 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24219 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24220 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24221 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24222 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24223 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24224 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24225 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24226 default: gcc_unreachable ();
24229 break;
24230 case NEON_FLOAT_WIDEN:
24232 tree eltype = NULL_TREE;
24233 tree return_type = NULL_TREE;
24235 switch (insn_data[d->code].operand[1].mode)
24237 case V4HFmode:
24238 eltype = V4HF_type_node;
24239 return_type = V4SF_type_node;
24240 break;
24241 default: gcc_unreachable ();
24243 ftype = build_function_type_list (return_type, eltype, NULL);
24244 break;
24246 case NEON_FLOAT_NARROW:
24248 tree eltype = NULL_TREE;
24249 tree return_type = NULL_TREE;
24251 switch (insn_data[d->code].operand[1].mode)
24253 case V4SFmode:
24254 eltype = V4SF_type_node;
24255 return_type = V4HF_type_node;
24256 break;
24257 default: gcc_unreachable ();
24259 ftype = build_function_type_list (return_type, eltype, NULL);
24260 break;
24262 case NEON_BSWAP:
24264 tree eltype = NULL_TREE;
24265 switch (insn_data[d->code].operand[1].mode)
24267 case V4HImode:
24268 eltype = V4UHI_type_node;
24269 break;
24270 case V8HImode:
24271 eltype = V8UHI_type_node;
24272 break;
24273 case V2SImode:
24274 eltype = V2USI_type_node;
24275 break;
24276 case V4SImode:
24277 eltype = V4USI_type_node;
24278 break;
24279 case V2DImode:
24280 eltype = V2UDI_type_node;
24281 break;
24282 default: gcc_unreachable ();
24284 ftype = build_function_type_list (eltype, eltype, NULL);
24285 break;
24287 case NEON_COPYSIGNF:
24289 tree eltype = NULL_TREE;
24290 switch (insn_data[d->code].operand[1].mode)
24292 case V2SFmode:
24293 eltype = V2SF_type_node;
24294 break;
24295 case V4SFmode:
24296 eltype = V4SF_type_node;
24297 break;
24298 default: gcc_unreachable ();
24300 ftype = build_function_type_list (eltype, eltype, NULL);
24301 break;
24303 default:
24304 gcc_unreachable ();
24307 gcc_assert (ftype != NULL);
24309 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24311 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24312 NULL_TREE);
24313 arm_builtin_decls[fcode] = decl;
24317 #undef NUM_DREG_TYPES
24318 #undef NUM_QREG_TYPES
24320 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24321 do \
24323 if ((MASK) & insn_flags) \
24325 tree bdecl; \
24326 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24327 BUILT_IN_MD, NULL, NULL_TREE); \
24328 arm_builtin_decls[CODE] = bdecl; \
24331 while (0)
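/* For example, a later call such as
     def_mbuiltin (FL_IWMMXT2, "__builtin_arm_wabsb", v8qi_ftype_v8qi,
                   ARM_BUILTIN_WABSB);
   registers the builtin and records its decl in arm_builtin_decls only when
   the selected CPU has FL_IWMMXT2 in insn_flags; otherwise it does nothing.
   (That call is what iwmmx2_mbuiltin ("wabsb", ...) below expands to.)  */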
24333 struct builtin_description
24334 {
24335 const unsigned int mask;
24336 const enum insn_code icode;
24337 const char * const name;
24338 const enum arm_builtins code;
24339 const enum rtx_code comparison;
24340 const unsigned int flag;
24341 };
24343 static const struct builtin_description bdesc_2arg[] =
24344 {
24345 #define IWMMXT_BUILTIN(code, string, builtin) \
24346 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24347 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24349 #define IWMMXT2_BUILTIN(code, string, builtin) \
24350 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24351 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
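/* E.g. the first entry below, IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB),
   expands to
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },
   pairing the insn pattern with the builtin name and enum value.  */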
24353 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24354 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24355 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24356 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24357 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24358 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24359 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24360 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24361 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24362 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24363 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24364 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24365 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24366 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24367 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24368 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24369 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24370 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24371 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24372 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24373 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24374 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24375 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24376 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24377 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24378 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24379 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24380 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24381 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24382 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24383 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24384 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24385 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24386 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24387 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24388 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24389 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24390 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24391 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24392 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24393 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24394 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24395 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24396 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24397 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24398 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24399 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24400 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24401 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24402 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24403 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24404 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24405 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24406 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24407 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24408 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24409 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24410 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24411 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24412 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24413 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24414 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24415 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24416 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24417 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24418 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24419 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24420 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24421 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24422 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24423 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24424 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24425 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24426 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24427 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24428 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24429 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24430 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24432 #define IWMMXT_BUILTIN2(code, builtin) \
24433 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24435 #define IWMMXT2_BUILTIN2(code, builtin) \
24436 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24438 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24439 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24440 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24441 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24442 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24443 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24444 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24445 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24446 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24447 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24450 #define FP_BUILTIN(L, U) \
24451 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24452 UNKNOWN, 0},
24454 FP_BUILTIN (get_fpscr, GET_FPSCR)
24455 FP_BUILTIN (set_fpscr, SET_FPSCR)
24456 #undef FP_BUILTIN
24458 #define CRC32_BUILTIN(L, U) \
24459 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24460 UNKNOWN, 0},
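/* E.g. CRC32_BUILTIN (crc32b, CRC32B) below becomes
     {0, CODE_FOR_crc32b, "__builtin_arm_crc32b", ARM_BUILTIN_CRC32B,
      UNKNOWN, 0},
   with a zero mask: these entries are registered separately by
   arm_init_crc32_builtins (guarded by TARGET_CRC32) rather than by the
   FL_IWMMXT-gated loop below.  */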
24461 CRC32_BUILTIN (crc32b, CRC32B)
24462 CRC32_BUILTIN (crc32h, CRC32H)
24463 CRC32_BUILTIN (crc32w, CRC32W)
24464 CRC32_BUILTIN (crc32cb, CRC32CB)
24465 CRC32_BUILTIN (crc32ch, CRC32CH)
24466 CRC32_BUILTIN (crc32cw, CRC32CW)
24467 #undef CRC32_BUILTIN
24470 #define CRYPTO_BUILTIN(L, U) \
24471 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24472 UNKNOWN, 0},
24473 #undef CRYPTO1
24474 #undef CRYPTO2
24475 #undef CRYPTO3
24476 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24477 #define CRYPTO1(L, U, R, A)
24478 #define CRYPTO3(L, U, R, A1, A2, A3)
24479 #include "crypto.def"
24480 #undef CRYPTO1
24481 #undef CRYPTO2
24482 #undef CRYPTO3
24483 };
24486 static const struct builtin_description bdesc_1arg[] =
24487 {
24488 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24489 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24490 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24491 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24492 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24493 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24494 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24495 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24496 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24497 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24498 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24499 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24500 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24501 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24502 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24503 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24504 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24505 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24506 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24507 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24508 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24509 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24510 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24511 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24513 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24514 #define CRYPTO2(L, U, R, A1, A2)
24515 #define CRYPTO3(L, U, R, A1, A2, A3)
24516 #include "crypto.def"
24517 #undef CRYPTO1
24518 #undef CRYPTO2
24519 #undef CRYPTO3
24520 };
24522 static const struct builtin_description bdesc_3arg[] =
24523 {
24524 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24525 #define CRYPTO1(L, U, R, A)
24526 #define CRYPTO2(L, U, R, A1, A2)
24527 #include "crypto.def"
24528 #undef CRYPTO1
24529 #undef CRYPTO2
24530 #undef CRYPTO3
24531 };
24532 #undef CRYPTO_BUILTIN
24534 /* Set up all the iWMMXt builtins. This is only called when
24535 TARGET_REALLY_IWMMXT is set. */
24537 static void
24538 arm_init_iwmmxt_builtins (void)
24539 {
24540 const struct builtin_description * d;
24541 size_t i;
24543 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24544 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24545 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24547 tree v8qi_ftype_v8qi_v8qi_int
24548 = build_function_type_list (V8QI_type_node,
24549 V8QI_type_node, V8QI_type_node,
24550 integer_type_node, NULL_TREE);
24551 tree v4hi_ftype_v4hi_int
24552 = build_function_type_list (V4HI_type_node,
24553 V4HI_type_node, integer_type_node, NULL_TREE);
24554 tree v2si_ftype_v2si_int
24555 = build_function_type_list (V2SI_type_node,
24556 V2SI_type_node, integer_type_node, NULL_TREE);
24557 tree v2si_ftype_di_di
24558 = build_function_type_list (V2SI_type_node,
24559 long_long_integer_type_node,
24560 long_long_integer_type_node,
24561 NULL_TREE);
24562 tree di_ftype_di_int
24563 = build_function_type_list (long_long_integer_type_node,
24564 long_long_integer_type_node,
24565 integer_type_node, NULL_TREE);
24566 tree di_ftype_di_int_int
24567 = build_function_type_list (long_long_integer_type_node,
24568 long_long_integer_type_node,
24569 integer_type_node,
24570 integer_type_node, NULL_TREE);
24571 tree int_ftype_v8qi
24572 = build_function_type_list (integer_type_node,
24573 V8QI_type_node, NULL_TREE);
24574 tree int_ftype_v4hi
24575 = build_function_type_list (integer_type_node,
24576 V4HI_type_node, NULL_TREE);
24577 tree int_ftype_v2si
24578 = build_function_type_list (integer_type_node,
24579 V2SI_type_node, NULL_TREE);
24580 tree int_ftype_v8qi_int
24581 = build_function_type_list (integer_type_node,
24582 V8QI_type_node, integer_type_node, NULL_TREE);
24583 tree int_ftype_v4hi_int
24584 = build_function_type_list (integer_type_node,
24585 V4HI_type_node, integer_type_node, NULL_TREE);
24586 tree int_ftype_v2si_int
24587 = build_function_type_list (integer_type_node,
24588 V2SI_type_node, integer_type_node, NULL_TREE);
24589 tree v8qi_ftype_v8qi_int_int
24590 = build_function_type_list (V8QI_type_node,
24591 V8QI_type_node, integer_type_node,
24592 integer_type_node, NULL_TREE);
24593 tree v4hi_ftype_v4hi_int_int
24594 = build_function_type_list (V4HI_type_node,
24595 V4HI_type_node, integer_type_node,
24596 integer_type_node, NULL_TREE);
24597 tree v2si_ftype_v2si_int_int
24598 = build_function_type_list (V2SI_type_node,
24599 V2SI_type_node, integer_type_node,
24600 integer_type_node, NULL_TREE);
24601 /* Miscellaneous. */
24602 tree v8qi_ftype_v4hi_v4hi
24603 = build_function_type_list (V8QI_type_node,
24604 V4HI_type_node, V4HI_type_node, NULL_TREE);
24605 tree v4hi_ftype_v2si_v2si
24606 = build_function_type_list (V4HI_type_node,
24607 V2SI_type_node, V2SI_type_node, NULL_TREE);
24608 tree v8qi_ftype_v4hi_v8qi
24609 = build_function_type_list (V8QI_type_node,
24610 V4HI_type_node, V8QI_type_node, NULL_TREE);
24611 tree v2si_ftype_v4hi_v4hi
24612 = build_function_type_list (V2SI_type_node,
24613 V4HI_type_node, V4HI_type_node, NULL_TREE);
24614 tree v2si_ftype_v8qi_v8qi
24615 = build_function_type_list (V2SI_type_node,
24616 V8QI_type_node, V8QI_type_node, NULL_TREE);
24617 tree v4hi_ftype_v4hi_di
24618 = build_function_type_list (V4HI_type_node,
24619 V4HI_type_node, long_long_integer_type_node,
24620 NULL_TREE);
24621 tree v2si_ftype_v2si_di
24622 = build_function_type_list (V2SI_type_node,
24623 V2SI_type_node, long_long_integer_type_node,
24624 NULL_TREE);
24625 tree di_ftype_void
24626 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24627 tree int_ftype_void
24628 = build_function_type_list (integer_type_node, NULL_TREE);
24629 tree di_ftype_v8qi
24630 = build_function_type_list (long_long_integer_type_node,
24631 V8QI_type_node, NULL_TREE);
24632 tree di_ftype_v4hi
24633 = build_function_type_list (long_long_integer_type_node,
24634 V4HI_type_node, NULL_TREE);
24635 tree di_ftype_v2si
24636 = build_function_type_list (long_long_integer_type_node,
24637 V2SI_type_node, NULL_TREE);
24638 tree v2si_ftype_v4hi
24639 = build_function_type_list (V2SI_type_node,
24640 V4HI_type_node, NULL_TREE);
24641 tree v4hi_ftype_v8qi
24642 = build_function_type_list (V4HI_type_node,
24643 V8QI_type_node, NULL_TREE);
24644 tree v8qi_ftype_v8qi
24645 = build_function_type_list (V8QI_type_node,
24646 V8QI_type_node, NULL_TREE);
24647 tree v4hi_ftype_v4hi
24648 = build_function_type_list (V4HI_type_node,
24649 V4HI_type_node, NULL_TREE);
24650 tree v2si_ftype_v2si
24651 = build_function_type_list (V2SI_type_node,
24652 V2SI_type_node, NULL_TREE);
24654 tree di_ftype_di_v4hi_v4hi
24655 = build_function_type_list (long_long_unsigned_type_node,
24656 long_long_unsigned_type_node,
24657 V4HI_type_node, V4HI_type_node,
24658 NULL_TREE);
24660 tree di_ftype_v4hi_v4hi
24661 = build_function_type_list (long_long_unsigned_type_node,
24662 V4HI_type_node,V4HI_type_node,
24663 NULL_TREE);
24665 tree v2si_ftype_v2si_v4hi_v4hi
24666 = build_function_type_list (V2SI_type_node,
24667 V2SI_type_node, V4HI_type_node,
24668 V4HI_type_node, NULL_TREE);
24670 tree v2si_ftype_v2si_v8qi_v8qi
24671 = build_function_type_list (V2SI_type_node,
24672 V2SI_type_node, V8QI_type_node,
24673 V8QI_type_node, NULL_TREE);
24675 tree di_ftype_di_v2si_v2si
24676 = build_function_type_list (long_long_unsigned_type_node,
24677 long_long_unsigned_type_node,
24678 V2SI_type_node, V2SI_type_node,
24679 NULL_TREE);
24681 tree di_ftype_di_di_int
24682 = build_function_type_list (long_long_unsigned_type_node,
24683 long_long_unsigned_type_node,
24684 long_long_unsigned_type_node,
24685 integer_type_node, NULL_TREE);
24687 tree void_ftype_int
24688 = build_function_type_list (void_type_node,
24689 integer_type_node, NULL_TREE);
24691 tree v8qi_ftype_char
24692 = build_function_type_list (V8QI_type_node,
24693 signed_char_type_node, NULL_TREE);
24695 tree v4hi_ftype_short
24696 = build_function_type_list (V4HI_type_node,
24697 short_integer_type_node, NULL_TREE);
24699 tree v2si_ftype_int
24700 = build_function_type_list (V2SI_type_node,
24701 integer_type_node, NULL_TREE);
24703 /* Normal vector binops. */
24704 tree v8qi_ftype_v8qi_v8qi
24705 = build_function_type_list (V8QI_type_node,
24706 V8QI_type_node, V8QI_type_node, NULL_TREE);
24707 tree v4hi_ftype_v4hi_v4hi
24708 = build_function_type_list (V4HI_type_node,
24709 V4HI_type_node,V4HI_type_node, NULL_TREE);
24710 tree v2si_ftype_v2si_v2si
24711 = build_function_type_list (V2SI_type_node,
24712 V2SI_type_node, V2SI_type_node, NULL_TREE);
24713 tree di_ftype_di_di
24714 = build_function_type_list (long_long_unsigned_type_node,
24715 long_long_unsigned_type_node,
24716 long_long_unsigned_type_node,
24717 NULL_TREE);
24719 /* Add all builtins that are more or less simple operations on two
24720 operands. */
24721 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24723 /* Use one of the operands; the target can have a different mode for
24724 mask-generating compares. */
24725 enum machine_mode mode;
24726 tree type;
24728 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24729 continue;
24731 mode = insn_data[d->icode].operand[1].mode;
24733 switch (mode)
24735 case V8QImode:
24736 type = v8qi_ftype_v8qi_v8qi;
24737 break;
24738 case V4HImode:
24739 type = v4hi_ftype_v4hi_v4hi;
24740 break;
24741 case V2SImode:
24742 type = v2si_ftype_v2si_v2si;
24743 break;
24744 case DImode:
24745 type = di_ftype_di_di;
24746 break;
24748 default:
24749 gcc_unreachable ();
24752 def_mbuiltin (d->mask, d->name, type, d->code);
24755 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
24756 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24757 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24758 ARM_BUILTIN_ ## CODE)
24760 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24761 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24762 ARM_BUILTIN_ ## CODE)
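/* E.g. iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO) just below expands to
     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);
   so each call that follows registers one "__builtin_arm_*" function under
   the appropriate feature flag.  */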
24764 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24765 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24766 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24767 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24768 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24769 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24770 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24771 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24772 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24774 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24775 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24776 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24777 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24778 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24779 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24781 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24782 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24783 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24784 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24785 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24786 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24788 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24789 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24790 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24791 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24792 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24793 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24795 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24796 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24797 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24798 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24799 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24800 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24802 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24804 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24805 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24806 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24807 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24808 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24809 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24810 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24811 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24812 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24813 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24815 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24816 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24817 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24818 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24819 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24820 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24821 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24822 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24823 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24825 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24826 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24827 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24829 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24830 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24831 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24833 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24834 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24836 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24837 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24838 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24839 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24840 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24841 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24843 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24844 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24845 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24846 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24847 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24848 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24849 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24850 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24851 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24852 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24853 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24854 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24856 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24857 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24858 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24859 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24861 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24862 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24863 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24864 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24865 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24866 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24867 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24869 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24870 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24871 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24873 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24874 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24875 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24876 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24878 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24879 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24880 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24881 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24883 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24884 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24885 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24886 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24888 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24889 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24890 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24891 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24893 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24894 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24895 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24896 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24898 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24899 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24900 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24901 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24903 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24905 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24906 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24907 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24909 #undef iwmmx_mbuiltin
24910 #undef iwmmx2_mbuiltin
24913 static void
24914 arm_init_fp16_builtins (void)
24915 {
24916 tree fp16_type = make_node (REAL_TYPE);
24917 TYPE_PRECISION (fp16_type) = 16;
24918 layout_type (fp16_type);
24919 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
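/* After this registration the half-precision type is visible to the source
   language under the name __fp16; the arm_invalid_parameter_type,
   arm_invalid_return_type, arm_promoted_type and arm_convert_to_type hooks
   below enforce its restricted semantics.  */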
24922 static void
24923 arm_init_crc32_builtins ()
24924 {
24925 tree si_ftype_si_qi
24926 = build_function_type_list (unsigned_intSI_type_node,
24927 unsigned_intSI_type_node,
24928 unsigned_intQI_type_node, NULL_TREE);
24929 tree si_ftype_si_hi
24930 = build_function_type_list (unsigned_intSI_type_node,
24931 unsigned_intSI_type_node,
24932 unsigned_intHI_type_node, NULL_TREE);
24933 tree si_ftype_si_si
24934 = build_function_type_list (unsigned_intSI_type_node,
24935 unsigned_intSI_type_node,
24936 unsigned_intSI_type_node, NULL_TREE);
24938 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24939 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24940 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24941 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24942 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24943 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24944 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24945 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24946 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24947 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24948 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24949 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24950 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24951 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24952 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24953 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24954 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24955 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
24958 static void
24959 arm_init_builtins (void)
24960 {
24961 if (TARGET_REALLY_IWMMXT)
24962 arm_init_iwmmxt_builtins ();
24964 if (TARGET_NEON)
24965 arm_init_neon_builtins ();
24967 if (arm_fp16_format)
24968 arm_init_fp16_builtins ();
24970 if (TARGET_CRC32)
24971 arm_init_crc32_builtins ();
24973 if (TARGET_VFP && TARGET_HARD_FLOAT)
24975 tree ftype_set_fpscr
24976 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
24977 tree ftype_get_fpscr
24978 = build_function_type_list (unsigned_type_node, NULL);
24980 arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
24981 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
24982 ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
24983 arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
24984 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
24985 ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
24989 /* Return the ARM builtin for CODE. */
24991 static tree
24992 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24993 {
24994 if (code >= ARM_BUILTIN_MAX)
24995 return error_mark_node;
24997 return arm_builtin_decls[code];
25000 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
25002 static const char *
25003 arm_invalid_parameter_type (const_tree t)
25004 {
25005 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25006 return N_("function parameters cannot have __fp16 type");
25007 return NULL;
25010 /* Implement TARGET_INVALID_RETURN_TYPE. */
25012 static const char *
25013 arm_invalid_return_type (const_tree t)
25014 {
25015 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25016 return N_("functions cannot return __fp16 type");
25017 return NULL;
25020 /* Implement TARGET_PROMOTED_TYPE. */
25022 static tree
25023 arm_promoted_type (const_tree t)
25024 {
25025 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25026 return float_type_node;
25027 return NULL_TREE;
25030 /* Implement TARGET_CONVERT_TO_TYPE.
25031 Specifically, this hook implements the peculiarity of the ARM
25032 half-precision floating-point C semantics that requires conversions between
25033 __fp16 and double to go through an intermediate conversion to float. */
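/* For example, a conversion from __fp16 to double is expanded as
   (double) (float) x rather than as a single step, matching the
   half-precision semantics described above.  */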
25035 static tree
25036 arm_convert_to_type (tree type, tree expr)
25037 {
25038 tree fromtype = TREE_TYPE (expr);
25039 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
25040 return NULL_TREE;
25041 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
25042 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
25043 return convert (type, convert (float_type_node, expr));
25044 return NULL_TREE;
25047 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25048 This simply adds HFmode as a supported mode; even though we don't
25049 implement arithmetic on this type directly, it's supported by
25050 optabs conversions, much the way the double-word arithmetic is
25051 special-cased in the default hook. */
25053 static bool
25054 arm_scalar_mode_supported_p (enum machine_mode mode)
25055 {
25056 if (mode == HFmode)
25057 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25058 else if (ALL_FIXED_POINT_MODE_P (mode))
25059 return true;
25060 else
25061 return default_scalar_mode_supported_p (mode);
25064 /* Errors in the source file can cause expand_expr to return const0_rtx
25065 where we expect a vector. To avoid crashing, use one of the vector
25066 clear instructions. */
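/* Concretely: if expand_normal returned const0_rtx for what should be a
   vector operand, a fresh register of the requested mode is allocated and
   cleared via gen_iwmmxt_clrdi (operating on a DImode view of it when the
   mode is not DImode), and that register is used instead.  */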
25068 static rtx
25069 safe_vector_operand (rtx x, enum machine_mode mode)
25070 {
25071 if (x != const0_rtx)
25072 return x;
25073 x = gen_reg_rtx (mode);
25075 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
25076 : gen_rtx_SUBREG (DImode, x, 0)));
25077 return x;
25080 /* Function to expand ternary builtins. */
25081 static rtx
25082 arm_expand_ternop_builtin (enum insn_code icode,
25083 tree exp, rtx target)
25084 {
25085 rtx pat;
25086 tree arg0 = CALL_EXPR_ARG (exp, 0);
25087 tree arg1 = CALL_EXPR_ARG (exp, 1);
25088 tree arg2 = CALL_EXPR_ARG (exp, 2);
25090 rtx op0 = expand_normal (arg0);
25091 rtx op1 = expand_normal (arg1);
25092 rtx op2 = expand_normal (arg2);
25093 rtx op3 = NULL_RTX;
25095 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
25096 lane operand depending on endianness. */
25097 bool builtin_sha1cpm_p = false;
25099 if (insn_data[icode].n_operands == 5)
25101 gcc_assert (icode == CODE_FOR_crypto_sha1c
25102 || icode == CODE_FOR_crypto_sha1p
25103 || icode == CODE_FOR_crypto_sha1m);
25104 builtin_sha1cpm_p = true;
25106 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25107 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25108 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25109 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
25112 if (VECTOR_MODE_P (mode0))
25113 op0 = safe_vector_operand (op0, mode0);
25114 if (VECTOR_MODE_P (mode1))
25115 op1 = safe_vector_operand (op1, mode1);
25116 if (VECTOR_MODE_P (mode2))
25117 op2 = safe_vector_operand (op2, mode2);
25119 if (! target
25120 || GET_MODE (target) != tmode
25121 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25122 target = gen_reg_rtx (tmode);
25124 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25125 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
25126 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
25128 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25129 op0 = copy_to_mode_reg (mode0, op0);
25130 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25131 op1 = copy_to_mode_reg (mode1, op1);
25132 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25133 op2 = copy_to_mode_reg (mode2, op2);
25134 if (builtin_sha1cpm_p)
25135 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25137 if (builtin_sha1cpm_p)
25138 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
25139 else
25140 pat = GEN_FCN (icode) (target, op0, op1, op2);
25141 if (! pat)
25142 return 0;
25143 emit_insn (pat);
25144 return target;
25147 /* Subroutine of arm_expand_builtin to take care of binop insns. */
25149 static rtx
25150 arm_expand_binop_builtin (enum insn_code icode,
25151 tree exp, rtx target)
25153 rtx pat;
25154 tree arg0 = CALL_EXPR_ARG (exp, 0);
25155 tree arg1 = CALL_EXPR_ARG (exp, 1);
25156 rtx op0 = expand_normal (arg0);
25157 rtx op1 = expand_normal (arg1);
25158 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25159 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25160 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25162 if (VECTOR_MODE_P (mode0))
25163 op0 = safe_vector_operand (op0, mode0);
25164 if (VECTOR_MODE_P (mode1))
25165 op1 = safe_vector_operand (op1, mode1);
25167 if (! target
25168 || GET_MODE (target) != tmode
25169 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25170 target = gen_reg_rtx (tmode);
25172 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25173 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
25175 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25176 op0 = copy_to_mode_reg (mode0, op0);
25177 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25178 op1 = copy_to_mode_reg (mode1, op1);
25180 pat = GEN_FCN (icode) (target, op0, op1);
25181 if (! pat)
25182 return 0;
25183 emit_insn (pat);
25184 return target;
25187 /* Subroutine of arm_expand_builtin to take care of unop insns. */
25189 static rtx
25190 arm_expand_unop_builtin (enum insn_code icode,
25191 tree exp, rtx target, int do_load)
25193 rtx pat;
25194 tree arg0 = CALL_EXPR_ARG (exp, 0);
25195 rtx op0 = expand_normal (arg0);
25196 rtx op1 = NULL_RTX;
25197 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25198 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25199 bool builtin_sha1h_p = false;
25201 if (insn_data[icode].n_operands == 3)
25203 gcc_assert (icode == CODE_FOR_crypto_sha1h);
25204 builtin_sha1h_p = true;
25207 if (! target
25208 || GET_MODE (target) != tmode
25209 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25210 target = gen_reg_rtx (tmode);
25211 if (do_load)
25212 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25213 else
25215 if (VECTOR_MODE_P (mode0))
25216 op0 = safe_vector_operand (op0, mode0);
25218 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25219 op0 = copy_to_mode_reg (mode0, op0);
25221 if (builtin_sha1h_p)
25222 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25224 if (builtin_sha1h_p)
25225 pat = GEN_FCN (icode) (target, op0, op1);
25226 else
25227 pat = GEN_FCN (icode) (target, op0);
25228 if (! pat)
25229 return 0;
25230 emit_insn (pat);
25231 return target;
25234 typedef enum {
25235 NEON_ARG_COPY_TO_REG,
25236 NEON_ARG_CONSTANT,
25237 NEON_ARG_MEMORY,
25238 NEON_ARG_STOP
25239 } builtin_arg;
25241 #define NEON_MAX_BUILTIN_ARGS 5
25243 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25244 and return an expression for the accessed memory.
25246 The intrinsic function operates on a block of registers that has
25247 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25248 function references the memory at EXP of type TYPE and in mode
25249 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25250 available. */
25252 static tree
25253 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
25254 enum machine_mode reg_mode,
25255 neon_builtin_type_mode type_mode)
25257 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25258 tree elem_type, upper_bound, array_type;
25260 /* Work out the size of the register block in bytes. */
25261 reg_size = GET_MODE_SIZE (reg_mode);
25263 /* Work out the size of each vector in bytes. */
25264 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25265 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25267 /* Work out how many vectors there are. */
25268 gcc_assert (reg_size % vector_size == 0);
25269 nvectors = reg_size / vector_size;
25271 /* Work out the type of each element. */
25272 gcc_assert (POINTER_TYPE_P (type));
25273 elem_type = TREE_TYPE (type);
25275 /* Work out how many elements are being loaded or stored.
25276 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25277 and memory elements; anything else implies a lane load or store. */
25278 if (mem_mode == reg_mode)
25279 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25280 else
25281 nelems = nvectors;
25283 /* Create a type that describes the full access. */
25284 upper_bound = build_int_cst (size_type_node, nelems - 1);
25285 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25287 /* Dereference EXP using that type. */
25288 return fold_build2 (MEM_REF, array_type, exp,
25289 build_int_cst (build_pointer_type (array_type), 0));
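/* A hedged worked example (the concrete numbers are illustrative only):
   for an intrinsic whose register block is three D registers
   (reg_size == 24) holding D-sized vectors (vector_size == 8),
   nvectors == 3.  With int16_t elements (int_size_in_bytes == 2) and
   MEM_MODE == REG_MODE, nelems is 8 * 3 / 2 == 12, so the MEM_REF built
   above describes an int16_t[12] access; for a lane variant
   (MEM_MODE != REG_MODE) only nvectors == 3 elements are described.  */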
25292 /* Expand a Neon builtin. */
25293 static rtx
25294 arm_expand_neon_args (rtx target, int icode, int have_retval,
25295 neon_builtin_type_mode type_mode,
25296 tree exp, int fcode, ...)
25298 va_list ap;
25299 rtx pat;
25300 tree arg[NEON_MAX_BUILTIN_ARGS];
25301 rtx op[NEON_MAX_BUILTIN_ARGS];
25302 tree arg_type;
25303 tree formals;
25304 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25305 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25306 enum machine_mode other_mode;
25307 int argc = 0;
25308 int opno;
25310 if (have_retval
25311 && (!target
25312 || GET_MODE (target) != tmode
25313 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25314 target = gen_reg_rtx (tmode);
25316 va_start (ap, fcode);
25318 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25320 for (;;)
25322 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25324 if (thisarg == NEON_ARG_STOP)
25325 break;
25326 else
25328 opno = argc + have_retval;
25329 mode[argc] = insn_data[icode].operand[opno].mode;
25330 arg[argc] = CALL_EXPR_ARG (exp, argc);
25331 arg_type = TREE_VALUE (formals);
25332 if (thisarg == NEON_ARG_MEMORY)
25334 other_mode = insn_data[icode].operand[1 - opno].mode;
25335 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25336 mode[argc], other_mode,
25337 type_mode);
25340 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
25341 is returned. */
25342 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25343 (thisarg == NEON_ARG_MEMORY
25344 ? EXPAND_MEMORY : EXPAND_NORMAL));
25346 switch (thisarg)
25348 case NEON_ARG_COPY_TO_REG:
25349 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25350 if (!(*insn_data[icode].operand[opno].predicate)
25351 (op[argc], mode[argc]))
25352 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25353 break;
25355 case NEON_ARG_CONSTANT:
25356 /* FIXME: This error message is somewhat unhelpful. */
25357 if (!(*insn_data[icode].operand[opno].predicate)
25358 (op[argc], mode[argc]))
25359 error ("argument must be a constant");
25360 break;
25362 case NEON_ARG_MEMORY:
25363 /* Check if expand failed. */
25364 if (op[argc] == const0_rtx)
25365 return 0;
25366 gcc_assert (MEM_P (op[argc]));
25367 PUT_MODE (op[argc], mode[argc]);
25368 /* ??? arm_neon.h uses the same built-in functions for signed
25369 and unsigned accesses, casting where necessary. This isn't
25370 alias safe. */
25371 set_mem_alias_set (op[argc], 0);
25372 if (!(*insn_data[icode].operand[opno].predicate)
25373 (op[argc], mode[argc]))
25374 op[argc] = (replace_equiv_address
25375 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25376 break;
25378 case NEON_ARG_STOP:
25379 gcc_unreachable ();
25382 argc++;
25383 formals = TREE_CHAIN (formals);
25387 va_end (ap);
25389 if (have_retval)
25390 switch (argc)
25392 case 1:
25393 pat = GEN_FCN (icode) (target, op[0]);
25394 break;
25396 case 2:
25397 pat = GEN_FCN (icode) (target, op[0], op[1]);
25398 break;
25400 case 3:
25401 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25402 break;
25404 case 4:
25405 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25406 break;
25408 case 5:
25409 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25410 break;
25412 default:
25413 gcc_unreachable ();
25415 else
25416 switch (argc)
25418 case 1:
25419 pat = GEN_FCN (icode) (op[0]);
25420 break;
25422 case 2:
25423 pat = GEN_FCN (icode) (op[0], op[1]);
25424 break;
25426 case 3:
25427 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25428 break;
25430 case 4:
25431 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25432 break;
25434 case 5:
25435 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25436 break;
25438 default:
25439 gcc_unreachable ();
25442 if (!pat)
25443 return 0;
25445 emit_insn (pat);
25447 return target;
25450 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25451 constants defined per-instruction or per instruction-variant. Instead, the
25452 required info is looked up in the table neon_builtin_data. */
25453 static rtx
25454 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25456 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25457 neon_itype itype = d->itype;
25458 enum insn_code icode = d->code;
25459 neon_builtin_type_mode type_mode = d->mode;
25461 switch (itype)
25463 case NEON_UNOP:
25464 case NEON_CONVERT:
25465 case NEON_DUPLANE:
25466 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25467 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25469 case NEON_BINOP:
25470 case NEON_SETLANE:
25471 case NEON_SCALARMUL:
25472 case NEON_SCALARMULL:
25473 case NEON_SCALARMULH:
25474 case NEON_SHIFTINSERT:
25475 case NEON_LOGICBINOP:
25476 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25477 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25478 NEON_ARG_STOP);
25480 case NEON_TERNOP:
25481 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25482 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25483 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25485 case NEON_GETLANE:
25486 case NEON_FIXCONV:
25487 case NEON_SHIFTIMM:
25488 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25489 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25490 NEON_ARG_STOP);
25492 case NEON_CREATE:
25493 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25494 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25496 case NEON_DUP:
25497 case NEON_RINT:
25498 case NEON_SPLIT:
25499 case NEON_FLOAT_WIDEN:
25500 case NEON_FLOAT_NARROW:
25501 case NEON_BSWAP:
25502 case NEON_REINTERP:
25503 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25504 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25506 case NEON_COPYSIGNF:
25507 case NEON_COMBINE:
25508 case NEON_VTBL:
25509 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25510 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25512 case NEON_LANEMUL:
25513 case NEON_LANEMULL:
25514 case NEON_LANEMULH:
25515 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25516 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25517 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25519 case NEON_LANEMAC:
25520 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25521 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25522 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25524 case NEON_SHIFTACC:
25525 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25526 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25527 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25529 case NEON_SCALARMAC:
25530 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25531 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25532 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25534 case NEON_SELECT:
25535 case NEON_VTBX:
25536 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25537 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25538 NEON_ARG_STOP);
25540 case NEON_LOAD1:
25541 case NEON_LOADSTRUCT:
25542 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25543 NEON_ARG_MEMORY, NEON_ARG_STOP);
25545 case NEON_LOAD1LANE:
25546 case NEON_LOADSTRUCTLANE:
25547 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25548 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25549 NEON_ARG_STOP);
25551 case NEON_STORE1:
25552 case NEON_STORESTRUCT:
25553 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25554 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25556 case NEON_STORE1LANE:
25557 case NEON_STORESTRUCTLANE:
25558 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25559 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25560 NEON_ARG_STOP);
25563 gcc_unreachable ();
25566 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25567 void
25568 neon_reinterpret (rtx dest, rtx src)
25570 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25573 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25574 not to early-clobber SRC registers in the process.
25576 We assume that the operands described by SRC and DEST represent a
25577 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25578 number of components into which the copy has been decomposed. */
25579 void
25580 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25582 unsigned int i;
25584 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25585 || REGNO (operands[0]) < REGNO (operands[1]))
25587 for (i = 0; i < count; i++)
25589 operands[2 * i] = dest[i];
25590 operands[2 * i + 1] = src[i];
25593 else
25595 for (i = 0; i < count; i++)
25597 operands[2 * i] = dest[count - i - 1];
25598 operands[2 * i + 1] = src[count - i - 1];
25603 /* Split operands into moves from op[1] + op[2] into op[0]. */
25605 void
25606 neon_split_vcombine (rtx operands[3])
25608 unsigned int dest = REGNO (operands[0]);
25609 unsigned int src1 = REGNO (operands[1]);
25610 unsigned int src2 = REGNO (operands[2]);
25611 enum machine_mode halfmode = GET_MODE (operands[1]);
25612 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25613 rtx destlo, desthi;
25615 if (src1 == dest && src2 == dest + halfregs)
25617 /* No-op move. Can't split to nothing; emit something. */
25618 emit_note (NOTE_INSN_DELETED);
25619 return;
25622 /* Preserve register attributes for variable tracking. */
25623 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25624 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25625 GET_MODE_SIZE (halfmode));
25627 /* Special case of reversed high/low parts. Use VSWP. */
25628 if (src2 == dest && src1 == dest + halfregs)
25630 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25631 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25632 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25633 return;
25636 if (!reg_overlap_mentioned_p (operands[2], destlo))
25638 /* Try to avoid unnecessary moves if part of the result
25639 is in the right place already. */
25640 if (src1 != dest)
25641 emit_move_insn (destlo, operands[1]);
25642 if (src2 != dest + halfregs)
25643 emit_move_insn (desthi, operands[2]);
25645 else
25647 if (src2 != dest + halfregs)
25648 emit_move_insn (desthi, operands[2]);
25649 if (src1 != dest)
25650 emit_move_insn (destlo, operands[1]);
25654 /* Expand an expression EXP that calls a built-in function,
25655 with result going to TARGET if that's convenient
25656 (and in mode MODE if that's convenient).
25657 SUBTARGET may be used as the target for computing one of EXP's operands.
25658 IGNORE is nonzero if the value is to be ignored. */
25660 static rtx
25661 arm_expand_builtin (tree exp,
25662 rtx target,
25663 rtx subtarget ATTRIBUTE_UNUSED,
25664 enum machine_mode mode ATTRIBUTE_UNUSED,
25665 int ignore ATTRIBUTE_UNUSED)
25667 const struct builtin_description * d;
25668 enum insn_code icode;
25669 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25670 tree arg0;
25671 tree arg1;
25672 tree arg2;
25673 rtx op0;
25674 rtx op1;
25675 rtx op2;
25676 rtx pat;
25677 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25678 size_t i;
25679 enum machine_mode tmode;
25680 enum machine_mode mode0;
25681 enum machine_mode mode1;
25682 enum machine_mode mode2;
25683 int opint;
25684 int selector;
25685 int mask;
25686 int imm;
25688 if (fcode >= ARM_BUILTIN_NEON_BASE)
25689 return arm_expand_neon_builtin (fcode, exp, target);
25691 switch (fcode)
25693 case ARM_BUILTIN_GET_FPSCR:
25694 case ARM_BUILTIN_SET_FPSCR:
25695 if (fcode == ARM_BUILTIN_GET_FPSCR)
25697 icode = CODE_FOR_get_fpscr;
25698 target = gen_reg_rtx (SImode);
25699 pat = GEN_FCN (icode) (target);
25701 else
25703 target = NULL_RTX;
25704 icode = CODE_FOR_set_fpscr;
25705 arg0 = CALL_EXPR_ARG (exp, 0);
25706 op0 = expand_normal (arg0);
25707 pat = GEN_FCN (icode) (op0);
25709 emit_insn (pat);
25710 return target;
25712 case ARM_BUILTIN_TEXTRMSB:
25713 case ARM_BUILTIN_TEXTRMUB:
25714 case ARM_BUILTIN_TEXTRMSH:
25715 case ARM_BUILTIN_TEXTRMUH:
25716 case ARM_BUILTIN_TEXTRMSW:
25717 case ARM_BUILTIN_TEXTRMUW:
25718 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25719 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25720 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25721 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25722 : CODE_FOR_iwmmxt_textrmw);
25724 arg0 = CALL_EXPR_ARG (exp, 0);
25725 arg1 = CALL_EXPR_ARG (exp, 1);
25726 op0 = expand_normal (arg0);
25727 op1 = expand_normal (arg1);
25728 tmode = insn_data[icode].operand[0].mode;
25729 mode0 = insn_data[icode].operand[1].mode;
25730 mode1 = insn_data[icode].operand[2].mode;
25732 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25733 op0 = copy_to_mode_reg (mode0, op0);
25734 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25736 /* @@@ better error message */
25737 error ("selector must be an immediate");
25738 return gen_reg_rtx (tmode);
25741 opint = INTVAL (op1);
25742 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25744 if (opint > 7 || opint < 0)
25745 error ("the range of selector should be in 0 to 7");
25747 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25749 if (opint > 3 || opint < 0)
25750 error ("the range of selector should be in 0 to 3");
25752 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25754 if (opint > 1 || opint < 0)
25755 error ("the range of selector should be in 0 to 1");
25758 if (target == 0
25759 || GET_MODE (target) != tmode
25760 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25761 target = gen_reg_rtx (tmode);
25762 pat = GEN_FCN (icode) (target, op0, op1);
25763 if (! pat)
25764 return 0;
25765 emit_insn (pat);
25766 return target;
25768 case ARM_BUILTIN_WALIGNI:
25769 /* If op2 is immediate, call waligni, else call walignr. */
25770 arg0 = CALL_EXPR_ARG (exp, 0);
25771 arg1 = CALL_EXPR_ARG (exp, 1);
25772 arg2 = CALL_EXPR_ARG (exp, 2);
25773 op0 = expand_normal (arg0);
25774 op1 = expand_normal (arg1);
25775 op2 = expand_normal (arg2);
25776 if (CONST_INT_P (op2))
25778 icode = CODE_FOR_iwmmxt_waligni;
25779 tmode = insn_data[icode].operand[0].mode;
25780 mode0 = insn_data[icode].operand[1].mode;
25781 mode1 = insn_data[icode].operand[2].mode;
25782 mode2 = insn_data[icode].operand[3].mode;
25783 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25784 op0 = copy_to_mode_reg (mode0, op0);
25785 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25786 op1 = copy_to_mode_reg (mode1, op1);
25787 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25788 selector = INTVAL (op2);
25789 if (selector > 7 || selector < 0)
25790 error ("the range of selector should be in 0 to 7");
25792 else
25794 icode = CODE_FOR_iwmmxt_walignr;
25795 tmode = insn_data[icode].operand[0].mode;
25796 mode0 = insn_data[icode].operand[1].mode;
25797 mode1 = insn_data[icode].operand[2].mode;
25798 mode2 = insn_data[icode].operand[3].mode;
25799 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25800 op0 = copy_to_mode_reg (mode0, op0);
25801 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25802 op1 = copy_to_mode_reg (mode1, op1);
25803 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25804 op2 = copy_to_mode_reg (mode2, op2);
25806 if (target == 0
25807 || GET_MODE (target) != tmode
25808 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25809 target = gen_reg_rtx (tmode);
25810 pat = GEN_FCN (icode) (target, op0, op1, op2);
25811 if (!pat)
25812 return 0;
25813 emit_insn (pat);
25814 return target;
25816 case ARM_BUILTIN_TINSRB:
25817 case ARM_BUILTIN_TINSRH:
25818 case ARM_BUILTIN_TINSRW:
25819 case ARM_BUILTIN_WMERGE:
25820 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25821 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25822 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25823 : CODE_FOR_iwmmxt_tinsrw);
25824 arg0 = CALL_EXPR_ARG (exp, 0);
25825 arg1 = CALL_EXPR_ARG (exp, 1);
25826 arg2 = CALL_EXPR_ARG (exp, 2);
25827 op0 = expand_normal (arg0);
25828 op1 = expand_normal (arg1);
25829 op2 = expand_normal (arg2);
25830 tmode = insn_data[icode].operand[0].mode;
25831 mode0 = insn_data[icode].operand[1].mode;
25832 mode1 = insn_data[icode].operand[2].mode;
25833 mode2 = insn_data[icode].operand[3].mode;
25835 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25836 op0 = copy_to_mode_reg (mode0, op0);
25837 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25838 op1 = copy_to_mode_reg (mode1, op1);
25839 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25841 error ("selector must be an immediate");
25842 return const0_rtx;
25844 if (icode == CODE_FOR_iwmmxt_wmerge)
25846 selector = INTVAL (op2);
25847 if (selector > 7 || selector < 0)
25848 error ("the range of selector should be in 0 to 7");
25850 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25851 || (icode == CODE_FOR_iwmmxt_tinsrh)
25852 || (icode == CODE_FOR_iwmmxt_tinsrw))
25854 mask = 0x01;
25855 selector = INTVAL (op2);
25856 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25857 error ("the range of selector should be in 0 to 7");
25858 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25859 error ("the range of selector should be in 0 to 3");
25860 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25861 error ("the range of selector should be in 0 to 1");
25862 mask <<= selector;
25863 op2 = GEN_INT (mask);
25865 if (target == 0
25866 || GET_MODE (target) != tmode
25867 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25868 target = gen_reg_rtx (tmode);
25869 pat = GEN_FCN (icode) (target, op0, op1, op2);
25870 if (! pat)
25871 return 0;
25872 emit_insn (pat);
25873 return target;
25875 case ARM_BUILTIN_SETWCGR0:
25876 case ARM_BUILTIN_SETWCGR1:
25877 case ARM_BUILTIN_SETWCGR2:
25878 case ARM_BUILTIN_SETWCGR3:
25879 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25880 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25881 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25882 : CODE_FOR_iwmmxt_setwcgr3);
25883 arg0 = CALL_EXPR_ARG (exp, 0);
25884 op0 = expand_normal (arg0);
25885 mode0 = insn_data[icode].operand[0].mode;
25886 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25887 op0 = copy_to_mode_reg (mode0, op0);
25888 pat = GEN_FCN (icode) (op0);
25889 if (!pat)
25890 return 0;
25891 emit_insn (pat);
25892 return 0;
25894 case ARM_BUILTIN_GETWCGR0:
25895 case ARM_BUILTIN_GETWCGR1:
25896 case ARM_BUILTIN_GETWCGR2:
25897 case ARM_BUILTIN_GETWCGR3:
25898 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25899 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25900 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25901 : CODE_FOR_iwmmxt_getwcgr3);
25902 tmode = insn_data[icode].operand[0].mode;
25903 if (target == 0
25904 || GET_MODE (target) != tmode
25905 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25906 target = gen_reg_rtx (tmode);
25907 pat = GEN_FCN (icode) (target);
25908 if (!pat)
25909 return 0;
25910 emit_insn (pat);
25911 return target;
25913 case ARM_BUILTIN_WSHUFH:
25914 icode = CODE_FOR_iwmmxt_wshufh;
25915 arg0 = CALL_EXPR_ARG (exp, 0);
25916 arg1 = CALL_EXPR_ARG (exp, 1);
25917 op0 = expand_normal (arg0);
25918 op1 = expand_normal (arg1);
25919 tmode = insn_data[icode].operand[0].mode;
25920 mode1 = insn_data[icode].operand[1].mode;
25921 mode2 = insn_data[icode].operand[2].mode;
25923 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25924 op0 = copy_to_mode_reg (mode1, op0);
25925 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25927 error ("mask must be an immediate");
25928 return const0_rtx;
25930 selector = INTVAL (op1);
25931 if (selector < 0 || selector > 255)
25932 error ("the range of mask should be in 0 to 255");
25933 if (target == 0
25934 || GET_MODE (target) != tmode
25935 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25936 target = gen_reg_rtx (tmode);
25937 pat = GEN_FCN (icode) (target, op0, op1);
25938 if (! pat)
25939 return 0;
25940 emit_insn (pat);
25941 return target;
25943 case ARM_BUILTIN_WMADDS:
25944 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25945 case ARM_BUILTIN_WMADDSX:
25946 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25947 case ARM_BUILTIN_WMADDSN:
25948 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25949 case ARM_BUILTIN_WMADDU:
25950 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25951 case ARM_BUILTIN_WMADDUX:
25952 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25953 case ARM_BUILTIN_WMADDUN:
25954 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25955 case ARM_BUILTIN_WSADBZ:
25956 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25957 case ARM_BUILTIN_WSADHZ:
25958 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25960 /* Several three-argument builtins. */
25961 case ARM_BUILTIN_WMACS:
25962 case ARM_BUILTIN_WMACU:
25963 case ARM_BUILTIN_TMIA:
25964 case ARM_BUILTIN_TMIAPH:
25965 case ARM_BUILTIN_TMIATT:
25966 case ARM_BUILTIN_TMIATB:
25967 case ARM_BUILTIN_TMIABT:
25968 case ARM_BUILTIN_TMIABB:
25969 case ARM_BUILTIN_WQMIABB:
25970 case ARM_BUILTIN_WQMIABT:
25971 case ARM_BUILTIN_WQMIATB:
25972 case ARM_BUILTIN_WQMIATT:
25973 case ARM_BUILTIN_WQMIABBN:
25974 case ARM_BUILTIN_WQMIABTN:
25975 case ARM_BUILTIN_WQMIATBN:
25976 case ARM_BUILTIN_WQMIATTN:
25977 case ARM_BUILTIN_WMIABB:
25978 case ARM_BUILTIN_WMIABT:
25979 case ARM_BUILTIN_WMIATB:
25980 case ARM_BUILTIN_WMIATT:
25981 case ARM_BUILTIN_WMIABBN:
25982 case ARM_BUILTIN_WMIABTN:
25983 case ARM_BUILTIN_WMIATBN:
25984 case ARM_BUILTIN_WMIATTN:
25985 case ARM_BUILTIN_WMIAWBB:
25986 case ARM_BUILTIN_WMIAWBT:
25987 case ARM_BUILTIN_WMIAWTB:
25988 case ARM_BUILTIN_WMIAWTT:
25989 case ARM_BUILTIN_WMIAWBBN:
25990 case ARM_BUILTIN_WMIAWBTN:
25991 case ARM_BUILTIN_WMIAWTBN:
25992 case ARM_BUILTIN_WMIAWTTN:
25993 case ARM_BUILTIN_WSADB:
25994 case ARM_BUILTIN_WSADH:
25995 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25996 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25997 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25998 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25999 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
26000 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
26001 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
26002 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
26003 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
26004 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
26005 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
26006 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
26007 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
26008 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
26009 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
26010 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
26011 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
26012 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
26013 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
26014 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
26015 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
26016 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
26017 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
26018 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
26019 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
26020 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
26021 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
26022 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
26023 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
26024 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
26025 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
26026 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
26027 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
26028 : CODE_FOR_iwmmxt_wsadh);
26029 arg0 = CALL_EXPR_ARG (exp, 0);
26030 arg1 = CALL_EXPR_ARG (exp, 1);
26031 arg2 = CALL_EXPR_ARG (exp, 2);
26032 op0 = expand_normal (arg0);
26033 op1 = expand_normal (arg1);
26034 op2 = expand_normal (arg2);
26035 tmode = insn_data[icode].operand[0].mode;
26036 mode0 = insn_data[icode].operand[1].mode;
26037 mode1 = insn_data[icode].operand[2].mode;
26038 mode2 = insn_data[icode].operand[3].mode;
26040 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
26041 op0 = copy_to_mode_reg (mode0, op0);
26042 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
26043 op1 = copy_to_mode_reg (mode1, op1);
26044 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
26045 op2 = copy_to_mode_reg (mode2, op2);
26046 if (target == 0
26047 || GET_MODE (target) != tmode
26048 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
26049 target = gen_reg_rtx (tmode);
26050 pat = GEN_FCN (icode) (target, op0, op1, op2);
26051 if (! pat)
26052 return 0;
26053 emit_insn (pat);
26054 return target;
26056 case ARM_BUILTIN_WZERO:
26057 target = gen_reg_rtx (DImode);
26058 emit_insn (gen_iwmmxt_clrdi (target));
26059 return target;
26061 case ARM_BUILTIN_WSRLHI:
26062 case ARM_BUILTIN_WSRLWI:
26063 case ARM_BUILTIN_WSRLDI:
26064 case ARM_BUILTIN_WSLLHI:
26065 case ARM_BUILTIN_WSLLWI:
26066 case ARM_BUILTIN_WSLLDI:
26067 case ARM_BUILTIN_WSRAHI:
26068 case ARM_BUILTIN_WSRAWI:
26069 case ARM_BUILTIN_WSRADI:
26070 case ARM_BUILTIN_WRORHI:
26071 case ARM_BUILTIN_WRORWI:
26072 case ARM_BUILTIN_WRORDI:
26073 case ARM_BUILTIN_WSRLH:
26074 case ARM_BUILTIN_WSRLW:
26075 case ARM_BUILTIN_WSRLD:
26076 case ARM_BUILTIN_WSLLH:
26077 case ARM_BUILTIN_WSLLW:
26078 case ARM_BUILTIN_WSLLD:
26079 case ARM_BUILTIN_WSRAH:
26080 case ARM_BUILTIN_WSRAW:
26081 case ARM_BUILTIN_WSRAD:
26082 case ARM_BUILTIN_WRORH:
26083 case ARM_BUILTIN_WRORW:
26084 case ARM_BUILTIN_WRORD:
26085 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
26086 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
26087 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
26088 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
26089 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
26090 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
26091 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
26092 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
26093 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
26094 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
26095 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
26096 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
26097 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
26098 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
26099 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
26100 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
26101 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
26102 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
26103 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
26104 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
26105 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
26106 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
26107 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
26108 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
26109 : CODE_FOR_nothing);
26110 arg1 = CALL_EXPR_ARG (exp, 1);
26111 op1 = expand_normal (arg1);
26112 if (GET_MODE (op1) == VOIDmode)
26114 imm = INTVAL (op1);
26115 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
26116 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
26117 && (imm < 0 || imm > 32))
26119 if (fcode == ARM_BUILTIN_WRORHI)
26120 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
26121 else if (fcode == ARM_BUILTIN_WRORWI)
26122 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
26123 else if (fcode == ARM_BUILTIN_WRORH)
26124 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
26125 else
26126 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
26128 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
26129 && (imm < 0 || imm > 64))
26131 if (fcode == ARM_BUILTIN_WRORDI)
26132 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
26133 else
26134 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
26136 else if (imm < 0)
26138 if (fcode == ARM_BUILTIN_WSRLHI)
26139 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
26140 else if (fcode == ARM_BUILTIN_WSRLWI)
26141 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
26142 else if (fcode == ARM_BUILTIN_WSRLDI)
26143 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
26144 else if (fcode == ARM_BUILTIN_WSLLHI)
26145 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
26146 else if (fcode == ARM_BUILTIN_WSLLWI)
26147 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
26148 else if (fcode == ARM_BUILTIN_WSLLDI)
26149 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
26150 else if (fcode == ARM_BUILTIN_WSRAHI)
26151 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
26152 else if (fcode == ARM_BUILTIN_WSRAWI)
26153 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
26154 else if (fcode == ARM_BUILTIN_WSRADI)
26155 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
26156 else if (fcode == ARM_BUILTIN_WSRLH)
26157 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
26158 else if (fcode == ARM_BUILTIN_WSRLW)
26159 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
26160 else if (fcode == ARM_BUILTIN_WSRLD)
26161 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
26162 else if (fcode == ARM_BUILTIN_WSLLH)
26163 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
26164 else if (fcode == ARM_BUILTIN_WSLLW)
26165 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
26166 else if (fcode == ARM_BUILTIN_WSLLD)
26167 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
26168 else if (fcode == ARM_BUILTIN_WSRAH)
26169 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
26170 else if (fcode == ARM_BUILTIN_WSRAW)
26171 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
26172 else
26173 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
26176 return arm_expand_binop_builtin (icode, exp, target);
26178 default:
26179 break;
26182 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
26183 if (d->code == (const enum arm_builtins) fcode)
26184 return arm_expand_binop_builtin (d->icode, exp, target);
26186 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
26187 if (d->code == (const enum arm_builtins) fcode)
26188 return arm_expand_unop_builtin (d->icode, exp, target, 0);
26190 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
26191 if (d->code == (const enum arm_builtins) fcode)
26192 return arm_expand_ternop_builtin (d->icode, exp, target);
26194 /* @@@ Should really do something sensible here. */
26195 return NULL_RTX;
26198 /* Return the number (counting from 0) of
26199 the least significant set bit in MASK. */
26201 inline static int
26202 number_of_first_bit_set (unsigned mask)
26204 return ctz_hwi (mask);
26207 /* Like emit_multi_reg_push, but allowing for a different set of
26208 registers to be described as saved. MASK is the set of registers
26209 to be saved; REAL_REGS is the set of registers to be described as
26210 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26212 static rtx_insn *
26213 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26215 unsigned long regno;
26216 rtx par[10], tmp, reg;
26217 rtx_insn *insn;
26218 int i, j;
26220 /* Build the parallel of the registers actually being stored. */
26221 for (i = 0; mask; ++i, mask &= mask - 1)
26223 regno = ctz_hwi (mask);
26224 reg = gen_rtx_REG (SImode, regno);
26226 if (i == 0)
26227 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26228 else
26229 tmp = gen_rtx_USE (VOIDmode, reg);
26231 par[i] = tmp;
26234 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26235 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26236 tmp = gen_frame_mem (BLKmode, tmp);
26237 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26238 par[0] = tmp;
26240 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26241 insn = emit_insn (tmp);
26243 /* Always build the stack adjustment note for unwind info. */
26244 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26245 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26246 par[0] = tmp;
26248 /* Build the parallel of the registers recorded as saved for unwind. */
26249 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26251 regno = ctz_hwi (real_regs);
26252 reg = gen_rtx_REG (SImode, regno);
26254 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26255 tmp = gen_frame_mem (SImode, tmp);
26256 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26257 RTX_FRAME_RELATED_P (tmp) = 1;
26258 par[j + 1] = tmp;
26261 if (j == 0)
26262 tmp = par[0];
26263 else
26265 RTX_FRAME_RELATED_P (par[0]) = 1;
26266 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26269 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26271 return insn;
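/* Sketch of the RTL built above (hedged; the register choice is
   illustrative): for MASK == {r4, r5, lr} the emitted insn is roughly

     (parallel [(set (mem:BLK (pre_modify (reg sp)
                                          (plus (reg sp) (const_int -12))))
                     (unspec:BLK [(reg r4)] UNSPEC_PUSH_MULT))
                (use (reg r5))
                (use (reg lr))])

   and the REG_FRAME_RELATED_EXPR note always records the SP adjustment,
   plus one (set (mem ...) (reg ...)) per register in REAL_REGS.  */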
26274 /* Emit code to push or pop registers to or from the stack. F is the
26275 assembly file. MASK is the registers to pop. */
26276 static void
26277 thumb_pop (FILE *f, unsigned long mask)
26279 int regno;
26280 int lo_mask = mask & 0xFF;
26281 int pushed_words = 0;
26283 gcc_assert (mask);
26285 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26287 /* Special case. Do not generate a POP PC statement here, do it in
26288 thumb_exit (). */
26289 thumb_exit (f, -1);
26290 return;
26293 fprintf (f, "\tpop\t{");
26295 /* Look at the low registers first. */
26296 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26298 if (lo_mask & 1)
26300 asm_fprintf (f, "%r", regno);
26302 if ((lo_mask & ~1) != 0)
26303 fprintf (f, ", ");
26305 pushed_words++;
26309 if (mask & (1 << PC_REGNUM))
26311 /* Catch popping the PC. */
26312 if (TARGET_INTERWORK || TARGET_BACKTRACE
26313 || crtl->calls_eh_return)
26315 /* The PC is never popped directly; instead
26316 it is popped into r3 and then BX is used. */
26317 fprintf (f, "}\n");
26319 thumb_exit (f, -1);
26321 return;
26323 else
26325 if (mask & 0xFF)
26326 fprintf (f, ", ");
26328 asm_fprintf (f, "%r", PC_REGNUM);
26332 fprintf (f, "}\n");
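/* For example (hedged; assumes no interworking, backtrace or EH return),
   thumb_pop with MASK covering r4, r5 and the PC prints

       pop     {r4, r5, pc}

   whereas with interworking or backtracing the PC is not popped here;
   the brace is closed and thumb_exit finishes the return with a pop into
   a low register followed by a BX.  */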
26335 /* Generate code to return from a thumb function.
26336 If 'reg_containing_return_addr' is -1, then the return address is
26337 actually on the stack, at the stack pointer. */
26338 static void
26339 thumb_exit (FILE *f, int reg_containing_return_addr)
26341 unsigned regs_available_for_popping;
26342 unsigned regs_to_pop;
26343 int pops_needed;
26344 unsigned available;
26345 unsigned required;
26346 enum machine_mode mode;
26347 int size;
26348 int restore_a4 = FALSE;
26350 /* Compute the registers we need to pop. */
26351 regs_to_pop = 0;
26352 pops_needed = 0;
26354 if (reg_containing_return_addr == -1)
26356 regs_to_pop |= 1 << LR_REGNUM;
26357 ++pops_needed;
26360 if (TARGET_BACKTRACE)
26362 /* Restore the (ARM) frame pointer and stack pointer. */
26363 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26364 pops_needed += 2;
26367 /* If there is nothing to pop then just emit the BX instruction and
26368 return. */
26369 if (pops_needed == 0)
26371 if (crtl->calls_eh_return)
26372 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26374 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26375 return;
26377 /* Otherwise, if we are not supporting interworking, have not created
26378 a backtrace structure, and the function was not entered in ARM mode, then
26379 just pop the return address straight into the PC. */
26380 else if (!TARGET_INTERWORK
26381 && !TARGET_BACKTRACE
26382 && !is_called_in_ARM_mode (current_function_decl)
26383 && !crtl->calls_eh_return)
26385 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26386 return;
26389 /* Find out how many of the (return) argument registers we can corrupt. */
26390 regs_available_for_popping = 0;
26392 /* If returning via __builtin_eh_return, the bottom three registers
26393 all contain information needed for the return. */
26394 if (crtl->calls_eh_return)
26395 size = 12;
26396 else
26398 /* If possible, deduce the registers used from the function's
26399 return value. This is more reliable than examining
26400 df_regs_ever_live_p () because that will be set if the register is
26401 ever used in the function, not just if the register is used
26402 to hold a return value. */
26404 if (crtl->return_rtx != 0)
26405 mode = GET_MODE (crtl->return_rtx);
26406 else
26407 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26409 size = GET_MODE_SIZE (mode);
26411 if (size == 0)
26413 /* In a void function we can use any argument register.
26414 In a function that returns a structure on the stack
26415 we can use the second and third argument registers. */
26416 if (mode == VOIDmode)
26417 regs_available_for_popping =
26418 (1 << ARG_REGISTER (1))
26419 | (1 << ARG_REGISTER (2))
26420 | (1 << ARG_REGISTER (3));
26421 else
26422 regs_available_for_popping =
26423 (1 << ARG_REGISTER (2))
26424 | (1 << ARG_REGISTER (3));
26426 else if (size <= 4)
26427 regs_available_for_popping =
26428 (1 << ARG_REGISTER (2))
26429 | (1 << ARG_REGISTER (3));
26430 else if (size <= 8)
26431 regs_available_for_popping =
26432 (1 << ARG_REGISTER (3));
26435 /* Match registers to be popped with registers into which we pop them. */
26436 for (available = regs_available_for_popping,
26437 required = regs_to_pop;
26438 required != 0 && available != 0;
26439 available &= ~(available & - available),
26440 required &= ~(required & - required))
26441 -- pops_needed;
26443 /* If we have any popping registers left over, remove them. */
26444 if (available > 0)
26445 regs_available_for_popping &= ~available;
26447 /* Otherwise if we need another popping register we can use
26448 the fourth argument register. */
26449 else if (pops_needed)
26451 /* If we have not found any free argument registers and
26452 reg a4 contains the return address, we must move it. */
26453 if (regs_available_for_popping == 0
26454 && reg_containing_return_addr == LAST_ARG_REGNUM)
26456 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26457 reg_containing_return_addr = LR_REGNUM;
26459 else if (size > 12)
26461 /* Register a4 is being used to hold part of the return value,
26462 but we have dire need of a free, low register. */
26463 restore_a4 = TRUE;
26465 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26468 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26470 /* The fourth argument register is available. */
26471 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26473 --pops_needed;
26477 /* Pop as many registers as we can. */
26478 thumb_pop (f, regs_available_for_popping);
26480 /* Process the registers we popped. */
26481 if (reg_containing_return_addr == -1)
26483 /* The return address was popped into the lowest numbered register. */
26484 regs_to_pop &= ~(1 << LR_REGNUM);
26486 reg_containing_return_addr =
26487 number_of_first_bit_set (regs_available_for_popping);
26489 /* Remove this register from the mask of available registers, so that
26490 the return address will not be corrupted by further pops. */
26491 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26494 /* If we popped other registers then handle them here. */
26495 if (regs_available_for_popping)
26497 int frame_pointer;
26499 /* Work out which register currently contains the frame pointer. */
26500 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26502 /* Move it into the correct place. */
26503 asm_fprintf (f, "\tmov\t%r, %r\n",
26504 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26506 /* (Temporarily) remove it from the mask of popped registers. */
26507 regs_available_for_popping &= ~(1 << frame_pointer);
26508 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26510 if (regs_available_for_popping)
26512 int stack_pointer;
26514 /* We popped the stack pointer as well;
26515 find the register that contains it. */
26516 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26518 /* Move it into the stack register. */
26519 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26521 /* At this point we have popped all necessary registers, so
26522 do not worry about restoring regs_available_for_popping
26523 to its correct value:
26525 assert (pops_needed == 0)
26526 assert (regs_available_for_popping == (1 << frame_pointer))
26527 assert (regs_to_pop == (1 << STACK_POINTER)) */
26529 else
26531 /* Since we have just moved the popped value into the frame
26532 pointer, the popping register is available for reuse, and
26533 we know that we still have the stack pointer left to pop. */
26534 regs_available_for_popping |= (1 << frame_pointer);
26538 /* If we still have registers left on the stack, but we no longer have
26539 any registers into which we can pop them, then we must move the return
26540 address into the link register and make available the register that
26541 contained it. */
26542 if (regs_available_for_popping == 0 && pops_needed > 0)
26544 regs_available_for_popping |= 1 << reg_containing_return_addr;
26546 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26547 reg_containing_return_addr);
26549 reg_containing_return_addr = LR_REGNUM;
26552 /* If we have registers left on the stack then pop some more.
26553 We know that at most we will want to pop FP and SP. */
26554 if (pops_needed > 0)
26556 int popped_into;
26557 int move_to;
26559 thumb_pop (f, regs_available_for_popping);
26561 /* We have popped either FP or SP.
26562 Move whichever one it is into the correct register. */
26563 popped_into = number_of_first_bit_set (regs_available_for_popping);
26564 move_to = number_of_first_bit_set (regs_to_pop);
26566 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26568 regs_to_pop &= ~(1 << move_to);
26570 --pops_needed;
26573 /* If we still have not popped everything then we must have only
26574 had one register available to us and we are now popping the SP. */
26575 if (pops_needed > 0)
26577 int popped_into;
26579 thumb_pop (f, regs_available_for_popping);
26581 popped_into = number_of_first_bit_set (regs_available_for_popping);
26583 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26585 /* assert (regs_to_pop == (1 << STACK_POINTER))
26586 assert (pops_needed == 1) */
26590 /* If necessary restore the a4 register. */
26591 if (restore_a4)
26593 if (reg_containing_return_addr != LR_REGNUM)
26595 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26596 reg_containing_return_addr = LR_REGNUM;
26599 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26602 if (crtl->calls_eh_return)
26603 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26605 /* Return to caller. */
26606 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26609 /* Scan INSN just before assembler is output for it.
26610 For Thumb-1, we track the status of the condition codes; this
26611 information is used in the cbranchsi4_insn pattern. */
26612 void
26613 thumb1_final_prescan_insn (rtx_insn *insn)
26615 if (flag_print_asm_name)
26616 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26617 INSN_ADDRESSES (INSN_UID (insn)));
26618 /* Don't overwrite the previous setter when we get to a cbranch. */
26619 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26621 enum attr_conds conds;
26623 if (cfun->machine->thumb1_cc_insn)
26625 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26626 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26627 CC_STATUS_INIT;
26629 conds = get_attr_conds (insn);
26630 if (conds == CONDS_SET)
26632 rtx set = single_set (insn);
26633 cfun->machine->thumb1_cc_insn = insn;
26634 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26635 cfun->machine->thumb1_cc_op1 = const0_rtx;
26636 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26637 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26639 rtx src1 = XEXP (SET_SRC (set), 1);
26640 if (src1 == const0_rtx)
26641 cfun->machine->thumb1_cc_mode = CCmode;
26643 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26645 /* Record the src register operand instead of dest because
26646 cprop_hardreg pass propagates src. */
26647 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26650 else if (conds != CONDS_NOCOND)
26651 cfun->machine->thumb1_cc_insn = NULL_RTX;
26654 /* Check if unexpected far jump is used. */
26655 if (cfun->machine->lr_save_eliminated
26656 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26657 internal_error("Unexpected thumb1 far jump");
26660 int
26661 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26663 unsigned HOST_WIDE_INT mask = 0xff;
26664 int i;
26666 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26667 if (val == 0) /* XXX */
26668 return 0;
26670 for (i = 0; i < 25; i++)
26671 if ((val & (mask << i)) == val)
26672 return 1;
26674 return 0;
26677 /* Returns nonzero if the current function contains,
26678 or might contain, a far jump. */
26679 static int
26680 thumb_far_jump_used_p (void)
26682 rtx_insn *insn;
26683 bool far_jump = false;
26684 unsigned int func_size = 0;
26686 /* This test is only important for leaf functions. */
26687 /* assert (!leaf_function_p ()); */
26689 /* If we have already decided that far jumps may be used,
26690 do not bother checking again, and always return true even if
26691 it turns out that they are not being used. Once we have made
26692 the decision that far jumps are present (and hence that the link
26693 register will be pushed onto the stack) we cannot go back on it. */
26694 if (cfun->machine->far_jump_used)
26695 return 1;
26697 /* If this function is not being called from the prologue/epilogue
26698 generation code then it must be being called from the
26699 INITIAL_ELIMINATION_OFFSET macro. */
26700 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26702 /* In this case we know that we are being asked about the elimination
26703 of the arg pointer register. If that register is not being used,
26704 then there are no arguments on the stack, and we do not have to
26705 worry that a far jump might force the prologue to push the link
26706 register, changing the stack offsets. In this case we can just
26707 return false, since the presence of far jumps in the function will
26708 not affect stack offsets.
26710 If the arg pointer is live (or if it was live, but has now been
26711 eliminated and so set to dead) then we do have to test to see if
26712 the function might contain a far jump. This test can lead to some
26713 false negatives, since before reload is completed the length of
26714 branch instructions is not known, so gcc defaults to returning their
26715 longest length, which in turn sets the far jump attribute to true.
26717 A false negative will not result in bad code being generated, but it
26718 will result in a needless push and pop of the link register. We
26719 hope that this does not occur too often.
26721 If we need doubleword stack alignment this could affect the other
26722 elimination offsets so we can't risk getting it wrong. */
26723 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26724 cfun->machine->arg_pointer_live = 1;
26725 else if (!cfun->machine->arg_pointer_live)
26726 return 0;
26729 /* We should not change far_jump_used during or after reload, as there is
26730 no chance to change stack frame layout. */
26731 if (reload_in_progress || reload_completed)
26732 return 0;
26734 /* Check to see if the function contains a branch
26735 insn with the far jump attribute set. */
26736 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26738 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26740 far_jump = true;
26742 func_size += get_attr_length (insn);
26745 /* The far_jump attribute will always be true for thumb1 before the
26746 shorten_branch pass, so checking the far_jump attribute before
26747 that pass isn't very useful.
26749 The following heuristic tries to estimate more accurately whether a far
26750 jump will actually be needed. It is very conservative, as there is
26751 no chance to roll back a decision not to use far jumps.
26753 Thumb1 long branch offsets range from -2048 to 2046. In the worst case each
26754 2-byte insn is associated with a 4-byte constant pool entry, so the code and
26755 literals span roughly func_size * 3 bytes; using 2048/3 as the threshold on the function size is conservative enough. */
26756 if (far_jump)
26758 if ((func_size * 3) >= 2048)
26760 /* Record the fact that we have decided that
26761 the function does use far jumps. */
26762 cfun->machine->far_jump_used = 1;
26763 return 1;
26767 return 0;
26770 /* Return nonzero if FUNC must be entered in ARM mode. */
26771 int
26772 is_called_in_ARM_mode (tree func)
26774 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26776 /* Ignore the problem of functions whose address is taken. */
26777 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26778 return TRUE;
26780 #ifdef ARM_PE
26781 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26782 #else
26783 return FALSE;
26784 #endif
26787 /* Given the stack offsets and register mask in OFFSETS, decide how
26788 many additional registers to push instead of subtracting a constant
26789 from SP. For epilogues the principle is the same except we use pop.
26790 FOR_PROLOGUE indicates which we're generating. */
26791 static int
26792 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26794 HOST_WIDE_INT amount;
26795 unsigned long live_regs_mask = offsets->saved_regs_mask;
26796 /* Extract a mask of the ones we can give to the Thumb's push/pop
26797 instruction. */
26798 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26799 /* Then count how many other high registers will need to be pushed. */
26800 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
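/* For reference: Thumb-1 PUSH can name r0-r7 plus LR, hence the 0x40ff
   mask (bits 0-7 and bit 14) for prologues, while for epilogues only the
   low byte is usable here (POP with PC is handled separately); 0x0f00
   selects r8-r11, the high registers that need separate handling.  */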
26801 int n_free, reg_base, size;
26803 if (!for_prologue && frame_pointer_needed)
26804 amount = offsets->locals_base - offsets->saved_regs;
26805 else
26806 amount = offsets->outgoing_args - offsets->saved_regs;
26808 /* If the stack frame size is 512 exactly, we can save one load
26809 instruction, which should make this a win even when optimizing
26810 for speed. */
26811 if (!optimize_size && amount != 512)
26812 return 0;
26814 /* Can't do this if there are high registers to push. */
26815 if (high_regs_pushed != 0)
26816 return 0;
26818 /* Shouldn't do it in the prologue if no registers would normally
26819 be pushed at all. In the epilogue, also allow it if we'll have
26820 a pop insn for the PC. */
26821 if (l_mask == 0
26822 && (for_prologue
26823 || TARGET_BACKTRACE
26824 || (live_regs_mask & 1 << LR_REGNUM) == 0
26825 || TARGET_INTERWORK
26826 || crtl->args.pretend_args_size != 0))
26827 return 0;
26829 /* Don't do this if thumb_expand_prologue wants to emit instructions
26830 between the push and the stack frame allocation. */
26831 if (for_prologue
26832 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26833 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26834 return 0;
26836 reg_base = 0;
26837 n_free = 0;
26838 if (!for_prologue)
26840 size = arm_size_return_regs ();
26841 reg_base = ARM_NUM_INTS (size);
26842 live_regs_mask >>= reg_base;
26845 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26846 && (for_prologue || call_used_regs[reg_base + n_free]))
26848 live_regs_mask >>= 1;
26849 n_free++;
26852 if (n_free == 0)
26853 return 0;
26854 gcc_assert (amount / 4 * 4 == amount);
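/* A sketch of the reasoning behind the returns below: the Thumb-1
   SP-adjusting add/sub takes a 7-bit immediate scaled by 4 (at most 508
   bytes), and each extra register pushed trims 4 bytes off the remaining
   adjustment.  So (amount - 508) / 4 is the fewest extra pushes that bring
   the leftover adjustment back into range, and if the whole adjustment
   fits in free registers we simply push amount / 4 of them.  */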
26856 if (amount >= 512 && (amount - n_free * 4) < 512)
26857 return (amount - 508) / 4;
26858 if (amount <= n_free * 4)
26859 return amount / 4;
26860 return 0;
26863 /* The bits which aren't usefully expanded as rtl. */
26864 const char *
26865 thumb1_unexpanded_epilogue (void)
26867 arm_stack_offsets *offsets;
26868 int regno;
26869 unsigned long live_regs_mask = 0;
26870 int high_regs_pushed = 0;
26871 int extra_pop;
26872 int had_to_push_lr;
26873 int size;
26875 if (cfun->machine->return_used_this_function != 0)
26876 return "";
26878 if (IS_NAKED (arm_current_func_type ()))
26879 return "";
26881 offsets = arm_get_frame_offsets ();
26882 live_regs_mask = offsets->saved_regs_mask;
26883 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26885 /* Deduce the registers used from the function's return value. This is
26886 more reliable than examining df_regs_ever_live_p () because that
26887 will be set if the register is ever used in the function, not just if
26888 the register is used to hold a return value. */
26889 size = arm_size_return_regs ();
26891 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26892 if (extra_pop > 0)
26894 unsigned long extra_mask = (1 << extra_pop) - 1;
26895 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26898 /* The prolog may have pushed some high registers to use as
26899 work registers. e.g. the testsuite file:
26900 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26901 compiles to produce:
26902 push {r4, r5, r6, r7, lr}
26903 mov r7, r9
26904 mov r6, r8
26905 push {r6, r7}
26906 as part of the prolog. We have to undo that pushing here. */
26908 if (high_regs_pushed)
26910 unsigned long mask = live_regs_mask & 0xff;
26911 int next_hi_reg;
26913 /* The available low registers depend on the size of the value we are
26914 returning. */
26915 if (size <= 12)
26916 mask |= 1 << 3;
26917 if (size <= 8)
26918 mask |= 1 << 2;
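/* The low registers already being restored can double as staging slots;
   in addition, a return value of at most 12 bytes leaves r3 free and one
   of at most 8 bytes leaves r2 free as well, which is what the two tests
   above encode.  */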
26920 if (mask == 0)
26921 /* Oh dear! We have no low registers into which we can pop
26922 high registers! */
26923 internal_error
26924 ("no low registers available for popping high registers");
26926 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26927 if (live_regs_mask & (1 << next_hi_reg))
26928 break;
26930 while (high_regs_pushed)
26932 /* Find lo register(s) into which the high register(s) can
26933 be popped. */
26934 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26936 if (mask & (1 << regno))
26937 high_regs_pushed--;
26938 if (high_regs_pushed == 0)
26939 break;
26942 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26944 /* Pop the values into the low register(s). */
26945 thumb_pop (asm_out_file, mask);
26947 /* Move the value(s) into the high registers. */
26948 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26950 if (mask & (1 << regno))
26952 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26953 regno);
26955 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26956 if (live_regs_mask & (1 << next_hi_reg))
26957 break;
26961 live_regs_mask &= ~0x0f00;
26964 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26965 live_regs_mask &= 0xff;
26967 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26969 /* Pop the return address into the PC. */
26970 if (had_to_push_lr)
26971 live_regs_mask |= 1 << PC_REGNUM;
26973 /* Either no argument registers were pushed or a backtrace
26974 structure was created which includes an adjusted stack
26975 pointer, so just pop everything. */
26976 if (live_regs_mask)
26977 thumb_pop (asm_out_file, live_regs_mask);
26979 /* We have either just popped the return address into the
26980 PC or it was kept in LR for the entire function.
26981 Note that thumb_pop has already called thumb_exit if the
26982 PC was in the list. */
26983 if (!had_to_push_lr)
26984 thumb_exit (asm_out_file, LR_REGNUM);
26986 else
26988 /* Pop everything but the return address. */
26989 if (live_regs_mask)
26990 thumb_pop (asm_out_file, live_regs_mask);
26992 if (had_to_push_lr)
26994 if (size > 12)
26996 /* We have no free low regs, so save one. */
26997 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26998 LAST_ARG_REGNUM);
27001 /* Get the return address into a temporary register. */
27002 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
27004 if (size > 12)
27006 /* Move the return address to lr. */
27007 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
27008 LAST_ARG_REGNUM);
27009 /* Restore the low register. */
27010 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
27011 IP_REGNUM);
27012 regno = LR_REGNUM;
27014 else
27015 regno = LAST_ARG_REGNUM;
27017 else
27018 regno = LR_REGNUM;
27020 /* Remove the argument registers that were pushed onto the stack. */
27021 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
27022 SP_REGNUM, SP_REGNUM,
27023 crtl->args.pretend_args_size);
27025 thumb_exit (asm_out_file, regno);
27028 return "";
27031 /* Functions to save and restore machine-specific function data. */
27032 static struct machine_function *
27033 arm_init_machine_status (void)
27035 struct machine_function *machine;
27036 machine = ggc_cleared_alloc<machine_function> ();
27038 #if ARM_FT_UNKNOWN != 0
27039 machine->func_type = ARM_FT_UNKNOWN;
27040 #endif
27041 return machine;
27044 /* Return an RTX indicating where the return address to the
27045 calling function can be found. */
27047 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27049 if (count != 0)
27050 return NULL_RTX;
27052 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27055 /* Do anything needed before RTL is emitted for each function. */
27056 void
27057 arm_init_expanders (void)
27059 /* Arrange to initialize and mark the machine per-function status. */
27060 init_machine_status = arm_init_machine_status;
27062 /* This is to stop the combine pass optimizing away the alignment
27063 adjustment of va_arg. */
27064 /* ??? It is claimed that this should not be necessary. */
27065 if (cfun)
27066 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27070 /* Like arm_compute_initial_elimination offset. Simpler because there
27071 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27072 to point at the base of the local variables after static stack
27073 space for a function has been allocated. */
27075 HOST_WIDE_INT
27076 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27078 arm_stack_offsets *offsets;
27080 offsets = arm_get_frame_offsets ();
27082 switch (from)
27084 case ARG_POINTER_REGNUM:
27085 switch (to)
27087 case STACK_POINTER_REGNUM:
27088 return offsets->outgoing_args - offsets->saved_args;
27090 case FRAME_POINTER_REGNUM:
27091 return offsets->soft_frame - offsets->saved_args;
27093 case ARM_HARD_FRAME_POINTER_REGNUM:
27094 return offsets->saved_regs - offsets->saved_args;
27096 case THUMB_HARD_FRAME_POINTER_REGNUM:
27097 return offsets->locals_base - offsets->saved_args;
27099 default:
27100 gcc_unreachable ();
27102 break;
27104 case FRAME_POINTER_REGNUM:
27105 switch (to)
27107 case STACK_POINTER_REGNUM:
27108 return offsets->outgoing_args - offsets->soft_frame;
27110 case ARM_HARD_FRAME_POINTER_REGNUM:
27111 return offsets->saved_regs - offsets->soft_frame;
27113 case THUMB_HARD_FRAME_POINTER_REGNUM:
27114 return offsets->locals_base - offsets->soft_frame;
27116 default:
27117 gcc_unreachable ();
27119 break;
27121 default:
27122 gcc_unreachable ();
27126 /* Generate the function's prologue. */
27128 void
27129 thumb1_expand_prologue (void)
27131 rtx_insn *insn;
27133 HOST_WIDE_INT amount;
27134 arm_stack_offsets *offsets;
27135 unsigned long func_type;
27136 int regno;
27137 unsigned long live_regs_mask;
27138 unsigned long l_mask;
27139 unsigned high_regs_pushed = 0;
27141 func_type = arm_current_func_type ();
27143 /* Naked functions don't have prologues. */
27144 if (IS_NAKED (func_type))
27145 return;
27147 if (IS_INTERRUPT (func_type))
27149 error ("interrupt Service Routines cannot be coded in Thumb mode");
27150 return;
27153 if (is_called_in_ARM_mode (current_function_decl))
27154 emit_insn (gen_prologue_thumb1_interwork ());
27156 offsets = arm_get_frame_offsets ();
27157 live_regs_mask = offsets->saved_regs_mask;
27159 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27160 l_mask = live_regs_mask & 0x40ff;
27161 /* Then count how many other high registers will need to be pushed. */
27162 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27164 if (crtl->args.pretend_args_size)
27166 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27168 if (cfun->machine->uses_anonymous_args)
27170 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27171 unsigned long mask;
27173 mask = 1ul << (LAST_ARG_REGNUM + 1);
27174 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
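/* E.g. two words of anonymous arguments give
   mask = (1 << 4) - (1 << 2) = 0xc, i.e. {r2, r3}: the top NUM_PUSHES
   argument registers (LAST_ARG_REGNUM being r3).  */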
27176 insn = thumb1_emit_multi_reg_push (mask, 0);
27178 else
27180 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27181 stack_pointer_rtx, x));
27183 RTX_FRAME_RELATED_P (insn) = 1;
27186 if (TARGET_BACKTRACE)
27188 HOST_WIDE_INT offset = 0;
27189 unsigned work_register;
27190 rtx work_reg, x, arm_hfp_rtx;
27192 /* We have been asked to create a stack backtrace structure.
27193 The code looks like this:
27195 0 .align 2
27196 0 func:
27197 0 sub SP, #16 Reserve space for 4 registers.
27198 2 push {R7} Push low registers.
27199 4 add R7, SP, #20 Get the stack pointer before the push.
27200 6 str R7, [SP, #8] Store the stack pointer
27201 (before reserving the space).
27202 8 mov R7, PC Get hold of the start of this code + 12.
27203 10 str R7, [SP, #16] Store it.
27204 12 mov R7, FP Get hold of the current frame pointer.
27205 14 str R7, [SP, #4] Store it.
27206 16 mov R7, LR Get hold of the current return address.
27207 18 str R7, [SP, #12] Store it.
27208 20 add R7, SP, #16 Point at the start of the
27209 backtrace structure.
27210 22 mov FP, R7 Put this value into the frame pointer. */
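/* Relative to the new frame pointer the structure is therefore laid out
   roughly as: [FP] saved PC, [FP, #-4] saved LR, [FP, #-8] the pre-frame
   SP, and [FP, #-12] the caller's frame pointer.  */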
27212 work_register = thumb_find_work_register (live_regs_mask);
27213 work_reg = gen_rtx_REG (SImode, work_register);
27214 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27216 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27217 stack_pointer_rtx, GEN_INT (-16)));
27218 RTX_FRAME_RELATED_P (insn) = 1;
27220 if (l_mask)
27222 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27223 RTX_FRAME_RELATED_P (insn) = 1;
27225 offset = bit_count (l_mask) * UNITS_PER_WORD;
27228 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27229 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27231 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27232 x = gen_frame_mem (SImode, x);
27233 emit_move_insn (x, work_reg);
27235 /* Make sure that the instruction fetching the PC is in the right place
27236 to calculate "start of backtrace creation code + 12". */
27237 /* ??? The stores using the common WORK_REG ought to be enough to
27238 prevent the scheduler from doing anything weird. Failing that
27239 we could always move all of the following into an UNSPEC_VOLATILE. */
27240 if (l_mask)
27242 x = gen_rtx_REG (SImode, PC_REGNUM);
27243 emit_move_insn (work_reg, x);
27245 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27246 x = gen_frame_mem (SImode, x);
27247 emit_move_insn (x, work_reg);
27249 emit_move_insn (work_reg, arm_hfp_rtx);
27251 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27252 x = gen_frame_mem (SImode, x);
27253 emit_move_insn (x, work_reg);
27255 else
27257 emit_move_insn (work_reg, arm_hfp_rtx);
27259 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27260 x = gen_frame_mem (SImode, x);
27261 emit_move_insn (x, work_reg);
27263 x = gen_rtx_REG (SImode, PC_REGNUM);
27264 emit_move_insn (work_reg, x);
27266 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27267 x = gen_frame_mem (SImode, x);
27268 emit_move_insn (x, work_reg);
27271 x = gen_rtx_REG (SImode, LR_REGNUM);
27272 emit_move_insn (work_reg, x);
27274 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27275 x = gen_frame_mem (SImode, x);
27276 emit_move_insn (x, work_reg);
27278 x = GEN_INT (offset + 12);
27279 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27281 emit_move_insn (arm_hfp_rtx, work_reg);
27283 /* Optimization: If we are not pushing any low registers but we are going
27284 to push some high registers then delay our first push. This will just
27285 be a push of LR and we can combine it with the push of the first high
27286 register. */
27287 else if ((l_mask & 0xff) != 0
27288 || (high_regs_pushed == 0 && l_mask))
27290 unsigned long mask = l_mask;
27291 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27292 insn = thumb1_emit_multi_reg_push (mask, mask);
27293 RTX_FRAME_RELATED_P (insn) = 1;
27296 if (high_regs_pushed)
27298 unsigned pushable_regs;
27299 unsigned next_hi_reg;
27300 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27301 : crtl->args.info.nregs;
27302 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27304 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27305 if (live_regs_mask & (1 << next_hi_reg))
27306 break;
27308 /* Here we need to mask out registers used for passing arguments,
27309 even if they could otherwise be pushed. This avoids using them to stash
27310 the high registers, since such a stash could clobber live argument values. */
27311 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27313 if (pushable_regs == 0)
27314 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27316 while (high_regs_pushed > 0)
27318 unsigned long real_regs_mask = 0;
27320 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27322 if (pushable_regs & (1 << regno))
27324 emit_move_insn (gen_rtx_REG (SImode, regno),
27325 gen_rtx_REG (SImode, next_hi_reg));
27327 high_regs_pushed --;
27328 real_regs_mask |= (1 << next_hi_reg);
27330 if (high_regs_pushed)
27332 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27333 next_hi_reg --)
27334 if (live_regs_mask & (1 << next_hi_reg))
27335 break;
27337 else
27339 pushable_regs &= ~((1 << regno) - 1);
27340 break;
27345 /* If we had to find a work register and we have not yet
27346 saved the LR then add it to the list of regs to push. */
27347 if (l_mask == (1 << LR_REGNUM))
27349 pushable_regs |= l_mask;
27350 real_regs_mask |= l_mask;
27351 l_mask = 0;
27354 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27355 RTX_FRAME_RELATED_P (insn) = 1;
27359 /* Load the pic register before setting the frame pointer,
27360 so we can use r7 as a temporary work register. */
27361 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27362 arm_load_pic_register (live_regs_mask);
27364 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27365 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27366 stack_pointer_rtx);
27368 if (flag_stack_usage_info)
27369 current_function_static_stack_size
27370 = offsets->outgoing_args - offsets->saved_args;
27372 amount = offsets->outgoing_args - offsets->saved_regs;
27373 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27374 if (amount)
27376 if (amount < 512)
27378 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27379 GEN_INT (- amount)));
27380 RTX_FRAME_RELATED_P (insn) = 1;
27382 else
27384 rtx reg, dwarf;
27386 /* The stack decrement is too big for an immediate value in a single
27387 insn. In theory we could issue multiple subtracts, but after
27388 three of them it becomes more space efficient to place the full
27389 value in the constant pool and load into a register. (Also the
27390 ARM debugger really likes to see only one stack decrement per
27391 function). So instead we look for a scratch register into which
27392 we can load the decrement, and then we subtract this from the
27393 stack pointer. Unfortunately on the thumb the only available
27394 scratch registers are the argument registers, and we cannot use
27395 these as they may hold arguments to the function. Instead we
27396 attempt to locate a call preserved register which is used by this
27397 function. If we can find one, then we know that it will have
27398 been pushed at the start of the prologue and so we can corrupt
27399 it now. */
27400 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27401 if (live_regs_mask & (1 << regno))
27402 break;
27404 gcc_assert(regno <= LAST_LO_REGNUM);
27406 reg = gen_rtx_REG (SImode, regno);
27408 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27410 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27411 stack_pointer_rtx, reg));
27413 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27414 plus_constant (Pmode, stack_pointer_rtx,
27415 -amount));
27416 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27417 RTX_FRAME_RELATED_P (insn) = 1;
27421 if (frame_pointer_needed)
27422 thumb_set_frame_pointer (offsets);
27424 /* If we are profiling, make sure no instructions are scheduled before
27425 the call to mcount. Similarly if the user has requested no
27426 scheduling in the prolog. Similarly if we want non-call exceptions
27427 using the EABI unwinder, to prevent faulting instructions from being
27428 swapped with a stack adjustment. */
27429 if (crtl->profile || !TARGET_SCHED_PROLOG
27430 || (arm_except_unwind_info (&global_options) == UI_TARGET
27431 && cfun->can_throw_non_call_exceptions))
27432 emit_insn (gen_blockage ());
27434 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27435 if (live_regs_mask & 0xff)
27436 cfun->machine->lr_save_eliminated = 0;
27439 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
27440 POP instruction can be generated. LR should be replaced by PC. All
27441 the checks required are already done by USE_RETURN_INSN (). Hence,
27442 all we really need to check here is whether a single register or
27443 multiple registers are being returned. */
27444 void
27445 thumb2_expand_return (bool simple_return)
27447 int i, num_regs;
27448 unsigned long saved_regs_mask;
27449 arm_stack_offsets *offsets;
27451 offsets = arm_get_frame_offsets ();
27452 saved_regs_mask = offsets->saved_regs_mask;
27454 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27455 if (saved_regs_mask & (1 << i))
27456 num_regs++;
27458 if (!simple_return && saved_regs_mask)
27460 if (num_regs == 1)
27462 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27463 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27464 rtx addr = gen_rtx_MEM (SImode,
27465 gen_rtx_POST_INC (SImode,
27466 stack_pointer_rtx));
27467 set_mem_alias_set (addr, get_frame_alias_set ());
27468 XVECEXP (par, 0, 0) = ret_rtx;
27469 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27470 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27471 emit_jump_insn (par);
27473 else
27475 saved_regs_mask &= ~ (1 << LR_REGNUM);
27476 saved_regs_mask |= (1 << PC_REGNUM);
27477 arm_emit_multi_reg_pop (saved_regs_mask);
27480 else
27482 emit_jump_insn (simple_return_rtx);
27486 void
27487 thumb1_expand_epilogue (void)
27489 HOST_WIDE_INT amount;
27490 arm_stack_offsets *offsets;
27491 int regno;
27493 /* Naked functions don't have epilogues. */
27494 if (IS_NAKED (arm_current_func_type ()))
27495 return;
27497 offsets = arm_get_frame_offsets ();
27498 amount = offsets->outgoing_args - offsets->saved_regs;
27500 if (frame_pointer_needed)
27502 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27503 amount = offsets->locals_base - offsets->saved_regs;
27505 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27507 gcc_assert (amount >= 0);
27508 if (amount)
27510 emit_insn (gen_blockage ());
27512 if (amount < 512)
27513 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27514 GEN_INT (amount)));
27515 else
27517 /* r3 is always free in the epilogue. */
27518 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27520 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27521 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27525 /* Emit a USE (stack_pointer_rtx), so that
27526 the stack adjustment will not be deleted. */
27527 emit_insn (gen_force_register_use (stack_pointer_rtx));
27529 if (crtl->profile || !TARGET_SCHED_PROLOG)
27530 emit_insn (gen_blockage ());
27532 /* Emit a clobber for each register that will be restored in the epilogue,
27533 so that flow2 will get register lifetimes correct. */
27534 for (regno = 0; regno < 13; regno++)
27535 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27536 emit_clobber (gen_rtx_REG (SImode, regno));
27538 if (! df_regs_ever_live_p (LR_REGNUM))
27539 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27542 /* Epilogue code for APCS frame. */
27543 static void
27544 arm_expand_epilogue_apcs_frame (bool really_return)
27546 unsigned long func_type;
27547 unsigned long saved_regs_mask;
27548 int num_regs = 0;
27549 int i;
27550 int floats_from_frame = 0;
27551 arm_stack_offsets *offsets;
27553 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27554 func_type = arm_current_func_type ();
27556 /* Get frame offsets for ARM. */
27557 offsets = arm_get_frame_offsets ();
27558 saved_regs_mask = offsets->saved_regs_mask;
27560 /* Find the offset of the floating-point save area in the frame. */
27561 floats_from_frame
27562 = (offsets->saved_args
27563 + arm_compute_static_chain_stack_bytes ()
27564 - offsets->frame);
27566 /* Compute how many core registers are saved and how far away the floats are. */
27567 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27568 if (saved_regs_mask & (1 << i))
27570 num_regs++;
27571 floats_from_frame += 4;
27574 if (TARGET_HARD_FLOAT && TARGET_VFP)
27576 int start_reg;
27577 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27579 /* The offset is from IP_REGNUM. */
27580 int saved_size = arm_get_vfp_saved_size ();
27581 if (saved_size > 0)
27583 rtx_insn *insn;
27584 floats_from_frame += saved_size;
27585 insn = emit_insn (gen_addsi3 (ip_rtx,
27586 hard_frame_pointer_rtx,
27587 GEN_INT (-floats_from_frame)));
27588 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27589 ip_rtx, hard_frame_pointer_rtx);
27592 /* Generate VFP register multi-pop. */
27593 start_reg = FIRST_VFP_REGNUM;
27595 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27596 /* Look for a case where a reg does not need restoring. */
27597 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27598 && (!df_regs_ever_live_p (i + 1)
27599 || call_used_regs[i + 1]))
27601 if (start_reg != i)
27602 arm_emit_vfp_multi_reg_pop (start_reg,
27603 (i - start_reg) / 2,
27604 gen_rtx_REG (SImode,
27605 IP_REGNUM));
27606 start_reg = i + 2;
27609 /* Restore the remaining regs that we have discovered (or possibly
27610 even all of them, if the conditional in the for loop never
27611 fired). */
27612 if (start_reg != i)
27613 arm_emit_vfp_multi_reg_pop (start_reg,
27614 (i - start_reg) / 2,
27615 gen_rtx_REG (SImode, IP_REGNUM));
27618 if (TARGET_IWMMXT)
27620 /* The frame pointer is guaranteed to be non-double-word aligned, as
27621 it is set to double-word-aligned old_stack_pointer - 4. */
27622 rtx_insn *insn;
27623 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27625 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27626 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27628 rtx addr = gen_frame_mem (V2SImode,
27629 plus_constant (Pmode, hard_frame_pointer_rtx,
27630 - lrm_count * 4));
27631 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27632 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27633 gen_rtx_REG (V2SImode, i),
27634 NULL_RTX);
27635 lrm_count += 2;
27639 /* saved_regs_mask should contain IP, which holds the old stack pointer
27640 from the time the activation record was created. Since SP and IP are adjacent
27641 registers, we can restore the value directly into SP. */
27642 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27643 saved_regs_mask &= ~(1 << IP_REGNUM);
27644 saved_regs_mask |= (1 << SP_REGNUM);
27646 /* There are two registers left in saved_regs_mask - LR and PC. We
27647 only need to restore LR (the return address), but to
27648 save time we can load it directly into PC, unless we need a
27649 special function exit sequence, or we are not really returning. */
27650 if (really_return
27651 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27652 && !crtl->calls_eh_return)
27653 /* Delete LR from the register mask, so that LR on
27654 the stack is loaded into the PC in the register mask. */
27655 saved_regs_mask &= ~(1 << LR_REGNUM);
27656 else
27657 saved_regs_mask &= ~(1 << PC_REGNUM);
27659 num_regs = bit_count (saved_regs_mask);
27660 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27662 rtx_insn *insn;
27663 emit_insn (gen_blockage ());
27664 /* Unwind the stack to just below the saved registers. */
27665 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27666 hard_frame_pointer_rtx,
27667 GEN_INT (- 4 * num_regs)));
27669 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27670 stack_pointer_rtx, hard_frame_pointer_rtx);
27673 arm_emit_multi_reg_pop (saved_regs_mask);
27675 if (IS_INTERRUPT (func_type))
27677 /* Interrupt handlers will have pushed the
27678 IP onto the stack, so restore it now. */
27679 rtx_insn *insn;
27680 rtx addr = gen_rtx_MEM (SImode,
27681 gen_rtx_POST_INC (SImode,
27682 stack_pointer_rtx));
27683 set_mem_alias_set (addr, get_frame_alias_set ());
27684 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27685 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27686 gen_rtx_REG (SImode, IP_REGNUM),
27687 NULL_RTX);
27690 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27691 return;
27693 if (crtl->calls_eh_return)
27694 emit_insn (gen_addsi3 (stack_pointer_rtx,
27695 stack_pointer_rtx,
27696 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27698 if (IS_STACKALIGN (func_type))
27699 /* Restore the original stack pointer. Before prologue, the stack was
27700 realigned and the original stack pointer saved in r0. For details,
27701 see comment in arm_expand_prologue. */
27702 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27704 emit_jump_insn (simple_return_rtx);
27707 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27708 function is not a sibcall. */
27709 void
27710 arm_expand_epilogue (bool really_return)
27712 unsigned long func_type;
27713 unsigned long saved_regs_mask;
27714 int num_regs = 0;
27715 int i;
27716 int amount;
27717 arm_stack_offsets *offsets;
27719 func_type = arm_current_func_type ();
27721 /* Naked functions don't have epilogues. Hence, generate a return pattern, and
27722 let output_return_instruction take care of any instruction emission. */
27723 if (IS_NAKED (func_type)
27724 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27726 if (really_return)
27727 emit_jump_insn (simple_return_rtx);
27728 return;
27731 /* If we are throwing an exception, then we really must be doing a
27732 return, so we can't tail-call. */
27733 gcc_assert (!crtl->calls_eh_return || really_return);
27735 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27737 arm_expand_epilogue_apcs_frame (really_return);
27738 return;
27741 /* Get frame offsets for ARM. */
27742 offsets = arm_get_frame_offsets ();
27743 saved_regs_mask = offsets->saved_regs_mask;
27744 num_regs = bit_count (saved_regs_mask);
27746 if (frame_pointer_needed)
27748 rtx_insn *insn;
27749 /* Restore stack pointer if necessary. */
27750 if (TARGET_ARM)
27752 /* In ARM mode, frame pointer points to first saved register.
27753 Restore stack pointer to last saved register. */
27754 amount = offsets->frame - offsets->saved_regs;
27756 /* Force out any pending memory operations that reference stacked data
27757 before stack de-allocation occurs. */
27758 emit_insn (gen_blockage ());
27759 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27760 hard_frame_pointer_rtx,
27761 GEN_INT (amount)));
27762 arm_add_cfa_adjust_cfa_note (insn, amount,
27763 stack_pointer_rtx,
27764 hard_frame_pointer_rtx);
27766 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27767 deleted. */
27768 emit_insn (gen_force_register_use (stack_pointer_rtx));
27770 else
27772 /* In Thumb-2 mode, the frame pointer points to the last saved
27773 register. */
27774 amount = offsets->locals_base - offsets->saved_regs;
27775 if (amount)
27777 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27778 hard_frame_pointer_rtx,
27779 GEN_INT (amount)));
27780 arm_add_cfa_adjust_cfa_note (insn, amount,
27781 hard_frame_pointer_rtx,
27782 hard_frame_pointer_rtx);
27785 /* Force out any pending memory operations that reference stacked data
27786 before stack de-allocation occurs. */
27787 emit_insn (gen_blockage ());
27788 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27789 hard_frame_pointer_rtx));
27790 arm_add_cfa_adjust_cfa_note (insn, 0,
27791 stack_pointer_rtx,
27792 hard_frame_pointer_rtx);
27793 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27794 deleted. */
27795 emit_insn (gen_force_register_use (stack_pointer_rtx));
27798 else
27800 /* Pop off outgoing args and local frame to adjust stack pointer to
27801 last saved register. */
27802 amount = offsets->outgoing_args - offsets->saved_regs;
27803 if (amount)
27805 rtx_insn *tmp;
27806 /* Force out any pending memory operations that reference stacked data
27807 before stack de-allocation occurs. */
27808 emit_insn (gen_blockage ());
27809 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27810 stack_pointer_rtx,
27811 GEN_INT (amount)));
27812 arm_add_cfa_adjust_cfa_note (tmp, amount,
27813 stack_pointer_rtx, stack_pointer_rtx);
27814 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27815 not deleted. */
27816 emit_insn (gen_force_register_use (stack_pointer_rtx));
27820 if (TARGET_HARD_FLOAT && TARGET_VFP)
27822 /* Generate VFP register multi-pop. */
27823 int end_reg = LAST_VFP_REGNUM + 1;
27825 /* Scan the registers in reverse order. We need to match
27826 any groupings made in the prologue and generate matching
27827 vldm operations. Groups must be matched because, unlike
27828 pop, vldm can only restore consecutive registers. */
27829 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27830 /* Look for a case where a reg does not need restoring. */
27831 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27832 && (!df_regs_ever_live_p (i + 1)
27833 || call_used_regs[i + 1]))
27835 /* Restore the regs discovered so far (from reg+2 to
27836 end_reg). */
27837 if (end_reg > i + 2)
27838 arm_emit_vfp_multi_reg_pop (i + 2,
27839 (end_reg - (i + 2)) / 2,
27840 stack_pointer_rtx);
27841 end_reg = i;
27844 /* Restore the remaining regs that we have discovered (or possibly
27845 even all of them, if the conditional in the for loop never
27846 fired). */
27847 if (end_reg > i + 2)
27848 arm_emit_vfp_multi_reg_pop (i + 2,
27849 (end_reg - (i + 2)) / 2,
27850 stack_pointer_rtx);
27853 if (TARGET_IWMMXT)
27854 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27855 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27857 rtx_insn *insn;
27858 rtx addr = gen_rtx_MEM (V2SImode,
27859 gen_rtx_POST_INC (SImode,
27860 stack_pointer_rtx));
27861 set_mem_alias_set (addr, get_frame_alias_set ());
27862 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27863 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27864 gen_rtx_REG (V2SImode, i),
27865 NULL_RTX);
27866 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27867 stack_pointer_rtx, stack_pointer_rtx);
27870 if (saved_regs_mask)
27872 rtx insn;
27873 bool return_in_pc = false;
27875 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27876 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27877 && !IS_STACKALIGN (func_type)
27878 && really_return
27879 && crtl->args.pretend_args_size == 0
27880 && saved_regs_mask & (1 << LR_REGNUM)
27881 && !crtl->calls_eh_return)
27883 saved_regs_mask &= ~(1 << LR_REGNUM);
27884 saved_regs_mask |= (1 << PC_REGNUM);
27885 return_in_pc = true;
27888 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27890 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27891 if (saved_regs_mask & (1 << i))
27893 rtx addr = gen_rtx_MEM (SImode,
27894 gen_rtx_POST_INC (SImode,
27895 stack_pointer_rtx));
27896 set_mem_alias_set (addr, get_frame_alias_set ());
27898 if (i == PC_REGNUM)
27900 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27901 XVECEXP (insn, 0, 0) = ret_rtx;
27902 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27903 gen_rtx_REG (SImode, i),
27904 addr);
27905 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27906 insn = emit_jump_insn (insn);
27908 else
27910 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27911 addr));
27912 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27913 gen_rtx_REG (SImode, i),
27914 NULL_RTX);
27915 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27916 stack_pointer_rtx,
27917 stack_pointer_rtx);
27921 else
27923 if (TARGET_LDRD
27924 && current_tune->prefer_ldrd_strd
27925 && !optimize_function_for_size_p (cfun))
27927 if (TARGET_THUMB2)
27928 thumb2_emit_ldrd_pop (saved_regs_mask);
27929 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27930 arm_emit_ldrd_pop (saved_regs_mask);
27931 else
27932 arm_emit_multi_reg_pop (saved_regs_mask);
27934 else
27935 arm_emit_multi_reg_pop (saved_regs_mask);
27938 if (return_in_pc == true)
27939 return;
27942 if (crtl->args.pretend_args_size)
27944 int i, j;
27945 rtx dwarf = NULL_RTX;
27946 rtx_insn *tmp =
27947 emit_insn (gen_addsi3 (stack_pointer_rtx,
27948 stack_pointer_rtx,
27949 GEN_INT (crtl->args.pretend_args_size)));
27951 RTX_FRAME_RELATED_P (tmp) = 1;
27953 if (cfun->machine->uses_anonymous_args)
27955 /* Restore pretend args. Refer to arm_expand_prologue for how
27956 pretend args are saved on the stack. */
27957 int num_regs = crtl->args.pretend_args_size / 4;
27958 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
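/* (0xf0 >> num_regs) & 0xf selects the top NUM_REGS argument registers;
   e.g. 8 bytes of pretend args give num_regs == 2 and a mask of 0xc,
   i.e. {r2, r3}.  */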
27959 for (j = 0, i = 0; j < num_regs; i++)
27960 if (saved_regs_mask & (1 << i))
27962 rtx reg = gen_rtx_REG (SImode, i);
27963 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27964 j++;
27966 REG_NOTES (tmp) = dwarf;
27968 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27969 stack_pointer_rtx, stack_pointer_rtx);
27972 if (!really_return)
27973 return;
27975 if (crtl->calls_eh_return)
27976 emit_insn (gen_addsi3 (stack_pointer_rtx,
27977 stack_pointer_rtx,
27978 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27980 if (IS_STACKALIGN (func_type))
27981 /* Restore the original stack pointer. Before prologue, the stack was
27982 realigned and the original stack pointer saved in r0. For details,
27983 see comment in arm_expand_prologue. */
27984 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27986 emit_jump_insn (simple_return_rtx);
27989 /* Implementation of insn prologue_thumb1_interwork. This is the first
27990 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27992 const char *
27993 thumb1_output_interwork (void)
27995 const char * name;
27996 FILE *f = asm_out_file;
27998 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27999 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28000 == SYMBOL_REF);
28001 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28003 /* Generate code sequence to switch us into Thumb mode. */
28004 /* The .code 32 directive has already been emitted by
28005 ASM_DECLARE_FUNCTION_NAME. */
28006 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28007 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28009 /* Generate a label, so that the debugger will notice the
28010 change in instruction sets. This label is also used by
28011 the assembler to bypass the ARM code when this function
28012 is called from a Thumb encoded function elsewhere in the
28013 same file. Hence the definition of STUB_NAME here must
28014 agree with the definition in gas/config/tc-arm.c. */
28016 #define STUB_NAME ".real_start_of"
28018 fprintf (f, "\t.code\t16\n");
28019 #ifdef ARM_PE
28020 if (arm_dllexport_name_p (name))
28021 name = arm_strip_name_encoding (name);
28022 #endif
28023 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28024 fprintf (f, "\t.thumb_func\n");
28025 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
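/* For a function `foo' (assuming an empty user label prefix) the emitted
   sequence therefore looks roughly like:
        orr     ip, pc, #1
        bx      ip
        .code   16
        .globl  .real_start_of_foo
        .thumb_func
   .real_start_of_foo:  */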
28027 return "";
28030 /* Handle the case of a double word load into a low register from
28031 a computed memory address. The computed address may involve a
28032 register which is overwritten by the load. */
28033 const char *
28034 thumb_load_double_from_address (rtx *operands)
28036 rtx addr;
28037 rtx base;
28038 rtx offset;
28039 rtx arg1;
28040 rtx arg2;
28042 gcc_assert (REG_P (operands[0]));
28043 gcc_assert (MEM_P (operands[1]));
28045 /* Get the memory address. */
28046 addr = XEXP (operands[1], 0);
28048 /* Work out how the memory address is computed. */
28049 switch (GET_CODE (addr))
28051 case REG:
28052 operands[2] = adjust_address (operands[1], SImode, 4);
28054 if (REGNO (operands[0]) == REGNO (addr))
28056 output_asm_insn ("ldr\t%H0, %2", operands);
28057 output_asm_insn ("ldr\t%0, %1", operands);
28059 else
28061 output_asm_insn ("ldr\t%0, %1", operands);
28062 output_asm_insn ("ldr\t%H0, %2", operands);
28064 break;
28066 case CONST:
28067 /* Compute <address> + 4 for the high order load. */
28068 operands[2] = adjust_address (operands[1], SImode, 4);
28070 output_asm_insn ("ldr\t%0, %1", operands);
28071 output_asm_insn ("ldr\t%H0, %2", operands);
28072 break;
28074 case PLUS:
28075 arg1 = XEXP (addr, 0);
28076 arg2 = XEXP (addr, 1);
28078 if (CONSTANT_P (arg1))
28079 base = arg2, offset = arg1;
28080 else
28081 base = arg1, offset = arg2;
28083 gcc_assert (REG_P (base));
28085 /* Catch the case of <address> = <reg> + <reg> */
28086 if (REG_P (offset))
28088 int reg_offset = REGNO (offset);
28089 int reg_base = REGNO (base);
28090 int reg_dest = REGNO (operands[0]);
28092 /* Add the base and offset registers together into the
28093 higher destination register. */
28094 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28095 reg_dest + 1, reg_base, reg_offset);
28097 /* Load the lower destination register from the address in
28098 the higher destination register. */
28099 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28100 reg_dest, reg_dest + 1);
28102 /* Load the higher destination register from its own address
28103 plus 4. */
28104 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28105 reg_dest + 1, reg_dest + 1);
28107 else
28109 /* Compute <address> + 4 for the high order load. */
28110 operands[2] = adjust_address (operands[1], SImode, 4);
28112 /* If the computed address is held in the low order register
28113 then load the high order register first, otherwise always
28114 load the low order register first. */
28115 if (REGNO (operands[0]) == REGNO (base))
28117 output_asm_insn ("ldr\t%H0, %2", operands);
28118 output_asm_insn ("ldr\t%0, %1", operands);
28120 else
28122 output_asm_insn ("ldr\t%0, %1", operands);
28123 output_asm_insn ("ldr\t%H0, %2", operands);
28126 break;
28128 case LABEL_REF:
28129 /* With no registers to worry about we can just load the value
28130 directly. */
28131 operands[2] = adjust_address (operands[1], SImode, 4);
28133 output_asm_insn ("ldr\t%H0, %2", operands);
28134 output_asm_insn ("ldr\t%0, %1", operands);
28135 break;
28137 default:
28138 gcc_unreachable ();
28141 return "";
28144 const char *
28145 thumb_output_move_mem_multiple (int n, rtx *operands)
28147 rtx tmp;
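/* The ldmia/stmia register lists printed below must be in ascending
   register order, so the register operands are sorted first with the
   swaps below.  */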
28149 switch (n)
28151 case 2:
28152 if (REGNO (operands[4]) > REGNO (operands[5]))
28154 tmp = operands[4];
28155 operands[4] = operands[5];
28156 operands[5] = tmp;
28158 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28159 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28160 break;
28162 case 3:
28163 if (REGNO (operands[4]) > REGNO (operands[5]))
28165 tmp = operands[4];
28166 operands[4] = operands[5];
28167 operands[5] = tmp;
28169 if (REGNO (operands[5]) > REGNO (operands[6]))
28171 tmp = operands[5];
28172 operands[5] = operands[6];
28173 operands[6] = tmp;
28175 if (REGNO (operands[4]) > REGNO (operands[5]))
28177 tmp = operands[4];
28178 operands[4] = operands[5];
28179 operands[5] = tmp;
28182 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28183 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28184 break;
28186 default:
28187 gcc_unreachable ();
28190 return "";
28193 /* Output a call-via instruction for thumb state. */
28194 const char *
28195 thumb_call_via_reg (rtx reg)
28197 int regno = REGNO (reg);
28198 rtx *labelp;
28200 gcc_assert (regno < LR_REGNUM);
28202 /* If we are in the normal text section we can use a single instance
28203 per compilation unit. If we are doing function sections, then we need
28204 an entry per section, since we can't rely on reachability. */
28205 if (in_section == text_section)
28207 thumb_call_reg_needed = 1;
28209 if (thumb_call_via_label[regno] == NULL)
28210 thumb_call_via_label[regno] = gen_label_rtx ();
28211 labelp = thumb_call_via_label + regno;
28213 else
28215 if (cfun->machine->call_via[regno] == NULL)
28216 cfun->machine->call_via[regno] = gen_label_rtx ();
28217 labelp = cfun->machine->call_via + regno;
28220 output_asm_insn ("bl\t%a0", labelp);
28221 return "";
28224 /* Routines for generating rtl. */
28225 void
28226 thumb_expand_movmemqi (rtx *operands)
28228 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28229 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28230 HOST_WIDE_INT len = INTVAL (operands[2]);
28231 HOST_WIDE_INT offset = 0;
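/* Copy in 12- and 8-byte chunks using the multi-register move patterns
   (which post-increment both pointers), then mop up the remainder a word,
   a halfword and a byte at a time, tracking that tail with OFFSET.  */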
28233 while (len >= 12)
28235 emit_insn (gen_movmem12b (out, in, out, in));
28236 len -= 12;
28239 if (len >= 8)
28241 emit_insn (gen_movmem8b (out, in, out, in));
28242 len -= 8;
28245 if (len >= 4)
28247 rtx reg = gen_reg_rtx (SImode);
28248 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28249 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28250 len -= 4;
28251 offset += 4;
28254 if (len >= 2)
28256 rtx reg = gen_reg_rtx (HImode);
28257 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28258 plus_constant (Pmode, in,
28259 offset))));
28260 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28261 offset)),
28262 reg));
28263 len -= 2;
28264 offset += 2;
28267 if (len)
28269 rtx reg = gen_reg_rtx (QImode);
28270 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28271 plus_constant (Pmode, in,
28272 offset))));
28273 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28274 offset)),
28275 reg));
28279 void
28280 thumb_reload_out_hi (rtx *operands)
28282 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28285 /* Handle reading a half-word from memory during reload. */
28286 void
28287 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28289 gcc_unreachable ();
28292 /* Return the length of a function name prefix
28293 that starts with the character 'c'. */
28294 static int
28295 arm_get_strip_length (int c)
28297 switch (c)
28299 ARM_NAME_ENCODING_LENGTHS
28300 default: return 0;
28304 /* Return a pointer to a function's name with any
28305 and all prefix encodings stripped from it. */
28306 const char *
28307 arm_strip_name_encoding (const char *name)
28309 int skip;
28311 while ((skip = arm_get_strip_length (* name)))
28312 name += skip;
28314 return name;
28317 /* If there is a '*' anywhere in the name's prefix, then
28318 emit the stripped name verbatim, otherwise prepend an
28319 underscore if leading underscores are being used. */
28320 void
28321 arm_asm_output_labelref (FILE *stream, const char *name)
28323 int skip;
28324 int verbatim = 0;
28326 while ((skip = arm_get_strip_length (* name)))
28328 verbatim |= (*name == '*');
28329 name += skip;
28332 if (verbatim)
28333 fputs (name, stream);
28334 else
28335 asm_fprintf (stream, "%U%s", name);
28338 /* This function is used to emit an EABI tag and its associated value.
28339 We emit the numerical value of the tag in case the assembler does not
28340 support textual tags (e.g. gas prior to 2.20). If requested we include
28341 the tag name in a comment so that anyone reading the assembler output
28342 will know which tag is being set.
28344 This function is not static because arm-c.c needs it too. */
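/* For example, arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, 2)
   emits ".eabi_attribute 30, 2", followed by the tag name as an assembler
   comment when verbose or debug assembly output is requested.  */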
28346 void
28347 arm_emit_eabi_attribute (const char *name, int num, int val)
28349 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28350 if (flag_verbose_asm || flag_debug_asm)
28351 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28352 asm_fprintf (asm_out_file, "\n");
28355 static void
28356 arm_file_start (void)
28358 int val;
28360 if (TARGET_UNIFIED_ASM)
28361 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28363 if (TARGET_BPABI)
28365 const char *fpu_name;
28366 if (arm_selected_arch)
28368 /* armv7ve doesn't support any extensions. */
28369 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28371 /* Keep backward compatibility for assemblers
28372 which don't support armv7ve. */
28373 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28374 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28375 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28376 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28377 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28379 else
28381 const char* pos = strchr (arm_selected_arch->name, '+');
28382 if (pos)
28384 char buf[15];
28385 gcc_assert (strlen (arm_selected_arch->name)
28386 <= sizeof (buf) / sizeof (*pos));
28387 strncpy (buf, arm_selected_arch->name,
28388 (pos - arm_selected_arch->name) * sizeof (*pos));
28389 buf[pos - arm_selected_arch->name] = '\0';
28390 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28391 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28393 else
28394 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28397 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28398 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28399 else
28401 const char* truncated_name
28402 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28403 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28406 if (TARGET_SOFT_FLOAT)
28408 fpu_name = "softvfp";
28410 else
28412 fpu_name = arm_fpu_desc->name;
28413 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28415 if (TARGET_HARD_FLOAT)
28416 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28417 if (TARGET_HARD_FLOAT_ABI)
28418 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28421 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28423 /* Some of these attributes only apply when the corresponding features
28424 are used. However we don't have any easy way of figuring this out.
28425 Conservatively record the setting that would have been used. */
28427 if (flag_rounding_math)
28428 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28430 if (!flag_unsafe_math_optimizations)
28432 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28433 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28435 if (flag_signaling_nans)
28436 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28438 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28439 flag_finite_math_only ? 1 : 3);
28441 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28442 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28443 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28444 flag_short_enums ? 1 : 2);
28446 /* Tag_ABI_optimization_goals. */
28447 if (optimize_size)
28448 val = 4;
28449 else if (optimize >= 2)
28450 val = 2;
28451 else if (optimize)
28452 val = 1;
28453 else
28454 val = 6;
28455 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28457 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28458 unaligned_access);
28460 if (arm_fp16_format)
28461 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28462 (int) arm_fp16_format);
28464 if (arm_lang_output_object_attributes_hook)
28465 arm_lang_output_object_attributes_hook();
28468 default_file_start ();
28471 static void
28472 arm_file_end (void)
28474 int regno;
28476 if (NEED_INDICATE_EXEC_STACK)
28477 /* Add .note.GNU-stack. */
28478 file_end_indicate_exec_stack ();
28480 if (! thumb_call_reg_needed)
28481 return;
28483 switch_to_section (text_section);
28484 asm_fprintf (asm_out_file, "\t.code 16\n");
28485 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28487 for (regno = 0; regno < LR_REGNUM; regno++)
28489 rtx label = thumb_call_via_label[regno];
28491 if (label != 0)
28493 targetm.asm_out.internal_label (asm_out_file, "L",
28494 CODE_LABEL_NUMBER (label));
28495 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28500 #ifndef ARM_PE
28501 /* Symbols in the text segment can be accessed without indirecting via the
28502 constant pool; it may take an extra binary operation, but this is still
28503 faster than indirecting via memory. Don't do this when not optimizing,
28504 since we won't be calculating all of the offsets necessary to do this
28505 simplification. */
28507 static void
28508 arm_encode_section_info (tree decl, rtx rtl, int first)
28510 if (optimize > 0 && TREE_CONSTANT (decl))
28511 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28513 default_encode_section_info (decl, rtl, first);
28515 #endif /* !ARM_PE */
28517 static void
28518 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28520 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28521 && !strcmp (prefix, "L"))
28523 arm_ccfsm_state = 0;
28524 arm_target_insn = NULL;
28526 default_internal_label (stream, prefix, labelno);
28529 /* Output code to add DELTA to the first argument, and then jump
28530 to FUNCTION. Used for C++ multiple inheritance. */
28531 static void
28532 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28533 HOST_WIDE_INT delta,
28534 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28535 tree function)
28537 static int thunk_label = 0;
28538 char label[256];
28539 char labelpc[256];
28540 int mi_delta = delta;
28541 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28542 int shift = 0;
28543 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28544 ? 1 : 0);
28545 if (mi_delta < 0)
28546 mi_delta = - mi_delta;
28548 final_start_function (emit_barrier (), file, 1);
28550 if (TARGET_THUMB1)
28552 int labelno = thunk_label++;
28553 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28554 /* Thunks are entered in ARM mode when available. */
28555 if (TARGET_THUMB1_ONLY)
28557 /* push r3 so we can use it as a temporary. */
28558 /* TODO: Omit this save if r3 is not used. */
28559 fputs ("\tpush {r3}\n", file);
28560 fputs ("\tldr\tr3, ", file);
28562 else
28564 fputs ("\tldr\tr12, ", file);
28566 assemble_name (file, label);
28567 fputc ('\n', file);
28568 if (flag_pic)
28570 /* If we are generating PIC, the ldr instruction below loads
28571 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28572 the address of the add + 8, so we have:
28574 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28575 = target + 1.
28577 Note that we have "+ 1" because some versions of GNU ld
28578 don't set the low bit of the result for R_ARM_REL32
28579 relocations against thumb function symbols.
28580 On ARMv6M this is +4, not +8. */
28581 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28582 assemble_name (file, labelpc);
28583 fputs (":\n", file);
28584 if (TARGET_THUMB1_ONLY)
28586 /* This is 2 insns after the start of the thunk, so we know it
28587 is 4-byte aligned. */
28588 fputs ("\tadd\tr3, pc, r3\n", file);
28589 fputs ("\tmov r12, r3\n", file);
28591 else
28592 fputs ("\tadd\tr12, pc, r12\n", file);
28594 else if (TARGET_THUMB1_ONLY)
28595 fputs ("\tmov r12, r3\n", file);
28597 if (TARGET_THUMB1_ONLY)
28599 if (mi_delta > 255)
28601 fputs ("\tldr\tr3, ", file);
28602 assemble_name (file, label);
28603 fputs ("+4\n", file);
28604 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28605 mi_op, this_regno, this_regno);
28607 else if (mi_delta != 0)
28609 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28610 mi_op, this_regno, this_regno,
28611 mi_delta);
28614 else
28616 /* TODO: Use movw/movt for large constants when available. */
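/* Each add/sub emitted below carries an 8-bit chunk of the delta shifted
   to an even bit position, i.e. a value representable as an ARM
   data-processing immediate (an 8-bit constant rotated by an even amount).  */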
28617 while (mi_delta != 0)
28619 if ((mi_delta & (3 << shift)) == 0)
28620 shift += 2;
28621 else
28623 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28624 mi_op, this_regno, this_regno,
28625 mi_delta & (0xff << shift));
28626 mi_delta &= ~(0xff << shift);
28627 shift += 8;
28631 if (TARGET_THUMB1)
28633 if (TARGET_THUMB1_ONLY)
28634 fputs ("\tpop\t{r3}\n", file);
28636 fprintf (file, "\tbx\tr12\n");
28637 ASM_OUTPUT_ALIGN (file, 2);
28638 assemble_name (file, label);
28639 fputs (":\n", file);
28640 if (flag_pic)
28642 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28643 rtx tem = XEXP (DECL_RTL (function), 0);
28644 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28645 pipeline offset is four rather than eight. Adjust the offset
28646 accordingly. */
28647 tem = plus_constant (GET_MODE (tem), tem,
28648 TARGET_THUMB1_ONLY ? -3 : -7);
28649 tem = gen_rtx_MINUS (GET_MODE (tem),
28650 tem,
28651 gen_rtx_SYMBOL_REF (Pmode,
28652 ggc_strdup (labelpc)));
28653 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28655 else
28656 /* Output ".word .LTHUNKn". */
28657 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28659 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28660 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28662 else
28664 fputs ("\tb\t", file);
28665 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28666 if (NEED_PLT_RELOC)
28667 fputs ("(PLT)", file);
28668 fputc ('\n', file);
28671 final_end_function ();
28675 arm_emit_vector_const (FILE *file, rtx x)
28677 int i;
28678 const char * pattern;
28680 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28682 switch (GET_MODE (x))
28684 case V2SImode: pattern = "%08x"; break;
28685 case V4HImode: pattern = "%04x"; break;
28686 case V8QImode: pattern = "%02x"; break;
28687 default: gcc_unreachable ();
28690 fprintf (file, "0x");
28691 for (i = CONST_VECTOR_NUNITS (x); i--;)
28693 rtx element;
28695 element = CONST_VECTOR_ELT (x, i);
28696 fprintf (file, pattern, INTVAL (element));
28699 return 1;
28702 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
28703 HFmode constant pool entries are actually loaded with ldr. */
28704 void
28705 arm_emit_fp16_const (rtx c)
28707 REAL_VALUE_TYPE r;
28708 long bits;
28710 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28711 bits = real_to_target (NULL, &r, HFmode);
28712 if (WORDS_BIG_ENDIAN)
28713 assemble_zeros (2);
28714 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28715 if (!WORDS_BIG_ENDIAN)
28716 assemble_zeros (2);
28719 const char *
28720 arm_output_load_gr (rtx *operands)
28722 rtx reg;
28723 rtx offset;
28724 rtx wcgr;
28725 rtx sum;
28727 if (!MEM_P (operands [1])
28728 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28729 || !REG_P (reg = XEXP (sum, 0))
28730 || !CONST_INT_P (offset = XEXP (sum, 1))
28731 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28732 return "wldrw%?\t%0, %1";
28734 /* Fix up an out-of-range load of a GR register. */
28735 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28736 wcgr = operands[0];
28737 operands[0] = reg;
28738 output_asm_insn ("ldr%?\t%0, %1", operands);
28740 operands[0] = wcgr;
28741 operands[1] = reg;
28742 output_asm_insn ("tmcr%?\t%0, %1", operands);
28743 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28745 return "";
28748 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28750 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28751 named arg and all anonymous args onto the stack.
28752 XXX I know the prologue shouldn't be pushing registers, but it is faster
28753 that way. */
28755 static void
28756 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28757 enum machine_mode mode,
28758 tree type,
28759 int *pretend_size,
28760 int second_time ATTRIBUTE_UNUSED)
28762 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28763 int nregs;
28765 cfun->machine->uses_anonymous_args = 1;
28766 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28768 nregs = pcum->aapcs_ncrn;
28769 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28770 nregs++;
28772 else
28773 nregs = pcum->nregs;
28775 if (nregs < NUM_ARG_REGS)
28776 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28779 /* We can't rely on the caller doing the proper promotion when
28780 using APCS or ATPCS. */
28782 static bool
28783 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28785 return !TARGET_AAPCS_BASED;
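/* Implements target hook promote_function_mode: integer arguments and
   return values narrower than a word are promoted to SImode.  */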
28788 static enum machine_mode
28789 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28790 enum machine_mode mode,
28791 int *punsignedp ATTRIBUTE_UNUSED,
28792 const_tree fntype ATTRIBUTE_UNUSED,
28793 int for_return ATTRIBUTE_UNUSED)
28795 if (GET_MODE_CLASS (mode) == MODE_INT
28796 && GET_MODE_SIZE (mode) < 4)
28797 return SImode;
28799 return mode;
28802 /* AAPCS based ABIs use short enums by default. */
28804 static bool
28805 arm_default_short_enums (void)
28807 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28811 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28813 static bool
28814 arm_align_anon_bitfield (void)
28816 return TARGET_AAPCS_BASED;
28820 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28822 static tree
28823 arm_cxx_guard_type (void)
28825 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28829 /* The EABI says test the least significant bit of a guard variable. */
28831 static bool
28832 arm_cxx_guard_mask_bit (void)
28834 return TARGET_AAPCS_BASED;
28838 /* The EABI specifies that all array cookies are 8 bytes long. */
28840 static tree
28841 arm_get_cookie_size (tree type)
28843 tree size;
28845 if (!TARGET_AAPCS_BASED)
28846 return default_cxx_get_cookie_size (type);
28848 size = build_int_cst (sizetype, 8);
28849 return size;
28853 /* The EABI says that array cookies should also contain the element size. */
28855 static bool
28856 arm_cookie_has_size (void)
28858 return TARGET_AAPCS_BASED;
28862 /* The EABI says constructors and destructors should return a pointer to
28863 the object constructed/destroyed. */
28865 static bool
28866 arm_cxx_cdtor_returns_this (void)
28868 return TARGET_AAPCS_BASED;
28871 /* The EABI says that an inline function may never be the key
28872 method. */
28874 static bool
28875 arm_cxx_key_method_may_be_inline (void)
28877 return !TARGET_AAPCS_BASED;
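/* Determine the ELF visibility to give to C++ class data DECL, following
   the EABI rules described in the body below.  */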
28880 static void
28881 arm_cxx_determine_class_data_visibility (tree decl)
28883 if (!TARGET_AAPCS_BASED
28884 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28885 return;
28887 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28888 is exported. However, on systems without dynamic vague linkage,
28889 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28890 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28891 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28892 else
28893 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28894 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28897 static bool
28898 arm_cxx_class_data_always_comdat (void)
28900 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28901 vague linkage if the class has no key function. */
28902 return !TARGET_AAPCS_BASED;
28906 /* The EABI says __aeabi_atexit should be used to register static
28907 destructors. */
28909 static bool
28910 arm_cxx_use_aeabi_atexit (void)
28912 return TARGET_AAPCS_BASED;
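/* Set the return address of the current function to SOURCE.  If LR was not
   saved on the stack it is simply moved into the LR register; otherwise
   SOURCE is stored into the stack slot that holds the saved LR, using
   SCRATCH to build the address when the offset is out of range.  */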
28916 void
28917 arm_set_return_address (rtx source, rtx scratch)
28919 arm_stack_offsets *offsets;
28920 HOST_WIDE_INT delta;
28921 rtx addr;
28922 unsigned long saved_regs;
28924 offsets = arm_get_frame_offsets ();
28925 saved_regs = offsets->saved_regs_mask;
28927 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28928 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28929 else
28931 if (frame_pointer_needed)
28932 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28933 else
28935 /* LR will be the first saved register. */
28936 delta = offsets->outgoing_args - (offsets->frame + 4);
28939 if (delta >= 4096)
28941 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28942 GEN_INT (delta & ~4095)));
28943 addr = scratch;
28944 delta &= 4095;
28946 else
28947 addr = stack_pointer_rtx;
28949 addr = plus_constant (Pmode, addr, delta);
28951 emit_move_insn (gen_frame_mem (Pmode, addr), source);
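/* Thumb counterpart of the above: locate the stack slot holding the saved
   LR (allowing for a Thumb-1 backtrace structure) and store SOURCE there,
   or move SOURCE directly into LR if it was not saved.  */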
28956 void
28957 thumb_set_return_address (rtx source, rtx scratch)
28959 arm_stack_offsets *offsets;
28960 HOST_WIDE_INT delta;
28961 HOST_WIDE_INT limit;
28962 int reg;
28963 rtx addr;
28964 unsigned long mask;
28966 emit_use (source);
28968 offsets = arm_get_frame_offsets ();
28969 mask = offsets->saved_regs_mask;
28970 if (mask & (1 << LR_REGNUM))
28972 limit = 1024;
28973 /* Find the saved regs. */
28974 if (frame_pointer_needed)
28976 delta = offsets->soft_frame - offsets->saved_args;
28977 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28978 if (TARGET_THUMB1)
28979 limit = 128;
28981 else
28983 delta = offsets->outgoing_args - offsets->saved_args;
28984 reg = SP_REGNUM;
28986 /* Allow for the stack frame. */
28987 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28988 delta -= 16;
28989 /* The link register is always the first saved register. */
28990 delta -= 4;
28992 /* Construct the address. */
28993 addr = gen_rtx_REG (SImode, reg);
28994 if (delta > limit)
28996 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28997 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28998 addr = scratch;
29000 else
29001 addr = plus_constant (Pmode, addr, delta);
29003 emit_move_insn (gen_frame_mem (Pmode, addr), source);
29005 else
29006 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29009 /* Implements target hook vector_mode_supported_p. */
29010 bool
29011 arm_vector_mode_supported_p (enum machine_mode mode)
29013 /* Neon also supports V2SImode, etc. listed in the clause below. */
29014 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29015 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
29016 return true;
29018 if ((TARGET_NEON || TARGET_IWMMXT)
29019 && ((mode == V2SImode)
29020 || (mode == V4HImode)
29021 || (mode == V8QImode)))
29022 return true;
29024 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29025 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29026 || mode == V2HAmode))
29027 return true;
29029 return false;
29032 /* Implements target hook array_mode_supported_p. */
29034 static bool
29035 arm_array_mode_supported_p (enum machine_mode mode,
29036 unsigned HOST_WIDE_INT nelems)
29038 if (TARGET_NEON
29039 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29040 && (nelems >= 2 && nelems <= 4))
29041 return true;
29043 return false;
29046 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29047 registers when autovectorizing for Neon, at least until multiple vector
29048 widths are supported properly by the middle-end. */
29050 static enum machine_mode
29051 arm_preferred_simd_mode (enum machine_mode mode)
29053 if (TARGET_NEON)
29054 switch (mode)
29056 case SFmode:
29057 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29058 case SImode:
29059 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29060 case HImode:
29061 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29062 case QImode:
29063 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29064 case DImode:
29065 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29066 return V2DImode;
29067 break;
29069 default:;
29072 if (TARGET_REALLY_IWMMXT)
29073 switch (mode)
29075 case SImode:
29076 return V2SImode;
29077 case HImode:
29078 return V4HImode;
29079 case QImode:
29080 return V8QImode;
29082 default:;
29085 return word_mode;
29088 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29090 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29091 using r0-r4 for function arguments, r7 for the stack frame and don't have
29092 enough left over to do doubleword arithmetic. For Thumb-2 all the
29093 potentially problematic instructions accept high registers so this is not
29094 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29095 that require many low registers. */
29096 static bool
29097 arm_class_likely_spilled_p (reg_class_t rclass)
29099 if ((TARGET_THUMB1 && rclass == LO_REGS)
29100 || rclass == CC_REG)
29101 return true;
29103 return false;
29106 /* Implements target hook small_register_classes_for_mode_p. */
29107 bool
29108 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
29110 return TARGET_THUMB1;
29113 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29114 ARM insns and therefore guarantee that the shift count is modulo 256.
29115 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29116 guarantee no particular behavior for out-of-range counts. */
29118 static unsigned HOST_WIDE_INT
29119 arm_shift_truncation_mask (enum machine_mode mode)
29121 return mode == SImode ? 255 : 0;
29125 /* Map internal gcc register numbers to DWARF2 register numbers. */
29127 unsigned int
29128 arm_dbx_register_number (unsigned int regno)
29130 if (regno < 16)
29131 return regno;
29133 if (IS_VFP_REGNUM (regno))
29135 /* See comment in arm_dwarf_register_span. */
29136 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29137 return 64 + regno - FIRST_VFP_REGNUM;
29138 else
29139 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29142 if (IS_IWMMXT_GR_REGNUM (regno))
29143 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29145 if (IS_IWMMXT_REGNUM (regno))
29146 return 112 + regno - FIRST_IWMMXT_REGNUM;
29148 gcc_unreachable ();
29151 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29152 GCC models them as 64 32-bit registers, so we need to describe this to
29153 the DWARF generation code. Other registers can use the default. */
29154 static rtx
29155 arm_dwarf_register_span (rtx rtl)
29157 enum machine_mode mode;
29158 unsigned regno;
29159 rtx parts[16];
29160 int nregs;
29161 int i;
29163 regno = REGNO (rtl);
29164 if (!IS_VFP_REGNUM (regno))
29165 return NULL_RTX;
29167 /* XXX FIXME: The EABI defines two VFP register ranges:
29168 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29169 256-287: D0-D31
29170 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29171 corresponding D register. Until GDB supports this, we shall use the
29172 legacy encodings. We also use these encodings for D0-D15 for
29173 compatibility with older debuggers. */
29174 mode = GET_MODE (rtl);
29175 if (GET_MODE_SIZE (mode) < 8)
29176 return NULL_RTX;
29178 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29180 nregs = GET_MODE_SIZE (mode) / 4;
29181 for (i = 0; i < nregs; i += 2)
29182 if (TARGET_BIG_END)
29184 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29185 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29187 else
29189 parts[i] = gen_rtx_REG (SImode, regno + i);
29190 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29193 else
29195 nregs = GET_MODE_SIZE (mode) / 8;
29196 for (i = 0; i < nregs; i++)
29197 parts[i] = gen_rtx_REG (DImode, regno + i);
29200 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29203 #if ARM_UNWIND_INFO
29204 /* Emit unwind directives for a store-multiple instruction or stack pointer
29205 push during alignment.
29206 These should only ever be generated by the function prologue code, so
29207 expect them to have a particular form.
29208 The store-multiple instruction sometimes pushes pc as the last register,
29209 although it should not be tracked into unwind information, or for -Os
29210 sometimes pushes some dummy registers before the first register that needs
29211 to be tracked in unwind information; such dummy registers are there just
29212 to avoid separate stack adjustment, and will not be restored in the
29213 epilogue. */
29215 static void
29216 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29218 int i;
29219 HOST_WIDE_INT offset;
29220 HOST_WIDE_INT nregs;
29221 int reg_size;
29222 unsigned reg;
29223 unsigned lastreg;
29224 unsigned padfirst = 0, padlast = 0;
29225 rtx e;
29227 e = XVECEXP (p, 0, 0);
29228 gcc_assert (GET_CODE (e) == SET);
29230 /* First insn will adjust the stack pointer. */
29231 gcc_assert (GET_CODE (e) == SET
29232 && REG_P (SET_DEST (e))
29233 && REGNO (SET_DEST (e)) == SP_REGNUM
29234 && GET_CODE (SET_SRC (e)) == PLUS);
29236 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29237 nregs = XVECLEN (p, 0) - 1;
29238 gcc_assert (nregs);
29240 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29241 if (reg < 16)
29243 /* For -Os dummy registers can be pushed at the beginning to
29244 avoid separate stack pointer adjustment. */
29245 e = XVECEXP (p, 0, 1);
29246 e = XEXP (SET_DEST (e), 0);
29247 if (GET_CODE (e) == PLUS)
29248 padfirst = INTVAL (XEXP (e, 1));
29249 gcc_assert (padfirst == 0 || optimize_size);
29250 /* The function prologue may also push pc, but not annotate it as it is
29251 never restored. We turn this into a stack pointer adjustment. */
29252 e = XVECEXP (p, 0, nregs);
29253 e = XEXP (SET_DEST (e), 0);
29254 if (GET_CODE (e) == PLUS)
29255 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29256 else
29257 padlast = offset - 4;
29258 gcc_assert (padlast == 0 || padlast == 4);
29259 if (padlast == 4)
29260 fprintf (asm_out_file, "\t.pad #4\n");
29261 reg_size = 4;
29262 fprintf (asm_out_file, "\t.save {");
29264 else if (IS_VFP_REGNUM (reg))
29266 reg_size = 8;
29267 fprintf (asm_out_file, "\t.vsave {");
29269 else
29270 /* Unknown register type. */
29271 gcc_unreachable ();
29273 /* If the stack increment doesn't match the size of the saved registers,
29274 something has gone horribly wrong. */
29275 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29277 offset = padfirst;
29278 lastreg = 0;
29279 /* The remaining insns will describe the stores. */
29280 for (i = 1; i <= nregs; i++)
29282 /* Expect (set (mem <addr>) (reg)).
29283 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29284 e = XVECEXP (p, 0, i);
29285 gcc_assert (GET_CODE (e) == SET
29286 && MEM_P (SET_DEST (e))
29287 && REG_P (SET_SRC (e)));
29289 reg = REGNO (SET_SRC (e));
29290 gcc_assert (reg >= lastreg);
29292 if (i != 1)
29293 fprintf (asm_out_file, ", ");
29294 /* We can't use %r for vfp because we need to use the
29295 double precision register names. */
29296 if (IS_VFP_REGNUM (reg))
29297 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29298 else
29299 asm_fprintf (asm_out_file, "%r", reg);
29301 #ifdef ENABLE_CHECKING
29302 /* Check that the addresses are consecutive. */
29303 e = XEXP (SET_DEST (e), 0);
29304 if (GET_CODE (e) == PLUS)
29305 gcc_assert (REG_P (XEXP (e, 0))
29306 && REGNO (XEXP (e, 0)) == SP_REGNUM
29307 && CONST_INT_P (XEXP (e, 1))
29308 && offset == INTVAL (XEXP (e, 1)));
29309 else
29310 gcc_assert (i == 1
29311 && REG_P (e)
29312 && REGNO (e) == SP_REGNUM);
29313 offset += reg_size;
29314 #endif
29316 fprintf (asm_out_file, "}\n");
29317 if (padfirst)
29318 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29321 /* Emit unwind directives for a SET. */
29323 static void
29324 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29326 rtx e0;
29327 rtx e1;
29328 unsigned reg;
29330 e0 = XEXP (p, 0);
29331 e1 = XEXP (p, 1);
29332 switch (GET_CODE (e0))
29334 case MEM:
29335 /* Pushing a single register. */
29336 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29337 || !REG_P (XEXP (XEXP (e0, 0), 0))
29338 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29339 abort ();
29341 asm_fprintf (asm_out_file, "\t.save ");
29342 if (IS_VFP_REGNUM (REGNO (e1)))
29343 asm_fprintf(asm_out_file, "{d%d}\n",
29344 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29345 else
29346 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29347 break;
29349 case REG:
29350 if (REGNO (e0) == SP_REGNUM)
29352 /* A stack increment. */
29353 if (GET_CODE (e1) != PLUS
29354 || !REG_P (XEXP (e1, 0))
29355 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29356 || !CONST_INT_P (XEXP (e1, 1)))
29357 abort ();
29359 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29360 -INTVAL (XEXP (e1, 1)));
29362 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29364 HOST_WIDE_INT offset;
29366 if (GET_CODE (e1) == PLUS)
29368 if (!REG_P (XEXP (e1, 0))
29369 || !CONST_INT_P (XEXP (e1, 1)))
29370 abort ();
29371 reg = REGNO (XEXP (e1, 0));
29372 offset = INTVAL (XEXP (e1, 1));
29373 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29374 HARD_FRAME_POINTER_REGNUM, reg,
29375 offset);
29377 else if (REG_P (e1))
29379 reg = REGNO (e1);
29380 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29381 HARD_FRAME_POINTER_REGNUM, reg);
29383 else
29384 abort ();
29386 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29388 /* Move from sp to reg. */
29389 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29391 else if (GET_CODE (e1) == PLUS
29392 && REG_P (XEXP (e1, 0))
29393 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29394 && CONST_INT_P (XEXP (e1, 1)))
29396 /* Set reg to offset from sp. */
29397 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29398 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29400 else
29401 abort ();
29402 break;
29404 default:
29405 abort ();
29410 /* Emit unwind directives for the given insn. */
29412 static void
29413 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29415 rtx note, pat;
29416 bool handled_one = false;
29418 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29419 return;
29421 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29422 && (TREE_NOTHROW (current_function_decl)
29423 || crtl->all_throwers_are_sibcalls))
29424 return;
29426 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29427 return;
29429 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29431 switch (REG_NOTE_KIND (note))
29433 case REG_FRAME_RELATED_EXPR:
29434 pat = XEXP (note, 0);
29435 goto found;
29437 case REG_CFA_REGISTER:
29438 pat = XEXP (note, 0);
29439 if (pat == NULL)
29441 pat = PATTERN (insn);
29442 if (GET_CODE (pat) == PARALLEL)
29443 pat = XVECEXP (pat, 0, 0);
29446 /* Only emitted for IS_STACKALIGN re-alignment. */
29448 rtx dest, src;
29449 unsigned reg;
29451 src = SET_SRC (pat);
29452 dest = SET_DEST (pat);
29454 gcc_assert (src == stack_pointer_rtx);
29455 reg = REGNO (dest);
29456 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29457 reg + 0x90, reg);
29459 handled_one = true;
29460 break;
29462 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
29463 to get correct dwarf information for shrink-wrap. We should not
29464 emit unwind information for it because these are used either for
29465 pretend arguments or notes to adjust sp and restore registers from
29466 stack. */
29467 case REG_CFA_DEF_CFA:
29468 case REG_CFA_ADJUST_CFA:
29469 case REG_CFA_RESTORE:
29470 return;
29472 case REG_CFA_EXPRESSION:
29473 case REG_CFA_OFFSET:
29474 /* ??? Only handling here what we actually emit. */
29475 gcc_unreachable ();
29477 default:
29478 break;
29481 if (handled_one)
29482 return;
29483 pat = PATTERN (insn);
29484 found:
29486 switch (GET_CODE (pat))
29488 case SET:
29489 arm_unwind_emit_set (asm_out_file, pat);
29490 break;
29492 case SEQUENCE:
29493 /* Store multiple. */
29494 arm_unwind_emit_sequence (asm_out_file, pat);
29495 break;
29497 default:
29498 abort();
29503 /* Output a reference from a function exception table to the type_info
29504 object X. The EABI specifies that the symbol should be relocated by
29505 an R_ARM_TARGET2 relocation. */
29507 static bool
29508 arm_output_ttype (rtx x)
29510 fputs ("\t.word\t", asm_out_file);
29511 output_addr_const (asm_out_file, x);
29512 /* Use special relocations for symbol references. */
29513 if (!CONST_INT_P (x))
29514 fputs ("(TARGET2)", asm_out_file);
29515 fputc ('\n', asm_out_file);
29517 return TRUE;
29520 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29522 static void
29523 arm_asm_emit_except_personality (rtx personality)
29525 fputs ("\t.personality\t", asm_out_file);
29526 output_addr_const (asm_out_file, personality);
29527 fputc ('\n', asm_out_file);
29530 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29532 static void
29533 arm_asm_init_sections (void)
29535 exception_section = get_unnamed_section (0, output_section_asm_op,
29536 "\t.handlerdata");
29538 #endif /* ARM_UNWIND_INFO */
29540 /* Output unwind directives for the start/end of a function. */
29542 void
29543 arm_output_fn_unwind (FILE * f, bool prologue)
29545 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29546 return;
29548 if (prologue)
29549 fputs ("\t.fnstart\n", f);
29550 else
29552 /* If this function will never be unwound, then mark it as such.
29553 The same condition is used in arm_unwind_emit to suppress
29554 the frame annotations. */
29555 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29556 && (TREE_NOTHROW (current_function_decl)
29557 || crtl->all_throwers_are_sibcalls))
29558 fputs("\t.cantunwind\n", f);
29560 fputs ("\t.fnend\n", f);
29564 static bool
29565 arm_emit_tls_decoration (FILE *fp, rtx x)
29567 enum tls_reloc reloc;
29568 rtx val;
29570 val = XVECEXP (x, 0, 0);
29571 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29573 output_addr_const (fp, val);
29575 switch (reloc)
29577 case TLS_GD32:
29578 fputs ("(tlsgd)", fp);
29579 break;
29580 case TLS_LDM32:
29581 fputs ("(tlsldm)", fp);
29582 break;
29583 case TLS_LDO32:
29584 fputs ("(tlsldo)", fp);
29585 break;
29586 case TLS_IE32:
29587 fputs ("(gottpoff)", fp);
29588 break;
29589 case TLS_LE32:
29590 fputs ("(tpoff)", fp);
29591 break;
29592 case TLS_DESCSEQ:
29593 fputs ("(tlsdesc)", fp);
29594 break;
29595 default:
29596 gcc_unreachable ();
29599 switch (reloc)
29601 case TLS_GD32:
29602 case TLS_LDM32:
29603 case TLS_IE32:
29604 case TLS_DESCSEQ:
29605 fputs (" + (. - ", fp);
29606 output_addr_const (fp, XVECEXP (x, 0, 2));
29607 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29608 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29609 output_addr_const (fp, XVECEXP (x, 0, 3));
29610 fputc (')', fp);
29611 break;
29612 default:
29613 break;
29616 return TRUE;
29619 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29621 static void
29622 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29624 gcc_assert (size == 4);
29625 fputs ("\t.word\t", file);
29626 output_addr_const (file, x);
29627 fputs ("(tlsldo)", file);
29630 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29632 static bool
29633 arm_output_addr_const_extra (FILE *fp, rtx x)
29635 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29636 return arm_emit_tls_decoration (fp, x);
29637 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29639 char label[256];
29640 int labelno = INTVAL (XVECEXP (x, 0, 0));
29642 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29643 assemble_name_raw (fp, label);
29645 return TRUE;
29647 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29649 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29650 if (GOT_PCREL)
29651 fputs ("+.", fp);
29652 fputs ("-(", fp);
29653 output_addr_const (fp, XVECEXP (x, 0, 0));
29654 fputc (')', fp);
29655 return TRUE;
29657 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29659 output_addr_const (fp, XVECEXP (x, 0, 0));
29660 if (GOT_PCREL)
29661 fputs ("+.", fp);
29662 fputs ("-(", fp);
29663 output_addr_const (fp, XVECEXP (x, 0, 1));
29664 fputc (')', fp);
29665 return TRUE;
29667 else if (GET_CODE (x) == CONST_VECTOR)
29668 return arm_emit_vector_const (fp, x);
29670 return FALSE;
29673 /* Output assembly for a shift instruction.
29674 SET_FLAGS determines how the instruction modifies the condition codes.
29675 0 - Do not set condition codes.
29676 1 - Set condition codes.
29677 2 - Use smallest instruction. */
29678 const char *
29679 arm_output_shift(rtx * operands, int set_flags)
29681 char pattern[100];
29682 static const char flag_chars[3] = {'?', '.', '!'};
29683 const char *shift;
29684 HOST_WIDE_INT val;
29685 char c;
29687 c = flag_chars[set_flags];
29688 if (TARGET_UNIFIED_ASM)
29690 shift = shift_op(operands[3], &val);
29691 if (shift)
29693 if (val != -1)
29694 operands[2] = GEN_INT(val);
29695 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29697 else
29698 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29700 else
29701 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29702 output_asm_insn (pattern, operands);
29703 return "";
29706 /* Output assembly for a WMMX immediate shift instruction. */
29707 const char *
29708 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29710 int shift = INTVAL (operands[2]);
29711 char templ[50];
29712 enum machine_mode opmode = GET_MODE (operands[0]);
29714 gcc_assert (shift >= 0);
29716 /* If the shift value in the register versions is > 63 (for D qualifier),
29717 31 (for W qualifier) or 15 (for H qualifier), handle it specially below. */
29718 if (((opmode == V4HImode) && (shift > 15))
29719 || ((opmode == V2SImode) && (shift > 31))
29720 || ((opmode == DImode) && (shift > 63)))
29722 if (wror_or_wsra)
29724 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29725 output_asm_insn (templ, operands);
29726 if (opmode == DImode)
29728 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29729 output_asm_insn (templ, operands);
29732 else
29734 /* The destination register will contain all zeros. */
29735 sprintf (templ, "wzero\t%%0");
29736 output_asm_insn (templ, operands);
29738 return "";
29741 if ((opmode == DImode) && (shift > 32))
29743 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29744 output_asm_insn (templ, operands);
29745 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29746 output_asm_insn (templ, operands);
29748 else
29750 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29751 output_asm_insn (templ, operands);
29753 return "";
29756 /* Output assembly for a WMMX tinsr instruction. */
29757 const char *
29758 arm_output_iwmmxt_tinsr (rtx *operands)
29760 int mask = INTVAL (operands[3]);
29761 int i;
29762 char templ[50];
29763 int units = mode_nunits[GET_MODE (operands[0])];
29764 gcc_assert ((mask & (mask - 1)) == 0);
29765 for (i = 0; i < units; ++i)
29767 if ((mask & 0x01) == 1)
29769 break;
29771 mask >>= 1;
29773 gcc_assert (i < units);
29775 switch (GET_MODE (operands[0]))
29777 case V8QImode:
29778 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29779 break;
29780 case V4HImode:
29781 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29782 break;
29783 case V2SImode:
29784 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29785 break;
29786 default:
29787 gcc_unreachable ();
29788 break;
29790 output_asm_insn (templ, operands);
29792 return "";
29795 /* Output a Thumb-1 casesi dispatch sequence. */
29796 const char *
29797 thumb1_output_casesi (rtx *operands)
29799 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29801 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29803 switch (GET_MODE(diff_vec))
29805 case QImode:
29806 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29807 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29808 case HImode:
29809 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29810 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29811 case SImode:
29812 return "bl\t%___gnu_thumb1_case_si";
29813 default:
29814 gcc_unreachable ();
29818 /* Output a Thumb-2 casesi instruction. */
29819 const char *
29820 thumb2_output_casesi (rtx *operands)
29822 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
29824 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29826 output_asm_insn ("cmp\t%0, %1", operands);
29827 output_asm_insn ("bhi\t%l3", operands);
29828 switch (GET_MODE(diff_vec))
29830 case QImode:
29831 return "tbb\t[%|pc, %0]";
29832 case HImode:
29833 return "tbh\t[%|pc, %0, lsl #1]";
29834 case SImode:
29835 if (flag_pic)
29837 output_asm_insn ("adr\t%4, %l2", operands);
29838 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29839 output_asm_insn ("add\t%4, %4, %5", operands);
29840 return "bx\t%4";
29842 else
29844 output_asm_insn ("adr\t%4, %l2", operands);
29845 return "ldr\t%|pc, [%4, %0, lsl #2]";
29847 default:
29848 gcc_unreachable ();
29852 /* Most ARM cores are single issue, but some newer ones can dual issue.
29853 The scheduler descriptions rely on this being correct. */
29854 static int
29855 arm_issue_rate (void)
29857 switch (arm_tune)
29859 case cortexa15:
29860 case cortexa57:
29861 return 3;
29863 case cortexr4:
29864 case cortexr4f:
29865 case cortexr5:
29866 case genericv7a:
29867 case cortexa5:
29868 case cortexa7:
29869 case cortexa8:
29870 case cortexa9:
29871 case cortexa12:
29872 case cortexa53:
29873 case fa726te:
29874 case marvell_pj4:
29875 return 2;
29877 default:
29878 return 1;
29882 /* A table and a function to perform ARM-specific name mangling for
29883 NEON vector types in order to conform to the AAPCS (see "Procedure
29884 Call Standard for the ARM Architecture", Appendix A). To qualify
29885 for emission with the mangled names defined in that document, a
29886 vector type must not only be of the correct mode but also be
29887 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29888 typedef struct
29890 enum machine_mode mode;
29891 const char *element_type_name;
29892 const char *aapcs_name;
29893 } arm_mangle_map_entry;
29895 static arm_mangle_map_entry arm_mangle_map[] = {
29896 /* 64-bit containerized types. */
29897 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29898 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29899 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29900 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29901 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29902 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29903 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29904 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29905 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29906 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29908 /* 128-bit containerized types. */
29909 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29910 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29911 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29912 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29913 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29914 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29915 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29916 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29917 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29918 { VOIDmode, NULL, NULL }
29921 const char *
29922 arm_mangle_type (const_tree type)
29924 arm_mangle_map_entry *pos = arm_mangle_map;
29926 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29927 has to be mangled as if it is in the "std" namespace. */
29928 if (TARGET_AAPCS_BASED
29929 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29930 return "St9__va_list";
29932 /* Half-precision float. */
29933 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29934 return "Dh";
29936 if (TREE_CODE (type) != VECTOR_TYPE)
29937 return NULL;
29939 /* Check the mode of the vector type, and the name of the vector
29940 element type, against the table. */
29941 while (pos->mode != VOIDmode)
29943 tree elt_type = TREE_TYPE (type);
29945 if (pos->mode == TYPE_MODE (type)
29946 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29947 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29948 pos->element_type_name))
29949 return pos->aapcs_name;
29951 pos++;
29954 /* Use the default mangling for unrecognized (possibly user-defined)
29955 vector types. */
29956 return NULL;
29959 /* Order of allocation of core registers for Thumb: this allocation is
29960 written over the corresponding initial entries of the array
29961 initialized with REG_ALLOC_ORDER. We allocate all low registers
29962 first. Saving and restoring a low register is usually cheaper than
29963 using a call-clobbered high register. */
29965 static const int thumb_core_reg_alloc_order[] =
29967 3, 2, 1, 0, 4, 5, 6, 7,
29968 14, 12, 8, 9, 10, 11
29971 /* Adjust register allocation order when compiling for Thumb. */
29973 void
29974 arm_order_regs_for_local_alloc (void)
29976 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29977 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29978 if (TARGET_THUMB)
29979 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29980 sizeof (thumb_core_reg_alloc_order));
29983 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29985 bool
29986 arm_frame_pointer_required (void)
29988 return (cfun->has_nonlocal_label
29989 || SUBTARGET_FRAME_POINTER_REQUIRED
29990 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29993 /* Thumb-1 is the only target that cannot support conditional execution,
29994 so return true if the target is not Thumb-1. */
29995 static bool
29996 arm_have_conditional_execution (void)
29998 return !TARGET_THUMB1;
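/* Implements target hook builtin_vectorized_function: return the NEON
   builtin decl that vectorizes the scalar builtin FNDECL for the given
   output and input vector types, or NULL_TREE if none is available.  */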
30001 tree
30002 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
30004 enum machine_mode in_mode, out_mode;
30005 int in_n, out_n;
30006 bool out_unsigned_p = TYPE_UNSIGNED (type_out);
30008 if (TREE_CODE (type_out) != VECTOR_TYPE
30009 || TREE_CODE (type_in) != VECTOR_TYPE)
30010 return NULL_TREE;
30012 out_mode = TYPE_MODE (TREE_TYPE (type_out));
30013 out_n = TYPE_VECTOR_SUBPARTS (type_out);
30014 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30015 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30017 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
30018 decl of the vectorized builtin for the appropriate vector mode.
30019 NULL_TREE is returned if no such builtin is available. */
30020 #undef ARM_CHECK_BUILTIN_MODE
30021 #define ARM_CHECK_BUILTIN_MODE(C) \
30022 (TARGET_NEON && TARGET_FPU_ARMV8 \
30023 && flag_unsafe_math_optimizations \
30024 && ARM_CHECK_BUILTIN_MODE_1 (C))
30026 #undef ARM_CHECK_BUILTIN_MODE_1
30027 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30028 (out_mode == SFmode && out_n == C \
30029 && in_mode == SFmode && in_n == C)
30031 #undef ARM_FIND_VRINT_VARIANT
30032 #define ARM_FIND_VRINT_VARIANT(N) \
30033 (ARM_CHECK_BUILTIN_MODE (2) \
30034 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
30035 : (ARM_CHECK_BUILTIN_MODE (4) \
30036 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
30037 : NULL_TREE))
30039 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
30041 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
30042 switch (fn)
30044 case BUILT_IN_FLOORF:
30045 return ARM_FIND_VRINT_VARIANT (vrintm);
30046 case BUILT_IN_CEILF:
30047 return ARM_FIND_VRINT_VARIANT (vrintp);
30048 case BUILT_IN_TRUNCF:
30049 return ARM_FIND_VRINT_VARIANT (vrintz);
30050 case BUILT_IN_ROUNDF:
30051 return ARM_FIND_VRINT_VARIANT (vrinta);
30052 #undef ARM_CHECK_BUILTIN_MODE_1
30053 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30054 (out_mode == SImode && out_n == C \
30055 && in_mode == SFmode && in_n == C)
30057 #define ARM_FIND_VCVT_VARIANT(N) \
30058 (ARM_CHECK_BUILTIN_MODE (2) \
30059 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
30060 : (ARM_CHECK_BUILTIN_MODE (4) \
30061 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
30062 : NULL_TREE))
30064 #define ARM_FIND_VCVTU_VARIANT(N) \
30065 (ARM_CHECK_BUILTIN_MODE (2) \
30066 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
30067 : (ARM_CHECK_BUILTIN_MODE (4) \
30068 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
30069 : NULL_TREE))
30070 case BUILT_IN_LROUNDF:
30071 return out_unsigned_p
30072 ? ARM_FIND_VCVTU_VARIANT (vcvta)
30073 : ARM_FIND_VCVT_VARIANT (vcvta);
30074 case BUILT_IN_LCEILF:
30075 return out_unsigned_p
30076 ? ARM_FIND_VCVTU_VARIANT (vcvtp)
30077 : ARM_FIND_VCVT_VARIANT (vcvtp);
30078 case BUILT_IN_LFLOORF:
30079 return out_unsigned_p
30080 ? ARM_FIND_VCVTU_VARIANT (vcvtm)
30081 : ARM_FIND_VCVT_VARIANT (vcvtm);
30082 #undef ARM_CHECK_BUILTIN_MODE
30083 #define ARM_CHECK_BUILTIN_MODE(C, N) \
30084 (out_mode == N##mode && out_n == C \
30085 && in_mode == N##mode && in_n == C)
30086 case BUILT_IN_BSWAP16:
30087 if (ARM_CHECK_BUILTIN_MODE (4, HI))
30088 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
30089 else if (ARM_CHECK_BUILTIN_MODE (8, HI))
30090 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
30091 else
30092 return NULL_TREE;
30093 case BUILT_IN_BSWAP32:
30094 if (ARM_CHECK_BUILTIN_MODE (2, SI))
30095 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
30096 else if (ARM_CHECK_BUILTIN_MODE (4, SI))
30097 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
30098 else
30099 return NULL_TREE;
30100 case BUILT_IN_BSWAP64:
30101 if (ARM_CHECK_BUILTIN_MODE (2, DI))
30102 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
30103 else
30104 return NULL_TREE;
30105 case BUILT_IN_COPYSIGNF:
30106 if (ARM_CHECK_BUILTIN_MODE (2, SF))
30107 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false);
30108 else if (ARM_CHECK_BUILTIN_MODE (4, SF))
30109 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false);
30110 else
30111 return NULL_TREE;
30113 default:
30114 return NULL_TREE;
30117 return NULL_TREE;
30119 #undef ARM_FIND_VCVT_VARIANT
30120 #undef ARM_FIND_VCVTU_VARIANT
30121 #undef ARM_CHECK_BUILTIN_MODE
30122 #undef ARM_FIND_VRINT_VARIANT
30125 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30126 static HOST_WIDE_INT
30127 arm_vector_alignment (const_tree type)
30129 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30131 if (TARGET_AAPCS_BASED)
30132 align = MIN (align, 64);
30134 return align;
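/* Implements target hook autovectorize_vector_sizes: try 16-byte and 8-byte
   NEON vectors, or only the preferred mode (returning 0) when
   -mvectorize-with-neon-double is in effect.  */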
30137 static unsigned int
30138 arm_autovectorize_vector_sizes (void)
30140 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
30143 static bool
30144 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30146 /* Vectors which aren't in packed structures will not be less aligned than
30147 the natural alignment of their element type, so this is safe. */
30148 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30149 return !is_packed;
30151 return default_builtin_vector_alignment_reachable (type, is_packed);
30154 static bool
30155 arm_builtin_support_vector_misalignment (enum machine_mode mode,
30156 const_tree type, int misalignment,
30157 bool is_packed)
30159 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30161 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30163 if (is_packed)
30164 return align == 1;
30166 /* If the misalignment is unknown, we should be able to handle the access
30167 so long as it is not to a member of a packed data structure. */
30168 if (misalignment == -1)
30169 return true;
30171 /* Return true if the misalignment is a multiple of the natural alignment
30172 of the vector's element type. This is probably always going to be
30173 true in practice, since we've already established that this isn't a
30174 packed access. */
30175 return ((misalignment % align) == 0);
30178 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30179 is_packed);
30182 static void
30183 arm_conditional_register_usage (void)
30185 int regno;
30187 if (TARGET_THUMB1 && optimize_size)
30189 /* When optimizing for size on Thumb-1, it's better not
30190 to use the HI regs, because of the overhead of
30191 stacking them. */
30192 for (regno = FIRST_HI_REGNUM;
30193 regno <= LAST_HI_REGNUM; ++regno)
30194 fixed_regs[regno] = call_used_regs[regno] = 1;
30197 /* The link register can be clobbered by any branch insn,
30198 but we have no way to track that at present, so mark
30199 it as unavailable. */
30200 if (TARGET_THUMB1)
30201 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30203 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
30205 /* VFPv3 registers are disabled when earlier VFP
30206 versions are selected due to the definition of
30207 LAST_VFP_REGNUM. */
30208 for (regno = FIRST_VFP_REGNUM;
30209 regno <= LAST_VFP_REGNUM; ++ regno)
30211 fixed_regs[regno] = 0;
30212 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30213 || regno >= FIRST_VFP_REGNUM + 32;
30217 if (TARGET_REALLY_IWMMXT)
30219 regno = FIRST_IWMMXT_GR_REGNUM;
30220 /* The 2002/10/09 revision of the XScale ABI has wCG0
30221 and wCG1 as call-preserved registers. The 2002/11/21
30222 revision changed this so that all wCG registers are
30223 scratch registers. */
30224 for (regno = FIRST_IWMMXT_GR_REGNUM;
30225 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30226 fixed_regs[regno] = 0;
30227 /* The XScale ABI has wR0 - wR9 as scratch registers,
30228 the rest as call-preserved registers. */
30229 for (regno = FIRST_IWMMXT_REGNUM;
30230 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30232 fixed_regs[regno] = 0;
30233 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30237 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30239 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30240 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30242 else if (TARGET_APCS_STACK)
30244 fixed_regs[10] = 1;
30245 call_used_regs[10] = 1;
30247 /* -mcaller-super-interworking reserves r11 for calls to
30248 _interwork_r11_call_via_rN(). Making the register global
30249 is an easy way of ensuring that it remains valid for all
30250 calls. */
30251 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30252 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30254 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30255 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30256 if (TARGET_CALLER_INTERWORKING)
30257 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30259 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30262 static reg_class_t
30263 arm_preferred_rename_class (reg_class_t rclass)
30265 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30266 using GENERAL_REGS. During the register rename pass we therefore prefer
30267 LO_REGS, which can reduce code size. */
30268 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30269 return LO_REGS;
30270 else
30271 return NO_REGS;
30274 /* Compute the attribute "length" of insn "*push_multi".
30275 So this function MUST be kept in sync with that insn pattern. */
30277 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30279 int i, regno, hi_reg;
30280 int num_saves = XVECLEN (parallel_op, 0);
30282 /* ARM mode. */
30283 if (TARGET_ARM)
30284 return 4;
30285 /* Thumb1 mode. */
30286 if (TARGET_THUMB1)
30287 return 2;
30289 /* Thumb2 mode. */
30290 regno = REGNO (first_op);
30291 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30292 for (i = 1; i < num_saves && !hi_reg; i++)
30294 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30295 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30298 if (!hi_reg)
30299 return 2;
30300 return 4;
30303 /* Compute the number of instructions emitted by output_move_double. */
30305 arm_count_output_move_double_insns (rtx *operands)
30307 int count;
30308 rtx ops[2];
30309 /* output_move_double may modify the operands array, so call it
30310 here on a copy of the array. */
30311 ops[0] = operands[0];
30312 ops[1] = operands[1];
30313 output_move_double (ops, false, &count);
30314 return count;
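/* If OPERAND is a CONST_DOUBLE equal to 1/2^n for some positive n that fits
   in 32 bits, return n (the number of fraction bits for a fixed-point
   conversion); otherwise return 0.  */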
30318 vfp3_const_double_for_fract_bits (rtx operand)
30320 REAL_VALUE_TYPE r0;
30322 if (!CONST_DOUBLE_P (operand))
30323 return 0;
30325 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30326 if (exact_real_inverse (DFmode, &r0))
30328 if (exact_real_truncate (DFmode, &r0))
30330 HOST_WIDE_INT value = real_to_integer (&r0);
30331 value = value & 0xffffffff;
30332 if ((value != 0) && ( (value & (value - 1)) == 0))
30333 return int_log2 (value);
30336 return 0;
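/* Similarly, if OPERAND is a CONST_DOUBLE whose value is an exact power of
   two 2^n representable in 32 bits, return n; otherwise return 0.  */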
30340 vfp3_const_double_for_bits (rtx operand)
30342 REAL_VALUE_TYPE r0;
30344 if (!CONST_DOUBLE_P (operand))
30345 return 0;
30347 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30348 if (exact_real_truncate (DFmode, &r0))
30350 HOST_WIDE_INT value = real_to_integer (&r0);
30351 value = value & 0xffffffff;
30352 if ((value != 0) && ( (value & (value - 1)) == 0))
30353 return int_log2 (value);
30356 return 0;
30359 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30361 static void
30362 arm_pre_atomic_barrier (enum memmodel model)
30364 if (need_atomic_barrier_p (model, true))
30365 emit_insn (gen_memory_barrier ());
30368 static void
30369 arm_post_atomic_barrier (enum memmodel model)
30371 if (need_atomic_barrier_p (model, false))
30372 emit_insn (gen_memory_barrier ());
30375 /* Emit the load-exclusive and store-exclusive instructions.
30376 Use acquire and release versions if necessary. */
30378 static void
30379 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
30381 rtx (*gen) (rtx, rtx);
30383 if (acq)
30385 switch (mode)
30387 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30388 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30389 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30390 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30391 default:
30392 gcc_unreachable ();
30395 else
30397 switch (mode)
30399 case QImode: gen = gen_arm_load_exclusiveqi; break;
30400 case HImode: gen = gen_arm_load_exclusivehi; break;
30401 case SImode: gen = gen_arm_load_exclusivesi; break;
30402 case DImode: gen = gen_arm_load_exclusivedi; break;
30403 default:
30404 gcc_unreachable ();
30408 emit_insn (gen (rval, mem));
30411 static void
30412 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
30413 rtx mem, bool rel)
30415 rtx (*gen) (rtx, rtx, rtx);
30417 if (rel)
30419 switch (mode)
30421 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30422 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30423 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30424 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30425 default:
30426 gcc_unreachable ();
30429 else
30431 switch (mode)
30433 case QImode: gen = gen_arm_store_exclusiveqi; break;
30434 case HImode: gen = gen_arm_store_exclusivehi; break;
30435 case SImode: gen = gen_arm_store_exclusivesi; break;
30436 case DImode: gen = gen_arm_store_exclusivedi; break;
30437 default:
30438 gcc_unreachable ();
30442 emit_insn (gen (bval, rval, mem));
30445 /* Mark the previous jump instruction as unlikely. */
30447 static void
30448 emit_unlikely_jump (rtx insn)
30450 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30452 insn = emit_jump_insn (insn);
30453 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30456 /* Expand a compare and swap pattern. */
30458 void
30459 arm_expand_compare_and_swap (rtx operands[])
30461 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30462 enum machine_mode mode;
30463 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30465 bval = operands[0];
30466 rval = operands[1];
30467 mem = operands[2];
30468 oldval = operands[3];
30469 newval = operands[4];
30470 is_weak = operands[5];
30471 mod_s = operands[6];
30472 mod_f = operands[7];
30473 mode = GET_MODE (mem);
30475 /* Normally the succ memory model must be stronger than fail, but in the
30476 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30477 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30479 if (TARGET_HAVE_LDACQ
30480 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30481 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30482 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30484 switch (mode)
30486 case QImode:
30487 case HImode:
30488 /* For narrow modes, we're going to perform the comparison in SImode,
30489 so do the zero-extension now. */
30490 rval = gen_reg_rtx (SImode);
30491 oldval = convert_modes (SImode, mode, oldval, true);
30492 /* FALLTHRU */
30494 case SImode:
30495 /* Force the value into a register if needed. We waited until after
30496 the zero-extension above to do this properly. */
30497 if (!arm_add_operand (oldval, SImode))
30498 oldval = force_reg (SImode, oldval);
30499 break;
30501 case DImode:
30502 if (!cmpdi_operand (oldval, mode))
30503 oldval = force_reg (mode, oldval);
30504 break;
30506 default:
30507 gcc_unreachable ();
30510 switch (mode)
30512 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30513 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30514 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30515 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30516 default:
30517 gcc_unreachable ();
30520 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30522 if (mode == QImode || mode == HImode)
30523 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30525 /* In all cases, we arrange for success to be signaled by Z set.
30526 This arrangement allows for the boolean result to be used directly
30527 in a subsequent branch, post optimization. */
30528 x = gen_rtx_REG (CCmode, CC_REGNUM);
30529 x = gen_rtx_EQ (SImode, x, const0_rtx);
30530 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30533 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30534 another memory store between the load-exclusive and store-exclusive can
30535 reset the monitor from Exclusive to Open state. This means we must wait
30536 until after reload to split the pattern, lest we get a register spill in
30537 the middle of the atomic sequence. */
30539 void
30540 arm_split_compare_and_swap (rtx operands[])
30542 rtx rval, mem, oldval, newval, scratch;
30543 enum machine_mode mode;
30544 enum memmodel mod_s, mod_f;
30545 bool is_weak;
30546 rtx_code_label *label1, *label2;
30547 rtx x, cond;
30549 rval = operands[0];
30550 mem = operands[1];
30551 oldval = operands[2];
30552 newval = operands[3];
30553 is_weak = (operands[4] != const0_rtx);
30554 mod_s = (enum memmodel) INTVAL (operands[5]);
30555 mod_f = (enum memmodel) INTVAL (operands[6]);
30556 scratch = operands[7];
30557 mode = GET_MODE (mem);
30559 bool use_acquire = TARGET_HAVE_LDACQ
30560 && !(mod_s == MEMMODEL_RELAXED
30561 || mod_s == MEMMODEL_CONSUME
30562 || mod_s == MEMMODEL_RELEASE);
30564 bool use_release = TARGET_HAVE_LDACQ
30565 && !(mod_s == MEMMODEL_RELAXED
30566 || mod_s == MEMMODEL_CONSUME
30567 || mod_s == MEMMODEL_ACQUIRE);
30569 /* Checks whether a barrier is needed and emits one accordingly. */
30570 if (!(use_acquire || use_release))
30571 arm_pre_atomic_barrier (mod_s);
30573 label1 = NULL;
30574 if (!is_weak)
30576 label1 = gen_label_rtx ();
30577 emit_label (label1);
30579 label2 = gen_label_rtx ();
30581 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30583 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30584 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30585 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30586 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30587 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30589 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30591 /* Weak or strong, we want EQ to be true for success, so that we
30592 match the flags that we got from the compare above. */
30593 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30594 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30595 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30597 if (!is_weak)
30599 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30600 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30601 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30602 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30605 if (mod_f != MEMMODEL_RELAXED)
30606 emit_label (label2);
30608 /* Checks whether a barrier is needed and emits one accordingly. */
30609 if (!(use_acquire || use_release))
30610 arm_post_atomic_barrier (mod_s);
30612 if (mod_f == MEMMODEL_RELAXED)
30613 emit_label (label2);
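/* Split an atomic read-modify-write operation into a load-exclusive /
   modify / store-exclusive retry loop.  CODE is the operation, OLD_OUT and
   NEW_OUT (either may be NULL) receive the old and new memory values, MEM
   is the location operated on, VALUE is the second operand, MODEL_RTX
   encodes the memory model and COND is a scratch register that receives the
   store-exclusive result.  */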
30616 void
30617 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30618 rtx value, rtx model_rtx, rtx cond)
30620 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30621 enum machine_mode mode = GET_MODE (mem);
30622 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
30623 rtx_code_label *label;
30624 rtx x;
30626 bool use_acquire = TARGET_HAVE_LDACQ
30627 && !(model == MEMMODEL_RELAXED
30628 || model == MEMMODEL_CONSUME
30629 || model == MEMMODEL_RELEASE);
30631 bool use_release = TARGET_HAVE_LDACQ
30632 && !(model == MEMMODEL_RELAXED
30633 || model == MEMMODEL_CONSUME
30634 || model == MEMMODEL_ACQUIRE);
30636 /* Checks whether a barrier is needed and emits one accordingly. */
30637 if (!(use_acquire || use_release))
30638 arm_pre_atomic_barrier (model);
30640 label = gen_label_rtx ();
30641 emit_label (label);
30643 if (new_out)
30644 new_out = gen_lowpart (wmode, new_out);
30645 if (old_out)
30646 old_out = gen_lowpart (wmode, old_out);
30647 else
30648 old_out = new_out;
30649 value = simplify_gen_subreg (wmode, value, mode, 0);
30651 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30653 switch (code)
30655 case SET:
30656 new_out = value;
30657 break;
30659 case NOT:
30660 x = gen_rtx_AND (wmode, old_out, value);
30661 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30662 x = gen_rtx_NOT (wmode, new_out);
30663 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30664 break;
30666 case MINUS:
30667 if (CONST_INT_P (value))
30669 value = GEN_INT (-INTVAL (value));
30670 code = PLUS;
30672 /* FALLTHRU */
30674 case PLUS:
30675 if (mode == DImode)
30677 /* DImode plus/minus need to clobber flags. */
30678 /* The adddi3 and subdi3 patterns are incorrectly written so that
30679 they require matching operands, even when we could easily support
30680 three operands. Thankfully, this can be fixed up post-splitting,
30681 as the individual add+adc patterns do accept three operands and
30682 post-reload cprop can make these moves go away. */
30683 emit_move_insn (new_out, old_out);
30684 if (code == PLUS)
30685 x = gen_adddi3 (new_out, new_out, value);
30686 else
30687 x = gen_subdi3 (new_out, new_out, value);
30688 emit_insn (x);
30689 break;
30691 /* FALLTHRU */
30693 default:
30694 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30695 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30696 break;
30699 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30700 use_release);
30702 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30703 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30705 /* Checks whether a barrier is needed and emits one accordingly. */
30706 if (!(use_acquire || use_release))
30707 arm_post_atomic_barrier (model);
30710 #define MAX_VECT_LEN 16
30712 struct expand_vec_perm_d
30714 rtx target, op0, op1;
30715 unsigned char perm[MAX_VECT_LEN];
30716 enum machine_mode vmode;
30717 unsigned char nelt;
30718 bool one_vector_p;
30719 bool testing_p;
30722 /* Generate a variable permutation. */
30724 static void
30725 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30727 enum machine_mode vmode = GET_MODE (target);
30728 bool one_vector_p = rtx_equal_p (op0, op1);
30730 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30731 gcc_checking_assert (GET_MODE (op0) == vmode);
30732 gcc_checking_assert (GET_MODE (op1) == vmode);
30733 gcc_checking_assert (GET_MODE (sel) == vmode);
30734 gcc_checking_assert (TARGET_NEON);
30736 if (one_vector_p)
30738 if (vmode == V8QImode)
30739 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30740 else
30741 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30743 else
30745 rtx pair;
30747 if (vmode == V8QImode)
30749 pair = gen_reg_rtx (V16QImode);
30750 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30751 pair = gen_lowpart (TImode, pair);
30752 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30754 else
30756 pair = gen_reg_rtx (OImode);
30757 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30758 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30763 void
30764 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30766 enum machine_mode vmode = GET_MODE (target);
30767 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30768 bool one_vector_p = rtx_equal_p (op0, op1);
30769 rtx rmask[MAX_VECT_LEN], mask;
30771 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30772 numbering of elements for big-endian, we must reverse the order. */
30773 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30775 /* The VTBL instruction does not use a modulo index, so we must take care
30776 of that ourselves. */
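  /* Illustrative example (not from the original source): for a single
     V8QImode input each selector byte is ANDed with 7, so a selector value
     of 9 picks element 9 & 7 == 1; for two distinct inputs it is ANDed
     with 15, indexing into the 16 bytes of the concatenated pair.  */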
30777 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30778 for (i = 0; i < nelt; ++i)
30779 rmask[i] = mask;
30780 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30781 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30783 arm_expand_vec_perm_1 (target, op0, op1, sel);
30786 /* Generate or test for an insn that supports a constant permutation. */
30788 /* Recognize patterns for the VUZP insns. */
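/* Illustrative example (not from the original source): with two V8QImode
   operands, VUZP de-interleaves even- and odd-indexed elements, so this
   recognizer accepts selectors such as
	{ 0, 2, 4, 6, 8, 10, 12, 14 }	(odd == 0)
	{ 1, 3, 5, 7, 9, 11, 13, 15 }	(odd == 1)
   For a single operand the indices are reduced modulo 8 instead.  */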
30790 static bool
30791 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30793 unsigned int i, odd, mask, nelt = d->nelt;
30794 rtx out0, out1, in0, in1, x;
30795 rtx (*gen)(rtx, rtx, rtx, rtx);
30797 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30798 return false;
30800 /* Note that these are little-endian tests. Adjust for big-endian later. */
30801 if (d->perm[0] == 0)
30802 odd = 0;
30803 else if (d->perm[0] == 1)
30804 odd = 1;
30805 else
30806 return false;
30807 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30809 for (i = 0; i < nelt; i++)
30811 unsigned elt = (i * 2 + odd) & mask;
30812 if (d->perm[i] != elt)
30813 return false;
30816 /* Success! */
30817 if (d->testing_p)
30818 return true;
30820 switch (d->vmode)
30822 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30823 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30824 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30825 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30826 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30827 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30828 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30829 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30830 default:
30831 gcc_unreachable ();
30834 in0 = d->op0;
30835 in1 = d->op1;
30836 if (BYTES_BIG_ENDIAN)
30838 x = in0, in0 = in1, in1 = x;
30839 odd = !odd;
30842 out0 = d->target;
30843 out1 = gen_reg_rtx (d->vmode);
30844 if (odd)
30845 x = out0, out0 = out1, out1 = x;
30847 emit_insn (gen (out0, in0, in1, out1));
30848 return true;
30851 /* Recognize patterns for the VZIP insns. */
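/* Illustrative example (not from the original source): with two V8QImode
   operands, VZIP interleaves the low (or high) halves of the inputs, so this
   recognizer accepts selectors such as
	{ 0, 8, 1, 9, 2, 10, 3, 11 }	(high == 0)
	{ 4, 12, 5, 13, 6, 14, 7, 15 }	(high == nelt / 2)  */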
30853 static bool
30854 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30856 unsigned int i, high, mask, nelt = d->nelt;
30857 rtx out0, out1, in0, in1, x;
30858 rtx (*gen)(rtx, rtx, rtx, rtx);
30860 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30861 return false;
30863 /* Note that these are little-endian tests. Adjust for big-endian later. */
30864 high = nelt / 2;
30865 if (d->perm[0] == high)
30867 else if (d->perm[0] == 0)
30868 high = 0;
30869 else
30870 return false;
30871 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30873 for (i = 0; i < nelt / 2; i++)
30875 unsigned elt = (i + high) & mask;
30876 if (d->perm[i * 2] != elt)
30877 return false;
30878 elt = (elt + nelt) & mask;
30879 if (d->perm[i * 2 + 1] != elt)
30880 return false;
30883 /* Success! */
30884 if (d->testing_p)
30885 return true;
30887 switch (d->vmode)
30889 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30890 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30891 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30892 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30893 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30894 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30895 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30896 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30897 default:
30898 gcc_unreachable ();
30901 in0 = d->op0;
30902 in1 = d->op1;
30903 if (BYTES_BIG_ENDIAN)
30905 x = in0, in0 = in1, in1 = x;
30906 high = !high;
30909 out0 = d->target;
30910 out1 = gen_reg_rtx (d->vmode);
30911 if (high)
30912 x = out0, out0 = out1, out1 = x;
30914 emit_insn (gen (out0, in0, in1, out1));
30915 return true;
30918 /* Recognize patterns for the VREV insns. */
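/* Illustrative example (not from the original source): VREV64 on a V8QImode
   operand reverses the bytes within the 64-bit doubleword, i.e. the selector
   { 7, 6, 5, 4, 3, 2, 1, 0 } (diff == 7), while VREV32 on V8QImode matches
   { 3, 2, 1, 0, 7, 6, 5, 4 } (diff == 3).  */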
30920 static bool
30921 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30923 unsigned int i, j, diff, nelt = d->nelt;
30924 rtx (*gen)(rtx, rtx, rtx);
30926 if (!d->one_vector_p)
30927 return false;
30929 diff = d->perm[0];
30930 switch (diff)
30932 case 7:
30933 switch (d->vmode)
30935 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30936 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30937 default:
30938 return false;
30940 break;
30941 case 3:
30942 switch (d->vmode)
30944 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30945 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30946 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30947 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30948 default:
30949 return false;
30951 break;
30952 case 1:
30953 switch (d->vmode)
30955 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30956 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30957 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30958 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30959 case V4SImode: gen = gen_neon_vrev64v4si; break;
30960 case V2SImode: gen = gen_neon_vrev64v2si; break;
30961 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30962 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30963 default:
30964 return false;
30966 break;
30967 default:
30968 return false;
30971 for (i = 0; i < nelt ; i += diff + 1)
30972 for (j = 0; j <= diff; j += 1)
30974 /* This is guaranteed to be true as the value of diff
30975 is 7, 3 or 1 and we should have enough elements in the
30976 queue to generate this. Getting a vector mask with a
30977 diff other than these values implies that something
30978 has gone wrong by the time we get here. */
30979 gcc_assert (i + j < nelt);
30980 if (d->perm[i + j] != i + diff - j)
30981 return false;
30984 /* Success! */
30985 if (d->testing_p)
30986 return true;
30988 /* ??? The third operand is an artifact of the builtin infrastructure
30989 and is ignored by the actual instruction. */
30990 emit_insn (gen (d->target, d->op0, const0_rtx));
30991 return true;
30994 /* Recognize patterns for the VTRN insns. */
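/* Illustrative example (not from the original source): with two V8QImode
   operands, VTRN transposes pairs of elements taken alternately from the two
   inputs, so this recognizer accepts selectors such as
	{ 0, 8, 2, 10, 4, 12, 6, 14 }	(odd == 0)
	{ 1, 9, 3, 11, 5, 13, 7, 15 }	(odd == 1)  */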
30996 static bool
30997 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30999 unsigned int i, odd, mask, nelt = d->nelt;
31000 rtx out0, out1, in0, in1, x;
31001 rtx (*gen)(rtx, rtx, rtx, rtx);
31003 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31004 return false;
31006 /* Note that these are little-endian tests. Adjust for big-endian later. */
31007 if (d->perm[0] == 0)
31008 odd = 0;
31009 else if (d->perm[0] == 1)
31010 odd = 1;
31011 else
31012 return false;
31013 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31015 for (i = 0; i < nelt; i += 2)
31017 if (d->perm[i] != i + odd)
31018 return false;
31019 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31020 return false;
31023 /* Success! */
31024 if (d->testing_p)
31025 return true;
31027 switch (d->vmode)
31029 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
31030 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
31031 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
31032 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
31033 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
31034 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
31035 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
31036 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
31037 default:
31038 gcc_unreachable ();
31041 in0 = d->op0;
31042 in1 = d->op1;
31043 if (BYTES_BIG_ENDIAN)
31045 x = in0, in0 = in1, in1 = x;
31046 odd = !odd;
31049 out0 = d->target;
31050 out1 = gen_reg_rtx (d->vmode);
31051 if (odd)
31052 x = out0, out0 = out1, out1 = x;
31054 emit_insn (gen (out0, in0, in1, out1));
31055 return true;
31058 /* Recognize patterns for the VEXT insns. */
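/* Illustrative example (not from the original source): VEXT extracts a run
   of consecutive elements from the concatenation of the two inputs, so for
   two V8QImode operands the selector
	{ 3, 4, 5, 6, 7, 8, 9, 10 }
   is matched and emitted as a vext with an offset of 3 elements.  */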
31060 static bool
31061 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31063 unsigned int i, nelt = d->nelt;
31064 rtx (*gen) (rtx, rtx, rtx, rtx);
31065 rtx offset;
31067 unsigned int location;
31069 unsigned int next = d->perm[0] + 1;
31071 /* TODO: Handle GCC's numbering of elements for big-endian. */
31072 if (BYTES_BIG_ENDIAN)
31073 return false;
31075 /* Check if the extracted indexes are increasing by one. */
31076 for (i = 1; i < nelt; next++, i++)
31078 /* If we hit the most significant element of the 2nd vector in
31079 the previous iteration, no need to test further. */
31080 if (next == 2 * nelt)
31081 return false;
31083 /* If we are operating on only one vector, it could be a
31084 rotation. If there are only two elements of size < 64, let
31085 arm_evpc_neon_vrev catch it. */
31086 if (d->one_vector_p && (next == nelt))
31088 if ((nelt == 2) && (d->vmode != V2DImode))
31089 return false;
31090 else
31091 next = 0;
31094 if (d->perm[i] != next)
31095 return false;
31098 location = d->perm[0];
31100 switch (d->vmode)
31102 case V16QImode: gen = gen_neon_vextv16qi; break;
31103 case V8QImode: gen = gen_neon_vextv8qi; break;
31104 case V4HImode: gen = gen_neon_vextv4hi; break;
31105 case V8HImode: gen = gen_neon_vextv8hi; break;
31106 case V2SImode: gen = gen_neon_vextv2si; break;
31107 case V4SImode: gen = gen_neon_vextv4si; break;
31108 case V2SFmode: gen = gen_neon_vextv2sf; break;
31109 case V4SFmode: gen = gen_neon_vextv4sf; break;
31110 case V2DImode: gen = gen_neon_vextv2di; break;
31111 default:
31112 return false;
31115 /* Success! */
31116 if (d->testing_p)
31117 return true;
31119 offset = GEN_INT (location);
31120 emit_insn (gen (d->target, d->op0, d->op1, offset));
31121 return true;
31124 /* The NEON VTBL instruction is a fully variable permutation that's even
31125 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31126 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31127 can do slightly better by expanding this as a constant where we don't
31128 have to apply a mask. */
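/* Illustrative example (not from the original source): for an arbitrary
   constant permutation such as { 0, 3, 1, 2, 7, 5, 6, 4 } on V8QImode
   operands, which none of the recognizers above match, the selector is
   materialised as a constant vector in a register and the permutation is
   performed directly by VTBL, with no masking needed.  */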
31130 static bool
31131 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31133 rtx rperm[MAX_VECT_LEN], sel;
31134 enum machine_mode vmode = d->vmode;
31135 unsigned int i, nelt = d->nelt;
31137 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31138 numbering of elements for big-endian, we must reverse the order. */
31139 if (BYTES_BIG_ENDIAN)
31140 return false;
31142 if (d->testing_p)
31143 return true;
31145 /* Generic code will try constant permutation twice: once with the
31146 original mode and again with the elements lowered to QImode.
31147 So wait and don't do the selector expansion ourselves. */
31148 if (vmode != V8QImode && vmode != V16QImode)
31149 return false;
31151 for (i = 0; i < nelt; ++i)
31152 rperm[i] = GEN_INT (d->perm[i]);
31153 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31154 sel = force_reg (vmode, sel);
31156 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31157 return true;
31160 static bool
31161 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31163 /* Check if the input mask matches vext before reordering the
31164 operands. */
31165 if (TARGET_NEON)
31166 if (arm_evpc_neon_vext (d))
31167 return true;
31169 /* The pattern matching functions above are written to look for a small
31170 number to begin the sequence (0, 1, N/2). If we begin with an index
31171 from the second operand, we can swap the operands. */
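  /* Illustrative example (not from the original source): for nelt == 4 the
     selector { 4, 0, 5, 1 } starts in the second operand; adding nelt modulo
     2*nelt rewrites it to { 0, 4, 1, 5 } and the operands are swapped, after
     which the VZIP recognizer can match it.  */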
31172 if (d->perm[0] >= d->nelt)
31174 unsigned i, nelt = d->nelt;
31175 rtx x;
31177 for (i = 0; i < nelt; ++i)
31178 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
31180 x = d->op0;
31181 d->op0 = d->op1;
31182 d->op1 = x;
31185 if (TARGET_NEON)
31187 if (arm_evpc_neon_vuzp (d))
31188 return true;
31189 if (arm_evpc_neon_vzip (d))
31190 return true;
31191 if (arm_evpc_neon_vrev (d))
31192 return true;
31193 if (arm_evpc_neon_vtrn (d))
31194 return true;
31195 return arm_evpc_neon_vtbl (d);
31197 return false;
31200 /* Expand a vec_perm_const pattern. */
31202 bool
31203 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
31205 struct expand_vec_perm_d d;
31206 int i, nelt, which;
31208 d.target = target;
31209 d.op0 = op0;
31210 d.op1 = op1;
31212 d.vmode = GET_MODE (target);
31213 gcc_assert (VECTOR_MODE_P (d.vmode));
31214 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31215 d.testing_p = false;
31217 for (i = which = 0; i < nelt; ++i)
31219 rtx e = XVECEXP (sel, 0, i);
31220 int ei = INTVAL (e) & (2 * nelt - 1);
31221 which |= (ei < nelt ? 1 : 2);
31222 d.perm[i] = ei;
31225 switch (which)
31227 default:
31228 gcc_unreachable();
31230 case 3:
31231 d.one_vector_p = false;
31232 if (!rtx_equal_p (op0, op1))
31233 break;
31235 /* The elements of PERM do not suggest that only the first operand
31236 is used, but both operands are identical. Allow easier matching
31237 of the permutation by folding the permutation into the single
31238 input vector. */
31239 /* FALLTHRU */
31240 case 2:
31241 for (i = 0; i < nelt; ++i)
31242 d.perm[i] &= nelt - 1;
31243 d.op0 = op1;
31244 d.one_vector_p = true;
31245 break;
31247 case 1:
31248 d.op1 = op0;
31249 d.one_vector_p = true;
31250 break;
31253 return arm_expand_vec_perm_const_1 (&d);
31256 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31258 static bool
31259 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
31260 const unsigned char *sel)
31262 struct expand_vec_perm_d d;
31263 unsigned int i, nelt, which;
31264 bool ret;
31266 d.vmode = vmode;
31267 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31268 d.testing_p = true;
31269 memcpy (d.perm, sel, nelt);
31271 /* Categorize the set of elements in the selector. */
31272 for (i = which = 0; i < nelt; ++i)
31274 unsigned char e = d.perm[i];
31275 gcc_assert (e < 2 * nelt);
31276 which |= (e < nelt ? 1 : 2);
31279 /* For all elements from second vector, fold the elements to first. */
31280 if (which == 2)
31281 for (i = 0; i < nelt; ++i)
31282 d.perm[i] -= nelt;
31284 /* Check whether the mask can be applied to the vector type. */
31285 d.one_vector_p = (which != 3);
31287 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31288 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31289 if (!d.one_vector_p)
31290 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31292 start_sequence ();
31293 ret = arm_expand_vec_perm_const_1 (&d);
31294 end_sequence ();
31296 return ret;
31299 bool
31300 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
31302 /* If we are soft float and we either have ldrd or the mode is no
31303 wider than a word, then all auto increment forms are ok. */
31304 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31305 return true;
31307 switch (code)
31309 /* Post increment and pre decrement are supported for all instruction
31310 forms except vector forms, where only post increment is allowed. */
31311 case ARM_POST_INC:
31312 case ARM_PRE_DEC:
31313 if (VECTOR_MODE_P (mode))
31315 if (code != ARM_PRE_DEC)
31316 return true;
31317 else
31318 return false;
31321 return true;
31323 case ARM_POST_DEC:
31324 case ARM_PRE_INC:
31325 /* Without LDRD, and with a mode size greater than the
31326 word size, there is no point in auto-incrementing
31327 because ldm and stm will not have these forms. */
31328 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31329 return false;
31331 /* Vector and floating point modes do not support
31332 these auto increment forms. */
31333 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31334 return false;
31336 return true;
31338 default:
31339 return false;
31343 return false;
31346 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31347 on ARM, since we know that shifts by negative amounts are no-ops.
31348 Additionally, the default expansion code is not available or suitable
31349 for post-reload insn splits (this can occur when the register allocator
31350 chooses not to do a shift in NEON).
31352 This function is used in both initial expand and post-reload splits, and
31353 handles all kinds of 64-bit shifts.
31355 Input requirements:
31356 - It is safe for the input and output to be the same register, but
31357 early-clobber rules apply for the shift amount and scratch registers.
31358 - Shift by register requires both scratch registers. In all other cases
31359 the scratch registers may be NULL.
31360 - Ashiftrt by a register also clobbers the CC register. */
31361 void
31362 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31363 rtx amount, rtx scratch1, rtx scratch2)
31365 rtx out_high = gen_highpart (SImode, out);
31366 rtx out_low = gen_lowpart (SImode, out);
31367 rtx in_high = gen_highpart (SImode, in);
31368 rtx in_low = gen_lowpart (SImode, in);
31370 /* Terminology:
31371 in = the register pair containing the input value.
31372 out = the destination register pair.
31373 up = the high- or low-part of each pair.
31374 down = the opposite part to "up".
31375 In a shift, we can consider bits to shift from "up"-stream to
31376 "down"-stream, so in a left-shift "up" is the low-part and "down"
31377 is the high-part of each register pair. */
31379 rtx out_up = code == ASHIFT ? out_low : out_high;
31380 rtx out_down = code == ASHIFT ? out_high : out_low;
31381 rtx in_up = code == ASHIFT ? in_low : in_high;
31382 rtx in_down = code == ASHIFT ? in_high : in_low;
31384 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31385 gcc_assert (out
31386 && (REG_P (out) || GET_CODE (out) == SUBREG)
31387 && GET_MODE (out) == DImode);
31388 gcc_assert (in
31389 && (REG_P (in) || GET_CODE (in) == SUBREG)
31390 && GET_MODE (in) == DImode);
31391 gcc_assert (amount
31392 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31393 && GET_MODE (amount) == SImode)
31394 || CONST_INT_P (amount)));
31395 gcc_assert (scratch1 == NULL
31396 || (GET_CODE (scratch1) == SCRATCH)
31397 || (GET_MODE (scratch1) == SImode
31398 && REG_P (scratch1)));
31399 gcc_assert (scratch2 == NULL
31400 || (GET_CODE (scratch2) == SCRATCH)
31401 || (GET_MODE (scratch2) == SImode
31402 && REG_P (scratch2)));
31403 gcc_assert (!REG_P (out) || !REG_P (amount)
31404 || !HARD_REGISTER_P (out)
31405 || (REGNO (out) != REGNO (amount)
31406 && REGNO (out) + 1 != REGNO (amount)));
31408 /* Macros to make following code more readable. */
31409 #define SUB_32(DEST,SRC) \
31410 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31411 #define RSB_32(DEST,SRC) \
31412 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31413 #define SUB_S_32(DEST,SRC) \
31414 gen_addsi3_compare0 ((DEST), (SRC), \
31415 GEN_INT (-32))
31416 #define SET(DEST,SRC) \
31417 gen_rtx_SET (SImode, (DEST), (SRC))
31418 #define SHIFT(CODE,SRC,AMOUNT) \
31419 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31420 #define LSHIFT(CODE,SRC,AMOUNT) \
31421 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31422 SImode, (SRC), (AMOUNT))
31423 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31424 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31425 SImode, (SRC), (AMOUNT))
31426 #define ORR(A,B) \
31427 gen_rtx_IOR (SImode, (A), (B))
31428 #define BRANCH(COND,LABEL) \
31429 gen_arm_cond_branch ((LABEL), \
31430 gen_rtx_ ## COND (CCmode, cc_reg, \
31431 const0_rtx), \
31432 cc_reg)
31434 /* Shifts by register and shifts by constant are handled separately. */
31435 if (CONST_INT_P (amount))
31437 /* We have a shift-by-constant. */
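      /* For illustration only (not part of the original source): for a
	 64-bit logical right shift by the constant 5 this path emits

		out_down = in_down >> 5;
		out_down |= in_up << 27;
		out_up   = in_up >> 5;

	 and for a shift by 40 (i.e. >= 32) it emits

		out_down = in_up >> 8;
		out_up   = 0;  */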
31439 /* First, handle out-of-range shift amounts.
31440 In both cases we try to match the result that an ARM instruction in a
31441 shift-by-register would give. This helps reduce execution
31442 differences between optimization levels, but it won't stop other
31443 parts of the compiler doing different things. This is "undefined
31444 behaviour", in any case. */
31445 if (INTVAL (amount) <= 0)
31446 emit_insn (gen_movdi (out, in));
31447 else if (INTVAL (amount) >= 64)
31449 if (code == ASHIFTRT)
31451 rtx const31_rtx = GEN_INT (31);
31452 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31453 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31455 else
31456 emit_insn (gen_movdi (out, const0_rtx));
31459 /* Now handle valid shifts. */
31460 else if (INTVAL (amount) < 32)
31462 /* Shifts by a constant less than 32. */
31463 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31465 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31466 emit_insn (SET (out_down,
31467 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31468 out_down)));
31469 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31471 else
31473 /* Shifts by a constant greater than 31. */
31474 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31476 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31477 if (code == ASHIFTRT)
31478 emit_insn (gen_ashrsi3 (out_up, in_up,
31479 GEN_INT (31)));
31480 else
31481 emit_insn (SET (out_up, const0_rtx));
31484 else
31486 /* We have a shift-by-register. */
31487 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31489 /* This alternative requires the scratch registers. */
31490 gcc_assert (scratch1 && REG_P (scratch1));
31491 gcc_assert (scratch2 && REG_P (scratch2));
31493 /* We will need the values "amount-32" and "32-amount" later.
31494 Swapping them around now allows the later code to be more general. */
31495 switch (code)
31497 case ASHIFT:
31498 emit_insn (SUB_32 (scratch1, amount));
31499 emit_insn (RSB_32 (scratch2, amount));
31500 break;
31501 case ASHIFTRT:
31502 emit_insn (RSB_32 (scratch1, amount));
31503 /* Also set CC = amount > 32. */
31504 emit_insn (SUB_S_32 (scratch2, amount));
31505 break;
31506 case LSHIFTRT:
31507 emit_insn (RSB_32 (scratch1, amount));
31508 emit_insn (SUB_32 (scratch2, amount));
31509 break;
31510 default:
31511 gcc_unreachable ();
31514 /* Emit code like this:
31516 arithmetic-left:
31517 out_down = in_down << amount;
31518 out_down = (in_up << (amount - 32)) | out_down;
31519 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31520 out_up = in_up << amount;
31522 arithmetic-right:
31523 out_down = in_down >> amount;
31524 out_down = (in_up << (32 - amount)) | out_down;
31525 if (amount < 32)
31526 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31527 out_up = in_up >> amount;
31529 logical-right:
31530 out_down = in_down >> amount;
31531 out_down = (in_up << (32 - amount)) | out_down;
31532 if (amount < 32)
31533 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31534 out_up = in_up >> amount;
31536 The ARM and Thumb2 variants are the same but implemented slightly
31537 differently. If this were only called during expand we could just
31538 use the Thumb2 case and let combine do the right thing, but this
31539 can also be called from post-reload splitters. */
31541 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31543 if (!TARGET_THUMB2)
31545 /* Emit code for ARM mode. */
31546 emit_insn (SET (out_down,
31547 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31548 if (code == ASHIFTRT)
31550 rtx_code_label *done_label = gen_label_rtx ();
31551 emit_jump_insn (BRANCH (LT, done_label));
31552 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31553 out_down)));
31554 emit_label (done_label);
31556 else
31557 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31558 out_down)));
31560 else
31562 /* Emit code for Thumb2 mode.
31563 Thumb2 can't do shift and or in one insn. */
31564 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31565 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31567 if (code == ASHIFTRT)
31569 rtx_code_label *done_label = gen_label_rtx ();
31570 emit_jump_insn (BRANCH (LT, done_label));
31571 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31572 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31573 emit_label (done_label);
31575 else
31577 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31578 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31582 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31585 #undef SUB_32
31586 #undef RSB_32
31587 #undef SUB_S_32
31588 #undef SET
31589 #undef SHIFT
31590 #undef LSHIFT
31591 #undef REV_LSHIFT
31592 #undef ORR
31593 #undef BRANCH
31597 /* Returns true if *COMPARISON is a valid comparison operation, and
31598 puts the operands into a form that is valid for it. */
31599 bool
31600 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31602 enum rtx_code code = GET_CODE (*comparison);
31603 int code_int;
31604 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31605 ? GET_MODE (*op2) : GET_MODE (*op1);
31607 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31609 if (code == UNEQ || code == LTGT)
31610 return false;
31612 code_int = (int)code;
31613 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31614 PUT_CODE (*comparison, (enum rtx_code)code_int);
31616 switch (mode)
31618 case SImode:
31619 if (!arm_add_operand (*op1, mode))
31620 *op1 = force_reg (mode, *op1);
31621 if (!arm_add_operand (*op2, mode))
31622 *op2 = force_reg (mode, *op2);
31623 return true;
31625 case DImode:
31626 if (!cmpdi_operand (*op1, mode))
31627 *op1 = force_reg (mode, *op1);
31628 if (!cmpdi_operand (*op2, mode))
31629 *op2 = force_reg (mode, *op2);
31630 return true;
31632 case SFmode:
31633 case DFmode:
31634 if (!arm_float_compare_operand (*op1, mode))
31635 *op1 = force_reg (mode, *op1);
31636 if (!arm_float_compare_operand (*op2, mode))
31637 *op2 = force_reg (mode, *op2);
31638 return true;
31639 default:
31640 break;
31643 return false;
31647 /* Maximum number of instructions to set a block of memory. */
31648 static int
31649 arm_block_set_max_insns (void)
31651 if (optimize_function_for_size_p (cfun))
31652 return 4;
31653 else
31654 return current_tune->max_insns_inline_memset;
31657 /* Return TRUE if it's profitable to set block of memory for
31658 non-vectorized case. VAL is the value to set the memory
31659 with. LENGTH is the number of bytes to set. ALIGN is the
31660 alignment of the destination memory in bytes. UNALIGNED_P
31661 is TRUE if we can only set the memory with instructions
31662 meeting alignment requirements. USE_STRD_P is TRUE if we
31663 can use strd to set the memory. */
31664 static bool
31665 arm_block_set_non_vect_profit_p (rtx val,
31666 unsigned HOST_WIDE_INT length,
31667 unsigned HOST_WIDE_INT align,
31668 bool unaligned_p, bool use_strd_p)
31670 int num = 0;
31671 /* For a leftover of 0-7 bytes, we can set the memory block using
31672 strb/strh/str with the minimum number of instructions. */
31673 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
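  /* For example (illustrative, not from the original source): a leftover of
     7 bytes takes str + strh + strb == 3 instructions and a leftover of
     3 bytes takes strh + strb == 2 instructions, matching leftover[7] and
     leftover[3] above.  */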
31675 if (unaligned_p)
31677 num = arm_const_inline_cost (SET, val);
31678 num += length / align + length % align;
31680 else if (use_strd_p)
31682 num = arm_const_double_inline_cost (val);
31683 num += (length >> 3) + leftover[length & 7];
31685 else
31687 num = arm_const_inline_cost (SET, val);
31688 num += (length >> 2) + leftover[length & 3];
31691 /* We may be able to combine the last STRH/STRB pair into a single STR
31692 by shifting one byte back. */
31693 if (unaligned_access && length > 3 && (length & 3) == 3)
31694 num--;
31696 return (num <= arm_block_set_max_insns ());
31699 /* Return TRUE if it's profitable to set block of memory for
31700 vectorized case. LENGTH is the number of bytes to set.
31701 ALIGN is the alignment of destination memory in bytes.
31702 MODE is the vector mode used to set the memory. */
31703 static bool
31704 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
31705 unsigned HOST_WIDE_INT align,
31706 enum machine_mode mode)
31708 int num;
31709 bool unaligned_p = ((align & 3) != 0);
31710 unsigned int nelt = GET_MODE_NUNITS (mode);
31712 /* Instruction loading constant value. */
31713 num = 1;
31714 /* Instructions storing the memory. */
31715 num += (length + nelt - 1) / nelt;
31716 /* Instructions adjusting the address expression. We only need to
31717 adjust the address expression if it's 4-byte aligned and the leftover
31718 bytes can only be stored by a mis-aligned store instruction. */
31719 if (!unaligned_p && (length & 3) != 0)
31720 num++;
31722 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
31723 if (!unaligned_p && mode == V16QImode)
31724 num--;
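  /* Worked example (illustrative, not from the original source): for
     length == 25 and a 4-byte aligned destination using V16QImode, num is
     1 (constant load) + 2 (ceil (25 / 16) stores) + 1 (address adjustment,
     since 25 is not a multiple of 4) - 1 (first vst1:v16qi) == 3, which is
     then compared against arm_block_set_max_insns ().  */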
31726 return (num <= arm_block_set_max_insns ());
31729 /* Set a block of memory using vectorization instructions for the
31730 unaligned case. We fill the first LENGTH bytes of the memory
31731 area starting from DSTBASE with byte constant VALUE. ALIGN is
31732 the alignment requirement of memory. Return TRUE if succeeded. */
31733 static bool
31734 arm_block_set_unaligned_vect (rtx dstbase,
31735 unsigned HOST_WIDE_INT length,
31736 unsigned HOST_WIDE_INT value,
31737 unsigned HOST_WIDE_INT align)
31739 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
31740 rtx dst, mem;
31741 rtx val_elt, val_vec, reg;
31742 rtx rval[MAX_VECT_LEN];
31743 rtx (*gen_func) (rtx, rtx);
31744 enum machine_mode mode;
31745 unsigned HOST_WIDE_INT v = value;
31747 gcc_assert ((align & 0x3) != 0);
31748 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31749 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31750 if (length >= nelt_v16)
31752 mode = V16QImode;
31753 gen_func = gen_movmisalignv16qi;
31755 else
31757 mode = V8QImode;
31758 gen_func = gen_movmisalignv8qi;
31760 nelt_mode = GET_MODE_NUNITS (mode);
31761 gcc_assert (length >= nelt_mode);
31762 /* Skip if it isn't profitable. */
31763 if (!arm_block_set_vect_profit_p (length, align, mode))
31764 return false;
31766 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31767 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31769 v = sext_hwi (v, BITS_PER_WORD);
31770 val_elt = GEN_INT (v);
31771 for (j = 0; j < nelt_mode; j++)
31772 rval[j] = val_elt;
31774 reg = gen_reg_rtx (mode);
31775 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31776 /* Emit instruction loading the constant value. */
31777 emit_move_insn (reg, val_vec);
31779 /* Handle nelt_mode bytes in a vector. */
31780 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
31782 emit_insn ((*gen_func) (mem, reg));
31783 if (i + 2 * nelt_mode <= length)
31784 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
31787 /* If at least nelt_v8 bytes are left over, we must be in
31788 V16QImode. */
31789 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
31791 /* Handle (8, 16) bytes leftover. */
31792 if (i + nelt_v8 < length)
31794 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
31795 /* We are shifting bytes back, set the alignment accordingly. */
31796 if ((length & 1) != 0 && align >= 2)
31797 set_mem_align (mem, BITS_PER_UNIT);
31799 emit_insn (gen_movmisalignv16qi (mem, reg));
31801 /* Handle (0, 8] bytes leftover. */
31802 else if (i < length && i + nelt_v8 >= length)
31804 if (mode == V16QImode)
31806 reg = gen_lowpart (V8QImode, reg);
31807 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
31809 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
31810 + (nelt_mode - nelt_v8))));
31811 /* We are shifting bytes back, set the alignment accordingly. */
31812 if ((length & 1) != 0 && align >= 2)
31813 set_mem_align (mem, BITS_PER_UNIT);
31815 emit_insn (gen_movmisalignv8qi (mem, reg));
31818 return true;
31821 /* Set a block of memory using vectorization instructions for the
31822 aligned case. We fill the first LENGTH bytes of the memory area
31823 starting from DSTBASE with byte constant VALUE. ALIGN is the
31824 alignment requirement of memory. Return TRUE if succeeded. */
31825 static bool
31826 arm_block_set_aligned_vect (rtx dstbase,
31827 unsigned HOST_WIDE_INT length,
31828 unsigned HOST_WIDE_INT value,
31829 unsigned HOST_WIDE_INT align)
31831 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
31832 rtx dst, addr, mem;
31833 rtx val_elt, val_vec, reg;
31834 rtx rval[MAX_VECT_LEN];
31835 enum machine_mode mode;
31836 unsigned HOST_WIDE_INT v = value;
31838 gcc_assert ((align & 0x3) == 0);
31839 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31840 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31841 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
31842 mode = V16QImode;
31843 else
31844 mode = V8QImode;
31846 nelt_mode = GET_MODE_NUNITS (mode);
31847 gcc_assert (length >= nelt_mode);
31848 /* Skip if it isn't profitable. */
31849 if (!arm_block_set_vect_profit_p (length, align, mode))
31850 return false;
31852 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31854 v = sext_hwi (v, BITS_PER_WORD);
31855 val_elt = GEN_INT (v);
31856 for (j = 0; j < nelt_mode; j++)
31857 rval[j] = val_elt;
31859 reg = gen_reg_rtx (mode);
31860 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31861 /* Emit instruction loading the constant value. */
31862 emit_move_insn (reg, val_vec);
31864 i = 0;
31865 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
31866 if (mode == V16QImode)
31868 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31869 emit_insn (gen_movmisalignv16qi (mem, reg));
31870 i += nelt_mode;
31871 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
31872 if (i + nelt_v8 < length && i + nelt_v16 > length)
31874 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31875 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31876 /* We are shifting bytes back, set the alignment accordingly. */
31877 if ((length & 0x3) == 0)
31878 set_mem_align (mem, BITS_PER_UNIT * 4);
31879 else if ((length & 0x1) == 0)
31880 set_mem_align (mem, BITS_PER_UNIT * 2);
31881 else
31882 set_mem_align (mem, BITS_PER_UNIT);
31884 emit_insn (gen_movmisalignv16qi (mem, reg));
31885 return true;
31887 /* Fall through for bytes leftover. */
31888 mode = V8QImode;
31889 nelt_mode = GET_MODE_NUNITS (mode);
31890 reg = gen_lowpart (V8QImode, reg);
31893 /* Handle 8 bytes in a vector. */
31894 for (; (i + nelt_mode <= length); i += nelt_mode)
31896 addr = plus_constant (Pmode, dst, i);
31897 mem = adjust_automodify_address (dstbase, mode, addr, i);
31898 emit_move_insn (mem, reg);
31901 /* Handle single word leftover by shifting 4 bytes back. We can
31902 use aligned access for this case. */
31903 if (i + UNITS_PER_WORD == length)
31905 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
31906 mem = adjust_automodify_address (dstbase, mode,
31907 addr, i - UNITS_PER_WORD);
31908 /* We are shifting 4 bytes back, set the alignment accordingly. */
31909 if (align > UNITS_PER_WORD)
31910 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
31912 emit_move_insn (mem, reg);
31914 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
31915 We have to use unaligned access for this case. */
31916 else if (i < length)
31918 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31919 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31920 /* We are shifting bytes back, set the alignment accordingly. */
31921 if ((length & 1) == 0)
31922 set_mem_align (mem, BITS_PER_UNIT * 2);
31923 else
31924 set_mem_align (mem, BITS_PER_UNIT);
31926 emit_insn (gen_movmisalignv8qi (mem, reg));
31929 return true;
31932 /* Set a block of memory using plain strh/strb instructions, only
31933 using instructions allowed by ALIGN on the processor. We fill the
31934 first LENGTH bytes of the memory area starting from DSTBASE
31935 with byte constant VALUE. ALIGN is the alignment requirement
31936 of memory. */
31937 static bool
31938 arm_block_set_unaligned_non_vect (rtx dstbase,
31939 unsigned HOST_WIDE_INT length,
31940 unsigned HOST_WIDE_INT value,
31941 unsigned HOST_WIDE_INT align)
31943 unsigned int i;
31944 rtx dst, addr, mem;
31945 rtx val_exp, val_reg, reg;
31946 enum machine_mode mode;
31947 HOST_WIDE_INT v = value;
31949 gcc_assert (align == 1 || align == 2);
31951 if (align == 2)
31952 v |= (value << BITS_PER_UNIT);
31954 v = sext_hwi (v, BITS_PER_WORD);
31955 val_exp = GEN_INT (v);
31956 /* Skip if it isn't profitable. */
31957 if (!arm_block_set_non_vect_profit_p (val_exp, length,
31958 align, true, false))
31959 return false;
31961 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31962 mode = (align == 2 ? HImode : QImode);
31963 val_reg = force_reg (SImode, val_exp);
31964 reg = gen_lowpart (mode, val_reg);
31966 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
31968 addr = plus_constant (Pmode, dst, i);
31969 mem = adjust_automodify_address (dstbase, mode, addr, i);
31970 emit_move_insn (mem, reg);
31973 /* Handle single byte leftover. */
31974 if (i + 1 == length)
31976 reg = gen_lowpart (QImode, val_reg);
31977 addr = plus_constant (Pmode, dst, i);
31978 mem = adjust_automodify_address (dstbase, QImode, addr, i);
31979 emit_move_insn (mem, reg);
31980 i++;
31983 gcc_assert (i == length);
31984 return true;
31987 /* Set a block of memory using plain strd/str/strh/strb instructions,
31988 to permit unaligned copies on processors which support unaligned
31989 semantics for those instructions. We fill the first LENGTH bytes
31990 of the memory area starting from DSTBASE with byte constant VALUE.
31991 ALIGN is the alignment requirement of memory. */
31992 static bool
31993 arm_block_set_aligned_non_vect (rtx dstbase,
31994 unsigned HOST_WIDE_INT length,
31995 unsigned HOST_WIDE_INT value,
31996 unsigned HOST_WIDE_INT align)
31998 unsigned int i;
31999 rtx dst, addr, mem;
32000 rtx val_exp, val_reg, reg;
32001 unsigned HOST_WIDE_INT v;
32002 bool use_strd_p;
32004 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32005 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32007 v = (value | (value << 8) | (value << 16) | (value << 24));
32008 if (length < UNITS_PER_WORD)
32009 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32011 if (use_strd_p)
32012 v |= (v << BITS_PER_WORD);
32013 else
32014 v = sext_hwi (v, BITS_PER_WORD);
32016 val_exp = GEN_INT (v);
32017 /* Skip if it isn't profitable. */
32018 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32019 align, false, use_strd_p))
32021 if (!use_strd_p)
32022 return false;
32024 /* Try without strd. */
32025 v = (v >> BITS_PER_WORD);
32026 v = sext_hwi (v, BITS_PER_WORD);
32027 val_exp = GEN_INT (v);
32028 use_strd_p = false;
32029 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32030 align, false, use_strd_p))
32031 return false;
32034 i = 0;
32035 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32036 /* Handle double words using strd if possible. */
32037 if (use_strd_p)
32039 val_reg = force_reg (DImode, val_exp);
32040 reg = val_reg;
32041 for (; (i + 8 <= length); i += 8)
32043 addr = plus_constant (Pmode, dst, i);
32044 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32045 emit_move_insn (mem, reg);
32048 else
32049 val_reg = force_reg (SImode, val_exp);
32051 /* Handle words. */
32052 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32053 for (; (i + 4 <= length); i += 4)
32055 addr = plus_constant (Pmode, dst, i);
32056 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32057 if ((align & 3) == 0)
32058 emit_move_insn (mem, reg);
32059 else
32060 emit_insn (gen_unaligned_storesi (mem, reg));
32063 /* Merge last pair of STRH and STRB into a STR if possible. */
32064 if (unaligned_access && i > 0 && (i + 3) == length)
32066 addr = plus_constant (Pmode, dst, i - 1);
32067 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32068 /* We are shifting one byte back, set the alignment accordingly. */
32069 if ((align & 1) == 0)
32070 set_mem_align (mem, BITS_PER_UNIT);
32072 /* Most likely this is an unaligned access, and we can't tell at
32073 compilation time. */
32074 emit_insn (gen_unaligned_storesi (mem, reg));
32075 return true;
32078 /* Handle half word leftover. */
32079 if (i + 2 <= length)
32081 reg = gen_lowpart (HImode, val_reg);
32082 addr = plus_constant (Pmode, dst, i);
32083 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32084 if ((align & 1) == 0)
32085 emit_move_insn (mem, reg);
32086 else
32087 emit_insn (gen_unaligned_storehi (mem, reg));
32089 i += 2;
32092 /* Handle single byte leftover. */
32093 if (i + 1 == length)
32095 reg = gen_lowpart (QImode, val_reg);
32096 addr = plus_constant (Pmode, dst, i);
32097 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32098 emit_move_insn (mem, reg);
32101 return true;
32104 /* Set a block of memory using vectorization instructions for both
32105 aligned and unaligned cases. We fill the first LENGTH bytes of
32106 the memory area starting from DSTBASE with byte constant VALUE.
32107 ALIGN is the alignment requirement of memory. */
32108 static bool
32109 arm_block_set_vect (rtx dstbase,
32110 unsigned HOST_WIDE_INT length,
32111 unsigned HOST_WIDE_INT value,
32112 unsigned HOST_WIDE_INT align)
32114 /* Check whether we need to use unaligned store instruction. */
32115 if (((align & 3) != 0 || (length & 3) != 0)
32116 /* Check whether unaligned store instruction is available. */
32117 && (!unaligned_access || BYTES_BIG_ENDIAN))
32118 return false;
32120 if ((align & 3) == 0)
32121 return arm_block_set_aligned_vect (dstbase, length, value, align);
32122 else
32123 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32126 /* Expand a string store operation. First we try to do it using
32127 vectorization instructions, then with ARM unaligned access and
32128 double-word stores if profitable. OPERANDS[0] is the destination,
32129 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32130 initialize the memory with, OPERANDS[3] is the known alignment of the
32131 destination. */
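/* Illustrative example (not from the original source): a call such as
   memset (p, 0xAB, 15) with a word-aligned P reaches this function with
   OPERANDS[1] == 15, OPERANDS[2] == 0xAB and OPERANDS[3] == 4, and is
   expanded by one of the helpers above depending on the tuning and on
   whether unaligned access and NEON are available.  */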
32132 bool
32133 arm_gen_setmem (rtx *operands)
32135 rtx dstbase = operands[0];
32136 unsigned HOST_WIDE_INT length;
32137 unsigned HOST_WIDE_INT value;
32138 unsigned HOST_WIDE_INT align;
32140 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32141 return false;
32143 length = UINTVAL (operands[1]);
32144 if (length > 64)
32145 return false;
32147 value = (UINTVAL (operands[2]) & 0xFF);
32148 align = UINTVAL (operands[3]);
32149 if (TARGET_NEON && length >= 8
32150 && current_tune->string_ops_prefer_neon
32151 && arm_block_set_vect (dstbase, length, value, align))
32152 return true;
32154 if (!unaligned_access && (align & 3) != 0)
32155 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32157 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32160 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32162 static unsigned HOST_WIDE_INT
32163 arm_asan_shadow_offset (void)
32165 return (unsigned HOST_WIDE_INT) 1 << 29;
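/* For reference (illustrative, not from the original source): with this
   offset the address sanitizer computes the shadow address of a byte as
   (addr >> 3) + 0x20000000.  */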
32169 /* This is a temporary fix for PR60655. Ideally we need
32170 to handle most of these cases in the generic part but
32171 currently we reject minus (..) (sym_ref). We try to
32172 ameliorate the case with minus (sym_ref1) (sym_ref2)
32173 where they are in the same section. */
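/* Illustrative example (not from the original source): for two symbols that
   end up in the same section, e.g. two variables both placed in .data, the
   difference of their addresses is representable and this function returns
   false; if they land in different sections (say .data and .rodata) it
   returns true and the expression is rejected for debug output.  */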
32175 static bool
32176 arm_const_not_ok_for_debug_p (rtx p)
32178 tree decl_op0 = NULL;
32179 tree decl_op1 = NULL;
32181 if (GET_CODE (p) == MINUS)
32183 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32185 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32186 if (decl_op1
32187 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32188 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32190 if ((TREE_CODE (decl_op1) == VAR_DECL
32191 || TREE_CODE (decl_op1) == CONST_DECL)
32192 && (TREE_CODE (decl_op0) == VAR_DECL
32193 || TREE_CODE (decl_op0) == CONST_DECL))
32194 return (get_variable_section (decl_op1, false)
32195 != get_variable_section (decl_op0, false));
32197 if (TREE_CODE (decl_op1) == LABEL_DECL
32198 && TREE_CODE (decl_op0) == LABEL_DECL)
32199 return (DECL_CONTEXT (decl_op1)
32200 != DECL_CONTEXT (decl_op0));
32203 return true;
32207 return false;
32210 static void
32211 arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
32213 const unsigned ARM_FE_INVALID = 1;
32214 const unsigned ARM_FE_DIVBYZERO = 2;
32215 const unsigned ARM_FE_OVERFLOW = 4;
32216 const unsigned ARM_FE_UNDERFLOW = 8;
32217 const unsigned ARM_FE_INEXACT = 16;
32218 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
32219 | ARM_FE_DIVBYZERO
32220 | ARM_FE_OVERFLOW
32221 | ARM_FE_UNDERFLOW
32222 | ARM_FE_INEXACT);
32223 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
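  /* For reference (illustrative, not from the original source): in the VFP
     FPSCR the cumulative exception flags occupy bits 0-4 and the
     corresponding trap-enable bits sit 8 bits higher, which is why the mask
     below clears both ARM_FE_ALL_EXCEPT and the same bits shifted left by
     ARM_FE_EXCEPT_SHIFT.  */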
32224 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
32225 tree new_fenv_var, reload_fenv, restore_fnenv;
32226 tree update_call, atomic_feraiseexcept, hold_fnclex;
32228 if (!TARGET_VFP || !TARGET_HARD_FLOAT)
32229 return;
32231 /* Generate the equivalent of:
32232 unsigned int fenv_var;
32233 fenv_var = __builtin_arm_get_fpscr ();
32235 unsigned int masked_fenv;
32236 masked_fenv = fenv_var & mask;
32238 __builtin_arm_set_fpscr (masked_fenv); */
32240 fenv_var = create_tmp_var (unsigned_type_node, NULL);
32241 get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
32242 set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
32243 mask = build_int_cst (unsigned_type_node,
32244 ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
32245 | ARM_FE_ALL_EXCEPT));
32246 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
32247 fenv_var, build_call_expr (get_fpscr, 0));
32248 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
32249 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
32250 *hold = build2 (COMPOUND_EXPR, void_type_node,
32251 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
32252 hold_fnclex);
32254 /* Store the value of masked_fenv to clear the exceptions:
32255 __builtin_arm_set_fpscr (masked_fenv); */
32257 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
32259 /* Generate the equivalent of:
32260 unsigned int new_fenv_var;
32261 new_fenv_var = __builtin_arm_get_fpscr ();
32263 __builtin_arm_set_fpscr (fenv_var);
32265 __atomic_feraiseexcept (new_fenv_var); */
32267 new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
32268 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
32269 build_call_expr (get_fpscr, 0));
32270 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
32271 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
32272 update_call = build_call_expr (atomic_feraiseexcept, 1,
32273 fold_convert (integer_type_node, new_fenv_var));
32274 *update = build2 (COMPOUND_EXPR, void_type_node,
32275 build2 (COMPOUND_EXPR, void_type_node,
32276 reload_fenv, restore_fnenv), update_call);
32279 /* Return TRUE if X is a reference to a value in a constant pool. */
32280 extern bool
32281 arm_is_constant_pool_ref (rtx x)
32283 return (MEM_P (x)
32284 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32285 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32288 #include "gt-arm.h"